LLVM OpenMP* Runtime Library
kmp_stats.h
1 #ifndef KMP_STATS_H
2 #define KMP_STATS_H
3 
9 //===----------------------------------------------------------------------===//
10 //
11 // The LLVM Compiler Infrastructure
12 //
13 // This file is dual licensed under the MIT and the University of Illinois Open
14 // Source Licenses. See LICENSE.txt for details.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "kmp_config.h"
19 
20 #if KMP_STATS_ENABLED
21 /*
22  * Statistics accumulator.
23  * Accumulates number of samples and computes min, max, mean, standard deviation on the fly.
24  *
25  * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
26  */
27 
28 #include <limits>
29 #include <math.h>
30 #include <vector>
31 #include <string>
32 #include <stdint.h>
33 #include <new> // placement new
34 #include "kmp_stats_timing.h"
35 
36 /*
37  * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
38  * are intended for the runtime library developer.
39  */
40 // #define KMP_DEVELOPER_STATS 1
41 
48  noTotal = 1<<0,
49  onlyInMaster = 1<<1,
50  noUnits = 1<<2,
51  notInMaster = 1<<3,
52  logEvent = 1<<4
53 };
54 
61  IDLE,
62  SERIAL_REGION,
63  FORK_JOIN_BARRIER,
64  PLAIN_BARRIER,
65  TASKWAIT,
66  TASKYIELD,
67  TASKGROUP,
68  IMPLICIT_TASK,
69  EXPLICIT_TASK
70 };
71 
// List of every counter known to the stats module, written as an X-macro.
// Each entry has the form macro(name, flags, arg):
//   name  -- counter identifier (counter_e enumerators are built from it)
//   flags -- 0, or an OR of stats_flags_e values describing the counter
//   arg   -- passed through unchanged to the macro supplied by the user
// Add new counters by appending an entry to this list.
88 #define KMP_FOREACH_COUNTER(macro, arg) \
89  macro (OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
90  macro (OMP_NESTED_PARALLEL, 0, arg) \
91  macro (OMP_FOR_static, 0, arg) \
92  macro (OMP_FOR_dynamic, 0, arg) \
93  macro (OMP_DISTRIBUTE, 0, arg) \
94  macro (OMP_BARRIER, 0, arg) \
95  macro (OMP_CRITICAL,0, arg) \
96  macro (OMP_SINGLE, 0, arg) \
97  macro (OMP_MASTER, 0, arg) \
98  macro (OMP_TEAMS, 0, arg) \
99  macro (OMP_set_lock, 0, arg) \
100  macro (OMP_test_lock, 0, arg) \
101  macro (REDUCE_wait, 0, arg) \
102  macro (REDUCE_nowait, 0, arg) \
103  macro (OMP_TASKYIELD, 0, arg) \
104  macro (OMP_TASKLOOP, 0, arg) \
105  macro (TASK_executed, 0, arg) \
106  macro (TASK_cancelled, 0, arg) \
107  macro (TASK_stolen, 0, arg)
108 
// List of every timer known to the stats module, written as an X-macro.
// Each entry has the form macro(name, flags, arg), where flags is 0 or an OR
// of stats_flags_e values and arg is forwarded unchanged.
// The final line appends the developer timers; KMP_FOREACH_DEVELOPER_TIMER
// expands to nothing unless KMP_DEVELOPER_STATS is enabled.
124 #define KMP_FOREACH_TIMER(macro, arg) \
125  macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
126  macro (FOR_static_scheduling, 0, arg) \
127  macro (FOR_dynamic_scheduling, 0, arg) \
128  macro (OMP_critical, 0, arg) \
129  macro (OMP_critical_wait, 0, arg) \
130  macro (OMP_single, 0, arg) \
131  macro (OMP_master, 0, arg) \
132  macro (OMP_idle, stats_flags_e::logEvent, arg) \
133  macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \
134  macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \
135  macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \
136  macro (OMP_parallel, stats_flags_e::logEvent, arg) \
137  macro (OMP_task_immediate, 0, arg) \
138  macro (OMP_task_taskwait, 0, arg) \
139  macro (OMP_task_taskyield, 0, arg) \
140  macro (OMP_task_taskgroup, 0, arg) \
141  macro (OMP_task_join_bar, 0, arg) \
142  macro (OMP_task_plain_bar, 0, arg) \
143  macro (OMP_serial, stats_flags_e::logEvent, arg) \
144  macro (OMP_taskloop_scheduling, 0, arg) \
145  macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
146  macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
147  macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
148  macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
149  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
150 
151 
152 // OMP_start_end -- Time from when OpenMP is initialized until the stats are printed at exit
153 // OMP_serial -- Thread zero time executing serial code
154 // OMP_work -- Elapsed time in code dispatched by a fork (measured in the thread)
155 // OMP_barrier -- Time at "real" barriers (includes task time)
156 // FOR_static_scheduling -- Time spent doing scheduling for a static "for"
157 // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
158 // OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region
159 // OMP_plain_barrier -- Time spent in a barrier construct
160 // OMP_fork_join_barrier -- Time spent in the fork-join barrier surrounding a parallel region
161 // OMP_parallel -- Time spent inside a parallel construct
162 // OMP_task_immediate -- Time spent executing non-deferred tasks
163 // OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct
164 // OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct
165 // OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup construct
166 // OMP_task_join_bar -- Time spent executing tasks inside a join barrier
167 // OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct
168 // OMP_single -- Time spent executing a "single" region
169 // OMP_master -- Time spent executing a "master" region
170 // OMP_set_numthreads -- Values passed to omp_set_num_threads
171 // OMP_PARALLEL_args -- Number of arguments passed to a parallel region
172 // FOR_static_iterations -- Number of available parallel chunks of work in a static for
173 // FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
174 // Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
175 
176 #if (KMP_DEVELOPER_STATS)
177 // Timers which are of interest to runtime library developers, not end users.
178 // These have to be explicitly enabled in addition to the other stats.
179 
180 // KMP_fork_barrier -- time in __kmp_fork_barrier
181 // KMP_join_barrier -- time in __kmp_join_barrier
182 // KMP_barrier -- time in __kmp_barrier
183 // KMP_end_split_barrier -- time in __kmp_end_split_barrier
184 // KMP_setup_icv_copy -- time in __kmp_setup_icv_copy
185 // KMP_icv_copy -- start/stop timer for any ICV copying
186 // KMP_linear_gather -- time in __kmp_linear_barrier_gather
187 // KMP_linear_release -- time in __kmp_linear_barrier_release
188 // KMP_tree_gather -- time in __kmp_tree_barrier_gather
189 // KMP_tree_release -- time in __kmp_tree_barrier_release
190 // KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
191 // KMP_hyper_release -- time in __kmp_hyper_barrier_release
// NOTE(review): the descriptions above are out of sync with the entries below.
// KMP_fork_barrier, KMP_join_barrier, KMP_barrier and KMP_icv_copy are
// described but not listed, while KMP_fork_call, KMP_join_call,
// KMP_hier_gather, KMP_hier_release, USER_resume, USER_suspend,
// KMP_allocate_team and USER_icv_copy are listed but not described.
// TODO: reconcile the two.
//
// Entries use the same macro(name, flags, arg) form as KMP_FOREACH_TIMER,
// which appends this list to its own.
192 # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
193  macro (KMP_fork_call, 0, arg) \
194  macro (KMP_join_call, 0, arg) \
195  macro (KMP_end_split_barrier, 0, arg) \
196  macro (KMP_hier_gather, 0, arg) \
197  macro (KMP_hier_release, 0, arg) \
198  macro (KMP_hyper_gather, 0, arg) \
199  macro (KMP_hyper_release, 0, arg) \
200  macro (KMP_linear_gather, 0, arg) \
201  macro (KMP_linear_release, 0, arg) \
202  macro (KMP_tree_gather, 0, arg) \
203  macro (KMP_tree_release, 0, arg) \
204  macro (USER_resume, 0, arg) \
205  macro (USER_suspend, 0, arg) \
206  macro (KMP_allocate_team, 0, arg) \
207  macro (KMP_setup_icv_copy, 0, arg) \
208  macro (USER_icv_copy, 0, arg)
209 #else
// Developer stats disabled: the list is empty, so no developer timers exist.
210 # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
211 #endif
212 
// List of explicit timers. It currently forwards to KMP_FOREACH_TIMER, so
// every timer_e entry has a matching explicit_timer_e entry in the same order.
229 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
230  KMP_FOREACH_TIMER(macro, arg)
231 
// ENUMERATE turns one list entry into an enumerator named <prefix><name>; the
// flags argument of the entry is ignored. It is only defined while the three
// enumerations below are being generated.
232 #define ENUMERATE(name,ignore,prefix) prefix##name,
// One TIMER_<name> enumerator per KMP_FOREACH_TIMER entry; TIMER_LAST is the
// number of timers.
233 enum timer_e {
234  KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
235  TIMER_LAST
236 };
237 
// One EXPLICIT_TIMER_<name> enumerator per KMP_FOREACH_EXPLICIT_TIMER entry;
// EXPLICIT_TIMER_LAST is the number of explicit timers.
238 enum explicit_timer_e {
239  KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
240  EXPLICIT_TIMER_LAST
241 };
242 
// One COUNTER_<name> enumerator per KMP_FOREACH_COUNTER entry; COUNTER_LAST is
// the number of counters.
243 enum counter_e {
244  KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
245  COUNTER_LAST
246 };
247 #undef ENUMERATE
248 
249 class timerPair {
250  explicit_timer_e timer_index;
251  timer_e timer;
252  public:
253  timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
254  inline explicit_timer_e get_index() const { return timer_index; }
255  inline timer_e get_timer() const { return timer; }
256  bool operator==(const timerPair & rhs) {
257  return this->get_index() == rhs.get_index();
258  }
259  bool operator!=(const timerPair & rhs) {
260  return !(*this == rhs);
261  }
262 };
263 
// Accumulator for a stream of samples. It keeps the sample count, minimum,
// maximum, running mean and the accumulator m2 from which getSD() derives the
// standard deviation.
class statistic {
  double minVal;
  double maxVal;
  double meanVal;
  double m2; // getSD() returns sqrt(m2 / sampleCount)
  uint64_t sampleCount;

public:
  // A new statistic starts out empty (see reset()).
  statistic() { reset(); }
  statistic(statistic const &o)
      : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
        sampleCount(o.sampleCount) {}

  double getMin() const { return minVal; }
  double getMean() const { return meanVal; }
  double getMax() const { return maxVal; }
  uint64_t getCount() const { return sampleCount; }

  // Standard deviation of the samples seen so far. With no samples the
  // quotient would be 0.0/0 (NaN), so an empty statistic reports 0.0 instead.
  double getSD() const {
    if (sampleCount == 0)
      return 0.0;
    return sqrt(m2 / sampleCount);
  }

  // Sum of all samples, reconstructed from the count and the mean.
  double getTotal() const { return sampleCount * meanVal; }

  // Return to the empty state. min/max are set to the extreme values of
  // double so that the first sample replaces both.
  void reset() {
    minVal = std::numeric_limits<double>::max();
    maxVal = -std::numeric_limits<double>::max();
    meanVal = 0.0;
    m2 = 0.0;
    sampleCount = 0;
  }

  // Defined outside this header.
  void addSample(double sample);
  void scale(double factor);
  // Divide by f; implemented as scaling by the reciprocal.
  void scaleDown(double f) { scale(1. / f); }
  statistic &operator+=(statistic const &other);

  std::string format(char unit, bool total = false) const;
};
298 
// Static description of one statistic (timer or counter).
struct statInfo {
  const char *name; // printable name of the statistic
  uint32_t flags;   // tested against stats_flags_e values by timeStat/counter
};
304 
305 class timeStat : public statistic
306 {
307  static statInfo timerInfo[];
308 
309  public:
310  timeStat() : statistic() {}
311  static const char * name(timer_e e) { return timerInfo[e].name; }
312  static bool noTotal (timer_e e) { return timerInfo[e].flags & stats_flags_e::noTotal; }
313  static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; }
314  static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; }
315  static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; }
316  static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; }
317  static void clearEventFlags() {
318  for(int i=0;i<TIMER_LAST;i++) {
319  timerInfo[i].flags &= (~(stats_flags_e::logEvent));
320  }
321  }
322 };
323 
324 // Where we need explicitly to start and end the timer, this version can be used
325 // Since these timers normally aren't nicely scoped, so don't have a good place to live
326 // on the stack of the thread, they're more work to use.
// Timer that is started and stopped by explicit calls rather than by scope.
// start() and stop() are defined outside this header.
327 class explicitTimer
328 {
    // Statistic associated with this timer (set by the constructor or
    // setStat()); presumably updated by stop() -- implementation not visible here.
329  timeStat * stat;
330  tsc_tick_count startTime;
    // Time stamp taken by the most recent pause().
331  tsc_tick_count pauseStartTime;
    // Sum of all pause()..resume() intervals since the last reset().
332  tsc_tick_count::tsc_interval_t totalPauseTime;
333 
334  public:
335  explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
336  explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
337 
338  void setStat (timeStat *s) { stat = s; }
339  void start(timer_e timerEnumValue);
    // Record the moment the pause began.
340  void pause() { pauseStartTime = tsc_tick_count::now(); }
    // Add the time elapsed since the matching pause() to the paused total.
341  void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
342  void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr = nullptr);
    // Clear all time stamps and the accumulated pause time; stat is kept.
343  void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
344 };
345 
346 // Where all you need is to time a block, this is enough.
347 // (It avoids the need to have an explicit end, leaving the scope suffices.)
348 class blockTimer : public explicitTimer
349 {
350  timer_e timerEnumValue;
351  public:
352  blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
353  ~blockTimer() { stop(timerEnumValue); }
354 };
355 
356 // Where you need to partition a thread's clock ticks into separate states
357 // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
358 // DOING_NOTHING would render these conditions:
359 // time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
360 // No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa
// Holds pointers to a thread's explicit timers plus a stack of timerPair
// entries. All member functions are defined outside this header.
361 class partitionedTimers
362 {
363  private:
    // One slot per explicit_timer_e value (plus one spare), filled by add_timer().
364  explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
    // Stack of timers manipulated by init()/push()/pop(); presumably the top
    // entry is the timer currently running -- TODO confirm in the implementation.
365  std::vector<timerPair> timer_stack;
366  public:
367  partitionedTimers();
368  void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
369  void init(timerPair timer_index);
370  void push(timerPair timer_index);
371  void pop();
372  void windup();
373 };
374 
375 // Special wrapper around the partitioned timers to aid timing code blocks
376 // It avoids the need to have an explicit end, leaving the scope suffices.
377 class blockPartitionedTimer
378 {
379  partitionedTimers* part_timers;
380  timerPair timer_pair;
381  public:
382  blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
383  ~blockPartitionedTimer() { part_timers->pop(); }
384 };
385 
386 // Special wrapper around the thread state to aid in keeping state in code blocks
387 // It avoids the need to have an explicit end, leaving the scope suffices.
388 class blockThreadState
389 {
390  stats_state_e* state_pointer;
391  stats_state_e old_state;
392  public:
393  blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
394  *state_pointer = new_state;
395  }
396  ~blockThreadState() { *state_pointer = old_state; }
397 };
398 
399 // If all you want is a count, then you can use this...
400 // The individual per-thread counts will be aggregated into a statistic at program exit.
401 class counter
402 {
403  uint64_t value;
404  static const statInfo counterInfo[];
405 
406  public:
407  counter() : value(0) {}
408  void increment() { value++; }
409  uint64_t getValue() const { return value; }
410  void reset() { value = 0; }
411  static const char * name(counter_e e) { return counterInfo[e].name; }
412  static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; }
413 };
414 
415 /* ****************************************************************
416  Class to implement an event
417 
418  There are four components to an event: start time, stop time
419  nest_level, and timer_name.
420  The start and stop time should be obvious (recorded in clock ticks).
421  The nest_level relates to the bar width in the timeline graph.
422  The timer_name is used to determine which timer event triggered this event.
423 
424  the interface to this class is through four read-only operations:
425  1) getStart() -- returns the start time as 64 bit integer
426  2) getStop() -- returns the stop time as 64 bit integer
427  3) getNestLevel() -- returns the nest level of the event
428  4) getTimerName() -- returns the timer name that triggered event
429 
430  *MORE ON NEST_LEVEL*
431  The nest level is used in the bar graph that represents the timeline.
432  Its main purpose is for showing how events are nested inside each other.
433  For example, say events, A, B, and C are recorded. If the timeline
434  looks like this:
435 
436 Begin -------------------------------------------------------------> Time
437  | | | | | |
438  A B C C B A
439  start start start end end end
440 
441  Then A, B, C will have a nest level of 1, 2, 3 respectively.
442  These values are then used to calculate the barwidth so you can
443  see that inside A, B has occurred, and inside B, C has occurred.
444  Currently, this is shown with A's bar width being larger than B's
445  bar width, and B's bar width being larger than C's bar width.
446 
447 **************************************************************** */
448 class kmp_stats_event {
449  uint64_t start;
450  uint64_t stop;
451  int nest_level;
452  timer_e timer_name;
453  public:
454  kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
455  kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
456  inline uint64_t getStart() const { return start; }
457  inline uint64_t getStop() const { return stop; }
458  inline int getNestLevel() const { return nest_level; }
459  inline timer_e getTimerName() const { return timer_name; }
460 };
461 
462 /* ****************************************************************
463  Class to implement a dynamically expandable array of events
464 
465  ---------------------------------------------------------
466  | event 1 | event 2 | event 3 | event 4 | ... | event N |
467  ---------------------------------------------------------
468 
469  An event is pushed onto the back of this array at every
470  explicitTimer->stop() call. The event records the thread #,
471  start time, stop time, and nest level related to the bar width.
472 
473  The event vector starts at size INIT_SIZE and grows (doubles in size)
474  if needed. An implication of this behavior is that log(N)
475  reallocations are needed (where N is number of events). If you want
476  to avoid reallocations, then set INIT_SIZE to a large value.
477 
478  the interface to this class is through six operations:
479  1) reset() -- sets the internal_size back to 0 but does not deallocate any memory
480  2) size() -- returns the number of valid elements in the vector
481  3) push_back(start, stop, nest, timer_name) -- pushes an event onto
482  the back of the array
483  4) deallocate() -- frees all memory associated with the vector
484  5) sort() -- sorts the vector by start time
485  6) operator[index] or at(index) -- returns event reference at that index
486 
487 **************************************************************** */
488 class kmp_stats_event_vector {
489  kmp_stats_event* events;
490  int internal_size;
491  int allocated_size;
492  static const int INIT_SIZE = 1024;
493  public:
494  kmp_stats_event_vector() {
495  events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE);
496  internal_size = 0;
497  allocated_size = INIT_SIZE;
498  }
499  ~kmp_stats_event_vector() {}
500  inline void reset() { internal_size = 0; }
501  inline int size() const { return internal_size; }
502  void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
503  int i;
504  if(internal_size == allocated_size) {
505  kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2);
506  for(i=0;i<internal_size;i++) tmp[i] = events[i];
507  __kmp_free(events);
508  events = tmp;
509  allocated_size*=2;
510  }
511  events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
512  internal_size++;
513  return;
514  }
515  void deallocate();
516  void sort();
517  const kmp_stats_event & operator[](int index) const { return events[index]; }
518  kmp_stats_event & operator[](int index) { return events[index]; }
519  const kmp_stats_event & at(int index) const { return events[index]; }
520  kmp_stats_event & at(int index) { return events[index]; }
521 };
522 
523 /* ****************************************************************
524  Class to implement a doubly-linked, circular, statistics list
525 
526  |---| ---> |---| ---> |---| ---> |---| ---> ... next
527  | | | | | | | |
528  |---| <--- |---| <--- |---| <--- |---| <--- ... prev
529  Sentinel first second third
530  Node node node node
531 
532  The Sentinel Node is the user handle on the list.
533  The first node corresponds to thread 0's statistics.
534  The second node corresponds to thread 1's statistics and so on...
535 
536  Each node has a _timers, _counters, and _explicitTimers array to
537  hold that thread's statistics. The _explicitTimers
538  point to the correct _timer and update its statistics at every stop() call.
539  The explicitTimers' pointers are set up in the constructor.
540  Each node also has an event vector to hold that thread's timing events.
541  The event vector expands as necessary and records the start-stop times
542  for each timer.
543 
544  The nestLevel variable is for plotting events and is related
545  to the bar width in the timeline graph.
546 
547  Every thread will have a __thread local pointer to its node in
548  the list. The sentinel node is used by the master thread to
549  store "dummy" statistics before __kmp_create_worker() is called.
550 
551 **************************************************************** */
552 class kmp_stats_list {
553  int gtid;
554  timeStat _timers[TIMER_LAST+1];
555  counter _counters[COUNTER_LAST+1];
556  explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
557  partitionedTimers _partitionedTimers;
558  int _nestLevel; // one per thread
559  kmp_stats_event_vector _event_vector;
560  kmp_stats_list* next;
561  kmp_stats_list* prev;
562  stats_state_e state;
563  int thread_is_idle_flag;
564  public:
565  kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this),
566  state(IDLE), thread_is_idle_flag(0) {
567 #define doInit(name,ignore1,ignore2) \
568  getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
569  _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name));
570  KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
571 #undef doInit
572  }
573  ~kmp_stats_list() { }
574  inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
575  inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
576  inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
577  inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; }
578  inline timeStat * getTimers() { return _timers; }
579  inline counter * getCounters() { return _counters; }
580  inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
581  inline kmp_stats_event_vector & getEventVector() { return _event_vector; }
582  inline void resetEventVector() { _event_vector.reset(); }
583  inline void incrementNestValue() { _nestLevel++; }
584  inline int getNestValue() { return _nestLevel; }
585  inline void decrementNestValue() { _nestLevel--; }
586  inline int getGtid() const { return gtid; }
587  inline void setGtid(int newgtid) { gtid = newgtid; }
588  inline void setState(stats_state_e newstate) { state = newstate; }
589  inline stats_state_e getState() const { return state; }
590  inline stats_state_e * getStatePointer() { return &state; }
591  inline bool isIdle() { return thread_is_idle_flag==1; }
592  inline void setIdleFlag() { thread_is_idle_flag = 1; }
593  inline void resetIdleFlag() { thread_is_idle_flag = 0; }
594  kmp_stats_list* push_back(int gtid); // returns newly created list node
595  inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
596  _event_vector.push_back(start_time, stop_time, nest_level, name);
597  }
598  void deallocate();
599  class iterator;
600  kmp_stats_list::iterator begin();
601  kmp_stats_list::iterator end();
602  int size();
603  class iterator {
604  kmp_stats_list* ptr;
605  friend kmp_stats_list::iterator kmp_stats_list::begin();
606  friend kmp_stats_list::iterator kmp_stats_list::end();
607  public:
608  iterator();
609  ~iterator();
610  iterator operator++();
611  iterator operator++(int dummy);
612  iterator operator--();
613  iterator operator--(int dummy);
614  bool operator!=(const iterator & rhs);
615  bool operator==(const iterator & rhs);
616  kmp_stats_list* operator*() const; // dereference operator
617  };
618 };
619 
620 /* ****************************************************************
621  Class to encapsulate all output functions and the environment variables
622 
623  This module holds filenames for various outputs (normal stats, events, plot file),
624  as well as coloring information for the plot file.
625 
626  The filenames and flags variables are read from environment variables.
627  These are read once by the constructor of the global variable __kmp_stats_output
628  which calls init().
629 
630  During this init() call, event flags for the timeStat::timerInfo[] global array
631  are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
632 
633  The only interface function that is public is outputStats(heading). This function
634  should print out everything it needs to, either to files or stderr,
635  depending on the environment variables described below
636 
637  ENVIRONMENT VARIABLES:
638  KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file,
639  otherwise, print to stderr
640  KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either
641  KMP_STATS_FILE or stderr
642  KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
643  otherwise, the plot file is sent to "events.plt"
644  KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events
645  KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
646  otherwise, output is sent to "events.dat"
647 
648 **************************************************************** */
// Output side of the stats module. Only the constructor and outputStats() are
// public; everything else is private and, apart from init(),
// eventPrintingEnabled() and outputFileName, static.
649 class kmp_stats_output_module {
650 
651  public:
    // Colour with one float per channel.
652  struct rgb_color {
653  float r;
654  float g;
655  float b;
656  };
657 
658  private:
659  std::string outputFileName;
660  static const char* eventsFileName;
661  static const char* plotFileName;
662  static int printPerThreadFlag;
663  static int printPerThreadEventsFlag;
664  static const rgb_color globalColorArray[];
    // Indexed by timer_e (see getEventColor()).
665  static rgb_color timerColorInfo[];
666 
667  void init();
668  static void setupEventColors();
669  static void printPloticusFile();
670  static void printHeaderInfo(FILE *statsOut);
671  static void printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats);
672  static void printCounterStats(FILE *statsOut, statistic const * theStats);
673  static void printCounters(FILE * statsOut, counter const * theCounters);
674  static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid);
    // Colour assigned to timer e.
675  static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
676  static void windupExplicitTimers();
    // True when printPerThreadEventsFlag is non-zero.
677  bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
678 
679  public:
    // Construction runs init().
680  kmp_stats_output_module() { init(); }
681  void outputStats(const char* heading);
682 };
683 
// Entry points and globals of the stats module, declared with C linkage.
// NOTE(review): the __cplusplus guards have no effect in practice, since the
// declarations above already require a C++ compiler.
684 #ifdef __cplusplus
685 extern "C" {
686 #endif
687 void __kmp_stats_init();
688 void __kmp_stats_fini();
689 void __kmp_reset_stats();
690 void __kmp_output_stats(const char *);
691 void __kmp_accumulate_stats_at_exit(void);
692 // thread local pointer to stats node within list
693 extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
694 // head to stats list.
695 extern kmp_stats_list* __kmp_stats_list;
696 // lock for __kmp_stats_list
697 extern kmp_tas_lock_t __kmp_stats_lock;
698 // reference start time
699 extern tsc_tick_count __kmp_stats_start_time;
700 // interface to output
701 extern kmp_stats_output_module __kmp_stats_output;
702 
703 #ifdef __cplusplus
704 }
705 #endif
706 
707 // Simple, standard interfaces that drop out completely if stats aren't enabled
708 
709 
// Time the rest of the enclosing scope with timer `name`: declares a
// blockTimer that starts immediately and stops when the scope is left.
722 #define KMP_TIME_BLOCK(name) \
723  blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)
724 
// Add `value` as a sample to the statistic of timer `name` on this thread.
735 #define KMP_COUNT_VALUE(name, value) \
736  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
737 
// Increment counter `name` on this thread by one.
747 #define KMP_COUNT_BLOCK(name) \
748  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
749 
// Call start() on this thread's explicit timer `name`.
761 #define KMP_START_EXPLICIT_TIMER(name) \
762  __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name)
763 
// Call stop() on this thread's explicit timer `name`.
775 #define KMP_STOP_EXPLICIT_TIMER(name) \
776  __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name)
777 
// Forward to __kmp_output_stats() with the given heading.
792 #define KMP_OUTPUT_STATS(heading_string) \
793  __kmp_output_stats(heading_string)
794 
// Call init() on this thread's partitioned timers with timer `name`.
802 #define KMP_INIT_PARTITIONED_TIMERS(name) \
803  __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
804 
// Push timer `name` onto this thread's partitioned timers for the rest of the
// enclosing scope; it is popped again when the scope is left.
805 #define KMP_TIME_PARTITIONED_BLOCK(name) \
806  blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \
807  timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
808 
// Push timer `name` onto this thread's partitioned timers.
809 #define KMP_PUSH_PARTITIONED_TIMER(name) \
810  __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
811 
// Pop the most recently pushed partitioned timer of this thread.
812 #define KMP_POP_PARTITIONED_TIMER() \
813  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
814 
// Set this thread's stats state.
815 #define KMP_SET_THREAD_STATE(state_name) \
816  __kmp_stats_thread_ptr->setState(state_name)
817 
// Read this thread's stats state.
818 #define KMP_GET_THREAD_STATE() \
819  __kmp_stats_thread_ptr->getState()
820 
// Set this thread's stats state for the rest of the enclosing scope; the
// previous state is restored when the scope is left.
821 #define KMP_SET_THREAD_STATE_BLOCK(state_name) \
822  blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name)
823 
// Forward to __kmp_reset_stats().
831 #define KMP_RESET_STATS() __kmp_reset_stats()
832 
// Developer variants of the instrumentation macros. With KMP_DEVELOPER_STATS
// enabled they forward to the regular macros; otherwise they expand to a
// no-op expression.
833 #if (KMP_DEVELOPER_STATS)
834 # define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
835 # define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v)
836 # define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
837 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
838 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
839 # define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
840 #else
841 // Null definitions
842 # define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
843 # define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
844 # define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
845 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
846 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
847 # define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
848 #endif
849 
850 #else // KMP_STATS_ENABLED
851 
852 // Null definitions
// Used when KMP_STATS_ENABLED is off: every instrumentation macro expands to
// the no-op expression ((void)0), so call sites compile away.
853 #define KMP_TIME_BLOCK(n) ((void)0)
854 #define KMP_COUNT_VALUE(n,v) ((void)0)
855 #define KMP_COUNT_BLOCK(n) ((void)0)
856 #define KMP_START_EXPLICIT_TIMER(n) ((void)0)
857 #define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)
858 
859 #define KMP_OUTPUT_STATS(heading_string) ((void)0)
860 #define KMP_RESET_STATS() ((void)0)
861 
862 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
863 #define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
864 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
865 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
866 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
867 #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
868 #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
869 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
870 #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
871 #define KMP_POP_PARTITIONED_TIMER() ((void)0)
872 #define KMP_SET_THREAD_STATE(state_name) ((void)0)
// NOTE(review): with stats disabled this yields void, so the result of
// KMP_GET_THREAD_STATE() cannot be used as a value in code that must build in
// both configurations.
873 #define KMP_GET_THREAD_STATE() ((void)0)
874 #define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
875 #endif // KMP_STATS_ENABLED
876 
877 #endif // KMP_STATS_H
statistic is valid only for master
Definition: kmp_stats.h:49
statistic is valid only for non-master threads
Definition: kmp_stats.h:51
do not show a TOTAL_aggregation for this statistic
Definition: kmp_stats.h:48
statistic can be logged on the event timeline when KMP_STATS_EVENTS is on (valid only for timers) ...
Definition: kmp_stats.h:52
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
Definition: kmp_stats.h:229
statistic doesn't need units printed next to it in output
Definition: kmp_stats.h:50
stats_flags_e
flags to describe the statistic (timer or counter)
Definition: kmp_stats.h:47
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
Definition: kmp_stats.h:88
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:60