18 #include "kmp_config.h" 34 #include "kmp_stats_timing.h" 88 #define KMP_FOREACH_COUNTER(macro, arg) \ 89 macro (OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \ 90 macro (OMP_NESTED_PARALLEL, 0, arg) \ 91 macro (OMP_FOR_static, 0, arg) \ 92 macro (OMP_FOR_dynamic, 0, arg) \ 93 macro (OMP_DISTRIBUTE, 0, arg) \ 94 macro (OMP_BARRIER, 0, arg) \ 95 macro (OMP_CRITICAL,0, arg) \ 96 macro (OMP_SINGLE, 0, arg) \ 97 macro (OMP_MASTER, 0, arg) \ 98 macro (OMP_TEAMS, 0, arg) \ 99 macro (OMP_set_lock, 0, arg) \ 100 macro (OMP_test_lock, 0, arg) \ 101 macro (REDUCE_wait, 0, arg) \ 102 macro (REDUCE_nowait, 0, arg) \ 103 macro (OMP_TASKYIELD, 0, arg) \ 104 macro (OMP_TASKLOOP, 0, arg) \ 105 macro (TASK_executed, 0, arg) \ 106 macro (TASK_cancelled, 0, arg) \ 107 macro (TASK_stolen, 0, arg) \ 125 #define KMP_FOREACH_TIMER(macro, arg) \ 126 macro (OMP_worker_thread_life, 0, arg) \ 127 macro (FOR_static_scheduling, 0, arg) \ 128 macro (FOR_dynamic_scheduling, 0, arg) \ 129 macro (OMP_critical, 0, arg) \ 130 macro (OMP_critical_wait, 0, arg) \ 131 macro (OMP_single, 0, arg) \ 132 macro (OMP_master, 0, arg) \ 133 macro (OMP_idle, 0, arg) \ 134 macro (OMP_plain_barrier, 0, arg) \ 135 macro (OMP_fork_join_barrier, 0, arg) \ 136 macro (OMP_parallel, 0, arg) \ 137 macro (OMP_task_immediate, 0, arg) \ 138 macro (OMP_task_taskwait, 0, arg) \ 139 macro (OMP_task_taskyield, 0, arg) \ 140 macro (OMP_task_taskgroup, 0, arg) \ 141 macro (OMP_task_join_bar, 0, arg) \ 142 macro (OMP_task_plain_bar, 0, arg) \ 143 macro (OMP_serial, 0, arg) \ 144 macro (OMP_taskloop_scheduling, 0, arg) \ 145 macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 146 macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 147 macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 148 macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ 149 KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ 177 #if (KMP_DEVELOPER_STATS) 193 # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ 194 macro (KMP_fork_call, 0, arg) \ 195 macro (KMP_join_call, 0, arg) \ 196 macro (KMP_fork_barrier, stats_flags_e::logEvent, arg) \ 197 macro (KMP_join_barrier, stats_flags_e::logEvent, arg) \ 198 macro (KMP_barrier, 0, arg) \ 199 macro (KMP_end_split_barrier, 0, arg) \ 200 macro (KMP_hier_gather, 0, arg) \ 201 macro (KMP_hier_release, 0, arg) \ 202 macro (KMP_hyper_gather, stats_flags_e::logEvent, arg) \ 203 macro (KMP_hyper_release, stats_flags_e::logEvent, arg) \ 204 macro (KMP_linear_gather, 0, arg) \ 205 macro (KMP_linear_release, 0, arg) \ 206 macro (KMP_tree_gather, 0, arg) \ 207 macro (KMP_tree_release, 0, arg) \ 208 macro (USER_master_invoke, stats_flags_e::logEvent, arg) \ 209 macro (USER_worker_invoke, stats_flags_e::logEvent, arg) \ 210 macro (USER_resume, stats_flags_e::logEvent, arg) \ 211 macro (USER_suspend, stats_flags_e::logEvent, arg) \ 212 macro (USER_launch_thread_loop, stats_flags_e::logEvent, arg) \ 213 macro (KMP_allocate_team, 0, arg) \ 214 macro (KMP_setup_icv_copy, 0, arg) \ 215 macro (USER_icv_copy, 0, arg) 217 # define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) 236 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ 237 macro(OMP_worker_thread_life, 0, arg) \ 238 macro(FOR_static_scheduling, 0, arg) \ 239 macro(FOR_dynamic_scheduling, 0, arg) \ 240 macro(OMP_critical, 0, arg) \ 241 macro(OMP_critical_wait, 0, arg) \ 242 macro(OMP_single, 0, arg) \ 243 macro(OMP_master, 0, arg) \ 244 macro(OMP_idle, 0, arg) \ 245 macro(OMP_plain_barrier, 0, arg) \ 246 macro(OMP_fork_join_barrier, 0, arg) \ 247 macro(OMP_parallel, 0, arg) \ 248 macro(OMP_task_immediate, 0, arg) \ 249 macro(OMP_task_taskwait, 0, arg) \ 250 macro(OMP_task_taskyield, 0, arg) \ 251 macro(OMP_task_taskgroup, 0, arg) \ 252 macro(OMP_task_join_bar, 0, arg) \ 253 macro(OMP_task_plain_bar, 0, arg) \ 254 macro(OMP_serial, 0, arg) \ 255 macro(OMP_taskloop_scheduling, 0, arg) \ 256 KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro,arg) \ 259 #if (KMP_DEVELOPER_STATS) 260 # define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) \ 261 macro(USER_launch_thread_loop, stats_flags_e::logEvent, arg) 263 # define KMP_FOREACH_EXPLICIT_DEVELOPER_TIMER(macro, arg) 266 #define ENUMERATE(name,ignore,prefix) prefix##name, 268 KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
271 enum explicit_timer_e {
281 explicit_timer_e timer_index;
284 timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
285 inline explicit_timer_e get_index()
const {
return timer_index; }
286 inline timer_e get_timer()
const {
return timer; }
287 bool operator==(
const timerPair & rhs) {
288 return this->get_index() == rhs.get_index();
290 bool operator!=(
const timerPair & rhs) {
291 return !(*
this == rhs);
301 uint64_t sampleCount;
304 statistic() { reset(); }
305 statistic (statistic
const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {}
307 double getMin()
const {
return minVal; }
308 double getMean()
const {
return meanVal; }
309 double getMax()
const {
return maxVal; }
310 uint64_t getCount()
const {
return sampleCount; }
311 double getSD()
const {
return sqrt(m2/sampleCount); }
312 double getTotal()
const {
return sampleCount*meanVal; }
316 minVal = std::numeric_limits<double>::max();
317 maxVal = -std::numeric_limits<double>::max();
322 void addSample(
double sample);
323 void scale (
double factor);
324 void scaleDown(
double f) { scale (1./f); }
325 statistic & operator+= (statistic
const & other);
327 std::string format(
char unit,
bool total=
false)
const;
336 class timeStat :
public statistic
338 static statInfo timerInfo[];
341 timeStat() : statistic() {}
342 static const char * name(timer_e e) {
return timerInfo[e].name; }
348 static void clearEventFlags() {
349 for(
int i=0;i<TIMER_LAST;i++) {
361 tsc_tick_count startTime;
362 tsc_tick_count pauseStartTime;
363 tsc_tick_count::tsc_interval_t totalPauseTime;
366 explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
367 explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
369 void setStat (timeStat *s) { stat = s; }
370 void start(timer_e timerEnumValue);
371 void pause() { pauseStartTime = tsc_tick_count::now(); }
372 void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
373 void stop(timer_e timerEnumValue);
374 void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
379 class blockTimer :
public explicitTimer
381 timer_e timerEnumValue;
383 blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
384 ~blockTimer() { stop(timerEnumValue); }
392 class partitionedTimers
395 explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
396 std::vector<timerPair> timer_stack;
399 void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
400 void init(timerPair timer_index);
401 void push(timerPair timer_index);
408 class blockPartitionedTimer
410 partitionedTimers* part_timers;
411 timerPair timer_pair;
413 blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
414 ~blockPartitionedTimer() { part_timers->pop(); }
419 class blockThreadState
424 blockThreadState(
stats_state_e* thread_state_pointer,
stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
425 *state_pointer = new_state;
427 ~blockThreadState() { *state_pointer = old_state; }
435 static const statInfo counterInfo[];
438 counter() : value(0) {}
439 void increment() { value++; }
440 uint64_t getValue()
const {
return value; }
441 void reset() { value = 0; }
442 static const char * name(counter_e e) {
return counterInfo[e].name; }
479 class kmp_stats_event {
485 kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
486 kmp_stats_event(uint64_t strt, uint64_t stp,
int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
487 inline uint64_t getStart()
const {
return start; }
488 inline uint64_t getStop()
const {
return stop; }
489 inline int getNestLevel()
const {
return nest_level; }
490 inline timer_e getTimerName()
const {
return timer_name; }
519 class kmp_stats_event_vector {
520 kmp_stats_event* events;
523 static const int INIT_SIZE = 1024;
525 kmp_stats_event_vector() {
526 events = (kmp_stats_event*)__kmp_allocate(
sizeof(kmp_stats_event)*INIT_SIZE);
528 allocated_size = INIT_SIZE;
530 ~kmp_stats_event_vector() {}
531 inline void reset() { internal_size = 0; }
532 inline int size()
const {
return internal_size; }
533 void push_back(uint64_t start_time, uint64_t stop_time,
int nest_level, timer_e name) {
535 if(internal_size == allocated_size) {
536 kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(
sizeof(kmp_stats_event)*allocated_size*2);
537 for(i=0;i<internal_size;i++) tmp[i] = events[i];
542 events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
548 const kmp_stats_event & operator[](
int index)
const {
return events[index]; }
549 kmp_stats_event & operator[](
int index) {
return events[index]; }
550 const kmp_stats_event & at(
int index)
const {
return events[index]; }
551 kmp_stats_event & at(
int index) {
return events[index]; }
583 class kmp_stats_list {
585 timeStat _timers[TIMER_LAST+1];
586 counter _counters[COUNTER_LAST+1];
587 explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
588 partitionedTimers _partitionedTimers;
590 kmp_stats_event_vector _event_vector;
591 kmp_stats_list* next;
592 kmp_stats_list* prev;
594 int thread_is_idle_flag;
596 kmp_stats_list() : _nestLevel(0), _event_vector(), next(
this), prev(
this),
597 state(IDLE), thread_is_idle_flag(0) {
598 #define doInit(name,ignore1,ignore2) \ 599 getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \ 600 _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name)); 604 ~kmp_stats_list() { }
605 inline timeStat * getTimer(timer_e idx) {
return &_timers[idx]; }
606 inline counter * getCounter(counter_e idx) {
return &_counters[idx]; }
607 inline explicitTimer * getExplicitTimer(explicit_timer_e idx) {
return &_explicitTimers[idx]; }
608 inline partitionedTimers * getPartitionedTimers() {
return &_partitionedTimers; }
609 inline timeStat * getTimers() {
return _timers; }
610 inline counter * getCounters() {
return _counters; }
611 inline explicitTimer * getExplicitTimers() {
return _explicitTimers; }
612 inline kmp_stats_event_vector & getEventVector() {
return _event_vector; }
613 inline void resetEventVector() { _event_vector.reset(); }
614 inline void incrementNestValue() { _nestLevel++; }
615 inline int getNestValue() {
return _nestLevel; }
616 inline void decrementNestValue() { _nestLevel--; }
617 inline int getGtid()
const {
return gtid; }
618 inline void setGtid(
int newgtid) { gtid = newgtid; }
619 inline void setState(
stats_state_e newstate) { state = newstate; }
622 inline bool isIdle() {
return thread_is_idle_flag==1; }
623 inline void setIdleFlag() { thread_is_idle_flag = 1; }
624 inline void resetIdleFlag() { thread_is_idle_flag = 0; }
625 kmp_stats_list* push_back(
int gtid);
626 inline void push_event(uint64_t start_time, uint64_t stop_time,
int nest_level, timer_e name) {
627 _event_vector.push_back(start_time, stop_time, nest_level, name);
631 kmp_stats_list::iterator begin();
632 kmp_stats_list::iterator end();
636 friend kmp_stats_list::iterator kmp_stats_list::begin();
637 friend kmp_stats_list::iterator kmp_stats_list::end();
641 iterator operator++();
642 iterator operator++(
int dummy);
643 iterator operator--();
644 iterator operator--(
int dummy);
645 bool operator!=(
const iterator & rhs);
646 bool operator==(
const iterator & rhs);
647 kmp_stats_list* operator*()
const;
680 class kmp_stats_output_module {
690 std::string outputFileName;
691 static const char* eventsFileName;
692 static const char* plotFileName;
693 static int printPerThreadFlag;
694 static int printPerThreadEventsFlag;
695 static const rgb_color globalColorArray[];
696 static rgb_color timerColorInfo[];
699 static void setupEventColors();
700 static void printPloticusFile();
701 static void printHeaderInfo(FILE *statsOut);
702 static void printTimerStats(FILE *statsOut, statistic
const * theStats, statistic
const * totalStats);
703 static void printCounterStats(FILE *statsOut, statistic
const * theStats);
704 static void printCounters(FILE * statsOut, counter
const * theCounters);
705 static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents,
int gtid);
706 static rgb_color getEventColor(timer_e e) {
return timerColorInfo[e]; }
707 static void windupExplicitTimers();
708 bool eventPrintingEnabled()
const {
return printPerThreadEventsFlag; }
711 kmp_stats_output_module() { init(); }
712 void outputStats(
const char* heading);
718 void __kmp_stats_init();
719 void __kmp_reset_stats();
720 void __kmp_output_stats(
const char *);
721 void __kmp_accumulate_stats_at_exit(
void);
723 extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
725 extern kmp_stats_list __kmp_stats_list;
727 extern kmp_tas_lock_t __kmp_stats_lock;
729 extern tsc_tick_count __kmp_stats_start_time;
731 extern kmp_stats_output_module __kmp_stats_output;
752 #define KMP_TIME_BLOCK(name) \ 753 blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name) 765 #define KMP_COUNT_VALUE(name, value) \ 766 __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) 777 #define KMP_COUNT_BLOCK(name) \ 778 __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() 791 #define KMP_START_EXPLICIT_TIMER(name) \ 792 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name) 805 #define KMP_STOP_EXPLICIT_TIMER(name) \ 806 __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name) 822 #define KMP_OUTPUT_STATS(heading_string) \ 823 __kmp_output_stats(heading_string) 832 #define KMP_INIT_PARTITIONED_TIMERS(name) \ 833 __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 835 #define KMP_TIME_PARTITIONED_BLOCK(name) \ 836 blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \ 837 timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 839 #define KMP_PUSH_PARTITIONED_TIMER(name) \ 840 __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) 842 #define KMP_POP_PARTITIONED_TIMER() \ 843 __kmp_stats_thread_ptr->getPartitionedTimers()->pop() 845 #define KMP_SET_THREAD_STATE(state_name) \ 846 __kmp_stats_thread_ptr->setState(state_name) 848 #define KMP_GET_THREAD_STATE() \ 849 __kmp_stats_thread_ptr->getState() 851 #define KMP_SET_THREAD_STATE_BLOCK(state_name) \ 852 blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name) 861 #define KMP_RESET_STATS() __kmp_reset_stats() 863 #if (KMP_DEVELOPER_STATS) 864 # define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) 865 # define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v) 866 # define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) 867 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) 868 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) 871 # define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 872 # define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) 873 # define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 874 # define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 875 # define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 878 #else // KMP_STATS_ENABLED 881 #define KMP_TIME_BLOCK(n) ((void)0) 882 #define KMP_COUNT_VALUE(n,v) ((void)0) 883 #define KMP_COUNT_BLOCK(n) ((void)0) 884 #define KMP_START_EXPLICIT_TIMER(n) ((void)0) 885 #define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) 887 #define KMP_OUTPUT_STATS(heading_string) ((void)0) 888 #define KMP_RESET_STATS() ((void)0) 890 #define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) 891 #define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) 892 #define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 893 #define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 894 #define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) 895 #define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) 896 #define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) 897 #define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) 898 #define KMP_POP_PARTITIONED_TIMER() ((void)0) 899 #define KMP_SET_THREAD_STATE(state_name) ((void)0) 900 #define KMP_GET_THREAD_STATE() ((void)0) 901 #define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) 902 #endif // KMP_STATS_ENABLED 904 #endif // KMP_STATS_H statistic is valid only for master
statistic is valid only for non-master threads
do not show a TOTAL_aggregation for this statistic
statistic can be logged on the event timeline when KMP_STATS_EVENTS is on (valid only for timers) ...
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
statistic doesn't need units printed next to it in output
stats_flags_e
flags to describe the statistic (timer or counter)
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
stats_state_e
the states which a thread can be in