/*
 * kmp.h -- KPTS runtime header file.
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

16 
17 #ifndef KMP_H
18 #define KMP_H
19 
20 /* #define BUILD_PARALLEL_ORDERED 1 */
21 
/* This fix replaces gettimeofday with clock_gettime for better scalability on
   the Altix. Requires user code to be linked with -lrt. */
//#define FIX_SGI_CLOCK

/* Defines for OpenMP 3.0 tasking and auto scheduling */

# ifndef KMP_STATIC_STEAL_ENABLED
# define KMP_STATIC_STEAL_ENABLED 1
# endif

#define TASK_CURRENT_NOT_QUEUED  0
#define TASK_CURRENT_QUEUED      1

#define TASK_DEQUE_BITS 8   // Used solely to define TASK_DEQUE_SIZE and TASK_DEQUE_MASK.
#define TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS )
#define TASK_DEQUE_MASK ( TASK_DEQUE_SIZE - 1 )
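
/* Example (illustrative only, not part of the runtime): with TASK_DEQUE_BITS == 8,
   TASK_DEQUE_SIZE is 256 and TASK_DEQUE_MASK is 0xFF, so a deque index can be
   advanced with a wrap-around AND instead of a modulo:

       kmp_int32 tail = 0;
       tail = ( tail + 1 ) & TASK_DEQUE_MASK;   // 255 wraps back to 0
*/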

#ifdef BUILD_TIED_TASK_STACK
#define TASK_STACK_EMPTY 0          // entries when the stack is empty

#define TASK_STACK_BLOCK_BITS 5     // Used to define TASK_STACK_SIZE and TASK_STACK_MASK
#define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array
#define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 )  // Mask for determining index into stack block
#endif // BUILD_TIED_TASK_STACK

#define TASK_NOT_PUSHED          1
#define TASK_SUCCESSFULLY_PUSHED 0
#define TASK_TIED                1
#define TASK_UNTIED              0
#define TASK_EXPLICIT            1
#define TASK_IMPLICIT            0
#define TASK_PROXY               1
#define TASK_FULL                0

#define KMP_CANCEL_THREADS
#define KMP_THREAD_ATTR

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <signal.h>
/* Don't include <ctype.h>; it causes problems with /MD on Windows* OS due to a bad Microsoft library. */
/* Some macros provided below replace some of these functions. */
#ifndef __ABSOFT_WIN
#include <sys/types.h>
#endif
#include <limits.h>
#include <time.h>

#include <errno.h>

#include "kmp_os.h"

#if KMP_STATS_ENABLED
class kmp_stats_list;
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#include <xmmintrin.h>
#endif

#include "kmp_version.h"
#include "kmp_debug.h"
#include "kmp_lock.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif
#include "kmp_i18n.h"

#define KMP_HANDLE_SIGNALS (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN)

#ifdef KMP_SETVERSION
/* from factory/Include, to get VERSION_STRING embedded for 'what' */
#include "kaiconfig.h"
#include "eye.h"
#include "own.h"
#include "setversion.h"
#endif

#include "kmp_wrapper_malloc.h"
#if KMP_OS_UNIX
# include <unistd.h>
# if !defined NSIG && defined _NSIG
#  define NSIG _NSIG
# endif
#endif

#if KMP_OS_LINUX
# pragma weak clock_gettime
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#endif

/* Select data placement in NUMA memory */
#define NO_FIRST_TOUCH 0
#define FIRST_TOUCH    1   /* Exploit SGI's first touch page placement algo */

/* If not specified on compile command line, assume no first touch */
#ifndef BUILD_MEMORY
#define BUILD_MEMORY NO_FIRST_TOUCH
#endif

// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size.
#ifndef USE_FAST_MEMORY
#define USE_FAST_MEMORY 3
#endif

#ifndef KMP_NESTED_HOT_TEAMS
# define KMP_NESTED_HOT_TEAMS 0
# define USE_NESTED_HOT_ARG(x)
#else
# if KMP_NESTED_HOT_TEAMS
#  if OMP_40_ENABLED
#   define USE_NESTED_HOT_ARG(x) ,x
#  else
// The nested hot teams feature depends on OpenMP 4.0; disable it for earlier versions.
#   undef KMP_NESTED_HOT_TEAMS
#   define KMP_NESTED_HOT_TEAMS 0
#   define USE_NESTED_HOT_ARG(x)
#  endif
# else
#  define USE_NESTED_HOT_ARG(x)
# endif
#endif


// By default, assume BGET uses a compare_exchange instruction instead of a lock.
#ifndef USE_CMP_XCHG_FOR_BGET
#define USE_CMP_XCHG_FOR_BGET 1
#endif

// Test to see if the queuing lock is better than the bootstrap lock for bget.
// #ifndef USE_QUEUING_LOCK_FOR_BGET
// #define USE_QUEUING_LOCK_FOR_BGET
// #endif

#define KMP_NSEC_PER_SEC 1000000000L
#define KMP_USEC_PER_SEC 1000000L

// FIXME DOXYGEN... need to group these flags somehow (making them an anonymous enum would do it...)
#define KMP_IDENT_IMB             0x01

#define KMP_IDENT_KMPC            0x02
/* 0x04 is no longer used */
#define KMP_IDENT_AUTOPAR         0x08

#define KMP_IDENT_ATOMIC_REDUCE   0x10

#define KMP_IDENT_BARRIER_EXPL    0x20

#define KMP_IDENT_BARRIER_IMPL           0x0040
#define KMP_IDENT_BARRIER_IMPL_MASK      0x01C0
#define KMP_IDENT_BARRIER_IMPL_FOR       0x0040
#define KMP_IDENT_BARRIER_IMPL_SECTIONS  0x00C0

#define KMP_IDENT_BARRIER_IMPL_SINGLE    0x0140
#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0

typedef struct ident {
    kmp_int32 reserved_1;
    kmp_int32 flags;
    kmp_int32 reserved_2;
#if USE_ITT_BUILD
    /* reserved_2 is currently used for storing region-specific ITT */
    /* contextual information. */
#endif /* USE_ITT_BUILD */
    kmp_int32 reserved_3;
    char const *psource;
} ident_t;
// Some forward declarations.

typedef union  kmp_team      kmp_team_t;
typedef struct kmp_taskdata  kmp_taskdata_t;
typedef union  kmp_task_team kmp_task_team_t;
typedef union  kmp_team      kmp_team_p;
typedef union  kmp_info      kmp_info_p;
typedef union  kmp_root      kmp_root_p;


#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Pack two 32-bit signed integers into a 64-bit signed integer */
/* ToDo: Fix word ordering for big-endian machines. */
#define KMP_PACK_64(HIGH_32,LOW_32) \
    ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) )
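
/* Example (illustrative only): packing and recovering two halves.

       kmp_int64 packed = KMP_PACK_64( 0x12345678, 0x9ABCDEF0 );
       kmp_int32 high   = (kmp_int32)( ((kmp_uint64)packed) >> 32 );  // 0x12345678
       kmp_int32 low    = (kmp_int32)( packed & 0xFFFFFFFF );         // 0x9ABCDEF0
*/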


/*
 * Generic string manipulation macros.
 * Assume that _x is of type char *
 */
#define SKIP_WS(_x)     { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; }
#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; }
#define SKIP_TO(_x,_c)  { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; }
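
/* Example (illustrative only): scanning a value like "  128,dynamic" with
   these macros; `buf` is a hypothetical char* cursor into the string.

       char *buf = ...;          // points at the start of the string
       SKIP_WS( buf );           // buf now points at '1'
       SKIP_DIGITS( buf );       // buf now points at ','
       SKIP_TO( buf, ',' );      // stops at ',' or '\0' (already there here)
*/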

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */


/* Enumeration types */

enum kmp_state_timer {
    ts_stop,
    ts_start,
    ts_pause,

    ts_last_state
};

enum dynamic_mode {
    dynamic_default,
#ifdef USE_LOAD_BALANCE
    dynamic_load_balance,
#endif /* USE_LOAD_BALANCE */
    dynamic_random,
    dynamic_thread_limit,
    dynamic_max
};

/* External schedule constants: duplicates enum omp_sched in omp.h so that omp.h need not be included here. */
#ifndef KMP_SCHED_TYPE_DEFINED
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
    kmp_sched_lower          = 0,   // lower and upper bounds are for routine parameter check
    // Note: need to adjust __kmp_sch_map global array in case this enum is changed
    kmp_sched_static         = 1,   // mapped to kmp_sch_static_chunked  (33)
    kmp_sched_dynamic        = 2,   // mapped to kmp_sch_dynamic_chunked (35)
    kmp_sched_guided         = 3,   // mapped to kmp_sch_guided_chunked  (36)
    kmp_sched_auto           = 4,   // mapped to kmp_sch_auto            (38)
    kmp_sched_upper_std      = 5,   // upper bound for standard schedules
    kmp_sched_lower_ext      = 100, // lower bound of Intel extension schedules
    kmp_sched_trapezoidal    = 101, // mapped to kmp_sch_trapezoidal (39)
//  kmp_sched_static_steal   = 102, // mapped to kmp_sch_static_steal (44)
    kmp_sched_upper          = 102,
    kmp_sched_default        = kmp_sched_static // default scheduling
} kmp_sched_t;
#endif

enum sched_type {
    kmp_sch_lower                     = 32,  /* lower bound for unordered values */
    kmp_sch_static_chunked            = 33,
    kmp_sch_static                    = 34,  /* static unspecialized */
    kmp_sch_dynamic_chunked           = 35,
    kmp_sch_guided_chunked            = 36,  /* guided unspecialized */
    kmp_sch_runtime                   = 37,
    kmp_sch_auto                      = 38,  /* auto */
    kmp_sch_trapezoidal               = 39,

    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_sch_static_greedy             = 40,
    kmp_sch_static_balanced           = 41,
    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_sch_guided_iterative_chunked  = 42,
    kmp_sch_guided_analytical_chunked = 43,

    kmp_sch_static_steal              = 44,  /* accessible only through KMP_SCHEDULE environment variable */
    kmp_sch_upper                     = 45,  /* upper bound for unordered values */

    kmp_ord_lower                     = 64,  /* lower bound for ordered values */
    kmp_ord_static_chunked            = 65,
    kmp_ord_static                    = 66,  /* ordered static unspecialized */
    kmp_ord_dynamic_chunked           = 67,
    kmp_ord_guided_chunked            = 68,
    kmp_ord_runtime                   = 69,
    kmp_ord_auto                      = 70,  /* ordered auto */
    kmp_ord_trapezoidal               = 71,
    kmp_ord_upper                     = 72,  /* upper bound for ordered values */

#if OMP_40_ENABLED
    /* Schedules for Distribute construct */
    kmp_distribute_static_chunked     = 91,  /* distribute static chunked */
    kmp_distribute_static             = 92,  /* distribute static unspecialized */
#endif

    /*
     * For the "nomerge" versions, kmp_dispatch_next*() will always return
     * a single iteration/chunk, even if the loop is serialized. For the
     * schedule types listed above, the entire iteration vector is returned
     * if the loop is serialized. This doesn't work for gcc/gcomp sections.
     */
    kmp_nm_lower                      = 160, /* lower bound for nomerge values */

    kmp_nm_static_chunked             = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
    kmp_nm_static                     = 162, /* static unspecialized */
    kmp_nm_dynamic_chunked            = 163,
    kmp_nm_guided_chunked             = 164, /* guided unspecialized */
    kmp_nm_runtime                    = 165,
    kmp_nm_auto                       = 166, /* auto */
    kmp_nm_trapezoidal                = 167,

    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_nm_static_greedy              = 168,
    kmp_nm_static_balanced            = 169,
    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_nm_guided_iterative_chunked   = 170,
    kmp_nm_guided_analytical_chunked  = 171,
    kmp_nm_static_steal               = 172, /* accessible only through OMP_SCHEDULE environment variable */

    kmp_nm_ord_static_chunked         = 193,
    kmp_nm_ord_static                 = 194, /* ordered static unspecialized */
    kmp_nm_ord_dynamic_chunked        = 195,
    kmp_nm_ord_guided_chunked         = 196,
    kmp_nm_ord_runtime                = 197,
    kmp_nm_ord_auto                   = 198, /* auto */
    kmp_nm_ord_trapezoidal            = 199,
    kmp_nm_upper                      = 200, /* upper bound for nomerge values */

    kmp_sch_default = kmp_sch_static  /* default scheduling algorithm */
};

/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef struct kmp_r_sched {
    enum sched_type r_sched_type;
    int             chunk;
} kmp_r_sched_t;

extern enum sched_type __kmp_sch_map[]; // maps OMP 3.0 schedule types to our internal schedule types

enum library_type {
    library_none,
    library_serial,
    library_turnaround,
    library_throughput
};

#if KMP_OS_LINUX
enum clock_function_type {
    clock_function_gettimeofday,
    clock_function_clock_gettime
};
#endif /* KMP_OS_LINUX */

#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
enum mic_type {
    non_mic,
    mic1,
    mic2,
    mic3,
    dummy
};
#endif

/* ------------------------------------------------------------------------ */
/* -- fast reduction stuff ------------------------------------------------ */

#undef KMP_FAST_REDUCTION_BARRIER
#define KMP_FAST_REDUCTION_BARRIER 1

#undef KMP_FAST_REDUCTION_CORE_DUO
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    #define KMP_FAST_REDUCTION_CORE_DUO 1
#endif

enum _reduction_method {
    reduction_method_not_defined = 0,
    critical_reduce_block        = ( 1 << 8 ),
    atomic_reduce_block          = ( 2 << 8 ),
    tree_reduce_block            = ( 3 << 8 ),
    empty_reduce_block           = ( 4 << 8 )
};

// Description of the packed_reduction_method variable:
// it packs two enum values into byte 0 and byte 1 of an int.
// Byte 0 ( packed_reduction_method & 0x000000FF ) is an 'enum barrier_type' value: the barrier
// used in the fast reduction, either bs_plain_barrier or bs_reduction_barrier.
// Byte 1 ( packed_reduction_method & 0x0000FF00 ) is the reduction method, of
// 'enum _reduction_method' type. Its values are defined so that byte 0 stays empty,
// so packing/unpacking needs no shift instruction.

#if KMP_FAST_REDUCTION_BARRIER
    #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
            ( ( reduction_method ) | ( barrier_type ) )

    #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
            ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) )

    #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
            ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) )
#else
    #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
            ( reduction_method )

    #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
            ( packed_reduction_method )

    #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
            ( bs_plain_barrier )
#endif

#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \
            ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) )

#if KMP_FAST_REDUCTION_BARRIER
    #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
            ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) )

    #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
            ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) )
#endif

typedef int PACKED_REDUCTION_METHOD_T;
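
/* Worked example (illustrative only): tree_reduce_block is ( 3 << 8 ) = 0x300
   and bs_reduction_barrier occupies byte 0 with value 2 (see enum barrier_type
   below), so:

       PACKED_REDUCTION_METHOD_T prm =
           PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier );
       // prm == 0x302: method in byte 1, barrier in byte 0, no shifts needed.
       // UNPACK_REDUCTION_METHOD( prm )  == tree_reduce_block    ( 0x300 )
       // UNPACK_REDUCTION_BARRIER( prm ) == bs_reduction_barrier ( 0x002 )
*/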

/* -- end of fast reduction stuff ----------------------------------------- */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#if KMP_OS_WINDOWS
# define USE_CBLKDATA
# pragma warning( push )
# pragma warning( disable: 271 310 )
# include <windows.h>
# pragma warning( pop )
#endif

#if KMP_OS_UNIX
# include <pthread.h>
# include <dlfcn.h>
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/*
 * Only Linux* OS and Windows* OS support thread affinity.
 */
#if KMP_AFFINITY_SUPPORTED

extern size_t __kmp_affin_mask_size;
# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
# define KMP_CPU_SETSIZE        (__kmp_affin_mask_size * CHAR_BIT)

# if KMP_OS_LINUX
//
// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
// (in bytes). It should be allocated on a word boundary.
//
// WARNING!!! We have made the base type of the affinity mask unsigned char,
// in order to eliminate a lot of checks that the true system mask size is
// really a multiple of 4 bytes (on Linux* OS).
//
// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!!
//

typedef unsigned char kmp_affin_mask_t;

# define _KMP_CPU_SET(i,mask)   (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
# define KMP_CPU_SET(i,mask)    _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
# define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT))))
# define KMP_CPU_ISSET(i,mask)  _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
# define _KMP_CPU_CLR(i,mask)   (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
# define KMP_CPU_CLR(i,mask)    _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))

# define KMP_CPU_ZERO(mask) \
        { \
            size_t __i; \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
                ((kmp_affin_mask_t *)(mask))[__i] = 0; \
            } \
        }

# define KMP_CPU_COPY(dest, src) \
        { \
            size_t __i; \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
                ((kmp_affin_mask_t *)(dest))[__i] \
                  = ((kmp_affin_mask_t *)(src))[__i]; \
            } \
        }

# define KMP_CPU_COMPLEMENT(mask) \
        { \
            size_t __i; \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
                ((kmp_affin_mask_t *)(mask))[__i] \
                  = ~((kmp_affin_mask_t *)(mask))[__i]; \
            } \
        }

# define KMP_CPU_UNION(dest, src) \
        { \
            size_t __i; \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
                ((kmp_affin_mask_t *)(dest))[__i] \
                  |= ((kmp_affin_mask_t *)(src))[__i]; \
            } \
        }

# endif /* KMP_OS_LINUX */
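
/* Example (illustrative only): setting and testing logical CPU 10 in a
   Linux* OS mask. Bit 10 lands in byte 1 (10 / CHAR_BIT) at bit position 2
   (10 % CHAR_BIT).

       kmp_affin_mask_t *mask;
       KMP_CPU_ALLOC( mask );        // defined below: __kmp_allocate(__kmp_affin_mask_size)
       KMP_CPU_ZERO( mask );
       KMP_CPU_SET( 10, mask );      // mask[1] |= 0x04
       if ( KMP_CPU_ISSET( 10, mask ) ) { /* CPU 10 is in the mask */ }
       KMP_CPU_FREE( mask );
*/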

# if KMP_OS_WINDOWS
//
// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on
// Intel(R) 64 it is 8 bytes times the number of processor groups.
//

# if KMP_GROUP_AFFINITY

// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
# if _MSC_VER < 1600
typedef struct GROUP_AFFINITY {
    KAFFINITY Mask;
    WORD      Group;
    WORD      Reserved[3];
} GROUP_AFFINITY;
# endif

typedef DWORD_PTR kmp_affin_mask_t;

extern int __kmp_num_proc_groups;

# define _KMP_CPU_SET(i,mask) \
        (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \
        (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))

# define KMP_CPU_SET(i,mask) \
        _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))

# define _KMP_CPU_ISSET(i,mask) \
        (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \
        (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))))

# define KMP_CPU_ISSET(i,mask) \
        _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))

# define _KMP_CPU_CLR(i,mask) \
        (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \
        ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))

# define KMP_CPU_CLR(i,mask) \
        _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))

# define KMP_CPU_ZERO(mask) \
        { \
            int __i; \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
                ((kmp_affin_mask_t *)(mask))[__i] = 0; \
            } \
        }

# define KMP_CPU_COPY(dest, src) \
        { \
            int __i; \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
                ((kmp_affin_mask_t *)(dest))[__i] \
                  = ((kmp_affin_mask_t *)(src))[__i]; \
            } \
        }

# define KMP_CPU_COMPLEMENT(mask) \
        { \
            int __i; \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
                ((kmp_affin_mask_t *)(mask))[__i] \
                  = ~((kmp_affin_mask_t *)(mask))[__i]; \
            } \
        }

# define KMP_CPU_UNION(dest, src) \
        { \
            int __i; \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
                ((kmp_affin_mask_t *)(dest))[__i] \
                  |= ((kmp_affin_mask_t *)(src))[__i]; \
            } \
        }

typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;

typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;

typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;

typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;

extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);

# else

typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */

# define KMP_CPU_SET(i,mask)      (*(mask) |= (((kmp_affin_mask_t)1) << (i)))
# define KMP_CPU_ISSET(i,mask)    (!!(*(mask) & (((kmp_affin_mask_t)1) << (i))))
# define KMP_CPU_CLR(i,mask)      (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
# define KMP_CPU_ZERO(mask)       (*(mask) = 0)
# define KMP_CPU_COPY(dest, src)  (*(dest) = *(src))
# define KMP_CPU_COMPLEMENT(mask) (*(mask) = ~*(mask))
# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))

# endif /* KMP_GROUP_AFFINITY */

# endif /* KMP_OS_WINDOWS */

//
// __kmp_allocate() returns memory allocated on a 4-byte boundary, after
// zeroing it; this takes care of the assumptions stated above.
//
# define KMP_CPU_ALLOC(ptr) \
        (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
# define KMP_CPU_FREE(ptr) __kmp_free(ptr)

//
// The following macro should be used to index an array of masks.
// The array should be declared as "kmp_affin_mask_t *" and allocated with
// size "__kmp_affin_mask_size * len". The macro takes care of the fact that
// on Windows* OS, sizeof(kmp_affin_mask_t) is really the size of the mask, but
// on Linux* OS, sizeof(kmp_affin_mask_t) is 1.
//
# define KMP_CPU_INDEX(array,i) \
        ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
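
/* Example (illustrative only): allocating and indexing an array of `len` masks
   as a flat buffer; `len` and `new_masks` are hypothetical names.

       unsigned len = __kmp_affinity_num_masks;
       kmp_affin_mask_t *new_masks =
           (kmp_affin_mask_t *)__kmp_allocate( __kmp_affin_mask_size * len );
       kmp_affin_mask_t *m = KMP_CPU_INDEX( new_masks, 2 ); // pointer to the third mask
       KMP_CPU_ZERO( m );
*/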

//
// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
//
#define KMP_AFFIN_MASK_PRINT_LEN 1024

enum affinity_type {
    affinity_none = 0,
    affinity_physical,
    affinity_logical,
    affinity_compact,
    affinity_scatter,
    affinity_explicit,
    affinity_balanced,
    affinity_disabled,  // not used outside the env var parser
    affinity_default
};

enum affinity_gran {
    affinity_gran_fine = 0,
    affinity_gran_thread,
    affinity_gran_core,
    affinity_gran_package,
    affinity_gran_node,
#if KMP_GROUP_AFFINITY
    //
    // The "group" granularity isn't necessarily coarser than all of the
    // other levels, but we put it last in the enum.
    //
    affinity_gran_group,
#endif /* KMP_GROUP_AFFINITY */
    affinity_gran_default
};

enum affinity_top_method {
    affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    affinity_top_method_apicid,
    affinity_top_method_x2apicid,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
    affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
    affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
    affinity_top_method_flat,
    affinity_top_method_default
};

#define affinity_respect_mask_default (-1)

extern enum affinity_type __kmp_affinity_type;  /* Affinity type */
extern enum affinity_gran __kmp_affinity_gran;  /* Affinity granularity */
extern int __kmp_affinity_gran_levels;          /* corresponding int value */
extern int __kmp_affinity_dups;                 /* Affinity duplicate masks */
extern enum affinity_top_method __kmp_affinity_top_method;
extern int __kmp_affinity_compact;              /* Affinity 'compact' value */
extern int __kmp_affinity_offset;               /* Affinity offset value */
extern int __kmp_affinity_verbose;              /* Was verbose specified for KMP_AFFINITY? */
extern int __kmp_affinity_warnings;             /* KMP_AFFINITY warnings enabled? */
extern int __kmp_affinity_respect_mask;         /* Respect process' initial affinity mask? */
extern char * __kmp_affinity_proclist;          /* proc ID list */
extern kmp_affin_mask_t *__kmp_affinity_masks;
extern unsigned __kmp_affinity_num_masks;
extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error);
extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error);
extern void __kmp_affinity_bind_thread(int which);

# if KMP_OS_LINUX
extern kmp_affin_mask_t *__kmp_affinity_get_fullMask();
# endif /* KMP_OS_LINUX */
extern char const * __kmp_cpuinfo_file;

#endif /* KMP_AFFINITY_SUPPORTED */

#if OMP_40_ENABLED

//
// This needs to be kept in sync with the values in omp.h !!!
//
typedef enum kmp_proc_bind_t {
    proc_bind_false = 0,
    proc_bind_true,
    proc_bind_master,
    proc_bind_close,
    proc_bind_spread,
    proc_bind_intel, // use KMP_AFFINITY interface
    proc_bind_default
} kmp_proc_bind_t;

typedef struct kmp_nested_proc_bind_t {
    kmp_proc_bind_t *bind_types;
    int size;
    int used;
} kmp_nested_proc_bind_t;

extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;

#endif /* OMP_40_ENABLED */

# if KMP_AFFINITY_SUPPORTED
# define KMP_PLACE_ALL       (-1)
# define KMP_PLACE_UNDEFINED (-2)
# endif /* KMP_AFFINITY_SUPPORTED */

extern int __kmp_affinity_num_places;


#if OMP_40_ENABLED
typedef enum kmp_cancel_kind_t {
    cancel_noreq = 0,
    cancel_parallel = 1,
    cancel_loop = 2,
    cancel_sections = 3,
    cancel_taskgroup = 4
} kmp_cancel_kind_t;
#endif // OMP_40_ENABLED

extern int __kmp_place_num_cores;
extern int __kmp_place_num_threads_per_core;
extern int __kmp_place_core_offset;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
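
/* Worked example (illustrative only): KMP_PAD rounds sizeof(type) up to a
   multiple of sz. For a 20-byte type padded to a 16-byte boundary:
       20 + (16 - ((20 - 1) % 16) - 1) = 20 + (16 - 3 - 1) = 32
*/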

//
// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
//
#define KMP_GTID_DNE      (-2) /* Does not exist */
#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
#define KMP_GTID_MONITOR  (-4) /* Monitor thread ID */
#define KMP_GTID_UNKNOWN  (-5) /* Is not known */
#define KMP_GTID_MIN      (-6) /* Minimal gtid for low bound check in DEBUG */

#define __kmp_get_gtid()   __kmp_get_global_thread_id()
#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()

#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
                                    __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )

#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \
                                        team -> t.t_threads[ (tid) ] -> th.th_info.ds.ds_gtid )

#define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team )
#define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
                                     __kmp_threads[ (gtid) ]-> th.th_team )

#define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] )
#define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) )

// Returns the gtid of the given thread (pointer to kmp_info_t). In contrast to
// __kmp_get_gtid(), it works with registered and not-yet-registered threads.
#define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \
                                      (thr)->th.th_info.ds.ds_gtid )

// AT: Which way is correct?
// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc )


/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1)))

#define KMP_MIN_NTH 1

#ifndef KMP_MAX_NTH
# ifdef PTHREAD_THREADS_MAX
#  define KMP_MAX_NTH PTHREAD_THREADS_MAX
# else
#  define KMP_MAX_NTH (32 * 1024)
# endif
#endif /* KMP_MAX_NTH */

#ifdef PTHREAD_STACK_MIN
# define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
#else
# define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
#endif

#define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))

#if KMP_ARCH_X86
# define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
#elif KMP_ARCH_X86_64
# define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
# define KMP_BACKUP_STKSIZE  ((size_t)(2 * 1024 * 1024))
#else
# define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
#endif

#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))

#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
#define KMP_MIN_MALLOC_POOL_INCR     ((size_t)(4 * 1024))
#define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))

#define KMP_MIN_STKOFFSET (0)
#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
#if KMP_OS_DARWIN
# define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
#else
# define KMP_DEFAULT_STKOFFSET CACHE_LINE
#endif

#define KMP_MIN_STKPADDING (0)
#define KMP_MAX_STKPADDING (2 * 1024 * 1024)

#define KMP_MIN_MONITOR_WAKEUPS  (1)    /* min number of times monitor wakes up per second */
#define KMP_MAX_MONITOR_WAKEUPS  (1000) /* maximum number of times monitor can wake up per second */
#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */
#define KMP_MIN_BLOCKTIME        (0)
#define KMP_MAX_BLOCKTIME        (INT_MAX) /* Must be this value for the "infinite" setting to work */
#define KMP_DEFAULT_BLOCKTIME    (200)  /* __kmp_blocktime is in milliseconds */
/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */
/* Only allow increasing number of wakeups */
#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
                 ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \
                   ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \
                   ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \
                       (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) )

/* Calculate number of intervals for a specific block time based on monitor_wakeups */
#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
                 ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \
                   (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) )
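
/* Worked example (illustrative only): with the default blocktime of 200 ms and
   KMP_BLOCKTIME_MULTIPLIER of 1000 units per second:
       KMP_WAKEUPS_FROM_BLOCKTIME(200, 1)   -> 1000 / 200 = 5 wakeups/sec
       KMP_INTERVALS_FROM_BLOCKTIME(200, 5) -> (200 + 200 - 1) / 200 = 1 interval
   i.e. a thread spins for one full 200 ms monitor interval before sleeping.
*/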

#define KMP_MIN_STATSCOLS     40
#define KMP_MAX_STATSCOLS     4096
#define KMP_DEFAULT_STATSCOLS 80

#define KMP_MIN_INTERVAL     0
#define KMP_MAX_INTERVAL     (INT_MAX-1)
#define KMP_DEFAULT_INTERVAL 0

#define KMP_MIN_CHUNK     1
#define KMP_MAX_CHUNK     (INT_MAX-1)
#define KMP_DEFAULT_CHUNK 1

#define KMP_MIN_INIT_WAIT     1
#define KMP_MAX_INIT_WAIT     (INT_MAX/2)
#define KMP_DEFAULT_INIT_WAIT 2048U

#define KMP_MIN_NEXT_WAIT     1
#define KMP_MAX_NEXT_WAIT     (INT_MAX/2)
#define KMP_DEFAULT_NEXT_WAIT 1024U

// max possible dynamic loops in concurrent execution per team
#define KMP_MAX_DISP_BUF 7
#define KMP_MAX_ORDERED  8

#define KMP_MAX_FIELDS 32

#define KMP_MAX_BRANCH_BITS 31

#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX

/* Minimum number of threads before switching to TLS gtid (experimentally determined) */
/* josh TODO: what about OS X* tuning? */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
# define KMP_TLS_GTID_MIN 5
#else
# define KMP_TLS_GTID_MIN INT_MAX
#endif

#define KMP_MASTER_TID(tid) ( (tid) == 0 )
#define KMP_WORKER_TID(tid) ( (tid) != 0 )

#define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 )
#define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 )
#define KMP_UBER_GTID(gtid)                                           \
    (                                                                 \
        KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ),                   \
        KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ),          \
        (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \
        (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\
    )
#define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 )

#ifndef TRUE
#define FALSE 0
#define TRUE  (! FALSE)
#endif

/* NOTE: all of the following constants must be even */

#if KMP_OS_WINDOWS
# define KMP_INIT_WAIT   64U /* initial number of spin-tests */
# define KMP_NEXT_WAIT   32U /* subsequent number of spin-tests */
#elif KMP_OS_CNK
# define KMP_INIT_WAIT   16U /* initial number of spin-tests */
# define KMP_NEXT_WAIT    8U /* subsequent number of spin-tests */
#elif KMP_OS_LINUX
# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
# define KMP_NEXT_WAIT  512U /* subsequent number of spin-tests */
#elif KMP_OS_DARWIN
/* TODO: tune for KMP_OS_DARWIN */
# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
# define KMP_NEXT_WAIT  512U /* subsequent number of spin-tests */
#elif KMP_OS_FREEBSD
/* TODO: tune for KMP_OS_FREEBSD */
# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
# define KMP_NEXT_WAIT  512U /* subsequent number of spin-tests */
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
typedef struct kmp_cpuid {
    kmp_uint32 eax;
    kmp_uint32 ebx;
    kmp_uint32 ecx;
    kmp_uint32 edx;
} kmp_cpuid_t;
extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
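
/* Example (illustrative only): querying the vendor string with CPUID leaf 0.
   The register layout follows the kmp_cpuid_t fields above.

       kmp_cpuid_t buf;
       __kmp_x86_cpuid( 0, 0, &buf );
       // buf.ebx / buf.edx / buf.ecx now hold "Genu" "ineI" "ntel" on Intel parts.
*/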
# if KMP_ARCH_X86
  extern void __kmp_x86_pause( void );
# elif KMP_MIC
  static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); }
# else
  static void __kmp_x86_pause( void ) { _mm_pause(); }
# endif
# define KMP_CPU_PAUSE() __kmp_x86_pause()
#elif KMP_ARCH_PPC64
# define KMP_PPC64_PRI_LOW()    __asm__ volatile ("or 1, 1, 1")
# define KMP_PPC64_PRI_MED()    __asm__ volatile ("or 2, 2, 2")
# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)
#else
# define KMP_CPU_PAUSE() /* nothing to do */
#endif

#define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; }

#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); }

// Note the decrement of 2 in the following macros. With KMP_LIBRARY=turnaround,
// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd.

#define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \
        if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } }
#define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -= 2; \
        if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } }
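
/* Example (illustrative only): a typical spin-wait loop built from these
   macros; `flag_is_set()` is a hypothetical predicate.

       kmp_uint32 spins;
       KMP_INIT_YIELD( spins );
       while ( ! flag_is_set() ) {
           KMP_YIELD_SPIN( spins );  // pause, and yield every few iterations
       }
*/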

/* ------------------------------------------------------------------------ */
/* Support datatypes for the orphaned construct nesting checks.             */
/* ------------------------------------------------------------------------ */

enum cons_type {
    ct_none,
    ct_parallel,
    ct_pdo,
    ct_pdo_ordered,
    ct_psections,
    ct_psingle,

    /* the following must be left in order and not split up */
    ct_taskq,
    ct_task,          /* really task inside non-ordered taskq, considered a worksharing type */
    ct_task_ordered,  /* really task inside ordered taskq, considered a worksharing type */
    /* the preceding must be left in order and not split up */

    ct_critical,
    ct_ordered_in_parallel,
    ct_ordered_in_pdo,
    ct_ordered_in_taskq,
    ct_master,
    ct_reduce,
    ct_barrier
};

/* test to see if we are in a taskq construct */
# define IS_CONS_TYPE_TASKQ( ct )   ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) )
# define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered)

struct cons_data {
    ident_t const  *ident;
    enum cons_type  type;
    int             prev;
    kmp_user_lock_p name;  /* address exclusively for critical section name comparison */
};

struct cons_header {
    int               p_top, w_top, s_top;
    int               stack_size, stack_top;
    struct cons_data *stack_data;
};

struct kmp_region_info {
    char *text;
    int   offset[KMP_MAX_FIELDS];
    int   length[KMP_MAX_FIELDS];
};


/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */

#if KMP_OS_WINDOWS
    typedef HANDLE kmp_thread_t;
    typedef DWORD  kmp_key_t;
#endif /* KMP_OS_WINDOWS */

#if KMP_OS_UNIX
    typedef pthread_t     kmp_thread_t;
    typedef pthread_key_t kmp_key_t;
#endif

extern kmp_key_t __kmp_gtid_threadprivate_key;

typedef struct kmp_sys_info {
    long maxrss;   /* the maximum resident set size utilized (in kilobytes)     */
    long minflt;   /* the number of page faults serviced without any I/O        */
    long majflt;   /* the number of page faults serviced that required I/O      */
    long nswap;    /* the number of times a process was "swapped" out of memory */
    long inblock;  /* the number of times the file system had to perform input  */
    long oublock;  /* the number of times the file system had to perform output */
    long nvcsw;    /* the number of times a context switch was voluntarily performed */
    long nivcsw;   /* the number of times a context switch was forced           */
} kmp_sys_info_t;

typedef struct kmp_cpuinfo {
    int        initialized;  // If 0, other fields are not initialized.
    int        signature;    // CPUID(1).EAX
    int        family;       // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family )
    int        model;        // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model )
    int        stepping;     // CPUID(1).EAX[3:0] ( Stepping )
    int        sse2;         // 0 if SSE2 instructions are not supported, 1 otherwise.
    int        rtm;          // 0 if RTM instructions are not supported, 1 otherwise.
    int        cpu_stackoffset;
    int        apic_id;
    int        physical_id;
    int        logical_id;
    kmp_uint64 frequency;    // Nominal CPU frequency in Hz.
} kmp_cpuinfo_t;


#ifdef BUILD_TV

struct tv_threadprivate {
    /* Record type #1 */
    void *global_addr;
    void *thread_addr;
};

struct tv_data {
    struct tv_data *next;
    void *type;
    union tv_union {
        struct tv_threadprivate tp;
    } u;
};

extern kmp_key_t __kmp_tv_key;

#endif /* BUILD_TV */

/* ------------------------------------------------------------------------ */

#if USE_ITT_BUILD
// We cannot include "kmp_itt.h" due to a circular dependency. Declare the only required type here.
// Later we will check that the type meets requirements.
typedef int kmp_itt_mark_t;
#define KMP_ITT_DEBUG 0
#endif /* USE_ITT_BUILD */

/* ------------------------------------------------------------------------ */

/*
 * Taskq data structures
 */

#define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4)
#define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */

/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */

#define TQF_IS_ORDERED          0x0001 /* __kmpc_taskq interface, taskq ordered */
#define TQF_IS_LASTPRIVATE      0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */
#define TQF_IS_NOWAIT           0x0004 /* __kmpc_taskq interface, end taskq nowait */
#define TQF_HEURISTICS          0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */
#define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */
#define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */
#define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */
#define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */

#define TQF_INTERFACE_FLAGS     0x00ff /* all the __kmpc_taskq interface flags */

#define TQF_IS_LAST_TASK        0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */
#define TQF_TASKQ_TASK          0x0200 /* internal use only; this thunk->th_task is the taskq_task */
#define TQF_RELEASE_WORKERS     0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */
#define TQF_ALL_TASKS_QUEUED    0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */
#define TQF_PARALLEL_CONTEXT    0x1000 /* internal use only; this queue encountered in a parallel context: not serialized */
#define TQF_DEALLOCATED         0x2000 /* internal use only; this queue is on the freelist and not in use */

#define TQF_INTERNAL_FLAGS      0x3f00 /* all the internal use only flags */

typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t {
    kmp_int32 ai_data;
} kmpc_aligned_int32_t;

typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t {
    struct kmpc_thunk_t *qs_thunk;
} kmpc_aligned_queue_slot_t;

typedef struct kmpc_task_queue_t {
    /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */
    kmp_lock_t                    tq_link_lck;        /* lock for child link, child next/prev links and child ref counts */
    union {
        struct kmpc_task_queue_t *tq_parent;          /* pointer to parent taskq, not locked */
        struct kmpc_task_queue_t *tq_next_free;       /* for taskq internal freelists, locked with global taskq_freelist_lck */
    } tq;
    volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */
    struct kmpc_task_queue_t     *tq_next_child;      /* next child in linked-list, locked by parent tq's tq_link_lck */
    struct kmpc_task_queue_t     *tq_prev_child;      /* previous child in linked-list, locked by parent tq's tq_link_lck */
    volatile kmp_int32            tq_ref_count;       /* reference count of threads with access to this task queue */
                                                      /* (other than the thread executing the kmpc_end_taskq call) */
                                                      /* locked by parent tq's tq_link_lck */

    /* shared data for task queue */
    struct kmpc_aligned_shared_vars_t *tq_shareds;    /* per-thread array of pointers to shared variable structures */
                                                      /* only one array element exists for all but outermost taskq */

    /* bookkeeping for ordered task queue */
    kmp_uint32                    tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */
    volatile kmp_uint32           tq_tasknum_serving; /* ordered number of next task to be served (executed) */

    /* thunk storage management for task queue */
    kmp_lock_t                    tq_free_thunks_lck; /* lock for thunk freelist manipulation */
    struct kmpc_thunk_t          *tq_free_thunks;     /* thunk freelist, chained via th.th_next_free */
    struct kmpc_thunk_t          *tq_thunk_space;     /* space allocated for thunks for this task queue */

    /* data fields for queue itself */
    kmp_lock_t                    tq_queue_lck;       /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */
    kmpc_aligned_queue_slot_t    *tq_queue;           /* array of queue slots to hold thunks for tasks */
    volatile struct kmpc_thunk_t *tq_taskq_slot;      /* special slot for taskq task thunk, occupied if not NULL */
    kmp_int32                     tq_nslots;          /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */
    kmp_int32                     tq_head;            /* enqueue puts next item in here (index into tq_queue array) */
    kmp_int32                     tq_tail;            /* dequeue takes next item out of here (index into tq_queue array) */
    volatile kmp_int32            tq_nfull;           /* # of occupied entries in task queue right now */
    kmp_int32                     tq_hiwat;           /* high-water mark for tq_nfull and queue scheduling */
    volatile kmp_int32            tq_flags;           /* TQF_xxx */

    /* bookkeeping for outstanding thunks */
    struct kmpc_aligned_int32_t  *tq_th_thunks;       /* per-thread array for # of regular thunks currently being executed */
    kmp_int32                     tq_nproc;           /* number of thunks in the th_thunks array */

    /* statistics library bookkeeping */
    ident_t                      *tq_loc;             /* source location information for taskq directive */
} kmpc_task_queue_t;

typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk);

/* sizeof_shareds passed as arg to __kmpc_taskq call */
typedef struct kmpc_shared_vars_t {        /* aligned during dynamic allocation */
    kmpc_task_queue_t *sv_queue;
    /* (pointers to) shared vars */
} kmpc_shared_vars_t;

typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t {
    volatile struct kmpc_shared_vars_t *ai_data;
} kmpc_aligned_shared_vars_t;

/* sizeof_thunk passed as arg to kmpc_taskq call */
typedef struct kmpc_thunk_t {              /* aligned during dynamic allocation */
    union {                                /* field used for internal freelists too */
        kmpc_shared_vars_t  *th_shareds;
        struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */
    } th;
    kmpc_task_t          th_task;          /* taskq_task if flags & TQF_TASKQ_TASK */
    struct kmpc_thunk_t *th_encl_thunk;    /* pointer to dynamically enclosing thunk on this thread's call stack */
    kmp_int32            th_flags;         /* TQF_xxx (tq_flags interface plus possible internal flags) */
    kmp_int32            th_status;
    kmp_uint32           th_tasknum;       /* task number assigned in order of queuing, used for ordered sections */
    /* private vars */
} kmpc_thunk_t;

typedef struct KMP_ALIGN_CACHE kmp_taskq {
    int                 tq_curr_thunk_capacity;

    kmpc_task_queue_t  *tq_root;
    kmp_int32           tq_global_flags;

    kmp_lock_t          tq_freelist_lck;
    kmpc_task_queue_t  *tq_freelist;

    kmpc_thunk_t      **tq_curr_thunk;
} kmp_taskq_t;

/* END Taskq data structures */
/* --------------------------------------------------------------------------- */

typedef kmp_int32 kmp_critical_name[8];

typedef void (*kmpc_micro)       ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... );
typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid,  kmp_int32 * bound_nth, ... );
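
/* Example (illustrative only): the shape of an outlined parallel-region body
   the compiler emits to match kmpc_micro; the name here is hypothetical.

       void __outlined_region( kmp_int32 *global_tid, kmp_int32 *bound_tid, ... )
       {
           // body of #pragma omp parallel, shared vars passed via varargs
       }
*/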

/* --------------------------------------------------------------------------- */
/* Threadprivate initialization/finalization function declarations */

/* for non-array objects:  __kmpc_threadprivate_register() */

typedef void *(*kmpc_ctor)  (void *);

typedef void  (*kmpc_dtor)  (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */
typedef void *(*kmpc_cctor) (void *, void *);

/* for array objects: __kmpc_threadprivate_register_vec() */
        /* First arg: "this" pointer */
        /* Last arg: number of array elements */
typedef void *(*kmpc_ctor_vec)  (void *, size_t);
typedef void  (*kmpc_dtor_vec)  (void *, size_t);
typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */
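
/* Example (illustrative only): a constructor/destructor pair matching
   kmpc_ctor and kmpc_dtor for a threadprivate object; names are hypothetical.

       void *my_tp_ctor( void *data ) {
           // initialize this thread's copy, return the constructed object
           return data;
       }
       void my_tp_dtor( void *data ) {
           // tear down this thread's copy
       }
*/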

/* ------------------------------------------------------------------------ */

/* keeps track of threadprivate cache allocations for cleanup later */
typedef struct kmp_cached_addr {
    void                  **addr;  /* address of allocated cache */
    struct kmp_cached_addr *next;  /* pointer to next cached address */
} kmp_cached_addr_t;

struct private_data {
    struct private_data *next;  /* The next descriptor in the list      */
    void                *data;  /* The data buffer for this descriptor  */
    int                  more;  /* The repeat count for this descriptor */
    size_t               size;  /* The data size for this descriptor    */
};

struct private_common {
    struct private_common *next;
    struct private_common *link;
    void                  *gbl_addr;
    void                  *par_addr;  /* par_addr == gbl_addr for MASTER thread */
    size_t                 cmn_size;
};

struct shared_common
{
    struct shared_common *next;
    struct private_data  *pod_init;
    void                 *obj_init;
    void                 *gbl_addr;
    union {
        kmpc_ctor     ctor;
        kmpc_ctor_vec ctorv;
    } ct;
    union {
        kmpc_cctor     cctor;
        kmpc_cctor_vec cctorv;
    } cct;
    union {
        kmpc_dtor     dtor;
        kmpc_dtor_vec dtorv;
    } dt;
    size_t vec_len;
    int    is_vec;
    size_t cmn_size;
};

#define KMP_HASH_TABLE_LOG2 9                              /* log2 of the hash table size */
#define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2)     /* size of the hash table */
#define KMP_HASH_SHIFT      3                              /* throw away this many low bits from the address */
#define KMP_HASH(x)         ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1))
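
/* Worked example (illustrative only): with KMP_HASH_SHIFT == 3 and a 512-entry
   table, an 8-byte-aligned address hashes by dropping its 3 alignment bits and
   masking to 9 bits:
       KMP_HASH( 0x7f001230 ) == ( 0x7f001230 >> 3 ) & 0x1FF
                              == 0x0FE00246 & 0x1FF == 0x046
*/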

struct common_table {
    struct private_common *data[ KMP_HASH_TABLE_SIZE ];
};

struct shared_table {
    struct shared_common *data[ KMP_HASH_TABLE_SIZE ];
};

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#ifdef KMP_STATIC_STEAL_ENABLED
typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
    kmp_int32 count;
    kmp_int32 ub;
    /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
    kmp_int32 lb;
    kmp_int32 st;
    kmp_int32 tc;
    kmp_int32 static_steal_counter;  /* for static_steal only; maybe better to put after ub */

    // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
    //    a) parm3 is properly aligned and
    //    b) all parm1-4 are in the same cache line.
    // Because parm1-4 are used together, performance seems to be better
    // if they are in the same line (not measured though).

    struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template
        kmp_int32 parm1;     //     structures in kmp_dispatch.cpp. This should
        kmp_int32 parm2;     //     make no real change at least while padding is off.
        kmp_int32 parm3;
        kmp_int32 parm4;
    };

    kmp_uint32 ordered_lower;
    kmp_uint32 ordered_upper;
#if KMP_OS_WINDOWS
    // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
    // It would be nice to measure execution times.
    // The conditional if/endif could be removed entirely.
    kmp_int32 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info32_t;

typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
    kmp_int64 count;  /* current chunk number for static and static-steal scheduling */
    kmp_int64 ub;     /* upper-bound */
    /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
    kmp_int64 lb;     /* lower-bound */
    kmp_int64 st;     /* stride */
    kmp_int64 tc;     /* trip count (number of iterations) */
    kmp_int64 static_steal_counter;  /* for static_steal only; maybe better to put after ub */

    /* parm[1-4] are used in different ways by different scheduling algorithms */

    // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
    //    a) parm3 is properly aligned and
    //    b) all parm1-4 are in the same cache line.
    // Because parm1-4 are used together, performance seems to be better
    // if they are in the same line (not measured though).

    struct KMP_ALIGN( 32 ) {
        kmp_int64 parm1;
        kmp_int64 parm2;
        kmp_int64 parm3;
        kmp_int64 parm4;
    };

    kmp_uint64 ordered_lower;
    kmp_uint64 ordered_upper;
#if KMP_OS_WINDOWS
    // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
    // It would be nice to measure execution times.
    // The conditional if/endif could be removed entirely.
    kmp_int64 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info64_t;
#else /* KMP_STATIC_STEAL_ENABLED */
typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
    kmp_int32 lb;
    kmp_int32 ub;
    kmp_int32 st;
    kmp_int32 tc;

    kmp_int32 parm1;
    kmp_int32 parm2;
    kmp_int32 parm3;
    kmp_int32 parm4;

    kmp_int32 count;

    kmp_uint32 ordered_lower;
    kmp_uint32 ordered_upper;
#if KMP_OS_WINDOWS
    kmp_int32 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info32_t;

typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
    kmp_int64 lb;  /* lower-bound */
    kmp_int64 ub;  /* upper-bound */
    kmp_int64 st;  /* stride */
    kmp_int64 tc;  /* trip count (number of iterations) */

    /* parm[1-4] are used in different ways by different scheduling algorithms */
    kmp_int64 parm1;
    kmp_int64 parm2;
    kmp_int64 parm3;
    kmp_int64 parm4;

    kmp_int64 count;  /* current chunk number for static scheduling */

    kmp_uint64 ordered_lower;
    kmp_uint64 ordered_upper;
#if KMP_OS_WINDOWS
    kmp_int64 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info64_t;
#endif /* KMP_STATIC_STEAL_ENABLED */

typedef struct KMP_ALIGN_CACHE dispatch_private_info {
    union private_info {
        dispatch_private_info32_t p32;
        dispatch_private_info64_t p64;
    } u;
    enum sched_type schedule;        /* scheduling algorithm */
    kmp_int32       ordered;         /* ordered clause specified */
    kmp_int32       ordered_bumped;
    kmp_int32       ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
    struct dispatch_private_info *next; /* stack of buffers for nest of serial regions */
    kmp_int32       nomerge;         /* don't merge iters if serialized */
    kmp_int32       type_size;       /* the size of types in private_info */
    enum cons_type  pushed_ws;
} dispatch_private_info_t;

typedef struct dispatch_shared_info32 {
    /* chunk index under dynamic, number of idle threads under static-steal;
       iteration index otherwise */
    volatile kmp_uint32 iteration;
    volatile kmp_uint32 num_done;
    volatile kmp_uint32 ordered_iteration;
    kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
} dispatch_shared_info32_t;

typedef struct dispatch_shared_info64 {
    /* chunk index under dynamic, number of idle threads under static-steal;
       iteration index otherwise */
    volatile kmp_uint64 iteration;
    volatile kmp_uint64 num_done;
    volatile kmp_uint64 ordered_iteration;
    kmp_int64 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
} dispatch_shared_info64_t;

typedef struct dispatch_shared_info {
    union shared_info {
        dispatch_shared_info32_t s32;
        dispatch_shared_info64_t s64;
    } u;
/*  volatile kmp_int32 dispatch_abort;  deprecated */
    volatile kmp_uint32 buffer_index;
} dispatch_shared_info_t;

typedef struct kmp_disp {
    /* Vector for ORDERED SECTION */
    void (*th_deo_fcn)( int * gtid, int * cid, ident_t *);
    /* Vector for END ORDERED SECTION */
    void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *);

    dispatch_shared_info_t  *th_dispatch_sh_current;
    dispatch_private_info_t *th_dispatch_pr_current;

    dispatch_private_info_t *th_disp_buffer;
    kmp_int32                th_disp_index;
    void* dummy_padding[2];  // make it 64 bytes on Intel(R) 64
#if KMP_USE_INTERNODE_ALIGNMENT
    char more_padding[INTERNODE_CACHE_LINE];
#endif
} kmp_disp_t;

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

1573 /* Barrier stuff */
1574 
1575 /* constants for barrier state update */
1576 #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1577 #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1578 #define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */
1579 #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1580 
1581 #define KMP_BARRIER_SLEEP_STATE ((kmp_uint) (1 << KMP_BARRIER_SLEEP_BIT))
1582 #define KMP_BARRIER_UNUSED_STATE ((kmp_uint) (1 << KMP_BARRIER_UNUSED_BIT))
1583 #define KMP_BARRIER_STATE_BUMP ((kmp_uint) (1 << KMP_BARRIER_BUMP_BIT))
1584 
1585 #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
1586 # error "Barrier sleep bit must be smaller than barrier bump bit"
1587 #endif
1588 #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
1589 # error "Barrier unused bit must be smaller than barrier bump bit"
1590 #endif
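/*
 * Illustrative sketch (not part of the runtime): the go/arrived counter is
 * bumped in units of KMP_BARRIER_STATE_BUMP, so a bump never disturbs the
 * low sleep/unused flag bits, and the sleep bit can be set, tested, and
 * cleared independently. Variable names are hypothetical.
 *
 *   kmp_uint64 state = KMP_INIT_BARRIER_STATE;
 *   state += KMP_BARRIER_STATE_BUMP;                   // advance the go/arrived count
 *   state |= KMP_BARRIER_SLEEP_STATE;                  // waiter went to sleep
 *   int sleeping = ( state & KMP_BARRIER_SLEEP_STATE ) != 0;
 *   state &= ~( (kmp_uint64)KMP_BARRIER_SLEEP_STATE ); // clear on wake-up
 */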
1591 
1592 // Constants for release barrier wait state: currently, hierarchical only
1593 #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
1594 #define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release
1595 #define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release
1596 #define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go
1597 #define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up
1598 
1599 enum barrier_type {
1600  bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */
1601  bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
1602  #if KMP_FAST_REDUCTION_BARRIER
1603  bs_reduction_barrier, /* 2, All barriers that are used in reduction */
1604  #endif // KMP_FAST_REDUCTION_BARRIER
1605  bs_last_barrier /* Just a placeholder to mark the end */
1606 };
1607 
1608 // to work with reduction barriers just like with plain barriers
1609 #if !KMP_FAST_REDUCTION_BARRIER
1610  #define bs_reduction_barrier bs_plain_barrier
1611 #endif // KMP_FAST_REDUCTION_BARRIER
1612 
1613 typedef enum kmp_bar_pat { /* Barrier communication patterns */
1614  bp_linear_bar = 0, /* Single level (degenerate) tree */
1615  bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */
1616  bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */
1617  bp_hierarchical_bar = 3, /* Machine hierarchy tree */
1618  bp_last_bar = 4 /* Placeholder to mark the end */
1619 } kmp_bar_pat_e;
1620 
1621 # define KMP_BARRIER_ICV_PUSH 1
1622 
 1623 /* Record holding the values of the internal control variables; these records form a stack */
1624 typedef struct kmp_internal_control {
1625  int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */
1626  kmp_int8 nested; /* internal control for nested parallelism (per thread) */
1627  kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */
1628  kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */
1629  int blocktime; /* internal control for blocktime */
1630  int bt_intervals; /* internal control for blocktime intervals */
1631  int nproc; /* internal control for #threads for next parallel region (per thread) */
1632  int max_active_levels; /* internal control for max_active_levels */
1633  kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */
1634 #if OMP_40_ENABLED
1635  kmp_proc_bind_t proc_bind; /* internal control for affinity */
1636 #endif // OMP_40_ENABLED
1637  struct kmp_internal_control *next;
1638 } kmp_internal_control_t;
1639 
1640 static inline void
1641 copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
1642  *dst = *src;
1643 }
1644 
1645 /* Thread barrier needs volatile barrier fields */
1646 typedef struct KMP_ALIGN_CACHE kmp_bstate {
1647  // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it).
1648  // It is not explicitly aligned below, because we *don't* want it to be padded -- instead,
 1649  // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache-line
1650  // stores in the hierarchical barrier.
1651  kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
1652  // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store
1653  volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
1654  KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point.
1655  kmp_uint32 *skip_per_level;
1656  kmp_uint32 my_level;
1657  kmp_int32 parent_tid;
1658  kmp_int32 old_tid;
1659  kmp_uint32 depth;
1660  struct kmp_bstate *parent_bar;
1661  kmp_team_t *team;
1662  kmp_uint64 leaf_state;
1663  kmp_uint32 nproc;
1664  kmp_uint8 base_leaf_kids;
1665  kmp_uint8 leaf_kids;
1666  kmp_uint8 offset;
1667  kmp_uint8 wait_flag;
1668  kmp_uint8 use_oncore_barrier;
1669 #if USE_DEBUGGER
 1670  // The following field is intended solely for the debugger. Only the worker thread itself accesses this
 1671  // field: the worker increments it by 1 when it arrives at a barrier.
1672  KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
1673 #endif /* USE_DEBUGGER */
1674 } kmp_bstate_t;
1675 
1676 union KMP_ALIGN_CACHE kmp_barrier_union {
1677  double b_align; /* use worst case alignment */
1678  char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ];
1679  kmp_bstate_t bb;
1680 };
1681 
1682 typedef union kmp_barrier_union kmp_balign_t;
1683 
1684 /* Team barrier needs only non-volatile arrived counter */
1685 union KMP_ALIGN_CACHE kmp_barrier_team_union {
1686  double b_align; /* use worst case alignment */
1687  char b_pad[ CACHE_LINE ];
1688  struct {
1689  kmp_uint b_arrived; /* STATE => task reached synch point. */
1690 #if USE_DEBUGGER
 1691  // The following two fields are intended solely for the debugger. Only the master of the team accesses
 1692  // these fields: the first is incremented by 1 when the master arrives at a barrier, the
 1693  // second is incremented by 1 when all the threads have arrived.
1694  kmp_uint b_master_arrived;
1695  kmp_uint b_team_arrived;
1696 #endif
1697  };
1698 };
1699 
1700 typedef union kmp_barrier_team_union kmp_balign_team_t;
1701 
1702 /*
1703  * Padding for Linux* OS pthreads condition variables and mutexes used to signal
 1704  * threads when a condition changes. This works around an NPTL bug where
 1705  * padding was added to pthread_cond_t, which caused the initialization
 1706  * routine to write outside the structure if compiled against pre-NPTL headers.
1707  */
1708 
1709 #if KMP_OS_WINDOWS
1710 typedef struct kmp_win32_mutex
1711 {
1712  /* The Lock */
1713  CRITICAL_SECTION cs;
1714 } kmp_win32_mutex_t;
1715 
1716 typedef struct kmp_win32_cond
1717 {
1718  /* Count of the number of waiters. */
1719  int waiters_count_;
1720 
1721  /* Serialize access to <waiters_count_> */
1722  kmp_win32_mutex_t waiters_count_lock_;
1723 
1724  /* Number of threads to release via a <cond_broadcast> or a */
1725  /* <cond_signal> */
1726  int release_count_;
1727 
1728  /* Keeps track of the current "generation" so that we don't allow */
1729  /* one thread to steal all the "releases" from the broadcast. */
1730  int wait_generation_count_;
1731 
1732  /* A manual-reset event that's used to block and release waiting */
1733  /* threads. */
1734  HANDLE event_;
1735 } kmp_win32_cond_t;
1736 #endif
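/*
 * Illustrative sketch (not part of the runtime) of the generation scheme
 * above; a waiter only consumes a release if it began waiting in an earlier
 * generation, so a late arrival cannot steal releases from a broadcast.
 *
 *   // hypothetical broadcast, with waiters_count_lock_ held:
 *   cv->release_count_ = cv->waiters_count_;   // release everyone waiting now
 *   cv->wait_generation_count_++;              // start a new generation
 *   SetEvent( cv->event_ );                    // manual-reset: wakes all waiters
 */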
1737 
1738 #if KMP_OS_UNIX
1739 
1740 union KMP_ALIGN_CACHE kmp_cond_union {
1741  double c_align;
1742  char c_pad[ CACHE_LINE ];
1743  pthread_cond_t c_cond;
1744 };
1745 
1746 typedef union kmp_cond_union kmp_cond_align_t;
1747 
1748 union KMP_ALIGN_CACHE kmp_mutex_union {
1749  double m_align;
1750  char m_pad[ CACHE_LINE ];
1751  pthread_mutex_t m_mutex;
1752 };
1753 
1754 typedef union kmp_mutex_union kmp_mutex_align_t;
1755 
1756 #endif /* KMP_OS_UNIX */
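/*
 * Illustrative sketch (not part of the runtime): the unions above guarantee
 * that the wrapped pthread types occupy at least a full cache line, so an
 * NPTL initializer that writes past a smaller pre-NPTL layout still lands
 * inside our storage. In the spirit of the KMP_BUILD_ASSERT checks used
 * elsewhere in this file:
 *
 *   KMP_BUILD_ASSERT( sizeof( kmp_cond_align_t )  >= CACHE_LINE );
 *   KMP_BUILD_ASSERT( sizeof( kmp_mutex_align_t ) >= CACHE_LINE );
 */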
1757 
1758 typedef struct kmp_desc_base {
1759  void *ds_stackbase;
1760  size_t ds_stacksize;
1761  int ds_stackgrow;
1762  kmp_thread_t ds_thread;
1763  volatile int ds_tid;
1764  int ds_gtid;
1765 #if KMP_OS_WINDOWS
1766  volatile int ds_alive;
1767  DWORD ds_thread_id;
 1768  /*
 1769  ds_thread keeps the thread handle on Windows* OS. It is enough for RTL purposes. However,
 1770  debugger support (libomp_db) cannot work with handles, because they are not comparable. For
 1771  example, the debugger requests info about the thread with handle h. h is valid within the
 1772  debugger process, but meaningless within the debuggee process. Even if h is duplicated by a
 1773  call to DuplicateHandle(), the resulting handle h' is valid within the debuggee process, but
 1774  it is a *new* handle which does *not* equal any other handle in the debuggee... The only way
 1775  to compare handles is to convert them to system-wide ids. The GetThreadId() function is
 1776  available only in Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is
 1777  available on all Windows* OS flavours (including Windows* 95). Thus, we have to get the
 1778  thread id by calling GetCurrentThreadId() from within the thread and save it to let
 1779  libomp_db identify threads.
 1780  */
1781 #endif /* KMP_OS_WINDOWS */
1782 } kmp_desc_base_t;
1783 
1784 typedef union KMP_ALIGN_CACHE kmp_desc {
1785  double ds_align; /* use worst case alignment */
1786  char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ];
1787  kmp_desc_base_t ds;
1788 } kmp_desc_t;
1789 
1790 
1791 typedef struct kmp_local {
1792  volatile int this_construct; /* count of single's encountered by thread */
1793  void *reduce_data;
1794 #if KMP_USE_BGET
1795  void *bget_data;
1796  void *bget_list;
1797 #if ! USE_CMP_XCHG_FOR_BGET
1798 #ifdef USE_QUEUING_LOCK_FOR_BGET
1799  kmp_lock_t bget_lock; /* Lock for accessing bget free list */
1800 #else
1801  kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */
1802  /* Must be bootstrap lock so we can use it at library shutdown */
1803 #endif /* USE_LOCK_FOR_BGET */
1804 #endif /* ! USE_CMP_XCHG_FOR_BGET */
1805 #endif /* KMP_USE_BGET */
1806 
1807 #ifdef BUILD_TV
1808  struct tv_data *tv_data;
1809 #endif
1810 
1811  PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */
1812 
1813 } kmp_local_t;
1814 
1815 #define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
1816 #define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
1817 #define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
1818 
1819 #define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
1820 #define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
1821 #define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
1822 #define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
1823 
1824 #define set__blocktime_team( xteam, xtid, xval ) \
1825  ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) )
1826 
1827 #define set__bt_intervals_team( xteam, xtid, xval ) \
1828  ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
1829 
1830 #define set__bt_set_team( xteam, xtid, xval ) \
1831  ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) )
1832 
1833 
1834 #define set__nested( xthread, xval ) \
1835  ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) )
1836 #define get__nested( xthread ) \
1837  ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) )
1838 
1839 #define set__dynamic( xthread, xval ) \
1840  ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) )
1841 #define get__dynamic( xthread ) \
1842  ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) )
1843 
1844 #define set__nproc( xthread, xval ) \
1845  ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) )
1846 
1847 #define set__max_active_levels( xthread, xval ) \
1848  ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) )
1849 
1850 #define set__sched( xthread, xval ) \
1851  ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) )
1852 
1853 #if OMP_40_ENABLED
1854 
1855 #define set__proc_bind( xthread, xval ) \
1856  ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) )
1857 #define get__proc_bind( xthread ) \
1858  ( (xthread)->th.th_current_task->td_icvs.proc_bind )
1859 
1860 #endif /* OMP_40_ENABLED */
1861 
1862 
1863 /* ------------------------------------------------------------------------ */
1864 // OpenMP tasking data structures
1865 //
1866 
1867 typedef enum kmp_tasking_mode {
1868  tskm_immediate_exec = 0,
1869  tskm_extra_barrier = 1,
1870  tskm_task_teams = 2,
1871  tskm_max = 2
1872 } kmp_tasking_mode_t;
1873 
1874 extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */
1875 extern kmp_int32 __kmp_task_stealing_constraint;
1876 
1877 /* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */
1878 #define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1)
1879 #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1)
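/*
 * Illustrative sketch (not part of the runtime): because both structures sit
 * in one allocation with the taskdata first, the macros above are plain
 * pointer arithmetic and exact inverses of each other.
 *
 *   kmp_task_t     *task     = ...;  // e.g., as passed to a task entry routine
 *   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA( task );
 *   KMP_DEBUG_ASSERT( KMP_TASKDATA_TO_TASK( taskdata ) == task );
 */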
1880 
1881 // The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
1882 // queued since the previous barrier release.
1883 #define KMP_TASKING_ENABLED(task_team) \
1884  (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
1885 
1892 typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * );
1893 
1894 /* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
1897 typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
1898  void * shareds;
1899  kmp_routine_entry_t routine;
1900  kmp_int32 part_id;
1901 #if OMP_40_ENABLED
 1902  kmp_routine_entry_t destructors; /* pointer to function that invokes destructors of firstprivate C++ objects */
1903 #endif // OMP_40_ENABLED
1904  /* private vars */
1905 } kmp_task_t;
1906 
1911 #if OMP_40_ENABLED
1912 typedef struct kmp_taskgroup {
1913  kmp_uint32 count; // number of allocated and not yet complete tasks
1914  kmp_int32 cancel_request; // request for cancellation of this taskgroup
1915  struct kmp_taskgroup *parent; // parent taskgroup
1916 } kmp_taskgroup_t;
1917 
1918 
1919 // forward declarations
1920 typedef union kmp_depnode kmp_depnode_t;
1921 typedef struct kmp_depnode_list kmp_depnode_list_t;
1922 typedef struct kmp_dephash_entry kmp_dephash_entry_t;
1923 
1924 typedef struct kmp_depend_info {
1925  kmp_intptr_t base_addr;
1926  size_t len;
1927  struct {
1928  bool in:1;
1929  bool out:1;
1930  } flags;
1931 } kmp_depend_info_t;
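/*
 * Illustrative sketch (not part of the runtime): how a depend(inout: x)
 * clause could be described with the structure above. Values are
 * hypothetical; an "in" dependence would set only flags.in.
 *
 *   int x;
 *   kmp_depend_info_t dep;
 *   dep.base_addr = (kmp_intptr_t)&x;
 *   dep.len       = sizeof( x );
 *   dep.flags.in  = 1;
 *   dep.flags.out = 1;   // inout: both bits set
 */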
1932 
1933 struct kmp_depnode_list {
1934  kmp_depnode_t * node;
1935  kmp_depnode_list_t * next;
1936 };
1937 
1938 typedef struct kmp_base_depnode {
1939  kmp_depnode_list_t * successors;
1940  kmp_task_t * task;
1941 
1942  kmp_lock_t lock;
1943 
1944 #if KMP_SUPPORT_GRAPH_OUTPUT
1945  kmp_uint32 id;
1946 #endif
1947 
1948  volatile kmp_int32 npredecessors;
1949  volatile kmp_int32 nrefs;
1950 } kmp_base_depnode_t;
1951 
1952 union KMP_ALIGN_CACHE kmp_depnode {
1953  double dn_align; /* use worst case alignment */
1954  char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ];
1955  kmp_base_depnode_t dn;
1956 };
1957 
1958 struct kmp_dephash_entry {
1959  kmp_intptr_t addr;
1960  kmp_depnode_t * last_out;
1961  kmp_depnode_list_t * last_ins;
1962  kmp_dephash_entry_t * next_in_bucket;
1963 };
1964 
1965 typedef struct kmp_dephash {
1966  kmp_dephash_entry_t ** buckets;
1967 #ifdef KMP_DEBUG
1968  kmp_uint32 nelements;
1969  kmp_uint32 nconflicts;
1970 #endif
1971 } kmp_dephash_t;
1972 
1973 #endif
1974 
1975 #ifdef BUILD_TIED_TASK_STACK
1976 
1977 /* Tied Task stack definitions */
1978 typedef struct kmp_stack_block {
1979  kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ];
1980  struct kmp_stack_block * sb_next;
1981  struct kmp_stack_block * sb_prev;
1982 } kmp_stack_block_t;
1983 
1984 typedef struct kmp_task_stack {
1985  kmp_stack_block_t ts_first_block; // first block of stack entries
1986  kmp_taskdata_t ** ts_top; // pointer to the top of stack
1987  kmp_int32 ts_entries; // number of entries on the stack
1988 } kmp_task_stack_t;
1989 
1990 #endif // BUILD_TIED_TASK_STACK
1991 
1992 typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
1993  /* Compiler flags */ /* Total compiler flags must be 16 bits */
1994  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
 1995  unsigned final : 1; /* task is final (1), so execute immediately */
1996  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
1997 #if OMP_40_ENABLED
1998  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
1999 #if OMP_41_ENABLED
2000  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */
2001  unsigned reserved : 11; /* reserved for compiler use */
2002 #else
2003  unsigned reserved : 12; /* reserved for compiler use */
2004 #endif
2005 #else // OMP_40_ENABLED
2006  unsigned reserved : 13; /* reserved for compiler use */
2007 #endif // OMP_40_ENABLED
2008 
2009  /* Library flags */ /* Total library flags must be 16 bits */
2010  unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2011  unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */
2012  unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */
2013  unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */
2014  /* If either team_serial or tasking_ser is set, task team may be NULL */
2015  /* Task State Flags: */
2016  unsigned started : 1; /* 1==started, 0==not started */
2017  unsigned executing : 1; /* 1==executing, 0==not executing */
2018  unsigned complete : 1; /* 1==complete, 0==not complete */
 2019  unsigned freed : 1; /* 1==freed, 0==allocated */
2020  unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2021  unsigned reserved31 : 7; /* reserved for library use */
2022 
2023 } kmp_tasking_flags_t;
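/*
 * Illustrative sketch (not part of the runtime): the bit-field widths above
 * are budgeted so the structure packs into exactly 32 bits; a compile-time
 * check in the style used elsewhere in this file would be:
 *
 *   KMP_BUILD_ASSERT( sizeof( kmp_tasking_flags_t ) == 4 );
 */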
2024 
2025 
2026 struct kmp_taskdata { /* aligned during dynamic allocation */
2027  kmp_int32 td_task_id; /* id, assigned by debugger */
2028  kmp_tasking_flags_t td_flags; /* task flags */
2029  kmp_team_t * td_team; /* team for this task */
2030  kmp_info_p * td_alloc_thread; /* thread that allocated data structures */
2031  /* Currently not used except for perhaps IDB */
2032  kmp_taskdata_t * td_parent; /* parent task */
2033  kmp_int32 td_level; /* task nesting level */
2034  ident_t * td_ident; /* task identifier */
2035  // Taskwait data.
2036  ident_t * td_taskwait_ident;
2037  kmp_uint32 td_taskwait_counter;
2038  kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
2039  KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */
2040  volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */
2041  volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */
2042 #if OMP_40_ENABLED
2043  kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup
2044  kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here
2045  kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies
2046 #endif
2047 #if OMPT_SUPPORT
2048  ompt_task_info_t ompt_task_info;
2049 #endif
2050 #if KMP_HAVE_QUAD
2051  _Quad td_dummy; // Align structure 16-byte size since allocated just before kmp_task_t
2052 #else
2053  kmp_uint32 td_dummy[2];
2054 #endif
2055 }; // struct kmp_taskdata
2056 
2057 // Make sure padding above worked
2058 KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 );
2059 
2060 // Data for task team but per thread
2061 typedef struct kmp_base_thread_data {
2062  kmp_info_p * td_thr; // Pointer back to thread info
2063  // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued?
2064  kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2065  kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2066  kmp_uint32 td_deque_head; // Head of deque (will wrap)
2067  kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2068  kmp_int32 td_deque_ntasks; // Number of tasks in deque
2069  // GEH: shouldn't this be volatile since used in while-spin?
2070  kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2071 #ifdef BUILD_TIED_TASK_STACK
2072  kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint
2073 #endif // BUILD_TIED_TASK_STACK
2074 } kmp_base_thread_data_t;
2075 
2076 typedef union KMP_ALIGN_CACHE kmp_thread_data {
2077  kmp_base_thread_data_t td;
2078  double td_align; /* use worst case alignment */
2079  char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ];
2080 } kmp_thread_data_t;
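/*
 * Illustrative sketch (not part of the runtime): td_deque_head/td_deque_tail
 * wrap modulo the deque size, which is why TASK_DEQUE_SIZE is a power of two
 * and indices can be advanced with a mask. Names below are hypothetical.
 *
 *   // push taskdata at the tail, then advance the tail with wrap-around:
 *   td->td_deque[ td->td_deque_tail ] = taskdata;
 *   td->td_deque_tail = ( td->td_deque_tail + 1 ) & TASK_DEQUE_MASK;
 */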
2081 
2082 
2083 // Data for task teams which are used when tasking is enabled for the team
2084 typedef struct kmp_base_task_team {
2085  kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2086  /* must be bootstrap lock since used at library shutdown*/
2087  kmp_task_team_t * tt_next; /* For linking the task team free list */
2088  kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */
2089  /* Data survives task team deallocation */
2090  kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? */
2091  /* TRUE means tt_threads_data is set up and initialized */
2092  kmp_int32 tt_nproc; /* #threads in team */
2093  kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */
2094 #if OMP_41_ENABLED
2095  kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
2096 #endif
2097 
2098  KMP_ALIGN_CACHE
2099  volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */
2100 
2101  KMP_ALIGN_CACHE
2102  volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */
2103 
2104  KMP_ALIGN_CACHE
2105 #if KMP_USE_INTERNODE_ALIGNMENT
2106  kmp_int32 tt_padme[INTERNODE_CACHE_LINE/sizeof(kmp_int32)];
2107 #endif
2108 
2109  volatile kmp_uint32 tt_ref_ct; /* #threads accessing struct */
2110  /* (not incl. master) */
2111 } kmp_base_task_team_t;
2112 
2113 union KMP_ALIGN_CACHE kmp_task_team {
2114  kmp_base_task_team_t tt;
2115  double tt_align; /* use worst case alignment */
2116  char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ];
2117 };
2118 
2119 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
2120 // Free lists keep same-size free memory slots for fast memory allocation routines
2121 typedef struct kmp_free_list {
2122  void *th_free_list_self; // Self-allocated tasks free list
2123  void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads
2124  void *th_free_list_other; // Non-self free list (to be returned to owner's sync list)
2125 } kmp_free_list_t;
2126 #endif
2127 #if KMP_NESTED_HOT_TEAMS
2128 // Hot teams array keeps hot teams and their sizes for given thread.
2129 // Hot teams are not put in teams pool, and they don't put threads in threads pool.
2130 typedef struct kmp_hot_team_ptr {
2131  kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2132  kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2133 } kmp_hot_team_ptr_t;
2134 #endif
2135 #if OMP_40_ENABLED
2136 typedef struct kmp_teams_size {
2137  kmp_int32 nteams; // number of teams in a league
2138  kmp_int32 nth; // number of threads in each team of the league
2139 } kmp_teams_size_t;
2140 #endif
2141 
2142 /* ------------------------------------------------------------------------ */
2143 // OpenMP thread data structures
2144 //
2145 
2146 typedef struct KMP_ALIGN_CACHE kmp_base_info {
2147 /*
 2148  * Start with the read-only data, which is cache aligned and padded.
 2149  * This is written by the master before the thread starts working.
2150  * (uber masters may update themselves later)
2151  * (usage does not consider serialized regions)
2152  */
2153  kmp_desc_t th_info;
2154  kmp_team_p *th_team; /* team we belong to */
2155  kmp_root_p *th_root; /* pointer to root of task hierarchy */
2156  kmp_info_p *th_next_pool; /* next available thread in the pool */
2157  kmp_disp_t *th_dispatch; /* thread's dispatch data */
2158  int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2159 
2160  /* The following are cached from the team info structure */
2161  /* TODO use these in more places as determined to be needed via profiling */
2162  int th_team_nproc; /* number of threads in a team */
2163  kmp_info_p *th_team_master; /* the team's master thread */
2164  int th_team_serialized; /* team is serialized */
2165 #if OMP_40_ENABLED
2166  microtask_t th_teams_microtask; /* save entry address for teams construct */
2167  int th_teams_level; /* save initial level of teams construct */
 2168  /* it is 0 on device but may be any value on host */
2169 #endif
2170 
 2171  /* The blocktime info is copied from the team struct to the thread struct */
2172  /* at the start of a barrier, and the values stored in the team are used */
2173  /* at points in the code where the team struct is no longer guaranteed */
2174  /* to exist (from the POV of worker threads). */
2175  int th_team_bt_intervals;
2176  int th_team_bt_set;
2177 
2178 
2179 #if KMP_AFFINITY_SUPPORTED
2180  kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2181 #endif
2182 
2183 /*
2184  * The data set by the master at reinit, then R/W by the worker
2185  */
2186  KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */
2187 #if KMP_NESTED_HOT_TEAMS
2188  kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2189 #endif
2190 #if OMP_40_ENABLED
2191  kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2192  kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */
2193 # if KMP_AFFINITY_SUPPORTED
2194  int th_current_place; /* place currently bound to */
2195  int th_new_place; /* place to bind to in par reg */
2196  int th_first_place; /* first place in partition */
2197  int th_last_place; /* last place in partition */
2198 # endif
2199 #endif
2200 #if USE_ITT_BUILD
2201  kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2202  kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2203  kmp_uint64 th_frame_time; /* frame timestamp */
2204  kmp_uint64 th_frame_time_serialized; /* frame timestamp in serialized parallel */
2205 #endif /* USE_ITT_BUILD */
2206  kmp_local_t th_local;
2207  struct private_common *th_pri_head;
2208 
2209 /*
2210  * Now the data only used by the worker (after initial allocation)
2211  */
2212  /* TODO the first serial team should actually be stored in the info_t
2213  * structure. this will help reduce initial allocation overhead */
2214  KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/
2215 
2216 #if OMPT_SUPPORT
2217  ompt_thread_info_t ompt_thread_info;
2218 #endif
2219 
2220 /* The following are also read by the master during reinit */
2221  struct common_table *th_pri_common;
2222 
2223  volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2224  /* while awaiting queuing lock acquire */
2225 
2226  volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2227 
2228  ident_t *th_ident;
2229  unsigned th_x; // Random number generator data
2230  unsigned th_a; // Random number generator data
2231 
2232 /*
2233  * Tasking-related data for the thread
2234  */
2235  kmp_task_team_t * th_task_team; // Task team struct
2236  kmp_taskdata_t * th_current_task; // Innermost Task being executed
2237  kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2238  kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels
2239  kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2240  kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2241 
2242  /*
2243  * More stuff for keeping track of active/sleeping threads
2244  * (this part is written by the worker thread)
2245  */
2246  kmp_uint8 th_active_in_pool; // included in count of
2247  // #active threads in pool
2248  int th_active; // ! sleeping
2249  // 32 bits for TCR/TCW
2250 
2251 
2252  struct cons_header * th_cons; // used for consistency check
2253 
2254 /*
 2255  * Add the synchronizing data, which is cache aligned and padded.
2256  */
2257  KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ];
2258 
2259  KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2260 
2261 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
2262  #define NUM_LISTS 4
2263  kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines
2264 #endif
2265 
2266 #if KMP_OS_WINDOWS
2267  kmp_win32_cond_t th_suspend_cv;
2268  kmp_win32_mutex_t th_suspend_mx;
2269  int th_suspend_init;
2270 #endif
2271 #if KMP_OS_UNIX
2272  kmp_cond_align_t th_suspend_cv;
2273  kmp_mutex_align_t th_suspend_mx;
2274  int th_suspend_init_count;
2275 #endif
2276 
2277 #if USE_ITT_BUILD
2278  kmp_itt_mark_t th_itt_mark_single;
2279  // alignment ???
2280 #endif /* USE_ITT_BUILD */
2281 #if KMP_STATS_ENABLED
2282  kmp_stats_list* th_stats;
2283 #endif
2284 } kmp_base_info_t;
2285 
2286 typedef union KMP_ALIGN_CACHE kmp_info {
2287  double th_align; /* use worst case alignment */
2288  char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ];
2289  kmp_base_info_t th;
2290 } kmp_info_t;
2291 
2292 /* ------------------------------------------------------------------------ */
2293 // OpenMP thread team data structures
2294 //
2295 typedef struct kmp_base_data {
2296  volatile kmp_uint32 t_value;
2297 } kmp_base_data_t;
2298 
2299 typedef union KMP_ALIGN_CACHE kmp_sleep_team {
2300  double dt_align; /* use worst case alignment */
2301  char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
2302  kmp_base_data_t dt;
2303 } kmp_sleep_team_t;
2304 
2305 typedef union KMP_ALIGN_CACHE kmp_ordered_team {
2306  double dt_align; /* use worst case alignment */
2307  char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
2308  kmp_base_data_t dt;
2309 } kmp_ordered_team_t;
2310 
2311 typedef int (*launch_t)( int gtid );
2312 
2313 /* Minimum number of ARGV entries to malloc if necessary */
2314 #define KMP_MIN_MALLOC_ARGV_ENTRIES 100
2315 
2316 // Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we
 2317 // have supported at least 96 bytes. Using a larger value to leave more space between the master-write/worker-read
 2318 // section and the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
2319 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2320 # define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
2321 #else
2322 # define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
2323 #endif
2324 #define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
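/*
 * Worked example (illustrative): on a 64-bit x86 build with CACHE_LINE == 64
 * and KMP_PTR_SKIP == 8, the fields preceding t_inline_argv occupy
 * 3*8 + 2*4 + 2*1 + 2 + 4 = 40 bytes, so KMP_INLINE_ARGV_BYTES =
 * 4*64 - (40 % 64) = 216 and KMP_INLINE_ARGV_ENTRIES = 216 / 8 = 27 slots.
 */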
2325 
2326 typedef struct KMP_ALIGN_CACHE kmp_base_team {
2327  // Synchronization Data ---------------------------------------------------------------------------------
2328  KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
2329  kmp_balign_team_t t_bar[ bs_last_barrier ];
2330  volatile int t_construct; // count of single directive encountered by team
2331  kmp_lock_t t_single_lock; // team specific lock
2332 
2333  // Master only -----------------------------------------------------------------------------------------
2334  KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
2335  int t_master_this_cons; // "this_construct" single counter of master in parent team
2336  ident_t *t_ident; // if volatile, have to change too much other crud to volatile too
2337  kmp_team_p *t_parent; // parent team
2338  kmp_team_p *t_next_pool; // next free team in the team pool
2339  kmp_disp_t *t_dispatch; // thread's dispatch data
2340  kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2341 #if OMP_40_ENABLED
2342  kmp_proc_bind_t t_proc_bind; // bind type for par region
2343 #endif // OMP_40_ENABLED
2344 #if USE_ITT_BUILD
2345  kmp_uint64 t_region_time; // region begin timestamp
2346 #endif /* USE_ITT_BUILD */
2347 
2348  // Master write, workers read --------------------------------------------------------------------------
2349  KMP_ALIGN_CACHE void **t_argv;
2350  int t_argc;
2351  int t_nproc; // number of threads in team
2352  microtask_t t_pkfn;
2353  launch_t t_invoke; // procedure to launch the microtask
2354 
2355 #if OMPT_SUPPORT
2356  ompt_team_info_t ompt_team_info;
2357  ompt_lw_taskteam_t *ompt_serialized_team_info;
2358 #endif
2359 
2360 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2361  kmp_int8 t_fp_control_saved;
2362  kmp_int8 t_pad2b;
2363  kmp_int16 t_x87_fpu_control_word; // FP control regs
2364  kmp_uint32 t_mxcsr;
2365 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2366 
2367  void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
2368 
2369  KMP_ALIGN_CACHE kmp_info_t **t_threads;
2370  int t_max_argc;
 2371  int t_max_nproc; // maximum threads this team can handle (dynamically expandable)
2372  int t_serialized; // levels deep of serialized teams
2373  dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2374  int t_id; // team's id, assigned by debugger.
2375  int t_level; // nested parallel level
2376  int t_active_level; // nested active parallel level
2377  kmp_r_sched_t t_sched; // run-time schedule for the team
2378 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2379  int t_first_place; // first & last place in parent thread's partition.
2380  int t_last_place; // Restore these values to master after par region.
2381 #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
 2382  int t_size_changed; // team size was changed? 0: no, 1: yes, -1: changed via omp_set_num_threads() call
2383 
2384  // Read/write by workers as well -----------------------------------------------------------------------
2385 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2386  // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
2387  // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
 2388  // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2389  char dummy_padding[1024];
2390 #endif
2391  KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2392  kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams.
2393  // for SERIALIZED teams nested 2 or more levels deep
2394 #if OMP_40_ENABLED
2395  kmp_int32 t_cancel_request; // typed flag to store request state of cancellation
2396 #endif
2397  int t_master_active; // save on fork, restore on join
2398  kmp_taskq_t t_taskq; // this team's task queue
2399  void *t_copypriv_data; // team specific pointer to copyprivate data array
2400  kmp_uint32 t_copyin_counter;
2401 #if USE_ITT_BUILD
2402  void *t_stack_id; // team specific stack stitching id (for ittnotify)
2403 #endif /* USE_ITT_BUILD */
2404 } kmp_base_team_t;
2405 
2406 union KMP_ALIGN_CACHE kmp_team {
2407  kmp_base_team_t t;
2408  double t_align; /* use worst case alignment */
2409  char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ];
2410 };
2411 
2412 
2413 typedef union KMP_ALIGN_CACHE kmp_time_global {
2414  double dt_align; /* use worst case alignment */
2415  char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
2416  kmp_base_data_t dt;
2417 } kmp_time_global_t;
2418 
2419 typedef struct kmp_base_global {
2420  /* cache-aligned */
2421  kmp_time_global_t g_time;
2422 
2423  /* non cache-aligned */
2424  volatile int g_abort;
2425  volatile int g_done;
2426 
2427  int g_dynamic;
2428  enum dynamic_mode g_dynamic_mode;
2429 
2430 } kmp_base_global_t;
2431 
2432 typedef union KMP_ALIGN_CACHE kmp_global {
2433  kmp_base_global_t g;
2434  double g_align; /* use worst case alignment */
2435  char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ];
2436 } kmp_global_t;
2437 
2438 
2439 typedef struct kmp_base_root {
2440  // TODO: GEH - combine r_active with r_in_parallel then r_active == (r_in_parallel>= 0)
2441  // TODO: GEH - then replace r_active with t_active_levels if we can to reduce the synch
2442  // overhead or keeping r_active
2443 
2444  volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
2445  // GEH: This is misnamed, should be r_in_parallel
2446  volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
2447  int r_in_parallel; /* keeps a count of active parallel regions per root */
2448  // GEH: This is misnamed, should be r_active_levels
2449  kmp_team_t *r_root_team;
2450  kmp_team_t *r_hot_team;
2451  kmp_info_t *r_uber_thread;
2452  kmp_lock_t r_begin_lock;
2453  volatile int r_begin;
2454  int r_blocktime; /* blocktime for this root and descendants */
2455 } kmp_base_root_t;
2456 
2457 typedef union KMP_ALIGN_CACHE kmp_root {
2458  kmp_base_root_t r;
2459  double r_align; /* use worst case alignment */
2460  char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ];
2461 } kmp_root_t;
2462 
2463 struct fortran_inx_info {
2464  kmp_int32 data;
2465 };
2466 
2467 /* ------------------------------------------------------------------------ */
2468 
2469 /* ------------------------------------------------------------------------ */
2470 /* ------------------------------------------------------------------------ */
2471 
2472 extern int __kmp_settings;
2473 extern int __kmp_duplicate_library_ok;
2474 #if USE_ITT_BUILD
2475 extern int __kmp_forkjoin_frames;
2476 extern int __kmp_forkjoin_frames_mode;
2477 #endif
2478 extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
2479 extern int __kmp_determ_red;
2480 
2481 #ifdef KMP_DEBUG
2482 extern int kmp_a_debug;
2483 extern int kmp_b_debug;
2484 extern int kmp_c_debug;
2485 extern int kmp_d_debug;
2486 extern int kmp_e_debug;
2487 extern int kmp_f_debug;
2488 #endif /* KMP_DEBUG */
2489 
2490 /* For debug information logging using rotating buffer */
2491 #define KMP_DEBUG_BUF_LINES_INIT 512
2492 #define KMP_DEBUG_BUF_LINES_MIN 1
2493 
2494 #define KMP_DEBUG_BUF_CHARS_INIT 128
2495 #define KMP_DEBUG_BUF_CHARS_MIN 2
2496 
2497 extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
2498 extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
2499 extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
2500 extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */
2501 
2502 extern char *__kmp_debug_buffer; /* Debug buffer itself */
2503 extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far */
2504 extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */
2505 /* end rotating debug buffer */
2506 
2507 #ifdef KMP_DEBUG
2508 extern int __kmp_par_range; /* +1 => only go par for constructs in range */
2509 
2510 #define KMP_PAR_RANGE_ROUTINE_LEN 1024
2511 extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
2512 #define KMP_PAR_RANGE_FILENAME_LEN 1024
2513 extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
2514 extern int __kmp_par_range_lb;
2515 extern int __kmp_par_range_ub;
2516 #endif
2517 
2518 /* For printing out dynamic storage map for threads and teams */
2519 extern int __kmp_storage_map; /* True means print storage map for threads and teams */
2520 extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */
2521 extern int __kmp_storage_map_verbose_specified;
2522 
2523 extern kmp_cpuinfo_t __kmp_cpuinfo;
2524 
2525 extern volatile int __kmp_init_serial;
2526 extern volatile int __kmp_init_gtid;
2527 extern volatile int __kmp_init_common;
2528 extern volatile int __kmp_init_middle;
2529 extern volatile int __kmp_init_parallel;
2530 extern volatile int __kmp_init_monitor;
2531 extern volatile int __kmp_init_user_locks;
2532 extern int __kmp_init_counter;
2533 extern int __kmp_root_counter;
2534 extern int __kmp_version;
2535 
2536 /* list of address of allocated caches for commons */
2537 extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
2538 
2539 /* Barrier algorithm types and options */
2540 extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
2541 extern kmp_uint32 __kmp_barrier_release_bb_dflt;
2542 extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
2543 extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
2544 extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ];
2545 extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ];
2546 extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ];
2547 extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ];
2548 extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ];
2549 extern char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ];
2550 extern char const *__kmp_barrier_type_name [ bs_last_barrier ];
2551 extern char const *__kmp_barrier_pattern_name [ bp_last_bar ];
2552 
2553 /* Global Locks */
2554 extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
2555 extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
2556 extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
2557 extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
2558 extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
2559 
2560 extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
2561 extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
2562 extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
2563 
2564 /* used for yielding spin-waits */
2565 extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
 2566 extern unsigned int __kmp_next_wait; /* subsequent number of spin-tests */
2567 
2568 extern enum library_type __kmp_library;
2569 
2570 extern enum sched_type __kmp_sched; /* default runtime scheduling */
2571 extern enum sched_type __kmp_static; /* default static scheduling method */
2572 extern enum sched_type __kmp_guided; /* default guided scheduling method */
2573 extern enum sched_type __kmp_auto; /* default auto scheduling method */
2574 extern int __kmp_chunk; /* default runtime chunk size */
2575 
2576 extern size_t __kmp_stksize; /* stack size per thread */
2577 extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */
2578 extern size_t __kmp_stkoffset; /* stack offset per thread */
2579 extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
2580 
2581 extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
2582 extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */
2583 extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
2584 extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */
2585 extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
2586 extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */
2587 extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
2588 extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
2589 extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */
2590 extern int __kmp_generate_warnings; /* should we issue warnings? */
2591 extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
2592 
2593 #ifdef DEBUG_SUSPEND
2594 extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
2595 #endif
2596 
2597 extern kmp_uint32 __kmp_yield_init;
2598 extern kmp_uint32 __kmp_yield_next;
2599 extern kmp_uint32 __kmp_yielding_on;
2600 extern kmp_uint32 __kmp_yield_cycle;
2601 extern kmp_int32 __kmp_yield_on_count;
2602 extern kmp_int32 __kmp_yield_off_count;
2603 
2604 
2605 /* ------------------------------------------------------------------------- */
2606 extern int __kmp_allThreadsSpecified;
2607 
2608 extern size_t __kmp_align_alloc;
2609 /* following data protected by initialization routines */
2610 extern int __kmp_xproc; /* number of processors in the system */
2611 extern int __kmp_avail_proc; /* number of processors available to the process */
2612 extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
2613 extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
2614 extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */
2615 extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */
2616 extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */
2617 extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */
2618 extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */
2619 extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */
2620 extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */
2621 extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */
2622 extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */
2623 extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */
2624 #ifdef KMP_ADJUST_BLOCKTIME
2625 extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
2626 #endif /* KMP_ADJUST_BLOCKTIME */
2627 #ifdef KMP_DFLT_NTH_CORES
2628 extern int __kmp_ncores; /* Total number of cores for threads placement */
2629 #endif
2630 extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */
2631 
2632 extern int __kmp_need_register_atfork_specified;
2633 extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */
2634 extern int __kmp_gtid_mode; /* Method of getting gtid, values:
2635  0 - not set, will be set at runtime
2636  1 - using stack search
2637  2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS))
2638  3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only.
2639  */
2640 extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
2641 #ifdef KMP_TDATA_GTID
2642 #if KMP_OS_WINDOWS
2643 extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */
2644 #else
2645 extern __thread int __kmp_gtid;
2646 #endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
2647 #endif
2648 extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
2649 extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */
2650 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2651 extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */
2652 extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */
 2653 extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
2654 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2655 
2656 extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */
2657 #if KMP_NESTED_HOT_TEAMS
2658 extern int __kmp_hot_teams_mode;
2659 extern int __kmp_hot_teams_max_level;
2660 #endif
2661 
2662 # if KMP_OS_LINUX
2663 extern enum clock_function_type __kmp_clock_function;
2664 extern int __kmp_clock_function_param;
2665 # endif /* KMP_OS_LINUX */
2666 
2667 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
2668 extern enum mic_type __kmp_mic_type;
2669 #endif
2670 
2671 # ifdef USE_LOAD_BALANCE
2672 extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */
2673 # endif /* USE_LOAD_BALANCE */
2674 
2675 // OpenMP 3.1 - Nested num threads array
2676 typedef struct kmp_nested_nthreads_t {
2677  int * nth;
2678  int size;
2679  int used;
2680 } kmp_nested_nthreads_t;
2681 
2682 extern kmp_nested_nthreads_t __kmp_nested_nth;
2683 
2684 #if KMP_USE_ADAPTIVE_LOCKS
2685 
2686 // Parameters for the speculative lock backoff system.
2687 struct kmp_adaptive_backoff_params_t {
2688  // Number of soft retries before it counts as a hard retry.
2689  kmp_uint32 max_soft_retries;
2690  // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to the right
2691  kmp_uint32 max_badness;
2692 };
2693 
2694 extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
2695 
2696 #if KMP_DEBUG_ADAPTIVE_LOCKS
2697 extern char * __kmp_speculative_statsfile;
2698 #endif
2699 
2700 #endif // KMP_USE_ADAPTIVE_LOCKS
2701 
2702 #if OMP_40_ENABLED
2703 extern int __kmp_display_env; /* TRUE or FALSE */
2704 extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
2705 extern int __kmp_omp_cancellation; /* TRUE or FALSE */
2706 #endif
2707 
2708 /* ------------------------------------------------------------------------- */
2709 
2710 /* --------------------------------------------------------------------------- */
2711 /* the following are protected by the fork/join lock */
2712 /* write: lock read: anytime */
2713 extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
2714 /* read/write: lock */
2715 extern volatile kmp_team_t * __kmp_team_pool;
2716 extern volatile kmp_info_t * __kmp_thread_pool;
2717 
2718 /* total number of threads reachable from some root thread including all root threads*/
2719 extern volatile int __kmp_nth;
2720 /* total number of threads reachable from some root thread including all root threads,
2721  and those in the thread pool */
2722 extern volatile int __kmp_all_nth;
2723 extern int __kmp_thread_pool_nth;
2724 extern volatile int __kmp_thread_pool_active_nth;
2725 
2726 extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
2727 /* end data protected by fork/join lock */
2728 /* --------------------------------------------------------------------------- */
2729 
2730 extern kmp_global_t __kmp_global; /* global status */
2731 
2732 extern kmp_info_t __kmp_monitor;
2733 extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library.
2734 extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library.
2735 
2736 #if USE_DEBUGGER
2737 
2738 #define _KMP_GEN_ID( counter ) \
2739  ( \
2740  __kmp_debugging \
2741  ? \
2742  KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \
2743  : \
2744  ~ 0 \
2745  )
2746 #else
2747 #define _KMP_GEN_ID( counter ) \
2748  ( \
2749  ~ 0 \
2750  )
2751 #endif /* USE_DEBUGGER */
2752 
2753 #define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter )
2754 #define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter )
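/*
 * Illustrative sketch (not part of the runtime): with the debugger attached
 * (__kmp_debugging true), each call yields a fresh positive id; otherwise
 * every id is ~0 and the counters are left untouched.
 *
 *   kmp_int32 task_id = KMP_GEN_TASK_ID();   // unique only under a debugger
 */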
2755 
2756 /* ------------------------------------------------------------------------ */
2757 /* ------------------------------------------------------------------------ */
2758 
2759 extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... );
2760 
2761 extern void __kmp_serial_initialize( void );
2762 extern void __kmp_middle_initialize( void );
2763 extern void __kmp_parallel_initialize( void );
2764 
2765 extern void __kmp_internal_begin( void );
2766 extern void __kmp_internal_end_library( int gtid );
2767 extern void __kmp_internal_end_thread( int gtid );
2768 extern void __kmp_internal_end_atexit( void );
2769 extern void __kmp_internal_end_fini( void );
2770 extern void __kmp_internal_end_dtor( void );
2771 extern void __kmp_internal_end_dest( void* );
2772 
2773 extern int __kmp_register_root( int initial_thread );
2774 extern void __kmp_unregister_root( int gtid );
2775 
2776 extern int __kmp_ignore_mppbeg( void );
2777 extern int __kmp_ignore_mppend( void );
2778 
2779 extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws );
2780 extern void __kmp_exit_single( int gtid );
2781 
2782 extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
2783 extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
2784 
2785 
2786 #ifdef USE_LOAD_BALANCE
2787 extern int __kmp_get_load_balance( int );
2788 #endif
2789 
2790 #ifdef BUILD_TV
2791 extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr );
2792 #endif
2793 
2794 extern int __kmp_get_global_thread_id( void );
2795 extern int __kmp_get_global_thread_id_reg( void );
2796 extern void __kmp_exit_thread( int exit_status );
2797 extern void __kmp_abort( char const * format, ... );
2798 extern void __kmp_abort_thread( void );
2799 extern void __kmp_abort_process( void );
2800 extern void __kmp_warn( char const * format, ... );
2801 
2802 extern void __kmp_set_num_threads( int new_nth, int gtid );
2803 
2804 // Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
2805 static inline kmp_info_t * __kmp_entry_thread()
2806 {
2807  int gtid = __kmp_entry_gtid();
2808 
2809  return __kmp_threads[gtid];
2810 }
2811 
2812 extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels );
2813 extern int __kmp_get_max_active_levels( int gtid );
2814 extern int __kmp_get_ancestor_thread_num( int gtid, int level );
2815 extern int __kmp_get_team_size( int gtid, int level );
2816 extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk );
2817 extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk );
2818 
2819 extern unsigned short __kmp_get_random( kmp_info_t * thread );
2820 extern void __kmp_init_random( kmp_info_t * thread );
2821 
2822 extern kmp_r_sched_t __kmp_get_schedule_global( void );
2823 extern void __kmp_adjust_num_threads( int new_nproc );
2824 
2825 extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL );
2826 extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL );
2827 extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL );
2828 #define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR )
2829 #define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR )
2830 #define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR )
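/*
 * Illustrative sketch (not part of the runtime): the macros above splice the
 * caller's source location (KMP_SRC_LOC_CURR) into the underscore-prefixed
 * allocators, so call sites stay simple:
 *
 *   kmp_int32 *buf = (kmp_int32 *)__kmp_allocate( 16 * sizeof( kmp_int32 ) );
 *   // ... use buf ...
 *   __kmp_free( buf );
 */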
2831 
2832 #if USE_FAST_MEMORY
2833 extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL );
2834 extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL );
2835 extern void __kmp_free_fast_memory( kmp_info_t *this_thr );
2836 extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr );
2837 #define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR )
2838 #define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR )
2839 #endif
2840 
2841 extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL );
2842 extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL );
2843 extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL );
2844 extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL );
2845 #define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR )
2846 #define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR )
2847 #define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR )
2848 #define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR )
2849 
2850 #define KMP_INTERNAL_MALLOC(sz) malloc(sz)
2851 #define KMP_INTERNAL_FREE(p) free(p)
2852 #define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz))
2853 #define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz))
2854 
2855 extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads );
2856 
2857 #if OMP_40_ENABLED
2858 extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind );
2859 extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads );
2860 #endif
2861 
2862 extern void __kmp_yield( int cond );
2863 
2864 extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
2865  enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
2866  kmp_int32 chunk );
2867 extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
2868  enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
2869  kmp_int32 chunk );
2870 extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
2871  enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
2872  kmp_int64 chunk );
2873 extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
2874  enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
2875  kmp_int64 chunk );
2876 
2877 extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid,
2878  kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st );
2879 extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid,
2880  kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st );
2881 extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid,
2882  kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st );
2883 extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid,
2884  kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st );
2885 
2886 extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid );
2887 extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid );
2888 extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid );
2889 extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid );
2890 
2891 
2892 #ifdef KMP_GOMP_COMPAT
2893 
2894 extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
2895  enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
2896  kmp_int32 chunk, int push_ws );
2897 extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
2898  enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
2899  kmp_int32 chunk, int push_ws );
2900 extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
2901  enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
2902  kmp_int64 chunk, int push_ws );
2903 extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
2904  enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
2905  kmp_int64 chunk, int push_ws );
2906 extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid );
2907 extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid );
2908 extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid );
2909 extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid );
2910 
2911 #endif /* KMP_GOMP_COMPAT */
2912 
2913 
2914 extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker );
2915 extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker );
2916 extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker );
2917 extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker );
2918 extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker );
2919 
2920 extern kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker );
2921 extern kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker );
2922 extern kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker );
2923 extern kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker );
2924 extern kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker );
2925 
2926 extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
2927 extern kmp_uint64 __kmp_wait_yield_8( kmp_uint64 volatile * spinner, kmp_uint64 checker, kmp_uint32 (*pred) (kmp_uint64, kmp_uint64), void * obj );
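
/* Illustrative sketch (hypothetical helper): spin until *flag equals 'value',
 * using one of the predicate helpers above; __kmp_wait_yield_4 yields
 * periodically between checks while it spins. */
static void __kmp_example_spin( volatile kmp_uint32 *flag, kmp_uint32 value )
{
    __kmp_wait_yield_4( flag, value, __kmp_eq_4, NULL );
}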
2928 
2929 class kmp_flag_32;
2930 class kmp_flag_64;
2931 class kmp_flag_oncore;
2932 extern void __kmp_wait_32(kmp_info_t *this_thr, kmp_flag_32 *flag, int final_spin
2933 #if USE_ITT_BUILD
2934  , void * itt_sync_obj
2935 #endif
2936  );
2937 extern void __kmp_release_32(kmp_flag_32 *flag);
2938 extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
2939 #if USE_ITT_BUILD
2940  , void * itt_sync_obj
2941 #endif
2942  );
2943 extern void __kmp_release_64(kmp_flag_64 *flag);
2944 extern void __kmp_wait_oncore(kmp_info_t *this_thr, kmp_flag_oncore *flag, int final_spin
2945 #if USE_ITT_BUILD
2946  , void * itt_sync_obj
2947 #endif
2948  );
2949 extern void __kmp_release_oncore(kmp_flag_oncore *flag);
2950 
2951 extern void __kmp_infinite_loop( void );
2952 
2953 extern void __kmp_cleanup( void );
2954 
2955 #if KMP_HANDLE_SIGNALS
2956  extern int __kmp_handle_signals;
2957  extern void __kmp_install_signals( int parallel_init );
2958  extern void __kmp_remove_signals( void );
2959 #endif
2960 
2961 extern void __kmp_clear_system_time( void );
2962 extern void __kmp_read_system_time( double *delta );
2963 
2964 extern void __kmp_check_stack_overlap( kmp_info_t *thr );
2965 
2966 extern void __kmp_expand_host_name( char *buffer, size_t size );
2967 extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern );
2968 
2969 #if KMP_OS_WINDOWS
2970 extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */
2971 #endif
2972 
2973 extern void __kmp_runtime_initialize( void ); /* machine specific initialization */
2974 extern void __kmp_runtime_destroy( void );
2975 
2976 #if KMP_AFFINITY_SUPPORTED
2977 extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask);
2978 extern void __kmp_affinity_initialize(void);
2979 extern void __kmp_affinity_uninitialize(void);
2980 extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
2981 #if OMP_40_ENABLED
2982 extern void __kmp_affinity_set_place(int gtid);
2983 #endif
2984 extern void __kmp_affinity_determine_capable( const char *env_var );
2985 extern int __kmp_aux_set_affinity(void **mask);
2986 extern int __kmp_aux_get_affinity(void **mask);
2987 extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
2988 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
2989 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
2990 extern void __kmp_balanced_affinity( int tid, int team_size );
2991 #endif /* KMP_AFFINITY_SUPPORTED */
2992 
2993 extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
2994 
2995 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2996 
2997 extern int __kmp_futex_determine_capable( void );
2998 
2999 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3000 
3001 extern void __kmp_gtid_set_specific( int gtid );
3002 extern int __kmp_gtid_get_specific( void );
3003 
3004 extern double __kmp_read_cpu_time( void );
3005 
3006 extern int __kmp_read_system_info( struct kmp_sys_info *info );
3007 
3008 extern void __kmp_create_monitor( kmp_info_t *th );
3009 
3010 extern void *__kmp_launch_thread( kmp_info_t *thr );
3011 
3012 extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size );
3013 
3014 #if KMP_OS_WINDOWS
3015 extern int __kmp_still_running(kmp_info_t *th);
3016 extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val );
3017 extern void __kmp_free_handle( kmp_thread_t tHandle );
3018 #endif
3019 
3020 extern void __kmp_reap_monitor( kmp_info_t *th );
3021 extern void __kmp_reap_worker( kmp_info_t *th );
3022 extern void __kmp_terminate_thread( int gtid );
3023 
3024 extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag );
3025 extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag );
3026 extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag );
3027 extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag );
3028 extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag );
3029 extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag );
3030 
3031 extern void __kmp_elapsed( double * );
3032 extern void __kmp_elapsed_tick( double * );
3033 
3034 extern void __kmp_enable( int old_state );
3035 extern void __kmp_disable( int *old_state );
3036 
3037 extern void __kmp_thread_sleep( int millis );
3038 
3039 extern void __kmp_common_initialize( void );
3040 extern void __kmp_common_destroy( void );
3041 extern void __kmp_common_destroy_gtid( int gtid );
3042 
3043 #if KMP_OS_UNIX
3044 extern void __kmp_register_atfork( void );
3045 #endif
3046 extern void __kmp_suspend_initialize( void );
3047 extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th );
3048 
3049 extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
3050  kmp_team_t *team, int tid);
3051 #if OMP_40_ENABLED
3052 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
3053 #if OMPT_SUPPORT
3054  ompt_parallel_id_t ompt_parallel_id,
3055 #endif
3056  kmp_proc_bind_t proc_bind,
3057  kmp_internal_control_t *new_icvs,
3058  int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
3059 #else
3060 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
3061 #if OMPT_SUPPORT
3062  ompt_parallel_id_t ompt_parallel_id,
3063 #endif
3064  kmp_internal_control_t *new_icvs,
3065  int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
3066 #endif // OMP_40_ENABLED
3067 extern void __kmp_free_thread( kmp_info_t * );
3068 extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) );
3069 extern kmp_team_t * __kmp_reap_team( kmp_team_t * );
3070 
3071 /* ------------------------------------------------------------------------ */
3072 
3073 extern void __kmp_initialize_bget( kmp_info_t *th );
3074 extern void __kmp_finalize_bget( kmp_info_t *th );
3075 
3076 KMP_EXPORT void *kmpc_malloc( size_t size );
3077 KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize );
3078 KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size );
3079 KMP_EXPORT void kmpc_free( void *ptr );
3080 
3081 /* ------------------------------------------------------------------------ */
3082 /* declarations for internal use */
3083 
3084 extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
3085  size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) );
3086 extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid );
3087 
3091 enum fork_context_e
3092 {
3093  fork_context_gnu,    /**< Called from GNU generated code, so must not invoke the microtask internally. */
3094  fork_context_intel,  /**< Called from Intel generated code. */
3095  fork_context_last
3096 };
3097 extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
3098  kmp_int32 argc,
3099 #if OMPT_SUPPORT
3100  void *unwrapped_task,
3101 #endif
3102  microtask_t microtask, launch_t invoker,
3103 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
3104 #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
3105  va_list *ap
3106 #else
3107  va_list ap
3108 #endif
3109  );
3110 
3111 extern void __kmp_join_call( ident_t *loc, int gtid
3112 #if OMP_40_ENABLED
3113  , int exit_teams = 0
3114 #endif
3115  );
3116 
3117 extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3118 extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team );
3119 extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team );
3120 extern int __kmp_invoke_task_func( int gtid );
3121 extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
3122 extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
3123 
3124 // should never have been exported
3125 KMP_EXPORT int __kmpc_invoke_task_func( int gtid );
3126 #if OMP_40_ENABLED
3127 extern int __kmp_invoke_teams_master( int gtid );
3128 extern void __kmp_teams_master( int gtid );
3129 #endif
3130 extern void __kmp_save_internal_controls( kmp_info_t * thread );
3131 extern void __kmp_user_set_library (enum library_type arg);
3132 extern void __kmp_aux_set_library (enum library_type arg);
3133 extern void __kmp_aux_set_stacksize( size_t arg);
3134 extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid);
3135 extern void __kmp_aux_set_defaults( char const * str, int len );
3136 
3137 /* Functions below are declared here so that they can be called from __kmp_aux_env_initialize() in kmp_settings.c. */
3138 void kmpc_set_blocktime (int arg);
3139 void ompc_set_nested( int flag );
3140 void ompc_set_dynamic( int flag );
3141 void ompc_set_num_threads( int arg );
3142 
3143 extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr,
3144  kmp_team_t *team, int tid );
3145 extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr );
3146 extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid,
3147  kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3148  kmp_routine_entry_t task_entry );
3149 #if OMPT_SUPPORT
3150 extern void __kmp_task_init_ompt( kmp_taskdata_t * task, int tid );
3151 #endif
3152 extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr,
3153  kmp_team_t *team, int tid, int set_curr_task );
3154 
3155 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
3156  int *thread_finished,
3157 #if USE_ITT_BUILD
3158  void * itt_sync_obj,
3159 #endif /* USE_ITT_BUILD */
3160  kmp_int32 is_constrained);
3161 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
3162  int *thread_finished,
3163 #if USE_ITT_BUILD
3164  void * itt_sync_obj,
3165 #endif /* USE_ITT_BUILD */
3166  kmp_int32 is_constrained);
3167 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
3168  int *thread_finished,
3169 #if USE_ITT_BUILD
3170  void * itt_sync_obj,
3171 #endif /* USE_ITT_BUILD */
3172  kmp_int32 is_constrained);
3173 
3174 extern void __kmp_reap_task_teams( void );
3175 extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
3176 extern void __kmp_wait_to_unref_task_teams( void );
3177 extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int both, int always );
3178 extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team );
3179 extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
3180 #if USE_ITT_BUILD
3181  , void * itt_sync_obj
3182 #endif /* USE_ITT_BUILD */
3183 );
3184 extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );
3185 
3186 extern int __kmp_is_address_mapped( void *addr );
3187 extern kmp_uint64 __kmp_hardware_timestamp(void);
3188 
3189 #if KMP_OS_UNIX
3190 extern int __kmp_read_from_file( char const *path, char const *format, ... );
3191 #endif
3192 
3193 /* ------------------------------------------------------------------------ */
3194 //
3195 // Assembly routines that have no compiler intrinsic replacement
3196 //
3197 
3198 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3199 
3200 extern void __kmp_query_cpuid( kmp_cpuinfo_t *p );
3201 
3202 #define __kmp_load_mxcsr(p) _mm_setcsr(*(p))
3203 static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
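
/* Illustrative sketch (hypothetical helper): save and restore the SSE
 * control/status register around code that may change rounding or exception
 * masks, using the macro/inline pair above. */
static void __kmp_example_fp_env( void )
{
    kmp_uint32 saved;
    __kmp_store_mxcsr( &saved );       /* saved = _mm_getcsr() */
    /* ... code that may alter the MXCSR ... */
    __kmp_load_mxcsr( &saved );        /* _mm_setcsr( saved )  */
}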
3204 
3205 extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
3206 extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p );
3207 extern void __kmp_clear_x87_fpu_status_word( void );
3208 # define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
3209 
3210 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3211 
3212 extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[]
3213 #if OMPT_SUPPORT
3214  , void **exit_frame_ptr
3215 #endif
3216 );
3217 
3218 
3219 /* ------------------------------------------------------------------------ */
3220 
3221 KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags );
3222 KMP_EXPORT void __kmpc_end ( ident_t * );
3223 
3224 KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor,
3225  kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length );
3226 KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor );
3227 KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size );
3228 
3229 KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * );
3230 KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * );
3231 KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * );
3232 KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * );
3233 
3234 KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * );
3235 KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... );
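
/* Illustrative sketch (hypothetical __kmp_example_* names): the outlining a
 * compiler performs for "#pragma omp parallel { body( &x ); }". The outlined
 * routine receives the global and bound thread ids plus the shared args. */
static void __kmp_example_outlined( kmp_int32 *gtid, kmp_int32 *btid, int *x )
{
    /* body( x ); -- executed by every thread in the new team */
}
static void __kmp_example_parallel( ident_t *loc )
{
    int x = 0;
    __kmpc_fork_call( loc, 1, (kmpc_micro) __kmp_example_outlined, &x );
}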
3236 
3237 KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid );
3238 KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid );
3239 
3240 KMP_EXPORT void __kmpc_flush ( ident_t *);
3241 KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid );
3242 KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid );
3243 KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid );
3244 KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid );
3245 KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid );
3246 KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
3247 KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
3248 
3249 KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid );
3250 KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid );
3251 
3252 KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid );
3253 
3254 KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid );
3255 KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid );
3256 
3257 KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter,
3258  kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk );
3259 
3260 KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid );
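
/* Illustrative sketch (hypothetical helper; assumes kmp_int is the native
 * integer typedef from kmp_os.h): a statically scheduled loop lowered onto
 * the entry above. Compilers more commonly call the width-suffixed
 * __kmpc_for_static_init_4/8 variants, so treat this as a sketch of the
 * protocol, not of actual compiler output. */
static void __kmp_example_static_loop( ident_t *loc, kmp_int32 gtid,
                                       kmp_int lo, kmp_int hi )
{
    kmp_int32 last = 0;
    kmp_int lower = lo, upper = hi - 1, stride = 1;
    KMPC_FOR_STATIC_INIT( loc, gtid, kmp_sch_static, &last,
                          &lower, &upper, &stride, 1, 0 );
    for ( kmp_int i = lower; i <= upper; ++i )
        ;                              /* body( i ) on this thread's slice */
    __kmpc_for_static_fini( loc, gtid );
}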
3261 
3262 KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit );
3263 
3264 extern void KMPC_SET_NUM_THREADS ( int arg );
3265 extern void KMPC_SET_DYNAMIC ( int flag );
3266 extern void KMPC_SET_NESTED ( int flag );
3267 
3268 /* ------------------------------------------------------------------------ */
3269 
3270 /*
3271  * Taskq interface routines
3272  */
3273 
3274 KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk,
3275  size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds);
3276 KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
3277 KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
3278 KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status);
3279 KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
3280 KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task);
3281 
3282 /* ------------------------------------------------------------------------ */
3283 
3284 /*
3285  * OMP 3.0 tasking interface routines
3286  */
3287 
3288 KMP_EXPORT kmp_int32
3289 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
3290 KMP_EXPORT kmp_task_t*
3291 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
3292  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3293  kmp_routine_entry_t task_entry );
3294 KMP_EXPORT void
3295 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
3296 KMP_EXPORT void
3297 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
3298 KMP_EXPORT kmp_int32
3299 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
3300 KMP_EXPORT kmp_int32
3301 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid );
3302 
3303 KMP_EXPORT kmp_int32
3304 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part );
3305 
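/* Illustrative sketch (hypothetical __kmp_example_* names): the lowering of
 * "#pragma omp task { body(); } ... #pragma omp taskwait". The flags value 1
 * marks the task as tied (cf. TASK_TIED); sizeof_shareds is 0 because this
 * task captures nothing. */
static kmp_int32 __kmp_example_task_entry( kmp_int32 gtid, void *task )
{
    /* body(); -- 'task' points at the kmp_task_t, shareds live behind it */
    return 0;
}
static void __kmp_example_spawn_task( ident_t *loc, kmp_int32 gtid )
{
    kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, /* tied */ 1,
                                           sizeof( kmp_task_t ), 0,
                                           __kmp_example_task_entry );
    __kmpc_omp_task( loc, gtid, t );   /* enqueue (or run) the task */
    __kmpc_omp_taskwait( loc, gtid );  /* wait for child tasks      */
}
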
3306 #if TASK_UNUSED
3307 void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
3308 void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
3309 #endif // TASK_UNUSED
3310 
3311 /* ------------------------------------------------------------------------ */
3312 
3313 #if OMP_40_ENABLED
3314 
3315 KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
3316 KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
3317 
3318 KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
3319  kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3320  kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
3321 KMP_EXPORT void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3322  kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
3323 extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task );
3324 
3325 extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate );
3326 
3327 KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
3328 KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
3329 KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
3330 KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
3331 
3332 #if OMP_41_ENABLED
3333 
3334 KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask );
3335 KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
3336 
3337 #endif
3338 
3339 #endif
3340 
3341 
3342 /*
3343  * Lock interface routines (fast versions with gtid passed in)
3344  */
3345 KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3346 KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3347 KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3348 KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3349 KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3350 KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3351 KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3352 KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3353 KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3354 KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
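
/* Illustrative sketch (hypothetical helper): the full lifecycle of a user
 * lock routed through the fast gtid-passing entries above. */
static void __kmp_example_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
{
    __kmpc_init_lock( loc, gtid, user_lock );
    __kmpc_set_lock( loc, gtid, user_lock );
    /* ... critical section ... */
    __kmpc_unset_lock( loc, gtid, user_lock );
    __kmpc_destroy_lock( loc, gtid, user_lock );
}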
3355 
3356 /* ------------------------------------------------------------------------ */
3357 
3358 /*
3359  * Interface to fast scalable reduce methods routines
3360  */
3361 
3362 KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid,
3363  kmp_int32 num_vars, size_t reduce_size,
3364  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3365  kmp_critical_name *lck );
3366 KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
3367 KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid,
3368  kmp_int32 num_vars, size_t reduce_size,
3369  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3370  kmp_critical_name *lck );
3371 KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
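
/* Illustrative sketch (hypothetical names; simplified relative to real
 * compiler output): the dispatch on the return value of __kmpc_reduce_nowait
 * -- 1 means this thread performs the reduction and must call
 * __kmpc_end_reduce_nowait, 2 means use an atomic update, 0 means another
 * thread already took care of it. */
static void __kmp_example_reduce( ident_t *loc, kmp_int32 gtid,
                                  double *sum, double local,
                                  void (*reducer)( void *, void * ),
                                  kmp_critical_name *lck )
{
    void *red_list[1] = { &local };
    switch ( __kmpc_reduce_nowait( loc, gtid, 1, sizeof( red_list ),
                                   red_list, reducer, lck ) ) {
        case 1:                        /* tree / critical-section path */
            *sum += local;
            __kmpc_end_reduce_nowait( loc, gtid, lck );
            break;
        case 2:                        /* atomic path                  */
            /* atomically add 'local' into '*sum' */
            break;
        default:                       /* 0: nothing left to do        */
            break;
    }
}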
3372 
3373 /*
3374  * internal fast reduction routines
3375  */
3376 
3377 extern PACKED_REDUCTION_METHOD_T
3378 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
3379  kmp_int32 num_vars, size_t reduce_size,
3380  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3381  kmp_critical_name *lck );
3382 
3383 // This function is used for testing the set/get/determine-reduction-method machinery.
3384 KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
3385 
3386 KMP_EXPORT kmp_uint64 __kmpc_get_taskid( void );
3387 KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid( void );
3388 
3389 KMP_EXPORT void __kmpc_place_threads(int,int,int);
3390 
3391 /* ------------------------------------------------------------------------ */
3392 /* ------------------------------------------------------------------------ */
3393 
3394 // C++ port
3395 // Entry points below were missing 'extern "C"' declarations.
3396 
3397 KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc );
3398 KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid );
3399 KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads );
3400 
3401 #if OMP_40_ENABLED
3402 KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind );
3403 KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads );
3404 KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...);
3405 
3406 #endif
3407 
3408 KMP_EXPORT void*
3409 __kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid,
3410  void * data, size_t size, void *** cache );
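
/* Illustrative sketch (hypothetical __kmp_example_* names): the access
 * sequence for a threadprivate variable. 'cache' is a per-variable cache the
 * compiler would normally generate; the runtime fills it on first use. */
static int    __kmp_example_tp_var   = 0;
static void **__kmp_example_tp_cache = NULL;
static int * __kmp_example_tp_addr( ident_t *loc, kmp_int32 gtid )
{
    return (int *) __kmpc_threadprivate_cached( loc, gtid,
                                                &__kmp_example_tp_var,
                                                sizeof( int ),
                                                &__kmp_example_tp_cache );
}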
3411 
3412 // Symbols for mutual detection: linking more than one OpenMP runtime (e.g. this library and the Microsoft one) triggers a duplicate-symbol link error.
3413 extern int _You_must_link_with_exactly_one_OpenMP_library;
3414 extern int _You_must_link_with_Intel_OpenMP_library;
3415 #if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
3416  extern int _You_must_link_with_Microsoft_OpenMP_library;
3417 #endif
3418 
3419 
3420 // The routines below are not exported.
3421 // Consider making them 'static' in corresponding source files.
3422 void
3423 kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
3424 struct private_common *
3425 kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
3426 
3427 //
3428 // ompc_, kmpc_ entries moved from omp.h.
3429 //
3430 #if KMP_OS_WINDOWS
3431 # define KMPC_CONVENTION __cdecl
3432 #else
3433 # define KMPC_CONVENTION
3434 #endif
3435 
3436 #ifndef __OMP_H
3437 typedef enum omp_sched_t {
3438  omp_sched_static = 1,
3439  omp_sched_dynamic = 2,
3440  omp_sched_guided = 3,
3441  omp_sched_auto = 4
3442 } omp_sched_t;
3443 typedef void * kmp_affinity_mask_t;
3444 #endif
3445 
3446 KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
3447 KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
3448 KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
3449 KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
3450 KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
3451 KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
3452 KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
3453 
3454 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
3455 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
3456 KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
3457 KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
3458 
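/* Illustrative sketch (hypothetical helper; that the omp_/kmp_ user API
 * forwards 1:1 to these entries is an assumption, not something this header
 * guarantees): */
static void __kmp_example_tuning( void )
{
    ompc_set_schedule( omp_sched_dynamic, 16 );        /* schedule(dynamic,16) */
    ompc_set_max_active_levels( 2 );                   /* cap nesting depth    */
    kmpc_set_stacksize_s( (size_t) 4 * 1024 * 1024 );  /* 4 MiB worker stacks  */
}
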
3459 #ifdef __cplusplus
3460 }
3461 #endif
3462 
3463 #endif /* KMP_H */
3464 