#if defined(_WIN32_WINNT) && defined(_M_IX86)
#define _WIN32_WINNT 0x0502
#endif

#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h>
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */

#if KMP_STATIC_STEAL_ENABLED

// Replaces the dispatch_private_info{32,64} structures and types.
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  // ... (count, ub, lb, st, tc fields elided) ...
  T static_steal_counter; // for static_steal only; bumped once per loop

  // KMP_ALIGN(32) keeps parm1-4 properly aligned and in the same cache line,
  // since they are always used together.
  struct KMP_ALIGN(32) { // compiler does not accept unnamed structure
    T parm1;
    T parm2;
    T parm3;
    T parm4;
  };
  // ... (ordered_lower, ordered_upper, last_upper elided) ...
};
#else /* KMP_STATIC_STEAL_ENABLED */

// Replaces the dispatch_private_info{32,64} structures and types
// (variant built when static stealing is compiled out).
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  // ... (lb, ub, st, tc, parm1-4 and ordered bounds elided) ...
};

#endif /* KMP_STATIC_STEAL_ENABLED */
template <typename T> struct KMP_ALIGN_CACHE dispatch_private_info_template {
  // Duplicate alignment here, because the unnamed structure in the class
  // does not help.
  union KMP_ALIGN_CACHE private_info_tmpl {
    dispatch_private_infoXX_template<T> p;
    dispatch_private_info64_t p64;
  } u;
  enum sched_type schedule; /* scheduling algorithm */
  kmp_uint32 ordered; /* ordered clause specified */
  kmp_uint32 ordered_bumped;
  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; // to retain the structure size
  dispatch_private_info *next; /* stack of buffers for nest of serial regions */
  kmp_uint32 nomerge; /* don't merge iters if serialized */
  kmp_uint32 type_size;
  enum cons_type pushed_ws;
};
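// Note: __kmp_dispatch_init() below KMP_BUILD_ASSERTs that this template has
// exactly the same size as the untyped dispatch_private_info structure, so
// the per-thread buffers in th_dispatch can safely be reinterpret_cast to the
// typed form for any T.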
// Replaces the dispatch_shared_info{32,64} structures and types.
template <typename UT> struct dispatch_shared_infoXX_template {
  volatile UT iteration; // shared chunk / iteration index
  volatile UT num_done;
  volatile UT ordered_iteration;
  // Dummy to retain the structure size while keeping ordered_iteration scalar.
  UT ordered_dummy[KMP_MAX_ORDERED - 3];
};
// Replaces the dispatch_shared_info structure and dispatch_shared_info_t type.
template <typename UT> struct dispatch_shared_info_template {
  // A union is needed here to keep the structure size.
  union shared_info_tmpl {
    dispatch_shared_infoXX_template<UT> s;
    dispatch_shared_info64_t s64;
  } u;
  volatile kmp_uint32 buffer_index;
#if OMP_45_ENABLED
  volatile kmp_int32 doacross_buf_idx; // teamwise index
  kmp_uint32 *doacross_flags; // array of iteration flags (0/1)
  kmp_int32 doacross_num_done; // count finished threads
#endif
};
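// The team keeps __kmp_dispatch_num_buffers of these shared buffers and
// rotates through them; buffer_index tells waiting threads when a slot has
// been recycled and may be reused (see the __kmp_wait_yield on buffer_index
// in __kmp_dispatch_init and the "+= __kmp_dispatch_num_buffers" bump at the
// end of __kmp_dispatch_next).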
#undef USE_TEST_LOCKS

// test_then_add template (the general template should NOT be used)
template <typename T> static __forceinline T test_then_add(volatile T *p, T d);

template <>
__forceinline kmp_int32 test_then_add<kmp_int32>(volatile kmp_int32 *p,
                                                 kmp_int32 d) {
  kmp_int32 r;
  r = KMP_TEST_THEN_ADD32(p, d);
  return r;
}

template <>
__forceinline kmp_int64 test_then_add<kmp_int64>(volatile kmp_int64 *p,
                                                 kmp_int64 d) {
  kmp_int64 r;
  r = KMP_TEST_THEN_ADD64(p, d);
  return r;
}
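// These thin wrappers let the templated dispatch code pick the right
// fetch-and-add / increment / CAS primitive from the loop index width at
// compile time, e.g. test_then_add<ST>(&sh->u.s.ordered_iteration, inc) in
// __kmp_dispatch_finish_chunk() below.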
// test_then_inc_acq template (the general template should NOT be used)
template <typename T> static __forceinline T test_then_inc_acq(volatile T *p);

template <>
__forceinline kmp_int32 test_then_inc_acq<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC_ACQ32(p);
  return r;
}

template <>
__forceinline kmp_int64 test_then_inc_acq<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC_ACQ64(p);
  return r;
}
// test_then_inc template (the general template should NOT be used)
template <typename T> static __forceinline T test_then_inc(volatile T *p);

template <>
__forceinline kmp_int32 test_then_inc<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC32(p);
  return r;
}

template <>
__forceinline kmp_int64 test_then_inc<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC64(p);
  return r;
}
// compare_and_swap template (the general template should NOT be used)
template <typename T>
static __forceinline kmp_int32 compare_and_swap(volatile T *p, T c, T s);

template <>
__forceinline kmp_int32 compare_and_swap<kmp_int32>(volatile kmp_int32 *p,
                                                    kmp_int32 c, kmp_int32 s) {
  return KMP_COMPARE_AND_STORE_REL32(p, c, s);
}

template <>
__forceinline kmp_int32 compare_and_swap<kmp_int64>(volatile kmp_int64 *p,
                                                    kmp_int64 c, kmp_int64 s) {
  return KMP_COMPARE_AND_STORE_REL64(p, c, s);
}
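// compare_and_swap() returns nonzero iff *p was equal to c and has been
// replaced by s (release semantics); the guided schedules in
// __kmp_dispatch_next use it to atomically claim [init, limit] out of the
// shared iteration counter.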
/* Spin-wait loop that first pauses, then yields. Waits until the predicate
   returns non-zero when called with *spinner and checker. Does NOT put the
   waiting thread to sleep. */
template <typename UT>
static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
                           kmp_uint32 (*pred)(UT, UT)
                               USE_ITT_BUILD_ARG(void *obj)) {
  // note: we may not belong to a team at this point
  volatile UT *spin = spinner;
  UT check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(UT, UT) = pred;
  UT r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = *spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // If oversubscribed, or if we have waited a bit and
    // KMP_LIBRARY=throughput, then yield; the pause is in KMP_YIELD_SPIN.
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}
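// Typical uses in this file: wait until a dispatch buffer has been recycled,
//   __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
//                                __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
// and, in the ordered-entry path, wait until it is this thread's turn,
//   __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
//                        __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));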
template <typename UT> static kmp_uint32 __kmp_eq(UT value, UT checker) {
  return value == checker;
}
template <typename UT> static kmp_uint32 __kmp_neq(UT value, UT checker) {
  return value != checker;
}
template <typename UT> static kmp_uint32 __kmp_lt(UT value, UT checker) {
  return value < checker;
}
template <typename UT> static kmp_uint32 __kmp_ge(UT value, UT checker) {
  return value >= checker;
}
template <typename UT> static kmp_uint32 __kmp_le(UT value, UT checker) {
  return value <= checker;
}
static void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref,
                                     ident_t *loc_ref) {
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(gtid_ref);

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_root->r.r_active &&
        (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }
}
template <typename UT>
static void __kmp_dispatch_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  typedef typename traits_t<UT>::signed_t ST;
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    UT lower;

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }
    lower = pr->u.p.ordered_lower;

#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d before wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }
#endif

    __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                         __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
    KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
    {
      char *buff;
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d after wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }
#endif
  }
  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid));
}
static void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref,
                                     ident_t *loc_ref) {
  kmp_info_t *th;

  if (__kmp_env_consistency_check) {
    th = __kmp_threads[*gtid_ref];
    if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
      __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
    }
  }
}
template <typename UT>
static void __kmp_dispatch_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  typedef typename traits_t<UT>::signed_t ST;
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
      __kmp_pop_sync(gtid, ct_ordered_in_pdo, loc_ref);
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }

    KMP_FSYNC_RELEASING(CCAST(UT *, &sh->u.s.ordered_iteration));
#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped != 0) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

    pr->ordered_bumped += 1;

    KD_TRACE(1000,
             ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
              gtid, pr->ordered_bumped));

    /* TODO use general release procedure? */
    test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
  }
  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid));
}
/* Computes x**y, where 0 < x < 1 and y is an unsigned integer. */
template <typename UT>
static __forceinline long double __kmp_pow(long double x, UT y) {
  long double s = 1.0L;

  KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
  // ... (accumulate powers of x into s while halving y) ...
  return s;
}
/* Computes and returns the number of unassigned iterations after idx chunks
   have been assigned, i.e. roughly tc * base**idx rounded up. Marked __inline
   rather than __forceinline: force-inlining has been observed to miscompile
   the guided analytical schedule. */
template <typename T>
static __inline typename traits_t<T>::unsigned_t
__kmp_dispatch_guided_remaining(T tc, typename traits_t<T>::floating_t base,
                                typename traits_t<T>::unsigned_t idx) {
  typedef typename traits_t<T>::unsigned_t UT;

  long double x = tc * __kmp_pow<UT>(base, idx);
  UT r = (UT)x;
  if (x == r)
    return r;
  return r + 1;
}
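// Example (guided analytical case below): with base = 1 - 0.5/nproc, nproc =
// 4 and tc = 1000, base = 0.875, so after idx = 8 chunks roughly
// 1000 * 0.875**8 ~= 344 iterations remain unassigned; the analytical
// schedule uses this value to size the chunk handed out at index idx.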
// Parameters of the guided-iterative algorithm (default n = 2).
static int guided_int_param = 2;
static double guided_flt_param = 0.5; // = 1.0 / guided_int_param
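// __kmp_dispatch_init(): called once per thread per dynamically scheduled
// loop (via the __kmpc_dispatch_init_* entry points below). It normalizes the
// schedule kind, computes the trip count, fills the thread's
// dispatch_private_info buffer with the per-schedule parameters (parm1..parm4)
// and, for non-serialized teams, binds the thread to the team's shared
// dispatch buffer.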
// UT - unsigned flavor of T, ST - signed flavor of T,
// DBL - double if sizeof(T)==4, else long double.
template <typename T>
static void
__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
                    T ub, typename traits_t<T>::signed_t st,
                    typename traits_t<T>::signed_t chunk, int push_ws) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;

  int active;
  T tc;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_uint32 my_buffer_index;
  dispatch_private_info_template<T> *pr;
  dispatch_shared_info_template<UT> volatile *sh;

  KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
                   sizeof(dispatch_private_info));
  KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
                   sizeof(dispatch_shared_info));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_INIT();
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
                            "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
                            traits_t<ST>::spec, traits_t<T>::spec,
                            traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
    __kmp_str_free(&buff);
  }
#endif
  /* setup data */
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  active = !team->t.t_serialized;
  th->th.th_ident = loc;

#if USE_ITT_BUILD
  kmp_uint64 cur_chunk = chunk;
  int itt_need_metadata_reporting = __itt_metadata_add_ptr &&
                                    __kmp_forkjoin_frames_mode == 3 &&
                                    KMP_MASTER_GTID(gtid) &&
#if OMP_40_ENABLED
                                    th->th.th_teams_microtask == NULL &&
#endif
                                    team->t.t_active_level == 1;
#endif
  if (!active) {
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
  } else {
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    my_buffer_index = th->th.th_dispatch->th_disp_index++;

    /* What happens when number of threads changes, need to resize buffer? */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        &th->th.th_dispatch
             ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
    sh = reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
        &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
  }

#if (KMP_STATIC_STEAL_ENABLED)
  if (SCHEDULE_HAS_NONMONOTONIC(schedule))
    // AC: we now have only one implementation of stealing, so use it
    schedule = kmp_sch_static_steal;
  else
#endif
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

  /* Pick up the nomerge/ordered bits from the scheduling type */
  // ... (sets pr->nomerge and pr->ordered, strips those bits from schedule) ...
  pr->type_size = traits_t<T>::type_size; // remember the size of variables

  if (schedule == kmp_sch_static) {
    schedule = __kmp_static;
  } else {
    if (schedule == kmp_sch_runtime) {
      // Use the schedule and chunk size specified by OMP_SCHEDULE
      // (or the defaults if not specified).
      schedule = team->t.t_sched.r_sched_type;
      if (schedule == kmp_sch_guided_chunked) {
        schedule = __kmp_guided;
      } else if (schedule == kmp_sch_static) {
        schedule = __kmp_static;
      }
      chunk = team->t.t_sched.chunk;
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
            traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
#endif
    } else {
      if (schedule == kmp_sch_guided_chunked) {
        schedule = __kmp_guided;
      }
      if (chunk <= 0) {
        chunk = KMP_DEFAULT_CHUNK;
      }
    }

    if (schedule == kmp_sch_auto) {
      // mapping and differentiation: in the __kmp_do_serial_initialize()
      schedule = __kmp_auto;
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_init: kmp_sch_auto: T#%%d new: "
                                "schedule:%%d chunk:%%%s\n",
                                traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
#endif
    }

    /* guided analytical not safe for too many threads */
    if (schedule == kmp_sch_guided_analytical_chunked &&
        th->th.th_team_nproc > 1 << 20) {
      schedule = kmp_sch_guided_iterative_chunked;
      KMP_WARNING(DispatchManyThreads);
    }
    if (schedule == kmp_sch_runtime_simd) {
      // compiler provides simd_width in the chunk parameter
      schedule = team->t.t_sched.r_sched_type;
      if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
          schedule == __kmp_static) {
        schedule = kmp_sch_static_balanced_chunked;
      } else {
        if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided)
          schedule = kmp_sch_guided_simd;
        chunk = team->t.t_sched.chunk * chunk;
      }
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
                                " chunk:%%%s\n",
                                traits_t<ST>::spec);
        KD_TRACE(10, (buff, gtid, schedule, chunk));
        __kmp_str_free(&buff);
      }
#endif
    }
    pr->u.p.parm1 = chunk;
  }
  KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
              "unknown scheduling type");

  if (__kmp_env_consistency_check) {
    if (st == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                            (pr->ordered ? ct_pdo_ordered : ct_pdo), loc);
    }
  }

  // compute trip count
  if (st == 1) { // most common case
    tc = (ub >= lb) ? ub - lb + 1 : 0; // zero-trip if ub < lb
  } else if (st < 0) {
    // Cast to unsigned: the difference may not fit the signed type
    // (e.g. lb = INT_MAX, ub = INT_MIN) but always fits the unsigned one.
    tc = (lb >= ub) ? (UT)(lb - ub) / (-st) + 1 : 0;
  } else { // st > 0
    tc = (ub >= lb) ? (UT)(ub - lb) / st + 1 : 0;
  }
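  // Trip-count example: lb = 1, ub = 10, st = 3 iterates 1, 4, 7, 10, and the
  // formula gives tc = (UT)(10 - 1) / 3 + 1 = 4. The unsigned cast keeps the
  // subtraction well-defined even when ub - lb overflows the signed type.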
  if (schedule == __kmp_static) {
    KMP_COUNT_BLOCK(OMP_FOR_static);
    KMP_COUNT_VALUE(FOR_static_iterations, tc);
  } else {
    KMP_COUNT_BLOCK(OMP_FOR_dynamic);
    KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
  }

  pr->u.p.lb = lb;
  pr->u.p.ub = ub;
  pr->u.p.st = st;
  pr->u.p.tc = tc;

#if KMP_OS_WINDOWS
  pr->u.p.last_upper = ub + st;
#endif /* KMP_OS_WINDOWS */

  /* NOTE: only the active parallel region(s) has active ordered sections */
  if (active) {
    if (pr->ordered == 0) {
      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
    } else {
      pr->ordered_bumped = 0;

      pr->u.p.ordered_lower = 1;
      pr->u.p.ordered_upper = 0;

      th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
      th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
    }
  }

  if (__kmp_env_consistency_check) {
    enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
    if (push_ws) {
      __kmp_push_workshare(gtid, ws, loc);
      pr->pushed_ws = ws;
    } else {
      __kmp_check_workshare(gtid, ws, loc);
      pr->pushed_ws = ct_none;
    }
  }

  switch (schedule) {
#if (KMP_STATIC_STEAL_ENABLED)
  case kmp_sch_static_steal: {
    T nproc = th->th.th_team_nproc;
    T ntc, init;

    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid));

    ntc = (tc % chunk ? 1 : 0) + tc / chunk;
    if (nproc > 1 && ntc >= nproc) {
      KMP_COUNT_BLOCK(OMP_FOR_static_steal);
      T id = __kmp_tid_from_gtid(gtid);
      T small_chunk, extras;

      small_chunk = ntc / nproc;
      extras = ntc % nproc;

      init = id * small_chunk + (id < extras ? id : extras);
      pr->u.p.count = init;
      pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);

      pr->u.p.parm2 = lb;
      // parm3 is not used in static_steal
      pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
      pr->u.p.st = st;
      if (traits_t<T>::type_size > 4) {
        // AC: TODO: check if 16-byte CAS is available and use it instead.
        // For now use a dynamically allocated per-thread lock; the memory is
        // freed in __kmp_dispatch_next when status == 0.
        KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL);
        th->th.th_dispatch->th_steal_lock =
            (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
        __kmp_init_lock(th->th.th_dispatch->th_steal_lock);
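        // static_steal setup: the ntc chunks are split into contiguous
        // per-thread ranges [count, ub); parm4 remembers which neighbour to
        // try first when this thread runs out and starts stealing. For loop
        // types wider than 4 bytes, count and ub cannot be updated together
        // with a single 64-bit CAS, so a per-thread lock protects them
        // instead (see the >4-byte branch of kmp_sch_static_steal in
        // __kmp_dispatch_next below).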
      }
      break;
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_balanced\n",
                     gtid));
      schedule = kmp_sch_static_balanced;
      /* too few iterations: fall-through to kmp_sch_static_balanced */
    } // if
    /* FALL-THROUGH to static balanced */
  } // case
#endif
  case kmp_sch_static_balanced: {
    T nproc = th->th.th_team_nproc;
    T init, limit;

    KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                   gtid));

    if (nproc > 1) {
      T id = __kmp_tid_from_gtid(gtid);

      if (tc < nproc) {
        if (id < tc) {
          init = id;
          limit = id;
          pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
        } else {
          pr->u.p.count = 1; /* means no more chunks to execute */
          pr->u.p.parm1 = FALSE;
          break;
        }
      } else {
        T small_chunk = tc / nproc;
        T extras = tc % nproc;
        init = id * small_chunk + (id < extras ? id : extras);
        limit = init + small_chunk - (id < extras ? 0 : 1);
        pr->u.p.parm1 = (id == nproc - 1);
      }
    } else {
      if (tc > 0) {
        init = 0;
        limit = tc - 1;
        pr->u.p.parm1 = TRUE;
      } else {
        // zero trip count
        pr->u.p.count = 1; /* means no more chunks to execute */
        pr->u.p.parm1 = FALSE;
        break;
      }
    }
#if USE_ITT_BUILD
    // Calculate chunk for metadata report
    if (itt_need_metadata_reporting)
      cur_chunk = limit - init + 1;
#endif
    if (st == 1) {
      pr->u.p.lb = lb + init;
      pr->u.p.ub = lb + limit;
    } else {
      // calculated upper bound; "ub" is the user-defined upper bound
      T ub_tmp = lb + limit * st;
      pr->u.p.lb = lb + init * st;
      // adjust upper bound to "ub" if needed, so that MS lastprivate will
      // match it exactly
      if (st > 0) {
        pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
      } else {
        pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
      }
    }
    if (pr->ordered) {
      pr->u.p.ordered_lower = init;
      pr->u.p.ordered_upper = limit;
    }
    break;
  } // case
  case kmp_sch_static_balanced_chunked: {
    // similar to balanced, but the chunk is adjusted to a multiple of the
    // simd width
    T nth = th->th.th_team_nproc;
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d runtime(simd:static)"
                   " -> falling-through to static_greedy\n",
                   gtid));
    schedule = kmp_sch_static_greedy;
    if (nth > 1)
      pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
    else
      pr->u.p.parm1 = tc;
    break;
  } // case
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_simd: {
    T nproc = th->th.th_team_nproc;
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked"
                   " case\n",
                   gtid));

    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        // when remaining iters become less than parm2 - switch to dynamic
        pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
        *(double *)&pr->u.p.parm3 =
            guided_flt_param / nproc; // may occupy parm3 and parm4
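        // guided_iterative: parm2 is the "switch to plain dynamic" threshold
        // (guided_int_param * nproc * (chunk + 1)) and parm3 holds the
        // per-grab shrink factor guided_flt_param / nproc as a raw double;
        // __kmp_dispatch_next multiplies the remaining iteration count by it
        // to size the next chunk.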
      }
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",
                     gtid));
      pr->u.p.parm1 = tc;
    } // if
  } // case
  break;
  case kmp_sch_guided_analytical_chunked: {
    T nproc = th->th.th_team_nproc;
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked"
              " case\n",
              gtid));

    if (nproc > 1) {
      if ((2L * chunk + 1) * nproc >= tc) {
        /* chunk size too large, switch to dynamic */
        schedule = kmp_sch_dynamic_chunked;
      } else {
        DBL x;

#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* Windows* OS on IA-32 architecture defaults to 53-bit FP precision;
           the solver below needs 64-bit, so save and switch the FP control
           word. */
        unsigned int oldFpcw = _control87(0, 0);
        _control87(_PC_64, _MCW_PC); // 0, 0x30000
#endif
        /* value used for comparison in solver for cross-over point */
        long double target = ((long double)chunk * 2 + 1) * nproc / tc;

        /* crossover point: chunk indexes >= this point switch to
           dynamic-style scheduling */
        UT cross;

        /* commonly used term: (2 nproc - 1) / (2 nproc) */
        x = (long double)1.0 - (long double)0.5 / nproc;

#ifdef KMP_DEBUG
        { // test natural alignment
          struct _test_a {
            char a;
            union {
              char b;
              DBL d;
            };
          } t;
          ptrdiff_t natural_alignment =
              (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
          KMP_DEBUG_ASSERT(
              (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
        }
#endif // KMP_DEBUG

        /* save the term in the thread-private dispatch structure */
        *(DBL *)&pr->u.p.parm3 = x;

        /* solve for the crossover point to the nearest integer i for which
           C_i <= chunk */
        {
          UT left, right, mid;
          long double p;

          /* estimate initial upper and lower bounds */
          right = 229;
          p = __kmp_pow<UT>(x, right);
          if (p > target) {
            do {
              p *= p;
              right <<= 1;
            } while (p > target && right < (1 << 27));
            /* lower bound is previous (failed) estimate of upper bound */
            left = right >> 1;
          } else {
            left = 0;
          }

          /* bisection root-finding method */
          while (left + 1 < right) {
            mid = (left + right) / 2;
            if (__kmp_pow<UT>(x, mid) > target) {
              left = mid;
            } else {
              right = mid;
            }
          }
          cross = right;
        }
        /* assert sanity of the computed crossover point */
        KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
                   __kmp_pow<UT>(x, cross) <= target);

        /* save the crossover point in the thread-private dispatch structure */
        pr->u.p.parm2 = cross;
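        // "cross" is the smallest chunk index at which the analytically
        // guided chunk size would drop to chunk or below; chunks before it
        // follow the guided curve, chunks at or after it are handed out as
        // plain dynamic chunks of that size (see the
        // kmp_sch_guided_analytical_chunked case in __kmp_dispatch_next).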
#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3)
#else
#define GUIDED_ANALYTICAL_WORKAROUND (x)
#endif
        /* dynamic-style scheduling offset */
        pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
                                 tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
                        cross * chunk;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        // restore FPCW
        _control87(oldFpcw, _MCW_PC);
#endif
      } // if
    } else {
      KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to "
                     "kmp_sch_static_greedy\n",
                     gtid));
      schedule = kmp_sch_static_greedy;
      /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
      pr->u.p.parm1 = tc;
    } // if
  } // case
  break;
  case kmp_sch_static_greedy:
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n", gtid));
    pr->u.p.parm1 = (th->th.th_team_nproc > 1)
                        ? (tc + th->th.th_team_nproc - 1) / th->th.th_team_nproc
                        : tc;
    break;
  case kmp_sch_static_chunked:
  case kmp_sch_dynamic_chunked:
    if (pr->u.p.parm1 <= 0) {
      pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
    }
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d "
                   "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
                   gtid));
    break;
  case kmp_sch_trapezoidal: {
    /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */

    T parm1, parm2, parm3, parm4;
    KD_TRACE(100,
             ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid));

    parm1 = chunk;

    /* F : size of the first cycle */
    parm2 = (tc / (2 * th->th.th_team_nproc));
    if (parm2 < 1) {
      parm2 = 1;
    }

    /* L : size of the last cycle. Make sure the last cycle is not larger
       than the first cycle. */
    if (parm1 < 1) {
      parm1 = 1;
    } else if (parm1 > parm2) {
      parm1 = parm2;
    }

    /* N : number of cycles */
    parm3 = (parm2 + parm1);
    parm3 = (2 * tc + parm3 - 1) / parm3;
    if (parm3 < 2) {
      parm3 = 2;
    }

    /* sigma : decreasing incr of the trapezoid */
    parm4 = (parm3 - 1);
    parm4 = (parm2 - parm1) / parm4;

    pr->u.p.parm1 = parm1;
    pr->u.p.parm2 = parm2;
    pr->u.p.parm3 = parm3;
    pr->u.p.parm4 = parm4;
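    // Trapezoid self-scheduling: parm1 = size of the last (smallest) chunk,
    // parm2 = size of the first chunk, parm3 = number of chunks, and
    // parm4 = the per-chunk decrement (parm2 - parm1) / (parm3 - 1), so chunk
    // sizes fall linearly from parm2 down to parm1.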
  } // case
  break;

  default: {
    __kmp_msg(kmp_ms_fatal, // Severity
              KMP_MSG(UnknownSchedTypeDetected), // Primary message
              KMP_HNT(GetNewerLibrary), // Hint
              __kmp_msg_null // Variadic argument list terminator
              );
  } break;
  } // switch
  pr->schedule = schedule;
  if (active) {
    /* The name of this buffer should be my_buffer_index when it's free to
       use it */

    KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));
    __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
                                 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
    // Note: KMP_WAIT_YIELD() cannot be used here: buffer_index and
    // my_buffer_index are always 32-bit integers.
    KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
                   "sh->buffer_index:%d\n",
                   gtid, my_buffer_index, sh->buffer_index));
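    // Buffer rotation: each new loop takes the next of the team's
    // __kmp_dispatch_num_buffers shared buffers (my_buffer_index comes from
    // th_disp_index++ above). The wait above blocks until the last thread of
    // the loop that previously used this slot has bumped sh->buffer_index in
    // __kmp_dispatch_next, so the slot's counters can safely be reused.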
    th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
    th->th.th_dispatch->th_dispatch_sh_current =
        CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
#if USE_ITT_BUILD
    if (pr->ordered) {
      __kmp_itt_ordered_init(gtid);
    }
    // Report loop metadata (only the master of an active level-1 team)
    if (itt_need_metadata_reporting) {
      kmp_uint64 schedtype = 0;
      switch (schedule) {
      case kmp_sch_static_chunked:
      case kmp_sch_static_balanced: // Chunk is calculated in the switch above
        break;
      case kmp_sch_static_greedy:
        cur_chunk = pr->u.p.parm1;
        break;
      case kmp_sch_dynamic_chunked:
        schedtype = 1;
        break;
      case kmp_sch_guided_iterative_chunked:
      case kmp_sch_guided_analytical_chunked:
      case kmp_sch_guided_simd:
        schedtype = 2;
        break;
      default:
        schedtype = 3;
        break;
      }
      __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
    }
#endif /* USE_ITT_BUILD */
  } // if (active)

#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
        "lb:%%%s ub:%%%s"
        " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
        " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
        traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
        traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
    KD_TRACE(10, (buff, gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
                  pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->u.p.ordered_lower,
                  pr->u.p.ordered_upper, pr->u.p.parm1, pr->u.p.parm2,
                  pr->u.p.parm3, pr->u.p.parm4));
    __kmp_str_free(&buff);
  }
#endif
#if (KMP_STATIC_STEAL_ENABLED)
  // A dedicated per-loop counter is required so that other threads can tell
  // whether this thread's buffer describes the current loop and is therefore
  // a valid victim for stealing.
  if (schedule == kmp_sch_static_steal) {
    // Other threads will inspect this variable when searching for a victim.
    // This is a flag showing that other threads may steal from this thread
    // from now on.
    volatile T *p = &pr->u.p.static_steal_counter;
    *p = *p + 1;
  }
#endif // ( KMP_STATIC_STEAL_ENABLED )

#if OMPT_SUPPORT && OMPT_TRACE
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
    ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
        team_info->parallel_id, task_info->task_id, team_info->microtask);
  }
#endif
}
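// __kmp_dispatch_finish(): called when a thread finishes an ordered loop
// chunk (via the __kmpc_dispatch_fini_* entry points). If the thread never
// entered the ordered region for its current chunk (ordered_bumped == 0), it
// must still wait for its turn and then bump the shared ordered_iteration
// counter so that later chunks are not blocked.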
template <typename UT>
static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {

    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    if (pr->ordered_bumped) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      UT lower = pr->u.p.ordered_lower;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
      KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
                                "ordered_iteration:%%%s lower:%%%s\n",
                                traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
        __kmp_str_free(&buff);
      }
#endif

      test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
    } // if
  } // if
  KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
}
#ifdef KMP_GOMP_COMPAT

template <typename UT>
static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
  typedef typename traits_t<UT>::signed_t ST;
  kmp_info_t *th = __kmp_threads[gtid];

  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
  if (!th->th.th_team->t.t_serialized) {
    dispatch_private_info_template<UT> *pr =
        reinterpret_cast<dispatch_private_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_pr_current);
    dispatch_shared_info_template<UT> volatile *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(pr);
    KMP_DEBUG_ASSERT(sh);
    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    UT lower = pr->u.p.ordered_lower;
    UT upper = pr->u.p.ordered_upper;
    UT inc = upper - lower + 1;

    if (pr->ordered_bumped == inc) {
      KD_TRACE(
          1000,
          ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
           gtid));
      pr->ordered_bumped = 0;
    } else {
      inc -= pr->ordered_bumped;

#ifdef KMP_DEBUG
      {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d before wait: "
            "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
        KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
                           __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));

      KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
                      "ordered_bumped to zero\n",
                      gtid));
      pr->ordered_bumped = 0;
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_finish_chunk: T#%%d after wait: "
            "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
            traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
            traits_t<UT>::spec);
        KD_TRACE(1000,
                 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
        __kmp_str_free(&buff);
      }
#endif

      test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
    }
  }
  KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid));
}

#endif /* KMP_GOMP_COMPAT */
/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
   work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
   is not called. */
#if OMPT_SUPPORT && OMPT_TRACE
#define OMPT_LOOP_END                                                          \
  if (status == 0) {                                                           \
    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) {   \
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
      ompt_task_info_t *task_info = __ompt_get_taskinfo(0);                    \
      ompt_callbacks.ompt_callback(ompt_event_loop_end)(                       \
          team_info->parallel_id, task_info->task_id);                         \
    }                                                                          \
  }
#else
#define OMPT_LOOP_END // no-op
#endif

template <typename T>
static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
                               T *p_lb, T *p_ub,
                               typename traits_t<T>::signed_t *p_st) {

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  typedef typename traits_t<T>::floating_t DBL;
  // Note: schedule(runtime) is counted here even when the actual runtime
  // schedule turns out to be static.
  KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);

  int status;
  dispatch_private_info_template<T> *pr;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;

  KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_dispatch_next: T#%%d called p_lb:%%%s "
                            "p_ub:%%%s p_st:%%%s p_last: %%p\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec);
    KD_TRACE(1000, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last));
    __kmp_str_free(&buff);
  }
#endif
  if (team->t.t_serialized) {
    /* NOTE: serialize this dispatch because we are not at an active level */
    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_disp_buffer); /* top of the stack */
    KMP_DEBUG_ASSERT(pr);

    if ((status = (pr->u.p.tc != 0)) == 0) {
      *p_lb = 0;
      *p_ub = 0;
      if (p_st != NULL)
        *p_st = 0;
      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }
    } else if (pr->nomerge) {
      kmp_int32 last;
      T start;
      UT limit, trip, init;
      ST incr;
      T chunk = pr->u.p.parm1;

      KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                     gtid));

      init = chunk * pr->u.p.count++;
      trip = pr->u.p.tc - 1;

      if ((status = (init <= trip)) == 0) {
        *p_lb = 0;
        *p_ub = 0;
        if (p_st != NULL)
          *p_st = 0;
        if (__kmp_env_consistency_check) {
          if (pr->pushed_ws != ct_none) {
            pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
          }
        }
      } else {
        start = pr->u.p.lb;
        limit = chunk + init - 1;
        incr = pr->u.p.st;

        if ((last = (limit >= trip)) != 0) {
          limit = trip;
#if KMP_OS_WINDOWS
          pr->u.p.last_upper = pr->u.p.ub;
#endif /* KMP_OS_WINDOWS */
        }
        if (p_last != NULL)
          *p_last = last;
        if (p_st != NULL)
          *p_st = incr;
        if (incr == 1) {
          *p_lb = start + init;
          *p_ub = start + limit;
        } else {
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
        }

        if (pr->ordered) {
          pr->u.p.ordered_lower = init;
          pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
          {
            char *buff;
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
#endif
        } // if
      } // if
    } else {
      pr->u.p.tc = 0;
      *p_lb = pr->u.p.lb;
      *p_ub = pr->u.p.ub;
#if KMP_OS_WINDOWS
      pr->u.p.last_upper = *p_ub;
#endif /* KMP_OS_WINDOWS */
      if (p_last != NULL)
        *p_last = TRUE;
      if (p_st != NULL)
        *p_st = pr->u.p.st;
    } // if
#ifdef KMP_DEBUG
    {
      char *buff;
      buff = __kmp_str_format(
          "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
          "p_ub:%%%s p_st:%%%s p_last:%%p %%d  returning:%%d\n",
          traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
      KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status));
      __kmp_str_free(&buff);
    }
#endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    return status;
  } else {
    kmp_int32 last = 0;
    dispatch_shared_info_template<UT> *sh;
    T start;
    ST incr;
    UT limit, trip, init;

    KMP_DEBUG_ASSERT(th->th.th_dispatch ==
                     &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

    pr = reinterpret_cast<dispatch_private_info_template<T> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    KMP_DEBUG_ASSERT(pr);
    sh = reinterpret_cast<dispatch_shared_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_sh_current);
    KMP_DEBUG_ASSERT(sh);

    if (pr->u.p.tc == 0) {
      // zero trip count: no more work
      status = 0;
    } else {
      switch (pr->schedule) {
#if (KMP_STATIC_STEAL_ENABLED)
      case kmp_sch_static_steal: {
        T chunk = pr->u.p.parm1;
        int nproc = th->th.th_team_nproc;

        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n",
                       gtid));

        trip = pr->u.p.tc - 1;

        if (traits_t<T>::type_size > 4) {
          // use lock for 8-byte and CAS for 4-byte induction variables
          kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock;
          KMP_DEBUG_ASSERT(lck != NULL);
          if (pr->u.p.count < (UT)pr->u.p.ub) {
            __kmp_acquire_lock(lck, gtid);
            // try to get own chunk of iterations
            init = (pr->u.p.count)++;
            status = (init < (UT)pr->u.p.ub);
            __kmp_release_lock(lck, gtid);
          } else {
            status = 0; // no own chunks
          }
          if (!status) { // try to steal
            kmp_info_t **other_threads = team->t.t_threads;
            int while_limit = nproc; // nproc attempts to find a victim
            int while_index = 0;

            while ((!status) && (while_limit != ++while_index)) {
              T remaining;
              T victimIdx = pr->u.p.parm4; // thread to steal from
              T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
              dispatch_private_info_template<T> *victim =
                  reinterpret_cast<dispatch_private_info_template<T> *>(
                      other_threads[victimIdx]
                          ->th.th_dispatch->th_dispatch_pr_current);
              while ((victim == NULL || victim == pr ||
                      (*(volatile T *)&victim->u.p.static_steal_counter !=
                       *(volatile T *)&pr->u.p.static_steal_counter)) &&
                     oldVictimIdx != victimIdx) {
                victimIdx = (victimIdx + 1) % nproc;
                victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                    other_threads[victimIdx]
                        ->th.th_dispatch->th_dispatch_pr_current);
              }
              if (!victim ||
                  (*(volatile T *)&victim->u.p.static_steal_counter !=
                   *(volatile T *)&pr->u.p.static_steal_counter)) {
                continue; // try once more (nproc attempts in total);
                // no victim is ready yet to participate in stealing
              }
              if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
                pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
                continue; // not enough chunks to steal, go to next victim
              }

              lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
              KMP_ASSERT(lck != NULL);
              __kmp_acquire_lock(lck, gtid);
              limit = victim->u.p.ub; // keep initial ub
              if (victim->u.p.count >= limit ||
                  (remaining = limit - victim->u.p.count) < 2) {
                __kmp_release_lock(lck, gtid);
                pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
                continue; // not enough chunks to steal
              }
              // stealing succeeded: reduce victim's ub by 1/4 of the undone
              // chunks, or by 1
              if (remaining > 3) {
                KMP_COUNT_VALUE(FOR_static_steal_stolen, remaining >> 2);
                init = (victim->u.p.ub -= (remaining >> 2));
              } else {
                KMP_COUNT_VALUE(FOR_static_steal_stolen, 1);
                init = (victim->u.p.ub -= 1);
              }
              __kmp_release_lock(lck, gtid);

              KMP_DEBUG_ASSERT(init + 1 <= limit);
              pr->u.p.parm4 = victimIdx; // remember the victim to steal from
              status = 1;
              while_index = 0;
              // update own count and ub with the stolen range minus init chunk
              __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
              pr->u.p.count = init + 1;
              pr->u.p.ub = limit;
              __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
            } // while (search for victim)
          } // if (try to find victim and steal)
        } else {
          // 4-byte induction variable: use an 8-byte CAS on the packed pair
          // (count, ub) instead of a lock.
          typedef union {
            struct {
              UT count;
              T ub;
            } p;
            kmp_int64 b;
          } union_i4;
          // All operations on 'count' and 'ub' must be combined atomically.
          {
            union_i4 vold, vnew;
            vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
            vnew = vold;
            vnew.p.count++;
            while (!KMP_COMPARE_AND_STORE_ACQ64(
                (volatile kmp_int64 *)&pr->u.p.count,
                *VOLATILE_CAST(kmp_int64 *) & vold.b,
                *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
              KMP_CPU_PAUSE();
              vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
              vnew = vold;
              vnew.p.count++;
            }
            vnew = vold;
            init = vnew.p.count;
            status = (init < (UT)vnew.p.ub);
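            // For 4-byte loop types, count and ub live side by side in one
            // 64-bit word (union_i4), so "take my next chunk" and "a thief
            // shrinks my ub" are both single 64-bit CAS operations; no lock
            // is needed, unlike the 8-byte branch above.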
          }

          if (!status) { // no own chunks left, try to steal
            kmp_info_t **other_threads = team->t.t_threads;
            int while_limit = nproc; // nproc attempts to find a victim
            int while_index = 0;

            while ((!status) && (while_limit != ++while_index)) {
              union_i4 vold, vnew;
              kmp_int32 remaining;
              T victimIdx = pr->u.p.parm4;
              T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
              dispatch_private_info_template<T> *victim =
                  reinterpret_cast<dispatch_private_info_template<T> *>(
                      other_threads[victimIdx]
                          ->th.th_dispatch->th_dispatch_pr_current);
              while ((victim == NULL || victim == pr ||
                      (*(volatile T *)&victim->u.p.static_steal_counter !=
                       *(volatile T *)&pr->u.p.static_steal_counter)) &&
                     oldVictimIdx != victimIdx) {
                victimIdx = (victimIdx + 1) % nproc;
                victim = reinterpret_cast<dispatch_private_info_template<T> *>(
                    other_threads[victimIdx]
                        ->th.th_dispatch->th_dispatch_pr_current);
              }
              if (!victim ||
                  (*(volatile T *)&victim->u.p.static_steal_counter !=
                   *(volatile T *)&pr->u.p.static_steal_counter)) {
                continue; // try once more (nproc attempts in total);
                // no victim is ready yet to participate in stealing
              }
              pr->u.p.parm4 = victimIdx; // new victim found
              while (1) { // CAS loop if victim has enough chunks to steal
                vold.b = *(volatile kmp_int64 *)(&victim->u.p.count);
                vnew = vold;

                KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                if (vnew.p.count >= (UT)vnew.p.ub ||
                    (remaining = vnew.p.ub - vnew.p.count) < 2) {
                  pr->u.p.parm4 =
                      (victimIdx + 1) % nproc; // shift start victim id
                  break; // not enough chunks to steal, go to next victim
                }
                if (remaining > 3) {
                  vnew.p.ub -= (remaining >> 2); // steal 1/4 of remaining
                } else {
                  vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining
                }
                KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                if (KMP_COMPARE_AND_STORE_ACQ64(
                        (volatile kmp_int64 *)&victim->u.p.count,
                        *VOLATILE_CAST(kmp_int64 *) & vold.b,
                        *VOLATILE_CAST(kmp_int64 *) & vnew.b)) {
                  // stealing succeeded
                  KMP_COUNT_VALUE(FOR_static_steal_stolen,
                                  vold.p.ub - vnew.p.ub);
                  status = 1;
                  while_index = 0;
                  // now update own count and ub
                  init = vnew.p.ub;
                  vold.p.count = init + 1;
#if KMP_ARCH_X86
                  KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count),
                                   vold.b);
#else
                  *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
#endif
                  break;
                } // if (check CAS result)
                KMP_CPU_PAUSE(); // CAS failed, repeat attempt
              } // while (try to steal from a particular victim)
            } // while (search for victim)
          } // if (try to find victim and steal)
        } // if (4-byte induction variable)
        if (!status) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.parm2;
          init *= chunk;
          limit = chunk + init - 1;
          incr = pr->u.p.st;
          KMP_COUNT_VALUE(FOR_static_steal_chunks, 1);

          KMP_DEBUG_ASSERT(init <= trip);
          if ((last = (limit >= trip)) != 0)
            limit = trip;
          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
#endif
          } // if
        } // if
        break;
      } // case
#endif // ( KMP_STATIC_STEAL_ENABLED )
      case kmp_sch_static_balanced: {
        KD_TRACE(
            100,
            ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid));
        if ((status = !pr->u.p.count) !=
            0) { /* check if thread has any iterations to do */
          pr->u.p.count = 1;
          *p_lb = pr->u.p.lb;
          *p_ub = pr->u.p.ub;
          last = pr->u.p.parm1;
          if (p_st != NULL)
            *p_st = pr->u.p.st;
        } else { /* no iterations to do */
          pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
        }
        if (pr->ordered) {
#ifdef KMP_DEBUG
          {
            char *buff;
            buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                    "ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t<UT>::spec, traits_t<UT>::spec);
            KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                            pr->u.p.ordered_upper));
            __kmp_str_free(&buff);
          }
#endif
        } // if
      } // case
      break;
      case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy
                                     was merged here */
      case kmp_sch_static_chunked: {
        T parm1;

        KD_TRACE(100, ("__kmp_dispatch_next: T#%d "
                       "kmp_sch_static_[affinity|chunked] case\n",
                       gtid));
        parm1 = pr->u.p.parm1;

        trip = pr->u.p.tc - 1;
        init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

        if ((status = (init <= trip)) != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          limit = parm1 + init - 1;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          pr->u.p.count += th->th.th_team_nproc;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
#endif
          } // if
        } // if
      } // case
      break;

      case kmp_sch_dynamic_chunked: {
        T chunk = pr->u.p.parm1;

        KD_TRACE(
            100,
            ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid));

        init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
        trip = pr->u.p.tc - 1;

        if ((status = (init <= trip)) == 0) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.lb;
          limit = chunk + init - 1;
          incr = pr->u.p.st;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
#endif
          } // if
        } // if
      } // case
      break;
      case kmp_sch_guided_iterative_chunked: {
        T chunkspec = pr->u.p.parm1;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked "
                       "iterative case\n",
                       gtid));
        trip = pr->u.p.tc;
        // Start atomic part of calculations
        while (1) {
          ST remaining; // signed, because can be < 0
          init = sh->u.s.iteration; // shared value
          remaining = trip - init;
          if (remaining <= 0) { // AC: need to compare with 0 first
            // nothing to do, don't try the atomic op
            status = 0;
            break;
          }
          if ((T)remaining <
              pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
            // use dynamic-style schedule:
            // atomically increment iterations, get old value
            init = test_then_add<ST>(
                RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunkspec);
            remaining = trip - init;
            if (remaining <= 0) {
              status = 0; // all iterations got by other threads
            } else {
              // got some iterations to work on
              status = 1;
              if ((T)remaining > chunkspec) {
                limit = init + chunkspec - 1;
              } else {
                last = 1; // the last chunk
                limit = init + remaining - 1;
              } // if
            } // if
            break;
          } // if
          limit = init + (UT)(remaining *
                              *(double *)&pr->u.p.parm3); // divide by K*nproc
          if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                   (ST)init, (ST)limit)) {
            // CAS was successful, chunk obtained
            status = 1;
            --limit;
            break;
          } // if
        } // while
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
#endif
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
      } // case
      break;

      case kmp_sch_guided_simd: {
        // same as iterative, but the chunk is adjusted to be a multiple of the
        // given chunk (simd width)
        T chunk = pr->u.p.parm1;
        KD_TRACE(100,
                 ("__kmp_dispatch_next: T#%d kmp_sch_guided_simd case\n",
                  gtid));
        trip = pr->u.p.tc;
        // Start atomic part of calculations
        while (1) {
          ST remaining; // signed, because can be < 0
          init = sh->u.s.iteration; // shared value
          remaining = trip - init;
          if (remaining <= 0) { // AC: need to compare with 0 first
            status = 0; // nothing to do, don't try the atomic op
            break;
          }
          KMP_DEBUG_ASSERT(init % chunk == 0);
          // compare with K*nproc*(chunk+1), K=2 by default
          if ((T)remaining < pr->u.p.parm2) {
            // use dynamic-style schedule:
            // atomically increment iterations, get old value
            init = test_then_add<ST>(
                RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunk);
            remaining = trip - init;
            if (remaining <= 0) {
              status = 0; // all iterations got by other threads
            } else {
              // got some iterations to work on
              status = 1;
              if ((T)remaining > chunk) {
                limit = init + chunk - 1;
              } else {
                last = 1; // the last chunk
                limit = init + remaining - 1;
              } // if
            } // if
            break;
          } // if
          // divide by K*nproc
          UT span = remaining * (*(double *)&pr->u.p.parm3);
          UT rem = span % chunk;
          if (rem) // adjust so that span % chunk == 0
            span += chunk - rem;
          limit = init + span;
          if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration),
                                   (ST)init, (ST)limit)) {
            // CAS was successful, chunk obtained
            status = 1;
            --limit;
            break;
          } // if
        } // while
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
#endif
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
      } // case
      break;
      case kmp_sch_guided_analytical_chunked: {
        T chunkspec = pr->u.p.parm1;
        UT chunkIdx;
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* for storing the original FPCW value on Windows* OS / IA-32 */
        unsigned int oldFpcw;
        unsigned int fpcwSet = 0;
#endif
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked "
                       "analytical case\n",
                       gtid));

        trip = pr->u.p.tc;

        KMP_DEBUG_ASSERT(th->th.th_team_nproc > 1);
        KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)th->th.th_team_nproc <
                         trip);

        while (1) { /* this while loop is a safeguard against unexpected zero
                       chunk sizes */
          chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
          if (chunkIdx >= (UT)pr->u.p.parm2) {
            --trip;
            /* use dynamic-style scheduling */
            init = chunkIdx * chunkspec + pr->u.p.count;
            /* need to verify init > 0 in case of overflow in the above
               calculation */
            if ((status = (init > 0 && init <= trip)) != 0) {
              limit = init + chunkspec - 1;

              if ((last = (limit >= trip)) != 0)
                limit = trip;
            }
            break;
          } else {
            /* use exponential-style scheduling */
#if KMP_OS_WINDOWS && KMP_ARCH_X86
            /* If not already done, save the original FPCW and set precision
               to 64-bit, as Windows* OS on IA-32 defaults to 53-bit. */
            if (!fpcwSet) {
              oldFpcw = _control87(0, 0);
              _control87(_PC_64, _MCW_PC);
              fpcwSet = 0x30000;
            }
#endif
            if (chunkIdx) {
              init = __kmp_dispatch_guided_remaining<T>(
                  trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
              KMP_DEBUG_ASSERT(init);
              init = trip - init;
            } else
              init = 0;
            limit = trip - __kmp_dispatch_guided_remaining<T>(
                               trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
            KMP_ASSERT(init <= limit);
            if (init < limit) {
              KMP_DEBUG_ASSERT(limit <= trip);
              --limit;
              status = 1;
              break;
            } // if
          } // if
        } // while (1)
#if KMP_OS_WINDOWS && KMP_ARCH_X86
        /* restore FPCW if necessary (check fpcwSet first because oldFpcw may
           be uninitialized here) */
        if (fpcwSet && (oldFpcw & fpcwSet))
          _control87(oldFpcw, _MCW_PC);
#endif
        if (status != 0) {
          start = pr->u.p.lb;
          incr = pr->u.p.st;
          if (p_st != NULL)
            *p_st = incr;
          *p_lb = start + init * incr;
          *p_ub = start + limit * incr;
          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
#endif
          } // if
        } else {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } // if
      } // case
      break;
      case kmp_sch_trapezoidal: {
        UT index;
        T parm2 = pr->u.p.parm2;
        T parm3 = pr->u.p.parm3;
        T parm4 = pr->u.p.parm4;
        KD_TRACE(100,
                 ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                  gtid));

        index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);

        init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
        trip = pr->u.p.tc - 1;

        if ((status = ((T)index < parm3 && init <= trip)) == 0) {
          *p_lb = 0;
          *p_ub = 0;
          if (p_st != NULL)
            *p_st = 0;
        } else {
          start = pr->u.p.lb;
          limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
          incr = pr->u.p.st;

          if ((last = (limit >= trip)) != 0)
            limit = trip;

          if (p_st != NULL)
            *p_st = incr;

          if (incr == 1) {
            *p_lb = start + init;
            *p_ub = start + limit;
          } else {
            *p_lb = start + init * incr;
            *p_ub = start + limit * incr;
          }

          if (pr->ordered) {
            pr->u.p.ordered_lower = init;
            pr->u.p.ordered_upper = limit;
#ifdef KMP_DEBUG
            {
              char *buff;
              buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
                                      "ordered_lower:%%%s ordered_upper:%%%s\n",
                                      traits_t<UT>::spec, traits_t<UT>::spec);
              KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,
                              pr->u.p.ordered_upper));
              __kmp_str_free(&buff);
            }
#endif
          } // if
        } // if
      } // case
      break;
      default: {
        status = 0; // to avoid complaints on uninitialized variable use
        __kmp_msg(kmp_ms_fatal, // Severity
                  KMP_MSG(UnknownSchedTypeDetected), // Primary message
                  KMP_HNT(GetNewerLibrary), // Hint
                  __kmp_msg_null // Variadic argument list terminator
                  );
      } break;
      } // switch
    } // if (tc != 0)

    if (status == 0) {
      UT num_done;

      num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done);
#ifdef KMP_DEBUG
      {
        char *buff;
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
            traits_t<UT>::spec);
        KD_TRACE(100, (buff, gtid, sh->u.s.num_done));
        __kmp_str_free(&buff);
      }
#endif

      if ((ST)num_done == th->th.th_team_nproc - 1) {
#if (KMP_STATIC_STEAL_ENABLED)
        if (pr->schedule == kmp_sch_static_steal &&
            traits_t<T>::type_size > 4) {
          int i;
          kmp_info_t **other_threads = team->t.t_threads;
          // loop complete, safe to destroy locks used for stealing
          for (i = 0; i < th->th.th_team_nproc; ++i) {
            kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock;
            KMP_ASSERT(lck != NULL);
            __kmp_destroy_lock(lck);
            __kmp_free(lck);
            other_threads[i]->th.th_dispatch->th_steal_lock = NULL;
          }
        }
#endif
        /* NOTE: release this buffer to be reused */

        sh->u.s.num_done = 0;
        sh->u.s.iteration = 0;

        /* TODO replace with general release procedure? */
        if (pr->ordered) {
          sh->u.s.ordered_iteration = 0;
        }

        sh->buffer_index += __kmp_dispatch_num_buffers;
        KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                       gtid, sh->buffer_index));
      } // if

      if (__kmp_env_consistency_check) {
        if (pr->pushed_ws != ct_none) {
          pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
        }
      }

      th->th.th_dispatch->th_deo_fcn = NULL;
      th->th.th_dispatch->th_dxo_fcn = NULL;
      th->th.th_dispatch->th_dispatch_sh_current = NULL;
      th->th.th_dispatch->th_dispatch_pr_current = NULL;
    } // if (status == 0)
#if KMP_OS_WINDOWS
    else if (last) {
      pr->u.p.last_upper = pr->u.p.ub;
    }
#endif /* KMP_OS_WINDOWS */
    if (p_last != NULL && status != 0)
      *p_last = last;
  } // if

#ifdef KMP_DEBUG
  {
    char *buff;
    buff = __kmp_str_format(
        "__kmp_dispatch_next: T#%%d normal case: "
        "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p  returning:%%d\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
    KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status));
    __kmp_str_free(&buff);
  }
#endif
#if INCLUDE_SSC_MARKS
  SSC_MARK_DISPATCH_NEXT();
#endif
  OMPT_LOOP_END;
  return status;
}
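// __kmp_dist_get_bounds(): computes the per-team (distribute) bounds before a
// nested worksharing loop is initialized. Each team of the league gets either
// a single iteration (when trip_count <= nteams) or a static balanced/greedy
// block of the iteration space, and *plastiter is set for the team that owns
// the last iteration.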
template <typename T>
static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 *plastiter, T *plower, T *pupper,
                                  typename traits_t<T>::signed_t incr) {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper);
  KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal; some zero-trip loops are maintained by the
      // compiler and only detectable at run time.
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper - lower can exceed the limit of the signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only some teams get single iteration, others get nothing
    if (team_id < trip_count) {
      *pupper = *plower = *plower + team_id * incr;
    } else {
      *plower = *pupper + incr; // zero-trip loop
    }
    if (plastiter != NULL)
      *plastiter = (team_id == trip_count - 1);
  } else {
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunk = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunk + (team_id < extras ? team_id : extras));
      *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupper = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupper < *plower)
          *pupper = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupper > upper - incr;
        if (*pupper > upper)
          *pupper = upper; // tracker C73258
      } else {
        if (*pupper > *plower)
          *pupper = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupper < upper - incr;
        if (*pupper < upper)
          *pupper = upper; // tracker C73258
      }
    }
  }
}
/*!
@ingroup WORK_SHARING
These functions prepare the runtime to start a dynamically scheduled for loop,
saving the loop arguments. They are identical apart from the argument types.
*/
void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int32 lb,
                            kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint32 lb,
                             kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                            enum sched_type schedule, kmp_int64 lb,
                            kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                             enum sched_type schedule, kmp_uint64 lb,
                             kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

/*!
Same as the __kmpc_dispatch_init set, but used for the composite
"distribute parallel for" construct: the per-team iteration space is computed
first via __kmp_dist_get_bounds.
*/
void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                                 kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                                  kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                                 enum sched_type schedule, kmp_int32 *p_last,
                                 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                                 kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                  enum sched_type schedule, kmp_int32 *p_last,
                                  kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                                  kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
}

/*!
@param p_last Pointer to a flag set to one if this is the last chunk, zero
otherwise
@param p_lb   Pointer to the lower bound for the next chunk of work
@param p_ub   Pointer to the upper bound for the next chunk of work
@param p_st   Pointer to the stride for the next chunk of work
@return one if there is work to be done, zero otherwise

Get the next dynamically allocated chunk of work for this thread. If there is
no more work, then the lb, ub and stride need not be modified.
*/
int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
  return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st);
}

int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                            kmp_int32 *p_st) {
  return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st);
}

int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                           kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
  return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st);
}

int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                            kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                            kmp_int64 *p_st) {
  return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st);
}

/*!
Mark the end of a dynamic loop.
*/
void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}

void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
}

void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
}
/*! @} */
kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value == checker;
}

kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
  return value != checker;
}

kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
  return value < checker;
}

kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
  return value >= checker;
}

kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
  return value <= checker;
}

kmp_uint32
__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
                   kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
                   void *obj // Higher-level synchronization object, or NULL.
                   ) {
  // note: we may not belong to a team at this point
  volatile kmp_uint32 *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
  kmp_uint32 r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = TCR_4(*spin), check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    /* if we have waited a bit, or are oversubscribed, yield;
       pause is in the following code */
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}

void __kmp_wait_yield_4_ptr(
    void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
    void *obj // Higher-level synchronization object, or NULL.
    ) {
  // note: we may not belong to a team at this point
  void *spin = spinner;
  kmp_uint32 check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(void *, kmp_uint32) = pred;

  KMP_FSYNC_SPIN_INIT(obj, spin);
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    /* if we have waited a bit, or are oversubscribed, yield;
       pause is in the following code */
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
}
#ifdef KMP_GOMP_COMPAT

void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int32 lb,
                               kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint32 lb,
                                kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
                               enum sched_type schedule, kmp_int64 lb,
                               kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
                               int push_ws) {
  __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
                                 push_ws);
}

void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
                                enum sched_type schedule, kmp_uint64 lb,
                                kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
                                int push_ws) {
  __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
                                  push_ws);
}

void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
}

void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
  __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
}

#endif /* KMP_GOMP_COMPAT */