#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying using non-globally-ordered stores (KNC only)
#define ngo_load(src)            __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src)   _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync()               __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src)            ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src)   KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync()               ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void); // Forward declaration
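
// ---------------------------- Barrier Algorithms ----------------------------
// Each algorithm below is split into a "gather" routine (workers signal arrival up toward the
// master, optionally combining reduction data via the reduce callback) and a "release" routine
// (the master wakes the workers, optionally propagating ICVs). __kmp_barrier, __kmp_join_barrier
// and __kmp_fork_barrier select among them via the __kmp_barrier_{gather,release}_pattern tables.

// Linear Barrier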
static void
__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            void (*reduce)(void *, void *)
                            USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all of the threads have arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                  "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived,
                  thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to master thread.
    /* After performing this write, a worker thread may not assume that the team is valid
       any more - it could be deallocated by the master thread at any time. */
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    register kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    register int nproc = this_thr->th.th_team_nproc;
    register int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    register kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for worker thread to arrive
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
      flag.wait(this_thr, FALSE
                USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid,
                       team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state));
  }
  KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
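
// Linear release: the master releases every worker directly by bumping each worker's b_go flag;
// workers simply wait on their own b_go and reset it once released.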
static void
__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                             int propagate_icvs
                             USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    register unsigned int i;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
          for (i = 1; i < nproc; ++i) {
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
            ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                           &team->t.t_implicit_task_taskdata[0].td_icvs);
          }
          ngo_sync();
        }
      }
#endif // KMP_BARRIER_ICV_PUSH

      // Now, release all of the worker threads
      for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
        // Prefetch next thread's go flag
        if (i + 1 < nproc)
          KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                      other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
                      &other_threads[i]->th.th_bar[bt].bb.b_go,
                      other_threads[i]->th.th_bar[bt].bb.b_go,
                      other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
        kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]);
        flag.release();
      }
    }
  } else { // Wait for the MASTER thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE
              USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    // Early exit for reaping threads releasing forkjoin barrier
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                  gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
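
// Tree barrier: threads gather up a tree with branching factor 2^branch_bits; each parent waits
// on its children's b_arrived flags before signaling its own parent.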
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                          void (*reduce)(void *, void *)
                          USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads = team->t.t_threads;
  register kmp_uint32 nproc = this_thr->th.th_team_nproc;
  register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 child;
  register kmp_uint32 child_tid;
  register kmp_uint64 new_state;

  KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any required data as we go
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      register kmp_info_t *child_thr = other_threads[child_tid];
      register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child+1 <= branch_factor && child_tid+1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                    "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                    __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                    &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE
                USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                       team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
      child++;
      child_tid++;
    }
    while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    register kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                  "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                  &thr_bar->b_arrived, thr_bar->b_arrived,
                  thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread.
    /* After performing this write, a worker thread may not assume that the team is valid
       any more - it could be deallocated by the master thread at any time. */
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the master thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
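
// Tree release mirrors the gather: each parent bumps the b_go flag of each of its children,
// pushing ICVs along the way when requested.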
static void
__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           int propagate_icvs
                           USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  register kmp_team_t *team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_uint32 nproc;
  register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 child;
  register kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(tid)) { // Handle internal children and leaf nodes
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE
              USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    // Early exit for reaping threads releasing forkjoin barrier
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                  gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    register kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      register kmp_info_t *child_thr = other_threads[child_tid];
      register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child+1 <= branch_factor && child_tid+1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
                                   team, child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                    "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                    __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                    child_tid, &child_bar->b_go, child_bar->b_go,
                    child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    }
    while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
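
// Hyper Barrier: a hypercube-embedded tree gather with minimum branching factor 2^branch_bits.
// At each level a thread either signals its parent (and stops) or collects its children.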
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           void (*reduce)(void *, void *)
                           USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads = team->t.t_threads;
  register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  register kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 offset;
  register kmp_uint32 level;

  KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));

  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads have arrived,
     and reduce any required data as we go. */
  kmp_flag_64 p_flag(&thr_bar->b_arrived);
  for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
  {
    register kmp_uint32 child;
    register kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                    "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                    __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                    &thr_bar->b_arrived, thr_bar->b_arrived,
                    thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread.
      /* After performing this write (in the last iteration of the enclosing for loop), a worker
         thread may not assume that the team is valid any more - it could be deallocated by the
         master thread at any time. */
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child=1, child_tid=tid+(1 << level); child<branch_factor && child_tid<num_threads;
         child++, child_tid+=(1 << level))
    {
      register kmp_info_t *child_thr = other_threads[child_tid];
      register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      register kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child+1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                    "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                    __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                    &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE
                  USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                       team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the master thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
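
// Hyper release: the release tree is walked top-down; with KMP_REVERSE_HYPER_BAR defined
// (the default below) children are released in the reverse of the gather order.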
// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void
__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            int propagate_icvs
                            USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  register kmp_team_t *team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads;
  register kmp_uint32 num_threads;
  register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 child;
  register kmp_uint32 child_tid;
  register kmp_uint32 offset;
  register kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that have been gathered.
     If KMP_REVERSE_HYPER_BAR is defined (default), the threads are released in the reverse
     order of the corresponding gather, otherwise threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // master
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // master already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  }
  else { // Handle internal children and leaf nodes - wait for parent thread to release us
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE
              USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    // Early exit for reaping threads releasing forkjoin barrier
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                  gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level=0, offset=1; offset<num_threads && (((tid>>level) & (branch_factor-1)) == 0);
       level+=branch_bits, offset<<=branch_bits);

  // Now go down from there
  for (level-=branch_bits, offset>>=branch_bits; offset != 0;
       level-=branch_bits, offset>>=branch_bits)
#else
  // Go down the tree, level by level
  for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level==0) ? level : level-1);
    for (child=(child<branch_factor-1) ? child : branch_factor-1, child_tid=tid+(child<<level);
         child>=1; child--, child_tid-=(1<<level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this, since this is the level parent would be notified
      break;
    // Iterate through children on this level of the tree
    for (child=1, child_tid=tid+(1<<level); child<branch_factor && child_tid<num_threads;
         child++, child_tid+=(1<<level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      register kmp_info_t *child_thr = other_threads[child_tid];
      register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      register kmp_uint32 next_child_tid = child_tid - (1 << level);
      // Prefetch next thread's go count
# ifdef KMP_REVERSE_HYPER_BAR
      if (child-1 >= 1 && next_child_tid < num_threads)
# else
      if (child+1 < branch_factor && next_child_tid < num_threads)
# endif // KMP_REVERSE_HYPER_BAR
        KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      if (propagate_icvs) // push my fixed ICVs to my child
        copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

      KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                    "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                    __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                    child_tid, &child_bar->b_go, child_bar->b_go,
                    child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final destination
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
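
// Hierarchical Barrier
// The hierarchical barrier mirrors the machine hierarchy obtained from __kmp_get_hierarchy.
// The helper below (re-)initializes a thread's kmp_bstate_t for the current team: its level,
// parent, and the leaf_state byte flags used by the on-core ("oncore") barrier optimization.
// It returns true when the team changed, i.e. when the oncore shortcut cannot be trusted.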
static bool
__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar,
                                       kmp_uint32 nproc, int gtid, int tid, kmp_team_t *team)
{
  // Checks to determine if (re-)initialization is needed
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default for master
    thr_bar->parent_tid = -1;               // default for master
    if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) { // find parent based on level of thread in hierarchy, and note level
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) { // reached level right below the master
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        }
        else if ((rem = tid % thr_bar->skip_per_level[d+1]) != 0) {
          // thread is not a subtree root at the next level, so this is its max level
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0;
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      thr_bar->leaf_kids = nproc - tid - 1;
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
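
// Hierarchical gather: with infinite blocktime and the oncore barrier enabled, leaf children
// check in on byte-sized slots of their parent's b_arrived word; otherwise each child is waited
// on through its own 64-bit b_arrived flag, level by level.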
static void
__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                                  void (*reduce)(void *, void *)
                                  USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_uint32 nproc = this_thr->th.th_team_nproc;
  register kmp_info_t **other_threads = team->t.t_threads;
  register kmp_uint64 new_state;
  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
761 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
762 gtid, team->t.t_id, tid, bt));
763 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
774 if (thr_bar->my_level) {
775 register kmp_int32 child_tid;
776 new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
777 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
778 if (thr_bar->leaf_kids) {
779 kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
780 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n",
781 gtid, team->t.t_id, tid));
782 kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
783 flag.wait(this_thr, FALSE
784 USE_ITT_BUILD_ARG(itt_sync_obj) );
786 ANNOTATE_REDUCE_AFTER(reduce);
787 for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) {
788 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
789 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
790 team->t.t_id, child_tid));
791 (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data);
793 ANNOTATE_REDUCE_BEFORE(reduce);
794 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
        // clear leaf_state bits
        (void)KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state));
799 for (kmp_uint32 d=1; d<thr_bar->my_level; ++d) {
800 kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
801 if (last > nproc) last = nproc;
802 for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
803 register kmp_info_t *child_thr = other_threads[child_tid];
804 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
805 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " 806 "arrived(%p) == %llu\n",
807 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
808 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
809 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
810 flag.wait(this_thr, FALSE
811 USE_ITT_BUILD_ARG(itt_sync_obj) );
813 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
814 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
815 team->t.t_id, child_tid));
816 ANNOTATE_REDUCE_AFTER(reduce);
817 (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
818 ANNOTATE_REDUCE_BEFORE(reduce);
819 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
825 for (kmp_uint32 d=0; d<thr_bar->my_level; ++d) {
826 kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
827 if (last > nproc) last = nproc;
828 for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
829 register kmp_info_t *child_thr = other_threads[child_tid];
830 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
831 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " 832 "arrived(%p) == %llu\n",
833 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
834 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
835 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
836 flag.wait(this_thr, FALSE
837 USE_ITT_BUILD_ARG(itt_sync_obj) );
839 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
840 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
841 team->t.t_id, child_tid));
842 ANNOTATE_REDUCE_AFTER(reduce);
843 (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
844 ANNOTATE_REDUCE_BEFORE(reduce);
845 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
853 if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                  "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid,
                  &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    /* Mark arrival to parent: After performing this write, a worker thread may not assume that
       the team is valid any more - it could be deallocated by the master thread at any time. */
    if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
        || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it
      kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
      flag.release();
    }
    else { // Leaf does special release on the "offset" bits of the parent's b_arrived flag
      thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
      flag.set_waiter(other_threads[thr_bar->parent_tid]);
      flag.release();
    }
  } else { // Master thread needs to update the team's b_arrived value
    team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived,
                  team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
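
// Hierarchical release: non-leaf threads wait on their own b_go flag, while leaves of an
// oncore-enabled hierarchy wait on their "offset" byte of the parent's b_go; the master then
// releases children top-down, piggybacking ICVs on the same cache line when NGO stores are used.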
static void
__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                                   int propagate_icvs
                                   USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
  register kmp_team_t *team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_uint32 nproc;
  bool team_change = false; // indicates on-core barrier shouldn't be used
892 if (KMP_MASTER_TID(tid)) {
893 team = __kmp_threads[gtid]->th.th_team;
894 KMP_DEBUG_ASSERT(team != NULL);
895 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n",
896 gtid, team->t.t_id, tid, bt));
  }
  else { // Worker threads
    // Wait for parent thread to release me
    if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
        || thr_bar->my_level != 0 || thr_bar->team == NULL) {
      // Use traditional method of waiting on my own b_go flag
      thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
      kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE
                USE_ITT_BUILD_ARG(itt_sync_obj));
      TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
    }
    else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested
      // Wait on my "offset" bits on parent's b_go flag
      thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset,
                           bt, this_thr
                           USE_ITT_BUILD_ARG(itt_sync_obj));
      flag.wait(this_thr, TRUE);
      if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go flag
        TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      }
      else { // Reset my bits on parent's b_go flag
        ((char *)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
      }
    }
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
925 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
928 team = __kmp_threads[gtid]->th.th_team;
929 KMP_DEBUG_ASSERT(team != NULL);
930 tid = __kmp_tid_from_gtid(gtid);
932 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
933 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
937 nproc = this_thr->th.th_team_nproc;
  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (team->t.t_threads[0]->th.th_teams_microtask) { // are we inside the teams construct?
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level)
      ++level; // level was not increased in teams construct for team_of_workers
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  }
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
951 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
952 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
953 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
955 if (team_change) old_leaf_kids = 0;
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs) {
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
    if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
    else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
      if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
        // Leaves pull parent's fixed ICVs directly to their local ICV store
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
      // Non-leaves will get ICVs piggybacked with b_go via NGO store
    }
    else { // blocktime is not infinite
      if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs so children can access them
        copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
      if (!KMP_MASTER_TID(tid)) // master thread already has ICVs in final destination; copy
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PUSH

  // Now, release my children
  if (thr_bar->my_level) { // not a leaf
    register kmp_int32 child_tid;
    kmp_uint32 last;
984 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
985 if (KMP_MASTER_TID(tid)) {
987 thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
989 ngo_load(&thr_bar->th_fixed_icvs);
991 for (child_tid=thr_bar->skip_per_level[1]; child_tid<(
int)nproc; child_tid+=thr_bar->skip_per_level[1]) {
992 register kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
993 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" 994 " go(%p): %u => %u\n",
995 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
996 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
997 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Use ngo store (if available) to both store ICVs and release child via child's b_go
          ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
        }
        ngo_sync();
      }
      TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      // Now, release leaf children
      if (thr_bar->leaf_kids) { // if there are any
1007 if (team_change || old_leaf_kids < thr_bar->leaf_kids) {
1008 if (old_leaf_kids) {
1009 thr_bar->b_go |= old_leaf_state;
1012 last = tid+thr_bar->skip_per_level[1];
1013 if (last > nproc) last = nproc;
1014 for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) {
1015 register kmp_info_t *child_thr = team->t.t_threads[child_tid];
1016 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1017 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" 1018 " T#%d(%d:%d) go(%p): %u => %u\n",
1019 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1020 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1021 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child using child's b_go flag
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
          }
        }
        else { // Release all children at once with leaf_state bits on my own b_go flag
          thr_bar->b_go |= thr_bar->leaf_state;
        }
      }
    }
    else { // Blocktime is not infinite; do a simple hierarchical release
      for (int d = thr_bar->my_level - 1; d >= 0; --d) { // Release highest level threads first
1034 last = tid+thr_bar->skip_per_level[d+1];
1035 kmp_uint32 skip = thr_bar->skip_per_level[d];
1036 if (last > nproc) last = nproc;
1037 for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
1038 register kmp_info_t *child_thr = team->t.t_threads[child_tid];
1039 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1040 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" 1041 " go(%p): %u => %u\n",
1042 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1043 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1044 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Release child using child's b_go flag
          kmp_flag_64 flag(&child_bar->b_go, child_thr);
          flag.release();
        }
      }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to final dest
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
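
// End of Barrier Algorithms

// Internal barrier entry point.  If is_split is true, do a split barrier (the master may return
// before the release phase); if reduce is non-NULL, combine per-thread reduce_data while
// gathering.  Returns 0 for the master thread and 1 for worker threads.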
int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
              void *reduce_data, void (*reduce)(void *, void *))
{
  KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  register int tid = __kmp_tid_from_gtid(gtid);
  register kmp_info_t *this_thr = __kmp_threads[gtid];
  register kmp_team_t *team = this_thr->th.th_team;
  register int status = 0;
  ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
  ompt_task_id_t my_task_id;
  ompt_parallel_id_t my_parallel_id;
#endif
1082 KA_TRACE(15, (
"__kmp_barrier: T#%d(%d:%d) has arrived\n",
1083 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
  ANNOTATE_NEW_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  if (ompt_enabled) {
    my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
    my_parallel_id = team->t.ompt_team_info.parallel_id;

    if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
      if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
        ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
            my_parallel_id, my_task_id);
      }
    }
    if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
      ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
          my_parallel_id, my_task_id);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
  }
#endif
  if (!team->t.t_serialized) {
#if USE_ITT_BUILD
    // This value will be used in itt notify events below.
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
#endif
#endif /* USE_ITT_BUILD */
1121 if (__kmp_tasking_mode == tskm_extra_barrier) {
1122 __kmp_tasking_barrier(team, this_thr, gtid);
1123 KA_TRACE(15, (
"__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
1124 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1130 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1132 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1134 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1138 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1139 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1143 if (KMP_MASTER_TID(tid)) {
1144 team->t.t_bar[bt].b_master_arrived += 1;
1146 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1149 if (reduce != NULL) {
1151 this_thr->th.th_local.reduce_data = reduce_data;
1154 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1155 __kmp_task_team_setup(this_thr, team, 0);
    switch (__kmp_barrier_gather_pattern[bt]) {
    case bp_hyper_bar: {
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
      __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce
                                 USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_hierarchical_bar: {
      __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce
                                        USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_tree_bar: {
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
      __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce
                                USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    default: {
      __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce
                                  USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    }
1183 if (KMP_MASTER_TID(tid)) {
1185 if (__kmp_tasking_mode != tskm_immediate_exec) {
1186 __kmp_task_team_wait(this_thr, team
1187 USE_ITT_BUILD_ARG(itt_sync_obj) );
1191 team->t.t_bar[bt].b_team_arrived += 1;
1198 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1199 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier - report frame end (only if active_level == 1)
      if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
          this_thr->th.th_teams_microtask == NULL &&
#endif
          team->t.t_active_level == 1)
      {
1209 kmp_uint64 cur_time = __itt_get_timestamp();
1210 kmp_info_t **other_threads = team->t.t_threads;
1211 int nproc = this_thr->th.th_team_nproc;
        switch (__kmp_forkjoin_frames_mode) {
        case 1:
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        case 2:
          __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
          break;
        case 3:
          if (__itt_metadata_add_ptr) {
            // Initialize with master's wait time
            kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
            // Set arrive time to zero to be able to check it in __kmp_invoke_task()
            this_thr->th.th_bar_arrive_time = 0;
            for (int i = 1; i < nproc; ++i) {
              delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
              other_threads[i]->th.th_bar_arrive_time = 0;
            }
            __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta,
                                         (kmp_uint64)(reduce != NULL));
          }
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        }
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    } else { // Worker threads
      status = 1;
#if USE_ITT_BUILD
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
    }
    if (status == 1 || !is_split) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
                                    USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
                                           USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
                                   USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
                                     USE_ITT_BUILD_ARG(itt_sync_obj));
      }
      }
1270 if (__kmp_tasking_mode != tskm_immediate_exec) {
1271 __kmp_task_team_sync(this_thr, team);
1279 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1280 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
  } else { // Team is serialized.
    status = 0;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
#if OMP_45_ENABLED
      if (this_thr->th.th_task_team != NULL) {
        void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
          __kmp_itt_barrier_starting(gtid, itt_sync_obj);
        }
#endif
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
        __kmp_task_team_wait(this_thr, team
                             USE_ITT_BUILD_ARG(itt_sync_obj));
        __kmp_task_team_setup(this_thr, team, 0);

#if USE_ITT_BUILD
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
      }
#else
      // The task team should be NULL for serialized code (tasks will be executed immediately)
      KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
      KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
#endif
    }
  }
  KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
                gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));

#if OMPT_SUPPORT
  if (ompt_enabled) {
    if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
      ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
          my_parallel_id, my_task_id);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
  }
#endif
  ANNOTATE_NEW_BARRIER_END(&team->t.t_bar);

  return status;
}
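
// Releases the workers of a split barrier that was left open by __kmp_barrier (master only).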
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  ANNOTATE_NEW_BARRIER_BEGIN(&team->t.t_bar);
  if (!team->t.t_serialized) {
    if (KMP_MASTER_GTID(gtid)) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
                                    USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
                                           USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
                                   USE_ITT_BUILD_ARG(NULL));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
                                     USE_ITT_BUILD_ARG(NULL));
      }
      }
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
      }
    }
  }
  ANNOTATE_NEW_BARRIER_END(&team->t.t_bar);
}
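
// Join barrier: gather-only barrier executed at the end of a parallel region on the fork/join
// barrier object; the matching release happens later, in the fork barrier.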
void
__kmp_join_barrier(int gtid)
{
  KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  register kmp_info_t *this_thr = __kmp_threads[gtid];
  register kmp_team_t *team;
  register kmp_uint nproc;
  kmp_info_t *master_thread;
  int tid;
#ifdef KMP_DEBUG
  int team_id;
#endif /* KMP_DEBUG */
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
    // Get object created at fork_barrier
    itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#endif
#endif /* USE_ITT_BUILD */
  KMP_MB();
1400 team = this_thr->th.th_team;
1401 nproc = this_thr->th.th_team_nproc;
1402 KMP_DEBUG_ASSERT((
int)nproc == team->t.t_nproc);
1403 tid = __kmp_tid_from_gtid(gtid);
1405 team_id = team->t.t_id;
1407 master_thread = this_thr->th.th_team_master;
1409 if (master_thread != team->t.t_threads[0]) {
1410 __kmp_print_structure();
1413 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
1417 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
1418 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
1419 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
1420 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
1421 KA_TRACE(10, (
"__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));
  ANNOTATE_NEW_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  if (ompt_enabled &&
      ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
    ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
        team->t.ompt_team_info.parallel_id,
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
  }
  this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
#endif
  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, team_id, tid));
  }
#ifdef KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state], this_thr->th.th_task_team));
    KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
  }
#endif /* KMP_DEBUG */
  /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the
     team struct is not guaranteed to exist. */
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
  }
1461 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1462 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                               USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                      USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                              USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }
1493 if (KMP_MASTER_TID(tid)) {
1494 if (__kmp_tasking_mode != tskm_immediate_exec) {
1495 __kmp_task_team_wait(this_thr, team
1496 USE_ITT_BUILD_ARG(itt_sync_obj) );
#if KMP_STATS_ENABLED
    // Have master thread flag the workers to indicate they are now waiting for the next parallel
    // region, and also wake them up so they switch their timers to idle.
    for (int i = 0; i < team->t.t_nproc; ++i) {
      kmp_info_t *team_thread = team->t.t_threads[i];
      if (team_thread == this_thr)
        continue;
      team_thread->th.th_stats->setIdleFlag();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL)
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
                                  team_thread->th.th_sleep_loc);
    }
#endif
1511 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1512 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Join barrier - report frame end
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
        this_thr->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1)
    {
1523 kmp_uint64 cur_time = __itt_get_timestamp();
1524 ident_t * loc = team->t.t_ident;
1525 kmp_info_t **other_threads = team->t.t_threads;
1526 int nproc = this_thr->th.th_team_nproc;
      switch (__kmp_forkjoin_frames_mode) {
      case 1:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
        break;
      case 2:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
        break;
      case 3:
        if (__itt_metadata_add_ptr) {
          // Initialize with master's wait time
          kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
          // Set arrive time to zero to be able to check it in __kmp_invoke_task()
          this_thr->th.th_bar_arrive_time = 0;
          for (int i = 1; i < nproc; ++i) {
            delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
            other_threads[i]->th.th_bar_arrive_time = 0;
          }
          __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0);
        }
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
        this_thr->th.th_frame_time = cur_time;
        break;
      }
1556 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1557 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1562 if (KMP_MASTER_TID(tid)) {
1563 KA_TRACE(15, (
"__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
1564 gtid, team_id, tid, nproc));
1570 KA_TRACE(10, (
"__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
#if OMPT_SUPPORT
  if (ompt_enabled) {
    if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
      ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
          team->t.ompt_team_info.parallel_id,
          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
    // return to default state
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif
  ANNOTATE_NEW_BARRIER_END(&team->t.t_bar);
}
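
// Fork barrier: the master releases the team at the start of a parallel region (TRUE is passed
// so ICVs are propagated); workers wait here between parallel regions.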
void
__kmp_fork_barrier(int gtid, int tid)
{
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#endif /* USE_ITT_BUILD */
  if (team)
    ANNOTATE_NEW_BARRIER_END(&team->t.t_bar);
1604 KA_TRACE(10, (
"__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
1605 gtid, (team != NULL) ? team->t.t_id : -1, tid));
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      // Create itt barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#ifdef KMP_DEBUG
    register kmp_info_t **other_threads = team->t.t_threads;
    register int i;

    // Verify state
    KMP_MB();
1624 for(i=1; i<team->t.t_nproc; ++i) {
1625 KA_TRACE(500, (
"__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n",
1626 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
1627 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
1628 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
1629 KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)
1630 & ~(KMP_BARRIER_SLEEP_STATE))
1631 == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    }
#endif

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // 0 indicates setup current task team if nthreads > 1
      __kmp_task_team_setup(this_thr, team, 0);
    }
    /* The master thread may have changed its blocktime between the join and fork barriers.
       Copy the blocktime info to the thread, where __kmp_wait_template() can access it when
       the team struct is not guaranteed to exist. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
    }
  } // master
  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                       USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                               USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                 USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }
  // Early exit for reaping threads releasing forkjoin barrier
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        if (itt_sync_obj)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
      }
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
    return;
  }
1696 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
1697 KMP_DEBUG_ASSERT(team != NULL);
1698 tid = __kmp_tid_from_gtid(gtid);
#if KMP_BARRIER_ICV_PULL
  /* The master thread's copy of the ICVs was set up on its implicit taskdata before this point;
     workers now pull those values from the master's th_fixed_icvs into their own implicit task. */
  {
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
    if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
      // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
      KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PULL

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
  }
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
#endif
#if KMP_AFFINITY_SUPPORTED
    // Call dynamic affinity settings
    if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(tid, team->t.t_nproc);
    }
#endif // KMP_AFFINITY_SUPPORTED
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  }
  else if (proc_bind != proc_bind_false) {
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place));
    }
    else {
      __kmp_affinity_set_place(gtid);
    }
  }
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      // Get correct barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
    } // (prepare called inside barrier_release)
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
  ANNOTATE_NEW_BARRIER_END(&team->t.t_bar);
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid));
}
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs,
                     ident_t *loc)
{
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);

  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

#if KMP_BARRIER_ICV_PULL
  /* Copy the ICVs into the master thread's fixed ICVs, where all of the worker threads can
     access them and make their own copies after the barrier. */
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated.
  copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
                0, team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
                0, team->t.t_threads[0], team));
#else
  // Copy the ICVs to each of the non-master threads.  This takes O(nthreads) time.
  ngo_load(new_icvs);
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated.
  for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
  }
  ngo_sync();
#endif // KMP_BARRIER_ICV_PULL
}