19 #include "kmp_stats.h" 20 #include "kmp_wait_release.h" 23 #include "ompt-specific.h" 26 #include "tsan_annotations.h" 29 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
30 kmp_info_t *this_thr);
31 static void __kmp_alloc_task_deque(kmp_info_t *thread,
32 kmp_thread_data_t *thread_data);
33 static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
34 kmp_task_team_t *task_team);
37 static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack in order
// from top to bottom (debugging aid).
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up stack_top if we need to pop from the previous block
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // finish bookkeeping
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack onwards
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// __kmp_push_task_stack: Push the tied task onto the task stack.
// Grow the stack if necessary by allocating another block.
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));

  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
// the task; just check that it matches the ending task passed in.
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from the previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // if the stack was built correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif // BUILD_TIED_TASK_STACK
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (taskdata->td_flags.task_serial) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (!KMP_TASKING_ENABLED(task_team)) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate
  if (thread_data->td.td_deque == NULL) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Lock the deque for the task push operation
  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

#if OMP_45_ENABLED
  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }
#else
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
#endif

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count

  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
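
// __kmp_pop_current_task_from_thread: restore the thread's current task to the
// parent task when a team ends.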
351 void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
352 KF_TRACE(10, (
"__kmp_pop_current_task_from_thread(enter): T#%d " 353 "this_thread=%p, curtask=%p, " 354 "curtask_parent=%p\n",
355 0, this_thr, this_thr->th.th_current_task,
356 this_thr->th.th_current_task->td_parent));
358 this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
360 KF_TRACE(10, (
"__kmp_pop_current_task_from_thread(exit): T#%d " 361 "this_thread=%p, curtask=%p, " 362 "curtask_parent=%p\n",
363 0, this_thr, this_thr->th.th_current_task,
364 this_thr->th.th_current_task->td_parent));
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
377 KF_TRACE(10, (
"__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p " 380 tid, this_thr, this_thr->th.th_current_task,
381 team->t.t_implicit_task_taskdata[tid].td_parent));
383 KMP_DEBUG_ASSERT(this_thr != NULL);
  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }
397 KF_TRACE(10, (
"__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p " 400 tid, this_thr, this_thr->th.th_current_task,
401 team->t.t_implicit_task_taskdata[tid].td_parent));
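
// __kmp_task_start: bookkeeping done when a task starts executing: mark the
// current task as suspended and make the new task the thread's current task.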
409 static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
410 kmp_taskdata_t *current_task) {
411 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
412 kmp_info_t *thread = __kmp_threads[gtid];
  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));
418 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
423 current_task->td_flags.executing = 0;
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif // BUILD_TIED_TASK_STACK

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;
435 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
436 taskdata->td_flags.tiedness == TASK_UNTIED);
437 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
438 taskdata->td_flags.tiedness == TASK_UNTIED);
439 taskdata->td_flags.started = 1;
440 taskdata->td_flags.executing = 1;
441 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
442 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));

#if OMPT_SUPPORT
  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
    kmp_taskdata_t *parent = taskdata->td_parent;
    ompt_callbacks.ompt_callback(ompt_event_task_begin)(
        parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
        parent ? &(parent->ompt_task_info.frame) : NULL,
        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function);
  }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
  /* OMPT emit all dependences if requested by the tool */
  if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
      ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
    ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps,
        taskdata->ompt_task_info.ndeps);
    /* We can now free the allocated memory for the dependencies */
    KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps);
    taskdata->ompt_task_info.deps = NULL;
    taskdata->ompt_task_info.ndeps = 0;
  }
#endif
}
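
// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution; the task is executed immediately by the encountering thread.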
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));
492 if (taskdata->td_flags.tiedness == TASK_UNTIED) {
495 kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
496 KA_TRACE(20, (
"__kmpc_omp_task_begin_if0: T#%d untied_count (%d) " 497 "incremented for task %p\n",
498 gtid, counter, taskdata));
  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}

#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: not generated by the compiler; deprecated entry point.
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
519 (
"__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
520 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
522 __kmp_task_start(gtid, task, current_task);
  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
#endif // TASK_UNUSED

// __kmp_free_task: free the current task space and the space for shareds
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));
541 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
542 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
543 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
544 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
545 KMP_DEBUG_ASSERT(TCR_4(taskdata->td_allocated_child_tasks) == 0 ||
546 taskdata->td_flags.task_serial == 1);
547 KMP_DEBUG_ASSERT(TCR_4(taskdata->td_incomplete_child_tasks) == 0);
549 taskdata->td_flags.freed = 1;
  ANNOTATE_HAPPENS_BEFORE(taskdata);

// deallocate the taskdata and shared-variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
// __kmp_free_task_and_ancestors: free the current task and ancestors without
// children
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
#if OMP_45_ENABLED
  // Proxy tasks must always be allowed to free their parents
  // because they can be run in background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
#else
  kmp_int32 team_serial =
      taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
#endif
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children =
      KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Now, go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    // Stop checking ancestors at the implicit task to avoid premature
    // deallocation of ancestors.
    if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT)
      return;

    // Predecrement simulated by "- 1" calculation
    children = KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
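
// __kmp_task_finish: bookkeeping done when a task finishes execution: flag
// updates, child/taskgroup counters, dependence release, and freeing the task.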
620 static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
621 kmp_taskdata_t *resumed_task) {
622 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
623 kmp_info_t *thread = __kmp_threads[gtid];
624 kmp_task_team_t *task_team =
625 thread->th.th_task_team;
626 kmp_int32 children = 0;
629 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) {
630 kmp_taskdata_t *parent = taskdata->td_parent;
631 ompt_callbacks.ompt_callback(ompt_event_task_end)(
632 taskdata->ompt_task_info.task_id);
  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif // BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
    // untied task needs to check the counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // untied task is not done; it may be continued by another thread, so do
      // not free it now
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // In a serialized task, the resumed
        // task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }
674 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
675 taskdata->td_flags.complete = 1;
676 KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
677 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  // Only need to keep track of counts if team parallel and tasking not serialized
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    // Predecrement simulated by "- 1" calculation
    children =
        KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) -
        1;
    KMP_DEBUG_ASSERT(children >= 0);
    if (taskdata->td_taskgroup)
      KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
  }
  // if we found proxy tasks there could exist a dependency chain
  // with the proxy task as origin
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
      (task_team && task_team->tt.tt_found_proxy_tasks)) {
    __kmp_release_deps(gtid, taskdata);
  }
704 KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
705 taskdata->td_flags.executing = 0;
  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));
#if OMP_40_ENABLED
  // If the task's destructor thunk flag has been set, invoke the destructor
  // thunk generated by the compiler.
  if (taskdata->td_flags.destructors_thunk) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }
#endif // OMP_40_ENABLED

  // bookkeeping for resuming task: note tasking_ser => task_serial
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
      // task is the parent
    } else if (!(task_team && task_team->tt.tt_found_proxy_tasks)) {
      // verify that the resumed task passed in points to the parent
      KMP_DEBUG_ASSERT(resumed_task == taskdata->td_parent);
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verify that resumed task is passed as argument
  }

  // Free this task and then ancestor tasks if they have no children.
  // Restore th_current_task first so asynchronous inquiries never see the
  // freed task as the current task.
  thread->th.th_current_task = resumed_task;
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume previous task

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
}
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
772 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
773 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
775 __kmp_task_finish(gtid, task, NULL);
777 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  return;
}

#ifdef TASK_UNUSED
// __kmpc_omp_task_complete: not generated by the compiler; deprecated.
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
788 loc_ref, KMP_TASK_TO_TASKDATA(task)));
790 __kmp_task_finish(gtid, task, NULL);
792 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
  return;
}
#endif // TASK_UNUSED

// __kmp_task_init_ompt: initialize the OMPT fields maintained by a task
static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid,
                                        void *function) {
  task->ompt_task_info.task_id = __ompt_task_id_new(tid);
  task->ompt_task_info.function = function;
  task->ompt_task_info.frame.exit_runtime_frame = NULL;
  task->ompt_task_info.frame.reenter_runtime_frame = NULL;
  task->ompt_task_info.ndeps = 0;
  task->ompt_task_info.deps = NULL;
}
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));
837 task->td_task_id = KMP_GEN_TASK_ID();
838 task->td_team = team;
841 task->td_ident = loc_ref;
842 task->td_taskwait_ident = NULL;
843 task->td_taskwait_counter = 0;
844 task->td_taskwait_thread = 0;
846 task->td_flags.tiedness = TASK_TIED;
847 task->td_flags.tasktype = TASK_IMPLICIT;
849 task->td_flags.proxy = TASK_FULL;
853 task->td_flags.task_serial = 1;
854 task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
855 task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
857 task->td_flags.started = 1;
858 task->td_flags.executing = 1;
859 task->td_flags.complete = 0;
860 task->td_flags.freed = 0;
863 task->td_depnode = NULL;
  if (set_curr_task) { // only do this initialization the first time the thread
    // is created
    task->td_incomplete_child_tasks = 0;
    task->td_allocated_child_tasks = 0; // Not used: don't need to deallocate
    // implicit task
    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

  __kmp_task_init_ompt(task, tid, NULL);

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash)
    __kmp_dephash_free_entries(thread, task->td_dephash);
}

void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash)
    __kmp_dephash_free(thread, task->td_dephash);
  task->td_dephash = NULL;
}
// Round up a size to a multiple of val (val must be a power of two)
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
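
// __kmp_task_alloc: allocate the taskdata and task structures (the shareds
// block is co-allocated with the taskdata to avoid a second allocation) and
// initialize the new task's flags and bookkeeping fields.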
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
    }
    flags->final = 1;
  }

#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY) {
    flags->tiedness = TASK_UNTIED;
    flags->merged_if0 = 1;
963 if ((thread->th.th_task_team) == NULL) {
966 KMP_DEBUG_ASSERT(team->t.t_serialized);
968 (
"T#%d creating task team in __kmp_task_alloc for proxy task\n",
970 __kmp_task_team_setup(
973 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
975 kmp_task_team_t *task_team = thread->th.th_task_team;
978 if (!KMP_TASKING_ENABLED(task_team)) {
981 (
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
982 __kmp_enable_tasking(task_team, thread);
983 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
984 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    if (task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
  }
#endif // OMP_45_ENABLED
  // Calculate shared structure offset, including padding after the kmp_task_t
  // struct so that pointers in the shareds struct are aligned
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

// Avoid double allocation by combining shareds with taskdata
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */
  ANNOTATE_HAPPENS_AFTER(taskdata);

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure shareds struct is aligned to pointer size
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
1039 taskdata->td_task_id = KMP_GEN_TASK_ID();
1040 taskdata->td_team = team;
1041 taskdata->td_alloc_thread = thread;
1042 taskdata->td_parent = parent_task;
1043 taskdata->td_level = parent_task->td_level + 1;
1044 taskdata->td_untied_count = 0;
1045 taskdata->td_ident = loc_ref;
1046 taskdata->td_taskwait_ident = NULL;
1047 taskdata->td_taskwait_counter = 0;
1048 taskdata->td_taskwait_thread = 0;
1049 KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
1052 if (flags->proxy == TASK_FULL)
1054 copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
1056 taskdata->td_flags.tiedness = flags->tiedness;
1057 taskdata->td_flags.final = flags->final;
1058 taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
  taskdata->td_flags.proxy = flags->proxy;
1064 taskdata->td_task_team = thread->th.th_task_team;
1065 taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
1067 taskdata->td_flags.tasktype = TASK_EXPLICIT;
1070 taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
1073 taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
1079 taskdata->td_flags.task_serial =
1080 (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
1081 taskdata->td_flags.tasking_ser);
1083 taskdata->td_flags.started = 0;
1084 taskdata->td_flags.executing = 0;
1085 taskdata->td_flags.complete = 0;
1086 taskdata->td_flags.freed = 0;
1088 taskdata->td_flags.native = flags->native;
1090 taskdata->td_incomplete_child_tasks = 0;
1091 taskdata->td_allocated_child_tasks = 1;
1094 taskdata->td_taskgroup =
1095 parent_task->td_taskgroup;
1096 taskdata->td_dephash = NULL;
1097 taskdata->td_depnode = NULL;
// Only need to keep track of child task counts if team parallel and tasking
// not serialized, or if it is a proxy task
#if OMP_45_ENABLED
  if (flags->proxy == TASK_PROXY ||
      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#else
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#endif
  {
    KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
    // Only track allocated child tasks for explicit tasks, since implicit
    // tasks are never deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks);
    }
  }
1121 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1122 gtid, taskdata, taskdata->td_parent));
  ANNOTATE_HAPPENS_BEFORE(task);

  __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry);

  return task;
}
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;

  input_flags->native = FALSE;
// __kmp_task_alloc() sets up all other runtime flags

#if OMP_45_ENABLED
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));
#else
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
                sizeof_kmp_task_t, sizeof_shareds, task_entry));
#endif

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
1168 static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
1169 kmp_taskdata_t *current_task) {
1170 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
1171 kmp_uint64 cur_time;
  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
1178 KMP_DEBUG_ASSERT(task);
  if (taskdata->td_flags.proxy == TASK_PROXY &&
      taskdata->td_flags.complete == 1) {
    // This is a proxy task that was already completed; run its bottom half
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));
    return;
  }
1199 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1200 if (__kmp_forkjoin_frames_mode == 3) {
1203 cur_time = __itt_get_timestamp();
1209 if (taskdata->td_flags.proxy != TASK_PROXY) {
1211 ANNOTATE_HAPPENS_AFTER(task);
1212 __kmp_task_start(gtid, task, current_task);
1218 ompt_thread_info_t oldInfo;
1222 thread = __kmp_threads[gtid];
1223 oldInfo = thread->th.ompt_thread_info;
1224 thread->th.ompt_thread_info.wait_id = 0;
1225 thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1226 taskdata->ompt_task_info.frame.exit_runtime_frame =
1227 __builtin_frame_address(0);
1235 if (__kmp_omp_cancellation) {
1236 kmp_info_t *this_thr = __kmp_threads[gtid];
1237 kmp_team_t *this_team = this_thr->th.th_team;
1238 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1239 if ((taskgroup && taskgroup->cancel_request) ||
1240 (this_team->t.t_cancel_request == cancel_parallel)) {
1250 #if KMP_STATS_ENABLED 1252 switch (KMP_GET_THREAD_STATE()) {
1253 case FORK_JOIN_BARRIER:
1254 KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
1257 KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
1260 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
1263 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
1266 KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
1269 KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
1272 #endif // KMP_STATS_ENABLED 1273 #endif // OMP_40_ENABLED 1275 #if OMPT_SUPPORT && OMPT_TRACE 1277 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
1278 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1279 current_task->ompt_task_info.task_id,
1280 taskdata->ompt_task_info.task_id);
#ifdef KMP_GOMP_COMPAT
  if (taskdata->td_flags.native) {
    ((void (*)(void *))(*(task->routine)))(task->shareds);
  } else
#endif /* KMP_GOMP_COMPAT */
  {
    (*(task->routine))(gtid, task);
  }
1292 KMP_POP_PARTITIONED_TIMER();
1294 #if OMPT_SUPPORT && OMPT_TRACE 1296 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
1297 ompt_callbacks.ompt_callback(ompt_event_task_switch)(
1298 taskdata->ompt_task_info.task_id,
1299 current_task->ompt_task_info.task_id);
1305 #endif // OMP_40_ENABLED 1309 thread->th.ompt_thread_info = oldInfo;
1310 taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
1316 if (taskdata->td_flags.proxy != TASK_PROXY) {
1318 ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
1319 __kmp_task_finish(gtid, task, current_task);
1324 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1326 if (__kmp_forkjoin_frames_mode == 3) {
1327 kmp_info_t *this_thr = __kmp_threads[gtid];
1328 if (this_thr->th.th_bar_arrive_time) {
1329 this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
  return;
}
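
// __kmpc_omp_task_parts: schedule a task with explicit task parts: try to push
// it onto the current thread's deque, and execute it immediately if that fails.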
1350 kmp_int32 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid,
1351 kmp_task_t *new_task) {
1352 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1354 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
1355 loc_ref, new_taskdata));
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
  { // Execute this task immediately
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: "
                "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                gtid, loc_ref, new_taskdata));

  ANNOTATE_HAPPENS_BEFORE(new_task);
  return TASK_CURRENT_NOT_QUEUED;
}
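
// __kmp_omp_task: schedule a non-thread-switchable task for execution: defer
// it to the deque when possible, otherwise invoke it immediately.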
1388 kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
1389 bool serialize_immediate) {
1390 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1394 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1395 __builtin_frame_address(1);
1402 if (new_taskdata->td_flags.proxy == TASK_PROXY ||
1403 __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1405 if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED)
1408 kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
1409 if (serialize_immediate)
1410 new_taskdata->td_flags.task_serial = 1;
1411 __kmp_invoke_task(gtid, new_task, current_task);
1416 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
1420 ANNOTATE_HAPPENS_BEFORE(new_task);
1421 return TASK_CURRENT_NOT_QUEUED;
1436 kmp_int32 __kmpc_omp_task(
ident_t *loc_ref, kmp_int32 gtid,
1437 kmp_task_t *new_task) {
1439 KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
1442 kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1444 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
1447 res = __kmp_omp_task(gtid, new_task,
true);
  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  return res;
}
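
// __kmpc_omp_taskwait: wait until all child tasks of the current task are
// complete, executing other tasks while waiting.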
1457 kmp_int32 __kmpc_omp_taskwait(
ident_t *loc_ref, kmp_int32 gtid) {
1458 kmp_taskdata_t *taskdata;
1460 int thread_finished = FALSE;
1461 KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
1463 KA_TRACE(10, (
"__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
1465 if (__kmp_tasking_mode != tskm_immediate_exec) {
1466 thread = __kmp_threads[gtid];
1467 taskdata = thread->th.th_current_task;
1468 #if OMPT_SUPPORT && OMPT_TRACE 1469 ompt_task_id_t my_task_id;
1470 ompt_parallel_id_t my_parallel_id;
1473 kmp_team_t *team = thread->th.th_team;
1474 my_task_id = taskdata->ompt_task_info.task_id;
1475 my_parallel_id = team->t.ompt_team_info.parallel_id;
1477 taskdata->ompt_task_info.frame.reenter_runtime_frame =
1478 __builtin_frame_address(1);
1479 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
1480 ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id,
1491 taskdata->td_taskwait_counter += 1;
1492 taskdata->td_taskwait_ident = loc_ref;
1493 taskdata->td_taskwait_thread = gtid + 1;
1496 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1497 if (itt_sync_obj != NULL)
1498 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    if (must_wait) {
      kmp_flag_32 flag(
          RCAST(volatile kmp_uint32 *, &taskdata->td_incomplete_child_tasks),
          0U);
      while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
1519 if (itt_sync_obj != NULL)
1520 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
1525 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
1527 #if OMPT_SUPPORT && OMPT_TRACE 1529 if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
1530 ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id,
1533 taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
1536 ANNOTATE_HAPPENS_AFTER(taskdata);
1539 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " 1540 "returning TASK_CURRENT_NOT_QUEUED\n",
1543 return TASK_CURRENT_NOT_QUEUED;
1547 kmp_int32 __kmpc_omp_taskyield(
ident_t *loc_ref, kmp_int32 gtid,
int end_part) {
1548 kmp_taskdata_t *taskdata;
1550 int thread_finished = FALSE;
1553 KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
1555 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1556 gtid, loc_ref, end_part));
1558 if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
1559 thread = __kmp_threads[gtid];
1560 taskdata = thread->th.th_current_task;
1567 taskdata->td_taskwait_counter += 1;
1568 taskdata->td_taskwait_ident = loc_ref;
1569 taskdata->td_taskwait_thread = gtid + 1;
1572 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1573 if (itt_sync_obj != NULL)
1574 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
1576 if (!taskdata->td_flags.team_serial) {
1577 kmp_task_team_t *task_team = thread->th.th_task_team;
1578 if (task_team != NULL) {
1579 if (KMP_TASKING_ENABLED(task_team)) {
1580 __kmp_execute_tasks_32(
1581 thread, gtid, NULL, FALSE,
1582 &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
1583 __kmp_task_stealing_constraint);
1588 if (itt_sync_obj != NULL)
1589 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
1594 taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
1597 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, " 1598 "returning TASK_CURRENT_NOT_QUEUED\n",
1601 return TASK_CURRENT_NOT_QUEUED;
typedef struct kmp_task_red_flags {
  unsigned lazy_priv : 1; // hint: allocate private copies lazily
  unsigned reserved31 : 31;
} kmp_task_red_flags_t;

// internal structure keeping data for one task-reduction item
typedef struct kmp_task_red_data {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_priv; // thread specific data
  void *reduce_pend; // end of private data for comparison op
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_data_t;

// structure passed by the compiler - one element per reduction item
typedef struct kmp_task_red_input {
  void *reduce_shar; // shared reduction item
  size_t reduce_size; // size of data item
  void *reduce_init; // data initialization routine
  void *reduce_fini; // data finalization routine
  void *reduce_comb; // data combiner routine
  kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_input_t;
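
// __kmpc_task_reduction_init: initialize task reduction for the current
// taskgroup; returns the taskgroup pointer to be used by the matching
// __kmpc_task_reduction_get_th_data calls.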
void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  kmp_int32 nth = thread->th.th_team_nproc;
  kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
  kmp_task_red_data_t *arr;

  // check input data just in case
  KMP_ASSERT(tg != NULL);
  KMP_ASSERT(data != NULL);
  KMP_ASSERT(num > 0);
  if (nth == 1) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_task_red_data_t));
  for (int i = 0; i < num; ++i) {
    void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init);
    size_t size = input[i].reduce_size - 1;
    // round the size up to a cache line per thread-specific item
    size += CACHE_LINE - size % CACHE_LINE;
    KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
    arr[i].reduce_shar = input[i].reduce_shar;
    arr[i].reduce_size = size;
    arr[i].reduce_init = input[i].reduce_init;
    arr[i].reduce_fini = input[i].reduce_fini;
    arr[i].reduce_comb = input[i].reduce_comb;
    arr[i].flags = input[i].flags;
    if (!input[i].flags.lazy_priv) {
      // allocate the block of private copies eagerly
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (f_init != NULL) {
        // initialize thread-specific items
        for (int j = 0; j < nth; ++j) {
          f_init((char *)(arr[i].reduce_priv) + j * size);
        }
      }
    } else {
      // only allocate space for pointers now; objects are allocated and
      // initialized lazily once requested
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}
1706 void *__kmpc_task_reduction_get_th_data(
int gtid,
void *tskgrp,
void *data) {
1707 kmp_info_t *thread = __kmp_threads[gtid];
1708 kmp_int32 nth = thread->th.th_team_nproc;
1712 kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
1714 tg = thread->th.th_current_task->td_taskgroup;
1715 KMP_ASSERT(tg != NULL);
1716 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
1717 kmp_int32 num = tg->reduce_num_data;
1718 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
1720 KMP_ASSERT(data != NULL);
1721 while (tg != NULL) {
1722 for (
int i = 0; i < num; ++i) {
1723 if (!arr[i].flags.lazy_priv) {
1724 if (data == arr[i].reduce_shar ||
1725 (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
1726 return (
char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
1729 void **p_priv = (
void **)(arr[i].reduce_priv);
1730 if (data == arr[i].reduce_shar)
1733 for (
int j = 0; j < nth; ++j)
1734 if (data == p_priv[j])
1738 if (p_priv[tid] == NULL) {
1740 void (*f_init)(
void *) = (
void (*)(
void *))(arr[i].reduce_init);
1741 p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
1742 if (f_init != NULL) {
1743 f_init(p_priv[tid]);
1750 arr = (kmp_task_red_data_t *)(tg->reduce_data);
1751 num = tg->reduce_num_data;
1753 KMP_ASSERT2(0,
"Unknown task reduction item");
1759 static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
1760 kmp_int32 nth = th->th.th_team_nproc;
1761 KMP_DEBUG_ASSERT(nth > 1);
1762 kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
1763 kmp_int32 num = tg->reduce_num_data;
1764 for (
int i = 0; i < num; ++i) {
1765 void *sh_data = arr[i].reduce_shar;
1766 void (*f_fini)(
void *) = (
void (*)(
void *))(arr[i].reduce_fini);
1767 void (*f_comb)(
void *,
void *) =
1768 (
void (*)(
void *,
void *))(arr[i].reduce_comb);
1769 if (!arr[i].flags.lazy_priv) {
1770 void *pr_data = arr[i].reduce_priv;
1771 size_t size = arr[i].reduce_size;
1772 for (
int j = 0; j < nth; ++j) {
1773 void *priv_data = (
char *)pr_data + j * size;
1774 f_comb(sh_data, priv_data);
1779 void **pr_data = (
void **)(arr[i].reduce_priv);
1780 for (
int j = 0; j < nth; ++j) {
1781 if (pr_data[j] != NULL) {
1782 f_comb(sh_data, pr_data[j]);
1785 __kmp_free(pr_data[j]);
1789 __kmp_free(arr[i].reduce_priv);
1791 __kmp_thread_free(th, arr);
1792 tg->reduce_data = NULL;
1793 tg->reduce_num_data = 0;
1799 void __kmpc_taskgroup(
ident_t *loc,
int gtid) {
1800 kmp_info_t *thread = __kmp_threads[gtid];
1801 kmp_taskdata_t *taskdata = thread->th.th_current_task;
1802 kmp_taskgroup_t *tg_new =
1803 (kmp_taskgroup_t *)__kmp_thread_malloc(thread,
sizeof(kmp_taskgroup_t));
1804 KA_TRACE(10, (
"__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
1806 tg_new->cancel_request = cancel_noreq;
1807 tg_new->parent = taskdata->td_taskgroup;
1810 tg_new->reduce_data = NULL;
1811 tg_new->reduce_num_data = 0;
1813 taskdata->td_taskgroup = tg_new;
1818 void __kmpc_end_taskgroup(
ident_t *loc,
int gtid) {
1819 kmp_info_t *thread = __kmp_threads[gtid];
1820 kmp_taskdata_t *taskdata = thread->th.th_current_task;
1821 kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
1822 int thread_finished = FALSE;
1824 KA_TRACE(10, (
"__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
1825 KMP_DEBUG_ASSERT(taskgroup != NULL);
1826 KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
1828 if (__kmp_tasking_mode != tskm_immediate_exec) {
1832 void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
1833 if (itt_sync_obj != NULL)
1834 __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#if OMP_45_ENABLED
    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_found_proxy_tasks))
#else
    if (!taskdata->td_flags.team_serial)
#endif
    {
      kmp_flag_32 flag(RCAST(kmp_uint32 *, &taskgroup->count), 0U);
      while (TCR_4(taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
1854 if (itt_sync_obj != NULL)
1855 __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
1858 KMP_DEBUG_ASSERT(taskgroup->count == 0);
1862 if (taskgroup->reduce_data != NULL)
1863 __kmp_task_reduction_fini(thread, taskgroup);
1866 taskdata->td_taskgroup = taskgroup->parent;
1867 __kmp_thread_free(thread, taskgroup);
1869 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
1871 ANNOTATE_HAPPENS_AFTER(taskdata);
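
// __kmp_remove_my_task: remove a task from the tail of the calling thread's
// own deque; returns NULL if the deque is empty or the only candidate task
// would violate the task-stealing constraint.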
1876 static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
1877 kmp_task_team_t *task_team,
1878 kmp_int32 is_constrained) {
1880 kmp_taskdata_t *taskdata;
1881 kmp_thread_data_t *thread_data;
1884 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
1885 KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
1888 thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
1890 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1891 gtid, thread_data->td.td_deque_ntasks,
1892 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1894 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
1896 (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: " 1897 "ntasks=%d head=%u tail=%u\n",
1898 gtid, thread_data->td.td_deque_ntasks,
1899 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1903 __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
1905 if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
1906 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
1908 (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " 1909 "ntasks=%d head=%u tail=%u\n",
1910 gtid, thread_data->td.td_deque_ntasks,
1911 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1915 tail = (thread_data->td.td_deque_tail - 1) &
1916 TASK_DEQUE_MASK(thread_data->td);
1917 taskdata = thread_data->td.td_deque[tail];
1919 if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
1922 kmp_taskdata_t *current = thread->th.th_current_task;
1923 kmp_int32 level = current->td_level;
1924 kmp_taskdata_t *parent = taskdata->td_parent;
1925 while (parent != current && parent->td_level > level) {
1926 parent = parent->td_parent;
1928 KMP_DEBUG_ASSERT(parent != NULL);
1930 if (parent != current) {
1933 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
1935 (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " 1936 "ntasks=%d head=%u tail=%u\n",
1937 gtid, thread_data->td.td_deque_ntasks,
1938 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1943 thread_data->td.td_deque_tail = tail;
1944 TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
1946 __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
1948 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d task %p removed: " 1949 "ntasks=%d head=%u tail=%u\n",
1950 gtid, taskdata, thread_data->td.td_deque_ntasks,
1951 thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
1953 task = KMP_TASKDATA_TO_TASK(taskdata);
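
// __kmp_steal_task: steal a task from the head of the victim thread's deque;
// returns NULL if nothing could be stolen.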
1960 static kmp_task_t *__kmp_steal_task(kmp_info_t *victim, kmp_int32 gtid,
1961 kmp_task_team_t *task_team,
1962 volatile kmp_int32 *unfinished_threads,
1963 int *thread_finished,
1964 kmp_int32 is_constrained) {
1966 kmp_taskdata_t *taskdata;
1967 kmp_thread_data_t *victim_td, *threads_data;
1968 kmp_int32 victim_tid;
1970 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
1972 threads_data = task_team->tt.tt_threads_data;
1973 KMP_DEBUG_ASSERT(threads_data != NULL);
1975 victim_tid = victim->th.th_info.ds.ds_tid;
1976 victim_td = &threads_data[victim_tid];
1978 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: " 1979 "task_team=%p ntasks=%d " 1980 "head=%u tail=%u\n",
1981 gtid, __kmp_gtid_from_thread(victim), task_team,
1982 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1983 victim_td->td.td_deque_tail));
1985 if ((TCR_4(victim_td->td.td_deque_ntasks) ==
1987 (TCR_PTR(victim->th.th_task_team) !=
1990 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: " 1992 "ntasks=%d head=%u tail=%u\n",
1993 gtid, __kmp_gtid_from_thread(victim), task_team,
1994 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1995 victim_td->td.td_deque_tail));
1999 __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
2002 if ((TCR_4(victim_td->td.td_deque_ntasks) == 0) ||
2003 (TCR_PTR(victim->th.th_task_team) !=
2006 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2007 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: " 2009 "ntasks=%d head=%u tail=%u\n",
2010 gtid, __kmp_gtid_from_thread(victim), task_team,
2011 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2012 victim_td->td.td_deque_tail));
2016 KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
2018 taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
2019 if (is_constrained) {
2022 kmp_taskdata_t *current = __kmp_threads[gtid]->th.th_current_task;
2023 kmp_int32 level = current->td_level;
2024 kmp_taskdata_t *parent = taskdata->td_parent;
2025 while (parent != current && parent->td_level > level) {
2026 parent = parent->td_parent;
2028 KMP_DEBUG_ASSERT(parent != NULL);
2030 if (parent != current) {
2034 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2035 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from " 2036 "T#%d: task_team=%p " 2037 "ntasks=%d head=%u tail=%u\n",
2039 __kmp_gtid_from_thread(threads_data[victim_tid].td.td_thr),
2040 task_team, victim_td->td.td_deque_ntasks,
2041 victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
2046 victim_td->td.td_deque_head =
2047 (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
2048 if (*thread_finished) {
2054 count = KMP_TEST_THEN_INC32(unfinished_threads);
2058 (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
2059 gtid, count + 1, task_team));
2061 *thread_finished = FALSE;
2063 TCW_4(victim_td->td.td_deque_ntasks,
2064 TCR_4(victim_td->td.td_deque_ntasks) - 1);
2066 __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
2071 (
"__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p " 2072 "ntasks=%d head=%u tail=%u\n",
2073 gtid, taskdata, __kmp_gtid_from_thread(victim), task_team,
2074 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
2075 victim_td->td.td_deque_tail));
2077 task = KMP_TASKDATA_TO_TASK(taskdata);
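
// __kmp_execute_tasks_template: main task-execution loop used while waiting at
// barriers, taskwait and taskgroup: run tasks from the thread's own deque
// first, then try to steal from other threads until the wait condition given
// by the flag is satisfied.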
2091 static inline int __kmp_execute_tasks_template(
2092 kmp_info_t *thread, kmp_int32 gtid, C *flag,
int final_spin,
2093 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
2094 kmp_int32 is_constrained) {
2095 kmp_task_team_t *task_team = thread->th.th_task_team;
2096 kmp_thread_data_t *threads_data;
2098 kmp_info_t *other_thread;
2099 kmp_taskdata_t *current_task = thread->th.th_current_task;
2100 volatile kmp_int32 *unfinished_threads;
2101 kmp_int32 nthreads, victim = -2, use_own_tasks = 1, new_victim = 0,
2102 tid = thread->th.th_info.ds.ds_tid;
2104 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
2105 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
2107 if (task_team == NULL)
2110 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d " 2111 "*thread_finished=%d\n",
2112 gtid, final_spin, *thread_finished));
2114 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
2115 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2116 KMP_DEBUG_ASSERT(threads_data != NULL);
2118 nthreads = task_team->tt.tt_nproc;
2119 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
2121 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
2123 KMP_DEBUG_ASSERT(nthreads > 1);
2125 KMP_DEBUG_ASSERT(TCR_4(*unfinished_threads) >= 0);
2131 if (use_own_tasks) {
2132 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
2134 if ((task == NULL) && (nthreads > 1)) {
2139 victim = threads_data[tid].td.td_deque_last_stolen;
2142 other_thread = threads_data[victim].td.td_thr;
2146 }
else if (!new_victim) {
2152 victim = __kmp_get_random(thread) % (nthreads - 1);
2153 if (victim >= tid) {
2157 other_thread = threads_data[victim].td.td_thr;
2167 if ((__kmp_tasking_mode == tskm_task_teams) &&
2168 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
2169 (TCR_PTR(CCAST(
void *, other_thread->th.th_sleep_loc)) !=
2172 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
2173 other_thread->th.th_sleep_loc);
2186 task = __kmp_steal_task(other_thread, gtid, task_team,
2187 unfinished_threads, thread_finished,
2191 if (threads_data[tid].td.td_deque_last_stolen != victim) {
2192 threads_data[tid].td.td_deque_last_stolen = victim;
2199 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
2208 #if USE_ITT_BUILD && USE_ITT_NOTIFY 2209 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2210 if (itt_sync_obj == NULL) {
2212 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
2214 __kmp_itt_task_starting(itt_sync_obj);
2217 __kmp_invoke_task(gtid, task, current_task);
2219 if (itt_sync_obj != NULL)
2220 __kmp_itt_task_finished(itt_sync_obj);
2227 if (flag == NULL || (!final_spin && flag->done_check())) {
2230 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
2234 if (thread->th.th_task_team == NULL) {
2238 KMP_YIELD(__kmp_library == library_throughput);
2241 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
2242 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned " 2243 "other tasks, restart\n",
2255 if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
2263 if (!*thread_finished) {
2266 count = KMP_TEST_THEN_DEC32(unfinished_threads) - 1;
2267 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d dec " 2268 "unfinished_threads to %d task_team=%p\n",
2269 gtid, count, task_team));
2270 *thread_finished = TRUE;
2278 if (flag != NULL && flag->done_check()) {
2281 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
2289 if (thread->th.th_task_team == NULL) {
2291 (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
2304 (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
2310 int __kmp_execute_tasks_32(
2311 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag,
int final_spin,
2312 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
2313 kmp_int32 is_constrained) {
2314 return __kmp_execute_tasks_template(
2315 thread, gtid, flag, final_spin,
2316 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2319 int __kmp_execute_tasks_64(
2320 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag,
int final_spin,
2321 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
2322 kmp_int32 is_constrained) {
2323 return __kmp_execute_tasks_template(
2324 thread, gtid, flag, final_spin,
2325 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
2328 int __kmp_execute_tasks_oncore(
2329 kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag,
int final_spin,
2330 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
2331 kmp_int32 is_constrained) {
2332 return __kmp_execute_tasks_template(
2333 thread, gtid, flag, final_spin,
2334 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
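
// __kmp_enable_tasking: allocate the task team's per-thread data (first caller
// only) and wake up sleeping team members so they can start executing tasks.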
2340 static void __kmp_enable_tasking(kmp_task_team_t *task_team,
2341 kmp_info_t *this_thr) {
2342 kmp_thread_data_t *threads_data;
2343 int nthreads, i, is_init_thread;
2345 KA_TRACE(10, (
"__kmp_enable_tasking(enter): T#%d\n",
2346 __kmp_gtid_from_thread(this_thr)));
2348 KMP_DEBUG_ASSERT(task_team != NULL);
2349 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
2351 nthreads = task_team->tt.tt_nproc;
2352 KMP_DEBUG_ASSERT(nthreads > 0);
2353 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
2356 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
2358 if (!is_init_thread) {
2362 (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2363 __kmp_gtid_from_thread(this_thr)));
2366 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
2367 KMP_DEBUG_ASSERT(threads_data != NULL);
2369 if ((__kmp_tasking_mode == tskm_task_teams) &&
2370 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
2374 for (i = 0; i < nthreads; i++) {
2375 volatile void *sleep_loc;
2376 kmp_info_t *thread = threads_data[i].td.td_thr;
2378 if (i == this_thr->th.th_info.ds.ds_tid) {
2387 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
2389 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2390 __kmp_gtid_from_thread(this_thr),
2391 __kmp_gtid_from_thread(thread)));
2392 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2394 KF_TRACE(50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2395 __kmp_gtid_from_thread(this_thr),
2396 __kmp_gtid_from_thread(thread)));
2401 KA_TRACE(10, (
"__kmp_enable_tasking(exit): T#%d\n",
2402 __kmp_gtid_from_thread(this_thr)));
2439 static kmp_task_team_t *__kmp_free_task_teams =
2442 static kmp_bootstrap_lock_t __kmp_task_team_lock =
2443 KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
2450 static void __kmp_alloc_task_deque(kmp_info_t *thread,
2451 kmp_thread_data_t *thread_data) {
2452 __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
2453 KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
2456 thread_data->td.td_deque_last_stolen = -1;
2458 KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
2459 KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
2460 KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
2464 (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2465 __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
2469 thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
2470 INITIAL_TASK_DEQUE_SIZE *
sizeof(kmp_taskdata_t *));
2471 thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
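
// __kmp_realloc_task_deque: double the size of the calling thread's task
// deque, copying the existing tasks into the new storage (the caller must hold
// the deque lock).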
// __kmp_realloc_task_deque: Double the size of a thread's task deque, copying
// the existing entries over and resetting head/tail.  The deque lock must be
// held by the caller.
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
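// Illustrative note (not part of the runtime): because the copy loop above
// starts at td_deque_head and walks `size` entries with the old mask, the
// live tasks land in new_deque[0 .. size-1] in FIFO order, which is why the
// head can simply be reset to 0 and the tail to `size`.  For example, with
// size == 4, head == 3 and tasks {A,B,C,D} stored as old_deque = [B, C, D, A],
// the loop copies A, B, C, D into slots 0..3 of the new deque (capacity 8),
// leaving head = 0 and tail = 4.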
// __kmp_free_task_deque: Deallocate a thread's task deque.  Happens at library
// shutdown, so the remaining fields need not be carefully reset.
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  if (thread_data->td.td_deque != NULL) {
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
  }
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

#ifdef BUILD_TIED_TASK_STACK
  // Also free the suspended tied-task stack if it is non-empty.
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}

// __kmp_realloc_task_threads_data: Allocate or grow the per-thread data array
// of a task team.  The first thread to see a task in the team performs the
// (re)allocation and returns TRUE; every other thread returns FALSE.
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
  kmp_thread_data_t **threads_data_p;
  kmp_int32 nthreads, maxthreads;
  int is_init_thread = FALSE;

  if (TCR_4(task_team->tt.tt_found_tasks)) {
    // Already reallocated and initialized.
    return FALSE;
  }

  threads_data_p = &task_team->tt.tt_threads_data;
  nthreads = task_team->tt.tt_nproc;
  maxthreads = task_team->tt.tt_max_threads;

  // All threads must lock when they encounter the first task of the implicit
  // task region to make sure threads_data fields are (re)initialized before
  // they are used.
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  if (!TCR_4(task_team->tt.tt_found_tasks)) {
    // First thread to enable tasking in this team.
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
    if (maxthreads < nthreads) {

      if (*threads_data_p != NULL) {
        kmp_thread_data_t *old_data = *threads_data_p;
        kmp_thread_data_t *new_data = NULL;

        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // Grow threads_data; __kmp_allocate() zero-initializes new entries.
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        // Copy the old data into the new array.
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

#ifdef BUILD_TIED_TASK_STACK
        // Initialize task stacks for the newly added entries.
        for (i = maxthreads; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
        // Install the new data and free the old data.
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        // Initial allocation; __kmp_allocate() zeroes the entries.
        ANNOTATE_IGNORE_WRITES_BEGIN();
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
        // Initialize task stacks for all entries.
        for (i = 0; i < nthreads; i++) {
          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
        }
#endif // BUILD_TIED_TASK_STACK
      }
      task_team->tt.tt_max_threads = nthreads;
    } else {
      // The array already has enough elements; use it as-is.
      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
    }

    // Point the threads_data entries back at the kmp_info_t structures.
    for (i = 0; i < nthreads; i++) {
      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
      thread_data->td.td_thr = team->t.t_threads[i];

      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        // The last-stolen field survives across teams and barriers, and the
        // number of threads may have changed; reset it to "none".
        thread_data->td.td_deque_last_stolen = -1;
      }
    }

    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
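// Illustrative sketch (not part of the runtime): the function above is the
// usual check / lock / re-check idiom.  Assuming tt_found_tasks is only ever
// set to TRUE while tt_threads_lock is held, the pattern reduces to:
//
//   if (already_initialized)        // cheap unsynchronized read
//     return FALSE;
//   acquire(lock);
//   if (!already_initialized) {     // re-check under the lock
//     ... allocate / grow the per-thread array ...
//     already_initialized = TRUE;   // published while holding the lock
//   }
//   release(lock);
//
// Only the thread that performs the initialization reports TRUE, which is
// what __kmp_enable_tasking uses to decide whether to wake the workers.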
// __kmp_free_task_threads_data: Deallocate a task team's threads_data array
// and the task deques attached to it.  Only happens at library shutdown.
static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  if (task_team->tt.tt_threads_data != NULL) {
    int i;
    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
    }
    __kmp_free(task_team->tt.tt_threads_data);
    task_team->tt.tt_threads_data = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
// __kmp_allocate_task_team: Allocate a task team for a specific team, reusing
// one from the global free list if possible, and initialize its fields.
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;
  int nthreads;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Take a task team from the task team pool.
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    if (__kmp_free_task_teams != NULL) {
      task_team = __kmp_free_task_teams;
      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }

  if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team; __kmp_allocate() zeroes the returned memory,
    // so only the lock needs explicit initialization.
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
  }

  TCW_4(task_team->tt.tt_found_tasks, FALSE);
  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
  task_team->tt.tt_nproc = nthreads = team->t.t_nproc;

  TCW_4(task_team->tt.tt_unfinished_threads, nthreads);
  TCW_4(task_team->tt.tt_active, TRUE);

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                task_team->tt.tt_unfinished_threads));
  return task_team;
}
// __kmp_free_task_team: Return a task team to the global free list.
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));

  // Put the task team back on the free list.
  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);

  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
  task_team->tt.tt_next = __kmp_free_task_teams;
  TCW_PTR(__kmp_free_task_teams, task_team);

  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
// __kmp_reap_task_teams: Free all task teams on the free list.  Should only be
// done at library shutdown, when no thread structures or gtids remain.
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Free all task_teams on the free list.
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    while ((task_team = __kmp_free_task_teams) != NULL) {
      __kmp_free_task_teams = task_team->tt.tt_next;
      task_team->tt.tt_next = NULL;

      // Free threads_data if necessary.
      if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}
// __kmp_wait_to_unref_task_teams: Wait until each thread in the thread pool
// has stopped referencing its task team, so the structures can be reaped.
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  int done;

  KMP_INIT_YIELD(spins);

  for (;;) {
    done = TRUE;
    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
         thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
      DWORD exit_val;
#endif
      if (TCR_PTR(thread->th.th_task_team) == NULL) {
        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                      __kmp_gtid_from_thread(thread)));
        continue;
      }
#if KMP_OS_WINDOWS
      // A thread that has already exited cannot unreference anything.
      if (!__kmp_is_thread_alive(thread, &exit_val)) {
        thread->th.th_task_team = NULL;
        continue;
      }
#endif
      done = FALSE; // this thread still references a task team

      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));

      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        volatile void *sleep_loc;
        // If the thread is sleeping, awaken it.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
        }
      }
    }
    if (done)
      break;

    // Yield while waiting; back off harder if the machine is oversubscribed.
    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
    KMP_YIELD_SPIN(spins);
  }
}
// __kmp_task_team_setup: Create a task_team for the current team, but use an
// existing one from the free list if possible.  Only the master calls this.
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // If this task_team hasn't been created yet, allocate it.  It will be used
  // in the region after the next one.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
      (always || team->t.t_nproc > 1)) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
                  "for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state],
                  ((team != NULL) ? team->t.t_id : -1),
                  this_thr->th.th_task_state));
  }

  // After threads leave the barrier release they switch (in
  // __kmp_task_team_sync) to the other task team, so make sure it is
  // allocated and properly initialized as well.
  if (team->t.t_nproc > 1) {
    int other_team = 1 - this_thr->th.th_task_state;
    if (team->t.t_task_team[other_team] == NULL) { // set up the other team too
      team->t.t_task_team[other_team] =
          __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
                    "task_team %p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    } else { // leave the old task team in place; reset it if needed
      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
      if (!task_team->tt.tt_active ||
          team->t.t_nproc != task_team->tt.tt_nproc) {
        TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
        TCW_4(task_team->tt.tt_found_tasks, FALSE);
        TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
        TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc);
        TCW_4(task_team->tt.tt_active, TRUE);
      }
      // If the team size changed, the first thread to enable tasking will
      // reallocate threads_data.
      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
                    "%p for team %d at parity=%d\n",
                    __kmp_gtid_from_thread(this_thr),
                    team->t.t_task_team[other_team],
                    ((team != NULL) ? team->t.t_id : -1), other_team));
    }
  }
}
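// Illustrative timeline (not part of the runtime): t_task_team[] holds two
// task teams that are used on alternating barrier "parities".  With
// th_task_state starting at 0 for the master:
//
//   region N:   threads work out of t_task_team[0];
//               the master pre-allocates or resets t_task_team[1] here.
//   barrier:    __kmp_task_team_sync flips th_task_state to 1.
//   region N+1: threads work out of t_task_team[1], while workers that are
//               still draining region N's tasks can keep referencing
//               t_task_team[0] safely.
//
// This is why the "other" task team is set up eagerly instead of on first use.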
// __kmp_task_team_sync: Propagate the task team pointer to the calling thread
// and flip its task-state parity.  Called by all threads at the barrier
// gather point.
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // Toggle th_task_state to switch which task_team this thread refers to.
  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
  // It is now safe to read the task team pointer from the team structure.
  TCW_PTR(this_thr->th.th_task_team,
          team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
}
// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
// barrier gather phase, then deactivates the task team.  Only the master
// calls this.
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);

  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Workers may still be executing tasks; wait for them to finish.  Only
      // the master checks the termination condition, to limit contention.
      kmp_flag_32 flag(
          RCAST(volatile kmp_uint32 *, &task_team->tt.tt_unfinished_threads),
          0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    // Deactivate the old task team so the workers stop referencing it while
    // they spin in the barrier release phase.
    KA_TRACE(
        20,
        ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
         "setting active to false, setting local and team's pointer to NULL\n",
         __kmp_gtid_from_thread(this_thr), task_team));
#if OMP_45_ENABLED
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
                     task_team->tt.tt_found_proxy_tasks == TRUE);
    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#else
    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
#endif
    TCW_SYNC_4(task_team->tt.tt_active, FALSE);

    TCW_PTR(this_thr->th.th_task_team, NULL);
  }
}
// __kmp_tasking_barrier: Execute all pending tasks before the regular barrier.
// Only used when __kmp_tasking_mode == tskm_extra_barrier.
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  volatile kmp_uint32 *spin = RCAST(
      volatile kmp_uint32 *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

  KMP_FSYNC_SPIN_INIT(spin, (kmp_uint32 *)NULL);
  kmp_flag_32 spin_flag(spin, 0U);
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
    KMP_FSYNC_SPIN_PREPARE(CCAST(kmp_uint32 *, spin));

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE);
  }
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(kmp_uint32 *, spin));
}
// __kmp_give_task: Put a task into the deque of the given thread, provided the
// deque exists and has space.  Returns false when the task could not be given
// away, so the caller can try another thread.
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = taskdata->td_task_team;

  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If task_team is NULL something went very badly wrong.
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];

  if (thread_data->td.td_deque == NULL) {
    // There is no deque in this thread; try another one.
    KA_TRACE(
        30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
             tid, taskdata));
    return result;
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));
    // If this deque is bigger than the pass ratio, give another thread a
    // chance before growing it.
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    __kmp_realloc_task_deque(thread, thread_data);
  } else {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Re-check fullness now that the lock is held.
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;

      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  // The lock is held here, and there is space in the deque.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));

release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return result;
}
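// Illustrative note (not part of the runtime): the `pass` argument throttles
// how aggressively a full deque is grown.  Assuming INITIAL_TASK_DEQUE_SIZE is
// 256, a deque of capacity 256 has ratio 1, capacity 512 has ratio 2, and so
// on.  On the caller's first lap over the team (pass == 1) any full deque that
// is already at or above its initial capacity is skipped (ratio >= pass); only
// after a full unsuccessful lap does the caller double `pass`, which then
// permits deques up to twice the previous ratio to be reallocated.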
// First "top half" of finishing a proxy task: may run on a thread outside the
// team.  Marks the task complete and adds an imaginary child so the bottom
// half cannot free the task before the second top half has run.
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1; // mark the task as completed

  if (taskdata->td_taskgroup)
    KMP_TEST_THEN_DEC32(&taskdata->td_taskgroup->count);

  // Create an imaginary child for this task so the bottom half cannot release
  // the task before the second top half completes.
  TCI_4(taskdata->td_incomplete_child_tasks);
}
static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  kmp_int32 children = 0;
  // Predecrement simulated by the "- 1" calculation.
  children =
      KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);
  // Remove the imaginary child created by the first top half.
  TCD_4(taskdata->td_incomplete_child_tasks);
}
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
                   1); // the top half must have run before the bottom half

  // Wait until the second top half has removed the imaginary child.
  while (TCR_4(taskdata->td_incomplete_child_tasks) > 0)
    ;

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
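// Illustrative ordering sketch (not part of the runtime) for the three proxy
// completion helpers above:
//
//   1. first_top_half:  mark the proxy complete, leave the taskgroup, and add
//                       an "imaginary" child (TCI_4) so the task cannot be
//                       freed yet.
//   2. (out-of-team case only) queue the bottom half to a team thread.
//   3. second_top_half: decrement the parent's incomplete-children count and
//                       remove the imaginary child (TCD_4).
//   4. bottom_half:     spin until the imaginary child is gone, then release
//                       dependences and free the task and its ancestors.
//
// The imaginary child is what prevents step 4 from freeing the task while
// step 3 is still in flight.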
// __kmpc_proxy_task_completed: Complete a proxy task from a thread that is
// part of the team; runs both top halves and the bottom half directly.
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
           gtid, taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);
  __kmp_second_top_half_finish_proxy(taskdata);
  __kmp_bottom_half_finish_proxy(gtid, ptask);

  KA_TRACE(
      10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
           gtid, taskdata));
}
// __kmpc_proxy_task_completed_ooo: Complete a proxy task from a thread that is
// NOT part of the team; the bottom half is queued onto a team thread.
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);

  // Enqueue the bottom half so a thread inside the team finishes it.
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;
  kmp_int32 start_k = 0;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // Round-robin over the team's threads; after every full lap double `pass`
    // so that __kmp_give_task is eventually allowed to grow a deque.
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;
    if (k == start_k)
      pass = pass << 1;
  } while (!__kmp_give_task(thread, k, ptask, pass));

  __kmp_second_top_half_finish_proxy(taskdata);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
// __kmp_task_dup_alloc: Allocate a duplicate of a task and set up its
// bookkeeping; used by the taskloop implementation to create per-chunk copies
// of the pattern task.
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
  taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
                   TASK_FULL); // it should not be a proxy task
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  task_size = taskdata_src->td_size_alloc;

  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif /* USE_FAST_MEMORY */
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Initialize the fields not covered by the memcpy from the source task.
  taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need to set up the shareds pointer
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits the taskgroup from the parent

  // Only track child task counts if the team is parallel and tasking is not
  // serialized.
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_TEST_THEN_INC32(&parent_task->td_taskgroup->count);
    // Only track allocated child tasks for explicit tasks, since implicit
    // tasks are not deallocated this way.
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
      KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks);
  }

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
  __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid,
                       (void *)task->routine);
  return task;
}
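// Illustrative arithmetic (not part of the runtime): the shareds pointer is
// relocated by preserving its byte offset from the start of the taskdata
// block.  If the source block lives at 0x1000 and its shareds at 0x1040, the
// offset is 0x40; a copy allocated at 0x2000 therefore gets its shareds
// pointer set to 0x2040, which keeps the alignment assertion above valid.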
3271 typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
// __kmp_taskloop_linear: Launch the tasks of a taskloop range linearly: one
// task per chunk of `grainsize` (or grainsize+1 for the first `extras`)
// iterations, each duplicated from the pattern task.
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_uint64 tc, void *task_dup) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb; // compiler provides the global bounds here
  kmp_uint64 upper = *ub;
  kmp_uint64 i;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;
  size_t lower_offset = (char *)lb - (char *)task; // offset of lb in the task
  size_t upper_offset = (char *)ub - (char *)task; // offset of ub in the task

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, lower, upper, ub_glob, st,
                task_dup));

  // Launch num_tasks tasks, assigning grainsize iterations to each.
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // the first `extras` tasks get one extra iteration
    }
    upper = lower + st * chunk_minus_1; // upper bound for this chunk
    if (i == num_tasks - 1) {
      // Scheduling the last task; set the lastprivate flag if needed.
      if (st == 1) { // most common case
        KMP_DEBUG_ASSERT(upper == *ub);
        if (upper == ub_glob)
          lastpriv = 1;
      } else if (st > 0) { // positive loop stride
        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
        if ((kmp_uint64)st > ub_glob - upper)
          lastpriv = 1;
      } else { // negative loop stride
        KMP_DEBUG_ASSERT(upper + st < *ub);
        if (upper - ub_glob < (kmp_uint64)(-st))
          lastpriv = 1;
      }
    }
    next_task = __kmp_task_dup_alloc(thread, task); // allocate a new task
    // Adjust the task-specific bounds.
    *(kmp_uint64 *)((char *)next_task + lower_offset) = lower;
    *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, ...
      ptask_dup(next_task, task, lastpriv);
    KA_TRACE(40, ("__kmp_taskloop_linear: T#%d; task %p: lower %lld, "
                  "upper %lld (offsets %p %p)\n",
                  gtid, next_task, lower, upper, lower_offset, upper_offset));
    __kmp_omp_task(gtid, next_task, true); // schedule the new task
    lower = upper + st; // adjust the lower bound for the next chunk
  }
  // The pattern task is never actually executed; just retire it.
  __kmp_task_start(gtid, task, current_task);
  __kmp_task_finish(gtid, task, current_task);
}
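// Illustrative arithmetic (not part of the runtime): with tc = 10 iterations,
// num_tasks = 4, grainsize = 2 and extras = 2 (so tc == 4*2 + 2), the loop
// above produces chunks of 3, 3, 2 and 2 iterations: the first `extras` tasks
// each take grainsize + 1 iterations (chunk_minus_1 stays at `grainsize` while
// extras is decremented), and the remaining tasks take exactly `grainsize`.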
// Parameter block used to hand half of a taskloop range to the helper task
// scheduled by __kmp_taskloop_recur.
typedef struct __taskloop_params {
  kmp_task_t *task;
  kmp_uint64 *lb, *ub;
  void *task_dup;
  kmp_int64 st;
  kmp_uint64 ub_glob, num_tasks, grainsize, extras, tc, num_t_min;
} __taskloop_params_t;
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                          kmp_uint64, kmp_uint64, kmp_uint64, kmp_uint64,
                          void *);
// __kmp_taskloop_task: Entry routine of the auxiliary task that carries one
// half of a recursively split taskloop range.
int __kmp_taskloop_task(int gtid, void *ptask) {
  __taskloop_params_t *p =
      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
  kmp_task_t *task = p->task;
  kmp_uint64 *lb = p->lb;
  kmp_uint64 *ub = p->ub;
  void *task_dup = p->task_dup;
  kmp_int64 st = p->st;
  kmp_uint64 ub_glob = p->ub_glob;
  kmp_uint64 num_tasks = p->num_tasks;
  kmp_uint64 grainsize = p->grainsize;
  kmp_uint64 extras = p->extras;
  kmp_uint64 tc = p->tc;
  kmp_uint64 num_t_min = p->num_t_min;
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KA_TRACE(20, ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
  if (num_tasks > num_t_min)
    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, tc, num_t_min, task_dup);
  else
    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc, task_dup);

  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  return 0;
}
// __kmp_taskloop_recur: Split the taskloop range in two halves; the second
// half is handed to an auxiliary task (__kmp_taskloop_task) and the first half
// is processed recursively until num_tasks drops below num_t_min.
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_uint64 tc, kmp_uint64 num_t_min,
                          void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  KA_TRACE(20, ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
                " %lld, extras %lld, i=%lld,%lld(%d), dup %p\n",
                gtid, taskdata, num_tasks, grainsize, extras, *lb, *ub, st,
                task_dup));
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb;
  kmp_uint64 upper = *ub;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;
  size_t lower_offset = (char *)lb - (char *)task;
  size_t upper_offset = (char *)ub - (char *)task;

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // Split the range in two halves.
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1; // tasks for the first half
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // tasks for the second half
  if (n_tsk0 <= extras) {
    gr_size0++; // integrate the extras into the first half's grainsize
    ext0 = 0; // no extra iterations in the first half
    ext1 = extras - n_tsk0; // remaining extras go to the second half
    tc0 = gr_size0 * n_tsk0; // iteration count of the first half
    tc1 = tc - tc0;
  } else { // n_tsk0 > extras
    ext1 = 0; // no extra iterations in the second half
    ext0 = extras;
    tc1 = grainsize * n_tsk1; // iteration count of the second half
    tc0 = tc - tc1;
  }
  ub0 = lower + st * (tc0 - 1); // upper bound of the first half
  lb1 = ub0 + st; // lower bound of the second half

  // Create a pattern task for the second half of the range.
  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
  // Adjust the lower bound (the upper bound stays *ub) for the second half.
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates, etc.
    ptask_dup(next_task, task, 0);
  *ub = ub0; // adjust the upper bound for the first half

  // Create an auxiliary task carrying the second half of the range.
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
  __kmp_omp_task(gtid, new_task, true); // schedule the auxiliary task

  // Process the first half of the range on this thread.
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                         gr_size0, ext0, tc0, num_t_min, task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, tc0, task_dup);

  KA_TRACE(40, ("__kmpc_taskloop_recur(exit): T#%d\n", gtid));
}
// __kmpc_taskloop: Entry point for the taskloop construct.
//   loc/gtid   source location and global thread number
//   task       pattern task that exposes the loop body and bounds
//   if_val     value of the if clause (0 forces serial execution)
//   lb/ub/st   loop bounds (stored in the task structure) and stride
//   nogroup    1 if no implicit taskgroup should be created
//   sched      0: no schedule clause, 1: grainsize given, 2: num_tasks given
//   grainsize  schedule value if specified
//   task_dup   task duplication routine
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);

  KA_TRACE(20, ("__kmpc_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d), dup %p\n",
                gtid, taskdata, *lb, *ub, st, grainsize, sched, task_dup));

  if (nogroup == 0)
    __kmpc_taskgroup(loc, gtid);

  // Calculate the loop parameters.
  kmp_uint64 tc;
  kmp_uint64 lower = *lb; // compiler provides the global bounds here
  kmp_uint64 upper = *ub;
  kmp_uint64 ub_glob = upper; // global upper bound used for lastprivate check
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  // Compute the trip count.
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // Retire the pattern task and exit without executing anything.
    __kmp_task_start(gtid, task, current_task);
    __kmp_task_finish(gtid, task, current_task);
    return;
  }
  if (num_tasks_min == 0)
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // Compute num_tasks/grainsize based on the input provided.
  switch (sched) {
  case 0: // no schedule clause specified; aim for (team_size * 10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    // falls through to the num_tasks case
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too many tasks requested, adjust the values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1; // too big a grainsize requested, adjust the values
      grainsize = tc;
      extras = 0;
    } else {
      num_tasks = tc / grainsize;
      // Adjust grainsize for a balanced distribution of iterations.
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  if (if_val == 0) { // if(0) specified, execute the chunks serially
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // a serial task cannot be untied
    // Serial tasks always start linearly.
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc, task_dup);
  } else if (num_tasks > num_tasks_min) {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, tc, num_tasks_min, task_dup);
  } else {
    KA_TRACE(20, ("__kmpc_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, tc, task_dup);
  }

  if (nogroup == 0)
    __kmpc_end_taskgroup(loc, gtid);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}