19 #include "kmp_wait_release.h"
22 #include "ompt-specific.h"
31 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
32 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
33 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
36 static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
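// __kmp_null_resume_wrapper: wake a thread sleeping on a wait flag, dispatching on the flag type (32-bit, 64-bit, or oncore).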
39 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
40     switch (((kmp_flag_64 *)flag)->get_type()) {
41     case flag32: __kmp_resume_32(gtid, NULL); break;
42     case flag64: __kmp_resume_64(gtid, NULL); break;
43     case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
47 #ifdef BUILD_TIED_TASK_STACK
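// __kmp_trace_task_stack: print the tied tasks on the task stack, from the top entry down to the first block (debug support for BUILD_TIED_TASK_STACK).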
59 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
61 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
62 kmp_taskdata_t **stack_top = task_stack -> ts_top;
63 kmp_int32 entries = task_stack -> ts_entries;
64 kmp_taskdata_t *tied_task;
66 KA_TRACE(threshold, (
"__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
67 "first_block = %p, stack_top = %p \n",
68 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
70 KMP_DEBUG_ASSERT( stack_top != NULL );
71 KMP_DEBUG_ASSERT( entries > 0 );
73 while ( entries != 0 )
75 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
77 if ( (entries & TASK_STACK_INDEX_MASK) == 0 )
79 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
81 stack_block = stack_block -> sb_prev;
82 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
89 tied_task = * stack_top;
91 KMP_DEBUG_ASSERT( tied_task != NULL );
92 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
94 KA_TRACE(threshold, (
"__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
95 "stack_top=%p, tied_task=%p\n",
96 location, gtid, entries, stack_top, tied_task ) );
98 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
100 KA_TRACE(threshold, (
"__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
113 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
115 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
116 kmp_stack_block_t *first_block;
119 first_block = & task_stack -> ts_first_block;
120 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
121 memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
124 task_stack -> ts_entries = TASK_STACK_EMPTY;
125 first_block -> sb_next = NULL;
126 first_block -> sb_prev = NULL;
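// __kmp_free_task_stack: free the tied-task stack when thread_data is destroyed; the stack must already be empty.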
137 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
139 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
140 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
142 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
144 while ( stack_block != NULL ) {
145 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
147 stack_block -> sb_next = NULL;
148 stack_block -> sb_prev = NULL;
149 if (stack_block != & task_stack -> ts_first_block) {
150 __kmp_thread_free( thread, stack_block );
152 stack_block = next_block;
155 task_stack -> ts_entries = 0;
156 task_stack -> ts_top = NULL;
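// __kmp_push_task_stack: push a tied task onto the thread's task stack, allocating a new stack block when the current one fills up.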
169 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
172 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
173 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
174 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
176 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
180 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
181 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
183 KA_TRACE(20, (
"__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
184 gtid, thread, tied_task ) );
186 * (task_stack -> ts_top) = tied_task;
189 task_stack -> ts_top++;
190 task_stack -> ts_entries++;
192 if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
195 kmp_stack_block_t *stack_block =
196 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
199 if ( stack_block -> sb_next != NULL )
201 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
205 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
206     __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
208 task_stack -> ts_top = & new_block -> sb_block[0];
209 stack_block -> sb_next = new_block;
210 new_block -> sb_prev = stack_block;
211 new_block -> sb_next = NULL;
213 KA_TRACE(30, (
"__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
214 gtid, tied_task, new_block ) );
217 KA_TRACE(20, (
"__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
230 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
233 kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
234 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
235 kmp_taskdata_t *tied_task;
237 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
241 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
242 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
244 KA_TRACE(20, (
"__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
247 if ( (task_stack -> ts_entries & TASK_STACK_INDEX_MASK) == 0 )
249 kmp_stack_block_t *stack_block =
250 (kmp_stack_block_t *) (task_stack -> ts_top) ;
252 stack_block = stack_block -> sb_prev;
253 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
257 task_stack -> ts_top--;
258 task_stack -> ts_entries--;
260 tied_task = * (task_stack -> ts_top );
262 KMP_DEBUG_ASSERT( tied_task != NULL );
263 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
264 KMP_DEBUG_ASSERT( tied_task == ending_task );
266 KA_TRACE(20, (
"__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
275 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
277 kmp_info_t * thread = __kmp_threads[ gtid ];
278 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
279 kmp_task_team_t * task_team = thread->th.th_task_team;
280 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
281 kmp_thread_data_t * thread_data;
283 KA_TRACE(20, (
"__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
286 if ( taskdata->td_flags.task_serial ) {
287 KA_TRACE(20, (
"__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
289 return TASK_NOT_PUSHED;
293 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
294 if ( ! KMP_TASKING_ENABLED(task_team) ) {
295 __kmp_enable_tasking( task_team, thread );
297 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
298 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
301 thread_data = & task_team -> tt.tt_threads_data[ tid ];
304 if (thread_data -> td.td_deque == NULL ) {
305 __kmp_alloc_task_deque( thread, thread_data );
309 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
311 KA_TRACE(20, (
"__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
313 return TASK_NOT_PUSHED;
317 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
321 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
323 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
324 KA_TRACE(20, (
"__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
326 return TASK_NOT_PUSHED;
330 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
333 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
335 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
336 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
338 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
340 KA_TRACE(20, (
"__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
341 "task=%p ntasks=%d head=%u tail=%u\n",
342 gtid, taskdata, thread_data->td.td_deque_ntasks,
343 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
345 return TASK_SUCCESSFULLY_PUSHED;
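// __kmp_pop_current_task_from_thread: restore the thread's current task to its parent.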
354 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
356 KF_TRACE( 10, (
"__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
357 "curtask_parent=%p\n",
358 0, this_thr, this_thr -> th.th_current_task,
359 this_thr -> th.th_current_task -> td_parent ) );
361 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
363 KF_TRACE( 10, (
"__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
364 "curtask_parent=%p\n",
365 0, this_thr, this_thr -> th.th_current_task,
366 this_thr -> th.th_current_task -> td_parent ) );
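// __kmp_push_current_task_to_thread: set up the current task in a thread for a new team; implicit task 0 is re-parented to the encountering task, other implicit tasks inherit its parent.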
377 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
380 KF_TRACE( 10, (
"__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
382 tid, this_thr, this_thr->th.th_current_task,
383 team->t.t_implicit_task_taskdata[tid].td_parent ) );
385 KMP_DEBUG_ASSERT (this_thr != NULL);
388 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
389 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
390 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
393 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
394 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
397 KF_TRACE( 10, (
"__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
399 tid, this_thr, this_thr->th.th_current_task,
400 team->t.t_implicit_task_taskdata[tid].td_parent ) );
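// __kmp_task_start: bookkeeping when a task begins execution: push it on the tied-task stack if needed, mark it started and executing, and make it the thread's current task.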
411 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
413 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
414 kmp_info_t * thread = __kmp_threads[ gtid ];
416 KA_TRACE(10, (
"__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
417 gtid, taskdata, current_task) );
419 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
424 current_task -> td_flags.executing = 0;
427 #ifdef BUILD_TIED_TASK_STACK
428 if ( taskdata -> td_flags.tiedness == TASK_TIED )
430 __kmp_push_task_stack( gtid, thread, taskdata );
435 thread -> th.th_current_task = taskdata;
437 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
438 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
439 taskdata -> td_flags.started = 1;
440 taskdata -> td_flags.executing = 1;
441 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
442 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
449 KA_TRACE(10, (
"__kmp_task_start(exit): T#%d task=%p\n",
453 if ((ompt_status == ompt_status_track_callback) &&
454 ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
455 kmp_taskdata_t *parent = taskdata->td_parent;
456 ompt_callbacks.ompt_callback(ompt_event_task_begin)(
457 parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
458 parent ? &(parent->ompt_task_info.frame) : NULL,
459 taskdata->ompt_task_info.task_id,
460 taskdata->ompt_task_info.function);
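// __kmpc_omp_task_begin_if0: compiler entry point to report that a serialized (if(0)) task has started execution.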
475 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
477 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
478 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
480 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
481 gtid, loc_ref, taskdata, current_task ) );
483 taskdata -> td_flags.task_serial = 1;
484 __kmp_task_start( gtid, task, current_task );
486 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
487 gtid, loc_ref, taskdata ) );
498 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
500 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
502 KA_TRACE(10, (
"__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
503 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
505 __kmp_task_start( gtid, task, current_task );
507 KA_TRACE(10, (
"__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
508 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
512 #endif // TASK_UNUSED
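// __kmp_free_task: free the task descriptor and the space for its shareds; the task must be complete and have no live children.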
522 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
524 KA_TRACE(30, (
"__kmp_free_task: T#%d freeing data from task %p\n",
528 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
529 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
530 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
531 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
532 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
533 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
535 taskdata->td_flags.freed = 1;
538 __kmp_fast_free( thread, taskdata );
540 __kmp_thread_free( thread, taskdata );
543 KA_TRACE(20, (
"__kmp_free_task: T#%d freed task %p\n",
555 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
557 kmp_int32 children = 0;
558 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
560 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
562 if ( !team_or_tasking_serialized ) {
563 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
564 KMP_DEBUG_ASSERT( children >= 0 );
568 while ( children == 0 )
570 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
572 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
573 "and freeing itself\n", gtid, taskdata) );
576 __kmp_free_task( gtid, taskdata, thread );
578 taskdata = parent_taskdata;
582 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
585 if ( !team_or_tasking_serialized ) {
587 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
588 KMP_DEBUG_ASSERT( children >= 0 );
592 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
593 "not freeing it yet\n", gtid, taskdata, children) );
603 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
605 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
606 kmp_info_t * thread = __kmp_threads[ gtid ];
607 kmp_int32 children = 0;
610 if ((ompt_status == ompt_status_track_callback) &&
611 ompt_callbacks.ompt_callback(ompt_event_task_end)) {
612 kmp_taskdata_t *parent = taskdata->td_parent;
613 ompt_callbacks.ompt_callback(ompt_event_task_end)(
614 taskdata->ompt_task_info.task_id);
618 KA_TRACE(10, (
"__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
619 gtid, taskdata, resumed_task) );
621 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
624 #ifdef BUILD_TIED_TASK_STACK
625 if ( taskdata -> td_flags.tiedness == TASK_TIED )
627 __kmp_pop_task_stack( gtid, thread, taskdata );
631 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
632 taskdata -> td_flags.complete = 1;
633 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
634 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
637 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
639 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
640 KMP_DEBUG_ASSERT( children >= 0 );
642 if ( taskdata->td_taskgroup )
643 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
644 __kmp_release_deps(gtid,taskdata);
651 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
652 taskdata -> td_flags.executing = 0;
654 KA_TRACE(20, (
"__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
655 gtid, taskdata, children) );
665 if (taskdata->td_flags.destructors_thunk) {
666 kmp_routine_entry_t destr_thunk = task->destructors;
667 KMP_ASSERT(destr_thunk);
668 destr_thunk(gtid, task);
670 #endif // OMP_40_ENABLED
674 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
675 taskdata->td_flags.task_serial);
676 if ( taskdata->td_flags.task_serial )
678 if (resumed_task == NULL) {
679 resumed_task = taskdata->td_parent;
683 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
687 KMP_DEBUG_ASSERT( resumed_task != NULL );
691 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
696 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task;
700 resumed_task->td_flags.executing = 1;
702 KA_TRACE(10, (
"__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
703 gtid, taskdata, resumed_task) );
715 __kmpc_omp_task_complete_if0(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
717 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
718 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
720 __kmp_task_finish( gtid, task, NULL );
722 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
723 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
734 __kmpc_omp_task_complete(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
736 KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
737 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
739 __kmp_task_finish( gtid, task, NULL );
741 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
742 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
745 #endif // TASK_UNUSED
757 __kmp_task_init_ompt( kmp_taskdata_t * task, int tid )
759 task->ompt_task_info.task_id = __ompt_task_id_new(tid);
760 task->ompt_task_info.function = NULL;
761 task->ompt_task_info.frame.exit_runtime_frame = NULL;
762 task->ompt_task_info.frame.reenter_runtime_frame = NULL;
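// __kmp_init_implicit_task: initialize the implicit task descriptor for the given thread of a team.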
779 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
781 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
783 KF_TRACE(10, ( "__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
784     tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
786 task->td_task_id = KMP_GEN_TASK_ID();
787 task->td_team = team;
789 task->td_ident = loc_ref;
790 task->td_taskwait_ident = NULL;
791 task->td_taskwait_counter = 0;
792 task->td_taskwait_thread = 0;
794 task->td_flags.tiedness = TASK_TIED;
795 task->td_flags.tasktype = TASK_IMPLICIT;
797 task->td_flags.proxy = TASK_FULL;
801 task->td_flags.task_serial = 1;
802 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
803 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
805 task->td_flags.started = 1;
806 task->td_flags.executing = 1;
807 task->td_flags.complete = 0;
808 task->td_flags.freed = 0;
811 task->td_dephash = NULL;
812 task->td_depnode = NULL;
816 task->td_incomplete_child_tasks = 0;
817 task->td_allocated_child_tasks = 0;
819 task->td_taskgroup = NULL;
821 __kmp_push_current_task_to_thread( this_thr, team, tid );
823 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
824 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
828 __kmp_task_init_ompt(task, tid);
831 KF_TRACE(10, (
"__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
838 __kmp_round_up_to_val( size_t size, size_t val ) {
839 if ( size & ( val - 1 ) ) {
840 size &= ~ ( val - 1 );
841 if ( size <= KMP_SIZE_T_MAX - val ) {
        size += val;    // round up only when the addition cannot overflow
    }
}
return size;
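// __kmp_task_alloc: allocate and initialize the taskdata, kmp_task_t, and shareds storage for a new explicit task.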
862 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
863     size_t sizeof_kmp_task_t, size_t sizeof_shareds,
864     kmp_routine_entry_t task_entry )
867 kmp_taskdata_t *taskdata;
868 kmp_info_t *thread = __kmp_threads[ gtid ];
869 kmp_team_t *team = thread->th.th_team;
870 kmp_taskdata_t *parent_task = thread->th.th_current_task;
871 size_t shareds_offset;
873 KA_TRACE(10, (
"__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
874 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
875 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
876 sizeof_shareds, task_entry) );
878 if ( parent_task->td_flags.final ) {
879 if (flags->merged_if0) {
885 if ( flags->proxy == TASK_PROXY ) {
886 flags->tiedness = TASK_UNTIED;
887 flags->merged_if0 = 1;
890 if ( (thread->th.th_task_team) == NULL ) {
894 KMP_DEBUG_ASSERT(team->t.t_serialized);
895 KA_TRACE(30,(
"T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
896 __kmp_task_team_setup(thread,team,0,1);
897 thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
899 kmp_task_team_t * task_team = thread->th.th_task_team;
902 if ( !KMP_TASKING_ENABLED( task_team ) ) {
903 KA_TRACE(30,(
"T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
904 __kmp_enable_tasking( task_team, thread );
905 kmp_int32 tid = thread->th.th_info.ds.ds_tid;
906 kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
908 if (thread_data -> td.td_deque == NULL ) {
909 __kmp_alloc_task_deque( thread, thread_data );
913 if ( task_team->tt.tt_found_proxy_tasks == FALSE )
914 TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
920 shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
921 shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
924 KA_TRACE(30, (
"__kmp_task_alloc: T#%d First malloc size: %ld\n",
925 gtid, shareds_offset) );
926 KA_TRACE(30, (
"__kmp_task_alloc: T#%d Second malloc size: %ld\n",
927 gtid, sizeof_shareds) );
931 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
933 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
936 task = KMP_TASKDATA_TO_TASK(taskdata);
939 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
940 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
941 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
#else
943 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
944 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
#endif
946 if (sizeof_shareds > 0) {
948     task->shareds = & ((char *) taskdata)[ shareds_offset ];
950     KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
} else {
952     task->shareds = NULL;
}
954 task->routine = task_entry;
957 taskdata->td_task_id = KMP_GEN_TASK_ID();
958 taskdata->td_team = team;
959 taskdata->td_alloc_thread = thread;
960 taskdata->td_parent = parent_task;
961 taskdata->td_level = parent_task->td_level + 1;
962 taskdata->td_ident = loc_ref;
963 taskdata->td_taskwait_ident = NULL;
964 taskdata->td_taskwait_counter = 0;
965 taskdata->td_taskwait_thread = 0;
966 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
969 if ( flags->proxy == TASK_FULL )
971 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
973 taskdata->td_flags.tiedness = flags->tiedness;
974 taskdata->td_flags.final = flags->final;
975 taskdata->td_flags.merged_if0 = flags->merged_if0;
977 taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
978 #endif // OMP_40_ENABLED
980 taskdata->td_flags.proxy = flags->proxy;
982 taskdata->td_flags.tasktype = TASK_EXPLICIT;
985 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
988 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
993 taskdata->td_flags.task_serial = ( parent_task->td_flags.final
994 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
996 taskdata->td_flags.started = 0;
997 taskdata->td_flags.executing = 0;
998 taskdata->td_flags.complete = 0;
999 taskdata->td_flags.freed = 0;
1001 taskdata->td_flags.native = flags->native;
1003 taskdata->td_incomplete_child_tasks = 0;
1004 taskdata->td_allocated_child_tasks = 1;
1006 taskdata->td_taskgroup = parent_task->td_taskgroup;
1007 taskdata->td_dephash = NULL;
1008 taskdata->td_depnode = NULL;
1013 if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1015 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
1018 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
1020 if ( parent_task->td_taskgroup )
1021 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
1024 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
1025 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
1029 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
1030 gtid, taskdata, taskdata->td_parent) );
1033 if (ompt_status & ompt_status_track) {
1034 taskdata->ompt_task_info.task_id = __ompt_task_id_new(gtid);
1035 taskdata->ompt_task_info.function = (void*) task_entry;
1036 taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
1037 taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
1046 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
1047     size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1048     kmp_routine_entry_t task_entry )
1051 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
1053 input_flags->native = FALSE;
1057 KA_TRACE(10, ( "__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
1058     "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1059     gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1060     input_flags->proxy ? "proxy" : "",
1061     sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1063 KA_TRACE(10, ( "__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
1064     "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
1065     gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
1066     sizeof_kmp_task_t, sizeof_shareds, task_entry) );
1069 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
1070 sizeof_shareds, task_entry );
1072 KA_TRACE(20, (
"__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
1085 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
1087 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
1091 KA_TRACE(30, (
"__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
1092 gtid, taskdata, current_task) );
1095 if ( taskdata->td_flags.proxy == TASK_PROXY &&
1096 taskdata->td_flags.complete == 1)
1100 KA_TRACE(30, (
"__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
1103 __kmp_bottom_half_finish_proxy(gtid,task);
1105 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
1113 if ( taskdata->td_flags.proxy != TASK_PROXY )
1115 __kmp_task_start( gtid, task, current_task );
1118 ompt_thread_info_t oldInfo;
1119 kmp_info_t * thread;
1120 if (ompt_status & ompt_status_track) {
1122 thread = __kmp_threads[ gtid ];
1123 oldInfo = thread->th.ompt_thread_info;
1124 thread->th.ompt_thread_info.wait_id = 0;
1125 thread->th.ompt_thread_info.state = ompt_state_work_parallel;
1126 taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
1134 if (__kmp_omp_cancellation) {
1135 kmp_info_t *this_thr = __kmp_threads [ gtid ];
1136 kmp_team_t * this_team = this_thr->th.th_team;
1137 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1138 if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
1149 #endif // OMP_40_ENABLED
1150 #ifdef KMP_GOMP_COMPAT
1151 if (taskdata->td_flags.native) {
1152 ((void (*)(void *))(*(task->routine)))(task->shareds);
1157 (*(task->routine))(gtid, task);
1161 #endif // OMP_40_ENABLED
1165 if (ompt_status & ompt_status_track) {
1166 thread->th.ompt_thread_info = oldInfo;
1167 taskdata->ompt_task_info.frame.exit_runtime_frame = 0;
1173 if ( taskdata->td_flags.proxy != TASK_PROXY )
1175 __kmp_task_finish( gtid, task, current_task );
1177 KA_TRACE(30, (
"__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
1178 gtid, taskdata, current_task) );
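// __kmpc_omp_task_parts: schedule a thread-switchable task for execution; if it cannot be queued, execute it immediately as a serialized task.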
1193 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1195 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1197 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1198 gtid, loc_ref, new_taskdata ) );
1203 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1205 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1206 new_taskdata->td_flags.task_serial = 1;
1207 __kmp_invoke_task( gtid, new_task, current_task );
1210 KA_TRACE(10, (
"__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1211 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1214 return TASK_CURRENT_NOT_QUEUED;
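// __kmp_omp_task: push a new task onto the current thread's deque; if the task cannot be deferred, invoke it immediately in the current thread.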
1227 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
1229 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1232 if (ompt_status & ompt_status_track) {
1233 new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
1234 __builtin_frame_address(0);
1241 if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1243 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1246 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1247 if ( serialize_immediate )
1248 new_taskdata -> td_flags.task_serial = 1;
1249 __kmp_invoke_task( gtid, new_task, current_task );
1253 if (ompt_status & ompt_status_track) {
1254 new_taskdata->ompt_task_info.frame.reenter_runtime_frame = 0;
1258 return TASK_CURRENT_NOT_QUEUED;
1273 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1275 kmp_taskdata_t * new_taskdata;
1278 new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1279 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1280 gtid, loc_ref, new_taskdata ) );
1282 res = __kmp_omp_task(gtid, new_task, true);
1284 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1285 gtid, loc_ref, new_taskdata ) );
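// __kmpc_omp_taskwait: wait until all tasks generated by the current (encountering) task have completed, executing queued tasks while waiting.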
1293 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1295 kmp_taskdata_t * taskdata;
1296 kmp_info_t * thread;
1297 int thread_finished = FALSE;
1299 KA_TRACE(10, (
"__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1302 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1305 thread = __kmp_threads[ gtid ];
1306 taskdata = thread -> th.th_current_task;
1310 taskdata->td_taskwait_counter += 1;
1311 taskdata->td_taskwait_ident = loc_ref;
1312 taskdata->td_taskwait_thread = gtid + 1;
1315 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1316 if ( itt_sync_obj != NULL )
1317 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1321 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1323 if ( ! taskdata->td_flags.team_serial )
1327 kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
1328 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1329 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1330 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1334 if ( itt_sync_obj != NULL )
1335 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1339 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1342 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1343 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1345 return TASK_CURRENT_NOT_QUEUED;
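// __kmpc_omp_taskyield: allow a task switch; if tasking is enabled, execute tasks from the queues while yielding.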
1353 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1355 kmp_taskdata_t * taskdata;
1356 kmp_info_t * thread;
1357 int thread_finished = FALSE;
1359 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1360 gtid, loc_ref, end_part) );
1362 if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
1365 thread = __kmp_threads[ gtid ];
1366 taskdata = thread -> th.th_current_task;
1371 taskdata->td_taskwait_counter += 1;
1372 taskdata->td_taskwait_ident = loc_ref;
1373 taskdata->td_taskwait_thread = gtid + 1;
1376 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1377 if ( itt_sync_obj != NULL )
1378 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1380 if ( ! taskdata->td_flags.team_serial ) {
1381 kmp_task_team_t * task_team = thread->th.th_task_team;
1382 if (task_team != NULL) {
1383 if (KMP_TASKING_ENABLED(task_team)) {
1384 __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
1385 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1390 if ( itt_sync_obj != NULL )
1391 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1395 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1398 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1399 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1401 return TASK_CURRENT_NOT_QUEUED;
1410 __kmpc_taskgroup( ident_t* loc, int gtid )
1412 kmp_info_t * thread = __kmp_threads[ gtid ];
1413 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1414 kmp_taskgroup_t * tg_new =
1415     (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1416 KA_TRACE(10, (
"__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1418 tg_new->cancel_request = cancel_noreq;
1419 tg_new->parent = taskdata->td_taskgroup;
1420 taskdata->td_taskgroup = tg_new;
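// __kmpc_end_taskgroup: wait until all tasks generated inside the current taskgroup have completed, then restore the parent taskgroup.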
1429 __kmpc_end_taskgroup( ident_t* loc, int gtid )
1431 kmp_info_t * thread = __kmp_threads[ gtid ];
1432 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1433 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1434 int thread_finished = FALSE;
1436 KA_TRACE(10, (
"__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1437 KMP_DEBUG_ASSERT( taskgroup != NULL );
1439 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1442 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1443 if ( itt_sync_obj != NULL )
1444 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1448 if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
1450 if ( ! taskdata->td_flags.team_serial )
1453 kmp_flag_32 flag(&(taskgroup->count), 0U);
1454 while ( TCR_4(taskgroup->count) != 0 ) {
1455 flag.execute_tasks(thread, gtid, FALSE, &thread_finished
1456 USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
1461 if ( itt_sync_obj != NULL )
1462 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1465 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1468 taskdata->td_taskgroup = taskgroup->parent;
1469 __kmp_thread_free( thread, taskgroup );
1471 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1480 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1481 kmp_int32 is_constrained )
1484 kmp_taskdata_t * taskdata;
1485 kmp_thread_data_t *thread_data;
1488 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1489 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL );
1491 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1493 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1494 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1495 thread_data->td.td_deque_tail) );
1497 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1498 KA_TRACE(10, (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1499 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1500 thread_data->td.td_deque_tail) );
1504 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1506 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1507 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1508 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1509 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1510 thread_data->td.td_deque_tail) );
1514 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;
1515 taskdata = thread_data -> td.td_deque[ tail ];
1517 if (is_constrained) {
1520 kmp_taskdata_t * current = thread->th.th_current_task;
1521 kmp_int32 level = current->td_level;
1522 kmp_taskdata_t * parent = taskdata->td_parent;
1523 while ( parent != current && parent->td_level > level ) {
1524 parent = parent->td_parent;
1525 KMP_DEBUG_ASSERT(parent != NULL);
1527 if ( parent != current ) {
1529 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1530 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1531 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1532 thread_data->td.td_deque_tail) );
1537 thread_data -> td.td_deque_tail = tail;
1538 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1540 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1542 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1543 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1544 thread_data->td.td_deque_tail) );
1546 task = KMP_TASKDATA_TO_TASK( taskdata );
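// __kmp_steal_task: try to steal a task from the victim thread's deque; returns NULL if nothing can be stolen.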
1557 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1558     volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1559     kmp_int32 is_constrained )
1562 kmp_taskdata_t * taskdata;
1563 kmp_thread_data_t *victim_td, *threads_data;
1564 kmp_int32 victim_tid;
1566 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1568 threads_data = task_team -> tt.tt_threads_data;
1569 KMP_DEBUG_ASSERT( threads_data != NULL );
1571 victim_tid = victim->th.th_info.ds.ds_tid;
1572 victim_td = & threads_data[ victim_tid ];
1574 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1575 "head=%u tail=%u\n",
1576 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1577 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1579 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1580 (TCR_PTR(victim->th.th_task_team) != task_team))
1582 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1583 "ntasks=%d head=%u tail=%u\n",
1584 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1585 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1589 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1592 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1593 (TCR_PTR(victim->th.th_task_team) != task_team))
1595 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1596 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1597 "ntasks=%d head=%u tail=%u\n",
1598 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1599 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1603 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1605 if ( !is_constrained ) {
1606 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1608 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1611 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;
1612 taskdata = victim_td -> td.td_deque[ tail ];
1615 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1616 kmp_int32 level = current->td_level;
1617 kmp_taskdata_t * parent = taskdata->td_parent;
1618 while ( parent != current && parent->td_level > level ) {
1619 parent = parent->td_parent;
1620 KMP_DEBUG_ASSERT(parent != NULL);
1622 if ( parent != current ) {
1624 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1625 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1626 "ntasks=%d head=%u tail=%u\n",
1627 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1628 task_team, victim_td->td.td_deque_ntasks,
1629 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1632 victim_td -> td.td_deque_tail = tail;
1634 if (*thread_finished) {
1640 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1642 KA_TRACE(20, (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1643 gtid, count + 1, task_team) );
1645 *thread_finished = FALSE;
1647 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1649 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1651 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
1652 "ntasks=%d head=%u tail=%u\n",
1653 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1654 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1655 victim_td->td.td_deque_tail) );
1657 task = KMP_TASKDATA_TO_TASK( taskdata );
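// __kmp_execute_tasks_template: common spin-wait work loop: drain the thread's own deque, then steal from the last successful victim, then from random victims, re-checking the wait flag and final_spin conditions between tasks.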
template <class C>
1672 static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
1673     int *thread_finished
1674     USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1676 kmp_task_team_t * task_team;
1677 kmp_thread_data_t * threads_data;
1679 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1680 volatile kmp_uint32 * unfinished_threads;
1681 kmp_int32 nthreads, last_stolen, k, tid;
1683 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1684 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1686 task_team = thread -> th.th_task_team;
1687 KMP_DEBUG_ASSERT( task_team != NULL );
1689 KA_TRACE(15, (
"__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
1690 gtid, final_spin, *thread_finished) );
1692 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1693 KMP_DEBUG_ASSERT( threads_data != NULL );
1695 nthreads = task_team -> tt.tt_nproc;
1696 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1698 KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
1700 KMP_DEBUG_ASSERT( nthreads > 1 );
1702 KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1706 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1707 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1708 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1709 if ( itt_sync_obj == NULL ) {
1711 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1713 __kmp_itt_task_starting( itt_sync_obj );
1716 __kmp_invoke_task( gtid, task, current_task );
1718 if ( itt_sync_obj != NULL )
1719 __kmp_itt_task_finished( itt_sync_obj );
1727 if (flag == NULL || (!final_spin && flag->done_check())) {
1728 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) );
1731 KMP_YIELD( __kmp_library == library_throughput );
1738 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1746 if (! *thread_finished) {
1749 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1750 KA_TRACE(20, (
"__kmp_execute_tasks_template(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1751 gtid, count, task_team) );
1752 *thread_finished = TRUE;
1760 if (flag != NULL && flag->done_check()) {
1761 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #2): T#%d spin condition satisfied\n", gtid) );
1768 if ( nthreads == 1 )
1773 tid = thread -> th.th_info.ds.ds_tid;
1774 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1776 if (last_stolen != -1) {
1777 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1779 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1780 thread_finished, is_constrained )) != NULL)
1782 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1783 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1784 if ( itt_sync_obj == NULL ) {
1786 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1788 __kmp_itt_task_starting( itt_sync_obj );
1791 __kmp_invoke_task( gtid, task, current_task );
1793 if ( itt_sync_obj != NULL )
1794 __kmp_itt_task_finished( itt_sync_obj );
1798 if (flag == NULL || (!final_spin && flag->done_check())) {
1799 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #3): T#%d spin condition satisfied\n",
1804 KMP_YIELD( __kmp_library == library_throughput );
1807 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1808 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1815 threads_data[ tid ].td.td_deque_last_stolen = -1;
1821 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1829 if (! *thread_finished) {
1832 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1833 KA_TRACE(20, (
"__kmp_execute_tasks_template(dec #2): T#%d dec unfinished_threads to %d "
1834 "task_team=%p\n", gtid, count, task_team) );
1835 *thread_finished = TRUE;
1844 if (flag != NULL && flag->done_check()) {
1845 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #4): T#%d spin condition satisfied\n",
1858 k = __kmp_get_random( thread ) % (nthreads - 1);
1859 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1863 kmp_info_t *other_thread = threads_data[k].td.td_thr;
1873 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1874 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1875 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1877 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
1888 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1889 thread_finished, is_constrained )) != NULL)
1891 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1892 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1893 if ( itt_sync_obj == NULL ) {
1895 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1897 __kmp_itt_task_starting( itt_sync_obj );
1900 __kmp_invoke_task( gtid, task, current_task );
1902 if ( itt_sync_obj != NULL )
1903 __kmp_itt_task_finished( itt_sync_obj );
1908 threads_data[ tid ].td.td_deque_last_stolen = k;
1913 if (flag == NULL || (!final_spin && flag->done_check())) {
1914 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #5): T#%d spin condition satisfied\n",
1918 KMP_YIELD( __kmp_library == library_throughput );
1922 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1923 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n",
1936 if (final_spin && TCR_4(current_task -> td_incomplete_child_tasks) == 0)
1944 if (! *thread_finished) {
1947 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1948 KA_TRACE(20, (
"__kmp_execute_tasks_template(dec #3): T#%d dec unfinished_threads to %d; "
1950 gtid, count, task_team) );
1951 *thread_finished = TRUE;
1960 if (flag != NULL && flag->done_check()) {
1961 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #6): T#%d spin condition satisfied\n", gtid) );
1967 KA_TRACE(15, (
"__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) );
1971 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
1972     int *thread_finished
1973     USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1975     return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1976         USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1979 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
1980     int *thread_finished
1981     USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1983     return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1984         USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1987 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
1988     int *thread_finished
1989     USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
1991     return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
1992         USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
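// __kmp_enable_tasking: set up the task team's per-thread data (deques) and, if needed, wake sleeping worker threads so they can execute tasks.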
2003 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
2006 kmp_thread_data_t *threads_data;
2007 int nthreads, i, is_init_thread;
2009 KA_TRACE( 10, (
"__kmp_enable_tasking(enter): T#%d\n",
2010 __kmp_gtid_from_thread( this_thr ) ) );
2012 team = this_thr->th.th_team;
2013 KMP_DEBUG_ASSERT(task_team != NULL);
2014 KMP_DEBUG_ASSERT(team != NULL);
2016 nthreads = task_team->tt.tt_nproc;
2017 KMP_DEBUG_ASSERT(nthreads > 0);
2018 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
2021 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
2023 if (!is_init_thread) {
2025 KA_TRACE( 20, (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
2026 __kmp_gtid_from_thread( this_thr ) ) );
2029 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
2030 KMP_DEBUG_ASSERT( threads_data != NULL );
2032 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
2033 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
2038 for (i = 0; i < nthreads; i++) {
2039 volatile void *sleep_loc;
2040 kmp_info_t *thread = threads_data[i].td.td_thr;
2042 if (i == this_thr->th.th_info.ds.ds_tid) {
2052 if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
2054 KF_TRACE( 50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
2055 __kmp_gtid_from_thread( this_thr ),
2056 __kmp_gtid_from_thread( thread ) ) );
2057 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
2060 KF_TRACE( 50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
2061 __kmp_gtid_from_thread( this_thr ),
2062 __kmp_gtid_from_thread( thread ) ) );
2067 KA_TRACE( 10, (
"__kmp_enable_tasking(exit): T#%d\n",
2068 __kmp_gtid_from_thread( this_thr ) ) );
2108 static kmp_task_team_t *__kmp_free_task_teams = NULL;
2110 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
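// __kmp_alloc_task_deque: allocate and initialize the task deque for a single thread's thread_data.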
2122 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
2124 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
2125 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
2128 thread_data -> td.td_deque_last_stolen = -1;
2130 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
2131 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
2132 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
2134 KE_TRACE( 10, (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
2135 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
2139 thread_data -> td.td_deque = (kmp_taskdata_t **)
2140     __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
2150 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
2152 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
2154 if ( thread_data -> td.td_deque != NULL ) {
2155 TCW_4(thread_data -> td.td_deque_ntasks, 0);
2156 __kmp_free( thread_data -> td.td_deque );
2157 thread_data -> td.td_deque = NULL;
2159 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
2161 #ifdef BUILD_TIED_TASK_STACK
2163 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
2164 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
2166 #endif // BUILD_TIED_TASK_STACK
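// __kmp_realloc_task_threads_data: allocate or grow the task team's per-thread data array to match the team size; returns TRUE only for the thread that performed the (re)allocation.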
2180 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
2182 kmp_thread_data_t ** threads_data_p;
2183 kmp_int32 nthreads, maxthreads;
2184 int is_init_thread = FALSE;
2186 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
2191 threads_data_p = & task_team -> tt.tt_threads_data;
2192 nthreads = task_team -> tt.tt_nproc;
2193 maxthreads = task_team -> tt.tt_max_threads;
2197 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2199 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
2201 kmp_team_t *team = thread -> th.th_team;
2204 is_init_thread = TRUE;
2205 if ( maxthreads < nthreads ) {
2207 if ( *threads_data_p != NULL ) {
2208 kmp_thread_data_t *old_data = *threads_data_p;
2209 kmp_thread_data_t *new_data = NULL;
2211 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d reallocating "
2212 "threads data for task_team %p, new_size = %d, old_size = %d\n",
2213 __kmp_gtid_from_thread( thread ), task_team,
2214 nthreads, maxthreads ) );
2219 new_data = (kmp_thread_data_t *)
2220     __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2222 KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
     (void *) old_data,
2224     maxthreads * sizeof(kmp_taskdata_t *) );
2226 #ifdef BUILD_TIED_TASK_STACK
2228 for (i = maxthreads; i < nthreads; i++) {
2229 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2230 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2232 #endif // BUILD_TIED_TASK_STACK
2234 (*threads_data_p) = new_data;
2235 __kmp_free( old_data );
2238 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d allocating "
2239 "threads data for task_team %p, size = %d\n",
2240 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
2244 *threads_data_p = (kmp_thread_data_t *)
2245     __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
2246 #ifdef BUILD_TIED_TASK_STACK
2248 for (i = 0; i < nthreads; i++) {
2249 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2250 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
2252 #endif // BUILD_TIED_TASK_STACK
2254 task_team -> tt.tt_max_threads = nthreads;
2258 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
2262 for (i = 0; i < nthreads; i++) {
2263 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
2264 thread_data -> td.td_thr = team -> t.t_threads[i];
2266 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
2270 thread_data -> td.td_deque_last_stolen = -1;
2275 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
2278 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2279 return is_init_thread;
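// __kmp_free_task_threads_data: free the per-thread deques and the threads_data array of a task team.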
2289 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2291 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2292 if ( task_team -> tt.tt_threads_data != NULL ) {
2294 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2295 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2297 __kmp_free( task_team -> tt.tt_threads_data );
2298 task_team -> tt.tt_threads_data = NULL;
2300 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
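// __kmp_allocate_task_team: reuse a task team from the global free list if possible, otherwise allocate a new one, and initialize it for this team.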
2309 static kmp_task_team_t *
2310 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2312 kmp_task_team_t *task_team = NULL;
2315 KA_TRACE( 20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
2316 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2318 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2320 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2321 if (__kmp_free_task_teams != NULL) {
2322 task_team = __kmp_free_task_teams;
2323 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2324 task_team -> tt.tt_next = NULL;
2326 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2329 if (task_team == NULL) {
2330 KE_TRACE( 10, (
"__kmp_allocate_task_team: T#%d allocating "
2331 "task team for team %p\n",
2332 __kmp_gtid_from_thread( thread ), team ) );
2336 task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2337 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2343 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2345 TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
2347 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2349 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2350 TCW_4( task_team -> tt.tt_active, TRUE );
2351 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2353 KA_TRACE( 20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2354 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2366 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2368 KA_TRACE( 20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
2369 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2371 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2374 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2376 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2377 task_team -> tt.tt_next = __kmp_free_task_teams;
2378 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2379 TCW_PTR(__kmp_free_task_teams, task_team);
2381 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2392 __kmp_reap_task_teams( void )
2394 kmp_task_team_t *task_team;
2396 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2398 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2399 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2400 __kmp_free_task_teams = task_team -> tt.tt_next;
2401 task_team -> tt.tt_next = NULL;
2404 if ( task_team -> tt.tt_threads_data != NULL ) {
2405 __kmp_free_task_threads_data( task_team );
2407 __kmp_free( task_team );
2409 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
void
__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
{
    kmp_int32 ref_ct;

    ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;

    KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
                    __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );

    if ( ref_ct == 0 ) {
        __kmp_free_task_team( thread, task_team );
    }
    TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
}
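
//------------------------------------------------------------------------------
// __kmp_wait_to_unref_task_teams:
// Spin until every thread in the thread pool has dropped its th_task_team
// pointer, waking sleeping threads so they can release their references.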
void
__kmp_wait_to_unref_task_teams(void)
{
    kmp_info_t *thread;
    kmp_uint32 spins;
    int done;
    KMP_INIT_YIELD( spins );

    for (;;) {
        done = TRUE;
        for (thread = (kmp_info_t *)__kmp_thread_pool;
             thread != NULL;
             thread = thread->th.th_next_pool)
        {
            if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
                KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                                __kmp_gtid_from_thread( thread ) ) );
                continue;
            }
#if KMP_OS_WINDOWS
            DWORD exit_val;
            // A dead thread cannot release its own reference; drop it on its behalf.
            if (!__kmp_is_thread_alive(thread, &exit_val)) {
                if (TCR_PTR(thread->th.th_task_team) != NULL) {
                    __kmp_unref_task_team( thread->th.th_task_team, thread );
                }
                continue;
            }
#endif
            done = FALSE;  // this thread still references a task team
            KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
                            __kmp_gtid_from_thread( thread ) ) );
            if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
                volatile void *sleep_loc;
                // If the thread is sleeping, wake it so it can release its reference.
                if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
                    KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
                                    __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
                    __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
                }
            }
        }
        if (done)
            break;
        // Yield if oversubscribed, then spin before rescanning the thread pool.
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
}
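
//------------------------------------------------------------------------------
// __kmp_task_team_setup:
// Called by the master thread: make sure the task team for the current task
// state exists (allocating one if needed), and, when 'both' is set, also set
// up the task team for the other task state.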
void
__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // If the task team for the current task state has not been created yet,
    // allocate it (or reuse one from the free list).
    if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) {
        team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
        KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
                        __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[this_thr->th.th_task_state],
                        ((team != NULL) ? team->t.t_id : -1)) );
    }

    // Optionally set up the task team for the other task state as well, so it
    // is ready when the threads toggle their task state at the next barrier.
    if (both) {
        int other_team = 1 - this_thr->th.th_task_state;
        if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) {
            team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
            KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new task_team %p for team %d\n",
                            __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
                            ((team != NULL) ? team->t.t_id : -1)) );
        }
    }
}
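
//------------------------------------------------------------------------------
// __kmp_task_team_sync:
// Called at a barrier: drop the reference to a task team that is no longer
// active, toggle th_task_state, and pick up the team's task team pointer for
// the new state.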
void
__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
{
    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );

    // Drop the reference to a task team that has already been deactivated.
    if ( this_thr->th.th_task_team != NULL ) {
        if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
            KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
            __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
        } else {
            KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state] );
        }
    }

    // Toggle the task state and pick up the corresponding task team pointer.
    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
    KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
                    __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
                    ((team != NULL) ? (team->t.t_id) : -1) ) );
}
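
//------------------------------------------------------------------------------
// __kmp_task_team_wait:
// Called by the master thread at a barrier: wait until tt_unfinished_threads
// drops to zero, then deactivate the task team and detach it from both the
// thread and the team.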
void
__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
                      USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
    KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );

    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
        KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
                        __kmp_gtid_from_thread( this_thr ), task_team ) );
        // Worker threads may have dropped into the release phase but can still be
        // executing tasks; wait here until every task has completed.
        kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));

        // Deactivate the old task team so spinning workers stop referencing it.
        KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
                        __kmp_gtid_from_thread( this_thr ), task_team ) );
#if OMP_41_ENABLED
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
        TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
#else
        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
#endif
        TCW_SYNC_4( task_team->tt.tt_active, FALSE );

        TCW_PTR(this_thr->th.th_task_team, NULL);
        team->t.t_task_team[this_thr->th.th_task_state] = NULL;
    }
}
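
//------------------------------------------------------------------------------
// __kmp_tasking_barrier:
// Execute outstanding tasks until the task team's tt_unfinished_threads count
// reaches zero.  Only used when __kmp_tasking_mode == tskm_extra_barrier.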
void
__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
{
    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
    int flag = FALSE;

    KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );

    KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );

    kmp_flag_32 spin_flag(spin, 0U);
    while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
                                     USE_ITT_BUILD_ARG(NULL), 0 ) ) {
        KMP_FSYNC_SPIN_PREPARE( spin );

        if( TCR_4(__kmp_global.g.g_done) ) {
            if( __kmp_global.g.g_abort )
                __kmp_abort_thread( );
            break;
        }
        KMP_YIELD( TRUE );  // keep yielding; other threads may still be producing tasks
    }
    KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
}
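
//------------------------------------------------------------------------------
// __kmp_give_task:
// Try to put a task into the deque of the thread with local id 'tid'.  Returns
// false if that thread has no deque or the deque is full, in which case the
// caller is expected to try another thread.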
static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task )
{
    kmp_task_team_t *   task_team = thread->th.th_task_team;
    kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
    bool result = false;

    KA_TRACE(20, ( "__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );

    KMP_DEBUG_ASSERT( task_team != NULL );

    if (thread_data -> td.td_deque == NULL ) {
        // The target thread has no deque; the caller must pick another thread.
        KA_TRACE(30, ( "__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
        return result;
    }

    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) {
        KA_TRACE(30, ( "__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        return result;
    }

    __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );

    // Re-check under the lock; the deque may have filled up in the meantime.
    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE ) {
        KA_TRACE(30, ( "__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
        goto release_and_exit;
    }

    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
    // Wrap index.
    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);

    result = true;
    KA_TRACE(30, ( "__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );

release_and_exit:
    __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );

    return result;
}
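
//------------------------------------------------------------------------------
// Proxy task completion is split into a "top half", which may run on a thread
// outside the team, and a "bottom half", which must run on a thread inside the
// team.  The top half is itself split in two so that the bottom half can be
// queued before the parent's incomplete-child counter is decremented: the first
// top half marks the task complete and adds an extra count to
// td_incomplete_child_tasks, the bottom half waits for that count to drop and
// then releases dependences and frees the task, and the second top half
// decrements the parent's counter and removes the extra count.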
static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );

    taskdata -> td_flags.complete = 1;   // mark the task as completed

    if ( taskdata->td_taskgroup )
        KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );

    // Add an extra child count so the bottom half cannot free the task before
    // the second top half has run.
    TCR_4(taskdata->td_incomplete_child_tasks++);
}
static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
{
    kmp_int32 children = 0;
    // Predecrement simulated by "- 1" calculation.
    children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
    KMP_DEBUG_ASSERT( children >= 0 );

    // Remove the extra child count added by the first top half.
    TCR_4(taskdata->td_incomplete_child_tasks--);
}
static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
{
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
    kmp_info_t * thread = __kmp_threads[ gtid ];

    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 );  // top half must have run first

    // Wait for the second top half to finish; it completes quickly, so spinning is fine.
    while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 )
        ;

    __kmp_release_deps(gtid,taskdata);
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
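
//------------------------------------------------------------------------------
// __kmpc_proxy_task_completed:
// Signal completion of a proxy task from a thread of the team that created it;
// gtid identifies the calling thread, so both top halves and the bottom half
// run directly here.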
void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ( "__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);
    __kmp_second_top_half_finish_proxy(taskdata);
    __kmp_bottom_half_finish_proxy(gtid,ptask);

    KA_TRACE(10, ( "__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
}
void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
{
    KMP_DEBUG_ASSERT( ptask != NULL );
    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);

    KA_TRACE(10, ( "__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );

    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );

    __kmp_first_top_half_finish_proxy(taskdata);

    // Enqueue the task so that a thread inside the team runs the bottom half.
    kmp_team_t * team = taskdata->td_team;
    kmp_int32 nthreads = team->t.t_nproc;
    kmp_info_t *thread;
    kmp_int32 k = 0;

    do {
        // Linearly probe the team's threads until one accepts the task.
        k = (k+1) % nthreads;
        thread = team->t.t_threads[k];
    } while ( !__kmp_give_task( thread, k, ptask ) );

    __kmp_second_top_half_finish_proxy(taskdata);

    KA_TRACE(10, ( "__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
}