#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"

#define MAX_MESSAGE 512

#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);

#define THREAD_ALLOC_FOR_TASKQ

static int in_parallel_context(kmp_team_t *team) {
  return !team->t.t_serialized;
}
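
/* The __kmp_taskq_eo / __kmp_taskq_xo pair below implements ORDERED sections
   inside a taskq: each ordered task carries a ticket (th_tasknum) assigned
   when it is enqueued, the "enter ordered" hook spins until the queue's
   tq_tasknum_serving reaches that ticket, and the "exit ordered" hook bumps
   tq_tasknum_serving so the holder of the next ticket may proceed. */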
static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
#if KMP_USE_DYNAMIC_LOCK
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0);
#else
    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL);
#endif

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    /* Wait until it is this task's turn in the ordered sequence. */
    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;

    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
  }
}
static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_uint32 my_token;
  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref);

  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
    my_token = tq->tq_curr_thunk[tid]->th_tasknum;

    /* Pass the token on so the task holding the next ticket can proceed. */
    tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving =
        my_token + 1;
  }
}
static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) {
  kmp_uint32 my_token;
  kmpc_task_queue_t *taskq;

  my_token = thunk->th_tasknum;

  taskq = thunk->th.th_shareds->sv_queue;

  if (taskq->tq_tasknum_serving <= my_token) {
    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
    taskq->tq_tasknum_serving = my_token + 1;
  }
}
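
/* The routines below are debugging aids: they pretty-print the TQF_* flag
   bits, a single thunk, a thread's thunk stack, one task queue, and the whole
   queue tree.  They are invoked through the KF_TRACE / KF_DUMP tracing macros
   used elsewhere in this file. */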
static void __kmp_dump_TQF(kmp_int32 flags) {
  if (flags & TQF_IS_ORDERED)
    __kmp_printf("ORDERED ");
  if (flags & TQF_IS_LASTPRIVATE)
    __kmp_printf("LAST_PRIV ");
  if (flags & TQF_IS_NOWAIT)
    __kmp_printf("NOWAIT ");
  if (flags & TQF_HEURISTICS)
    __kmp_printf("HEURIST ");
  if (flags & TQF_INTERFACE_RESERVED1)
    __kmp_printf("RESERV1 ");
  if (flags & TQF_INTERFACE_RESERVED2)
    __kmp_printf("RESERV2 ");
  if (flags & TQF_INTERFACE_RESERVED3)
    __kmp_printf("RESERV3 ");
  if (flags & TQF_INTERFACE_RESERVED4)
    __kmp_printf("RESERV4 ");
  if (flags & TQF_IS_LAST_TASK)
    __kmp_printf("LAST_TASK ");
  if (flags & TQF_TASKQ_TASK)
    __kmp_printf("TASKQ_TASK ");
  if (flags & TQF_RELEASE_WORKERS)
    __kmp_printf("RELEASE ");
  if (flags & TQF_ALL_TASKS_QUEUED)
    __kmp_printf("ALL_QUEUED ");
  if (flags & TQF_PARALLEL_CONTEXT)
    __kmp_printf("PARALLEL ");
  if (flags & TQF_DEALLOCATED)
    __kmp_printf("DEALLOC ");
  if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS)))
    __kmp_printf("(NONE)");
}
static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk,
                             kmp_int32 global_tid) {
  int i;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;

  __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);

  if (thunk != NULL) {
    for (i = 0; i < nproc; i++) {
      if (tq->tq_curr_thunk[i] == thunk) {
        __kmp_printf("[%i] ", i);
      }
    }
    __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
    __kmp_printf("th_task=%p, ", thunk->th_task);
    __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
    __kmp_printf("th_status=%d, ", thunk->th_status);
    __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
    __kmp_printf("th_flags=");
    __kmp_dump_TQF(thunk->th_flags);
  }

  __kmp_printf("\n");
}
static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) {
  kmpc_thunk_t *th;

  __kmp_printf("    Thunk stack for T#%d:  ", thread_num);

  for (th = thunk; th != NULL; th = th->th_encl_thunk)
    __kmp_printf("%p ", th);

  __kmp_printf("\n");
}
static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue,
                                  kmp_int32 global_tid) {
  int qs, count, i;
  kmpc_thunk_t *thunk;
  kmpc_task_queue_t *taskq;

  __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

  if (queue != NULL) {
    int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

    if (__kmp_env_consistency_check) {
      __kmp_printf("    tq_loc             : ");
      __kmp_printf("\n");
    }

    __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
    __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
    __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
    __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
    __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);

    __kmp_printf("    tq_shareds         : ");
    for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
      __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
    __kmp_printf("\n");

    if (in_parallel) {
      __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
      __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
    }

    __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
    __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
    __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);

    __kmp_printf("    tq_free_thunks     : ");
    for (thunk = queue->tq_free_thunks; thunk != NULL;
         thunk = thunk->th.th_next_free)
      __kmp_printf("%p ", thunk);
    __kmp_printf("\n");

    __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
    __kmp_printf("    tq_head            : %d\n", queue->tq_head);
    __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
    __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
    __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
    __kmp_printf("    tq_flags           : ");
    __kmp_dump_TQF(queue->tq_flags);
    __kmp_printf("\n");

    if (in_parallel) {
      __kmp_printf("    tq_th_thunks       : ");
      for (i = 0; i < queue->tq_nproc; i++) {
        __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
      }
      __kmp_printf("\n");
    }

    __kmp_printf("\n    Queue slots:\n");

    qs = queue->tq_tail;
    for (count = 0; count < queue->tq_nfull; ++count) {
      __kmp_printf("(%d)", qs);
      __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid);
      qs = (qs + 1) % queue->tq_nslots;
    }

    __kmp_printf("\n");

    if (in_parallel) {
      if (queue->tq_taskq_slot != NULL) {
        __kmp_printf("    TaskQ slot:\n");
        __kmp_dump_thunk(tq, CCAST(kmpc_thunk_t *, queue->tq_taskq_slot),
                         global_tid);
        __kmp_printf("\n");
      }
    }
  }

  __kmp_printf("    Taskq freelist: ");

  for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free)
    __kmp_printf("%p ", taskq);

  __kmp_printf("\n\n");
}
static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq,
                                           kmpc_task_queue_t *curr_queue,
                                           kmp_int32 level,
                                           kmp_int32 global_tid) {
  int i, count, qs;
  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue == NULL)
    return;

  __kmp_printf("    ");
  for (i = 0; i < level; i++)
    __kmp_printf("  ");

  __kmp_printf("%p", curr_queue);

  for (i = 0; i < nproc; i++) {
    if (tq->tq_curr_thunk[i] &&
        tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) {
      __kmp_printf(" [%i]", i);
    }
  }

  __kmp_printf(": ");

  qs = curr_queue->tq_tail;

  for (count = 0; count < curr_queue->tq_nfull; ++count) {
    __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
    qs = (qs + 1) % curr_queue->tq_nslots;
  }

  __kmp_printf("\n");

  if (curr_queue->tq_first_child) {
    for (queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
         queue != NULL; queue = queue->tq_next_child) {
      __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid);
    }
  }
}

static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq,
                                       kmpc_task_queue_t *tqroot,
                                       kmp_int32 global_tid) {
  __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

  __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid);

  __kmp_printf("\n");
}
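
/* __kmp_taskq_allocate() hands out cache-line aligned memory: it
   over-allocates by one pointer plus a cache line, rounds the user-visible
   address up to a CACHE_LINE boundary, and stashes the original allocation
   address in the word just below that address so __kmpc_taskq_free() can
   recover it. */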
static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) {
  void *addr, *orig_addr;
  size_t bytes;

  KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size,
               global_tid));

  bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
  orig_addr =
      (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes);
#else
  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes));
  orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes);
#endif /* THREAD_ALLOC_FOR_TASKQ */

  if (orig_addr == NULL)
    KMP_FATAL(OutOfHeapMemory);

  addr = orig_addr;

  if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) {
    KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n"));
    addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1));
  }

  (*(void **)addr) = orig_addr;

  KB_TRACE(10,
           ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, "
            "gtid: %d\n",
            orig_addr, ((void **)addr) + 1,
            ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid));

  return (((void **)addr) + 1);
}
static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) {
  KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid));

  KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n",
                (*(((void **)p) - 1)), global_tid));
#ifdef THREAD_ALLOC_FOR_TASKQ
  __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1));
#else
  KMP_INTERNAL_FREE(*(((void **)p) - 1));
#endif /* THREAD_ALLOC_FOR_TASKQ */
}
static kmpc_task_queue_t *
__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots,
                  kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc,
                  size_t sizeof_thunk, size_t sizeof_shareds,
                  kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) {
  kmp_int32 i;
  size_t bytes;
  kmpc_task_queue_t *new_queue;
  kmpc_aligned_shared_vars_t *shared_var_array;
  char *shared_var_storage;
  char *pt;

  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);

  if (tq->tq_freelist) {
    new_queue = tq->tq_freelist;
    tq->tq_freelist = tq->tq_freelist->tq.tq_next_free;

    KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

    new_queue->tq_flags = 0;

    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
  } else {
    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);

    new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate(
        sizeof(kmpc_task_queue_t), global_tid);
    new_queue->tq_flags = 0;
  }

  /* Space for the thunks; pad the thunk size to a cache-line multiple. */
  sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));
  pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid);
  new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
  *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

  /* Chain all but the taskq-task thunk onto the queue's free list. */
  new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

  for (i = 0; i < (nthunks - 2); i++) {
    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free =
        (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk);
    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED;
  }

  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL;
  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags =
      TQF_DEALLOCATED;

  __kmp_init_lock(&new_queue->tq_link_lck);
  __kmp_init_lock(&new_queue->tq_free_thunks_lck);
  __kmp_init_lock(&new_queue->tq_queue_lck);

  /* Space for the queue slots. */
  bytes = nslots * sizeof(kmpc_aligned_queue_slot_t);
  new_queue->tq_queue =
      (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid);

  /* Space for the shared-variable blocks; each block also stores a pointer
     back to its queue and is padded to a cache-line multiple. */
  sizeof_shareds += sizeof(kmpc_task_queue_t *);
  sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));

  bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t);
  shared_var_array =
      (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid);

  bytes = nshareds * sizeof_shareds;
  shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid);

  for (i = 0; i < nshareds; i++) {
    shared_var_array[i].ai_data =
        (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds);
    shared_var_array[i].ai_data->sv_queue = new_queue;
  }
  new_queue->tq_shareds = shared_var_array;

  /* Array for the number of outstanding thunks per thread. */
  bytes = nproc * sizeof(kmpc_aligned_int32_t);
  new_queue->tq_th_thunks =
      (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid);
  new_queue->tq_nproc = nproc;

  for (i = 0; i < nproc; i++)
    new_queue->tq_th_thunks[i].ai_data = 0;

  return new_queue;
}
static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p,
                             int in_parallel, kmp_int32 global_tid) {
  int i;

  __kmpc_taskq_free(p->tq_thunk_space, global_tid);
  __kmpc_taskq_free(p->tq_queue, global_tid);

  /* Free the shared-variable storage (element 0 holds the base allocation)
     and then the array of pointers to it. */
  __kmpc_taskq_free(CCAST(kmpc_shared_vars_t *, p->tq_shareds[0].ai_data),
                    global_tid);
  __kmpc_taskq_free(p->tq_shareds, global_tid);

  p->tq_first_child = NULL;
  p->tq_next_child = NULL;
  p->tq_prev_child = NULL;
  p->tq_ref_count = -10;
  p->tq_shareds = NULL;
  p->tq_tasknum_queuing = 0;
  p->tq_tasknum_serving = 0;
  p->tq_queue = NULL;
  p->tq_thunk_space = NULL;
  p->tq_taskq_slot = NULL;
  p->tq_free_thunks = NULL;

  for (i = 0; i < p->tq_nproc; i++)
    p->tq_th_thunks[i].ai_data = 0;

  if (__kmp_env_consistency_check)
    p->tq_loc = NULL;

  KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED);
  p->tq_flags = TQF_DEALLOCATED;

  __kmpc_taskq_free(p->tq_th_thunks, global_tid);
  __kmp_destroy_lock(&p->tq_link_lck);
  __kmp_destroy_lock(&p->tq_queue_lck);
  __kmp_destroy_lock(&p->tq_free_thunks_lck);

  p->tq_th_thunks = NULL;

  /* Put the queue descriptor back on the global freelist. */
  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
  p->tq.tq_next_free = tq->tq_freelist;
  tq->tq_freelist = p;
  __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
}
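
/* Thunks are never allocated or freed individually at run time; they are
   taken from and returned to the per-queue free list built by
   __kmp_alloc_taskq(), under the queue's tq_free_thunks_lck. */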
static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue,
                                       int in_parallel, kmp_int32 global_tid) {
  kmpc_thunk_t *fl;

  __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);

  fl = queue->tq_free_thunks;

  KMP_DEBUG_ASSERT(fl != NULL);

  queue->tq_free_thunks = fl->th.th_next_free;
  fl->th_flags = 0;

  __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);

  return fl;
}
static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p,
                             int in_parallel, kmp_int32 global_tid) {
  p->th_encl_thunk = 0;

  __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);

  p->th.th_next_free = queue->tq_free_thunks;
  queue->tq_free_thunks = p;

  p->th_flags = TQF_DEALLOCATED;

  __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
}
static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid,
                                    kmpc_task_queue_t *queue,
                                    kmpc_thunk_t *thunk, int in_parallel) {
  kmp_int32 ret;

  /* Lock the queue while a slot is claimed. */
  __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

  KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots);

  queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

  if (queue->tq_head >= queue->tq_nslots)
    queue->tq_head = 0;

  (queue->tq_nfull)++;

  /* Nonzero if the enqueue filled the queue (parallel case only). */
  ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

  __kmp_release_lock(&queue->tq_queue_lck, global_tid);

  if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
    /* The first task queued into the root queue releases the worker threads
       still waiting at the split barrier in __kmpc_taskq(). */
    tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
    __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  }

  return ret;
}
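
/* Dequeuing increments the queue's ref count (so the queue cannot be freed
   while the thunk is in flight) and charges the thunk against the calling
   thread's tq_th_thunks[] quota; both are undone in
   __kmp_execute_task_from_queue() once the task body has run. */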
static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid,
                                        kmpc_task_queue_t *queue,
                                        int in_parallel) {
  kmpc_thunk_t *pt;
  int tid = __kmp_tid_from_gtid(global_tid);

  KMP_DEBUG_ASSERT(queue->tq_nfull > 0);

  if (queue->tq.tq_parent != NULL && in_parallel) {
    int ct;
    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    ct = ++(queue->tq_ref_count);
    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    KMP_DEBUG_REF_CTS(
        ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
  }

  pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

  if (queue->tq_tail >= queue->tq_nslots)
    queue->tq_tail = 0;

  if (in_parallel) {
    queue->tq_th_thunks[tid].ai_data++;

    KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding "
                   "thunks from queue %p\n",
                   global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
  }

  (queue->tq_nfull)--;

  KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

  if (in_parallel) {
    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <=
                     __KMP_TASKQ_THUNKS_PER_TH);
  }

  return pt;
}
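
/* Scheduling policy when a thread looks for work in a single queue: the
   pending taskq-task (which generates more tasks) is taken first as long as
   the queue is below its high-water mark; otherwise an ordinary thunk is
   dequeued, except that a thread never exceeds __KMP_TASKQ_THUNKS_PER_TH
   outstanding thunks from one queue, and the very last thunk of a lastprivate
   taskq is left in place until TQF_IS_LAST_TASK has been set. */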
static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid,
                                              kmpc_task_queue_t *queue) {
  kmpc_thunk_t *pt = NULL;
  int tid = __kmp_tid_from_gtid(global_tid);

  /* To prevent deadlock, do not block on the queue lock if the queue is
     already being freed. */
  if (!(queue->tq_flags & TQF_DEALLOCATED)) {

    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

    /* Check again to make sure the queue is still alive. */
    if (!(queue->tq_flags & TQF_DEALLOCATED)) {
      if ((queue->tq_taskq_slot != NULL) &&
          (queue->tq_nfull <= queue->tq_hiwat)) {
        /* The taskq-task gets priority while the queue has room. */
        pt = CCAST(kmpc_thunk_t *, queue->tq_taskq_slot);
        queue->tq_taskq_slot = NULL;
      } else if (queue->tq_nfull == 0 ||
                 queue->tq_th_thunks[tid].ai_data >=
                     __KMP_TASKQ_THUNKS_PER_TH) {
        /* Nothing to take, or this thread already holds its quota. */
        pt = NULL;
      } else if (queue->tq_nfull > 1) {
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
      } else if (queue->tq_flags & TQF_IS_LAST_TASK) {
        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
        pt->th_flags |= TQF_IS_LAST_TASK; /* let the task know it is last */
      }
    }

    __kmp_release_lock(&queue->tq_queue_lck, global_tid);
  }

  return pt;
}
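
/* __kmp_find_task_in_descendant_queue() does a depth-first walk of the
   subtree below curr_queue.  A child's ref count is bumped (under the
   parent's link lock) before that child is examined so it cannot disappear
   while unlocked, and dropped again before moving on to the next sibling. */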
static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue(kmp_int32 global_tid,
                                    kmpc_task_queue_t *curr_queue) {
  kmpc_thunk_t *pt = NULL;
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue->tq_first_child != NULL) {
    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

    queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
    if (queue == NULL) {
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      return NULL;
    }

    while (queue != NULL) {
      int ct;
      kmpc_task_queue_t *next;

      ct = ++(queue->tq_ref_count);
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

      pt = __kmp_find_task_in_queue(global_tid, queue);

      if (pt != NULL) {
        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      /* Nothing in this child; recurse into its descendants. */
      pt = __kmp_find_task_in_descendant_queue(global_tid, queue);

      if (pt != NULL) {
        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(ct >= 0);

        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

        return pt;
      }

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

      next = queue->tq_next_child;

      ct = --(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);

      queue = next;
    }

    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
  }

  return pt;
}
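
/* __kmp_find_task_in_ancestor_queue() is the complementary walk up the tree:
   while waiting for its own queue to drain, a thread may execute work from
   the queues enclosing it, and finally falls back to scanning the whole tree
   from the root. */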
static kmpc_thunk_t *
__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid,
                                  kmpc_task_queue_t *curr_queue) {
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *pt = NULL;

  if (curr_queue->tq.tq_parent != NULL) {
    queue = curr_queue->tq.tq_parent;

    while (queue != NULL) {
      if (queue->tq.tq_parent != NULL) {
        int ct;
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

        ct = ++(queue->tq_ref_count);
        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__,
                           global_tid, queue, ct));
      }

      pt = __kmp_find_task_in_queue(global_tid, queue);

      if (pt != NULL) {
        if (queue->tq.tq_parent != NULL) {
          int ct;
          __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

          ct = --(queue->tq_ref_count);
          KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                             global_tid, queue, ct));
          KMP_DEBUG_ASSERT(ct >= 0);

          __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        }

        return pt;
      }

      if (queue->tq.tq_parent != NULL) {
        int ct;
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

        ct = --(queue->tq_ref_count);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
                           global_tid, queue, ct));
        KMP_DEBUG_ASSERT(ct >= 0);
      }
      queue = queue->tq.tq_parent;

      if (queue != NULL)
        __kmp_release_lock(&queue->tq_link_lck, global_tid);
    }
  }

  /* Nothing found among the ancestors; scan the whole tree from the root. */
  pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root);

  return pt;
}
/* Returns TRUE when no thread still holds an outstanding thunk from this
   queue. */
static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) {
  int i;

  for (i = 0; i < queue->tq_nproc; i++) {
    if (queue->tq_th_thunks[i].ai_data != 0)
      return FALSE;
  }

  return TRUE;
}

static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) {
  return (queue->tq_first_child != NULL);
}
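
/* __kmp_remove_queue_from_tree() unlinks one finished queue from its parent's
   child list, waits for its ref count to drain back to 1 (i.e. no other
   thread still holds a dequeued thunk from it), and then returns its storage
   through __kmp_free_taskq(). */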
static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid,
                                         kmpc_task_queue_t *queue,
                                         int in_parallel) {
  kmpc_thunk_t *thunk;
  int i;

  KF_TRACE(50,
           ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
  KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid));

  /* A queue being removed is never the root, so it always has a parent. */
  KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL);

  __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

  KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);

  /* Unlink the queue from its siblings and parent. */
  if (queue->tq_prev_child != NULL)
    queue->tq_prev_child->tq_next_child = queue->tq_next_child;
  if (queue->tq_next_child != NULL)
    queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
  if (queue->tq.tq_parent->tq_first_child == queue)
    queue->tq.tq_parent->tq_first_child = queue->tq_next_child;

  queue->tq_prev_child = NULL;
  queue->tq_next_child = NULL;

  if (in_parallel) {
    KMP_DEBUG_REF_CTS(
        ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
         __LINE__, global_tid, queue, queue->tq_ref_count));

    /* Wait until no other thread still holds a reference to this queue. */
    while (queue->tq_ref_count > 1) {
      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

      KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
                     NULL);

      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
    }
  }

  __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

  KMP_DEBUG_REF_CTS(
      ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue));

  /* All tasks must be queued and finished, and all thunks returned to the
     free list, before the storage is released. */
  KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
  KMP_DEBUG_ASSERT(queue->tq_nfull == 0);

  for (i = 0; i < queue->tq_nproc; i++) {
    KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
  }

  for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
       i++, thunk = thunk->th.th_next_free)
    ;

  KMP_DEBUG_ASSERT(
      i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));

  __kmp_free_taskq(tq, queue, TRUE, global_tid);

  KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
  KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
}
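
/* For nowait taskqs the enclosing construct does not stick around to clean
   up, so __kmp_find_and_remove_finished_child_taskq() is used, while a thread
   waits in __kmpc_end_taskq(), to prune descendants that have queued and
   finished all of their tasks and have no children of their own. */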
static void __kmp_find_and_remove_finished_child_taskq(
    kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) {
  kmpc_task_queue_t *queue = curr_queue;

  if (curr_queue->tq_first_child != NULL) {
    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

    queue = CCAST(kmpc_task_queue_t *, curr_queue->tq_first_child);
    if (queue == NULL) {
      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
      return;
    }

    while (queue != NULL) {
      kmpc_task_queue_t *next;
      int ct = ++(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));

      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);

      if (queue->tq_flags & TQF_IS_NOWAIT) {
        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);

        if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) &&
            (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) &&
            !__kmp_taskq_has_any_children(queue)) {
          /* Only remove this queue if no other thread has claimed it; use a
             test-lock so we never block on a queue that is still in use. */
          if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) {
            if (!(queue->tq_flags & TQF_DEALLOCATED)) {
              queue->tq_flags |= TQF_DEALLOCATED;
              __kmp_release_lock(&queue->tq_queue_lck, global_tid);

              __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);

              /* Do not touch the queue past this point; it may be freed. */
            } else {
              __kmp_release_lock(&queue->tq_queue_lck, global_tid);
            }
          }
        }
      }

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

      next = queue->tq_next_child;

      ct = --(queue->tq_ref_count);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);

      queue = next;
    }

    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
  }
}
static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid,
                                         kmpc_task_queue_t *queue) {
  kmpc_task_queue_t *next_child;

  queue = CCAST(kmpc_task_queue_t *, queue->tq_first_child);

  while (queue != NULL) {
    __kmp_remove_all_child_taskq(tq, global_tid, queue);

    next_child = queue->tq_next_child;
    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE);

    queue = next_child;
  }
}
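
/* __kmp_execute_task_from_queue() runs one dequeued thunk: it rebinds the
   thunk to the right shared-variable block, pushes it on the thread's
   curr_thunk stack for the duration of the call, invokes th_task(), and then
   returns the thunk to the free list, releasing the per-thread quota slot and
   the queue reference taken by __kmp_dequeue_task(). */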
static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc,
                                          kmp_int32 global_tid,
                                          kmpc_thunk_t *thunk,
                                          int in_parallel) {
  kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
  kmp_int32 tid = __kmp_tid_from_gtid(global_tid);

  KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
  KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
    /* Regular tasks in the root queue use the per-thread shared-variable
       block; all other tasks share block 0 of their queue. */
    kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
    thunk->th.th_shareds =
        CCAST(kmpc_shared_vars_t *, queue->tq_shareds[index].ai_data);

    if (__kmp_env_consistency_check) {
      __kmp_push_workshare(global_tid,
                           (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
                                                              : ct_task,
                           queue->tq_loc);
    }
  } else {
    if (__kmp_env_consistency_check)
      __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc);
  }

  if (in_parallel) {
    thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = thunk;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  }

  KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk,
                queue, global_tid));
  thunk->th_task(global_tid, thunk);
  KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue,
                global_tid));

  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
    if (__kmp_env_consistency_check)
      __kmp_pop_workshare(global_tid,
                          (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
                                                             : ct_task,
                          queue->tq_loc);

    if (in_parallel) {
      tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
      thunk->th_encl_thunk = NULL;
      KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
    }

    if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
      __kmp_taskq_check_ordered(global_tid, thunk);
    }

    __kmp_free_thunk(queue, thunk, in_parallel, global_tid);

    KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n",
                   global_tid, thunk));
    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

    if (in_parallel) {
      KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);

      KF_TRACE(
          200,
          ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
           global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue));

      queue->tq_th_thunks[tid].ai_data--;
    }

    /* The queue may now be released by whoever waits on its ref count. */
    if (queue->tq.tq_parent != NULL && in_parallel) {
      int ct;
      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
      ct = --(queue->tq_ref_count);
      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
      KMP_DEBUG_REF_CTS(
          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
      KMP_DEBUG_ASSERT(ct >= 0);
    }
  }
}
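
/* --------------------------------------------------------------------------
   Public entry points for the old Intel "taskq" extension.  Pieced together
   from the routines below, the compiler-generated code is expected to use
   them roughly in this order (a sketch of the protocol, not the exact
   generated code):

     __kmpc_taskq(...)            create the queue and the taskq-task thunk
     __kmpc_task_buffer(...)      get an empty thunk     \  once per
     __kmpc_task(...)             enqueue the thunk      /  generated task
     __kmpc_taskq_task(...)       re-arm the taskq-task thunk in the queue
     __kmpc_end_taskq_task(...)   all tasks have been queued
     __kmpc_end_taskq(...)        drain the queue, synchronize, free storage
   -------------------------------------------------------------------------- */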
kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid,
                           kmpc_task_t taskq_task, size_t sizeof_thunk,
                           size_t sizeof_shareds, kmp_int32 flags,
                           kmpc_shared_vars_t **shareds) {
  int in_parallel;
  kmp_int32 nslots, nthunks, nshareds, nproc;
  kmpc_task_queue_t *new_queue, *curr_queue;
  kmpc_thunk_t *new_taskq_thunk;
  kmp_info_t *th;
  kmp_team_t *team;
  kmp_taskq_t *tq;
  kmp_int32 tid;

  KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid));

  th = __kmp_threads[global_tid];
  team = th->th.th_team;
  tq = &team->t.t_taskq;
  nproc = team->t.t_nproc;
  tid = __kmp_tid_from_gtid(global_tid);

  /* Find out whether this is a parallel or a serialized taskq. */
  in_parallel = in_parallel_context(team);

  if (!tq->tq_root) {
    if (in_parallel) {
      /* Vector ORDERED SECTION handling to the taskq version. */
      th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
      th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
    }

    if (in_parallel) {
      /* The threads that do not build the root queue wait here until it
         exists, pick up its shared-variable block and return; they are
         released to dequeue work when the first task is enqueued (see
         TQF_RELEASE_WORKERS in __kmp_enqueue_task). */
      if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
        *shareds =
            CCAST(kmpc_shared_vars_t *, tq->tq_root->tq_shareds[tid].ai_data);
        KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));

        return NULL;
      }
    }

    /* First time through: set up the per-team taskq data structures. */
    if (tq->tq_curr_thunk_capacity < nproc) {
      if (tq->tq_curr_thunk)
        __kmp_free(tq->tq_curr_thunk);
      else
        /* Only needed once, while tq_curr_thunk is still NULL. */
        __kmp_init_lock(&tq->tq_freelist_lck);

      tq->tq_curr_thunk =
          (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *));
      tq->tq_curr_thunk_capacity = nproc;
    }

    if (in_parallel)
      tq->tq_global_flags = TQF_RELEASE_WORKERS;
  }

  /* Size the queue; nthunks bounds the number of thunks that can be in
     flight at once, plus one for the taskq-task itself. */
  nslots = (in_parallel) ? (2 * nproc) : 1;

  nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1)
                          : nslots + 2;

  nshareds = (!tq->tq_root && in_parallel) ? nproc : 1;

  new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds,
                                nproc, sizeof_thunk, sizeof_shareds,
                                &new_taskq_thunk, global_tid);

  /* Only the interface flags come from the compiler. */
  new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;

  if (in_parallel) {
    new_queue->tq_tasknum_queuing = 0;
    new_queue->tq_tasknum_serving = 0;
    new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
  }

  new_queue->tq_taskq_slot = NULL;
  new_queue->tq_nslots = nslots;
  new_queue->tq_hiwat = HIGH_WATER_MARK(nslots);
  new_queue->tq_nfull = 0;
  new_queue->tq_head = 0;
  new_queue->tq_tail = 0;
  new_queue->tq_loc = loc;

  if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
    /* Prepare to serve the first-queued task's ORDERED directive. */
    new_queue->tq_tasknum_serving = 1;

    /* Vector ORDERED SECTION handling to the taskq version. */
    th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
    th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
  }

  /* Create the thunk for the taskq-task in the new queue. */
  *shareds = CCAST(kmpc_shared_vars_t *, new_queue->tq_shareds[0].ai_data);

  new_taskq_thunk->th.th_shareds = *shareds;
  new_taskq_thunk->th_task = taskq_task;
  new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
  new_taskq_thunk->th_status = 0;

  KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK);

  /* Insert the new queue into the tree only after all fields are set up. */
  if (in_parallel) {
    if (!tq->tq_root) {
      /* The first taskq becomes the root of the tree. */
      new_queue->tq.tq_parent = NULL;
      new_queue->tq_first_child = NULL;
      new_queue->tq_next_child = NULL;
      new_queue->tq_prev_child = NULL;
      new_queue->tq_ref_count = 1; /* for the thread that built the queue */
      tq->tq_root = new_queue;
    } else {
      curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
      new_queue->tq.tq_parent = curr_queue;
      new_queue->tq_first_child = NULL;
      new_queue->tq_prev_child = NULL;
      new_queue->tq_ref_count = 1; /* for the thread that built the queue */

      KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__,
                         global_tid, new_queue, new_queue->tq_ref_count));

      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);

      new_queue->tq_next_child =
          CCAST(struct kmpc_task_queue_t *, curr_queue->tq_first_child);

      if (curr_queue->tq_first_child != NULL)
        curr_queue->tq_first_child->tq_prev_child = new_queue;

      curr_queue->tq_first_child = new_queue;

      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
    }

    /* Push the taskq-task thunk onto this thread's thunk stack. */
    new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
    tq->tq_curr_thunk[tid] = new_taskq_thunk;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  } else {
    new_taskq_thunk->th_encl_thunk = 0;
    new_queue->tq.tq_parent = NULL;
    new_queue->tq_first_child = NULL;
    new_queue->tq_next_child = NULL;
    new_queue->tq_prev_child = NULL;
    new_queue->tq_ref_count = 1;
  }

  KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
  KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid));

  if (in_parallel) {
    KF_TRACE(25,
             ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
  } else {
    KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue,
                  global_tid));
  }

  KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid));

  if (in_parallel) {
    KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
  }

  if (__kmp_env_consistency_check)
    __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc);

  KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));

  return new_taskq_thunk;
}
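
/* __kmpc_end_taskq() is where the scheduling loop lives: in a parallel region
   every thread of the team ends up here and keeps dequeuing work from its own
   queue, its descendants and (while waiting) its ancestors until all tasks
   are queued and finished, after which the queue is unlinked and freed.  In
   the serialized case at most one task can still be pending; it is executed
   and the queue is freed immediately. */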
void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
                      kmpc_thunk_t *taskq_thunk) {
  kmp_taskq_t *tq;
  int in_parallel;
  kmp_int32 is_outermost;
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *thunk;
  kmp_info_t *th;
  int nproc;
  int i;

  KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;

  /* For the outermost taskq only, the compiler passes a NULL thunk. */
  queue = (taskq_thunk == NULL) ? tq->tq_root
                                : taskq_thunk->th.th_shareds->sv_queue;

  KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
  is_outermost = (queue == tq->tq_root);
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  if (in_parallel) {
    kmp_uint32 spins;

    /* Safeguard: release the workers in case the outermost taskq never
       queued a task. */
    if (is_outermost && (KMP_MASTER_GTID(global_tid))) {
      if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
        /* No lock needed: the workers are still in spin mode. */
        tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

        __kmp_end_split_barrier(bs_plain_barrier, global_tid);
      }
    }

    /* Keep dequeuing work until all tasks have been queued and dequeued. */
    do {
      /* Wait until something appears in the queue. */
      KMP_INIT_YIELD(spins);

      while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
             (!__kmp_taskq_has_any_children(queue)) &&
             (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
        KMP_YIELD_WHEN(TRUE, spins);
      }

      /* Execute anything that can be taken from this queue. */
      while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) &&
             (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) {
        KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk,
                      queue, global_tid));
        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

      /* If there is nothing here, steal from the descendant queues. */
      if ((__kmp_taskq_has_any_children(queue)) &&
          (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
              NULL) {
        KF_TRACE(50,
                 ("Stole thunk: %p in descendant queue: %p while waiting in "
                  "queue: %p (%d)\n",
                  thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

    } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) ||
             (queue->tq_nfull != 0));

    KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue,
                  global_tid));

    /* While the dequeued tasks are still running, keep stealing from the
       descendant queues. */
    while ((!__kmp_taskq_tasks_finished(queue)) &&
           (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
               NULL) {
      KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in "
                    "queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
    }

    KF_TRACE(50, ("No work found in descendent queues or all work finished in "
                  "queue: %p (%d)\n",
                  queue, global_tid));

    if (!is_outermost) {
      /* With NOWAIT, drop the reference and return without waiting. */
      if (queue->tq_flags & TQF_IS_NOWAIT) {
        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
        queue->tq_ref_count--;
        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

        KE_TRACE(
            10,
            ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));

        return;
      }

      __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);

      /* Wait until all tasks are finished and no child queues remain,
         stealing from the ancestor queues in the meantime. */
      KMP_INIT_YIELD(spins);

      while (!__kmp_taskq_tasks_finished(queue) ||
             __kmp_taskq_has_any_children(queue)) {
        thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue);

        if (thunk != NULL) {
          KF_TRACE(50,
                   ("Stole thunk: %p in ancestor queue: %p while waiting in "
                    "queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
          __kmp_execute_task_from_queue(tq, loc, global_tid, thunk,
                                        in_parallel);
        }

        KMP_YIELD_WHEN(thunk == NULL, spins);

        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
      }

      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
      if (!(queue->tq_flags & TQF_DEALLOCATED)) {
        queue->tq_flags |= TQF_DEALLOCATED;
      }
      __kmp_release_lock(&queue->tq_queue_lck, global_tid);

      /* Only the thread that allocated the queue removes it from the tree. */
      if (taskq_thunk != NULL) {
        __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
      }

      KE_TRACE(
          10,
          ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n",
           global_tid));

      return;
    }

    /* Outermost queue: keep stealing from descendants until every task has
       finished. */
    KMP_INIT_YIELD(spins);

    while (!__kmp_taskq_tasks_finished(queue)) {
      thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

      if (thunk != NULL) {
        KF_TRACE(50,
                 ("Stole thunk: %p in descendant queue: %p while waiting in "
                  "queue: %p (%d)\n",
                  thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
      }

      KMP_YIELD_WHEN(thunk == NULL, spins);
    }

    /* The barrier keeps the queue alive until every thread is done with it;
       the single thread for which __kmp_barrier() returns 0 tears down what
       is left and then releases the others. */
    if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
      __kmp_remove_all_child_taskq(tq, global_tid, queue);

      /* Now destroy the root queue. */
      KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n",
                     global_tid, queue));
      KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

      /* The root queue has no parent or siblings, all children are gone
         because of the barrier above, and every thunk is back on the free
         list. */
      KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) &&
                       (queue->tq_next_child == NULL));

      KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);

      for (i = 0; i < nproc; i++) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
      }

      for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
           i++, thunk = thunk->th.th_next_free)
        ;

      KMP_DEBUG_ASSERT(i ==
                       queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));

      for (i = 0; i < nproc; i++) {
        KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]);
      }

      /* Unlink and release the root queue entry. */
      tq->tq_root = NULL;

      KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n",
                    queue, global_tid));

      queue->tq_flags |= TQF_DEALLOCATED;
      __kmp_free_taskq(tq, queue, in_parallel, global_tid);

      KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));

      /* Release the other threads now that the data structures are gone. */
      __kmp_end_split_barrier(bs_plain_barrier, global_tid);
    }

    th = __kmp_threads[global_tid];

    /* Restore the default ORDERED SECTION handling. */
    th->th.th_dispatch->th_deo_fcn = 0;
    th->th.th_dispatch->th_dxo_fcn = 0;
  } else {
    /* Serialized taskq: dequeue and execute the last task, if any, then free
       the queue. */
    if (queue->tq_nfull > 0) {
      KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

      thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

      if (queue->tq_flags & TQF_IS_LAST_TASK) {
        /* TQF_IS_LASTPRIVATE and __kmpc_end_taskq_task() has already run, so
           this is the last task; run it with TQF_IS_LAST_TASK so the
           lastprivate copy-out happens. */
        thunk->th_flags |= TQF_IS_LAST_TASK;
      }

      KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
                    thunk, queue));

      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
    }

    /* Destroy the unattached serial queue now that there is no more work. */
    KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n",
                   queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

    for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
         i++, thunk = thunk->th.th_next_free)
      ;
    KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1);

    KF_TRACE(50,
             ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));

    queue->tq_flags |= TQF_DEALLOCATED;
    __kmp_free_taskq(tq, queue, in_parallel, global_tid);
  }

  KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid));
}
kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  int in_parallel;
  kmp_int32 ret;

  KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid));

  /* The thunk must be a regular task thunk, never the taskq-task thunk. */
  KMP_DEBUG_ASSERT(!(thunk->th_flags & TQF_TASKQ_TASK));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
    thunk->th_tasknum = ++queue->tq_tasknum_queuing;

  /* For serial execution, run the previously queued task (if any) before
     enqueuing this one, so the queue never holds more than one task. */
  if (!in_parallel && queue->tq_nfull > 0) {
    kmpc_thunk_t *prev_thunk;

    KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

    prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

    KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
                  prev_thunk, queue));

    __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel);
  }

  KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));

  ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel);

  KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));

  KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid));

  return ret;
}
void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk,
                       kmp_int32 status) {
  kmpc_task_queue_t *queue;
  kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  int tid = __kmp_tid_from_gtid(global_tid);

  KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid));
  KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));

  queue = thunk->th.th_shareds->sv_queue;

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  /* thunk->th_task is the taskq task, and the slot must currently be empty. */
  KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK);
  KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL);

  /* Dequeue the taskq thunk from the curr_thunk stack. */
  tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
  thunk->th_encl_thunk = NULL;

  KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));

  thunk->th_status = status;

  /* Park the thunk in the taskq slot for the waiting threads. */
  queue->tq_taskq_slot = thunk;

  KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid));
}
void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
                           kmpc_thunk_t *thunk) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  int in_parallel;
  int tid;

  KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
  tid = __kmp_tid_from_gtid(global_tid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_taskq, loc);

  if (in_parallel) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags),
                       TQF_ALL_TASKS_QUEUED);
#else
    {
      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

      queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
      __kmp_release_lock(&queue->tq_queue_lck, global_tid);
    }
#endif
  }

  if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
    /* Mark the queue as holding its last task; when that task executes it
       runs with TQF_IS_LAST_TASK set so the lastprivate copy-out happens. */
    if (!in_parallel) {
      /* No synchronization needed in the serialized case. */
      queue->tq_flags |= TQF_IS_LAST_TASK;
    } else {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags),
                         TQF_IS_LAST_TASK);
#else
      {
        __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);

        queue->tq_flags |= TQF_IS_LAST_TASK;
        __kmp_release_lock(&queue->tq_queue_lck, global_tid);
      }
#endif
    }
  }

  /* Dequeue the taskq-task thunk from the curr_thunk stack. */
  if (in_parallel) {
    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
  }

  KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
}
kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid,
                                 kmpc_thunk_t *taskq_thunk, kmpc_task_t task) {
  kmp_taskq_t *tq;
  kmpc_task_queue_t *queue;
  kmpc_thunk_t *new_thunk;
  int in_parallel;

  KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid));

  /* The thunk passed in must be the taskq-task thunk for this queue. */
  KMP_DEBUG_ASSERT(taskq_thunk->th_flags & TQF_TASKQ_TASK);

  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
  queue = taskq_thunk->th.th_shareds->sv_queue;
  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

  /* All regular tasks share the queue-level shared-variable block 0. */
  new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid);
  new_thunk->th.th_shareds =
      CCAST(kmpc_shared_vars_t *, queue->tq_shareds[0].ai_data);
  new_thunk->th_encl_thunk = NULL;
  new_thunk->th_task = task;

  /* Only the interface flags are copied over from the taskq's flags. */
  new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;

  new_thunk->th_status = 0;

  KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK));

  KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
  KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid));

  KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid));

  return new_thunk;
}