17 #include "kmp_atomic.h"
18 #include "kmp_wrapper_getpid.h"
19 #include "kmp_environment.h"
22 #include "kmp_settings.h"
25 #include "kmp_error.h"
26 #include "kmp_stats.h"
27 #include "kmp_wait_release.h"
30 #include "ompt-specific.h"
34 #define KMP_USE_PRCTL 0
35 #define KMP_USE_POOLED_ALLOC 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
kmp_info_t __kmp_monitor;

void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team );
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc );

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc );

static int __kmp_expand_threads( int nWish, int nNeed );
static int __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void );
static void __kmp_reap_thread( kmp_info_t *thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
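/* Determine the global thread id (gtid) of the calling thread.  Depending on
   __kmp_gtid_mode this uses compiler TLS (KMP_TDATA_GTID, mode 3), keyed TLS
   via __kmp_gtid_get_specific() (mode 2), or an internal search that matches
   the address of a local variable against the registered stack ranges of all
   known threads.  The stack-based path also widens the recorded stack bounds
   of uber (root) threads when the current stack address falls outside the
   previously registered window. */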
__kmp_get_global_thread_id( )
{
    kmp_info_t **other_threads;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char*) & stack_data;
    other_threads = __kmp_threads;

    for ( i = 0; i < __kmp_threads_capacity; i++ ) {

        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        if ( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if ( stack_diff <= stack_size ) {
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    if ( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if ( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);

    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);

    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
    }
__kmp_get_global_thread_id_reg( )
{

    if ( !__kmp_init_serial ) {

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();

        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();

    if ( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if ( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();

            gtid = __kmp_register_root(FALSE);

        __kmp_release_bootstrap_lock( &__kmp_initz_lock );

    KMP_DEBUG_ASSERT( gtid >= 0 );
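/* Check whether the stack of the given thread overlaps the stack of any other
   registered thread.  With KMP_STORAGE_MAP the stack bounds are printed; when
   consistency checks are enabled an overlap is reported as a fatal
   StackOverlap error with a hint to raise the stack limit. */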
__kmp_check_stack_overlap( kmp_info_t *th )
{
    char *stack_beg = NULL;
    char *stack_end = NULL;

    KA_TRACE( 10, ( "__kmp_check_stack_overlap: called\n" ) );
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if ( gtid == KMP_GTID_MONITOR ) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );

            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );

    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid) )

        KA_TRACE( 10, ( "__kmp_check_stack_overlap: performing extensive checking\n" ) );
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        for ( f = 0; f < __kmp_threads_capacity; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if ( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if ( (stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                     (stack_end > other_stack_beg && stack_end < other_stack_end) ) {

                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                                                      (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                      "th_%d stack (overlapped)",
                                                      __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );

    KA_TRACE( 10, ( "__kmp_check_stack_overlap: returning\n" ) );
__kmp_infinite_loop( void )
{
    static int done = FALSE;

#define MAX_MESSAGE 512
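/* Print one line of the OMP storage map for the address range [p1,p2) of the
   given size.  When KMP_PRINT_DATA_PLACEMENT is enabled and the range is
   consistent (p2 - p1 == size), the memory (NUMA) node of the underlying
   pages is reported as well. */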
__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];

    va_start( ap, format );
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2,
                  (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( &__kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT

    if ( p1 <= p2 && (char*)p2 - (char*)p1 == size ) {
        if ( __kmp_storage_map_verbose ) {
            node = __kmp_get_host_node(p1);

                __kmp_storage_map_verbose = FALSE;

            int localProc = __kmp_get_cpu_from_gtid(gtid);

            p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
            p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );

                __kmp_printf_no_lock( " GTID %d localNode %d\n", gtid, localProc>>1 );

                __kmp_printf_no_lock( " GTID %d\n", gtid );

                    (char*)p1 += PAGE_SIZE;
                } while ( p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode );
                __kmp_printf_no_lock( " %p-%p memNode %d\n", last, (char*)p1 - 1, lastNode );

            __kmp_printf_no_lock( " %p-%p memNode %d\n", p1, (char*)p1 + (PAGE_SIZE - 1),
                                  __kmp_get_host_node(p1) );

                __kmp_printf_no_lock( " %p-%p memNode %d\n", p2, (char*)p2 + (PAGE_SIZE - 1),
                                      __kmp_get_host_node(p2) );

        __kmp_printf_no_lock( " %s\n", KMP_I18N_STR( StorageMapWarning ) );

    __kmp_release_bootstrap_lock( &__kmp_stdio_lock );
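/* Emit a runtime warning unless warnings are disabled
   (__kmp_generate_warnings == kmp_warnings_off).  The format string is
   wrapped into an "OMP warning:" line and printed under the stdio bootstrap
   lock. */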
__kmp_warn( char const *format, ... )
{
    char buffer[MAX_MESSAGE];

    if ( __kmp_generate_warnings == kmp_warnings_off ) {

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( &__kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( &__kmp_stdio_lock );
__kmp_abort_process()
{
    __kmp_acquire_bootstrap_lock( &__kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();

    if ( KMP_OS_WINDOWS ) {

        __kmp_global.g.g_abort = SIGABRT;

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( &__kmp_exit_lock );

__kmp_abort_thread( void )
{
    __kmp_infinite_loop();
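/* The two helpers below dump the layout of the per-thread (kmp_info_t) and
   per-team (kmp_team_t) data structures, including their barrier and dispatch
   sub-arrays, through __kmp_print_storage_map_gtid(). */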
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
{
    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid );
#endif // KMP_FAST_REDUCTION_BARRIER
__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
{
    int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier,
                                  "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff,
                                  "%s_%d.t_disp_buffer", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
#ifdef KMP_DYNAMIC_LIB

__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    __kmp_init_bootstrap_lock( lck );

__kmp_reset_locks_on_process_detach( int gtid_req ) {

    for ( i = 0; i < __kmp_threads_capacity; ++i ) {
        if ( !__kmp_threads ) continue;
        kmp_info_t *th = __kmp_threads[ i ];
        if ( th == NULL ) continue;
        int gtid = th->th.th_info.ds.ds_gtid;
        if ( gtid == gtid_req ) continue;
        if ( gtid < 0 ) continue;

        int alive = __kmp_is_thread_alive( th, &exit_val );

        if ( thread_count == 0 ) break;

    __kmp_reset_lock( &__kmp_forkjoin_lock );

    __kmp_reset_lock( &__kmp_stdio_lock );
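/* Windows DLL entry point.  On process detach the bootstrap locks are reset
   first (the detach may happen while another thread was killed holding them)
   and the library is shut down; on thread detach the per-thread runtime state
   is released. */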
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch ( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ) );

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific() ) );

            if ( lpReserved != NULL )

                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ) );

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific() ) );

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
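/* Flip the low bit of __kmp_yield_init, which appears to select the library's
   wait behaviour (yielding vs. non-yielding spin); the previous setting is
   returned so the caller can restore it later. */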
__kmp_change_library( int status )
{
    old_status = __kmp_yield_init & 1;

        __kmp_yield_init |= 1;

        __kmp_yield_init &= ~1;
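/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
   "ordered" region when built with BUILD_PARALLEL_ORDERED: a thread waits
   until t_ordered.dt.t_value reaches its own tid, and on exit passes the token
   to the next tid, notifying OMPT when the ompt_event_release_ordered callback
   is registered. */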
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );

    if ( __kmp_env_consistency_check ) {
        if ( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );

            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );

#ifdef BUILD_PARALLEL_ORDERED
    if ( !team->t.t_serialized ) {

        KMP_WAIT_YIELD( &team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL );
__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );

    if ( __kmp_env_consistency_check ) {
        if ( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );

#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {

        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
        if ((ompt_status == ompt_status_track_callback) &&
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {

            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
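/* Claim the SINGLE construct for the calling thread.  The first thread of the
   team to advance t_construct with a compare-and-store wins; a serialized
   team always wins.  With consistency checks enabled the construct is pushed
   onto or verified against the workshare stack. */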
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
{

    if ( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {

        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;

        status = KMP_COMPARE_AND_STORE_ACQ32( &team->t.t_construct, old_this,
                                              th->th.th_local.this_construct );

        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
             th->th.th_teams_microtask == NULL &&
             team->t.t_active_level == 1 )

            __kmp_itt_metadata_single( id_ref );

    if ( __kmp_env_consistency_check ) {
        if ( status && push_ws ) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );

            __kmp_check_workshare( gtid, ct_psingle, id_ref );

        __kmp_itt_single_start( gtid );
__kmp_exit_single( int gtid )
{
    __kmp_itt_single_end( gtid );

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
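/* Decide how many threads can actually be reserved for a new parallel region
   given the request set_nthreads: the region is serialized when nesting is
   off and the root is already in a parallel region, the count is trimmed by
   the dynamic adjustment mode (load balance, thread limit, or random), by
   KMP_ALL_THREADS/__kmp_max_nth, and by the capacity of the threads array,
   and the final number (>= 1) is returned to __kmp_fork_call(). */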
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads

    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    if ( set_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ));

    if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
         ) ) || ( __kmp_library == library_serial ) ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ));

    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {

#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",

        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",

        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));

            new_nthreads = set_nthreads;

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",

            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ));

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {

        if ( ! get__dynamic_2( parent_team, master_tid )
             && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;

                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),

        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",

        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;

    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > capacity ) {

        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {

            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            if ( ! get__dynamic_2( parent_team, master_tid )
                 && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {

                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),

                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
    return new_nthreads;
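/* Populate a freshly allocated team: install the master as thread 0, allocate
   (or reuse from the pool) the remaining workers, propagate the teams
   construct state (th_teams_microtask/level/size) to each worker, seed the
   workers' barrier arrived counters from the team counters, and compute
   affinity places when OMP 4.0 affinity is enabled.  The root's hot team (and
   nested hot teams, if enabled) skip the per-worker setup. */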
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )
{

    KA_TRACE( 10, ( "__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );

    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;

    int level = team->t.t_active_level - 1;
    if ( master_th->th.th_teams_microtask ) {
        if ( master_th->th.th_teams_size.nteams > 1 ) {

        if ( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
             master_th->th.th_teams_level == team->t.t_level ) {

    if ( level < __kmp_hot_teams_max_level ) {
        if ( hot_teams[level].hot_team ) {

            KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

            hot_teams[level].hot_team = team;
            hot_teams[level].hot_team_nth = team->t.t_nproc;

    use_hot_team = team == root->r.r_hot_team;

    if ( !use_hot_team ) {

        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        for ( i = 1; i < team->t.t_nproc; i++ ) {

            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );

            KA_TRACE( 20, ( "__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
                            __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                            __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                            team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                            team->t.t_bar[ bs_plain_barrier ].b_arrived ) );

            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;

            kmp_balign_t *balign = team->t.t_threads[ i ]->th.th_bar;
            for ( b = 0; b < bs_last_barrier; ++b ) {
                balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

                balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places( team );
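/* On x86/x86_64, propagateFPControl() captures the master's x87 control word
   and MXCSR into the team descriptor when KMP_INHERIT_FP_CONTROL is in effect,
   and updateHWFPControl() reloads those saved values in any worker whose
   current FP state differs.  On other architectures both are no-ops. */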
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

propagateFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            team->t.t_x87_fpu_control_word = x87_fpu_control_word;

        if ( team->t.t_mxcsr != mxcsr ) {
            team->t.t_mxcsr = mxcsr;

        if (!team->t.t_fp_control_saved) {
            team->t.t_fp_control_saved = TRUE;

    if (team->t.t_fp_control_saved)
        team->t.t_fp_control_saved = FALSE;

updateHWFPControl(kmp_team_t * team)
{
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {

        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );

# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x) ((void)0)
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc );
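/* Execute a parallel region with a team of one (a "serialized" parallel).
   The thread's serial team is reused or a fresh single-thread team is
   allocated, ICVs and nesting levels are pushed, a per-level dispatch buffer
   is installed, and OMPT/ITT bookkeeping is performed.  Nested serialized
   regions simply bump t_serialized and t_level. */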
__kmp_serialized_parallel( ident_t *loc, kmp_int32 global_tid )
{
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ( "__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    KMP_DEBUG_ASSERT( serial_team );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;

    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {

        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

    this_thr->th.th_set_proc_bind = proc_bind_default;

    if ( this_thr->th.th_team != serial_team ) {

        int level = this_thr->th.th_team->t.t_level;

        if ( serial_team->t.t_serialized ) {

            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );

        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

                   & this_thr->th.th_current_task->td_icvs,
                   & this_thr->th.th_current_task->td_parent->td_icvs );

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];

        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];

        serial_team->t.t_pkfn = (microtask_t)( ~0 );

        this_thr->th.th_info.ds.ds_tid = 0;

        this_thr->th.th_team_nproc      = 1;
        this_thr->th.th_team_master     = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl( serial_team );

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);

        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        int level = this_thr->th.th_team->t.t_level;

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];

        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

        dispatch_private_info_t *disp_buffer = (dispatch_private_info_t *)
            __kmp_allocate( sizeof( dispatch_private_info_t ) );
        disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );

    if ( serial_team->t.t_level == 1
         && this_thr->th.th_teams_microtask == NULL

        if ( ( __itt_get_timestamp_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )

            serial_team->t.t_region_time = this_thr->th.th_frame_time_serialized = __itt_get_timestamp();

        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && ! __kmp_forkjoin_frames_mode )

            this_thr->th.th_ident = loc;

            __kmp_itt_region_forking( global_tid, this_thr->th.th_team_nproc, 0, 1 );
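/* __kmp_fork_call() is the central entry point for starting a parallel region
   (reached from __kmpc_fork_call and the GNU compatibility layer).  It decides
   between the teams-construct path, full serialization (one thread), and the
   general path that reserves threads, allocates or reuses a team, copies the
   outlined-function arguments, wires up task teams, ICVs and FP state, and
   releases the workers via __kmp_internal_fork() before the master invokes
   the microtask itself. */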
    enum fork_context_e call_context,

    void *unwrapped_task,

    microtask_t microtask,

#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

    int master_this_cons;

    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    int master_set_numthreads;

#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;

    KA_TRACE( 20, ( "__kmp_fork_call: enter T#%d\n", gtid ));
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {

        void *dummy = KMP_ALLOCA(__kmp_stkpadding);

        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);

    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if ( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    master_th = __kmp_threads[ gtid ];
    parent_team = master_th->th.th_team;
    master_tid = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

    if (ompt_status & ompt_status_track) {
        ompt_parallel_id = __ompt_parallel_id_new(gtid);
        ompt_task_id = __ompt_get_task_id_internal(0);
        ompt_frame = __ompt_get_task_frame_internal(0);

    level = parent_team->t.t_level;

    active_level = parent_team->t.t_active_level;
    teams_level = master_th->th.th_teams_level;

#if KMP_NESTED_HOT_TEAMS
    p_hot_teams = &master_th->th.th_hot_teams;
    if ( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
            sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1;

    if ((ompt_status == ompt_status_track_callback) &&
        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
        int team_size = master_set_numthreads;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
            ompt_task_id, ompt_frame, ompt_parallel_id,
            team_size, unwrapped_task);
    master_th->th.th_ident = loc;

    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {

        parent_team->t.t_ident = loc;
        parent_team->t.t_argc  = argc;
        argv = (void**)parent_team->t.t_argv;
        for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );

            *argv++ = va_arg( ap, void * );

        if ( parent_team == master_th->th.th_serial_team ) {

            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--;

            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            if (ompt_status & ompt_status_track) {
                __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                        unwrapped_task, ompt_parallel_id);
                lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                my_task_id = lw_taskteam.ompt_task_info.task_id;
                my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
                if ((ompt_status == ompt_status_track_callback) &&
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                        my_parallel_id, my_task_id);

                master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

                exit_runtime_p = &dummy;

            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv

            if (ompt_status & ompt_status_track) {

                lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                if ((ompt_status == ompt_status_track_callback) &&
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                        ompt_parallel_id, ompt_task_id);

                __ompt_lw_taskteam_unlink(master_th);

                lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                if ((ompt_status == ompt_status_track_callback) &&
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                        ompt_parallel_id, ompt_task_id);

                master_th->th.ompt_thread_info.state = ompt_state_overhead;

        parent_team->t.t_pkfn  = microtask;

        parent_team->t.ompt_team_info.microtask = unwrapped_task;

        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        if ( master_set_numthreads ) {
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {

                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for ( i = 0; i < master_set_numthreads; ++i ) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;

            master_th->th.th_set_nproc = 0;

        if ( __kmp_debugging ) {
            int nth = __kmp_omp_num_threads( loc );

                master_set_numthreads = nth;

        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                        root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n",
                        root, parent_team, master_th, gtid ) );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        if (! parent_team->t.t_invoke( gtid )) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {

        nthreads = master_set_numthreads ?
            master_set_numthreads : get__nproc_2( parent_team, master_tid );
        nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads

                       , ((ap==NULL && active_level==0) ||
                          (ap && teams_level>0 && teams_level==level))

    KMP_DEBUG_ASSERT( nthreads > 0 );

    master_th->th.th_set_nproc = 0;
    if ( nthreads == 1 ) {

#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        void *  args[ argc ];

        void * *args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serializing parallel region\n", gtid ));

        if ( call_context == fork_context_intel ) {

            master_th->th.th_serial_team->t.t_ident = loc;

                master_th->th.th_serial_team->t.t_level--;

                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                if (ompt_status & ompt_status_track) {
                    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                            unwrapped_task, ompt_parallel_id);
                    lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                    exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                    __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                    my_task_id = lw_taskteam.ompt_task_info.task_id;
                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                            ompt_parallel_id, my_task_id);

                    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

                    exit_runtime_p = &dummy;

                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv

                if (ompt_status & ompt_status_track) {
                    lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                            ompt_parallel_id, ompt_task_id);

                    __ompt_lw_taskteam_unlink(master_th);

                    lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                            ompt_parallel_id, ompt_task_id);

                    master_th->th.ompt_thread_info.state = ompt_state_overhead;

            } else if ( microtask == (microtask_t)__kmp_teams_master ) {
                KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
                team = master_th->th.th_team;

                team->t.t_invoke = invoker;
                __kmp_alloc_argv_entries( argc, team, TRUE );
                team->t.t_argc = argc;
                argv = (void**) team->t.t_argv;

                for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );

                    *argv++ = va_arg( ap, void * );

                for ( i = 0; i < argc; ++i )

                    argv[i] = parent_team->t.t_argv[i];

                for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );

                    *argv++ = va_arg( ap, void * );

                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                if (ompt_status & ompt_status_track) {
                    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                            unwrapped_task, ompt_parallel_id);
                    lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                    exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                    __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                    my_task_id = lw_taskteam.ompt_task_info.task_id;
                    my_parallel_id = ompt_parallel_id;
                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                            my_parallel_id, my_task_id);

                    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

                    exit_runtime_p = &dummy;

                __kmp_invoke_microtask( microtask, gtid, 0, argc, args

                if (ompt_status & ompt_status_track) {

                    lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                            my_parallel_id, my_task_id);

                    __ompt_lw_taskteam_unlink(master_th);

                    lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                    if ((ompt_status == ompt_status_track_callback) &&
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                            ompt_parallel_id, ompt_task_id);

                    master_th->th.ompt_thread_info.state = ompt_state_overhead;

        else if ( call_context == fork_context_gnu ) {

            ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
                __kmp_allocate( sizeof(ompt_lw_taskteam_t) );
            __ompt_lw_taskteam_init(lwt, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);

            lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
            lwt->ompt_task_info.frame.exit_runtime_frame = 0;
            __ompt_lw_taskteam_link(lwt, master_th);

            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));

            KMP_ASSERT2( call_context < fork_context_last,
                         "__kmp_fork_call: unknown fork_context parameter" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );

    master_th->th.th_current_task->td_flags.executing = 0;

    if ( !master_th->th.th_teams_microtask || level > teams_level )

        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );

    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level+1];

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;

        if (proc_bind == proc_bind_default) {

            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

        if ((level+1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];

    master_th->th.th_set_proc_bind = proc_bind_default;

    if ((nthreads_icv > 0)
        || (proc_bind_icv != proc_bind_default)

        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;

        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th) );

    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );

    team->t.t_master_tid = master_tid;
    team->t.t_master_this_cons = master_this_cons;
    team->t.t_ident = loc;
    team->t.t_parent = parent_team;
    TCW_SYNC_PTR(team->t.t_pkfn, microtask);

    TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task);

    team->t.t_invoke = invoker;

    if ( !master_th->th.th_teams_microtask || level > teams_level ) {

        team->t.t_level        = parent_team->t.t_level + 1;
        team->t.t_active_level = parent_team->t.t_active_level + 1;

        team->t.t_level        = parent_team->t.t_level;
        team->t.t_active_level = parent_team->t.t_active_level;

    team->t.t_sched = get__sched_2(parent_team, master_tid);

    propagateFPControl(team);
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);
        KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                        parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );

        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
        if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) {
            kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz );

            for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
                new_stack[i] = master_th->th.th_task_state_memo_stack[i];

            old_stack = master_th->th.th_task_state_memo_stack;
            master_th->th.th_task_state_memo_stack = new_stack;
            master_th->th.th_task_state_stack_sz *= 2;
            __kmp_free(old_stack);

        master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
        master_th->th.th_task_state_top++;
        master_th->th.th_task_state = 0;

        master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state];

#if !KMP_NESTED_HOT_TEAMS
        KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                    gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
    argv = (void**)team->t.t_argv;

    for ( i = argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
        *argv++ = va_arg( *ap, void * );

        *argv++ = va_arg( ap, void * );

    for ( i = 0; i < argc; ++i )

        argv[i] = team->t.t_parent->t.t_argv[i];

    team->t.t_master_active = master_active;
    if (!root->r.r_active)
        root->r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );
    __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );

    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
    if ( team->t.t_active_level == 1

         && !master_th->th.th_teams_microtask

        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )

            kmp_uint64 tmp_time = 0;
            if ( __itt_get_timestamp_ptr )
                tmp_time = __itt_get_timestamp();

            master_th->th.th_frame_time = tmp_time;
            if ( __kmp_forkjoin_frames_mode == 3 )
                team->t.t_region_time = tmp_time;

        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )

            __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );

    KF_TRACE(10, ( "__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                   root, team, master_th, gtid));

    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create();

    __kmp_internal_fork( loc, gtid, team );
    KF_TRACE(10, ( "__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                   root, team, master_th, gtid));

    if (call_context == fork_context_gnu) {
        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    if (! team->t.t_invoke( gtid )) {
        KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

    if (ompt_status & ompt_status_track) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;
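/* Helpers used at region end when OMPT is active: restore the thread state to
   work_serial/work_parallel depending on the enclosing team, and emit the
   ompt_event_parallel_end callback for the finished parallel id. */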
__kmp_join_restore_state(

    thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
                                         ompt_state_work_serial : ompt_state_work_parallel);

    ompt_parallel_id_t parallel_id)

    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
            parallel_id, task_info->task_id);

    __kmp_join_restore_state(thread,team);
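/* __kmp_join_call() ends a parallel region: the master waits for the workers
   in __kmp_internal_join(), ITT frames are submitted, the teams construct is
   unwound if needed, the team is returned to the pool with __kmp_free_team(),
   and the master's team pointers, dispatch buffer, task team and task state
   are restored to those of the parent team. */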
__kmp_join_call( ident_t *loc, int gtid

    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ));

    master_th = __kmp_threads[ gtid ];
    root = master_th->th.th_root;
    team = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

    if (ompt_status & ompt_status_track) {
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                        __kmp_gtid_from_thread( master_th ), team,
                        team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );

    if ( team->t.t_serialized ) {

        if ( master_th->th.th_teams_microtask ) {

            int level  = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {

            } else if ( level == tlevel + 1 ) {

                team->t.t_serialized++;

        if (ompt_status == ompt_status_track_callback) {
            __kmp_join_restore_state(master_th, parent_team);

    master_active = team->t.t_master_active;
    __kmp_internal_join( loc, gtid, team );

        master_th->th.th_task_state = 0;

    ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;

    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );

    if ( team->t.t_active_level == 1

         && !master_th->th.th_teams_microtask

        master_th->th.th_ident = loc;

        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
            __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                    0, loc, master_th->th.th_team_nproc, 1 );
        else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
                  ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            __kmp_itt_region_joined( gtid );

    if ( master_th->th.th_teams_microtask &&

         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {

        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            kmp_task_team_t *task_team = master_th->th.th_task_team;
            team->t.t_nproc = new_num;

                task_team->tt.tt_ref_ct = new_num - 1;
                task_team->tt.tt_unfinished_threads = new_num;

            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;

            for ( i = old_num; i < new_num; ++i ) {

                kmp_balign_t *balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;

                if ( __kmp_tasking_mode != tskm_immediate_exec ) {

                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;

        if (ompt_status == ompt_status_track_callback) {
            __kmp_join_ompt(master_th, parent_team, parallel_id);
    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
        & parent_team->t.t_dispatch[ team->t.t_master_tid ];

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )

        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

    KF_TRACE( 10, ( "__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                    0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED

    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;

    updateHWFPControl( team );

    if ( root->r.r_active != master_active )
        root->r.r_active = master_active;

    __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) );

    master_th->th.th_team = parent_team;
    master_th->th.th_team_nproc = parent_team->t.t_nproc;
    master_th->th.th_team_master = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    if ( parent_team->t.t_serialized &&
         parent_team != master_th->th.th_serial_team &&
         parent_team != root->r.r_root_team ) {
        __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
        master_th->th.th_serial_team = parent_team;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
        if (master_th->th.th_task_state_top > 0) {
            --master_th->th.th_task_state_top;
            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];

        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];

        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    if (ompt_status == ompt_status_track_callback) {
        __kmp_join_ompt(master_th, parent_team, parallel_id);

    KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ));
__kmp_save_internal_controls ( kmp_info_t * thread )
{

    if ( thread->th.th_team != thread->th.th_serial_team ) {

    if (thread->th.th_team->t.t_serialized > 1) {

        if (thread->th.th_team->t.t_control_stack_top == NULL) {

            if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
                 thread->th.th_team->t.t_serialized ) {

            kmp_internal_control_t *control = (kmp_internal_control_t *) __kmp_allocate( sizeof(kmp_internal_control_t) );

            copy_icvs( control, & thread->th.th_current_task->td_icvs );

            control->serial_nesting_level = thread->th.th_team->t.t_serialized;

            control->next = thread->th.th_team->t.t_control_stack_top;
            thread->th.th_team->t.t_control_stack_top = control;
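/* Back end of omp_set_num_threads(): clamp the request to __kmp_max_nth,
   record it in the current task's ICVs, and, when the root is idle, shrink
   the hot team right away so surplus workers are returned to the pool. */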
void
__kmp_set_num_threads( int new_nth, int gtid )
{
    KF_TRACE( 10, ( "__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( new_nth < 1 )
        new_nth = 1;
    else if ( new_nth > __kmp_max_nth )
        new_nth = __kmp_max_nth;

    thread = __kmp_threads[gtid];

    __kmp_save_internal_controls( thread );

    set__nproc( thread, new_nth );

    root = thread->th.th_root;
    if ( __kmp_init_parallel && ( ! root->r.r_active )
      && ( root->r.r_hot_team->t.t_nproc > new_nth )
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
    ) {
        kmp_team_t *hot_team = root->r.r_hot_team;

        __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

        if ( __kmp_tasking_mode != tskm_immediate_exec ) {
            for ( tt_idx = 0; tt_idx < 2; ++tt_idx ) {
                kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx];
                if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
                    KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
                    TCW_SYNC_4( task_team->tt.tt_active, FALSE );
                    KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
                                    &hot_team->t.t_task_team[tt_idx] ) );
                    hot_team->t.t_task_team[tt_idx] = NULL;
                }
                else {
                    KMP_DEBUG_ASSERT( task_team == NULL );
                }
            }
        }

        for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            __kmp_free_thread( hot_team->t.t_threads[f] );
            hot_team->t.t_threads[f] = NULL;
        }
        hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
        if ( thread->th.th_hot_teams ) {
            KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
            thread->th.th_hot_teams[0].hot_team_nth = new_nth;
        }
#endif

        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

        for ( f = 0; f < new_nth; f++ ) {
            KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
            hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
        }

        hot_team->t.t_size_changed = -1;
    }
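/*
 * Note: when the requested thread count is below the current hot team size and
 * no parallel region is active (and, with nested hot teams, only for the
 * outermost level), __kmp_set_num_threads() releases the surplus workers back
 * to the thread pool right away instead of waiting for the next fork, and
 * marks the hot team with t_size_changed = -1 so the next fork knows the team
 * was shrunk here.  This is the path a user-level omp_set_num_threads(n) call
 * eventually reaches.
 */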
void
__kmp_set_max_active_levels( int gtid, int max_active_levels )
{
    KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( max_active_levels < 0 ) {
        KMP_WARNING( ActiveLevelsNegative, max_active_levels );
        KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n",
                        gtid, max_active_levels ) );
        return;
    }
    if ( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
        // the value is within the valid range; keep it as is
    }
    else {
        KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
        max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    }

    KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n",
                    gtid, max_active_levels ) );

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    set__max_active_levels( thread, max_active_levels );
}
int
__kmp_get_max_active_levels( int gtid )
{
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( thread->th.th_current_task );
    KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
                    gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
    return thread->th.th_current_task->td_icvs.max_active_levels;
}
void
__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
{
    KF_TRACE( 10, ( "__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                    gtid, (int)kind, chunk ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
       ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
    {
        __kmp_msg(
            kmp_ms_warning,
            KMP_MSG( ScheduleKindOutOfRange, kind ),
            KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
            __kmp_msg_null
        );
        kind = kmp_sched_default;
    }

    thread = __kmp_threads[ gtid ];

    __kmp_save_internal_controls( thread );

    if ( kind < kmp_sched_upper_std ) {
        if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
            thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
        } else {
            thread->th.th_current_task->td_icvs.sched.r_sched_type =
                __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
        }
    } else {
        thread->th.th_current_task->td_icvs.sched.r_sched_type =
            __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
    }
    if ( kind == kmp_sched_auto ) {
        thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
    } else {
        thread->th.th_current_task->td_icvs.sched.chunk = chunk;
    }
}
void
__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
{
    KF_TRACE( 10, ( "__kmp_get_schedule: thread %d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    thread = __kmp_threads[ gtid ];

    th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

    switch ( th_type ) {
    case kmp_sch_static_greedy:
    case kmp_sch_static_balanced:
        *kind = kmp_sched_static;
        break;
    case kmp_sch_static_chunked:
        *kind = kmp_sched_static;
        break;
    case kmp_sch_dynamic_chunked:
        *kind = kmp_sched_dynamic;
        break;
    case kmp_sch_guided_iterative_chunked:
    case kmp_sch_guided_analytical_chunked:
        *kind = kmp_sched_guided;
        break;
    case kmp_sch_auto:
        *kind = kmp_sched_auto;
        break;
    case kmp_sch_trapezoidal:
        *kind = kmp_sched_trapezoidal;
        break;
    default:
        KMP_FATAL( UnknownSchedulingType, th_type );
    }

    *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
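/*
 * Note: __kmp_set_schedule()/__kmp_get_schedule() translate between the public
 * kmp_sched_t kinds (static, dynamic, guided, auto, trapezoidal) and the
 * richer internal 'enum sched_type' values via __kmp_sch_map, so a round trip
 * preserves the user-visible kind even though several internal variants map
 * to the same kind.  A short usage sketch, assuming the standard OpenMP API is
 * layered on top of these entry points:
 *
 *     omp_set_schedule(omp_sched_dynamic, 4);   // stored as a dynamic/chunked internal kind, chunk 4
 *     omp_sched_t kind; int chunk;
 *     omp_get_schedule(&kind, &chunk);          // kind == omp_sched_dynamic, chunk == 4
 */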
int
__kmp_get_ancestor_thread_num( int gtid, int level ) {

    KF_TRACE( 10, ( "__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( level == 0 ) return 0;
    if ( level < 0 ) return -1;
    thr  = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii   = team->t.t_level;
    if ( level > ii ) return -1;

    if ( thr->th.th_teams_microtask ) {
        int tlevel = thr->th.th_teams_level;
        if ( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );
            if ( ii == tlevel ) {

    if ( ii == level ) return __kmp_tid_from_gtid( gtid );

    dd = team->t.t_serialized;
        for ( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )

        if ( ( team->t.t_serialized ) && ( !dd ) ) {
            team = team->t.t_parent;

            team = team->t.t_parent;
            dd = team->t.t_serialized;

    return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
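/*
 * Note: the walk above moves from the innermost team toward the requested
 * ancestor level, stepping through serialized (single-thread) nestings via
 * t_serialized and through real teams via t_parent; level 0 is the outermost
 * level and level t_level is the calling thread's own team.  This is the
 * machinery behind omp_get_ancestor_thread_num(); e.g. inside a non-nested
 * parallel region, omp_get_ancestor_thread_num(1) is the calling thread's own
 * thread number and omp_get_ancestor_thread_num(0) is 0.
 */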
int
__kmp_get_team_size( int gtid, int level ) {

    KF_TRACE( 10, ( "__kmp_get_team_size: thread %d %d\n", gtid, level ) );
    KMP_DEBUG_ASSERT( __kmp_init_serial );

    if ( level == 0 ) return 1;
    if ( level < 0 ) return -1;
    thr  = __kmp_threads[ gtid ];
    team = thr->th.th_team;
    ii   = team->t.t_level;
    if ( level > ii ) return -1;

    if ( thr->th.th_teams_microtask ) {
        int tlevel = thr->th.th_teams_level;
        if ( level <= tlevel ) {
            KMP_DEBUG_ASSERT( ii >= tlevel );
            if ( ii == tlevel ) {

        for ( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )

        if ( team->t.t_serialized && ( !dd ) ) {
            team = team->t.t_parent;

            team = team->t.t_parent;

    return team->t.t_nproc;
kmp_r_sched_t
__kmp_get_schedule_global() {

    kmp_r_sched_t r_sched;

    if ( __kmp_sched == kmp_sch_static ) {
        r_sched.r_sched_type = __kmp_static;   // detailed static variant chosen at startup
    } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
        r_sched.r_sched_type = __kmp_guided;   // detailed guided variant chosen at startup
    } else {
        r_sched.r_sched_type = __kmp_sched;    // other kinds are used as is
    }

    if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
        r_sched.chunk = KMP_DEFAULT_CHUNK;
    } else {
        r_sched.chunk = __kmp_chunk;
    }

    return r_sched;
}
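/*
 * Note: the (r_sched_type, chunk) pair built here is the process-wide default
 * schedule used when teams are (re)initialized, i.e. what schedule(runtime)
 * resolves to before any omp_set_schedule() call.  For example, assuming the
 * environment sets OMP_SCHEDULE="guided,7", the detailed guided variant chosen
 * at startup is used and the chunk becomes 7; with no usable chunk the
 * KMP_DEFAULT_CHUNK fallback above applies.
 */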
static void
__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
{
    KMP_DEBUG_ASSERT( team );
    if ( !realloc || argc > team->t.t_max_argc ) {

        KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
                         team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ) );
        if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
            __kmp_free( (void *) team->t.t_argv );

        if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
            team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ) );
            team->t.t_argv = &team->t.t_inline_argv[0];
            if ( __kmp_storage_map ) {
                __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
                                              &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
                                              ( sizeof(void *) * KMP_INLINE_ARGV_ENTRIES ),
                                              "team_%d.t_inline_argv",
                                              team->t.t_id );
            }
        } else {
            team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
                                 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
            KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
                             team->t.t_id, team->t.t_max_argc ) );
            team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
            if ( __kmp_storage_map ) {
                __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
                                              sizeof(void *) * team->t.t_max_argc,
                                              "team_%d.t_argv",
                                              team->t.t_id );
            }
        }
    }
}
static void
__kmp_allocate_team_arrays( kmp_team_t *team, int max_nth )
{
    int i;
    int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
#if KMP_USE_POOLED_ALLOC
    char *ptr = __kmp_allocate( max_nth *
                  ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t) * num_disp_buff
                    + sizeof(kmp_disp_t) + sizeof(int) * 6
                    + sizeof(kmp_r_sched_t)
                    + sizeof(kmp_taskdata_t) ) );

    team->t.t_threads          = (kmp_info_t**) ptr;            ptr += sizeof(kmp_info_t*) * max_nth;
    team->t.t_disp_buffer      = (dispatch_shared_info_t*) ptr; ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
    team->t.t_dispatch         = (kmp_disp_t*) ptr;             ptr += sizeof(kmp_disp_t) * max_nth;
    team->t.t_set_nproc        = (int*) ptr;                    ptr += sizeof(int) * max_nth;
    team->t.t_set_dynamic      = (int*) ptr;                    ptr += sizeof(int) * max_nth;
    team->t.t_set_nested       = (int*) ptr;                    ptr += sizeof(int) * max_nth;
    team->t.t_set_blocktime    = (int*) ptr;                    ptr += sizeof(int) * max_nth;
    team->t.t_set_bt_intervals = (int*) ptr;                    ptr += sizeof(int) * max_nth;
    team->t.t_set_bt_set       = (int*) ptr;                    ptr += sizeof(int) * max_nth;
    team->t.t_set_sched        = (kmp_r_sched_t*) ptr;          ptr += sizeof(kmp_r_sched_t) * max_nth;
    team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;   ptr += sizeof(kmp_taskdata_t) * max_nth;
#else
    team->t.t_threads     = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
    team->t.t_disp_buffer = (dispatch_shared_info_t*)
        __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
    team->t.t_dispatch    = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
    team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
#endif
    team->t.t_max_nproc = max_nth;

    for ( i = 0; i < num_disp_buff; ++i )
        team->t.t_disp_buffer[i].buffer_index = i;
}
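/*
 * Note: with KMP_USE_POOLED_ALLOC the per-team arrays are carved out of one
 * contiguous block and the pointer arithmetic above must advance by exactly
 * the number of bytes each sub-array occupies; in the default build
 * (!KMP_USE_POOLED_ALLOC) each array is a separate __kmp_allocate() block,
 * which is what __kmp_free_team_arrays() below relies on when it frees
 * t_disp_buffer, t_dispatch and t_implicit_task_taskdata individually.
 */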
static void
__kmp_free_team_arrays(kmp_team_t *team) {
    int i;
    for ( i = 0; i < team->t.t_max_nproc; ++i ) {
        if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
            __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
            team->t.t_dispatch[ i ].th_disp_buffer = NULL;
        }
    }
    __kmp_free(team->t.t_threads);
#if !KMP_USE_POOLED_ALLOC
    __kmp_free(team->t.t_disp_buffer);
    __kmp_free(team->t.t_dispatch);
    __kmp_free(team->t.t_implicit_task_taskdata);
#endif
    team->t.t_threads     = NULL;
    team->t.t_disp_buffer = NULL;
    team->t.t_dispatch    = NULL;
    team->t.t_implicit_task_taskdata = 0;
}
static void
__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
    kmp_info_t **oldThreads = team->t.t_threads;

#if !KMP_USE_POOLED_ALLOC
    __kmp_free(team->t.t_disp_buffer);
    __kmp_free(team->t.t_dispatch);
    __kmp_free(team->t.t_implicit_task_taskdata);
#endif
    __kmp_allocate_team_arrays(team, max_nth);

    KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));

    __kmp_free(oldThreads);
}
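/*
 * Note: on reallocation only the t_threads pointers survive; the dispatch and
 * implicit-task arrays are freed and rebuilt for the new max_nth, and only the
 * first t_nproc entries of the old thread array are copied, so slots beyond
 * the previous team size are not copied and must not be assumed valid until
 * the team is reinitialized.
 */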
static kmp_internal_control_t
__kmp_get_global_icvs( void ) {

    kmp_r_sched_t r_sched = __kmp_get_schedule_global();

    KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );

    kmp_internal_control_t g_icvs = {
        (kmp_int8)__kmp_dflt_nested,
        (kmp_int8)__kmp_global.g.g_dynamic,
        (kmp_int8)__kmp_env_blocktime,
        __kmp_dflt_blocktime,
        __kmp_dflt_team_nth,
        __kmp_dflt_max_active_levels,
        __kmp_nested_proc_bind.bind_types[0],
    };

    return g_icvs;
}

static kmp_internal_control_t
__kmp_get_x_global_icvs( const kmp_team_t *team ) {

    kmp_internal_control_t gx_icvs;
    gx_icvs.serial_nesting_level = 0;
    copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
    gx_icvs.next = NULL;

    return gx_icvs;
}
__kmp_initialize_root( kmp_root_t *root )

    kmp_team_t *root_team;
    kmp_team_t *hot_team;
    int hot_team_max_nth;
    kmp_r_sched_t r_sched = __kmp_get_schedule_global();
    kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
    KMP_DEBUG_ASSERT( root );
    KMP_ASSERT( ! root->r.r_begin );

    __kmp_init_lock( &root->r.r_begin_lock );
    root->r.r_begin       = FALSE;
    root->r.r_active      = FALSE;
    root->r.r_in_parallel = 0;
    root->r.r_blocktime   = __kmp_dflt_blocktime;
    root->r.r_nested      = __kmp_dflt_nested;

    KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );

    __kmp_allocate_team(
        __kmp_nested_proc_bind.bind_types[0],
        USE_NESTED_HOT_ARG(NULL)

    TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));

    KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );

    root->r.r_root_team = root_team;
    root_team->t.t_control_stack_top = NULL;

    root_team->t.t_threads[0] = NULL;
    root_team->t.t_nproc      = 1;
    root_team->t.t_serialized = 1;
    root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
    root_team->t.t_sched.chunk        = r_sched.chunk;
    KA_TRACE( 20, ( "__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
                    root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ) );

    KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );

    __kmp_allocate_team(
        __kmp_dflt_team_nth_ub * 2,
        __kmp_nested_proc_bind.bind_types[0],
        USE_NESTED_HOT_ARG(NULL)

    KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );

    root->r.r_hot_team = hot_team;
    root_team->t.t_control_stack_top = NULL;

    hot_team->t.t_parent = root_team;

    hot_team_max_nth = hot_team->t.t_max_nproc;
    for ( f = 0; f < hot_team_max_nth; ++f ) {
        hot_team->t.t_threads[ f ] = NULL;

    hot_team->t.t_nproc = 1;
    hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
    hot_team->t.t_sched.chunk        = r_sched.chunk;
    hot_team->t.t_size_changed = 0;
typedef struct kmp_team_list_item {
    kmp_team_p const *          entry;
    struct kmp_team_list_item * next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t * kmp_team_list_t;

__kmp_print_structure_team_accum(
    kmp_team_list_t    list,
    kmp_team_p const * team
) {

    KMP_DEBUG_ASSERT( list != NULL );
    if ( team == NULL ) {

    __kmp_print_structure_team_accum( list, team->t.t_parent );
    __kmp_print_structure_team_accum( list, team->t.t_next_pool );

    while ( l->next != NULL && l->entry != team ) {

    if ( l->next != NULL ) {

    while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {

    kmp_team_list_item_t * item =
        (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
__kmp_print_structure_team(
    char const *       title,
    kmp_team_p const * team
) {
    __kmp_printf( "%s", title );
    if ( team != NULL ) {
        __kmp_printf( "%2x %p\n", team->t.t_id, team );
    } else {
        __kmp_printf( " - (nil)\n" );
    }
}

__kmp_print_structure_thread(
    char const *       title,
    kmp_info_p const * thread
) {
    __kmp_printf( "%s", title );
    if ( thread != NULL ) {
        __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
    } else {
        __kmp_printf( " - (nil)\n" );
    }
}
__kmp_print_structure( void ) {

    kmp_team_list_t list;

    list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );

    __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
    for ( gtid = 0; gtid < __kmp_threads_capacity; ++gtid ) {
        __kmp_printf( "%2d", gtid );
        if ( __kmp_threads != NULL ) {
            __kmp_printf( " %p", __kmp_threads[ gtid ] );
        if ( __kmp_root != NULL ) {
            __kmp_printf( " %p", __kmp_root[ gtid ] );
        __kmp_printf( "\n" );

    __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
    if ( __kmp_threads != NULL ) {
        for ( gtid = 0; gtid < __kmp_threads_capacity; ++gtid ) {
            kmp_info_t const * thread = __kmp_threads[ gtid ];
            if ( thread != NULL ) {
                __kmp_printf( "GTID %2d %p:\n", gtid, thread );
                __kmp_printf( " Our Root: %p\n", thread->th.th_root );
                __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
                __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
                __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
                __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
                __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
                __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
                __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
                __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
                __kmp_printf( "\n" );
                __kmp_print_structure_team_accum( list, thread->th.th_team );
                __kmp_print_structure_team_accum( list, thread->th.th_serial_team );

        __kmp_printf( "Threads array is not allocated.\n" );

    __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
    if ( __kmp_root != NULL ) {
        for ( gtid = 0; gtid < __kmp_threads_capacity; ++gtid ) {
            kmp_root_t const * root = __kmp_root[ gtid ];
            if ( root != NULL ) {
                __kmp_printf( "GTID %2d %p:\n", gtid, root );
                __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
                __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
                __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
                __kmp_printf( " Active?: %2d\n", root->r.r_active );
                __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
                __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
                __kmp_printf( "\n" );
                __kmp_print_structure_team_accum( list, root->r.r_root_team );
                __kmp_print_structure_team_accum( list, root->r.r_hot_team );

        __kmp_printf( "Ubers array is not allocated.\n" );

    __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
    while ( list->next != NULL ) {
        kmp_team_p const * team = list->entry;
        __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
        __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
        __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
        __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
        __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
        __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
        for ( i = 0; i < team->t.t_nproc; ++i ) {
            __kmp_printf( " Thread %2d: ", i );
            __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
        __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
        __kmp_printf( "\n" );

    __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
    __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
    __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
    __kmp_printf( "\n" );

    while ( list != NULL ) {
        kmp_team_list_item_t * item = list;
        KMP_INTERNAL_FREE( item );
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
    0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
    0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
    0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
    0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
    0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
    0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
    0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
    0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
    0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
    0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
};
unsigned short
__kmp_get_random( kmp_info_t * thread )
{
    unsigned x = thread->th.th_x;
    unsigned short r = x >> 16;

    thread->th.th_x = x * thread->th.th_a + 1;

    KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                  thread->th.th_info.ds.ds_tid, r) );

    return r;
}

void
__kmp_init_random( kmp_info_t * thread )
{
    unsigned seed = thread->th.th_info.ds.ds_tid;

    thread->th.th_a = __kmp_primes[ seed % ( sizeof(__kmp_primes) / sizeof(__kmp_primes[0]) ) ];
    thread->th.th_x = ( seed + 1 ) * thread->th.th_a + 1;
    KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
}
static int
__kmp_reclaim_dead_roots(void) {
    int i, r = 0;

    for ( i = 0; i < __kmp_threads_capacity; ++i ) {
        if ( KMP_UBER_GTID( i ) &&
             !__kmp_still_running( (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i]) ) &&
             !__kmp_root[i]->r.r_active ) {
            r += __kmp_unregister_root_other_thread(i);
        }
    }
    return r;
}
static int
__kmp_expand_threads( int nWish, int nNeed ) {

    int __kmp_actual_max_nth;

#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
    added = __kmp_reclaim_dead_roots();
#endif

    int minimumRequiredCapacity;
    int newCapacity;
    kmp_info_t **newThreads;
    kmp_root_t **newRoot;

    old_tp_cached = __kmp_tp_cached;
    __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
    KMP_DEBUG_ASSERT( __kmp_actual_max_nth >= __kmp_threads_capacity );

    if ( __kmp_actual_max_nth - __kmp_threads_capacity < nTarget ) {
        if ( __kmp_actual_max_nth - __kmp_threads_capacity < nTarget ) {

        nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;

    minimumRequiredCapacity = __kmp_threads_capacity + nTarget;

    newCapacity = __kmp_threads_capacity;
    do {
        newCapacity =
            newCapacity <= ( __kmp_actual_max_nth >> 1 ) ?
            ( newCapacity << 1 ) :
            __kmp_actual_max_nth;
    }
    while ( newCapacity < minimumRequiredCapacity );

    newThreads = (kmp_info_t**) __kmp_allocate(
        ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * newCapacity + CACHE_LINE );
    newRoot = (kmp_root_t**) ( (char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
    KMP_MEMCPY( newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*) );
    KMP_MEMCPY( newRoot,    __kmp_root,    __kmp_threads_capacity * sizeof(kmp_root_t*) );
    memset( newThreads + __kmp_threads_capacity, 0,
            ( newCapacity - __kmp_threads_capacity ) * sizeof(kmp_info_t*) );
    memset( newRoot + __kmp_threads_capacity, 0,
            ( newCapacity - __kmp_threads_capacity ) * sizeof(kmp_root_t*) );

    if ( !old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity ) {
        __kmp_free( newThreads );

    __kmp_acquire_bootstrap_lock( &__kmp_tp_cached_lock );
    if ( !old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity ) {
        __kmp_release_bootstrap_lock( &__kmp_tp_cached_lock );
        __kmp_free( newThreads );

    *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
    *(kmp_root_t** volatile*)&__kmp_root    = newRoot;
    added += newCapacity - __kmp_threads_capacity;
    *(volatile int*)&__kmp_threads_capacity = newCapacity;
    __kmp_release_bootstrap_lock( &__kmp_tp_cached_lock );
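/*
 * Note: capacity growth here is geometric: the candidate capacity doubles
 * until it covers minimumRequiredCapacity or reaches __kmp_actual_max_nth, and
 * __kmp_threads and __kmp_root are re-created inside one allocation so the new
 * pointers can be published together.  Worked example under those rules: with
 * __kmp_threads_capacity == 32 and nTarget == 40, minimumRequiredCapacity is
 * 72 and the doubling sequence 32 -> 64 -> 128 stops at 128 (capped at
 * __kmp_actual_max_nth if that is smaller).
 */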
3648 __kmp_register_root(
int initial_thread )
3650 kmp_info_t *root_thread;
3654 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3655 KA_TRACE( 20, (
"__kmp_register_root: entered\n"));
3673 capacity = __kmp_threads_capacity;
3674 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3679 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3680 if ( __kmp_tp_cached ) {
3683 KMP_MSG( CantRegisterNewThread ),
3684 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3685 KMP_HNT( PossibleSystemLimitOnThreads ),
3692 KMP_MSG( CantRegisterNewThread ),
3693 KMP_HNT( SystemLimitOnThreads ),
3702 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3704 KA_TRACE( 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3705 KMP_ASSERT( gtid < __kmp_threads_capacity );
3709 TCW_4(__kmp_nth, __kmp_nth + 1);
3716 if ( __kmp_adjust_gtid_mode ) {
3717 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3718 if ( TCR_4(__kmp_gtid_mode) != 2) {
3719 TCW_4(__kmp_gtid_mode, 2);
3723 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3724 TCW_4(__kmp_gtid_mode, 1);
3729 #ifdef KMP_ADJUST_BLOCKTIME
3732 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3733 if ( __kmp_nth > __kmp_avail_proc ) {
3734 __kmp_zero_bt = TRUE;
3740 if( ! ( root = __kmp_root[gtid] )) {
3741 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate(
sizeof(kmp_root_t) );
3742 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3745 __kmp_initialize_root( root );
3748 if( root->r.r_uber_thread ) {
3749 root_thread = root->r.r_uber_thread;
3751 root_thread = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
3752 if ( __kmp_storage_map ) {
3753 __kmp_print_thread_storage_map( root_thread, gtid );
3755 root_thread->th.th_info .ds.ds_gtid = gtid;
3756 root_thread->th.th_root = root;
3757 if( __kmp_env_consistency_check ) {
3758 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3761 __kmp_initialize_fast_memory( root_thread );
3765 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3766 __kmp_initialize_bget( root_thread );
3768 __kmp_init_random( root_thread );
3772 if( ! root_thread->th.th_serial_team ) {
3773 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3774 KF_TRACE( 10, (
"__kmp_register_root: before serial_team\n" ) );
3776 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3784 0 USE_NESTED_HOT_ARG(NULL) );
3786 KMP_ASSERT( root_thread->th.th_serial_team );
3787 KF_TRACE( 10, (
"__kmp_register_root: after serial_team = %p\n",
3788 root_thread->th.th_serial_team ) );
3791 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3793 root->r.r_root_team->t.t_threads[0] = root_thread;
3794 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3795 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3796 root_thread->th.th_serial_team->t.t_serialized = 0;
3797 root->r.r_uber_thread = root_thread;
3800 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3803 __kmp_gtid_set_specific( gtid );
3805 __kmp_itt_thread_name( gtid );
3807 #ifdef KMP_TDATA_GTID
3810 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3811 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
3812 TCW_4(__kmp_init_gtid, TRUE);
3814 KA_TRACE( 20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3815 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
3816 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3817 KMP_INIT_BARRIER_STATE ) );
3820 for ( b = 0; b < bs_last_barrier; ++ b ) {
3821 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3823 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3827 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3830 #if KMP_AFFINITY_SUPPORTED
3831 if ( TCR_4(__kmp_init_middle) ) {
3832 __kmp_affinity_set_init_mask( gtid, TRUE );
3836 __kmp_root_counter ++;
3839 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3844 #if KMP_NESTED_HOT_TEAMS
__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level )

    kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
    if ( !hot_teams || !hot_teams[level].hot_team ) {

    KMP_DEBUG_ASSERT( level < max_level );
    kmp_team_t *team = hot_teams[level].hot_team;
    nth = hot_teams[level].hot_team_nth;

    if ( level < max_level - 1 ) {
        for ( i = 0; i < nth; ++i ) {
            kmp_info_t *th = team->t.t_threads[i];
            n += __kmp_free_hot_teams( root, th, level + 1, max_level );
            if ( i > 0 && th->th.th_hot_teams ) {
                __kmp_free( th->th.th_hot_teams );
                th->th.th_hot_teams = NULL;

    __kmp_free_team( root, team, NULL );
static int
__kmp_reset_root( int gtid, kmp_root_t *root )
{
    kmp_team_t * root_team = root->r.r_root_team;
    kmp_team_t * hot_team  = root->r.r_hot_team;
    int          n         = hot_team->t.t_nproc;

    KMP_DEBUG_ASSERT( ! root->r.r_active );

    root->r.r_root_team = NULL;
    root->r.r_hot_team  = NULL;

    __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
#if KMP_NESTED_HOT_TEAMS
    if ( __kmp_hot_teams_max_level > 1 ) {
        for ( i = 0; i < hot_team->t.t_nproc; ++i ) {
            kmp_info_t *th = hot_team->t.t_threads[i];
            n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
            if ( th->th.th_hot_teams ) {
                __kmp_free( th->th.th_hot_teams );
                th->th.th_hot_teams = NULL;
            }
        }
    }
#endif
    __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        __kmp_wait_to_unref_task_teams();
    }

    KA_TRACE( 10, ( "__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
                    (LPVOID)&(root->r.r_uber_thread->th),
                    root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
    __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );

    if ( ( ompt_status == ompt_status_track_callback ) &&
         ompt_callbacks.ompt_callback(ompt_event_thread_end) ) {
        int gtid = __kmp_get_gtid();
        __ompt_thread_end( ompt_thread_initial, gtid );
    }

    TCW_4(__kmp_nth, __kmp_nth - 1);
    __kmp_reap_thread( root->r.r_uber_thread, 1 );

    root->r.r_uber_thread = NULL;
    root->r.r_begin = FALSE;

    return n;
}
void
__kmp_unregister_root_current_thread( int gtid )
{
    KA_TRACE( 1, ( "__kmp_unregister_root_current_thread: enter T#%d\n", gtid ) );

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
    if ( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
        KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ) );
        __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
        return;
    }
    kmp_root_t *root = __kmp_root[gtid];

    KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
    KMP_ASSERT( KMP_UBER_GTID( gtid ) );
    KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
    KMP_ASSERT( root->r.r_active == FALSE );

    kmp_info_t * thread = __kmp_threads[gtid];
    kmp_team_t * team = thread->th.th_team;
    kmp_task_team_t * task_team = thread->th.th_task_team;

    if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks )
        __kmp_task_team_wait( thread, team, NULL );

    __kmp_reset_root( gtid, root );

    __kmp_gtid_set_specific( KMP_GTID_DNE );
#ifdef KMP_TDATA_GTID
    __kmp_gtid = KMP_GTID_DNE;
#endif

    KC_TRACE( 10, ( "__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ) );

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
}
static int
__kmp_unregister_root_other_thread( int gtid )
{
    kmp_root_t *root = __kmp_root[gtid];
    int r;

    KA_TRACE( 1, ( "__kmp_unregister_root_other_thread: enter T#%d\n", gtid ) );
    KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
    KMP_ASSERT( KMP_UBER_GTID( gtid ) );
    KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
    KMP_ASSERT( root->r.r_active == FALSE );

    r = __kmp_reset_root( gtid, root );
    KC_TRACE( 10, ( "__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ) );
    return r;
}
void __kmp_task_info() {

    kmp_int32 gtid       = __kmp_entry_gtid();
    kmp_int32 tid        = __kmp_tid_from_gtid( gtid );
    kmp_info_t *this_thr = __kmp_threads[ gtid ];
    kmp_team_t *steam    = this_thr->th.th_serial_team;
    kmp_team_t *team     = this_thr->th.th_team;

    __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
                  gtid, tid, this_thr, team, this_thr->th.th_current_task,
                  team->t.t_implicit_task_taskdata[tid].td_parent );
4028 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int gtid )
4032 kmp_info_t *master = team->t.t_threads[0];
4033 KMP_DEBUG_ASSERT( this_thr != NULL );
4034 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
4035 KMP_DEBUG_ASSERT( team );
4036 KMP_DEBUG_ASSERT( team->t.t_threads );
4037 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4038 KMP_DEBUG_ASSERT( master );
4039 KMP_DEBUG_ASSERT( master->th.th_root );
4043 TCW_SYNC_PTR(this_thr->th.th_team, team);
4045 this_thr->th.th_info.ds.ds_tid = tid;
4046 this_thr->th.th_set_nproc = 0;
4048 this_thr->th.th_set_proc_bind = proc_bind_default;
4049 # if KMP_AFFINITY_SUPPORTED
4050 this_thr->th.th_new_place = this_thr->th.th_current_place;
4053 this_thr->th.th_root = master->th.th_root;
4056 this_thr->th.th_team_nproc = team->t.t_nproc;
4057 this_thr->th.th_team_master = master;
4058 this_thr->th.th_team_serialized = team->t.t_serialized;
4059 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4061 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
4062 this_thr->th.th_task_state = 0;
4064 KF_TRACE( 10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4065 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4067 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4069 KF_TRACE( 10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4070 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4074 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
4076 this_thr->th.th_local.this_construct = 0;
4079 this_thr->th.th_local.tv_data = 0;
4082 if ( ! this_thr->th.th_pri_common ) {
4083 this_thr->th.th_pri_common = (
struct common_table *) __kmp_allocate(
sizeof(
struct common_table) );
4084 if ( __kmp_storage_map ) {
4085 __kmp_print_storage_map_gtid(
4086 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4087 sizeof(
struct common_table ),
"th_%d.th_pri_common\n", gtid
4090 this_thr->th.th_pri_head = NULL;
4095 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4099 size_t disp_size =
sizeof( dispatch_private_info_t ) *
4100 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4101 KD_TRACE( 10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4102 KMP_ASSERT( dispatch );
4103 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4104 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4106 dispatch->th_disp_index = 0;
4108 if( ! dispatch->th_disp_buffer ) {
4109 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4111 if ( __kmp_storage_map ) {
4112 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4113 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4114 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4115 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4116 gtid, team->t.t_id, gtid );
4119 memset( & dispatch->th_disp_buffer[0],
'\0', disp_size );
4122 dispatch->th_dispatch_pr_current = 0;
4123 dispatch->th_dispatch_sh_current = 0;
4125 dispatch->th_deo_fcn = 0;
4126 dispatch->th_dxo_fcn = 0;
4129 this_thr->th.th_next_pool = NULL;
4131 if (!this_thr->th.th_task_state_memo_stack) {
4132 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*
sizeof(kmp_uint8) );
4133 this_thr->th.th_task_state_top = 0;
4134 this_thr->th.th_task_state_stack_sz = 4;
4137 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4138 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
4151 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team,
int new_tid )
4153 kmp_team_t *serial_team;
4154 kmp_info_t *new_thr;
4157 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4158 KMP_DEBUG_ASSERT( root && team );
4159 #if !KMP_NESTED_HOT_TEAMS
4160 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4165 if ( __kmp_thread_pool ) {
4167 new_thr = (kmp_info_t*)__kmp_thread_pool;
4168 __kmp_thread_pool = (
volatile kmp_info_t *) new_thr->th.th_next_pool;
4169 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4170 __kmp_thread_pool_insert_pt = NULL;
4172 TCW_4(new_thr->th.th_in_pool, FALSE);
4178 __kmp_thread_pool_nth--;
4180 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4181 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4182 KMP_ASSERT( ! new_thr->th.th_team );
4183 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4184 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4187 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4188 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4190 TCW_4(__kmp_nth, __kmp_nth + 1);
4192 new_thr->th.th_task_state_top = 0;
4193 new_thr->th.th_task_state_stack_sz = 4;
4195 #ifdef KMP_ADJUST_BLOCKTIME
4198 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4199 if ( __kmp_nth > __kmp_avail_proc ) {
4200 __kmp_zero_bt = TRUE;
4208 kmp_balign_t * balign = new_thr->th.th_bar;
4209 for( b = 0; b < bs_last_barrier; ++ b )
4210 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4213 KF_TRACE( 10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4214 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4222 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4223 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4229 if ( ! TCR_4( __kmp_init_monitor ) ) {
4230 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4231 if ( ! TCR_4( __kmp_init_monitor ) ) {
4232 KF_TRACE( 10, (
"before __kmp_create_monitor\n" ) );
4233 TCW_4( __kmp_init_monitor, 1 );
4234 __kmp_create_monitor( & __kmp_monitor );
4235 KF_TRACE( 10, (
"after __kmp_create_monitor\n" ) );
4244 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4247 KF_TRACE( 10, (
"after monitor thread has started\n" ) );
4250 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4254 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4255 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4259 new_thr = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
4261 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4263 if ( __kmp_storage_map ) {
4264 __kmp_print_thread_storage_map( new_thr, new_gtid );
4269 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4270 KF_TRACE( 10, (
"__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4272 new_thr->th.th_serial_team = serial_team =
4273 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4281 0 USE_NESTED_HOT_ARG(NULL) );
4283 KMP_ASSERT ( serial_team );
4284 serial_team->t.t_serialized = 0;
4285 serial_team->t.t_threads[0] = new_thr;
4286 KF_TRACE( 10, (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4290 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4293 __kmp_initialize_fast_memory( new_thr );
4297 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
4298 __kmp_initialize_bget( new_thr );
4301 __kmp_init_random( new_thr );
4304 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4305 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4308 kmp_balign_t * balign = new_thr->th.th_bar;
4309 for(b=0; b<bs_last_barrier; ++b) {
4310 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4311 balign[b].bb.team = NULL;
4312 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4313 balign[b].bb.use_oncore_barrier = 0;
4316 new_thr->th.th_spin_here = FALSE;
4317 new_thr->th.th_next_waiting = 0;
4319 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4320 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4321 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4322 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4323 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4326 TCW_4(new_thr->th.th_in_pool, FALSE);
4327 new_thr->th.th_active_in_pool = FALSE;
4328 TCW_4(new_thr->th.th_active, TRUE);
4339 if ( __kmp_adjust_gtid_mode ) {
4340 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4341 if ( TCR_4(__kmp_gtid_mode) != 2) {
4342 TCW_4(__kmp_gtid_mode, 2);
4346 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4347 TCW_4(__kmp_gtid_mode, 1);
4352 #ifdef KMP_ADJUST_BLOCKTIME
4355 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4356 if ( __kmp_nth > __kmp_avail_proc ) {
4357 __kmp_zero_bt = TRUE;
4363 KF_TRACE( 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4364 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4365 KF_TRACE( 10, (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4368 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
static void
__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) {
    KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                    team->t.t_threads[0], team ) );
    KMP_DEBUG_ASSERT( team && new_icvs );
    KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
    team->t.t_ident = loc;

    team->t.t_id = KMP_GEN_TEAM_ID();

    __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
    copy_icvs( &team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs );

    KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                    team->t.t_threads[0], team ) );
}
static void
__kmp_initialize_team(
    kmp_team_t *             team,
    int                      new_nproc,
    kmp_internal_control_t * new_icvs,
    ident_t *                loc
) {
    KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) );

    KMP_DEBUG_ASSERT( team );
    KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
    KMP_DEBUG_ASSERT( team->t.t_threads );

    team->t.t_master_tid = 0;
    team->t.t_serialized = new_nproc > 1 ? 0 : 1;
    team->t.t_nproc      = new_nproc;

    team->t.t_next_pool  = NULL;

    TCW_SYNC_PTR(team->t.t_pkfn, NULL);
    team->t.t_invoke     = NULL;

    team->t.t_sched      = new_icvs->sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    team->t.t_fp_control_saved     = FALSE;
    team->t.t_x87_fpu_control_word = 0;
    team->t.t_mxcsr                = 0;
#endif

    team->t.t_construct  = 0;
    __kmp_init_lock( & team->t.t_single_lock );

    team->t.t_ordered.dt.t_value = 0;
    team->t.t_master_active = FALSE;

    memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t ) );

    team->t.t_copypriv_data  = NULL;

    team->t.t_copyin_counter = 0;

    team->t.t_control_stack_top = NULL;

    __kmp_reinitialize_team( team, new_icvs, loc );

    KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) );
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED

static void
__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
{
    if ( KMP_AFFINITY_CAPABLE() ) {
        int status;
        if ( old_mask != NULL ) {
            status = __kmp_get_system_affinity( old_mask, TRUE );
            if ( status != 0 ) {
                __kmp_msg(
                    kmp_ms_fatal,
                    KMP_MSG( ChangeThreadAffMaskError ),
                    KMP_ERR( status ),
                    __kmp_msg_null
                );
            }
        }
        __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
    }
}
4485 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4494 __kmp_partition_places( kmp_team_t *team )
4499 kmp_info_t *master_th = team->t.t_threads[0];
4500 KMP_DEBUG_ASSERT( master_th != NULL );
4501 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4502 int first_place = master_th->th.th_first_place;
4503 int last_place = master_th->th.th_last_place;
4504 int masters_place = master_th->th.th_current_place;
4505 team->t.t_first_place = first_place;
4506 team->t.t_last_place = last_place;
4508 KA_TRACE( 20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4509 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4510 masters_place, first_place, last_place ) );
4512 switch ( proc_bind ) {
4514 case proc_bind_default:
4520 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4523 case proc_bind_master:
4526 int n_th = team->t.t_nproc;
4527 for ( f = 1; f < n_th; f++ ) {
4528 kmp_info_t *th = team->t.t_threads[f];
4529 KMP_DEBUG_ASSERT( th != NULL );
4530 th->th.th_first_place = first_place;
4531 th->th.th_last_place = last_place;
4532 th->th.th_new_place = masters_place;
4534 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4535 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4536 team->t.t_id, f, masters_place, first_place, last_place ) );
4541 case proc_bind_close:
4544 int n_th = team->t.t_nproc;
4546 if ( first_place <= last_place ) {
4547 n_places = last_place - first_place + 1;
4550 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4552 if ( n_th <= n_places ) {
4553 int place = masters_place;
4554 for ( f = 1; f < n_th; f++ ) {
4555 kmp_info_t *th = team->t.t_threads[f];
4556 KMP_DEBUG_ASSERT( th != NULL );
4558 if ( place == last_place ) {
4559 place = first_place;
4561 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4567 th->th.th_first_place = first_place;
4568 th->th.th_last_place = last_place;
4569 th->th.th_new_place = place;
4571 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4572 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4573 team->t.t_id, f, place, first_place, last_place ) );
4577 int S, rem, gap, s_count;
4578 S = n_th / n_places;
4580 rem = n_th - ( S * n_places );
4581 gap = rem > 0 ? n_places/rem : n_places;
4582 int place = masters_place;
4584 for ( f = 0; f < n_th; f++ ) {
4585 kmp_info_t *th = team->t.t_threads[f];
4586 KMP_DEBUG_ASSERT( th != NULL );
4588 th->th.th_first_place = first_place;
4589 th->th.th_last_place = last_place;
4590 th->th.th_new_place = place;
4593 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4596 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4598 if ( place == last_place ) {
4599 place = first_place;
4601 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4611 else if (s_count == S) {
4612 if ( place == last_place ) {
4613 place = first_place;
4615 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4625 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4626 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4627 team->t.t_id, f, th->th.th_new_place, first_place,
4630 KMP_DEBUG_ASSERT( place == masters_place );
4635 case proc_bind_spread:
4638 int n_th = team->t.t_nproc;
4640 if ( first_place <= last_place ) {
4641 n_places = last_place - first_place + 1;
4644 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4646 if ( n_th <= n_places ) {
4647 int place = masters_place;
4648 int S = n_places/n_th;
4649 int s_count, rem, gap, gap_ct;
4650 rem = n_places - n_th*S;
4651 gap = rem ? n_th/rem : 1;
4653 for ( f = 0; f < n_th; f++ ) {
4654 kmp_info_t *th = team->t.t_threads[f];
4655 KMP_DEBUG_ASSERT( th != NULL );
4657 th->th.th_first_place = place;
4658 th->th.th_new_place = place;
4660 while (s_count < S) {
4661 if ( place == last_place ) {
4662 place = first_place;
4664 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4672 if (rem && (gap_ct == gap)) {
4673 if ( place == last_place ) {
4674 place = first_place;
4676 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4685 th->th.th_last_place = place;
4688 if ( place == last_place ) {
4689 place = first_place;
4691 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4698 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4699 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4700 team->t.t_id, f, th->th.th_new_place,
4701 th->th.th_first_place, th->th.th_last_place ) );
4703 KMP_DEBUG_ASSERT( place == masters_place );
4706 int S, rem, gap, s_count;
4707 S = n_th / n_places;
4709 rem = n_th - ( S * n_places );
4710 gap = rem > 0 ? n_places/rem : n_places;
4711 int place = masters_place;
4713 for ( f = 0; f < n_th; f++ ) {
4714 kmp_info_t *th = team->t.t_threads[f];
4715 KMP_DEBUG_ASSERT( th != NULL );
4717 th->th.th_first_place = place;
4718 th->th.th_last_place = place;
4719 th->th.th_new_place = place;
4722 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4725 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4727 if ( place == last_place ) {
4728 place = first_place;
4730 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4740 else if (s_count == S) {
4741 if ( place == last_place ) {
4742 place = first_place;
4744 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4754 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4755 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4756 team->t.t_id, f, th->th.th_new_place,
4757 th->th.th_first_place, th->th.th_last_place) );
4759 KMP_DEBUG_ASSERT( place == masters_place );
4768 KA_TRACE( 20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4775 __kmp_allocate_team( kmp_root_t *root,
int new_nproc,
int max_nproc,
4777 ompt_parallel_id_t ompt_parallel_id,
4780 kmp_proc_bind_t new_proc_bind,
4782 kmp_internal_control_t *new_icvs,
4783 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4788 int use_hot_team = ! root->r.r_active;
4791 KA_TRACE( 20, (
"__kmp_allocate_team: called\n"));
4792 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4793 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4796 #if KMP_NESTED_HOT_TEAMS
4797 kmp_hot_team_ptr_t *hot_teams;
4799 team = master->th.th_team;
4800 level = team->t.t_active_level;
4801 if( master->th.th_teams_microtask ) {
4802 if( master->th.th_teams_size.nteams > 1 && (
4803 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4804 master->th.th_teams_level < team->t.t_level ) ) {
4808 hot_teams = master->th.th_hot_teams;
4809 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4818 if( use_hot_team && new_nproc > 1 ) {
4819 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
4820 #if KMP_NESTED_HOT_TEAMS
4821 team = hot_teams[level].hot_team;
4823 team = root->r.r_hot_team;
4826 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4827 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4828 team->t.t_task_team[0], team->t.t_task_team[1] ));
4835 if (team->t.t_nproc == new_nproc) {
4836 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
4839 if ( team->t.t_size_changed == -1 ) {
4840 team->t.t_size_changed = 1;
4842 team->t.t_size_changed = 0;
4846 team->t.t_sched = new_icvs->sched;
4848 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4850 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4851 0, team->t.t_threads[0], team ) );
4852 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4855 # if KMP_AFFINITY_SUPPORTED
4856 if ( team->t.t_proc_bind == new_proc_bind ) {
4857 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4858 team->t.t_id, new_proc_bind, team->t.t_first_place,
4859 team->t.t_last_place ) );
4862 team->t.t_proc_bind = new_proc_bind;
4863 __kmp_partition_places( team );
4866 if ( team->t.t_proc_bind != new_proc_bind ) {
4867 team->t.t_proc_bind = new_proc_bind;
4873 for(f = 0; f < new_nproc; ++f) {
4874 team->t.t_threads[f]->th.th_task_state = 0;
4878 else if( team->t.t_nproc > new_nproc ) {
4879 KA_TRACE( 20, (
"__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4881 team->t.t_size_changed = 1;
4882 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4886 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4888 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4889 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
4890 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
4891 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
4893 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
4894 &team->t.t_task_team[tt_idx]));
4895 team->t.t_task_team[tt_idx] = NULL;
4898 KMP_DEBUG_ASSERT( task_team == NULL );
4902 #if KMP_NESTED_HOT_TEAMS
4903 if( __kmp_hot_teams_mode == 0 ) {
4906 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4907 hot_teams[level].hot_team_nth = new_nproc;
4908 #endif // KMP_NESTED_HOT_TEAMS
4910 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4911 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4912 __kmp_free_thread( team->t.t_threads[ f ] );
4913 team->t.t_threads[ f ] = NULL;
4915 #if KMP_NESTED_HOT_TEAMS
4917 #endif // KMP_NESTED_HOT_TEAMS
4918 team->t.t_nproc = new_nproc;
4920 team->t.t_sched = new_icvs->sched;
4921 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4923 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4926 for (tt_idx=0; tt_idx<2; ++tt_idx) {
4927 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
4928 if ( task_team != NULL ) {
4929 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
4930 task_team->tt.tt_nproc = new_nproc;
4931 task_team->tt.tt_unfinished_threads = new_nproc;
4932 task_team->tt.tt_ref_ct = new_nproc - 1;
4939 for(f = 0; f < new_nproc; ++f) {
4940 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4941 team->t.t_threads[f]->th.th_task_state = 0;
4945 for(f = 0; f < new_nproc; ++f) {
4946 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4950 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4951 0, team->t.t_threads[0], team ) );
4953 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4956 for ( f = 0; f < team->t.t_nproc; f++ ) {
4957 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4958 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4963 team->t.t_proc_bind = new_proc_bind;
4964 # if KMP_AFFINITY_SUPPORTED
4965 __kmp_partition_places( team );
4970 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4971 kmp_affin_mask_t *old_mask;
4972 if ( KMP_AFFINITY_CAPABLE() ) {
4973 KMP_CPU_ALLOC(old_mask);
4977 KA_TRACE( 20, (
"__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4979 team->t.t_size_changed = 1;
4982 #if KMP_NESTED_HOT_TEAMS
4983 int avail_threads = hot_teams[level].hot_team_nth;
4984 if( new_nproc < avail_threads )
4985 avail_threads = new_nproc;
4986 kmp_info_t **other_threads = team->t.t_threads;
4987 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4991 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4992 for ( b = 0; b < bs_last_barrier; ++ b ) {
4993 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4994 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4996 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5000 if( hot_teams[level].hot_team_nth >= new_nproc ) {
5003 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5004 team->t.t_nproc = new_nproc;
5007 team->t.t_nproc = hot_teams[level].hot_team_nth;
5008 hot_teams[level].hot_team_nth = new_nproc;
5009 #endif // KMP_NESTED_HOT_TEAMS
5010 if(team->t.t_max_nproc < new_nproc) {
5012 __kmp_reallocate_team_arrays(team, new_nproc);
5013 __kmp_reinitialize_team( team, new_icvs, NULL );
5016 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5023 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
5027 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
5028 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
5029 KMP_DEBUG_ASSERT( new_worker );
5030 team->t.t_threads[ f ] = new_worker;
5031 new_worker->th.th_team_nproc = team->t.t_nproc;
5033 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
5034 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
5035 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5036 team->t.t_bar[bs_plain_barrier].b_arrived ) );
5040 kmp_balign_t * balign = new_worker->th.th_bar;
5041 for( b = 0; b < bs_last_barrier; ++ b ) {
5042 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5043 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5045 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5051 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
5052 if ( KMP_AFFINITY_CAPABLE() ) {
5054 __kmp_set_system_affinity( old_mask, TRUE );
5055 KMP_CPU_FREE(old_mask);
5058 #if KMP_NESTED_HOT_TEAMS
5060 #endif // KMP_NESTED_HOT_TEAMS
5062 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
5064 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5066 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5067 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5068 if ( task_team != NULL ) {
5069 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5070 task_team->tt.tt_nproc = new_nproc;
5071 task_team->tt.tt_unfinished_threads = new_nproc;
5072 task_team->tt.tt_ref_ct = new_nproc - 1;
5079 for( f = 0 ; f < team->t.t_nproc ; f++ ) {
5080 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
5081 __kmp_gtid_from_tid( f, team ) );
5085 int old_state = team->t.t_threads[0]->th.th_task_state;
5086 for (f=0; f < team->t.t_nproc; ++f) {
5087 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
5088 team->t.t_threads[f]->th.th_task_state = old_state;
5089 team->t.t_threads[f]->th.th_task_team = team->t.t_task_team[old_state];
5094 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5095 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5096 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5101 team->t.t_proc_bind = new_proc_bind;
5102 # if KMP_AFFINITY_SUPPORTED
5103 __kmp_partition_places( team );
5109 kmp_info_t *master = team->t.t_threads[0];
5110 if( master->th.th_teams_microtask ) {
5111 for( f = 1; f < new_nproc; ++f ) {
5113 kmp_info_t *thr = team->t.t_threads[f];
5114 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5115 thr->th.th_teams_level = master->th.th_teams_level;
5116 thr->th.th_teams_size = master->th.th_teams_size;
5120 #if KMP_NESTED_HOT_TEAMS
5123 for( f = 1; f < new_nproc; ++f ) {
5124 kmp_info_t *thr = team->t.t_threads[f];
5125 thr->th.th_task_state = 0;
5127 kmp_balign_t * balign = thr->th.th_bar;
5128 for( b = 0; b < bs_last_barrier; ++ b ) {
5129 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5130 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5132 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5137 #endif // KMP_NESTED_HOT_TEAMS
5140 __kmp_alloc_argv_entries( argc, team, TRUE );
5141 team->t.t_argc = argc;
5147 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
5150 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5151 KA_TRACE( 20, ( "__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5152 team->t.t_task_team[0], team->t.t_task_team[1] ));
5157 __ompt_team_assign_id(team, ompt_parallel_id);
5167 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5170 if ( team->t.t_max_nproc >= max_nproc ) {
5172 __kmp_team_pool = team->t.t_next_pool;
5175 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5177 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5178 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5179 team->t.t_task_team[0] = NULL;
5180 team->t.t_task_team[1] = NULL;
5183 __kmp_alloc_argv_entries( argc, team, TRUE );
5184 team->t.t_argc = argc;
5186 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5187 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5190 for ( b = 0; b < bs_last_barrier; ++ b) {
5191 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5193 team->t.t_bar[ b ].b_master_arrived = 0;
5194 team->t.t_bar[ b ].b_team_arrived = 0;
5200 team->t.t_proc_bind = new_proc_bind;
5203 KA_TRACE( 20, ( "__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5206 __ompt_team_assign_id(team, ompt_parallel_id);
5217 team = __kmp_reap_team( team );
5218 __kmp_team_pool = team;
5223 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5226 team->t.t_max_nproc = max_nproc;
5230 __kmp_allocate_team_arrays( team, max_nproc );
5232 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
5233 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5235 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5236 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5237 team->t.t_task_team[0] = NULL;
5238 team->t.t_task_team[1] = NULL;
5240 if ( __kmp_storage_map ) {
5241 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5245 __kmp_alloc_argv_entries( argc, team, FALSE );
5246 team->t.t_argc = argc;
5248 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5249 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5252 for ( b = 0; b < bs_last_barrier; ++ b ) {
5253 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5255 team->t.t_bar[ b ].b_master_arrived = 0;
5256 team->t.t_bar[ b ].b_team_arrived = 0;
5262 team->t.t_proc_bind = new_proc_bind;
5266 __ompt_team_assign_id(team, ompt_parallel_id);
5267 team->t.ompt_serialized_team_info = NULL;
5272 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5283 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
5286 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5289 KMP_DEBUG_ASSERT( root );
5290 KMP_DEBUG_ASSERT( team );
5291 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5292 KMP_DEBUG_ASSERT( team->t.t_threads );
5294 int use_hot_team = team == root->r.r_hot_team;
5295 #if KMP_NESTED_HOT_TEAMS
5297 kmp_hot_team_ptr_t *hot_teams;
5299 level = team->t.t_active_level - 1;
5300 if( master->th.th_teams_microtask ) {
5301 if( master->th.th_teams_size.nteams > 1 ) {
5304 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5305 master->th.th_teams_level == team->t.t_level ) {
5309 hot_teams = master->th.th_hot_teams;
5310 if( level < __kmp_hot_teams_max_level ) {
5311 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5315 #endif // KMP_NESTED_HOT_TEAMS
5318 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5319 team->t.t_copyin_counter = 0;
5323 if( ! use_hot_team ) {
5324 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5326 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5328 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5329 if ( task_team != NULL ) {
5333 KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) );
5334 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
5335 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5337 team->t.t_task_team[tt_idx] = NULL;
5343 team->t.t_parent = NULL;
5347 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5348 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5349 __kmp_free_thread( team->t.t_threads[ f ] );
5350 team->t.t_threads[ f ] = NULL;
5356 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5357 __kmp_team_pool = (volatile kmp_team_t*) team;
5366 __kmp_reap_team( kmp_team_t *team )
5368 kmp_team_t *next_pool = team->t.t_next_pool;
5370 KMP_DEBUG_ASSERT( team );
5371 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5372 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5373 KMP_DEBUG_ASSERT( team->t.t_threads );
5374 KMP_DEBUG_ASSERT( team->t.t_argv );
5380 __kmp_free_team_arrays( team );
5381 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5382 __kmp_free( (void*) team->t.t_argv );
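// __kmp_free_thread: places a worker back on __kmp_thread_pool. The pool is kept
// sorted by ascending gtid; __kmp_thread_pool_insert_pt remembers the last
// insertion point, presumably so the scan below rarely starts from the head.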
5417 __kmp_free_thread( kmp_info_t *this_th )
5422 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5423 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5425 KMP_DEBUG_ASSERT( this_th );
5429 kmp_balign_t *balign = this_th->th.th_bar;
5430 for (b=0; b<bs_last_barrier; ++b) {
5431 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5432 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5433 balign[b].bb.team = NULL;
5438 TCW_PTR(this_th->th.th_team, NULL);
5439 TCW_PTR(this_th->th.th_root, NULL);
5440 TCW_PTR(this_th->th.th_dispatch, NULL);
5446 gtid = this_th->th.th_info.ds.ds_gtid;
5447 if ( __kmp_thread_pool_insert_pt != NULL ) {
5448 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5449 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5450 __kmp_thread_pool_insert_pt = NULL;
5461 if ( __kmp_thread_pool_insert_pt != NULL ) {
5462 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5465 scan = (kmp_info_t **)&__kmp_thread_pool;
5467 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5468 scan = &( (*scan)->th.th_next_pool ) );
5474 TCW_PTR(this_th->th.th_next_pool, *scan);
5475 __kmp_thread_pool_insert_pt = *scan = this_th;
5476 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5477 || ( this_th->th.th_info.ds.ds_gtid
5478 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5479 TCW_4(this_th->th.th_in_pool, TRUE);
5480 __kmp_thread_pool_nth++;
5482 TCW_4(__kmp_nth, __kmp_nth - 1);
5484 #ifdef KMP_ADJUST_BLOCKTIME
5487 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5488 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5489 if ( __kmp_nth <= __kmp_avail_proc ) {
5490 __kmp_zero_bt = FALSE;
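// __kmp_launch_thread: main loop of a worker thread. It waits at the fork
// barrier for work, invokes the team's microtask through t_invoke, passes the
// join barrier, and repeats until __kmp_global.g.g_done is set.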
5502 __kmp_launch_thread( kmp_info_t *this_thr )
5504 int gtid = this_thr->th.th_info.ds.ds_gtid;
5506 kmp_team_t *(* volatile pteam);
5509 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
5511 if( __kmp_env_consistency_check ) {
5512 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5516 if (ompt_status & ompt_status_track) {
5517 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5518 this_thr->th.ompt_thread_info.wait_id = 0;
5519 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5520 if ((ompt_status == ompt_status_track_callback) &&
5521 ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5522 __ompt_thread_begin(ompt_thread_worker, gtid);
5528 while( ! TCR_4(__kmp_global.g.g_done) ) {
5529 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5533 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5536 if (ompt_status & ompt_status_track) {
5537 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5542 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
5545 if (ompt_status & ompt_status_track) {
5546 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5550 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5553 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5555 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5557 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5558 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5560 updateHWFPControl (*pteam);
5563 if (ompt_status & ompt_status_track) {
5564 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5566 int tid = __kmp_tid_from_gtid(gtid);
5567 (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id =
5568 __ompt_task_id_new(tid);
5575 rc = (*pteam)->t.t_invoke( gtid );
5581 if (ompt_status & ompt_status_track) {
5583 int tid = __kmp_tid_from_gtid(gtid);
5584 (*pteam)->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
5586 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5590 KA_TRACE(20, ( "__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5591 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5594 __kmp_join_barrier( gtid );
5597 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5600 if ((ompt_status == ompt_status_track_callback) &&
5601 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5602 __ompt_thread_end(ompt_thread_worker, gtid);
5606 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
5607 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
5610 __kmp_common_destroy_gtid( gtid );
5612 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
5621 __kmp_internal_end_dest( void *specific_gtid )
5623 #if KMP_COMPILER_ICC
5624 #pragma warning( push )
5625 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5628 int gtid = (kmp_intptr_t)specific_gtid - 1;
5629 #if KMP_COMPILER_ICC
5630 #pragma warning( pop )
5633 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5647 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5648 __kmp_gtid_set_specific( gtid );
5649 #ifdef KMP_TDATA_GTID
5652 __kmp_internal_end_thread( gtid );
5655 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5661 __attribute__(( destructor ))
5663 __kmp_internal_end_dtor( void )
5665 __kmp_internal_end_atexit();
5669 __kmp_internal_end_fini( void )
5671 __kmp_internal_end_atexit();
5678 __kmp_internal_end_atexit( void )
5680 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5702 __kmp_internal_end_library( -1 );
5704 __kmp_close_console();
5710 kmp_info_t * thread,
5718 KMP_DEBUG_ASSERT( thread != NULL );
5720 gtid = thread->th.th_info.ds.ds_gtid;
5724 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5726 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5728 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5729 __kmp_release_64(&flag);
5734 __kmp_reap_worker( thread );
5749 if ( thread->th.th_active_in_pool ) {
5750 thread->th.th_active_in_pool = FALSE;
5751 KMP_TEST_THEN_DEC32(
5752 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5753 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5757 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5758 --__kmp_thread_pool_nth;
5763 __kmp_free_fast_memory( thread );
5766 __kmp_suspend_uninitialize_thread( thread );
5768 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5769 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5774 #ifdef KMP_ADJUST_BLOCKTIME
5777 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5778 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5779 if ( __kmp_nth <= __kmp_avail_proc ) {
5780 __kmp_zero_bt = FALSE;
5786 if( __kmp_env_consistency_check ) {
5787 if ( thread->th.th_cons ) {
5788 __kmp_free_cons_stack( thread->th.th_cons );
5789 thread->th.th_cons = NULL;
5793 if ( thread->th.th_pri_common != NULL ) {
5794 __kmp_free( thread->th.th_pri_common );
5795 thread->th.th_pri_common = NULL;
5798 if (thread->th.th_task_state_memo_stack != NULL) {
5799 __kmp_free(thread->th.th_task_state_memo_stack);
5800 thread->th.th_task_state_memo_stack = NULL;
5804 if ( thread->th.th_local.bget_data != NULL ) {
5805 __kmp_finalize_bget( thread );
5809 #if KMP_AFFINITY_SUPPORTED
5810 if ( thread->th.th_affin_mask != NULL ) {
5811 KMP_CPU_FREE( thread->th.th_affin_mask );
5812 thread->th.th_affin_mask = NULL;
5816 __kmp_reap_team( thread->th.th_serial_team );
5817 thread->th.th_serial_team = NULL;
5818 __kmp_free( thread );
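// __kmp_internal_end: common teardown. Reaps the monitor thread, drains the
// thread and team pools, and frees the task teams; the callers below acquire
// __kmp_initz_lock and __kmp_forkjoin_lock before invoking it.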
5825 __kmp_internal_end( void )
5830 __kmp_unregister_library();
5838 __kmp_reclaim_dead_roots();
5841 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5843 if( __kmp_root[i]->r.r_active )
5846 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5848 if ( i < __kmp_threads_capacity ) {
5866 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5867 if ( TCR_4( __kmp_init_monitor ) ) {
5868 __kmp_reap_monitor( & __kmp_monitor );
5869 TCW_4( __kmp_init_monitor, 0 );
5871 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5872 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5877 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5878 if( __kmp_root[i] ) {
5880 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5889 while ( __kmp_thread_pool != NULL ) {
5891 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5892 __kmp_thread_pool = thread->th.th_next_pool;
5894 thread->th.th_next_pool = NULL;
5895 thread->th.th_in_pool = FALSE;
5896 __kmp_reap_thread( thread, 0 );
5898 __kmp_thread_pool_insert_pt = NULL;
5901 while ( __kmp_team_pool != NULL ) {
5903 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5904 __kmp_team_pool = team->t.t_next_pool;
5906 team->t.t_next_pool = NULL;
5907 __kmp_reap_team( team );
5910 __kmp_reap_task_teams( );
5912 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5919 TCW_SYNC_4(__kmp_init_common, FALSE);
5921 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
5930 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5931 if ( TCR_4( __kmp_init_monitor ) ) {
5932 __kmp_reap_monitor( & __kmp_monitor );
5933 TCW_4( __kmp_init_monitor, 0 );
5935 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5936 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5939 TCW_4(__kmp_init_gtid, FALSE);
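// __kmp_internal_end_library / __kmp_internal_end_thread: shutdown entry points.
// They resolve the requested gtid, bail out early for monitor or unregistered
// threads, and serialize the real teardown with the bootstrap locks.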
5950 __kmp_internal_end_library( int gtid_req )
5958 if( __kmp_global.g.g_abort ) {
5959 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
5963 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5964 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
5973 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5974 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5975 if( gtid == KMP_GTID_SHUTDOWN ) {
5976 KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5978 } else if( gtid == KMP_GTID_MONITOR ) {
5979 KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5981 } else if( gtid == KMP_GTID_DNE ) {
5982 KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5984 } else if( KMP_UBER_GTID( gtid )) {
5986 if( __kmp_root[gtid]->r.r_active ) {
5987 __kmp_global.g.g_abort = -1;
5988 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5989 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5992 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5993 __kmp_unregister_root_current_thread( gtid );
6000 #ifdef DUMP_DEBUG_ON_EXIT
6001 if ( __kmp_debug_buf )
6002 __kmp_dump_debug_buffer( );
6008 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6011 if( __kmp_global.g.g_abort ) {
6012 KA_TRACE( 10, ( "__kmp_internal_end_library: abort, exiting\n" ));
6014 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6017 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6018 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6028 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6031 __kmp_internal_end();
6033 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6034 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6036 KA_TRACE( 10, ( "__kmp_internal_end_library: exit\n" ) );
6038 #ifdef DUMP_DEBUG_ON_EXIT
6039 if ( __kmp_debug_buf )
6040 __kmp_dump_debug_buffer();
6044 __kmp_close_console();
6047 __kmp_fini_allocator();
6052 __kmp_internal_end_thread( int gtid_req )
6062 if( __kmp_global.g.g_abort ) {
6063 KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
6067 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6068 KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
6076 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6077 KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6078 if( gtid == KMP_GTID_SHUTDOWN ) {
6079 KA_TRACE( 10, ( "__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6081 } else if( gtid == KMP_GTID_MONITOR ) {
6082 KA_TRACE( 10, ( "__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6084 } else if( gtid == KMP_GTID_DNE ) {
6085 KA_TRACE( 10, ( "__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6088 } else if( KMP_UBER_GTID( gtid )) {
6090 if( __kmp_root[gtid]->r.r_active ) {
6091 __kmp_global.g.g_abort = -1;
6092 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6093 KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6096 KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6097 __kmp_unregister_root_current_thread( gtid );
6101 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6104 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6105 if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
6106 __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
6110 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6114 #if defined KMP_DYNAMIC_LIB
6122 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
6126 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6129 if( __kmp_global.g.g_abort ) {
6130 KA_TRACE( 10, ( "__kmp_internal_end_thread: abort, exiting\n" ));
6132 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6135 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6136 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6148 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6150 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6151 if ( KMP_UBER_GTID( i ) ) {
6152 KA_TRACE( 10, ( "__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6153 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6154 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6161 __kmp_internal_end();
6163 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6164 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6166 KA_TRACE( 10, ( "__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
6168 #ifdef DUMP_DEBUG_ON_EXIT
6169 if ( __kmp_debug_buf )
6170 __kmp_dump_debug_buffer();
6177 static long __kmp_registration_flag = 0;
6179 static char * __kmp_registration_str = NULL;
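// Library registration: a "__KMP_REGISTERED_LIB_<pid>" environment variable
// records the address and value of __kmp_registration_flag so that a second
// OpenMP runtime loaded into the same process can be detected (overridable via
// KMP_DUPLICATE_LIB_OK, see below).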
6185 __kmp_reg_status_name() {
6191 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6196 __kmp_register_library_startup(
6200 char * name = __kmp_reg_status_name();
6207 __kmp_initialize_system_tick();
6209 __kmp_read_system_time( & time.dtime );
6210 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6211 __kmp_registration_str =
6214 & __kmp_registration_flag,
6215 __kmp_registration_flag,
6219 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6223 char * value = NULL;
6226 __kmp_env_set( name, __kmp_registration_str, 0 );
6228 value = __kmp_env_get( name );
6229 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6238 char * tail = value;
6239 char * flag_addr_str = NULL;
6240 char * flag_val_str = NULL;
6241 char const * file_name = NULL;
6242 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6243 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6245 if ( tail != NULL ) {
6246 long * flag_addr = 0;
6248 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6249 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
6250 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6254 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6262 switch ( neighbor ) {
6267 file_name = "unknown library";
6271 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6272 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6276 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6277 KMP_HNT( DuplicateLibrary ),
6281 KMP_INTERNAL_FREE( duplicate_ok );
6282 __kmp_duplicate_library_ok = 1;
6287 __kmp_env_unset( name );
6290 KMP_DEBUG_ASSERT( 0 );
6295 KMP_INTERNAL_FREE( (void *) value );
6298 KMP_INTERNAL_FREE( (void *) name );
6304 __kmp_unregister_library( void ) {
6306 char * name = __kmp_reg_status_name();
6307 char * value = __kmp_env_get( name );
6309 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6310 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6311 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6313 __kmp_env_unset( name );
6316 KMP_INTERNAL_FREE( __kmp_registration_str );
6317 KMP_INTERNAL_FREE( value );
6318 KMP_INTERNAL_FREE( name );
6320 __kmp_registration_flag = 0;
6321 __kmp_registration_str = NULL;
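// __kmp_check_mic_type: classifies the CPU from the CPUID leaf-1 signature; the
// masks below appear to separate the two Intel MIC generations (mic2, mic3)
// from ordinary hosts (non_mic).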
6329 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6331 static void __kmp_check_mic_type()
6333 kmp_cpuid_t cpuid_state = {0};
6334 kmp_cpuid_t * cs_p = &cpuid_state;
6335 __kmp_x86_cpuid(1, 0, cs_p);
6337 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6338 __kmp_mic_type = mic2;
6339 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6340 __kmp_mic_type = mic3;
6342 __kmp_mic_type = non_mic;
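// __kmp_do_serial_initialize: one-time process setup -- lock and allocator
// initialization, environment processing, allocation of the __kmp_threads /
// __kmp_root arrays, and registration of the initial (uber) root thread.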
6349 __kmp_do_serial_initialize( void )
6354 KA_TRACE( 10, ( "__kmp_do_serial_initialize: enter\n" ) );
6356 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6357 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6358 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6359 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6360 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6362 __kmp_validate_locks();
6365 __kmp_init_allocator();
6371 __kmp_register_library_startup( );
6374 if( TCR_4(__kmp_global.g.g_done) ) {
6375 KA_TRACE( 10, ( "__kmp_do_serial_initialize: reinitialization of library\n" ) );
6378 __kmp_global.g.g_abort = 0;
6379 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6382 #if KMP_USE_ADAPTIVE_LOCKS
6383 #if KMP_DEBUG_ADAPTIVE_LOCKS
6384 __kmp_init_speculative_stats();
6387 __kmp_init_lock( & __kmp_global_lock );
6388 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6389 __kmp_init_lock( & __kmp_debug_lock );
6390 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6391 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6392 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6393 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6394 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6395 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6396 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6397 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6398 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6399 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6400 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6401 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6402 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6403 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6404 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6405 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6406 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6410 __kmp_runtime_initialize();
6412 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6413 __kmp_check_mic_type();
6420 __kmp_abort_delay = 0;
6424 __kmp_dflt_team_nth_ub = __kmp_xproc;
6425 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6426 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6428 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6429 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6431 __kmp_max_nth = __kmp_sys_max_nth;
6434 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6435 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6436 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6438 __kmp_library = library_throughput;
6440 __kmp_static = kmp_sch_static_balanced;
6446 #if KMP_FAST_REDUCTION_BARRIER
6447 #define kmp_reduction_barrier_gather_bb ((int)1)
6448 #define kmp_reduction_barrier_release_bb ((int)1)
6449 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6450 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6451 #endif // KMP_FAST_REDUCTION_BARRIER
6452 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6453 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6454 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6455 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6456 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6457 #if KMP_FAST_REDUCTION_BARRIER
6458 if( i == bs_reduction_barrier ) {
6459 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6460 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6461 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6462 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6464 #endif // KMP_FAST_REDUCTION_BARRIER
6466 #if KMP_FAST_REDUCTION_BARRIER
6467 #undef kmp_reduction_barrier_release_pat
6468 #undef kmp_reduction_barrier_gather_pat
6469 #undef kmp_reduction_barrier_release_bb
6470 #undef kmp_reduction_barrier_gather_bb
6471 #endif // KMP_FAST_REDUCTION_BARRIER
6472 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6473 if( __kmp_mic_type != non_mic ) {
6475 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
6476 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
6477 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6478 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6480 #if KMP_FAST_REDUCTION_BARRIER
6481 if( __kmp_mic_type != non_mic ) {
6482 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6483 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
6490 __kmp_env_checks = TRUE;
6492 __kmp_env_checks = FALSE;
6496 __kmp_foreign_tp = TRUE;
6498 __kmp_global.g.g_dynamic = FALSE;
6499 __kmp_global.g.g_dynamic_mode = dynamic_default;
6501 __kmp_env_initialize( NULL );
6505 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6506 if ( __kmp_str_match_true( val ) ) {
6507 kmp_str_buf_t buffer;
6508 __kmp_str_buf_init( & buffer );
6509 __kmp_i18n_dump_catalog( & buffer );
6510 __kmp_printf( "%s", buffer.str );
6511 __kmp_str_buf_free( & buffer );
6511 __kmp_str_buf_free( & buffer );
6513 __kmp_env_free( & val );
6516 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
6518 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6523 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6524 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6525 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6526 __kmp_thread_pool = NULL;
6527 __kmp_thread_pool_insert_pt = NULL;
6528 __kmp_team_pool = NULL;
6533 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
6534 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6535 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
6538 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
6539 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6544 gtid = __kmp_register_root( TRUE );
6545 KA_TRACE( 10, ( "__kmp_do_serial_initialize T#%d\n", gtid ));
6546 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6547 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6551 __kmp_common_initialize();
6555 __kmp_register_atfork();
6558 #if ! defined KMP_DYNAMIC_LIB
6563 int rc = atexit( __kmp_internal_end_atexit );
6565 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6570 #if KMP_HANDLE_SIGNALS
6577 __kmp_install_signals( FALSE );
6580 __kmp_install_signals( TRUE );
6585 __kmp_init_counter ++;
6587 __kmp_init_serial = TRUE;
6589 if (__kmp_settings) {
6594 if (__kmp_display_env || __kmp_display_env_verbose) {
6595 __kmp_env_print_2();
6597 #endif // OMP_40_ENABLED
6601 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6608 __kmp_serial_initialize( void )
6610 if ( __kmp_init_serial ) {
6613 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6614 if ( __kmp_init_serial ) {
6615 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6618 __kmp_do_serial_initialize();
6619 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6623 __kmp_do_middle_initialize( void )
6626 int prev_dflt_team_nth;
6628 if( !__kmp_init_serial ) {
6629 __kmp_do_serial_initialize();
6632 KA_TRACE( 10, ( "__kmp_middle_initialize: enter\n" ) );
6638 prev_dflt_team_nth = __kmp_dflt_team_nth;
6640 #if KMP_AFFINITY_SUPPORTED
6645 __kmp_affinity_initialize();
6651 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6652 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6653 __kmp_affinity_set_init_mask( i, TRUE );
6658 KMP_ASSERT( __kmp_xproc > 0 );
6659 if ( __kmp_avail_proc == 0 ) {
6660 __kmp_avail_proc = __kmp_xproc;
6665 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
6666 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6670 if ( __kmp_dflt_team_nth == 0 ) {
6671 #ifdef KMP_DFLT_NTH_CORES
6675 __kmp_dflt_team_nth = __kmp_ncores;
6676 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6677 __kmp_dflt_team_nth ) );
6682 __kmp_dflt_team_nth = __kmp_avail_proc;
6683 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6684 __kmp_dflt_team_nth ) );
6688 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6689 __kmp_dflt_team_nth = KMP_MIN_NTH;
6691 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6692 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6699 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6701 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6708 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6709 kmp_info_t *thread = __kmp_threads[ i ];
6710 if ( thread == NULL ) continue;
6711 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
6713 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6716 KA_TRACE( 20, ( "__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6717 __kmp_dflt_team_nth) );
6719 #ifdef KMP_ADJUST_BLOCKTIME
6722 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6723 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6724 if ( __kmp_nth > __kmp_avail_proc ) {
6725 __kmp_zero_bt = TRUE;
6731 TCW_SYNC_4(__kmp_init_middle, TRUE);
6733 KA_TRACE( 10, ( "__kmp_do_middle_initialize: exit\n" ) );
6737 __kmp_middle_initialize( void )
6739 if ( __kmp_init_middle ) {
6742 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6743 if ( __kmp_init_middle ) {
6744 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6747 __kmp_do_middle_initialize();
6748 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
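// __kmp_parallel_initialize: final initialization stage, reached from the first
// parallel region; it captures the x87/MXCSR state, installs signal handlers
// where configured, and picks the default dynamic-adjustment mode.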
6755 __kmp_parallel_initialize( void )
6757 int gtid = __kmp_entry_gtid();
6760 if( TCR_4(__kmp_init_parallel) ) return;
6761 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6762 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6765 if( TCR_4(__kmp_global.g.g_done) ) {
6766 KA_TRACE( 10, ( "__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6767 __kmp_infinite_loop();
6773 if( !__kmp_init_middle ) {
6774 __kmp_do_middle_initialize();
6778 KA_TRACE( 10, ( "__kmp_parallel_initialize: enter\n" ) );
6779 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6781 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6786 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6787 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6788 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6792 # if KMP_HANDLE_SIGNALS
6794 __kmp_install_signals( TRUE );
6798 __kmp_suspend_initialize();
6800 # if defined(USE_LOAD_BALANCE)
6801 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6802 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6805 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6806 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6810 if ( __kmp_version ) {
6811 __kmp_print_version_2();
6815 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6818 KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );
6820 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6830 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6833 kmp_disp_t *dispatch;
6838 this_thr->th.th_local.this_construct = 0;
6839 #if KMP_CACHE_MANAGE
6840 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6842 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6843 KMP_DEBUG_ASSERT( dispatch );
6844 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6847 dispatch->th_disp_index = 0;
6849 if( __kmp_env_consistency_check )
6850 __kmp_push_parallel( gtid, team->t.t_ident );
6856 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
6859 if( __kmp_env_consistency_check )
6860 __kmp_pop_parallel( gtid, team->t.t_ident );
6864 __kmp_invoke_task_func( int gtid )
6867 int tid = __kmp_tid_from_gtid( gtid );
6868 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6869 kmp_team_t *team = this_thr->th.th_team;
6871 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6873 if ( __itt_stack_caller_create_ptr ) {
6874 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6877 #if INCLUDE_SSC_MARKS
6878 SSC_MARK_INVOKING();
6883 void **exit_runtime_p;
6884 ompt_task_id_t my_task_id;
6885 ompt_parallel_id_t my_parallel_id;
6887 if (ompt_status & ompt_status_track) {
6888 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6889 ompt_task_info.frame.exit_runtime_frame);
6891 exit_runtime_p = &dummy;
6895 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6896 my_parallel_id = team->t.ompt_team_info.parallel_id;
6897 if ((ompt_status == ompt_status_track_callback) &&
6898 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6899 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6900 my_parallel_id, my_task_id);
6905 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6906 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
6912 #if OMPT_SUPPORT && OMPT_TRACE
6913 if (ompt_status & ompt_status_track) {
6914 if ((ompt_status == ompt_status_track_callback) &&
6915 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
6916 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
6917 my_parallel_id, my_task_id);
6920 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_runtime_frame = 0;
6925 if ( __itt_stack_caller_create_ptr ) {
6926 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6929 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6936 __kmp_teams_master( int gtid )
6939 kmp_info_t *thr = __kmp_threads[ gtid ];
6940 kmp_team_t *team = thr->th.th_team;
6941 ident_t *loc = team->t.t_ident;
6942 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6943 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6944 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6945 KA_TRACE( 20, ( "__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
6946 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6949 #if INCLUDE_SSC_MARKS
6952 __kmp_fork_call( loc, gtid, fork_context_intel,
6955 (void *)thr->th.th_teams_microtask,
6957 (microtask_t)thr->th.th_teams_microtask,
6958 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6960 #if INCLUDE_SSC_MARKS
6963 __kmp_join_call( loc, gtid, 1 );
6968 __kmp_invoke_teams_master( int gtid )
6970 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6971 kmp_team_t *team = this_thr->th.th_team;
6973 if ( !__kmp_threads[gtid]->th.th_team->t.t_serialized )
6974 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6976 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6977 __kmp_teams_master( gtid );
6978 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
6989 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6991 kmp_info_t *thr = __kmp_threads[gtid];
6993 if( num_threads > 0 )
6994 thr->th.th_set_nproc = num_threads;
7002 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
7004 kmp_info_t *thr = __kmp_threads[gtid];
7005 KMP_DEBUG_ASSERT(num_teams >= 0);
7006 KMP_DEBUG_ASSERT(num_threads >= 0);
7007 if( num_teams == 0 ) {
7011 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7014 if( num_threads > 0 ) {
7015 thr->th.th_teams_size.nth = num_threads;
7017 if( !TCR_4(__kmp_init_middle) )
7018 __kmp_middle_initialize();
7019 thr->th.th_teams_size.nth = __kmp_avail_proc / num_teams;
7028 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
7030 kmp_info_t *thr = __kmp_threads[gtid];
7031 thr->th.th_set_proc_bind = proc_bind;
7039 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
7041 kmp_info_t *this_thr = __kmp_threads[gtid];
7047 KMP_DEBUG_ASSERT( team );
7048 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7049 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7052 team->t.t_construct = 0;
7053 team->t.t_ordered.dt.t_value = 0;
7056 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
7057 if ( team->t.t_max_nproc > 1 ) {
7059 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
7060 team->t.t_disp_buffer[ i ].buffer_index = i;
7062 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
7066 KMP_ASSERT( this_thr->th.th_team == team );
7069 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7070 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7071 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7076 __kmp_fork_barrier( gtid, 0 );
7081 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7083 kmp_info_t *this_thr = __kmp_threads[gtid];
7085 KMP_DEBUG_ASSERT( team );
7086 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7087 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7093 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7094 __kmp_printf( "GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7095 __kmp_printf( "__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7096 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7097 __kmp_print_structure();
7099 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7100 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7103 __kmp_join_barrier( gtid );
7106 KMP_ASSERT( this_thr->th.th_team == team );
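// USE_LOAD_BALANCE support: __kmp_active_hot_team_nproc counts the busy threads
// of the root's hot team, and __kmp_load_balance_nproc trims a requested team
// size when the machine already looks fully loaded.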
7113 #ifdef USE_LOAD_BALANCE
7120 __kmp_active_hot_team_nproc( kmp_root_t *root )
7124 kmp_team_t *hot_team;
7126 if ( root->r.r_active ) {
7129 hot_team = root->r.r_hot_team;
7130 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7131 return hot_team->t.t_nproc - 1;
7138 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7139 if ( hot_team->t.t_threads[i]->th.th_active ) {
7151 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7155 int hot_team_active;
7156 int team_curr_active;
7159 KB_TRACE( 20, ( "__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7160 root, set_nproc ) );
7161 KMP_DEBUG_ASSERT( root );
7162 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
7163 KMP_DEBUG_ASSERT( set_nproc > 1 );
7165 if ( set_nproc == 1) {
7166 KB_TRACE( 20, ( "__kmp_load_balance_nproc: serial execution.\n" ) );
7177 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7178 hot_team_active = __kmp_active_hot_team_nproc( root );
7179 team_curr_active = pool_active + hot_team_active + 1;
7184 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7185 KB_TRACE( 30, ( "__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7186 system_active, pool_active, hot_team_active ) );
7188 if ( system_active < 0 ) {
7195 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7196 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7201 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7202 : root->r.r_hot_team->t.t_nproc);
7203 if ( retval > set_nproc ) {
7206 if ( retval < KMP_MIN_NTH ) {
7207 retval = KMP_MIN_NTH;
7210 KB_TRACE( 20, ( "__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7220 if ( system_active < team_curr_active ) {
7221 system_active = team_curr_active;
7223 retval = __kmp_avail_proc - system_active + team_curr_active;
7224 if ( retval > set_nproc ) {
7227 if ( retval < KMP_MIN_NTH ) {
7228 retval = KMP_MIN_NTH;
7231 KB_TRACE( 20, ( "__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7243 __kmp_cleanup( void )
7247 KA_TRACE( 10, ( "__kmp_cleanup: enter\n" ) );
7249 if (TCR_4(__kmp_init_parallel)) {
7250 #if KMP_HANDLE_SIGNALS
7251 __kmp_remove_signals();
7253 TCW_4(__kmp_init_parallel, FALSE);
7256 if (TCR_4(__kmp_init_middle)) {
7257 #if KMP_AFFINITY_SUPPORTED
7258 __kmp_affinity_uninitialize();
7260 TCW_4(__kmp_init_middle, FALSE);
7263 KA_TRACE( 10, ( "__kmp_cleanup: go serial cleanup\n" ) );
7265 if (__kmp_init_serial) {
7267 __kmp_runtime_destroy();
7269 __kmp_init_serial = FALSE;
7272 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7273 if ( __kmp_root[ f ] != NULL ) {
7274 __kmp_free( __kmp_root[ f ] );
7275 __kmp_root[ f ] = NULL;
7278 __kmp_free( __kmp_threads );
7281 __kmp_threads = NULL;
7283 __kmp_threads_capacity = 0;
7285 #if KMP_USE_DYNAMIC_LOCK
7286 __kmp_cleanup_indirect_user_locks();
7288 __kmp_cleanup_user_locks();
7291 #if KMP_AFFINITY_SUPPORTED
7292 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7293 __kmp_cpuinfo_file = NULL;
7296 #if KMP_USE_ADAPTIVE_LOCKS
7297 #if KMP_DEBUG_ADAPTIVE_LOCKS
7298 __kmp_print_speculative_stats();
7301 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7302 __kmp_nested_nth.nth = NULL;
7303 __kmp_nested_nth.size = 0;
7304 __kmp_nested_nth.used = 0;
7306 __kmp_i18n_catclose();
7308 #if KMP_STATS_ENABLED
7309 __kmp_accumulate_stats_at_exit();
7310 __kmp_stats_list.deallocate();
7313 KA_TRACE( 10, ( "__kmp_cleanup: exit\n" ) );
7320 __kmp_ignore_mppbeg( void )
7324 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7325 if (__kmp_str_match_false( env ))
7333 __kmp_ignore_mppend( void )
7337 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7338 if (__kmp_str_match_false( env ))
7346 __kmp_internal_begin( void )
7353 gtid = __kmp_entry_gtid();
7354 root = __kmp_threads[ gtid ]->th.th_root;
7355 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7357 if( root->r.r_begin ) return;
7358 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7359 if( root->r.r_begin ) {
7360 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7364 root->r.r_begin = TRUE;
7366 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7374 __kmp_user_set_library ( enum library_type arg)
7382 gtid = __kmp_entry_gtid();
7383 thread = __kmp_threads[ gtid ];
7385 root = thread->th.th_root;
7387 KA_TRACE( 20, ( "__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7388 if (root->r.r_in_parallel) {
7389 KMP_WARNING( SetLibraryIncorrectCall );
7394 case library_serial :
7395 thread->th.th_set_nproc = 0;
7396 set__nproc( thread, 1 );
7398 case library_turnaround :
7399 thread->th.th_set_nproc = 0;
7400 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7402 case library_throughput :
7403 thread->th.th_set_nproc = 0;
7404 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7407 KMP_FATAL( UnknownLibraryType, arg );
7410 __kmp_aux_set_library ( arg );
7414 __kmp_aux_set_stacksize( size_t arg )
7416 if (! __kmp_init_serial)
7417 __kmp_serial_initialize();
7420 if (arg & (0x1000 - 1)) {
7421 arg &= ~(0x1000 - 1);
7426 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7429 if (! TCR_4(__kmp_init_parallel)) {
7432 if (value < __kmp_sys_min_stksize )
7433 value = __kmp_sys_min_stksize ;
7434 else if (value > KMP_MAX_STKSIZE)
7435 value = KMP_MAX_STKSIZE;
7437 __kmp_stksize = value;
7439 __kmp_env_stksize = TRUE;
7442 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7448 __kmp_aux_set_library ( enum library_type arg)
7450 __kmp_library = arg;
7452 switch ( __kmp_library ) {
7453 case library_serial :
7455 KMP_INFORM( LibraryIsSerial );
7456 (void) __kmp_change_library( TRUE );
7459 case library_turnaround :
7460 (void) __kmp_change_library( TRUE );
7462 case library_throughput :
7463 (void) __kmp_change_library( FALSE );
7466 KMP_FATAL( UnknownLibraryType, arg );
7474 __kmp_aux_set_blocktime ( int arg, kmp_info_t *thread, int tid)
7476 int blocktime = arg;
7480 __kmp_save_internal_controls( thread );
7483 if (blocktime < KMP_MIN_BLOCKTIME)
7484 blocktime = KMP_MIN_BLOCKTIME;
7485 else if (blocktime > KMP_MAX_BLOCKTIME)
7486 blocktime = KMP_MAX_BLOCKTIME;
7488 set__blocktime_team( thread->th.th_team, tid, blocktime );
7489 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
7492 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7494 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7495 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
7500 set__bt_set_team( thread->th.th_team, tid, bt_set );
7501 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
7502 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7503 __kmp_gtid_from_tid(tid, thread->th.th_team),
7504 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7508 __kmp_aux_set_defaults(
7512 if ( ! __kmp_init_serial ) {
7513 __kmp_serial_initialize();
7515 __kmp_env_initialize( str );
7519 || __kmp_display_env || __kmp_display_env_verbose
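// __kmp_determine_reduction_method: chooses a critical-section, atomic, or tree
// reduction based on team size, architecture/OS, and whether the compiler
// emitted atomic and tree reduction code (tested by the two macros below).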
7532 PACKED_REDUCTION_METHOD_T
7533 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7534 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
7535 kmp_critical_name *lck )
7543 PACKED_REDUCTION_METHOD_T retval;
7547 KMP_DEBUG_ASSERT( loc );
7548 KMP_DEBUG_ASSERT( lck );
7550 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7551 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7553 retval = critical_reduce_block;
7555 team_size = __kmp_get_team_num_threads( global_tid );
7557 if( team_size == 1 ) {
7559 retval = empty_reduce_block;
7563 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7564 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7566 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
7568 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7570 int teamsize_cutoff = 4;
7572 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7573 if( __kmp_mic_type != non_mic ) {
7574 teamsize_cutoff = 8;
7577 if( tree_available ) {
7578 if( team_size <= teamsize_cutoff ) {
7579 if ( atomic_available ) {
7580 retval = atomic_reduce_block;
7583 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7585 } else if ( atomic_available ) {
7586 retval = atomic_reduce_block;
7589 #error "Unknown or unsupported OS"
7590 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7592 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7594 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7598 if( atomic_available ) {
7599 if( num_vars <= 2 ) {
7600 retval = atomic_reduce_block;
7606 if( atomic_available && ( num_vars <= 3 ) ) {
7607 retval = atomic_reduce_block;
7608 } else if( tree_available ) {
7609 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7610 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7615 #error "Unknown or unsupported OS"
7619 #error "Unknown or unsupported architecture"
7626 if( __kmp_force_reduction_method != reduction_method_not_defined ) {
7628 PACKED_REDUCTION_METHOD_T forced_retval;
7630 int atomic_available, tree_available;
7632 switch( ( forced_retval = __kmp_force_reduction_method ) )
7634 case critical_reduce_block:
7636 if( team_size <= 1 ) {
7637 forced_retval = empty_reduce_block;
7641 case atomic_reduce_block:
7642 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7643 KMP_ASSERT( atomic_available );
7646 case tree_reduce_block:
7647 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7648 KMP_ASSERT( tree_available );
7649 #if KMP_FAST_REDUCTION_BARRIER
7650 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7658 retval = forced_retval;
7661 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7663 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7664 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7671 __kmp_get_reduce_method( void ) {
7672 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
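// Note: the packed reduction method apparently keeps the chosen method in the
// upper bits, which is why __kmp_get_reduce_method shifts right by 8.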