#include "kmp_atomic.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_error.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "

char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )

kmp_info_t __kmp_monitor;
void __kmp_cleanup( void );

static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
static void __kmp_initialize_team( kmp_team_t *team, int new_nproc,
                                   kmp_internal_control_t *new_icvs, ident_t *loc );
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places( kmp_team_t *team, int update_master_only = 0 );
#endif
static void __kmp_do_serial_initialize( void );
void __kmp_fork_barrier( int gtid, int tid );
void __kmp_join_barrier( int gtid );
void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc,
                           kmp_internal_control_t *new_icvs, ident_t *loc );
#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc );
#endif

static int __kmp_expand_threads( int nWish, int nNeed );
static int __kmp_unregister_root_other_thread( int gtid );
static void __kmp_unregister_library( void );
static void __kmp_reap_thread( kmp_info_t *thread, int is_root );
static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
__kmp_get_global_thread_id( )
    kmp_info_t **other_threads;

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
                      __kmp_nth, __kmp_all_nth ));

    if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
        return __kmp_gtid_get_specific();

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));

    stack_addr    = (char *) & stack_data;
    other_threads = __kmp_threads;

    for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
        kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

        stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
        stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

        if( stack_addr <= stack_base ) {
            size_t stack_diff = stack_base - stack_addr;

            if( stack_diff <= stack_size ) {
                KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );

    KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
                      "thread, using TLS\n" ));
    i = __kmp_gtid_get_specific();

    if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
        KMP_FATAL( StackOverflow, i );

    stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
    if( stack_addr > stack_base ) {
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
                other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);

        TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);

    if ( __kmp_storage_map ) {
        char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase;
        char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
        __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
                                      other_threads[i]->th.th_info.ds.ds_stacksize,
                                      "th_%d stack (refinement)", i );
__kmp_get_global_thread_id_reg( )

    if ( !__kmp_init_serial ) {

#ifdef KMP_TDATA_GTID
    if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));

    if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
        gtid = __kmp_gtid_get_specific();

        KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
        gtid = __kmp_get_global_thread_id();

    if( gtid == KMP_GTID_DNE ) {
        KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
                        "Registering a new gtid.\n" ));
        __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
        if( !__kmp_init_serial ) {
            __kmp_do_serial_initialize();
            gtid = __kmp_gtid_get_specific();

            gtid = __kmp_register_root(FALSE);

        __kmp_release_bootstrap_lock( &__kmp_initz_lock );

    KMP_DEBUG_ASSERT( gtid >= 0 );
__kmp_check_stack_overlap( kmp_info_t *th )
    char *stack_beg = NULL;
    char *stack_end = NULL;

    KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
    if ( __kmp_storage_map ) {
        stack_end = (char *) th->th.th_info.ds.ds_stackbase;
        stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        gtid = __kmp_gtid_from_thread( th );

        if (gtid == KMP_GTID_MONITOR) {
            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%s stack (%s)", "mon",
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );

            __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
                                          "th_%d stack (%s)", gtid,
                                          ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );

    gtid = __kmp_gtid_from_thread( th );
    if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid))
    {
        KA_TRACE(10, ("__kmp_check_stack_overlap: performing extensive checking\n"));
        if ( stack_beg == NULL ) {
            stack_end = (char *) th->th.th_info.ds.ds_stackbase;
            stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

        for( f = 0 ; f < __kmp_threads_capacity ; f++ ) {
            kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

            if( f_th && f_th != th ) {
                char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
                char *other_stack_beg = other_stack_end -
                                        (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
                if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
                   (stack_end > other_stack_beg && stack_end < other_stack_end)) {

                    if ( __kmp_storage_map )
                        __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
                                                      (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                                                      "th_%d stack (overlapped)",
                                                      __kmp_gtid_from_thread( f_th ) );

                    __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );

    KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
__kmp_infinite_loop( void )
    static int done = FALSE;
#define MAX_MESSAGE 512

__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
    char buffer[MAX_MESSAGE];

    va_start( ap, format);
    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2,
                  (unsigned long) size, format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
#if KMP_PRINT_DATA_PLACEMENT
    if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
        if( __kmp_storage_map_verbose ) {
            node = __kmp_get_host_node(p1);

                __kmp_storage_map_verbose = FALSE;

                int localProc = __kmp_get_cpu_from_gtid(gtid);

                p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
                p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );

                    __kmp_printf_no_lock( " GTID %d localNode %d\n", gtid, localProc>>1);

                    __kmp_printf_no_lock( " GTID %d\n", gtid);

                        (char*)p1 += PAGE_SIZE;
                    } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
                    __kmp_printf_no_lock( " %p-%p memNode %d\n", last,
                                          (char*)p1 - 1, lastNode);

                __kmp_printf_no_lock( " %p-%p memNode %d\n", p1,
                                      (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));

                __kmp_printf_no_lock( " %p-%p memNode %d\n", p2,
                                      (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));

            __kmp_printf_no_lock( " %s\n", KMP_I18N_STR( StorageMapWarning ) );

    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
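/*
   Usage sketch (illustrative only): callers pass the [p1, p2) extent plus a printf-style name,
   for example

       __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

   When KMP_PRINT_DATA_PLACEMENT is enabled and the range is contiguous, the routine additionally
   walks the range page by page and groups adjacent pages by __kmp_get_host_node() to report
   memory placement per node.
*/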
__kmp_warn( char const * format, ... )
    char buffer[MAX_MESSAGE];

    if ( __kmp_generate_warnings == kmp_warnings_off ) {

    va_start( ap, format );

    KMP_SNPRINTF( buffer, sizeof(buffer), "OMP warning: %s\n", format );
    __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
    __kmp_vprintf( kmp_err, buffer, ap );
    __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
__kmp_abort_process()

    __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );

    if ( __kmp_debug_buf ) {
        __kmp_dump_debug_buffer();

    if ( KMP_OS_WINDOWS ) {

        __kmp_global.g.g_abort = SIGABRT;

    __kmp_infinite_loop();
    __kmp_release_bootstrap_lock( & __kmp_exit_lock );

__kmp_abort_thread( void )

    __kmp_infinite_loop();
__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )

    __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
                                  "th_%d.th_info", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
                                  "th_%d.th_local", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
                                  sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
                                  &thr->th.th_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);

    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                                  &thr->th.th_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
                                  &thr->th.th_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
#endif // KMP_FAST_REDUCTION_BARRIER

__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )

    int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
    __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );

#if KMP_FAST_REDUCTION_BARRIER
    __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
                                  sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
#endif // KMP_FAST_REDUCTION_BARRIER

    __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
                                  sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
                                  sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
                                  sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
                                  header, team_id );

    __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
                                  sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}

#ifdef KMP_DYNAMIC_LIB

__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
    __kmp_init_bootstrap_lock( lck );

__kmp_reset_locks_on_process_detach( int gtid_req ) {

    for( i = 0; i < __kmp_threads_capacity; ++i ) {
        if( !__kmp_threads ) continue;
        kmp_info_t* th = __kmp_threads[ i ];
        if( th == NULL ) continue;
        int gtid = th->th.th_info.ds.ds_gtid;
        if( gtid == gtid_req ) continue;
        if( gtid < 0 ) continue;

        int alive = __kmp_is_thread_alive( th, &exit_val );

        if( thread_count == 0 ) break;

    __kmp_reset_lock( &__kmp_forkjoin_lock );

    __kmp_reset_lock( &__kmp_stdio_lock );
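/*
   Note: the loop above is a best-effort scan run at DLL_PROCESS_DETACH.  It walks
   __kmp_threads[] looking for threads other than the detaching one (gtid_req) that the OS may
   have killed while they held a bootstrap lock; once no such thread is still alive, the
   forkjoin and stdio bootstrap locks are simply re-initialized so shutdown cannot hang on a
   lock owned by a dead thread.
*/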
DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {

    switch( fdwReason ) {

        case DLL_PROCESS_ATTACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_ATTACH\n" ));

        case DLL_PROCESS_DETACH:
            KA_TRACE( 10, ( "DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific() ));

            if( lpReserved != NULL )
                __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );

            __kmp_internal_end_library( __kmp_gtid_get_specific() );

        case DLL_THREAD_ATTACH:
            KA_TRACE( 10, ( "DllMain: THREAD_ATTACH\n" ));

        case DLL_THREAD_DETACH:
            KA_TRACE( 10, ( "DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific() ));

            __kmp_internal_end_thread( __kmp_gtid_get_specific() );
__kmp_change_library( int status )

    old_status = __kmp_yield_init & 1;

        __kmp_yield_init |= 1;

        __kmp_yield_init &= ~1;
__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
#endif

#ifdef BUILD_PARALLEL_ORDERED
    if( !team->t.t_serialized ) {
        KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);

__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
    int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
    int tid = __kmp_tid_from_gtid( gtid );
    kmp_team_t *team = __kmp_team_from_gtid( gtid );
#endif

    if( __kmp_env_consistency_check ) {
        if( __kmp_threads[gtid]->th.th_root->r.r_active )
            __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );

#ifdef BUILD_PARALLEL_ORDERED
    if ( ! team->t.t_serialized ) {
        team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );

#if OMPT_SUPPORT && OMPT_BLAME
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
            kmp_info_t *this_thread = __kmp_threads[gtid];
            ompt_callbacks.ompt_callback(ompt_event_release_ordered)(
                this_thread->th.ompt_thread_info.wait_id);
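/*
   Ordered-section handshake (summary): with BUILD_PARALLEL_ORDERED, team->t.t_ordered.dt.t_value
   holds the tid whose turn it is.  __kmp_parallel_deo spins (KMP_WAIT_YIELD ... KMP_EQ) until
   that value equals the caller's tid; __kmp_parallel_dxo then passes the token on to
   (tid + 1) % t_nproc and, when OMPT blame shifting is enabled, reports
   ompt_event_release_ordered for the wait it ended.
*/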
__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )

    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    th   = __kmp_threads[ gtid ];
    team = th->th.th_team;

    th->th.th_ident = id_ref;

    if ( team->t.t_serialized ) {

        kmp_int32 old_this = th->th.th_local.this_construct;

        ++th->th.th_local.this_construct;

        if (team->t.t_construct == old_this) {
            status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                                 th->th.th_local.this_construct);

        if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) &&
             th->th.th_teams_microtask == NULL &&
             team->t.t_active_level == 1 )
        {
            __kmp_itt_metadata_single( id_ref );

    if( __kmp_env_consistency_check ) {
        if (status && push_ws) {
            __kmp_push_workshare( gtid, ct_psingle, id_ref );

            __kmp_check_workshare( gtid, ct_psingle, id_ref );

        __kmp_itt_single_start( gtid );

__kmp_exit_single( int gtid )

    __kmp_itt_single_end( gtid );

    if( __kmp_env_consistency_check )
        __kmp_pop_workshare( gtid, ct_psingle, NULL );
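/*
   Rough usage sketch (what a compiler-generated single construct does; illustrative only):

       if ( __kmp_enter_single( gtid, loc, TRUE ) ) {
           // body of the single region, executed by exactly one thread of the team
           __kmp_exit_single( gtid );
       }

   The winner is decided by a compare-and-store on team->t.t_construct against the thread's
   cached this_construct counter, so only one thread of the team obtains status == 1.
*/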
__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
                       int master_tid, int set_nthreads

    KMP_DEBUG_ASSERT( __kmp_init_serial );
    KMP_DEBUG_ASSERT( root && parent_team );

    new_nthreads = set_nthreads;
    if ( ! get__dynamic_2( parent_team, master_tid ) ) {
#ifdef USE_LOAD_BALANCE
    else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
        new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
        if ( new_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",

        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
        new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
                       : root->r.r_hot_team->t.t_nproc);
        if ( new_nthreads <= 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",

        if ( new_nthreads < set_nthreads ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
                            master_tid, new_nthreads ));

            new_nthreads = set_nthreads;

    else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
        if ( set_nthreads > 2 ) {
            new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
            new_nthreads = ( new_nthreads % set_nthreads ) + 1;
            if ( new_nthreads == 1 ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",

            if ( new_nthreads < set_nthreads ) {
                KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
                                master_tid, new_nthreads ));

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
        int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
                          root->r.r_hot_team->t.t_nproc );
        if ( tl_nthreads <= 0 ) {

        if ( ! get__dynamic_2( parent_team, master_tid )
             && ( ! __kmp_reserve_warn ) ) {
            __kmp_reserve_warn = 1;
                KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
                KMP_HNT( Unset_ALL_THREADS ),

        if ( tl_nthreads == 1 ) {
            KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",

        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
                        master_tid, tl_nthreads ));
        new_nthreads = tl_nthreads;

    capacity = __kmp_threads_capacity;
    if ( TCR_PTR(__kmp_threads[0]) == NULL ) {

    if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
         root->r.r_hot_team->t.t_nproc ) > capacity ) {

        int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
                            root->r.r_hot_team->t.t_nproc ) - capacity;
        int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
        if ( slotsAdded < slotsRequired ) {

            new_nthreads -= ( slotsRequired - slotsAdded );
            KMP_ASSERT( new_nthreads >= 1 );

            if ( ! get__dynamic_2( parent_team, master_tid )
                 && ( ! __kmp_reserve_warn ) ) {
                __kmp_reserve_warn = 1;
                if ( __kmp_tp_cached ) {
                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
                        KMP_HNT( PossibleSystemLimitOnThreads ),

                        KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
                        KMP_HNT( SystemLimitOnThreads ),

    if ( new_nthreads == 1 ) {
        KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
                        __kmp_get_gtid(), set_nthreads ) );

    KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
                    __kmp_get_gtid(), new_nthreads, set_nthreads ));
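/*
   Summary of the reservation policy above: starting from the requested set_nthreads, the count
   is first adjusted by the dynamic mode (load balance, thread limit, or random, when dyn-var
   is true), then clipped against __kmp_max_nth (KMP_ALL_THREADS), and finally against the
   capacity of __kmp_threads[], expanding the array via __kmp_expand_threads() when possible.
   A result of 1 tells the caller to serialize the region.
*/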
__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
                         kmp_info_t *master_th, int master_gtid )

    KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
    KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );

    master_th->th.th_info.ds.ds_tid  = 0;
    master_th->th.th_team            = team;
    master_th->th.th_team_nproc      = team->t.t_nproc;
    master_th->th.th_team_master     = master_th;
    master_th->th.th_team_serialized = FALSE;
    master_th->th.th_dispatch        = & team->t.t_dispatch[ 0 ];

#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;

    int level = team->t.t_active_level - 1;
    if( master_th->th.th_teams_microtask ) {
        if( master_th->th.th_teams_size.nteams > 1 ) {

        if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
            master_th->th.th_teams_level == team->t.t_level ) {

    if( level < __kmp_hot_teams_max_level ) {
        if( hot_teams[level].hot_team ) {
            KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

            hot_teams[level].hot_team = team;
            hot_teams[level].hot_team_nth = team->t.t_nproc;

    use_hot_team = team == root->r.r_hot_team;

    if ( !use_hot_team ) {

        team->t.t_threads[ 0 ] = master_th;
        __kmp_initialize_info( master_th, team, 0, master_gtid );

        for ( i=1 ; i < team->t.t_nproc ; i++ ) {

            kmp_info_t *thr = __kmp_allocate_thread( root, team, i );
            team->t.t_threads[ i ] = thr;
            KMP_DEBUG_ASSERT( thr );
            KMP_DEBUG_ASSERT( thr->th.th_team == team );

            KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join=%llu, plain=%llu\n",
                           __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
                           __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
                           team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
                           team->t.t_bar[ bs_plain_barrier ].b_arrived ) );

            thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
            thr->th.th_teams_level     = master_th->th.th_teams_level;
            thr->th.th_teams_size      = master_th->th.th_teams_size;

            kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
            for ( b = 0; b < bs_last_barrier; ++ b ) {
                balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
                balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places( team );
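/*
   Note: for a non-hot team the master installs itself in t_threads[0] and allocates/initializes
   each worker with __kmp_allocate_thread(); every worker's barrier state (b_arrived,
   b_worker_arrived) is seeded from the team barrier counters so (re)used threads do not trip
   the fork/join barriers.  Hot teams (team == root->r.r_hot_team, or a nested hot team when
   KMP_NESTED_HOT_TEAMS is enabled) skip this and reuse their existing thread set.
*/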
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

propagateFPControl(kmp_team_t * team)
    if ( __kmp_inherit_fp_control ) {
        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
        KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);

        KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

updateHWFPControl(kmp_team_t * team)
    if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {

        kmp_int16 x87_fpu_control_word;

        __kmp_store_x87_fpu_control_word( &x87_fpu_control_word );
        __kmp_store_mxcsr( &mxcsr );
        mxcsr &= KMP_X86_MXCSR_MASK;

        if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) {
            __kmp_clear_x87_fpu_status_word();
            __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );

        if ( team->t.t_mxcsr != mxcsr ) {
            __kmp_load_mxcsr( &team->t.t_mxcsr );

#else
# define propagateFPControl(x) ((void)0)
# define updateHWFPControl(x)  ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc );
__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
    kmp_info_t *this_thr;
    kmp_team_t *serial_team;

    KC_TRACE( 10, ( "__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );

    if( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

    this_thr    = __kmp_threads[ global_tid ];
    serial_team = this_thr->th.th_serial_team;

    KMP_DEBUG_ASSERT( serial_team );

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
        KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL );
        KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
                        global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) );
        this_thr->th.th_task_team = NULL;

    kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
    if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;
    }
    else if ( proc_bind == proc_bind_default ) {
        proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
    }
    this_thr->th.th_set_proc_bind = proc_bind_default;

    if( this_thr->th.th_team != serial_team ) {
        int level = this_thr->th.th_team->t.t_level;

        if( serial_team->t.t_serialized ) {
            kmp_team_t *new_team;

            __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

            ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);

            new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                           & this_thr->th.th_current_task->td_icvs,
                                           0 USE_NESTED_HOT_ARG(NULL) );
            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
            KMP_ASSERT( new_team );

            new_team->t.t_threads[0] = this_thr;
            new_team->t.t_parent = this_thr->th.th_team;
            serial_team = new_team;
            this_thr->th.th_serial_team = serial_team;

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
                            global_tid, serial_team ) );

            KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
                            global_tid, serial_team ) );

        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
        serial_team->t.t_ident = loc;
        serial_team->t.t_serialized = 1;
        serial_team->t.t_nproc = 1;
        serial_team->t.t_parent = this_thr->th.th_team;
        serial_team->t.t_sched = this_thr->th.th_team->t.t_sched;
        this_thr->th.th_team = serial_team;
        serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
                        global_tid, this_thr->th.th_current_task ) );
        KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
        this_thr->th.th_current_task->td_flags.executing = 0;

        __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );

        copy_icvs( & this_thr->th.th_current_task->td_icvs,
                   & this_thr->th.th_current_task->td_parent->td_icvs );

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];

        if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
            this_thr->th.th_current_task->td_icvs.proc_bind
                = __kmp_nested_proc_bind.bind_types[ level + 1 ];

        serial_team->t.t_pkfn = (microtask_t)( ~0 );

        this_thr->th.th_info.ds.ds_tid = 0;

        this_thr->th.th_team_nproc      = 1;
        this_thr->th.th_team_master     = this_thr;
        this_thr->th.th_team_serialized = 1;

        serial_team->t.t_level        = serial_team->t.t_parent->t.t_level + 1;
        serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

        propagateFPControl( serial_team );

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
        if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
            serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
                __kmp_allocate( sizeof( dispatch_private_info_t ) );

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

        ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid);
        __ompt_team_assign_id(serial_team, ompt_parallel_id);

        KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads );
        KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
        ++ serial_team->t.t_serialized;
        this_thr->th.th_team_serialized = serial_team->t.t_serialized;

        int level = this_thr->th.th_team->t.t_level;

        if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
            this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];

        serial_team->t.t_level++;
        KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
                        global_tid, serial_team, serial_team->t.t_level ) );

        KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

        dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
            __kmp_allocate( sizeof( dispatch_private_info_t ) );
        disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
        serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

        this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    if ( __kmp_env_consistency_check )
        __kmp_push_parallel( global_tid, NULL );
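/*
   Sketch of the serialized path above: the first nesting level swaps the thread onto its
   (possibly freshly allocated) th_serial_team, pushes the current task, bumps t_level, and
   gives the team a private dispatch buffer; deeper nesting only increments t_serialized and
   chains an extra dispatch_private_info_t onto th_disp_buffer.  The matching end-of-region
   code in __kmpc_end_serialized_parallel is expected to undo these steps in the same order.
*/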
int
__kmp_fork_call( ident_t *loc, int gtid,
                 enum fork_context_e call_context,
                 kmp_int32 argc,
                 void *unwrapped_task,
                 microtask_t microtask,
                 launch_t invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                 va_list *ap
#else
                 va_list ap
#endif
               )

    int master_this_cons;
    kmp_team_t *parent_team;
    kmp_info_t *master_th;
    int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
    kmp_hot_team_ptr_t **p_hot_teams;
#endif

    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);

    KA_TRACE( 20, ( "__kmp_fork_call: enter T#%d\n", gtid ));
    if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) {

        void *dummy = KMP_ALLOCA(__kmp_stkpadding);

        if ( __kmp_stkpadding > KMP_MAX_STKPADDING )
            __kmp_stkpadding += (short)((kmp_int64)dummy);

    KMP_DEBUG_ASSERT( __kmp_init_serial );
    if( ! TCR_4(__kmp_init_parallel) )
        __kmp_parallel_initialize();

    master_th = __kmp_threads[ gtid ];
    parent_team = master_th->th.th_team;
    master_tid = master_th->th.th_info.ds.ds_tid;
    master_this_cons = master_th->th.th_local.this_construct;
    root = master_th->th.th_root;
    master_active = root->r.r_active;
    master_set_numthreads = master_th->th.th_set_nproc;

    ompt_parallel_id_t ompt_parallel_id;
    ompt_task_id_t ompt_task_id;
    ompt_frame_t *ompt_frame;
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;

        ompt_parallel_id = __ompt_parallel_id_new(gtid);
        ompt_task_id = __ompt_get_task_id_internal(0);
        ompt_frame = __ompt_get_task_frame_internal(0);

    level = parent_team->t.t_level;
    active_level = parent_team->t.t_active_level;

    teams_level = master_th->th.th_teams_level;

#if KMP_NESTED_HOT_TEAMS
    p_hot_teams = &master_th->th.th_hot_teams;
    if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) {
        *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate(
            sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
        (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
        (*p_hot_teams)[0].hot_team_nth = 1;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) {
        int team_size = master_set_numthreads;

        ompt_callbacks.ompt_callback(ompt_event_parallel_begin)(
            ompt_task_id, ompt_frame, ompt_parallel_id,
            team_size, unwrapped_task, OMPT_INVOKER(call_context));
    master_th->th.th_ident = loc;

    if ( master_th->th.th_teams_microtask &&
         ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {

        parent_team->t.t_ident = loc;
        __kmp_alloc_argv_entries( argc, parent_team, TRUE );
        parent_team->t.t_argc = argc;
        argv = (void**)parent_team->t.t_argv;
        for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg( *ap, void * );
#else
            *argv++ = va_arg( ap, void * );
#endif

        if ( parent_team == master_th->th.th_serial_team ) {

            KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
            parent_team->t.t_serialized--;

            void **exit_runtime_p;

            ompt_lw_taskteam_t lw_taskteam;

            __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);
            lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
            exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

            __ompt_lw_taskteam_link(&lw_taskteam, master_th);

            my_task_id = lw_taskteam.ompt_task_info.task_id;
            my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                    my_parallel_id, my_task_id);

            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

            exit_runtime_p = &dummy;

            KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
            KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
            __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv

            lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

            if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                    ompt_parallel_id, ompt_task_id);

            __ompt_lw_taskteam_unlink(master_th);

            lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

            if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                    ompt_parallel_id, ompt_task_id,
                    OMPT_INVOKER(call_context));

            master_th->th.ompt_thread_info.state = ompt_state_overhead;

        parent_team->t.t_pkfn = microtask;

        parent_team->t.ompt_team_info.microtask = unwrapped_task;

        parent_team->t.t_invoke = invoker;
        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
        parent_team->t.t_active_level ++;
        parent_team->t.t_level ++;

        if ( master_set_numthreads ) {
            if ( master_set_numthreads < master_th->th.th_teams_size.nth ) {

                kmp_info_t **other_threads = parent_team->t.t_threads;
                parent_team->t.t_nproc = master_set_numthreads;
                for ( i = 0; i < master_set_numthreads; ++i ) {
                    other_threads[i]->th.th_team_nproc = master_set_numthreads;

            master_th->th.th_set_nproc = 0;

        if ( __kmp_debugging ) {
            int nth = __kmp_omp_num_threads( loc );
                master_set_numthreads = nth;

        KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
        __kmp_internal_fork( loc, gtid, parent_team );
        KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

            KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
            KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
            if (! parent_team->t.t_invoke( gtid )) {
                KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                        gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));
    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);

    if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {

    int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level));

    nthreads = master_set_numthreads ?
               master_set_numthreads : get__nproc_2( parent_team, master_tid );

    if ( ( !get__nested(master_th) && (root->r.r_in_parallel
         ) ) || ( __kmp_library == library_serial ) ) {
        KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n",

    if ( nthreads > 1 ) {

        __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

        nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads

        if ( nthreads == 1 ) {

            __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    KMP_DEBUG_ASSERT( nthreads > 0 );

    master_th->th.th_set_nproc = 0;

    if ( nthreads == 1 ) {
#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
        void * args[ argc ];
#else
        void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) );
#endif

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serializing parallel region\n", gtid ));

        if ( call_context == fork_context_intel ) {

            master_th->th.th_serial_team->t.t_ident = loc;

                master_th->th.th_serial_team->t.t_level--;

                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                        unwrapped_task, ompt_parallel_id);
                lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                my_task_id = lw_taskteam.ompt_task_info.task_id;
                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                        ompt_parallel_id, my_task_id);

                master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

                exit_runtime_p = &dummy;

                KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv

                lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                        ompt_parallel_id, ompt_task_id);

                __ompt_lw_taskteam_unlink(master_th);

                lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                        ompt_parallel_id, ompt_task_id,
                        OMPT_INVOKER(call_context));

                master_th->th.ompt_thread_info.state = ompt_state_overhead;
            }
            else if ( microtask == (microtask_t)__kmp_teams_master ) {
                KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
                team = master_th->th.th_team;

                team->t.t_invoke = invoker;
                __kmp_alloc_argv_entries( argc, team, TRUE );
                team->t.t_argc = argc;
                argv = (void**) team->t.t_argv;

                for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );
#else
                    *argv++ = va_arg( ap, void * );
#endif

                for( i=0; i < argc; ++i )
                    argv[i] = parent_team->t.t_argv[i];

                KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);

                for( i=argc-1; i >= 0; --i )
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    *argv++ = va_arg( *ap, void * );
#else
                    *argv++ = va_arg( ap, void * );
#endif

                void **exit_runtime_p;

                ompt_lw_taskteam_t lw_taskteam;

                __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                        unwrapped_task, ompt_parallel_id);
                lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid);
                exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);

                __ompt_lw_taskteam_link(&lw_taskteam, master_th);

                my_task_id = lw_taskteam.ompt_task_info.task_id;
                my_parallel_id = ompt_parallel_id;
                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                        my_parallel_id, my_task_id);

                master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

                exit_runtime_p = &dummy;

                KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
                KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
                __kmp_invoke_microtask( microtask, gtid, 0, argc, args

                lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;

                if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                        my_parallel_id, my_task_id);

                __ompt_lw_taskteam_unlink(master_th);

                lw_taskteam.ompt_task_info.task_id = ompt_task_id_none;

                if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
                    ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
                        ompt_parallel_id, ompt_task_id,
                        OMPT_INVOKER(call_context));

                master_th->th.ompt_thread_info.state = ompt_state_overhead;

        else if ( call_context == fork_context_gnu ) {

            ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *)
                __kmp_allocate( sizeof(ompt_lw_taskteam_t));
            __ompt_lw_taskteam_init(lwt, master_th, gtid,
                                    unwrapped_task, ompt_parallel_id);

            lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid);
            lwt->ompt_task_info.frame.exit_runtime_frame = 0;
            __ompt_lw_taskteam_link(lwt, master_th);

            KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));
            KMP_ASSERT2( call_context < fork_context_last,
                         "__kmp_fork_call: unknown fork_context parameter" );

        KA_TRACE( 20, ( "__kmp_fork_call: T#%d serial exit\n", gtid ));

    KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
                    parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
                    master_th->th.th_current_task->td_icvs.max_active_levels ) );

    master_th->th.th_current_task->td_flags.executing = 0;

    if ( !master_th->th.th_teams_microtask || level > teams_level )

        KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );

    int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
    if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) {
        nthreads_icv = __kmp_nested_nth.nth[level+1];

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
        proc_bind = proc_bind_false;

        if (proc_bind == proc_bind_default) {
            proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

        if ((level+1 < __kmp_nested_proc_bind.used)
            && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) {
            proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1];

    master_th->th.th_set_proc_bind = proc_bind_default;

    if ((nthreads_icv > 0)
        || (proc_bind_icv != proc_bind_default)
       ) {
        kmp_internal_control_t new_icvs;
        copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
        new_icvs.next = NULL;
        if (nthreads_icv > 0) {
            new_icvs.nproc = nthreads_icv;

        if (proc_bind_icv != proc_bind_default) {
            new_icvs.proc_bind = proc_bind_icv;

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &new_icvs, argc USE_NESTED_HOT_ARG(master_th) );

        KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
        team = __kmp_allocate_team(root, nthreads, nthreads,
                                   &master_th->th.th_current_task->td_icvs, argc
                                   USE_NESTED_HOT_ARG(master_th) );

    KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) );

    KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
    KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
    KMP_CHECK_UPDATE(team->t.t_ident, loc);
    KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
    KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);

    KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task);

    KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

    if ( !master_th->th.th_teams_microtask || level > teams_level ) {

        int new_level = parent_team->t.t_level + 1;
        KMP_CHECK_UPDATE(team->t.t_level, new_level);
        new_level = parent_team->t.t_active_level + 1;
        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

        int new_level = parent_team->t.t_level;
        KMP_CHECK_UPDATE(team->t.t_level, new_level);
        new_level = parent_team->t.t_active_level;
        KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

    kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
    if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk)
        team->t.t_sched = new_sched;

    KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);

    propagateFPControl(team);

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {

        KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]);

        KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
                        parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) );

        if ( active_level || master_th->th.th_task_team ) {

            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
            if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) {
                kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz;
                kmp_uint8 *old_stack, *new_stack;

                new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
                for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) {
                    new_stack[i] = master_th->th.th_task_state_memo_stack[i];

                for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) {

                old_stack = master_th->th.th_task_state_memo_stack;
                master_th->th.th_task_state_memo_stack = new_stack;
                master_th->th.th_task_state_stack_sz = new_size;
                __kmp_free(old_stack);

            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
            master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
            if (team == master_th->th.th_hot_teams[active_level].hot_team) {
                master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];

                master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
            }
#endif
#if !KMP_NESTED_HOT_TEAMS
        KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team));
#endif

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
                    gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
    KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
                      ( team->t.t_master_tid == 0 &&
                        ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));

    argv = (void**)team->t.t_argv;

        for ( i=argc-1; i >= 0; --i ) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            void *new_argv = va_arg(*ap, void *);
#else
            void *new_argv = va_arg(ap, void *);
#endif
            KMP_CHECK_UPDATE(*argv, new_argv);

        for ( i=0; i < argc; ++i ) {

            KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);

    KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
    if (!root->r.r_active)
        root->r.r_active = TRUE;

    __kmp_fork_team_threads( root, team, master_th, gtid );
    __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc );

    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( team->t.t_active_level == 1
         && !master_th->th.th_teams_microtask

        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) &&
             ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) )
        {
            kmp_uint64 tmp_time = 0;
            if ( __itt_get_timestamp_ptr )
                tmp_time = __itt_get_timestamp();

            master_th->th.th_frame_time  = tmp_time;
            if ( __kmp_forkjoin_frames_mode == 3 )
                team->t.t_region_time = tmp_time;

        if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) &&
             __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode )
        {
            __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

    KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );

    KF_TRACE(10, ( "__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
                   root, team, master_th, gtid));

    if ( __itt_stack_caller_create_ptr ) {
        team->t.t_stack_id = __kmp_itt_stack_caller_create();

    __kmp_internal_fork( loc, gtid, team );
    KF_TRACE(10, ( "__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n",
                   root, team, master_th, gtid));

    if (call_context == fork_context_gnu) {
        KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);

        if (! team->t.t_invoke( gtid )) {
            KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );

    KA_TRACE( 20, ( "__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
                    gtid, team->t.t_id, team->t.t_pkfn ) );

    KA_TRACE( 20, ( "__kmp_fork_call: parallel exit T#%d\n", gtid ));

    master_th->th.ompt_thread_info.state = ompt_state_overhead;
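/*
   Overview of __kmp_fork_call above (informal): after the teams-construct and nthreads == 1
   special cases, the master reserves threads under __kmp_forkjoin_lock, allocates or reuses a
   team with the proper ICVs, copies the microtask arguments, forks the workers
   (__kmp_fork_team_threads + __kmp_internal_fork), and then either returns to the caller
   (GNU entry point) or invokes the microtask itself through team->t.t_invoke(gtid).
*/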
__kmp_join_restore_state( kmp_info_t *thread, kmp_team_t *team )

    thread->th.ompt_thread_info.state = ((team->t.t_serialized) ?
        ompt_state_work_serial : ompt_state_work_parallel);

__kmp_join_ompt( kmp_info_t *thread, kmp_team_t *team,
                 ompt_parallel_id_t parallel_id,
                 fork_context_e fork_context)

    if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
            parallel_id, task_info->task_id, OMPT_INVOKER(fork_context));

    __kmp_join_restore_state(thread, team);
__kmp_join_call( ident_t *loc, int gtid
               , enum fork_context_e fork_context

    KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);

    kmp_team_t *parent_team;
    kmp_info_t *master_th;

    KA_TRACE( 20, ( "__kmp_join_call: enter T#%d\n", gtid ));

    master_th = __kmp_threads[ gtid ];
    root = master_th->th.th_root;
    team = master_th->th.th_team;
    parent_team = team->t.t_parent;

    master_th->th.th_ident = loc;

    master_th->th.ompt_thread_info.state = ompt_state_overhead;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
                        __kmp_gtid_from_thread( master_th ), team,
                        team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) );
        KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team[master_th->th.th_task_state] );

    if( team->t.t_serialized ) {

        if ( master_th->th.th_teams_microtask ) {

            int level = team->t.t_level;
            int tlevel = master_th->th.th_teams_level;
            if ( level == tlevel ) {

            }
            else if ( level == tlevel + 1 ) {

                team->t.t_serialized++;

        __kmp_join_restore_state(master_th, parent_team);

    master_active = team->t.t_master_active;

    __kmp_internal_join( loc, gtid, team );

    master_th->th.th_task_state = 0;

    ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id;

    if ( __itt_stack_caller_create_ptr ) {
        __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );

    if ( team->t.t_active_level == 1
         && !master_th->th.th_teams_microtask

        master_th->th.th_ident = loc;

        if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 )
            __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time,
                                    0, loc, master_th->th.th_team_nproc, 1 );
        else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) &&
                  ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames )
            __kmp_itt_region_joined( gtid );

    if ( master_th->th.th_teams_microtask &&
         team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
         team->t.t_level == master_th->th.th_teams_level + 1 ) {

        team->t.t_active_level --;
        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

        if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) {
            int old_num = master_th->th.th_team_nproc;
            int new_num = master_th->th.th_teams_size.nth;
            kmp_info_t **other_threads = team->t.t_threads;
            team->t.t_nproc = new_num;
            for ( i = 0; i < old_num; ++i ) {
                other_threads[i]->th.th_team_nproc = new_num;

            for ( i = old_num; i < new_num; ++i ) {

                kmp_balign_t * balign = other_threads[i]->th.th_bar;
                for ( b = 0; b < bs_last_barrier; ++ b ) {
                    balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
                    KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
                    balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;

                if ( __kmp_tasking_mode != tskm_immediate_exec ) {
                    other_threads[i]->th.th_task_state = master_th->th.th_task_state;

        __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);

    master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
    master_th->th.th_local.this_construct = team->t.t_master_this_cons;

    master_th->th.th_dispatch =
        & parent_team->t.t_dispatch[ team->t.t_master_tid ];

    __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );

    if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level )

        KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );

    KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );

#if OMPT_SUPPORT && OMPT_TRACE
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
            ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                parallel_id, task_info->task_id);

        task_info->frame.exit_runtime_frame = 0;
        task_info->task_id = 0;

    KF_TRACE( 10, ( "__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
                    0, master_th, team ) );
    __kmp_pop_current_task_from_thread( master_th );

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    master_th->th.th_first_place = team->t.t_first_place;
    master_th->th.th_last_place = team->t.t_last_place;

    updateHWFPControl (team);

    if ( root->r.r_active != master_active )
        root->r.r_active = master_active;

    __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) );

    master_th->th.th_team = parent_team;
    master_th->th.th_team_nproc = parent_team->t.t_nproc;
    master_th->th.th_team_master = parent_team->t.t_threads[0];
    master_th->th.th_team_serialized = parent_team->t.t_serialized;

    if( parent_team->t.t_serialized &&
        parent_team != master_th->th.th_serial_team &&
        parent_team != root->r.r_root_team ) {
        __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) );
        master_th->th.th_serial_team = parent_team;

    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
        if (master_th->th.th_task_state_top > 0) {
            KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);

            master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state;
            --master_th->th.th_task_state_top;

            master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top];

        master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state];
        KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
                        __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) );

    master_th->th.th_current_task->td_flags.executing = 1;

    __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );

    __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context);

    KA_TRACE( 20, ( "__kmp_join_call: exit T#%d\n", gtid ));
__kmp_save_internal_controls ( kmp_info_t * thread )

    if ( thread->th.th_team != thread->th.th_serial_team ) {

    if (thread->th.th_team->t.t_serialized > 1) {

        if (thread->th.th_team->t.t_control_stack_top == NULL) {

        if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
             thread->th.th_team->t.t_serialized ) {

        kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate( sizeof(kmp_internal_control_t));

        copy_icvs( control, & thread->th.th_current_task->td_icvs );

        control->serial_nesting_level = thread->th.th_team->t.t_serialized;

        control->next = thread->th.th_team->t.t_control_stack_top;
        thread->th.th_team->t.t_control_stack_top = control;
2549 __kmp_set_num_threads(
int new_nth,
int gtid )
2554 KF_TRACE( 10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
2555 KMP_DEBUG_ASSERT( __kmp_init_serial );
2559 else if (new_nth > __kmp_max_nth)
2560 new_nth = __kmp_max_nth;
2563 thread = __kmp_threads[gtid];
2565 __kmp_save_internal_controls( thread );
2567 set__nproc( thread, new_nth );
2574 root = thread->th.th_root;
2575 if ( __kmp_init_parallel && ( ! root->r.r_active )
2576 && ( root->r.r_hot_team->t.t_nproc > new_nth )
2577 #
if KMP_NESTED_HOT_TEAMS
2578 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2581 kmp_team_t *hot_team = root->r.r_hot_team;
2584 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
2587 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
2588 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2589 if ( __kmp_tasking_mode != tskm_immediate_exec) {
2591 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2593 __kmp_free_thread( hot_team->t.t_threads[f] );
2594 hot_team->t.t_threads[f] = NULL;
2596 hot_team->t.t_nproc = new_nth;
2597 #if KMP_NESTED_HOT_TEAMS
2598 if( thread->th.th_hot_teams ) {
2599 KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team );
2600 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2604 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2609 for( f=0 ; f < new_nth; f++ ) {
2610 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
2611 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2614 hot_team->t.t_size_changed = -1;
2620 __kmp_set_max_active_levels(
int gtid,
int max_active_levels )
2624 KF_TRACE( 10, (
"__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2625 KMP_DEBUG_ASSERT( __kmp_init_serial );
2628 if( max_active_levels < 0 ) {
2629 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
2633 KF_TRACE( 10, (
"__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2636 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
2640 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
2641 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2646 KF_TRACE( 10, (
"__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
2648 thread = __kmp_threads[ gtid ];
2650 __kmp_save_internal_controls( thread );
2652 set__max_active_levels( thread, max_active_levels );
2658 __kmp_get_max_active_levels(
int gtid )
2662 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d\n", gtid ) );
2663 KMP_DEBUG_ASSERT( __kmp_init_serial );
2665 thread = __kmp_threads[ gtid ];
2666 KMP_DEBUG_ASSERT( thread->th.th_current_task );
2667 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
2668 gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) );
2669 return thread->th.th_current_task->td_icvs.max_active_levels;
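/* __kmp_set_schedule: implements omp_set_schedule(). Validates the user-visible
   kmp_sched_t kind (falling back to the default schedule with a warning when it is out
   of range), translates it to an internal kmp_sch_* value via __kmp_sch_map, and stores
   the kind and chunk size in the current task's sched ICV. */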
2674 __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk )
2679 KF_TRACE( 10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (
int)kind, chunk ));
2680 KMP_DEBUG_ASSERT( __kmp_init_serial );
2686 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2687 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
2692 KMP_MSG( ScheduleKindOutOfRange, kind ),
2693 KMP_HNT( DefaultScheduleKindUsed,
"static, no chunk" ),
2696 kind = kmp_sched_default;
2700 thread = __kmp_threads[ gtid ];
2702 __kmp_save_internal_controls( thread );
2704 if ( kind < kmp_sched_upper_std ) {
2705 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
2708 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
2710 thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
2714 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2715 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
2717 if ( kind == kmp_sched_auto ) {
2719 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2721 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2727 __kmp_get_schedule(
int gtid, kmp_sched_t * kind,
int * chunk )
2732 KF_TRACE( 10, (
"__kmp_get_schedule: thread %d\n", gtid ));
2733 KMP_DEBUG_ASSERT( __kmp_init_serial );
2735 thread = __kmp_threads[ gtid ];
2737 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2739 switch ( th_type ) {
2741 case kmp_sch_static_greedy:
2742 case kmp_sch_static_balanced:
2743 *kind = kmp_sched_static;
2746 case kmp_sch_static_chunked:
2747 *kind = kmp_sched_static;
2749 case kmp_sch_dynamic_chunked:
2750 *kind = kmp_sched_dynamic;
2753 case kmp_sch_guided_iterative_chunked:
2754 case kmp_sch_guided_analytical_chunked:
2755 *kind = kmp_sched_guided;
2758 *kind = kmp_sched_auto;
2760 case kmp_sch_trapezoidal:
2761 *kind = kmp_sched_trapezoidal;
2769 KMP_FATAL( UnknownSchedulingType, th_type );
2772 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
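/* __kmp_get_ancestor_thread_num: implements omp_get_ancestor_thread_num(level).
   Returns 0 for level 0 and -1 for a negative or too-deep level; otherwise it walks up
   the team hierarchy (accounting for serialized teams and the teams construct) until it
   reaches the requested nesting level and reports the thread number at that level. */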
2776 __kmp_get_ancestor_thread_num(
int gtid,
int level ) {
2782 KF_TRACE( 10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
2783 KMP_DEBUG_ASSERT( __kmp_init_serial );
2786 if( level == 0 )
return 0;
2787 if( level < 0 )
return -1;
2788 thr = __kmp_threads[ gtid ];
2789 team = thr->th.th_team;
2790 ii = team->t.t_level;
2791 if( level > ii )
return -1;
2794 if( thr->th.th_teams_microtask ) {
2796 int tlevel = thr->th.th_teams_level;
2797 if( level <= tlevel ) {
2798 KMP_DEBUG_ASSERT( ii >= tlevel );
2800 if ( ii == tlevel ) {
2809 if( ii == level )
return __kmp_tid_from_gtid( gtid );
2811 dd = team->t.t_serialized;
2815 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2818 if( ( team->t.t_serialized ) && ( !dd ) ) {
2819 team = team->t.t_parent;
2823 team = team->t.t_parent;
2824 dd = team->t.t_serialized;
2829 return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid );
2833 __kmp_get_team_size(
int gtid,
int level ) {
2839 KF_TRACE( 10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level ));
2840 KMP_DEBUG_ASSERT( __kmp_init_serial );
2843 if( level == 0 )
return 1;
2844 if( level < 0 )
return -1;
2845 thr = __kmp_threads[ gtid ];
2846 team = thr->th.th_team;
2847 ii = team->t.t_level;
2848 if( level > ii )
return -1;
2851 if( thr->th.th_teams_microtask ) {
2853 int tlevel = thr->th.th_teams_level;
2854 if( level <= tlevel ) {
2855 KMP_DEBUG_ASSERT( ii >= tlevel );
2857 if ( ii == tlevel ) {
2868 for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
2871 if( team->t.t_serialized && ( !dd ) ) {
2872 team = team->t.t_parent;
2876 team = team->t.t_parent;
2881 return team->t.t_nproc;
2885 __kmp_get_schedule_global() {
2889 kmp_r_sched_t r_sched;
2895 r_sched.r_sched_type = __kmp_static;
2897 r_sched.r_sched_type = __kmp_guided;
2899 r_sched.r_sched_type = __kmp_sched;
2902 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
2903 r_sched.chunk = KMP_DEFAULT_CHUNK;
2905 r_sched.chunk = __kmp_chunk;
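/* __kmp_alloc_argv_entries: make sure team->t.t_argv can hold argc entries. Small
   argument lists reuse the inline array embedded in the team structure; larger ones are
   page-allocated with room to grow (at least KMP_MIN_MALLOC_ARGV_ENTRIES or 2*argc). */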
2920 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc )
2923 KMP_DEBUG_ASSERT( team );
2924 if( !realloc || argc > team->t.t_max_argc ) {
2926 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
2927 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
2929 if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] )
2930 __kmp_free( (void *) team->t.t_argv );
2932 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
2934 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
2935 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
2936 team->t.t_id, team->t.t_max_argc ));
2937 team->t.t_argv = &team->t.t_inline_argv[0];
2938 if ( __kmp_storage_map ) {
2939 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
2940 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
2941 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
2942 "team_%d.t_inline_argv",
2947 team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
2948 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
2949 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
2950 team->t.t_id, team->t.t_max_argc ));
2951 team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
2952 if ( __kmp_storage_map ) {
2953 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
2954 sizeof(
void *) * team->t.t_max_argc,
"team_%d.t_argv",
2962 __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth)
2965 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
2966 team->t.t_threads = (kmp_info_t**) __kmp_allocate(
sizeof(kmp_info_t*) * max_nth );
2967 team->t.t_disp_buffer = (dispatch_shared_info_t*)
2968 __kmp_allocate(
sizeof(dispatch_shared_info_t) * num_disp_buff );
2969 team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate(
sizeof(kmp_disp_t) * max_nth );
2970 team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth );
2971 team->t.t_max_nproc = max_nth;
2974 for(i = 0 ; i < num_disp_buff; ++i) {
2975 team->t.t_disp_buffer[i].buffer_index = i;
2977 team->t.t_disp_buffer[i].doacross_buf_idx = i;
2983 __kmp_free_team_arrays(kmp_team_t *team) {
2986 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
2987 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
2988 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
2989 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
2992 __kmp_free(team->t.t_threads);
2993 __kmp_free(team->t.t_disp_buffer);
2994 __kmp_free(team->t.t_dispatch);
2995 __kmp_free(team->t.t_implicit_task_taskdata);
2996 team->t.t_threads = NULL;
2997 team->t.t_disp_buffer = NULL;
2998 team->t.t_dispatch = NULL;
2999 team->t.t_implicit_task_taskdata = 0;
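/* __kmp_reallocate_team_arrays: grow the per-team arrays to max_nth slots. The dispatch
   and implicit-task arrays are simply freed and reallocated, while the existing
   t_threads pointers are copied into the new array before the old block is released. */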
3003 __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3004 kmp_info_t **oldThreads = team->t.t_threads;
3006 __kmp_free(team->t.t_disp_buffer);
3007 __kmp_free(team->t.t_dispatch);
3008 __kmp_free(team->t.t_implicit_task_taskdata);
3009 __kmp_allocate_team_arrays(team, max_nth);
3011 KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3013 __kmp_free(oldThreads);
3016 static kmp_internal_control_t
3017 __kmp_get_global_icvs(
void ) {
3019 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3022 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3025 kmp_internal_control_t g_icvs = {
3027 (kmp_int8)__kmp_dflt_nested,
3028 (kmp_int8)__kmp_global.g.g_dynamic,
3029 (kmp_int8)__kmp_env_blocktime,
3030 __kmp_dflt_blocktime,
3032 __kmp_dflt_team_nth,
3034 __kmp_dflt_max_active_levels,
3037 __kmp_nested_proc_bind.bind_types[0],
3045 static kmp_internal_control_t
3046 __kmp_get_x_global_icvs(
const kmp_team_t *team ) {
3048 kmp_internal_control_t gx_icvs;
3049 gx_icvs.serial_nesting_level = 0;
3050 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3051 gx_icvs.next = NULL;
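/* __kmp_initialize_root: set up a freshly allocated kmp_root_t: reset its begin/active
   flags and blocktime/nesting defaults, then allocate the single-thread root team and
   the hot team (sized __kmp_dflt_team_nth_ub * 2) that top-level parallel regions rooted
   at this thread will reuse. */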
3057 __kmp_initialize_root( kmp_root_t *root )
3060 kmp_team_t *root_team;
3061 kmp_team_t *hot_team;
3062 int hot_team_max_nth;
3063 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3064 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3065 KMP_DEBUG_ASSERT( root );
3066 KMP_ASSERT( ! root->r.r_begin );
3069 __kmp_init_lock( &root->r.r_begin_lock );
3070 root->r.r_begin = FALSE;
3071 root->r.r_active = FALSE;
3072 root->r.r_in_parallel = 0;
3073 root->r.r_blocktime = __kmp_dflt_blocktime;
3074 root->r.r_nested = __kmp_dflt_nested;
3078 KF_TRACE( 10, (
"__kmp_initialize_root: before root_team\n" ) );
3081 __kmp_allocate_team(
3089 __kmp_nested_proc_bind.bind_types[0],
3093 USE_NESTED_HOT_ARG(NULL)
3097 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 ));
3100 KF_TRACE( 10, (
"__kmp_initialize_root: after root_team = %p\n", root_team ) );
3102 root->r.r_root_team = root_team;
3103 root_team->t.t_control_stack_top = NULL;
3106 root_team->t.t_threads[0] = NULL;
3107 root_team->t.t_nproc = 1;
3108 root_team->t.t_serialized = 1;
3110 root_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3111 root_team->t.t_sched.chunk = r_sched.chunk;
3112 KA_TRACE( 20, (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3113 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
3117 KF_TRACE( 10, (
"__kmp_initialize_root: before hot_team\n" ) );
3120 __kmp_allocate_team(
3123 __kmp_dflt_team_nth_ub * 2,
3128 __kmp_nested_proc_bind.bind_types[0],
3132 USE_NESTED_HOT_ARG(NULL)
3134 KF_TRACE( 10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3136 root->r.r_hot_team = hot_team;
3137 root_team->t.t_control_stack_top = NULL;
3140 hot_team->t.t_parent = root_team;
3143 hot_team_max_nth = hot_team->t.t_max_nproc;
3144 for ( f = 0; f < hot_team_max_nth; ++ f ) {
3145 hot_team->t.t_threads[ f ] = NULL;
3147 hot_team->t.t_nproc = 1;
3149 hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type;
3150 hot_team->t.t_sched.chunk = r_sched.chunk;
3151 hot_team->t.t_size_changed = 0;
3157 typedef struct kmp_team_list_item {
3158 kmp_team_p const * entry;
3159 struct kmp_team_list_item * next;
3160 } kmp_team_list_item_t;
3161 typedef kmp_team_list_item_t * kmp_team_list_t;
3165 __kmp_print_structure_team_accum(
3166 kmp_team_list_t list,
3167 kmp_team_p
const * team
3177 KMP_DEBUG_ASSERT( list != NULL );
3178 if ( team == NULL ) {
3182 __kmp_print_structure_team_accum( list, team->t.t_parent );
3183 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
3187 while ( l->next != NULL && l->entry != team ) {
3190 if ( l->next != NULL ) {
3196 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
3202 kmp_team_list_item_t * item =
3203 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
3212 __kmp_print_structure_team(
3214 kmp_team_p
const * team
3217 __kmp_printf(
"%s", title );
3218 if ( team != NULL ) {
3219 __kmp_printf(
"%2x %p\n", team->t.t_id, team );
3221 __kmp_printf(
" - (nil)\n" );
3226 __kmp_print_structure_thread(
3228 kmp_info_p
const * thread
3231 __kmp_printf(
"%s", title );
3232 if ( thread != NULL ) {
3233 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
3235 __kmp_printf(
" - (nil)\n" );
3240 __kmp_print_structure(
3244 kmp_team_list_t list;
3247 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
3251 __kmp_printf(
"\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
3254 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3255 __kmp_printf(
"%2d", gtid );
3256 if ( __kmp_threads != NULL ) {
3257 __kmp_printf(
" %p", __kmp_threads[ gtid ] );
3259 if ( __kmp_root != NULL ) {
3260 __kmp_printf(
" %p", __kmp_root[ gtid ] );
3262 __kmp_printf(
"\n" );
3267 __kmp_printf(
"\n------------------------------\nThreads\n------------------------------\n" );
3268 if ( __kmp_threads != NULL ) {
3270 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3271 kmp_info_t
const * thread = __kmp_threads[ gtid ];
3272 if ( thread != NULL ) {
3273 __kmp_printf(
"GTID %2d %p:\n", gtid, thread );
3274 __kmp_printf(
" Our Root: %p\n", thread->th.th_root );
3275 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team );
3276 __kmp_print_structure_team(
" Serial Team: ", thread->th.th_serial_team );
3277 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc );
3278 __kmp_print_structure_thread(
" Master: ", thread->th.th_team_master );
3279 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized );
3280 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc );
3282 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
3284 __kmp_print_structure_thread(
" Next in pool: ", thread->th.th_next_pool );
3285 __kmp_printf(
"\n" );
3286 __kmp_print_structure_team_accum( list, thread->th.th_team );
3287 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
3291 __kmp_printf(
"Threads array is not allocated.\n" );
3295 __kmp_printf(
"\n------------------------------\nUbers\n------------------------------\n" );
3296 if ( __kmp_root != NULL ) {
3298 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
3299 kmp_root_t
const * root = __kmp_root[ gtid ];
3300 if ( root != NULL ) {
3301 __kmp_printf(
"GTID %2d %p:\n", gtid, root );
3302 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team );
3303 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team );
3304 __kmp_print_structure_thread(
" Uber Thread: ", root->r.r_uber_thread );
3305 __kmp_printf(
" Active?: %2d\n", root->r.r_active );
3306 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested );
3307 __kmp_printf(
" In Parallel: %2d\n", root->r.r_in_parallel );
3308 __kmp_printf(
"\n" );
3309 __kmp_print_structure_team_accum( list, root->r.r_root_team );
3310 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
3314 __kmp_printf(
"Ubers array is not allocated.\n" );
3317 __kmp_printf(
"\n------------------------------\nTeams\n------------------------------\n" );
3318 while ( list->next != NULL ) {
3319 kmp_team_p
const * team = list->entry;
3321 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team );
3322 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent );
3323 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid );
3324 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc );
3325 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized );
3326 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc );
3327 for ( i = 0; i < team->t.t_nproc; ++ i ) {
3328 __kmp_printf(
" Thread %2d: ", i );
3329 __kmp_print_structure_thread(
"", team->t.t_threads[ i ] );
3331 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool );
3332 __kmp_printf(
"\n" );
3337 __kmp_printf(
"\n------------------------------\nPools\n------------------------------\n" );
3338 __kmp_print_structure_thread(
"Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
3339 __kmp_print_structure_team(
"Team pool: ", (kmp_team_t *)__kmp_team_pool );
3340 __kmp_printf(
"\n" );
3343 while ( list != NULL ) {
3344 kmp_team_list_item_t * item = list;
3346 KMP_INTERNAL_FREE( item );
3358 static const unsigned __kmp_primes[] = {
3359 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
3360 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
3361 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3362 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
3363 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
3364 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3365 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
3366 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
3367 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3368 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
3369 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
3370 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3371 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
3372 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
3373 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3374 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
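/* __kmp_get_random / __kmp_init_random: a per-thread linear congruential generator.
   Each thread gets a multiplier th_a picked from __kmp_primes by its tid; the state is
   advanced as x = a*x + 1 and the high 16 bits of the previous state are returned, so
   successive calls are cheap and need no synchronization between threads. */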
3381 __kmp_get_random( kmp_info_t * thread )
3383 unsigned x = thread->th.th_x;
3384 unsigned short r = x>>16;
3386 thread->th.th_x = x*thread->th.th_a+1;
3388 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3389 thread->th.th_info.ds.ds_tid, r) );
3397 __kmp_init_random( kmp_info_t * thread )
3399 unsigned seed = thread->th.th_info.ds.ds_tid;
3401 thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
3402 thread->th.th_x = (seed+1)*thread->th.th_a+1;
3403 KA_TRACE(30, (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) );
3410 __kmp_reclaim_dead_roots(
void) {
3413 for(i = 0; i < __kmp_threads_capacity; ++i) {
3414 if( KMP_UBER_GTID( i ) &&
3415 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3416 !__kmp_root[i]->r.r_active ) {
3417 r += __kmp_unregister_root_other_thread(i);
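/* __kmp_expand_threads: grow the global __kmp_threads and __kmp_root arrays so that at
   least nNeed (ideally nWish) more slots are available, bounded by __kmp_sys_max_nth or
   the threadprivate cache capacity. Both arrays live in one allocation (roots follow the
   thread pointers); the capacity is doubled until the target is reached, the old
   contents are copied over, and the new pointers are published while holding
   __kmp_tp_cached_lock. */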
3446 __kmp_expand_threads(
int nWish,
int nNeed) {
3449 int __kmp_actual_max_nth;
3453 #if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
3456 added = __kmp_reclaim_dead_roots();
3474 int minimumRequiredCapacity;
3476 kmp_info_t **newThreads;
3477 kmp_root_t **newRoot;
3499 old_tp_cached = __kmp_tp_cached;
3500 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
3501 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
3505 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3509 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
3515 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
3522 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
3524 newCapacity = __kmp_threads_capacity;
3527 newCapacity <= (__kmp_actual_max_nth >> 1) ?
3528 (newCapacity << 1) :
3529 __kmp_actual_max_nth;
3530 } while(newCapacity < minimumRequiredCapacity);
3531 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
3532 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
3533 KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
3534 KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
3535 memset(newThreads + __kmp_threads_capacity, 0, (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
3537 memset(newRoot + __kmp_threads_capacity, 0, (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
3540 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3546 __kmp_free(newThreads);
3549 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3550 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3552 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
3553 __kmp_free(newThreads);
3559 *(kmp_info_t** volatile*)&__kmp_threads = newThreads;
3560 *(kmp_root_t** volatile*)&__kmp_root = newRoot;
3561 added += newCapacity - __kmp_threads_capacity;
3562 *(volatile int*)&__kmp_threads_capacity = newCapacity;
3563 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
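/* __kmp_register_root: register the calling thread as a root (uber) thread. Under the
   forkjoin lock it finds a free gtid slot (expanding the arrays if necessary), allocates
   the kmp_root_t and kmp_info_t structures, builds the serial team, publishes the thread
   in __kmp_threads, and initializes its barrier and affinity state. */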
3574 __kmp_register_root(
int initial_thread )
3576 kmp_info_t *root_thread;
3580 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3581 KA_TRACE( 20, (
"__kmp_register_root: entered\n"));
3599 capacity = __kmp_threads_capacity;
3600 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
3605 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
3606 if ( __kmp_tp_cached ) {
3609 KMP_MSG( CantRegisterNewThread ),
3610 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
3611 KMP_HNT( PossibleSystemLimitOnThreads ),
3618 KMP_MSG( CantRegisterNewThread ),
3619 KMP_HNT( SystemLimitOnThreads ),
3628 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ )
3630 KA_TRACE( 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
3631 KMP_ASSERT( gtid < __kmp_threads_capacity );
3635 TCW_4(__kmp_nth, __kmp_nth + 1);
3642 if ( __kmp_adjust_gtid_mode ) {
3643 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
3644 if ( TCR_4(__kmp_gtid_mode) != 2) {
3645 TCW_4(__kmp_gtid_mode, 2);
3649 if (TCR_4(__kmp_gtid_mode) != 1 ) {
3650 TCW_4(__kmp_gtid_mode, 1);
3655 #ifdef KMP_ADJUST_BLOCKTIME
3658 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
3659 if ( __kmp_nth > __kmp_avail_proc ) {
3660 __kmp_zero_bt = TRUE;
3666 if( ! ( root = __kmp_root[gtid] )) {
3667 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate(
sizeof(kmp_root_t) );
3668 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
3671 __kmp_initialize_root( root );
3674 if( root->r.r_uber_thread ) {
3675 root_thread = root->r.r_uber_thread;
3677 root_thread = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
3678 if ( __kmp_storage_map ) {
3679 __kmp_print_thread_storage_map( root_thread, gtid );
3681 root_thread->th.th_info .ds.ds_gtid = gtid;
3682 root_thread->th.th_root = root;
3683 if( __kmp_env_consistency_check ) {
3684 root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid );
3687 __kmp_initialize_fast_memory( root_thread );
3691 KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL );
3692 __kmp_initialize_bget( root_thread );
3694 __kmp_init_random( root_thread );
3698 if( ! root_thread->th.th_serial_team ) {
3699 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3700 KF_TRACE( 10, (
"__kmp_register_root: before serial_team\n" ) );
3702 root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1,
3710 0 USE_NESTED_HOT_ARG(NULL) );
3712 KMP_ASSERT( root_thread->th.th_serial_team );
3713 KF_TRACE( 10, (
"__kmp_register_root: after serial_team = %p\n",
3714 root_thread->th.th_serial_team ) );
3717 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3719 root->r.r_root_team->t.t_threads[0] = root_thread;
3720 root->r.r_hot_team ->t.t_threads[0] = root_thread;
3721 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3722 root_thread->th.th_serial_team->t.t_serialized = 0;
3723 root->r.r_uber_thread = root_thread;
3726 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
3727 TCW_4(__kmp_init_gtid, TRUE);
3730 __kmp_gtid_set_specific( gtid );
3733 __kmp_itt_thread_name( gtid );
3736 #ifdef KMP_TDATA_GTID
3739 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
3740 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
3742 KA_TRACE( 20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3743 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
3744 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3745 KMP_INIT_BARRIER_STATE ) );
3748 for ( b = 0; b < bs_last_barrier; ++ b ) {
3749 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3751 root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0;
3755 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
3757 #if KMP_AFFINITY_SUPPORTED
3759 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3760 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3761 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3762 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3765 if ( TCR_4(__kmp_init_middle) ) {
3766 __kmp_affinity_set_init_mask( gtid, TRUE );
3770 __kmp_root_counter ++;
3773 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3778 #if KMP_NESTED_HOT_TEAMS
3780 __kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr,
int level,
const int max_level )
3783 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3784 if( !hot_teams || !hot_teams[level].hot_team ) {
3787 KMP_DEBUG_ASSERT( level < max_level );
3788 kmp_team_t *team = hot_teams[level].hot_team;
3789 nth = hot_teams[level].hot_team_nth;
3791 if( level < max_level - 1 ) {
3792 for( i = 0; i < nth; ++i ) {
3793 kmp_info_t *th = team->t.t_threads[i];
3794 n += __kmp_free_hot_teams( root, th, level + 1, max_level );
3795 if( i > 0 && th->th.th_hot_teams ) {
3796 __kmp_free( th->th.th_hot_teams );
3797 th->th.th_hot_teams = NULL;
3801 __kmp_free_team( root, team, NULL );
3810 __kmp_reset_root(
int gtid, kmp_root_t *root)
3812 kmp_team_t * root_team = root->r.r_root_team;
3813 kmp_team_t * hot_team = root->r.r_hot_team;
3814 int n = hot_team->t.t_nproc;
3817 KMP_DEBUG_ASSERT( ! root->r.r_active );
3819 root->r.r_root_team = NULL;
3820 root->r.r_hot_team = NULL;
3823 __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) );
3824 #if KMP_NESTED_HOT_TEAMS
3825 if( __kmp_hot_teams_max_level > 0 ) {
3826 for( i = 0; i < hot_team->t.t_nproc; ++i ) {
3827 kmp_info_t *th = hot_team->t.t_threads[i];
3828 if( __kmp_hot_teams_max_level > 1 ) {
3829 n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level );
3831 if( th->th.th_hot_teams ) {
3832 __kmp_free( th->th.th_hot_teams );
3833 th->th.th_hot_teams = NULL;
3838 __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) );
3844 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3845 __kmp_wait_to_unref_task_teams();
3850 KA_TRACE( 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
"\n",
3851 (LPVOID)&(root->r.r_uber_thread->th),
3852 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
3853 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
3858 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
3859 int gtid = __kmp_get_gtid();
3860 __ompt_thread_end(ompt_thread_initial, gtid);
3864 TCW_4(__kmp_nth, __kmp_nth - 1);
3865 __kmp_reap_thread( root->r.r_uber_thread, 1 );
3868 root->r.r_uber_thread = NULL;
3870 root->r.r_begin = FALSE;
3876 __kmp_unregister_root_current_thread(
int gtid )
3878 KA_TRACE( 1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
3883 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3884 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
3885 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid ));
3886 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3889 kmp_root_t *root = __kmp_root[gtid];
3891 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3892 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3893 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3894 KMP_ASSERT( root->r.r_active == FALSE );
3900 kmp_info_t * thread = __kmp_threads[gtid];
3901 kmp_team_t * team = thread->th.th_team;
3902 kmp_task_team_t * task_team = thread->th.th_task_team;
3905 if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) {
3908 thread->th.ompt_thread_info.state = ompt_state_undefined;
3910 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
3914 __kmp_reset_root(gtid, root);
3917 __kmp_gtid_set_specific( KMP_GTID_DNE );
3918 #ifdef KMP_TDATA_GTID
3919 __kmp_gtid = KMP_GTID_DNE;
3923 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
3925 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3934 __kmp_unregister_root_other_thread(
int gtid )
3936 kmp_root_t *root = __kmp_root[gtid];
3939 KA_TRACE( 1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
3940 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
3941 KMP_ASSERT( KMP_UBER_GTID( gtid ));
3942 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
3943 KMP_ASSERT( root->r.r_active == FALSE );
3945 r = __kmp_reset_root(gtid, root);
3946 KC_TRACE( 10, (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
3952 void __kmp_task_info() {
3954 kmp_int32 gtid = __kmp_entry_gtid();
3955 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
3956 kmp_info_t *this_thr = __kmp_threads[ gtid ];
3957 kmp_team_t *steam = this_thr->th.th_serial_team;
3958 kmp_team_t *team = this_thr->th.th_team;
3960 __kmp_printf(
"__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
3961 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
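/* __kmp_initialize_info: (re)bind a worker to a team slot: set its tid, root, team
   pointers and serialization state, create its implicit task, hook up the per-thread
   dispatch buffer, and lazily allocate the threadprivate common table and the
   task-state memo stack. */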
3969 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int gtid )
3973 kmp_info_t *master = team->t.t_threads[0];
3974 KMP_DEBUG_ASSERT( this_thr != NULL );
3975 KMP_DEBUG_ASSERT( this_thr->th.th_serial_team );
3976 KMP_DEBUG_ASSERT( team );
3977 KMP_DEBUG_ASSERT( team->t.t_threads );
3978 KMP_DEBUG_ASSERT( team->t.t_dispatch );
3979 KMP_DEBUG_ASSERT( master );
3980 KMP_DEBUG_ASSERT( master->th.th_root );
3984 TCW_SYNC_PTR(this_thr->th.th_team, team);
3986 this_thr->th.th_info.ds.ds_tid = tid;
3987 this_thr->th.th_set_nproc = 0;
3989 this_thr->th.th_set_proc_bind = proc_bind_default;
3990 # if KMP_AFFINITY_SUPPORTED
3991 this_thr->th.th_new_place = this_thr->th.th_current_place;
3994 this_thr->th.th_root = master->th.th_root;
3997 this_thr->th.th_team_nproc = team->t.t_nproc;
3998 this_thr->th.th_team_master = master;
3999 this_thr->th.th_team_serialized = team->t.t_serialized;
4000 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4002 KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata );
4004 KF_TRACE( 10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4005 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4007 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4009 KF_TRACE( 10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4010 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4014 this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ];
4016 this_thr->th.th_local.this_construct = 0;
4019 this_thr->th.th_local.tv_data = 0;
4022 if ( ! this_thr->th.th_pri_common ) {
4023 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
4024 if ( __kmp_storage_map ) {
4025 __kmp_print_storage_map_gtid(
4026 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4027 sizeof(
struct common_table ),
"th_%d.th_pri_common\n", gtid
4030 this_thr->th.th_pri_head = NULL;
4035 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4039 size_t disp_size =
sizeof( dispatch_private_info_t ) *
4040 ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers );
4041 KD_TRACE( 10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4042 KMP_ASSERT( dispatch );
4043 KMP_DEBUG_ASSERT( team->t.t_dispatch );
4044 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4046 dispatch->th_disp_index = 0;
4048 dispatch->th_doacross_buf_idx = 0;
4050 if( ! dispatch->th_disp_buffer ) {
4051 dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4053 if ( __kmp_storage_map ) {
4054 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4055 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ],
4056 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4057 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4058 gtid, team->t.t_id, gtid );
4061 memset( & dispatch->th_disp_buffer[0],
'\0', disp_size );
4064 dispatch->th_dispatch_pr_current = 0;
4065 dispatch->th_dispatch_sh_current = 0;
4067 dispatch->th_deo_fcn = 0;
4068 dispatch->th_dxo_fcn = 0;
4071 this_thr->th.th_next_pool = NULL;
4073 if (!this_thr->th.th_task_state_memo_stack) {
4075 this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*
sizeof(kmp_uint8) );
4076 this_thr->th.th_task_state_top = 0;
4077 this_thr->th.th_task_state_stack_sz = 4;
4078 for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i)
4079 this_thr->th.th_task_state_memo_stack[i] = 0;
4082 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4083 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
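/* __kmp_allocate_thread: obtain a worker for slot new_tid of team. A thread parked in
   __kmp_thread_pool is reused when possible; otherwise a new kmp_info_t is allocated, a
   free gtid is chosen, a serial team is created, barrier/affinity state is initialized,
   and an OS worker (plus the monitor thread, on first use) is spawned. */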
4096 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team,
int new_tid )
4098 kmp_team_t *serial_team;
4099 kmp_info_t *new_thr;
4102 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4103 KMP_DEBUG_ASSERT( root && team );
4104 #if !KMP_NESTED_HOT_TEAMS
4105 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
4110 if ( __kmp_thread_pool ) {
4112 new_thr = (kmp_info_t*)__kmp_thread_pool;
4113 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
4114 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4115 __kmp_thread_pool_insert_pt = NULL;
4117 TCW_4(new_thr->th.th_in_pool, FALSE);
4122 __kmp_thread_pool_nth--;
4124 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4125 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4126 KMP_ASSERT( ! new_thr->th.th_team );
4127 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4128 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4131 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4132 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4134 TCW_4(__kmp_nth, __kmp_nth + 1);
4136 new_thr->th.th_task_state = 0;
4137 new_thr->th.th_task_state_top = 0;
4138 new_thr->th.th_task_state_stack_sz = 4;
4140 #ifdef KMP_ADJUST_BLOCKTIME
4143 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4144 if ( __kmp_nth > __kmp_avail_proc ) {
4145 __kmp_zero_bt = TRUE;
4153 kmp_balign_t * balign = new_thr->th.th_bar;
4154 for( b = 0; b < bs_last_barrier; ++ b )
4155 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4158 KF_TRACE( 10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4159 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
4167 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4168 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
4174 if ( ! TCR_4( __kmp_init_monitor ) ) {
4175 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4176 if ( ! TCR_4( __kmp_init_monitor ) ) {
4177 KF_TRACE( 10, (
"before __kmp_create_monitor\n" ) );
4178 TCW_4( __kmp_init_monitor, 1 );
4179 __kmp_create_monitor( & __kmp_monitor );
4180 KF_TRACE( 10, (
"after __kmp_create_monitor\n" ) );
4189 while ( TCR_4(__kmp_init_monitor) < 2 ) {
4192 KF_TRACE( 10, (
"after monitor thread has started\n" ) );
4195 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
4199 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4200 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4204 new_thr = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
4206 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4208 if ( __kmp_storage_map ) {
4209 __kmp_print_thread_storage_map( new_thr, new_gtid );
4214 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4215 KF_TRACE( 10, (
"__kmp_allocate_thread: before th_serial/serial_team\n" ) );
4217 new_thr->th.th_serial_team = serial_team =
4218 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4226 0 USE_NESTED_HOT_ARG(NULL) );
4228 KMP_ASSERT ( serial_team );
4229 serial_team->t.t_serialized = 0;
4230 serial_team->t.t_threads[0] = new_thr;
4231 KF_TRACE( 10, (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4235 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4238 __kmp_initialize_fast_memory( new_thr );
4242 KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
4243 __kmp_initialize_bget( new_thr );
4246 __kmp_init_random( new_thr );
4249 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4250 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
4253 kmp_balign_t * balign = new_thr->th.th_bar;
4254 for(b=0; b<bs_last_barrier; ++b) {
4255 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4256 balign[b].bb.team = NULL;
4257 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4258 balign[b].bb.use_oncore_barrier = 0;
4261 new_thr->th.th_spin_here = FALSE;
4262 new_thr->th.th_next_waiting = 0;
4264 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4265 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4266 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4267 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4268 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4271 TCW_4(new_thr->th.th_in_pool, FALSE);
4272 new_thr->th.th_active_in_pool = FALSE;
4273 TCW_4(new_thr->th.th_active, TRUE);
4284 if ( __kmp_adjust_gtid_mode ) {
4285 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4286 if ( TCR_4(__kmp_gtid_mode) != 2) {
4287 TCW_4(__kmp_gtid_mode, 2);
4291 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4292 TCW_4(__kmp_gtid_mode, 1);
4297 #ifdef KMP_ADJUST_BLOCKTIME
4300 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4301 if ( __kmp_nth > __kmp_avail_proc ) {
4302 __kmp_zero_bt = TRUE;
4308 KF_TRACE( 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
4309 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
4310 KF_TRACE( 10, (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
4312 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
4327 __kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs,
ident_t *loc ) {
4328 KF_TRACE( 10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4329 team->t.t_threads[0], team ) );
4330 KMP_DEBUG_ASSERT( team && new_icvs);
4331 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
4332 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4334 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4337 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
4338 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4340 KF_TRACE( 10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4341 team->t.t_threads[0], team ) );
4349 __kmp_initialize_team(
4352 kmp_internal_control_t * new_icvs,
4355 KF_TRACE( 10, (
"__kmp_initialize_team: enter: team=%p\n", team ) );
4358 KMP_DEBUG_ASSERT( team );
4359 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
4360 KMP_DEBUG_ASSERT( team->t.t_threads );
4363 team->t.t_master_tid = 0;
4365 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4366 team->t.t_nproc = new_nproc;
4369 team->t.t_next_pool = NULL;
4372 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4373 team->t.t_invoke = NULL;
4376 team->t.t_sched = new_icvs->sched;
4378 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4379 team->t.t_fp_control_saved = FALSE;
4380 team->t.t_x87_fpu_control_word = 0;
4381 team->t.t_mxcsr = 0;
4384 team->t.t_construct = 0;
4385 __kmp_init_lock( & team->t.t_single_lock );
4387 team->t.t_ordered .dt.t_value = 0;
4388 team->t.t_master_active = FALSE;
4390 memset( & team->t.t_taskq,
'\0',
sizeof( kmp_taskq_t ));
4393 team->t.t_copypriv_data = NULL;
4395 team->t.t_copyin_counter = 0;
4397 team->t.t_control_stack_top = NULL;
4399 __kmp_reinitialize_team( team, new_icvs, loc );
4402 KF_TRACE( 10, (
"__kmp_initialize_team: exit: team=%p\n", team ) );
4405 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4408 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
4410 if ( KMP_AFFINITY_CAPABLE() ) {
4412 if ( old_mask != NULL ) {
4413 status = __kmp_get_system_affinity( old_mask, TRUE );
4415 if ( status != 0 ) {
4418 KMP_MSG( ChangeThreadAffMaskError ),
4424 __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE );
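/* __kmp_partition_places: assign each thread of the team a place partition according to
   the team's proc_bind policy: 'master' keeps every thread on the master's place,
   'close' packs threads into consecutive places starting at the master, and 'spread'
   distributes them evenly, giving each thread its own sub-partition of the master's
   [first_place, last_place] interval. */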
4429 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
4438 __kmp_partition_places( kmp_team_t *team,
int update_master_only )
4443 kmp_info_t *master_th = team->t.t_threads[0];
4444 KMP_DEBUG_ASSERT( master_th != NULL );
4445 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4446 int first_place = master_th->th.th_first_place;
4447 int last_place = master_th->th.th_last_place;
4448 int masters_place = master_th->th.th_current_place;
4449 team->t.t_first_place = first_place;
4450 team->t.t_last_place = last_place;
4452 KA_TRACE( 20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
4453 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
4454 masters_place, first_place, last_place ) );
4456 switch ( proc_bind ) {
4458 case proc_bind_default:
4464 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
4467 case proc_bind_master:
4470 int n_th = team->t.t_nproc;
4471 for ( f = 1; f < n_th; f++ ) {
4472 kmp_info_t *th = team->t.t_threads[f];
4473 KMP_DEBUG_ASSERT( th != NULL );
4474 th->th.th_first_place = first_place;
4475 th->th.th_last_place = last_place;
4476 th->th.th_new_place = masters_place;
4478 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4479 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4480 team->t.t_id, f, masters_place, first_place, last_place ) );
4485 case proc_bind_close:
4488 int n_th = team->t.t_nproc;
4490 if ( first_place <= last_place ) {
4491 n_places = last_place - first_place + 1;
4494 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4496 if ( n_th <= n_places ) {
4497 int place = masters_place;
4498 for ( f = 1; f < n_th; f++ ) {
4499 kmp_info_t *th = team->t.t_threads[f];
4500 KMP_DEBUG_ASSERT( th != NULL );
4502 if ( place == last_place ) {
4503 place = first_place;
4505 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4511 th->th.th_first_place = first_place;
4512 th->th.th_last_place = last_place;
4513 th->th.th_new_place = place;
4515 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4516 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4517 team->t.t_id, f, place, first_place, last_place ) );
4521 int S, rem, gap, s_count;
4522 S = n_th / n_places;
4524 rem = n_th - ( S * n_places );
4525 gap = rem > 0 ? n_places/rem : n_places;
4526 int place = masters_place;
4528 for ( f = 0; f < n_th; f++ ) {
4529 kmp_info_t *th = team->t.t_threads[f];
4530 KMP_DEBUG_ASSERT( th != NULL );
4532 th->th.th_first_place = first_place;
4533 th->th.th_last_place = last_place;
4534 th->th.th_new_place = place;
4537 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4540 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4542 if ( place == last_place ) {
4543 place = first_place;
4545 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4555 else if (s_count == S) {
4556 if ( place == last_place ) {
4557 place = first_place;
4559 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4569 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4570 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4571 team->t.t_id, f, th->th.th_new_place, first_place,
4574 KMP_DEBUG_ASSERT( place == masters_place );
4579 case proc_bind_spread:
4582 int n_th = team->t.t_nproc;
4585 if ( first_place <= last_place ) {
4586 n_places = last_place - first_place + 1;
4589 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4591 if ( n_th <= n_places ) {
4592 int place = masters_place;
4593 int S = n_places/n_th;
4594 int s_count, rem, gap, gap_ct;
4595 rem = n_places - n_th*S;
4596 gap = rem ? n_th/rem : 1;
4599 if (update_master_only == 1)
4601 for ( f = 0; f < thidx; f++ ) {
4602 kmp_info_t *th = team->t.t_threads[f];
4603 KMP_DEBUG_ASSERT( th != NULL );
4605 th->th.th_first_place = place;
4606 th->th.th_new_place = place;
4608 while (s_count < S) {
4609 if ( place == last_place ) {
4610 place = first_place;
4612 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4620 if (rem && (gap_ct == gap)) {
4621 if ( place == last_place ) {
4622 place = first_place;
4624 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4633 th->th.th_last_place = place;
4636 if ( place == last_place ) {
4637 place = first_place;
4639 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4646 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4647 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4648 team->t.t_id, f, th->th.th_new_place,
4649 th->th.th_first_place, th->th.th_last_place ) );
4651 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
4654 int S, rem, gap, s_count;
4655 S = n_th / n_places;
4657 rem = n_th - ( S * n_places );
4658 gap = rem > 0 ? n_places/rem : n_places;
4659 int place = masters_place;
4662 if (update_master_only == 1)
4664 for ( f = 0; f < thidx; f++ ) {
4665 kmp_info_t *th = team->t.t_threads[f];
4666 KMP_DEBUG_ASSERT( th != NULL );
4668 th->th.th_first_place = place;
4669 th->th.th_last_place = place;
4670 th->th.th_new_place = place;
4673 if ( (s_count == S) && rem && (gap_ct == gap) ) {
4676 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
4678 if ( place == last_place ) {
4679 place = first_place;
4681 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4691 else if (s_count == S) {
4692 if ( place == last_place ) {
4693 place = first_place;
4695 else if ( place == (
int)(__kmp_affinity_num_masks - 1) ) {
4705 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
4706 __kmp_gtid_from_thread( team->t.t_threads[f] ),
4707 team->t.t_id, f, th->th.th_new_place,
4708 th->th.th_first_place, th->th.th_last_place) );
4710 KMP_DEBUG_ASSERT( update_master_only || place == masters_place );
4719 KA_TRACE( 20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
4726 __kmp_allocate_team( kmp_root_t *root,
int new_nproc,
int max_nproc,
4728 ompt_parallel_id_t ompt_parallel_id,
4731 kmp_proc_bind_t new_proc_bind,
4733 kmp_internal_control_t *new_icvs,
4734 int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
4736 KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
4739 int use_hot_team = ! root->r.r_active;
4742 KA_TRACE( 20, (
"__kmp_allocate_team: called\n"));
4743 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
4744 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
4747 #if KMP_NESTED_HOT_TEAMS
4748 kmp_hot_team_ptr_t *hot_teams;
4750 team = master->th.th_team;
4751 level = team->t.t_active_level;
4752 if( master->th.th_teams_microtask ) {
4753 if( master->th.th_teams_size.nteams > 1 && (
4754 team->t.t_pkfn == (microtask_t)__kmp_teams_master ||
4755 master->th.th_teams_level < team->t.t_level ) ) {
4759 hot_teams = master->th.th_hot_teams;
4760 if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team )
4769 if( use_hot_team && new_nproc > 1 ) {
4770 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
4771 #if KMP_NESTED_HOT_TEAMS
4772 team = hot_teams[level].hot_team;
4774 team = root->r.r_hot_team;
4777 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4778 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n",
4779 team->t.t_task_team[0], team->t.t_task_team[1] ));
4786 if (team->t.t_nproc == new_nproc) {
4787 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
4790 if ( team->t.t_size_changed == -1 ) {
4791 team->t.t_size_changed = 1;
4793 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4797 kmp_r_sched_t new_sched = new_icvs->sched;
4798 if (team->t.t_sched.r_sched_type != new_sched.r_sched_type ||
4799 team->t.t_sched.chunk != new_sched.chunk)
4800 team->t.t_sched = new_sched;
4802 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4804 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
4805 0, team->t.t_threads[0], team ) );
4806 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4809 # if KMP_AFFINITY_SUPPORTED
4810 if ( ( team->t.t_size_changed == 0 )
4811 && ( team->t.t_proc_bind == new_proc_bind ) ) {
4812 if (new_proc_bind == proc_bind_spread) {
4813 __kmp_partition_places(team, 1);
4815 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
4816 team->t.t_id, new_proc_bind, team->t.t_first_place,
4817 team->t.t_last_place ) );
4820 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4821 __kmp_partition_places( team );
4824 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4828 else if( team->t.t_nproc > new_nproc ) {
4829 KA_TRACE( 20, (
"__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
4831 team->t.t_size_changed = 1;
4832 #if KMP_NESTED_HOT_TEAMS
4833 if( __kmp_hot_teams_mode == 0 ) {
4836 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4837 hot_teams[level].hot_team_nth = new_nproc;
4838 #endif // KMP_NESTED_HOT_TEAMS
4840 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
4841 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
4842 if ( __kmp_tasking_mode != tskm_immediate_exec) {
4844 team->t.t_threads[f]->th.th_task_team = NULL;
4846 __kmp_free_thread( team->t.t_threads[ f ] );
4847 team->t.t_threads[ f ] = NULL;
4849 #if KMP_NESTED_HOT_TEAMS
4853 for (f=new_nproc; f<team->t.t_nproc; ++f) {
4854 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4855 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4856 for (
int b=0; b<bs_last_barrier; ++b) {
4857 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4858 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4860 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4864 #endif // KMP_NESTED_HOT_TEAMS
4865 team->t.t_nproc = new_nproc;
4867 if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type ||
4868 team->t.t_sched.chunk != new_icvs->sched.chunk)
4869 team->t.t_sched = new_icvs->sched;
4870 __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident );
4873 for(f = 0; f < new_nproc; ++f) {
4874 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4877 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
4878 0, team->t.t_threads[0], team ) );
4880 __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 );
4883 for ( f = 0; f < team->t.t_nproc; f++ ) {
4884 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
4885 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
4890 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4891 # if KMP_AFFINITY_SUPPORTED
4892 __kmp_partition_places( team );
4897 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4898 kmp_affin_mask_t *old_mask;
4899 if ( KMP_AFFINITY_CAPABLE() ) {
4900 KMP_CPU_ALLOC(old_mask);
4904 KA_TRACE( 20, (
"__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
4906 team->t.t_size_changed = 1;
4908 #if KMP_NESTED_HOT_TEAMS
4909 int avail_threads = hot_teams[level].hot_team_nth;
4910 if( new_nproc < avail_threads )
4911 avail_threads = new_nproc;
4912 kmp_info_t **other_threads = team->t.t_threads;
4913 for ( f = team->t.t_nproc; f < avail_threads; ++f ) {
4917 kmp_balign_t * balign = other_threads[f]->th.th_bar;
4918 for ( b = 0; b < bs_last_barrier; ++ b ) {
4919 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
4920 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4922 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
4926 if( hot_teams[level].hot_team_nth >= new_nproc ) {
4929 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
4930 team->t.t_nproc = new_nproc;
4933 team->t.t_nproc = hot_teams[level].hot_team_nth;
4934 hot_teams[level].hot_team_nth = new_nproc;
4935 #endif // KMP_NESTED_HOT_TEAMS
4936 if(team->t.t_max_nproc < new_nproc) {
4938 __kmp_reallocate_team_arrays(team, new_nproc);
4939 __kmp_reinitialize_team( team, new_icvs, NULL );
4942 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4949 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
4953 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
4954 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
4955 KMP_DEBUG_ASSERT( new_worker );
4956 team->t.t_threads[ f ] = new_worker;
4958 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n",
4959 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
4960 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
4961 team->t.t_bar[bs_plain_barrier].b_arrived ) );
4965 kmp_balign_t * balign = new_worker->th.th_bar;
4966 for( b = 0; b < bs_last_barrier; ++ b ) {
4967 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
4968 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4970 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
4976 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
4977 if ( KMP_AFFINITY_CAPABLE() ) {
4979 __kmp_set_system_affinity( old_mask, TRUE );
4980 KMP_CPU_FREE(old_mask);
4983 #if KMP_NESTED_HOT_TEAMS
4985 #endif // KMP_NESTED_HOT_TEAMS
4987 int old_nproc = team->t.t_nproc;
4988 __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident );
4991 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
4992 for (f=0; f < team->t.t_nproc; ++f)
4993 __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) );
4998 for (f=old_nproc; f < team->t.t_nproc; ++f)
4999 team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5002 int old_state = team->t.t_threads[0]->th.th_task_state;
5003 for (f=old_nproc; f < team->t.t_nproc; ++f)
5004 team->t.t_threads[f]->th.th_task_state = old_state;
5008 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5009 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5010 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5015 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5016 # if KMP_AFFINITY_SUPPORTED
5017 __kmp_partition_places( team );
5023 kmp_info_t *master = team->t.t_threads[0];
5024 if( master->th.th_teams_microtask ) {
5025 for( f = 1; f < new_nproc; ++f ) {
5027 kmp_info_t *thr = team->t.t_threads[f];
5028 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5029 thr->th.th_teams_level = master->th.th_teams_level;
5030 thr->th.th_teams_size = master->th.th_teams_size;
5034 #if KMP_NESTED_HOT_TEAMS
5037 for( f = 1; f < new_nproc; ++f ) {
5038 kmp_info_t *thr = team->t.t_threads[f];
5040 kmp_balign_t * balign = thr->th.th_bar;
5041 for( b = 0; b < bs_last_barrier; ++ b ) {
5042 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5043 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5045 balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived;
5050 #endif // KMP_NESTED_HOT_TEAMS
5053 __kmp_alloc_argv_entries( argc, team, TRUE );
5054 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5060 KF_TRACE( 10, (
" hot_team = %p\n", team ) );
5063 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5064 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n",
5065 team->t.t_task_team[0], team->t.t_task_team[1] ));
5070 __ompt_team_assign_id(team, ompt_parallel_id);
5080 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5083 if ( team->t.t_max_nproc >= max_nproc ) {
5085 __kmp_team_pool = team->t.t_next_pool;
5088 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5090 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5091 &team->t.t_task_team[0], &team->t.t_task_team[1]) );
5092 team->t.t_task_team[0] = NULL;
5093 team->t.t_task_team[1] = NULL;
5096 __kmp_alloc_argv_entries( argc, team, TRUE );
5097 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5099 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5100 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5103 for ( b = 0; b < bs_last_barrier; ++ b) {
5104 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5106 team->t.t_bar[ b ].b_master_arrived = 0;
5107 team->t.t_bar[ b ].b_team_arrived = 0;
5113 team->t.t_proc_bind = new_proc_bind;
5116 KA_TRACE( 20, ( "__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
5119 __ompt_team_assign_id(team, ompt_parallel_id);
5130 team = __kmp_reap_team( team );
5131 __kmp_team_pool = team;
5136 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
5139 team->t.t_max_nproc = max_nproc;
5143 __kmp_allocate_team_arrays( team, max_nproc );
5145 KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) );
5146 __kmp_initialize_team( team, new_nproc, new_icvs, NULL );
5148 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n",
5149 &team->t.t_task_team[0], &team->t.t_task_team[1] ) );
5150 team->t.t_task_team[0] = NULL;
5151 team->t.t_task_team[1] = NULL;
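// The task teams of a freshly allocated team start out NULL; they are set up
// lazily by the tasking code once tasks are actually encountered.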
5153 if ( __kmp_storage_map ) {
5154 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
5158 __kmp_alloc_argv_entries( argc, team, FALSE );
5159 team->t.t_argc = argc;
5161 KA_TRACE( 20, ( "__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5162 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5165 for ( b = 0; b < bs_last_barrier; ++ b ) {
5166 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5168 team->t.t_bar[ b ].b_master_arrived = 0;
5169 team->t.t_bar[ b ].b_team_arrived = 0;
5175 team->t.t_proc_bind = new_proc_bind;
5179 __ompt_team_assign_id(team, ompt_parallel_id);
5180 team->t.ompt_serialized_team_info = NULL;
5185 KA_TRACE( 20, ( "__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
5196 __kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) )
5199 KA_TRACE( 20, ( "__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
5202 KMP_DEBUG_ASSERT( root );
5203 KMP_DEBUG_ASSERT( team );
5204 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5205 KMP_DEBUG_ASSERT( team->t.t_threads );
5207 int use_hot_team = team == root->r.r_hot_team;
5208 #if KMP_NESTED_HOT_TEAMS
5210 kmp_hot_team_ptr_t *hot_teams;
5212 level = team->t.t_active_level - 1;
5213 if( master->th.th_teams_microtask ) {
5214 if( master->th.th_teams_size.nteams > 1 ) {
5217 if( team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5218 master->th.th_teams_level == team->t.t_level ) {
5222 hot_teams = master->th.th_hot_teams;
5223 if( level < __kmp_hot_teams_max_level ) {
5224 KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team );
5228 #endif // KMP_NESTED_HOT_TEAMS
5231 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5232 team->t.t_copyin_counter = 0;
5236 if( ! use_hot_team ) {
5237 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5240 for (tt_idx=0; tt_idx<2; ++tt_idx) {
5241 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5242 if ( task_team != NULL ) {
5243 for (f=0; f<team->t.t_nproc; ++f) {
5244 team->t.t_threads[f]->th.th_task_team = NULL;
5246 KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) );
5247 #if KMP_NESTED_HOT_TEAMS
5248 __kmp_free_task_team( master, task_team );
5250 team->t.t_task_team[tt_idx] = NULL;
5256 team->t.t_parent = NULL;
5257 team->t.t_level = 0;
5258 team->t.t_active_level = 0;
5261 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5262 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5263 __kmp_free_thread( team->t.t_threads[ f ] );
5264 team->t.t_threads[ f ] = NULL;
5269 team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5270 __kmp_team_pool = (volatile kmp_team_t*) team;
5279 __kmp_reap_team( kmp_team_t *team )
5281 kmp_team_t *next_pool = team->t.t_next_pool;
5283 KMP_DEBUG_ASSERT( team );
5284 KMP_DEBUG_ASSERT( team->t.t_dispatch );
5285 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
5286 KMP_DEBUG_ASSERT( team->t.t_threads );
5287 KMP_DEBUG_ASSERT( team->t.t_argv );
5293 __kmp_free_team_arrays( team );
5294 if ( team->t.t_argv != &team->t.t_inline_argv[0] )
5295 __kmp_free( (void*) team->t.t_argv );
5330 __kmp_free_thread( kmp_info_t *this_th )
5335 KA_TRACE( 20, ( "__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5336 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
5338 KMP_DEBUG_ASSERT( this_th );
5342 kmp_balign_t *balign = this_th->th.th_bar;
5343 for (b=0; b<bs_last_barrier; ++b) {
5344 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5345 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5346 balign[b].bb.team = NULL;
5347 balign[b].bb.leaf_kids = 0;
5349 this_th->th.th_task_state = 0;
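// Before the thread is parked in the pool its per-barrier state is detached from
// the old team; a thread still registered on its parent's flag is told to switch
// back to waiting on its own b_go flag (KMP_BARRIER_SWITCH_TO_OWN_FLAG).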
5352 TCW_PTR(this_th->th.th_team, NULL);
5353 TCW_PTR(this_th->th.th_root, NULL);
5354 TCW_PTR(this_th->th.th_dispatch, NULL);
5360 gtid = this_th->th.th_info.ds.ds_gtid;
5361 if ( __kmp_thread_pool_insert_pt != NULL ) {
5362 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
5363 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
5364 __kmp_thread_pool_insert_pt = NULL;
5375 if ( __kmp_thread_pool_insert_pt != NULL ) {
5376 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
5379 scan = (kmp_info_t **)&__kmp_thread_pool;
5381 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
5382 scan = &( (*scan)->th.th_next_pool ) );
5388 TCW_PTR(this_th->th.th_next_pool, *scan);
5389 __kmp_thread_pool_insert_pt = *scan = this_th;
5390 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
5391 || ( this_th->th.th_info.ds.ds_gtid
5392 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
5393 TCW_4(this_th->th.th_in_pool, TRUE);
5394 __kmp_thread_pool_nth++;
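// The free pool is kept sorted by gtid; __kmp_thread_pool_insert_pt remembers the
// last insertion point so a sequence of frees with increasing gtids does not have
// to rescan the whole list.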
5396 TCW_4(__kmp_nth, __kmp_nth - 1);
5398 #ifdef KMP_ADJUST_BLOCKTIME
5401 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5402 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5403 if ( __kmp_nth <= __kmp_avail_proc ) {
5404 __kmp_zero_bt = FALSE;
5416 __kmp_launch_thread( kmp_info_t *this_thr )
5418 int gtid = this_thr->th.th_info.ds.ds_gtid;
5420 kmp_team_t *(*volatile pteam);
5423 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d start\n", gtid ) );
5425 if( __kmp_env_consistency_check ) {
5426 this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid );
5431 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5432 this_thr->th.ompt_thread_info.wait_id = 0;
5433 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
5434 if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
5435 __ompt_thread_begin(ompt_thread_worker, gtid);
5441 while( ! TCR_4(__kmp_global.g.g_done) ) {
5442 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
5446 KA_TRACE( 20, ( "__kmp_launch_thread: T#%d waiting for work\n", gtid ));
5450 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5455 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
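// Workers park in the fork barrier here and are released by the primary thread
// either with a new parallel region to execute or when the library shuts down.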
5459 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5463 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
5466 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
5468 ompt_task_info_t *task_info;
5469 ompt_parallel_id_t my_parallel_id;
5471 task_info = __ompt_get_taskinfo(0);
5472 my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id;
5476 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
5478 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5479 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5481 updateHWFPControl (*pteam);
5485 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5487 int tid = __kmp_tid_from_gtid(gtid);
5488 task_info->task_id = __ompt_task_id_new(tid);
5492 KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
5494 KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
5495 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5496 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5497 rc = (*pteam)->t.t_invoke( gtid );
5499 KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
5505 task_info->frame.exit_runtime_frame = 0;
5507 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5511 KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5512 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn));
5515 __kmp_join_barrier( gtid );
5516 #if OMPT_SUPPORT && OMPT_TRACE
5518 if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
5521 ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
5522 my_parallel_id, task_info->task_id);
5524 task_info->frame.exit_runtime_frame = 0;
5525 task_info->task_id = 0;
5530 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5534 ompt_callbacks.ompt_callback(ompt_event_thread_end)) {
5535 __ompt_thread_end(ompt_thread_worker, gtid);
5539 this_thr->th.th_task_team = NULL;
5541 __kmp_common_destroy_gtid( gtid );
5543 KA_TRACE( 10, ( "__kmp_launch_thread: T#%d done\n", gtid ) );
5552 __kmp_internal_end_dest( void *specific_gtid )
5554 #if KMP_COMPILER_ICC
5555 #pragma warning( push )
5556 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
5559 int gtid = (kmp_intptr_t)specific_gtid - 1;
5560 #if KMP_COMPILER_ICC
5561 #pragma warning( pop )
5564 KA_TRACE( 30, ( "__kmp_internal_end_dest: T#%d\n", gtid));
5578 if(gtid >= 0 && KMP_UBER_GTID(gtid))
5579 __kmp_gtid_set_specific( gtid );
5580 #ifdef KMP_TDATA_GTID 5583 __kmp_internal_end_thread( gtid );
5586 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5592 __attribute__(( destructor ))
5594 __kmp_internal_end_dtor( void )
5596 __kmp_internal_end_atexit();
5600 __kmp_internal_end_fini( void )
5602 __kmp_internal_end_atexit();
5609 __kmp_internal_end_atexit( void )
5611 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
5633 __kmp_internal_end_library( -1 );
5635 __kmp_close_console();
5641 __kmp_reap_thread( kmp_info_t * thread, int is_root )
5649 KMP_DEBUG_ASSERT( thread != NULL );
5651 gtid = thread->th.th_info.ds.ds_gtid;
5655 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
5657 KA_TRACE( 20, ( "__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
5659 kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread);
5660 __kmp_release_64(&flag);
5664 __kmp_reap_worker( thread );
5679 if ( thread->th.th_active_in_pool ) {
5680 thread->th.th_active_in_pool = FALSE;
5681 KMP_TEST_THEN_DEC32(
5682 (kmp_int32 *) &__kmp_thread_pool_active_nth );
5683 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
5687 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
5688 --__kmp_thread_pool_nth;
5691 __kmp_free_implicit_task(thread);
5695 __kmp_free_fast_memory( thread );
5698 __kmp_suspend_uninitialize_thread( thread );
5700 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
5701 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
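// Clearing the __kmp_threads[] entry lets this gtid slot be reused by a thread
// registered later.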
5706 #ifdef KMP_ADJUST_BLOCKTIME
5709 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5710 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
5711 if ( __kmp_nth <= __kmp_avail_proc ) {
5712 __kmp_zero_bt = FALSE;
5718 if( __kmp_env_consistency_check ) {
5719 if ( thread->th.th_cons ) {
5720 __kmp_free_cons_stack( thread->th.th_cons );
5721 thread->th.th_cons = NULL;
5725 if ( thread->th.th_pri_common != NULL ) {
5726 __kmp_free( thread->th.th_pri_common );
5727 thread->th.th_pri_common = NULL;
5730 if (thread->th.th_task_state_memo_stack != NULL) {
5731 __kmp_free(thread->th.th_task_state_memo_stack);
5732 thread->th.th_task_state_memo_stack = NULL;
5736 if ( thread->th.th_local.bget_data != NULL ) {
5737 __kmp_finalize_bget( thread );
5741 #if KMP_AFFINITY_SUPPORTED
5742 if ( thread->th.th_affin_mask != NULL ) {
5743 KMP_CPU_FREE( thread->th.th_affin_mask );
5744 thread->th.th_affin_mask = NULL;
5748 __kmp_reap_team( thread->th.th_serial_team );
5749 thread->th.th_serial_team = NULL;
5750 __kmp_free( thread );
5757 __kmp_internal_end( void )
5762 __kmp_unregister_library();
5770 __kmp_reclaim_dead_roots();
5773 for( i=0 ; i<__kmp_threads_capacity ; i++ )
5775 if( __kmp_root[i]->r.r_active )
5778 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5780 if ( i < __kmp_threads_capacity ) {
5795 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5796 if ( TCR_4( __kmp_init_monitor ) ) {
5797 __kmp_reap_monitor( & __kmp_monitor );
5798 TCW_4( __kmp_init_monitor, 0 );
5800 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5801 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5806 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
5807 if( __kmp_root[i] ) {
5809 KMP_ASSERT( ! __kmp_root[i]->r.r_active );
5818 while ( __kmp_thread_pool != NULL ) {
5820 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
5821 __kmp_thread_pool = thread->th.th_next_pool;
5823 thread->th.th_next_pool = NULL;
5824 thread->th.th_in_pool = FALSE;
5825 __kmp_reap_thread( thread, 0 );
5827 __kmp_thread_pool_insert_pt = NULL;
5830 while ( __kmp_team_pool != NULL ) {
5832 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
5833 __kmp_team_pool = team->t.t_next_pool;
5835 team->t.t_next_pool = NULL;
5836 __kmp_reap_team( team );
5839 __kmp_reap_task_teams( );
5841 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
5848 TCW_SYNC_4(__kmp_init_common, FALSE);
5850 KA_TRACE( 10, ( "__kmp_internal_end: all workers reaped\n" ) );
5859 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
5860 if ( TCR_4( __kmp_init_monitor ) ) {
5861 __kmp_reap_monitor( & __kmp_monitor );
5862 TCW_4( __kmp_init_monitor, 0 );
5864 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
5865 KA_TRACE( 10, ( "__kmp_internal_end: monitor reaped\n" ) );
5868 TCW_4(__kmp_init_gtid, FALSE);
5878 __kmp_internal_end_library( int gtid_req )
5886 if( __kmp_global.g.g_abort ) {
5887 KA_TRACE( 11, ( "__kmp_internal_end_library: abort, exiting\n" ));
5891 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5892 KA_TRACE( 10, ( "__kmp_internal_end_library: already finished\n" ));
5901 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
5902 KA_TRACE( 10, ( "__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
5903 if( gtid == KMP_GTID_SHUTDOWN ) {
5904 KA_TRACE( 10, ( "__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
5906 } else if( gtid == KMP_GTID_MONITOR ) {
5907 KA_TRACE( 10, ( "__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
5909 } else if( gtid == KMP_GTID_DNE ) {
5910 KA_TRACE( 10, ( "__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
5912 } else if( KMP_UBER_GTID( gtid )) {
5914 if( __kmp_root[gtid]->r.r_active ) {
5915 __kmp_global.g.g_abort = -1;
5916 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5917 KA_TRACE( 10, ( "__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
5920 KA_TRACE( 10, ( "__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
5921 __kmp_unregister_root_current_thread( gtid );
5928 #ifdef DUMP_DEBUG_ON_EXIT
5929 if ( __kmp_debug_buf )
5930 __kmp_dump_debug_buffer( );
5936 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
5939 if( __kmp_global.g.g_abort ) {
5940 KA_TRACE( 10, ( "__kmp_internal_end_library: abort, exiting\n" ));
5942 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5945 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5946 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5956 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
5959 __kmp_internal_end();
5961 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
5962 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
5964 KA_TRACE( 10, ( "__kmp_internal_end_library: exit\n" ) );
5966 #ifdef DUMP_DEBUG_ON_EXIT
5967 if ( __kmp_debug_buf )
5968 __kmp_dump_debug_buffer();
5972 __kmp_close_console();
5975 __kmp_fini_allocator();
5980 __kmp_internal_end_thread( int gtid_req )
5990 if( __kmp_global.g.g_abort ) {
5991 KA_TRACE( 11, ( "__kmp_internal_end_thread: abort, exiting\n" ));
5995 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
5996 KA_TRACE( 10, ( "__kmp_internal_end_thread: already finished\n" ));
6004 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6005 KA_TRACE( 10, ( "__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
6006 if( gtid == KMP_GTID_SHUTDOWN ) {
6007 KA_TRACE( 10, ( "__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
6009 } else if( gtid == KMP_GTID_MONITOR ) {
6010 KA_TRACE( 10, ( "__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
6012 } else if( gtid == KMP_GTID_DNE ) {
6013 KA_TRACE( 10, ( "__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
6016 } else if( KMP_UBER_GTID( gtid )) {
6018 if( __kmp_root[gtid]->r.r_active ) {
6019 __kmp_global.g.g_abort = -1;
6020 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6021 KA_TRACE( 10, ( "__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
6024 KA_TRACE( 10, ( "__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
6025 __kmp_unregister_root_current_thread( gtid );
6029 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
6032 __kmp_threads[gtid]->th.th_task_team = NULL;
6035 KA_TRACE( 10, ( "__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
6039 #if defined KMP_DYNAMIC_LIB
6047 KA_TRACE( 10, ( "__kmp_internal_end_thread: exiting T#%d\n", gtid_req) );
6051 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6054 if( __kmp_global.g.g_abort ) {
6055 KA_TRACE( 10, ( "__kmp_internal_end_thread: abort, exiting\n" ));
6057 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6060 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6061 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6073 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6075 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6076 if ( KMP_UBER_GTID( i ) ) {
6077 KA_TRACE( 10, ( "__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
6078 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6079 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6086 __kmp_internal_end();
6088 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6089 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6091 KA_TRACE( 10, ( "__kmp_internal_end_thread: exit T#%d\n", gtid_req ) );
6093 #ifdef DUMP_DEBUG_ON_EXIT
6094 if ( __kmp_debug_buf )
6095 __kmp_dump_debug_buffer();
6102 static long __kmp_registration_flag = 0;
6104 static char * __kmp_registration_str = NULL;
6110 __kmp_reg_status_name() {
6116 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
6121 __kmp_register_library_startup( void )
6125 char * name = __kmp_reg_status_name();
6132 __kmp_initialize_system_tick();
6134 __kmp_read_system_time( & time.dtime );
6135 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
6136 __kmp_registration_str =
6139 & __kmp_registration_flag,
6140 __kmp_registration_flag,
6144 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
6148 char * value = NULL;
6151 __kmp_env_set( name, __kmp_registration_str, 0 );
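// Registration protocol: the value stored under __KMP_REGISTERED_LIB_<pid> names
// this copy of the runtime; the parsing below suggests it encodes the address of
// __kmp_registration_flag, the flag's value, and the library file name, separated
// by '-'. If the variable was already set by another copy of the library, the code
// below decides whether that copy is still alive and whether to abort with a
// DuplicateLibrary error (unless KMP_DUPLICATE_LIB_OK is set).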
6153 value = __kmp_env_get( name );
6154 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6163 char * tail = value;
6164 char * flag_addr_str = NULL;
6165 char * flag_val_str = NULL;
6166 char const * file_name = NULL;
6167 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
6168 __kmp_str_split( tail, '-', & flag_val_str, & tail );
6170 if ( tail != NULL ) {
6171 long * flag_addr = 0;
6173 KMP_SSCANF( flag_addr_str, "%p", & flag_addr );
6174 KMP_SSCANF( flag_val_str, "%lx", & flag_val );
6175 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
6179 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
6187 switch ( neighbor ) {
6192 file_name = "unknown library";
6196 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
6197 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
6201 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
6202 KMP_HNT( DuplicateLibrary ),
6206 KMP_INTERNAL_FREE( duplicate_ok );
6207 __kmp_duplicate_library_ok = 1;
6212 __kmp_env_unset( name );
6215 KMP_DEBUG_ASSERT( 0 );
6220 KMP_INTERNAL_FREE( (void *) value );
6223 KMP_INTERNAL_FREE( (void *) name );
6229 __kmp_unregister_library( void ) {
6231 char * name = __kmp_reg_status_name();
6232 char * value = __kmp_env_get( name );
6234 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
6235 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
6236 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
6238 __kmp_env_unset( name );
6241 KMP_INTERNAL_FREE( __kmp_registration_str );
6242 KMP_INTERNAL_FREE( value );
6243 KMP_INTERNAL_FREE( name );
6245 __kmp_registration_flag = 0;
6246 __kmp_registration_str = NULL;
6254 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6256 static void __kmp_check_mic_type()
6258 kmp_cpuid_t cpuid_state = {0};
6259 kmp_cpuid_t * cs_p = &cpuid_state;
6260 __kmp_x86_cpuid(1, 0, cs_p);
6262 if( (cs_p->eax & 0xff0) == 0xB10 ) {
6263 __kmp_mic_type = mic2;
6264 } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) {
6265 __kmp_mic_type = mic3;
6267 __kmp_mic_type = non_mic;
6274 __kmp_do_serial_initialize( void )
6279 KA_TRACE( 10, ( "__kmp_do_serial_initialize: enter\n" ) );
6281 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
6282 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
6283 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
6284 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
6285 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
6291 __kmp_validate_locks();
6294 __kmp_init_allocator();
6300 __kmp_register_library_startup( );
6303 if( TCR_4(__kmp_global.g.g_done) ) {
6304 KA_TRACE( 10, ( "__kmp_do_serial_initialize: reinitialization of library\n" ) );
6307 __kmp_global.g.g_abort = 0;
6308 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6311 #if KMP_USE_ADAPTIVE_LOCKS
6312 #if KMP_DEBUG_ADAPTIVE_LOCKS
6313 __kmp_init_speculative_stats();
6316 #if KMP_STATS_ENABLED
6317 __kmp_init_tas_lock( & __kmp_stats_lock );
6319 __kmp_init_lock( & __kmp_global_lock );
6320 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
6321 __kmp_init_lock( & __kmp_debug_lock );
6322 __kmp_init_atomic_lock( & __kmp_atomic_lock );
6323 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
6324 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
6325 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
6326 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
6327 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
6328 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
6329 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
6330 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
6331 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
6332 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
6333 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
6334 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
6335 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
6336 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
6337 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
6338 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
6342 __kmp_runtime_initialize();
6344 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6345 __kmp_check_mic_type();
6352 __kmp_abort_delay = 0;
6356 __kmp_dflt_team_nth_ub = __kmp_xproc;
6357 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
6358 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6360 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
6361 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6363 __kmp_max_nth = __kmp_sys_max_nth;
6366 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6367 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6368 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
6370 __kmp_library = library_throughput;
6372 __kmp_static = kmp_sch_static_balanced;
6378 #if KMP_FAST_REDUCTION_BARRIER
6379 #define kmp_reduction_barrier_gather_bb ((int)1)
6380 #define kmp_reduction_barrier_release_bb ((int)1)
6381 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6382 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6383 #endif // KMP_FAST_REDUCTION_BARRIER
6384 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
6385 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
6386 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
6387 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
6388 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
6389 #if KMP_FAST_REDUCTION_BARRIER
6390 if( i == bs_reduction_barrier ) {
6391 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
6392 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
6393 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
6394 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
6396 #endif // KMP_FAST_REDUCTION_BARRIER
6398 #if KMP_FAST_REDUCTION_BARRIER
6399 #undef kmp_reduction_barrier_release_pat
6400 #undef kmp_reduction_barrier_gather_pat
6401 #undef kmp_reduction_barrier_release_bb
6402 #undef kmp_reduction_barrier_gather_bb
6403 #endif // KMP_FAST_REDUCTION_BARRIER
6404 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
6405 if (__kmp_mic_type == mic2) {
6407 __kmp_barrier_gather_branch_bits [ bs_plain_barrier ] = 3;
6408 __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] = 1;
6409 __kmp_barrier_gather_pattern [ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6410 __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] = bp_hierarchical_bar;
6412 #if KMP_FAST_REDUCTION_BARRIER
6413 if (__kmp_mic_type == mic2) {
6414 __kmp_barrier_gather_pattern [ bs_reduction_barrier ] = bp_hierarchical_bar;
6415 __kmp_barrier_release_pattern[ bs_reduction_barrier ] = bp_hierarchical_bar;
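// The branch bits give the log2 fan-out used by the tree/hyper barriers (e.g. a
// value of 3 means up to 8 children per parent); the KNC (mic2) settings above are
// a platform-specific tuning of those defaults.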
6422 __kmp_env_checks = TRUE;
6424 __kmp_env_checks = FALSE;
6428 __kmp_foreign_tp = TRUE;
6430 __kmp_global.g.g_dynamic = FALSE;
6431 __kmp_global.g.g_dynamic_mode = dynamic_default;
6433 __kmp_env_initialize( NULL );
6437 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
6438 if ( __kmp_str_match_true( val ) ) {
6439 kmp_str_buf_t buffer;
6440 __kmp_str_buf_init( & buffer );
6441 __kmp_i18n_dump_catalog( & buffer );
6442 __kmp_printf( "%s", buffer.str );
6443 __kmp_str_buf_free( & buffer );
6445 __kmp_env_free( & val );
6448 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
6450 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6454 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
6455 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
6456 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
6457 __kmp_thread_pool = NULL;
6458 __kmp_thread_pool_insert_pt = NULL;
6459 __kmp_team_pool = NULL;
6464 size = ( sizeof(kmp_info_t*) + sizeof(kmp_root_t*) ) * __kmp_threads_capacity + CACHE_LINE;
6465 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
6466 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
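// A single allocation backs both pointer arrays: __kmp_root[] starts immediately
// after the __kmp_threads[] slots, plus CACHE_LINE bytes of padding.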
6469 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
6470 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
6475 gtid = __kmp_register_root( TRUE );
6476 KA_TRACE( 10, ( "__kmp_do_serial_initialize T#%d\n", gtid ));
6477 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6478 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
6482 __kmp_common_initialize();
6486 __kmp_register_atfork();
6489 #if ! defined KMP_DYNAMIC_LIB
6494 int rc = atexit( __kmp_internal_end_atexit );
6496 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
6501 #if KMP_HANDLE_SIGNALS
6508 __kmp_install_signals( FALSE );
6511 __kmp_install_signals( TRUE );
6516 __kmp_init_counter ++;
6518 __kmp_init_serial = TRUE;
6520 if (__kmp_settings) {
6525 if (__kmp_display_env || __kmp_display_env_verbose) {
6526 __kmp_env_print_2();
6528 #endif // OMP_40_ENABLED
6536 KA_TRACE( 10, ( "__kmp_do_serial_initialize: exit\n" ) );
6540 __kmp_serial_initialize( void )
6542 if ( __kmp_init_serial ) {
6545 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6546 if ( __kmp_init_serial ) {
6547 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6550 __kmp_do_serial_initialize();
6551 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6555 __kmp_do_middle_initialize( void )
6558 int prev_dflt_team_nth;
6560 if( !__kmp_init_serial ) {
6561 __kmp_do_serial_initialize();
6564 KA_TRACE( 10, ( "__kmp_middle_initialize: enter\n" ) );
6570 prev_dflt_team_nth = __kmp_dflt_team_nth;
6572 #if KMP_AFFINITY_SUPPORTED
6577 __kmp_affinity_initialize();
6583 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6584 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
6585 __kmp_affinity_set_init_mask( i, TRUE );
6590 KMP_ASSERT( __kmp_xproc > 0 );
6591 if ( __kmp_avail_proc == 0 ) {
6592 __kmp_avail_proc = __kmp_xproc;
6597 while ( ( j < __kmp_nested_nth.used ) && ! __kmp_nested_nth.nth[ j ] ) {
6598 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
6602 if ( __kmp_dflt_team_nth == 0 ) {
6603 #ifdef KMP_DFLT_NTH_CORES
6607 __kmp_dflt_team_nth = __kmp_ncores;
6608 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
6609 __kmp_dflt_team_nth ) );
6614 __kmp_dflt_team_nth = __kmp_avail_proc;
6615 KA_TRACE( 20, ( "__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
6616 __kmp_dflt_team_nth ) );
6620 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
6621 __kmp_dflt_team_nth = KMP_MIN_NTH;
6623 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
6624 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6631 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
6633 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
6640 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6641 kmp_info_t *thread = __kmp_threads[ i ];
6642 if ( thread == NULL ) continue;
6643 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
6645 set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth );
6648 KA_TRACE( 20, ( "__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6649 __kmp_dflt_team_nth) );
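// Only threads whose nproc ICV is still 0 (i.e. still on the default) are updated
// above, so values set explicitly via omp_set_num_threads() are preserved.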
6651 #ifdef KMP_ADJUST_BLOCKTIME
6654 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6655 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6656 if ( __kmp_nth > __kmp_avail_proc ) {
6657 __kmp_zero_bt = TRUE;
6663 TCW_SYNC_4(__kmp_init_middle, TRUE);
6665 KA_TRACE( 10, ( "__kmp_do_middle_initialize: exit\n" ) );
6669 __kmp_middle_initialize( void )
6671 if ( __kmp_init_middle ) {
6674 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6675 if ( __kmp_init_middle ) {
6676 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6679 __kmp_do_middle_initialize();
6680 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6684 __kmp_parallel_initialize( void )
6686 int gtid = __kmp_entry_gtid();
6689 if( TCR_4(__kmp_init_parallel) ) return;
6690 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6691 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
6694 if( TCR_4(__kmp_global.g.g_done) ) {
6695 KA_TRACE( 10, ( "__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
6696 __kmp_infinite_loop();
6702 if( !__kmp_init_middle ) {
6703 __kmp_do_middle_initialize();
6707 KA_TRACE( 10, ( "__kmp_parallel_initialize: enter\n" ) );
6708 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
6710 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6715 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
6716 __kmp_store_mxcsr( &__kmp_init_mxcsr );
6717 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6721 # if KMP_HANDLE_SIGNALS
6723 __kmp_install_signals( TRUE );
6727 __kmp_suspend_initialize();
6729 #if defined(USE_LOAD_BALANCE)
6730 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6731 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6734 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
6735 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6739 if ( __kmp_version ) {
6740 __kmp_print_version_2();
6744 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6747 KA_TRACE( 10, ( "__kmp_parallel_initialize: exit\n" ) );
6749 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6756 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6759 kmp_disp_t *dispatch;
6764 this_thr->th.th_local.this_construct = 0;
6765 #if KMP_CACHE_MANAGE
6766 KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
6768 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
6769 KMP_DEBUG_ASSERT( dispatch );
6770 KMP_DEBUG_ASSERT( team->t.t_dispatch );
6773 dispatch->th_disp_index = 0;
6775 dispatch->th_doacross_buf_idx = 0;
6777 if( __kmp_env_consistency_check )
6778 __kmp_push_parallel( gtid, team->t.t_ident );
6784 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team )
6787 if( __kmp_env_consistency_check )
6788 __kmp_pop_parallel( gtid, team->t.t_ident );
6790 __kmp_finish_implicit_task(this_thr);
6794 __kmp_invoke_task_func( int gtid )
6797 int tid = __kmp_tid_from_gtid( gtid );
6798 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6799 kmp_team_t *team = this_thr->th.th_team;
6801 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
6803 if ( __itt_stack_caller_create_ptr ) {
6804 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
6807 #if INCLUDE_SSC_MARKS
6808 SSC_MARK_INVOKING();
6813 void **exit_runtime_p;
6814 ompt_task_id_t my_task_id;
6815 ompt_parallel_id_t my_parallel_id;
6818 exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid].
6819 ompt_task_info.frame.exit_runtime_frame);
6821 exit_runtime_p = &dummy;
6825 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
6826 my_parallel_id = team->t.ompt_team_info.parallel_id;
6828 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
6829 ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
6830 my_parallel_id, my_task_id);
6836 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6837 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6838 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
6839 gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
6847 if ( __itt_stack_caller_create_ptr ) {
6848 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
6851 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
6858 __kmp_teams_master( int gtid )
6861 kmp_info_t *thr = __kmp_threads[ gtid ];
6862 kmp_team_t *team = thr->th.th_team;
6863 ident_t *loc = team->t.t_ident;
6864 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6865 KMP_DEBUG_ASSERT( thr->th.th_teams_microtask );
6866 KMP_DEBUG_ASSERT( thr->th.th_set_nproc );
6867 KA_TRACE( 20, ( "__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
6868 gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) );
6871 #if INCLUDE_SSC_MARKS
6874 __kmp_fork_call( loc, gtid, fork_context_intel,
6877 (void *)thr->th.th_teams_microtask,
6879 (microtask_t)thr->th.th_teams_microtask,
6880 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
6882 #if INCLUDE_SSC_MARKS
6888 __kmp_join_call( loc, gtid
6890 , fork_context_intel
6896 __kmp_invoke_teams_master( int gtid )
6898 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6899 kmp_team_t *team = this_thr->th.th_team;
6901 if ( !__kmp_threads[gtid]->th.th_team->t.t_serialized )
6902 KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]->th.th_team->t.t_pkfn == (void*)__kmp_teams_master );
6904 __kmp_run_before_invoked_task( gtid, 0, this_thr, team );
6905 __kmp_teams_master( gtid );
6906 __kmp_run_after_invoked_task( gtid, 0, this_thr, team );
6917 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
6919 kmp_info_t *thr = __kmp_threads[gtid];
6921 if( num_threads > 0 )
6922 thr->th.th_set_nproc = num_threads;
6930 __kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads )
6932 kmp_info_t *thr = __kmp_threads[gtid];
6933 KMP_DEBUG_ASSERT(num_teams >= 0);
6934 KMP_DEBUG_ASSERT(num_threads >= 0);
6936 if( num_teams == 0 )
6938 if( num_teams > __kmp_max_nth ) {
6939 if ( !__kmp_reserve_warn ) {
6940 __kmp_reserve_warn = 1;
6943 KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ),
6944 KMP_HNT( Unset_ALL_THREADS ),
6948 num_teams = __kmp_max_nth;
6951 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
6954 if( num_threads == 0 ) {
6955 if( !TCR_4(__kmp_init_middle) )
6956 __kmp_middle_initialize();
6957 num_threads = __kmp_avail_proc / num_teams;
6958 if( num_teams * num_threads > __kmp_max_nth ) {
6960 num_threads = __kmp_max_nth / num_teams;
6963 if( num_teams * num_threads > __kmp_max_nth ) {
6964 int new_threads = __kmp_max_nth / num_teams;
6965 if ( !__kmp_reserve_warn ) {
6966 __kmp_reserve_warn = 1;
6969 KMP_MSG( CantFormThrTeam, num_threads, new_threads ),
6970 KMP_HNT( Unset_ALL_THREADS ),
6974 num_threads = new_threads;
6977 thr->th.th_teams_size.nth = num_threads;
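// Illustration (hypothetical values): with __kmp_avail_proc = 16 and
// __kmp_max_nth = 32, num_teams = 4 with num_threads = 0 yields
// num_threads = 16 / 4 = 4; requesting num_teams = 8, num_threads = 5 gives
// 8 * 5 = 40 > 32, so num_threads is clamped to 32 / 8 = 4 and CantFormThrTeam
// is warned once.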
6985 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
6987 kmp_info_t *thr = __kmp_threads[gtid];
6988 thr->th.th_set_proc_bind = proc_bind;
6996 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
6998 kmp_info_t *this_thr = __kmp_threads[gtid];
7004 KMP_DEBUG_ASSERT( team );
7005 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7006 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7009 team->t.t_construct = 0;
7010 team->t.t_ordered.dt.t_value = 0;
7013 KMP_DEBUG_ASSERT( team->t.t_disp_buffer );
7014 if ( team->t.t_max_nproc > 1 ) {
7016 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7017 team->t.t_disp_buffer[ i ].buffer_index = i;
7019 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7023 team->t.t_disp_buffer[ 0 ].buffer_index = 0;
7025 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7030 KMP_ASSERT( this_thr->th.th_team == team );
7033 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7034 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7035 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7040 __kmp_fork_barrier( gtid, 0 );
7045 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
7047 kmp_info_t *this_thr = __kmp_threads[gtid];
7049 KMP_DEBUG_ASSERT( team );
7050 KMP_DEBUG_ASSERT( this_thr->th.th_team == team );
7051 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
7057 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7058 __kmp_printf( "GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, __kmp_threads[gtid]);
7059 __kmp_printf( "__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7060 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7061 __kmp_print_structure();
7063 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7064 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7067 __kmp_join_barrier( gtid );
7070 KMP_ASSERT( this_thr->th.th_team == team );
7077 #ifdef USE_LOAD_BALANCE
7084 __kmp_active_hot_team_nproc( kmp_root_t *root )
7088 kmp_team_t *hot_team;
7090 if ( root->r.r_active ) {
7093 hot_team = root->r.r_hot_team;
7094 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7095 return hot_team->t.t_nproc - 1;
7102 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7103 if ( hot_team->t.t_threads[i]->th.th_active ) {
7115 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
7119 int hot_team_active;
7120 int team_curr_active;
7123 KB_TRACE( 20, ( "__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
7124 root, set_nproc ) );
7125 KMP_DEBUG_ASSERT( root );
7126 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
7127 KMP_DEBUG_ASSERT( set_nproc > 1 );
7129 if ( set_nproc == 1) {
7130 KB_TRACE( 20, ( "__kmp_load_balance_nproc: serial execution.\n" ) );
7141 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7142 hot_team_active = __kmp_active_hot_team_nproc( root );
7143 team_curr_active = pool_active + hot_team_active + 1;
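// team_curr_active = threads spinning in the pool + active hot-team workers + 1
// for the root thread issuing this fork.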
7148 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
7149 KB_TRACE( 30, ( "__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
7150 system_active, pool_active, hot_team_active ) );
7152 if ( system_active < 0 ) {
7159 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7160 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
7165 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
7166 : root->r.r_hot_team->t.t_nproc);
7167 if ( retval > set_nproc ) {
7170 if ( retval < KMP_MIN_NTH ) {
7171 retval = KMP_MIN_NTH;
7174 KB_TRACE( 20, ( "__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
7184 if ( system_active < team_curr_active ) {
7185 system_active = team_curr_active;
7187 retval = __kmp_avail_proc - system_active + team_curr_active;
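// Estimate of how many threads can run without oversubscribing: processors
// available to the process minus the load generated outside this team; the
// result is then clamped to at most set_nproc and at least KMP_MIN_NTH below.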
7188 if ( retval > set_nproc ) {
7191 if ( retval < KMP_MIN_NTH ) {
7192 retval = KMP_MIN_NTH;
7195 KB_TRACE( 20, ( "__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
7206 __kmp_cleanup( void )
7210 KA_TRACE( 10, ( "__kmp_cleanup: enter\n" ) );
7212 if (TCR_4(__kmp_init_parallel)) {
7213 #if KMP_HANDLE_SIGNALS
7214 __kmp_remove_signals();
7216 TCW_4(__kmp_init_parallel, FALSE);
7219 if (TCR_4(__kmp_init_middle)) {
7220 #if KMP_AFFINITY_SUPPORTED
7221 __kmp_affinity_uninitialize();
7223 __kmp_cleanup_hierarchy();
7224 TCW_4(__kmp_init_middle, FALSE);
7227 KA_TRACE( 10, ( "__kmp_cleanup: go serial cleanup\n" ) );
7229 if (__kmp_init_serial) {
7230 __kmp_runtime_destroy();
7231 __kmp_init_serial = FALSE;
7234 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
7235 if ( __kmp_root[ f ] != NULL ) {
7236 __kmp_free( __kmp_root[ f ] );
7237 __kmp_root[ f ] = NULL;
7240 __kmp_free( __kmp_threads );
7243 __kmp_threads = NULL;
7245 __kmp_threads_capacity = 0;
7247 #if KMP_USE_DYNAMIC_LOCK
7248 __kmp_cleanup_indirect_user_locks();
7250 __kmp_cleanup_user_locks();
7253 #if KMP_AFFINITY_SUPPORTED
7254 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
7255 __kmp_cpuinfo_file = NULL;
7258 #if KMP_USE_ADAPTIVE_LOCKS
7259 #if KMP_DEBUG_ADAPTIVE_LOCKS
7260 __kmp_print_speculative_stats();
7263 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
7264 __kmp_nested_nth.nth = NULL;
7265 __kmp_nested_nth.size = 0;
7266 __kmp_nested_nth.used = 0;
7268 __kmp_i18n_catclose();
7270 #if KMP_STATS_ENABLED
7271 __kmp_accumulate_stats_at_exit();
7272 __kmp_stats_list.deallocate();
7275 KA_TRACE( 10, ( "__kmp_cleanup: exit\n" ) );
7282 __kmp_ignore_mppbeg( void )
7286 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
7287 if (__kmp_str_match_false( env ))
7295 __kmp_ignore_mppend( void )
7299 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
7300 if (__kmp_str_match_false( env ))
7308 __kmp_internal_begin( void )
7315 gtid = __kmp_entry_gtid();
7316 root = __kmp_threads[ gtid ]->th.th_root;
7317 KMP_ASSERT( KMP_UBER_GTID( gtid ));
7319 if( root->r.r_begin ) return;
7320 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
7321 if( root->r.r_begin ) {
7322 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7326 root->r.r_begin = TRUE;
7328 __kmp_release_lock( & root->r.r_begin_lock, gtid );
7336 __kmp_user_set_library ( enum library_type arg )
7344 gtid = __kmp_entry_gtid();
7345 thread = __kmp_threads[ gtid ];
7347 root = thread->th.th_root;
7349 KA_TRACE( 20, ( "__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
7350 if (root->r.r_in_parallel) {
7351 KMP_WARNING( SetLibraryIncorrectCall );
7356 case library_serial :
7357 thread->th.th_set_nproc = 0;
7358 set__nproc( thread, 1 );
7360 case library_turnaround :
7361 thread->th.th_set_nproc = 0;
7362 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7364 case library_throughput :
7365 thread->th.th_set_nproc = 0;
7366 set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
7369 KMP_FATAL( UnknownLibraryType, arg );
7372 __kmp_aux_set_library ( arg );
7376 __kmp_aux_set_stacksize( size_t arg )
7378 if (! __kmp_init_serial)
7379 __kmp_serial_initialize();
7382 if (arg & (0x1000 - 1)) {
7383 arg &= ~(0x1000 - 1);
7388 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7391 if (! TCR_4(__kmp_init_parallel)) {
7394 if (value < __kmp_sys_min_stksize )
7395 value = __kmp_sys_min_stksize ;
7396 else if (value > KMP_MAX_STKSIZE)
7397 value = KMP_MAX_STKSIZE;
7399 __kmp_stksize = value;
7401 __kmp_env_stksize = TRUE;
7404 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7410 __kmp_aux_set_library ( enum library_type arg )
7412 __kmp_library = arg;
7414 switch ( __kmp_library ) {
7415 case library_serial :
7417 KMP_INFORM( LibraryIsSerial );
7418 (void) __kmp_change_library( TRUE );
7421 case library_turnaround :
7422 (void) __kmp_change_library( TRUE );
7424 case library_throughput :
7425 (void) __kmp_change_library( FALSE );
7428 KMP_FATAL( UnknownLibraryType, arg );
7436 __kmp_aux_set_blocktime ( int arg, kmp_info_t *thread, int tid )
7438 int blocktime = arg;
7442 __kmp_save_internal_controls( thread );
7445 if (blocktime < KMP_MIN_BLOCKTIME)
7446 blocktime = KMP_MIN_BLOCKTIME;
7447 else if (blocktime > KMP_MAX_BLOCKTIME)
7448 blocktime = KMP_MAX_BLOCKTIME;
7450 set__blocktime_team( thread->th.th_team, tid, blocktime );
7451 set__blocktime_team( thread->th.th_serial_team, 0, blocktime );
7454 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
7456 set__bt_intervals_team( thread->th.th_team, tid, bt_intervals );
7457 set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals );
7462 set__bt_set_team( thread->th.th_team, tid, bt_set );
7463 set__bt_set_team( thread->th.th_serial_team, 0, bt_set );
7464 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
7465 __kmp_gtid_from_tid(tid, thread->th.th_team),
7466 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
7470 __kmp_aux_set_defaults(
7474 if ( ! __kmp_init_serial ) {
7475 __kmp_serial_initialize();
7477 __kmp_env_initialize( str );
7481 || __kmp_display_env || __kmp_display_env_verbose
7494 PACKED_REDUCTION_METHOD_T
7495 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
7496 kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
void (*reduce_func)(void *lhs_data, void *rhs_data),
7497 kmp_critical_name *lck )
7505 PACKED_REDUCTION_METHOD_T retval;
7509 KMP_DEBUG_ASSERT( loc );
7510 KMP_DEBUG_ASSERT( lck );
7512 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
7513 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
7515 retval = critical_reduce_block;
7517 team_size = __kmp_get_team_num_threads( global_tid );
7519 if( team_size == 1 ) {
7521 retval = empty_reduce_block;
7525 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7526 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7528 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
7530 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7532 int teamsize_cutoff = 4;
7534 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
7535 if( __kmp_mic_type != non_mic ) {
7536 teamsize_cutoff = 8;
7539 if( tree_available ) {
7540 if( team_size <= teamsize_cutoff ) {
7541 if ( atomic_available ) {
7542 retval = atomic_reduce_block;
7545 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7547 } else if ( atomic_available ) {
7548 retval = atomic_reduce_block;
7551 #error "Unknown or unsupported OS"
7552 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN
7554 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
7556 #if KMP_OS_LINUX || KMP_OS_WINDOWS
7560 if( atomic_available ) {
7561 if( num_vars <= 2 ) {
7562 retval = atomic_reduce_block;
7568 if( atomic_available && ( num_vars <= 3 ) ) {
7569 retval = atomic_reduce_block;
7570 } else if( tree_available ) {
7571 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
7572 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7577 #error "Unknown or unsupported OS"
7581 #error "Unknown or unsupported architecture"
7590 if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) {
7592 PACKED_REDUCTION_METHOD_T forced_retval;
7594 int atomic_available, tree_available;
7596 switch( ( forced_retval = __kmp_force_reduction_method ) )
7598 case critical_reduce_block:
7602 case atomic_reduce_block:
7603 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7604 KMP_ASSERT( atomic_available );
7607 case tree_reduce_block:
7608 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7609 KMP_ASSERT( tree_available );
7610 #if KMP_FAST_REDUCTION_BARRIER
7611 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7619 retval = forced_retval;
7622 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
7624 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
7625 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
7632 __kmp_get_reduce_method( void ) {
7633 return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 );
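// The packed reduction method keeps the reduction method in the upper bits and,
// for tree reductions, the barrier type in the low byte, so shifting right by 8
// appears to recover the method ordinal exposed to the compiler.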