20 #include "kmp_wrapper_getpid.h" 21 #include "kmp_affinity.h" 26 void __kmp_cleanup_hierarchy() {
27 machine_hierarchy.fini();
30 void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
33 if (TCR_1(machine_hierarchy.uninitialized))
34 machine_hierarchy.init(NULL, nproc);
37 if (nproc > machine_hierarchy.base_num_threads)
38 machine_hierarchy.resize(nproc);
40 depth = machine_hierarchy.
depth;
41 KMP_DEBUG_ASSERT(depth > 0);
43 thr_bar->depth = depth;
44 thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.
numPerLevel[0]-1;
45 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
48 #if KMP_AFFINITY_SUPPORTED 50 bool KMPAffinity::picked_api =
false;
52 void* KMPAffinity::Mask::operator
new(
size_t n) {
return __kmp_allocate(n); }
53 void* KMPAffinity::Mask::operator
new[](
size_t n) {
return __kmp_allocate(n); }
54 void KMPAffinity::Mask::operator
delete(
void* p) { __kmp_free(p); }
55 void KMPAffinity::Mask::operator
delete[](
void* p) { __kmp_free(p); }
56 void* KMPAffinity::operator
new(
size_t n) {
return __kmp_allocate(n); }
57 void KMPAffinity::operator
delete(
void* p) { __kmp_free(p); }
59 void KMPAffinity::pick_api() {
60 KMPAffinity* affinity_dispatch;
64 if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
65 affinity_dispatch =
new KMPHwlocAffinity();
69 affinity_dispatch =
new KMPNativeAffinity();
71 __kmp_affinity_dispatch = affinity_dispatch;
75 void KMPAffinity::destroy_api() {
76 if (__kmp_affinity_dispatch != NULL) {
77 delete __kmp_affinity_dispatch;
78 __kmp_affinity_dispatch = NULL;
87 __kmp_affinity_print_mask(
char *buf,
int buf_len, kmp_affin_mask_t *mask)
89 KMP_ASSERT(buf_len >= 40);
91 char *end = buf + buf_len - 1;
98 if (i == mask->end()) {
99 KMP_SNPRINTF(scan, end-scan+1,
"{<empty>}");
100 while (*scan !=
'\0') scan++;
101 KMP_ASSERT(scan <= end);
105 KMP_SNPRINTF(scan, end-scan+1,
"{%ld", (
long)i);
106 while (*scan !=
'\0') scan++;
108 for (; i != mask->end(); i = mask->next(i)) {
109 if (! KMP_CPU_ISSET(i, mask)) {
119 if (end - scan < 15) {
122 KMP_SNPRINTF(scan, end-scan+1,
",%-ld", (
long)i);
123 while (*scan !=
'\0') scan++;
125 if (i != mask->end()) {
126 KMP_SNPRINTF(scan, end-scan+1,
",...");
127 while (*scan !=
'\0') scan++;
129 KMP_SNPRINTF(scan, end-scan+1,
"}");
130 while (*scan !=
'\0') scan++;
131 KMP_ASSERT(scan <= end);
137 __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
141 # if KMP_GROUP_AFFINITY 143 if (__kmp_num_proc_groups > 1) {
145 KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
146 for (group = 0; group < __kmp_num_proc_groups; group++) {
148 int num = __kmp_GetActiveProcessorCount(group);
149 for (i = 0; i < num; i++) {
150 KMP_CPU_SET(i + group * (CHAR_BIT *
sizeof(DWORD_PTR)), mask);
160 for (proc = 0; proc < __kmp_xproc; proc++) {
161 KMP_CPU_SET(proc, mask);
180 __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
183 KMP_DEBUG_ASSERT(numAddrs > 0);
184 int depth = address2os->first.depth;
185 unsigned *counts = (
unsigned *)__kmp_allocate(depth *
sizeof(
unsigned));
186 unsigned *lastLabel = (
unsigned *)__kmp_allocate(depth
189 for (labCt = 0; labCt < depth; labCt++) {
190 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
191 lastLabel[labCt] = address2os[0].first.labels[labCt];
194 for (i = 1; i < numAddrs; i++) {
195 for (labCt = 0; labCt < depth; labCt++) {
196 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
198 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
200 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
203 lastLabel[labCt] = address2os[i].first.labels[labCt];
207 for (labCt = 0; labCt < depth; labCt++) {
208 address2os[i].first.childNums[labCt] = counts[labCt];
210 for (; labCt < (int)Address::maxDepth; labCt++) {
211 address2os[i].first.childNums[labCt] = 0;
214 __kmp_free(lastLabel);
231 kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
233 static int nCoresPerPkg, nPackages;
234 static int __kmp_nThreadsPerCore;
235 #ifndef KMP_DFLT_NTH_CORES 236 static int __kmp_ncores;
238 static int *__kmp_pu_os_idx = NULL;
247 __kmp_affinity_uniform_topology()
249 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
258 __kmp_affinity_print_topology(AddrUnsPair *address2os,
int len,
int depth,
259 int pkgLevel,
int coreLevel,
int threadLevel)
263 KMP_INFORM(OSProcToPhysicalThreadMap,
"KMP_AFFINITY");
264 for (proc = 0; proc < len; proc++) {
267 __kmp_str_buf_init(&buf);
268 for (level = 0; level < depth; level++) {
269 if (level == threadLevel) {
270 __kmp_str_buf_print(&buf,
"%s ", KMP_I18N_STR(Thread));
272 else if (level == coreLevel) {
273 __kmp_str_buf_print(&buf,
"%s ", KMP_I18N_STR(Core));
275 else if (level == pkgLevel) {
276 __kmp_str_buf_print(&buf,
"%s ", KMP_I18N_STR(Package));
278 else if (level > pkgLevel) {
279 __kmp_str_buf_print(&buf,
"%s_%d ", KMP_I18N_STR(Node),
280 level - pkgLevel - 1);
283 __kmp_str_buf_print(&buf,
"L%d ", level);
285 __kmp_str_buf_print(&buf,
"%d ",
286 address2os[proc].first.labels[level]);
288 KMP_INFORM(OSProcMapToPack,
"KMP_AFFINITY", address2os[proc].second,
290 __kmp_str_buf_free(&buf);
302 __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
int nActiveThreads,
int depth,
int* pkgLevel,
int* coreLevel,
int* threadLevel) {
307 for (level = depth-1; level >= 0; --level) {
309 if (level == *pkgLevel)
313 for (i = 1; i < nActiveThreads; ++i) {
314 if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) {
320 if (!radix1_detected)
323 if (level == *threadLevel) {
326 for (i = 0; i < nActiveThreads; ++i) {
327 address2os[i].first.depth--;
330 }
else if (level == *coreLevel) {
333 for (i = 0; i < nActiveThreads; ++i) {
334 if (*threadLevel != -1) {
335 address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel];
337 address2os[i].first.depth--;
342 return address2os[0].first.depth;
349 __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) {
352 for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0);
353 first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj;
354 first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first))
362 __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
363 kmp_i18n_id_t *
const msg_id)
366 *msg_id = kmp_i18n_null;
371 kmp_affin_mask_t *oldMask;
372 KMP_CPU_ALLOC(oldMask);
373 __kmp_get_system_affinity(oldMask, TRUE);
380 if (! KMP_AFFINITY_CAPABLE())
386 KMP_ASSERT(__kmp_affinity_type == affinity_none);
388 nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
389 __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
390 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
391 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
392 if (__kmp_affinity_verbose) {
393 KMP_INFORM(AffNotCapableUseLocCpuidL11,
"KMP_AFFINITY");
394 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
395 if (__kmp_affinity_uniform_topology()) {
396 KMP_INFORM(Uniform,
"KMP_AFFINITY");
398 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
400 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
401 __kmp_nThreadsPerCore, __kmp_ncores);
403 KMP_CPU_FREE(oldMask);
410 AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) * __kmp_avail_proc);
411 __kmp_pu_os_idx = (
int*)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
423 int nActiveThreads = 0;
424 int socket_identifier = 0;
426 __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
427 for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
429 socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
432 int core_identifier = 0;
433 int num_active_cores = 0;
434 for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
435 core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
436 core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
439 int pu_identifier = 0;
440 int num_active_threads = 0;
441 for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
442 pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
443 pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
447 if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
449 KA_TRACE(20, (
"Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
450 socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
451 addr.labels[0] = socket_identifier;
452 addr.labels[1] = core_identifier;
453 addr.labels[2] = pu_identifier;
454 retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
455 __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
457 ++num_active_threads;
459 if (num_active_threads) {
462 if (num_active_threads > __kmp_nThreadsPerCore)
463 __kmp_nThreadsPerCore = num_active_threads;
466 if (num_active_cores) {
468 if (num_active_cores > nCoresPerPkg)
469 nCoresPerPkg = num_active_cores;
476 KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
477 KMP_ASSERT(nActiveThreads > 0);
478 if (nActiveThreads == 1) {
479 __kmp_ncores = nPackages = 1;
480 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
481 if (__kmp_affinity_verbose) {
482 char buf[KMP_AFFIN_MASK_PRINT_LEN];
483 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
485 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
486 if (__kmp_affinity_respect_mask) {
487 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
489 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
491 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
492 KMP_INFORM(Uniform,
"KMP_AFFINITY");
493 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
494 __kmp_nThreadsPerCore, __kmp_ncores);
497 if (__kmp_affinity_type == affinity_none) {
499 KMP_CPU_FREE(oldMask);
507 addr.labels[0] = retval[0].first.labels[pkgLevel];
508 retval[0].first = addr;
510 if (__kmp_affinity_gran_levels < 0) {
511 __kmp_affinity_gran_levels = 0;
514 if (__kmp_affinity_verbose) {
515 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
518 *address2os = retval;
519 KMP_CPU_FREE(oldMask);
526 qsort(retval, nActiveThreads,
sizeof(*retval), __kmp_affinity_cmp_Address_labels);
531 unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
536 if (__kmp_affinity_verbose) {
537 char mask[KMP_AFFIN_MASK_PRINT_LEN];
538 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
540 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
541 if (__kmp_affinity_respect_mask) {
542 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", mask);
544 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", mask);
546 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
548 KMP_INFORM(Uniform,
"KMP_AFFINITY");
550 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
554 __kmp_str_buf_init(&buf);
556 __kmp_str_buf_print(&buf,
"%d", nPackages);
560 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, nCoresPerPkg,
561 __kmp_nThreadsPerCore, __kmp_ncores);
563 __kmp_str_buf_free(&buf);
566 if (__kmp_affinity_type == affinity_none) {
568 KMP_CPU_FREE(oldMask);
576 depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);
578 if (__kmp_affinity_gran_levels < 0) {
583 __kmp_affinity_gran_levels = 0;
584 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
585 __kmp_affinity_gran_levels++;
587 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
588 __kmp_affinity_gran_levels++;
590 if (__kmp_affinity_gran > affinity_gran_package) {
591 __kmp_affinity_gran_levels++;
595 if (__kmp_affinity_verbose) {
596 __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
597 coreLevel, threadLevel);
600 KMP_CPU_FREE(oldMask);
601 *address2os = retval;
604 #endif // KMP_USE_HWLOC 612 __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
613 kmp_i18n_id_t *
const msg_id)
616 *msg_id = kmp_i18n_null;
623 if (! KMP_AFFINITY_CAPABLE()) {
624 KMP_ASSERT(__kmp_affinity_type == affinity_none);
625 __kmp_ncores = nPackages = __kmp_xproc;
626 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
627 if (__kmp_affinity_verbose) {
628 KMP_INFORM(AffFlatTopology,
"KMP_AFFINITY");
629 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
630 KMP_INFORM(Uniform,
"KMP_AFFINITY");
631 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
632 __kmp_nThreadsPerCore, __kmp_ncores);
643 __kmp_ncores = nPackages = __kmp_avail_proc;
644 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
645 if (__kmp_affinity_verbose) {
646 char buf[KMP_AFFIN_MASK_PRINT_LEN];
647 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
649 KMP_INFORM(AffCapableUseFlat,
"KMP_AFFINITY");
650 if (__kmp_affinity_respect_mask) {
651 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
653 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
655 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
656 KMP_INFORM(Uniform,
"KMP_AFFINITY");
657 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
658 __kmp_nThreadsPerCore, __kmp_ncores);
660 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
661 __kmp_pu_os_idx = (
int*)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
662 if (__kmp_affinity_type == affinity_none) {
665 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
666 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
668 __kmp_pu_os_idx[avail_ct++] = i;
676 *address2os = (AddrUnsPair*)
677 __kmp_allocate(
sizeof(**address2os) * __kmp_avail_proc);
680 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
684 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
687 __kmp_pu_os_idx[avail_ct] = i;
690 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
692 if (__kmp_affinity_verbose) {
693 KMP_INFORM(OSProcToPackage,
"KMP_AFFINITY");
696 if (__kmp_affinity_gran_levels < 0) {
701 if (__kmp_affinity_gran > affinity_gran_package) {
702 __kmp_affinity_gran_levels = 1;
705 __kmp_affinity_gran_levels = 0;
712 # if KMP_GROUP_AFFINITY 723 __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
724 kmp_i18n_id_t *
const msg_id)
727 *msg_id = kmp_i18n_null;
733 if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
741 *address2os = (AddrUnsPair*)
742 __kmp_allocate(
sizeof(**address2os) * __kmp_avail_proc);
743 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
744 __kmp_pu_os_idx = (
int*)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
747 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
751 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
754 __kmp_pu_os_idx[avail_ct] = i;
756 addr.labels[0] = i / (CHAR_BIT *
sizeof(DWORD_PTR));
757 addr.labels[1] = i % (CHAR_BIT *
sizeof(DWORD_PTR));
758 (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
760 if (__kmp_affinity_verbose) {
761 KMP_INFORM(AffOSProcToGroup,
"KMP_AFFINITY", i, addr.labels[0],
766 if (__kmp_affinity_gran_levels < 0) {
767 if (__kmp_affinity_gran == affinity_gran_group) {
768 __kmp_affinity_gran_levels = 1;
770 else if ((__kmp_affinity_gran == affinity_gran_fine)
771 || (__kmp_affinity_gran == affinity_gran_thread)) {
772 __kmp_affinity_gran_levels = 0;
775 const char *gran_str = NULL;
776 if (__kmp_affinity_gran == affinity_gran_core) {
779 else if (__kmp_affinity_gran == affinity_gran_package) {
780 gran_str =
"package";
782 else if (__kmp_affinity_gran == affinity_gran_node) {
790 __kmp_affinity_gran_levels = 0;
799 # if KMP_ARCH_X86 || KMP_ARCH_X86_64 802 __kmp_cpuid_mask_width(
int count) {
805 while((1<<r) < count)
811 class apicThreadInfo {
815 unsigned maxCoresPerPkg;
816 unsigned maxThreadsPerPkg;
824 __kmp_affinity_cmp_apicThreadInfo_os_id(
const void *a,
const void *b)
826 const apicThreadInfo *aa = (
const apicThreadInfo *)a;
827 const apicThreadInfo *bb = (
const apicThreadInfo *)b;
828 if (aa->osId < bb->osId)
return -1;
829 if (aa->osId > bb->osId)
return 1;
835 __kmp_affinity_cmp_apicThreadInfo_phys_id(
const void *a,
const void *b)
837 const apicThreadInfo *aa = (
const apicThreadInfo *)a;
838 const apicThreadInfo *bb = (
const apicThreadInfo *)b;
839 if (aa->pkgId < bb->pkgId)
return -1;
840 if (aa->pkgId > bb->pkgId)
return 1;
841 if (aa->coreId < bb->coreId)
return -1;
842 if (aa->coreId > bb->coreId)
return 1;
843 if (aa->threadId < bb->threadId)
return -1;
844 if (aa->threadId > bb->threadId)
return 1;
856 __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
857 kmp_i18n_id_t *
const msg_id)
862 *msg_id = kmp_i18n_null;
867 __kmp_x86_cpuid(0, 0, &buf);
869 *msg_id = kmp_i18n_str_NoLeaf4Support;
881 if (! KMP_AFFINITY_CAPABLE()) {
886 KMP_ASSERT(__kmp_affinity_type == affinity_none);
896 __kmp_x86_cpuid(1, 0, &buf);
897 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
898 if (maxThreadsPerPkg == 0) {
899 maxThreadsPerPkg = 1;
915 __kmp_x86_cpuid(0, 0, &buf);
917 __kmp_x86_cpuid(4, 0, &buf);
918 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
941 __kmp_ncores = __kmp_xproc;
942 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
943 __kmp_nThreadsPerCore = 1;
944 if (__kmp_affinity_verbose) {
945 KMP_INFORM(AffNotCapableUseLocCpuid,
"KMP_AFFINITY");
946 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
947 if (__kmp_affinity_uniform_topology()) {
948 KMP_INFORM(Uniform,
"KMP_AFFINITY");
950 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
952 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
953 __kmp_nThreadsPerCore, __kmp_ncores);
968 kmp_affin_mask_t *oldMask;
969 KMP_CPU_ALLOC(oldMask);
970 KMP_ASSERT(oldMask != NULL);
971 __kmp_get_system_affinity(oldMask, TRUE);
1006 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
1007 __kmp_avail_proc *
sizeof(apicThreadInfo));
1008 unsigned nApics = 0;
1009 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1013 if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1016 KMP_DEBUG_ASSERT((
int)nApics < __kmp_avail_proc);
1018 __kmp_affinity_dispatch->bind_thread(i);
1019 threadInfo[nApics].osId = i;
1024 __kmp_x86_cpuid(1, 0, &buf);
1025 if (((buf.edx >> 9) & 1) == 0) {
1026 __kmp_set_system_affinity(oldMask, TRUE);
1027 __kmp_free(threadInfo);
1028 KMP_CPU_FREE(oldMask);
1029 *msg_id = kmp_i18n_str_ApicNotPresent;
1032 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1033 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1034 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1035 threadInfo[nApics].maxThreadsPerPkg = 1;
1046 __kmp_x86_cpuid(0, 0, &buf);
1048 __kmp_x86_cpuid(4, 0, &buf);
1049 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1052 threadInfo[nApics].maxCoresPerPkg = 1;
1059 int widthCT = __kmp_cpuid_mask_width(
1060 threadInfo[nApics].maxThreadsPerPkg);
1061 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1063 int widthC = __kmp_cpuid_mask_width(
1064 threadInfo[nApics].maxCoresPerPkg);
1065 int widthT = widthCT - widthC;
1072 __kmp_set_system_affinity(oldMask, TRUE);
1073 __kmp_free(threadInfo);
1074 KMP_CPU_FREE(oldMask);
1075 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1079 int maskC = (1 << widthC) - 1;
1080 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
1083 int maskT = (1 << widthT) - 1;
1084 threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
1093 __kmp_set_system_affinity(oldMask, TRUE);
1105 KMP_ASSERT(nApics > 0);
1107 __kmp_ncores = nPackages = 1;
1108 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1109 if (__kmp_affinity_verbose) {
1110 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1111 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1113 KMP_INFORM(AffUseGlobCpuid,
"KMP_AFFINITY");
1114 if (__kmp_affinity_respect_mask) {
1115 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
1117 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
1119 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1120 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1121 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1122 __kmp_nThreadsPerCore, __kmp_ncores);
1125 if (__kmp_affinity_type == affinity_none) {
1126 __kmp_free(threadInfo);
1127 KMP_CPU_FREE(oldMask);
1131 *address2os = (AddrUnsPair*)__kmp_allocate(
sizeof(AddrUnsPair));
1133 addr.labels[0] = threadInfo[0].pkgId;
1134 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1136 if (__kmp_affinity_gran_levels < 0) {
1137 __kmp_affinity_gran_levels = 0;
1140 if (__kmp_affinity_verbose) {
1141 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1144 __kmp_free(threadInfo);
1145 KMP_CPU_FREE(oldMask);
1152 qsort(threadInfo, nApics,
sizeof(*threadInfo),
1153 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1172 __kmp_nThreadsPerCore = 1;
1173 unsigned nCores = 1;
1176 unsigned lastPkgId = threadInfo[0].pkgId;
1177 unsigned coreCt = 1;
1178 unsigned lastCoreId = threadInfo[0].coreId;
1179 unsigned threadCt = 1;
1180 unsigned lastThreadId = threadInfo[0].threadId;
1183 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1184 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1186 for (i = 1; i < nApics; i++) {
1187 if (threadInfo[i].pkgId != lastPkgId) {
1190 lastPkgId = threadInfo[i].pkgId;
1191 if ((
int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1193 lastCoreId = threadInfo[i].coreId;
1194 if ((
int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1196 lastThreadId = threadInfo[i].threadId;
1203 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1204 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1208 if (threadInfo[i].coreId != lastCoreId) {
1211 lastCoreId = threadInfo[i].coreId;
1212 if ((
int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1214 lastThreadId = threadInfo[i].threadId;
1216 else if (threadInfo[i].threadId != lastThreadId) {
1218 lastThreadId = threadInfo[i].threadId;
1221 __kmp_free(threadInfo);
1222 KMP_CPU_FREE(oldMask);
1223 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1231 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
1232 || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1233 __kmp_free(threadInfo);
1234 KMP_CPU_FREE(oldMask);
1235 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1240 if ((
int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
1241 if ((
int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
1249 __kmp_ncores = nCores;
1250 if (__kmp_affinity_verbose) {
1251 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1252 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1254 KMP_INFORM(AffUseGlobCpuid,
"KMP_AFFINITY");
1255 if (__kmp_affinity_respect_mask) {
1256 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
1258 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
1260 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1261 if (__kmp_affinity_uniform_topology()) {
1262 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1264 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1266 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1267 __kmp_nThreadsPerCore, __kmp_ncores);
1270 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1271 KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1272 __kmp_pu_os_idx = (
int*)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
1273 for (i = 0; i < nApics; ++i) {
1274 __kmp_pu_os_idx[i] = threadInfo[i].osId;
1276 if (__kmp_affinity_type == affinity_none) {
1277 __kmp_free(threadInfo);
1278 KMP_CPU_FREE(oldMask);
1288 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1289 int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1290 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1292 KMP_ASSERT(depth > 0);
1293 *address2os = (AddrUnsPair*)__kmp_allocate(
sizeof(AddrUnsPair) * nApics);
1295 for (i = 0; i < nApics; ++i) {
1296 Address addr(depth);
1297 unsigned os = threadInfo[i].osId;
1300 if (pkgLevel >= 0) {
1301 addr.labels[d++] = threadInfo[i].pkgId;
1303 if (coreLevel >= 0) {
1304 addr.labels[d++] = threadInfo[i].coreId;
1306 if (threadLevel >= 0) {
1307 addr.labels[d++] = threadInfo[i].threadId;
1309 (*address2os)[i] = AddrUnsPair(addr, os);
1312 if (__kmp_affinity_gran_levels < 0) {
1317 __kmp_affinity_gran_levels = 0;
1318 if ((threadLevel >= 0)
1319 && (__kmp_affinity_gran > affinity_gran_thread)) {
1320 __kmp_affinity_gran_levels++;
1322 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1323 __kmp_affinity_gran_levels++;
1325 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1326 __kmp_affinity_gran_levels++;
1330 if (__kmp_affinity_verbose) {
1331 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1332 coreLevel, threadLevel);
1335 __kmp_free(threadInfo);
1336 KMP_CPU_FREE(oldMask);
1347 __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1348 kmp_i18n_id_t *
const msg_id)
1353 *msg_id = kmp_i18n_null;
1358 __kmp_x86_cpuid(0, 0, &buf);
1360 *msg_id = kmp_i18n_str_NoLeaf11Support;
1363 __kmp_x86_cpuid(11, 0, &buf);
1365 *msg_id = kmp_i18n_str_NoLeaf11Support;
1376 int threadLevel = -1;
1379 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1381 for (level = 0;; level++) {
1394 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1397 __kmp_x86_cpuid(11, level, &buf);
1408 int kind = (buf.ecx >> 8) & 0xff;
1413 threadLevel = level;
1416 __kmp_nThreadsPerCore = buf.ebx & 0xffff;
1417 if (__kmp_nThreadsPerCore == 0) {
1418 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1422 else if (kind == 2) {
1428 nCoresPerPkg = buf.ebx & 0xffff;
1429 if (nCoresPerPkg == 0) {
1430 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1436 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1439 if (pkgLevel >= 0) {
1443 nPackages = buf.ebx & 0xffff;
1444 if (nPackages == 0) {
1445 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1458 if (threadLevel >= 0) {
1459 threadLevel = depth - threadLevel - 1;
1461 if (coreLevel >= 0) {
1462 coreLevel = depth - coreLevel - 1;
1464 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1465 pkgLevel = depth - pkgLevel - 1;
1475 if (! KMP_AFFINITY_CAPABLE())
1481 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1483 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1484 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1485 if (__kmp_affinity_verbose) {
1486 KMP_INFORM(AffNotCapableUseLocCpuidL11,
"KMP_AFFINITY");
1487 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1488 if (__kmp_affinity_uniform_topology()) {
1489 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1491 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1493 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1494 __kmp_nThreadsPerCore, __kmp_ncores);
1509 kmp_affin_mask_t *oldMask;
1510 KMP_CPU_ALLOC(oldMask);
1511 __kmp_get_system_affinity(oldMask, TRUE);
1516 AddrUnsPair *retval = (AddrUnsPair *)
1517 __kmp_allocate(
sizeof(AddrUnsPair) * __kmp_avail_proc);
1525 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
1529 if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
1532 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1534 __kmp_affinity_dispatch->bind_thread(proc);
1540 Address addr(depth);
1543 for (level = 0; level < depth; level++) {
1544 __kmp_x86_cpuid(11, level, &buf);
1545 unsigned apicId = buf.edx;
1547 if (level != depth - 1) {
1548 KMP_CPU_FREE(oldMask);
1549 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1552 addr.labels[depth - level - 1] = apicId >> prev_shift;
1556 int shift = buf.eax & 0x1f;
1557 int mask = (1 << shift) - 1;
1558 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1561 if (level != depth) {
1562 KMP_CPU_FREE(oldMask);
1563 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1567 retval[nApics] = AddrUnsPair(addr, proc);
1575 __kmp_set_system_affinity(oldMask, TRUE);
1580 KMP_ASSERT(nApics > 0);
1582 __kmp_ncores = nPackages = 1;
1583 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1584 if (__kmp_affinity_verbose) {
1585 char buf[KMP_AFFIN_MASK_PRINT_LEN];
1586 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1588 KMP_INFORM(AffUseGlobCpuidL11,
"KMP_AFFINITY");
1589 if (__kmp_affinity_respect_mask) {
1590 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
1592 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
1594 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1595 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1596 KMP_INFORM(Topology,
"KMP_AFFINITY", nPackages, nCoresPerPkg,
1597 __kmp_nThreadsPerCore, __kmp_ncores);
1600 if (__kmp_affinity_type == affinity_none) {
1602 KMP_CPU_FREE(oldMask);
1610 addr.labels[0] = retval[0].first.labels[pkgLevel];
1611 retval[0].first = addr;
1613 if (__kmp_affinity_gran_levels < 0) {
1614 __kmp_affinity_gran_levels = 0;
1617 if (__kmp_affinity_verbose) {
1618 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1621 *address2os = retval;
1622 KMP_CPU_FREE(oldMask);
1629 qsort(retval, nApics,
sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1634 unsigned *totals = (
unsigned *)__kmp_allocate(depth *
sizeof(
unsigned));
1635 unsigned *counts = (
unsigned *)__kmp_allocate(depth *
sizeof(
unsigned));
1636 unsigned *maxCt = (
unsigned *)__kmp_allocate(depth *
sizeof(
unsigned));
1637 unsigned *last = (
unsigned *)__kmp_allocate(depth *
sizeof(
unsigned));
1638 for (level = 0; level < depth; level++) {
1642 last[level] = retval[0].first.labels[level];
1651 for (proc = 1; (int)proc < nApics; proc++) {
1653 for (level = 0; level < depth; level++) {
1654 if (retval[proc].first.labels[level] != last[level]) {
1656 for (j = level + 1; j < depth; j++) {
1666 last[j] = retval[proc].first.labels[j];
1670 if (counts[level] > maxCt[level]) {
1671 maxCt[level] = counts[level];
1673 last[level] = retval[proc].first.labels[level];
1676 else if (level == depth - 1) {
1682 KMP_CPU_FREE(oldMask);
1683 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1695 if (threadLevel >= 0) {
1696 __kmp_nThreadsPerCore = maxCt[threadLevel];
1699 __kmp_nThreadsPerCore = 1;
1701 nPackages = totals[pkgLevel];
1703 if (coreLevel >= 0) {
1704 __kmp_ncores = totals[coreLevel];
1705 nCoresPerPkg = maxCt[coreLevel];
1708 __kmp_ncores = nPackages;
1715 unsigned prod = maxCt[0];
1716 for (level = 1; level < depth; level++) {
1717 prod *= maxCt[level];
1719 bool uniform = (prod == totals[level - 1]);
1724 if (__kmp_affinity_verbose) {
1725 char mask[KMP_AFFIN_MASK_PRINT_LEN];
1726 __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
1728 KMP_INFORM(AffUseGlobCpuidL11,
"KMP_AFFINITY");
1729 if (__kmp_affinity_respect_mask) {
1730 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", mask);
1732 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", mask);
1734 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
1736 KMP_INFORM(Uniform,
"KMP_AFFINITY");
1738 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
1742 __kmp_str_buf_init(&buf);
1744 __kmp_str_buf_print(&buf,
"%d", totals[0]);
1745 for (level = 1; level <= pkgLevel; level++) {
1746 __kmp_str_buf_print(&buf,
" x %d", maxCt[level]);
1748 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, nCoresPerPkg,
1749 __kmp_nThreadsPerCore, __kmp_ncores);
1751 __kmp_str_buf_free(&buf);
1753 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1754 KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1755 __kmp_pu_os_idx = (
int*)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
1756 for (proc = 0; (int)proc < nApics; ++proc) {
1757 __kmp_pu_os_idx[proc] = retval[proc].second;
1759 if (__kmp_affinity_type == affinity_none) {
1765 KMP_CPU_FREE(oldMask);
1774 for (level = 0; level < depth; level++) {
1775 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1785 if (new_depth != depth) {
1786 AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
1787 sizeof(AddrUnsPair) * nApics);
1788 for (proc = 0; (int)proc < nApics; proc++) {
1789 Address addr(new_depth);
1790 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1793 int newPkgLevel = -1;
1794 int newCoreLevel = -1;
1795 int newThreadLevel = -1;
1797 for (level = 0; level < depth; level++) {
1798 if ((maxCt[level] == 1)
1799 && (level != pkgLevel)) {
1805 if (level == pkgLevel) {
1806 newPkgLevel = level;
1808 if (level == coreLevel) {
1809 newCoreLevel = level;
1811 if (level == threadLevel) {
1812 newThreadLevel = level;
1814 for (proc = 0; (int)proc < nApics; proc++) {
1815 new_retval[proc].first.labels[new_level]
1816 = retval[proc].first.labels[level];
1822 retval = new_retval;
1824 pkgLevel = newPkgLevel;
1825 coreLevel = newCoreLevel;
1826 threadLevel = newThreadLevel;
1829 if (__kmp_affinity_gran_levels < 0) {
1834 __kmp_affinity_gran_levels = 0;
1835 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1836 __kmp_affinity_gran_levels++;
1838 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1839 __kmp_affinity_gran_levels++;
1841 if (__kmp_affinity_gran > affinity_gran_package) {
1842 __kmp_affinity_gran_levels++;
1846 if (__kmp_affinity_verbose) {
1847 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
1848 coreLevel, threadLevel);
1855 KMP_CPU_FREE(oldMask);
1856 *address2os = retval;
1865 #define threadIdIndex 1 1866 #define coreIdIndex 2 1867 #define pkgIdIndex 3 1868 #define nodeIdIndex 4 1870 typedef unsigned *ProcCpuInfo;
1871 static unsigned maxIndex = pkgIdIndex;
1875 __kmp_affinity_cmp_ProcCpuInfo_os_id(
const void *a,
const void *b)
1877 const unsigned *aa = (
const unsigned *)a;
1878 const unsigned *bb = (
const unsigned *)b;
1879 if (aa[osIdIndex] < bb[osIdIndex])
return -1;
1880 if (aa[osIdIndex] > bb[osIdIndex])
return 1;
1886 __kmp_affinity_cmp_ProcCpuInfo_phys_id(
const void *a,
const void *b)
1889 const unsigned *aa = *((
const unsigned **)a);
1890 const unsigned *bb = *((
const unsigned **)b);
1891 for (i = maxIndex; ; i--) {
1892 if (aa[i] < bb[i])
return -1;
1893 if (aa[i] > bb[i])
return 1;
1894 if (i == osIdIndex)
break;
1905 __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
int *line,
1906 kmp_i18n_id_t *
const msg_id, FILE *f)
1909 *msg_id = kmp_i18n_null;
1916 unsigned num_records = 0;
1918 buf[
sizeof(buf) - 1] = 1;
1919 if (! fgets(buf,
sizeof(buf), f)) {
1926 char s1[] =
"processor";
1927 if (strncmp(buf, s1,
sizeof(s1) - 1) == 0) {
1936 if (KMP_SSCANF(buf,
"node_%d id", &level) == 1) {
1937 if (nodeIdIndex + level >= maxIndex) {
1938 maxIndex = nodeIdIndex + level;
1949 if (num_records == 0) {
1951 *msg_id = kmp_i18n_str_NoProcRecords;
1954 if (num_records > (
unsigned)__kmp_xproc) {
1956 *msg_id = kmp_i18n_str_TooManyProcRecords;
1967 if (fseek(f, 0, SEEK_SET) != 0) {
1969 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
1977 unsigned **threadInfo = (
unsigned **)__kmp_allocate((num_records + 1)
1978 *
sizeof(
unsigned *));
1980 for (i = 0; i <= num_records; i++) {
1981 threadInfo[i] = (
unsigned *)__kmp_allocate((maxIndex + 1)
1982 *
sizeof(unsigned));
1985 #define CLEANUP_THREAD_INFO \ 1986 for (i = 0; i <= num_records; i++) { \ 1987 __kmp_free(threadInfo[i]); \ 1989 __kmp_free(threadInfo); 1996 #define INIT_PROC_INFO(p) \ 1997 for (__index = 0; __index <= maxIndex; __index++) { \ 1998 (p)[__index] = UINT_MAX; \ 2001 for (i = 0; i <= num_records; i++) {
2002 INIT_PROC_INFO(threadInfo[i]);
2005 unsigned num_avail = 0;
2015 buf[
sizeof(buf) - 1] = 1;
2016 bool long_line =
false;
2017 if (! fgets(buf,
sizeof(buf), f)) {
2025 for (i = 0; i <= maxIndex; i++) {
2026 if (threadInfo[num_avail][i] != UINT_MAX) {
2034 }
else if (!buf[
sizeof(buf) - 1]) {
2041 #define CHECK_LINE \ 2043 CLEANUP_THREAD_INFO; \ 2044 *msg_id = kmp_i18n_str_LongLineCpuinfo; \ 2050 char s1[] =
"processor";
2051 if (strncmp(buf, s1,
sizeof(s1) - 1) == 0) {
2053 char *p = strchr(buf +
sizeof(s1) - 1,
':');
2055 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
goto no_val;
2056 if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
goto dup_field;
2057 threadInfo[num_avail][osIdIndex] = val;
2058 #if KMP_OS_LINUX && USE_SYSFS_INFO 2060 KMP_SNPRINTF(path,
sizeof(path),
2061 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2062 threadInfo[num_avail][osIdIndex]);
2063 __kmp_read_from_file(path,
"%u", &threadInfo[num_avail][pkgIdIndex]);
2065 KMP_SNPRINTF(path,
sizeof(path),
2066 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2067 threadInfo[num_avail][osIdIndex]);
2068 __kmp_read_from_file(path,
"%u", &threadInfo[num_avail][coreIdIndex]);
2072 char s2[] =
"physical id";
2073 if (strncmp(buf, s2,
sizeof(s2) - 1) == 0) {
2075 char *p = strchr(buf +
sizeof(s2) - 1,
':');
2077 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
goto no_val;
2078 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
goto dup_field;
2079 threadInfo[num_avail][pkgIdIndex] = val;
2082 char s3[] =
"core id";
2083 if (strncmp(buf, s3,
sizeof(s3) - 1) == 0) {
2085 char *p = strchr(buf +
sizeof(s3) - 1,
':');
2087 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
goto no_val;
2088 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
goto dup_field;
2089 threadInfo[num_avail][coreIdIndex] = val;
2091 #endif // KMP_OS_LINUX && USE_SYSFS_INFO 2093 char s4[] =
"thread id";
2094 if (strncmp(buf, s4,
sizeof(s4) - 1) == 0) {
2096 char *p = strchr(buf +
sizeof(s4) - 1,
':');
2098 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
goto no_val;
2099 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
goto dup_field;
2100 threadInfo[num_avail][threadIdIndex] = val;
2104 if (KMP_SSCANF(buf,
"node_%d id", &level) == 1) {
2106 char *p = strchr(buf +
sizeof(s4) - 1,
':');
2108 if ((p == NULL) || (KMP_SSCANF(p + 1,
"%u\n", &val) != 1))
goto no_val;
2109 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2110 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
goto dup_field;
2111 threadInfo[num_avail][nodeIdIndex + level] = val;
2120 if ((*buf != 0) && (*buf !=
'\n')) {
2127 while (((ch = fgetc(f)) != EOF) && (ch !=
'\n'));
2136 if ((
int)num_avail == __kmp_xproc) {
2137 CLEANUP_THREAD_INFO;
2138 *msg_id = kmp_i18n_str_TooManyEntries;
2146 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2147 CLEANUP_THREAD_INFO;
2148 *msg_id = kmp_i18n_str_MissingProcField;
2151 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2152 CLEANUP_THREAD_INFO;
2153 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2160 if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) {
2161 INIT_PROC_INFO(threadInfo[num_avail]);
2170 KMP_ASSERT(num_avail <= num_records);
2171 INIT_PROC_INFO(threadInfo[num_avail]);
2176 CLEANUP_THREAD_INFO;
2177 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2181 CLEANUP_THREAD_INFO;
2182 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2187 # if KMP_MIC && REDUCE_TEAM_SIZE 2188 unsigned teamSize = 0;
2189 # endif // KMP_MIC && REDUCE_TEAM_SIZE 2203 KMP_ASSERT(num_avail > 0);
2204 KMP_ASSERT(num_avail <= num_records);
2205 if (num_avail == 1) {
2207 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2208 if (__kmp_affinity_verbose) {
2209 if (! KMP_AFFINITY_CAPABLE()) {
2210 KMP_INFORM(AffNotCapableUseCpuinfo,
"KMP_AFFINITY");
2211 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2212 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2215 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2216 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
2217 __kmp_affin_fullMask);
2218 KMP_INFORM(AffCapableUseCpuinfo,
"KMP_AFFINITY");
2219 if (__kmp_affinity_respect_mask) {
2220 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
2222 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
2224 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2225 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2229 __kmp_str_buf_init(&buf);
2230 __kmp_str_buf_print(&buf,
"1");
2231 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2232 __kmp_str_buf_print(&buf,
" x 1");
2234 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, 1, 1, 1);
2235 __kmp_str_buf_free(&buf);
2238 if (__kmp_affinity_type == affinity_none) {
2239 CLEANUP_THREAD_INFO;
2243 *address2os = (AddrUnsPair*)__kmp_allocate(
sizeof(AddrUnsPair));
2245 addr.labels[0] = threadInfo[0][pkgIdIndex];
2246 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2248 if (__kmp_affinity_gran_levels < 0) {
2249 __kmp_affinity_gran_levels = 0;
2252 if (__kmp_affinity_verbose) {
2253 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2256 CLEANUP_THREAD_INFO;
2263 qsort(threadInfo, num_avail,
sizeof(*threadInfo),
2264 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2277 unsigned *counts = (
unsigned *)__kmp_allocate((maxIndex + 1)
2278 *
sizeof(unsigned));
2279 unsigned *maxCt = (
unsigned *)__kmp_allocate((maxIndex + 1)
2280 *
sizeof(unsigned));
2281 unsigned *totals = (
unsigned *)__kmp_allocate((maxIndex + 1)
2282 *
sizeof(unsigned));
2283 unsigned *lastId = (
unsigned *)__kmp_allocate((maxIndex + 1)
2284 *
sizeof(unsigned));
2286 bool assign_thread_ids =
false;
2287 unsigned threadIdCt;
2290 restart_radix_check:
2296 if (assign_thread_ids) {
2297 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2298 threadInfo[0][threadIdIndex] = threadIdCt++;
2300 else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2301 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2304 for (index = 0; index <= maxIndex; index++) {
2308 lastId[index] = threadInfo[0][index];;
2314 for (i = 1; i < num_avail; i++) {
2319 for (index = maxIndex; index >= threadIdIndex; index--) {
2320 if (assign_thread_ids && (index == threadIdIndex)) {
2324 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2325 threadInfo[i][threadIdIndex] = threadIdCt++;
2333 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2334 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2337 if (threadInfo[i][index] != lastId[index]) {
2346 for (index2 = threadIdIndex; index2 < index; index2++) {
2348 if (counts[index2] > maxCt[index2]) {
2349 maxCt[index2] = counts[index2];
2352 lastId[index2] = threadInfo[i][index2];
2356 lastId[index] = threadInfo[i][index];
2358 if (assign_thread_ids && (index > threadIdIndex)) {
2360 # if KMP_MIC && REDUCE_TEAM_SIZE 2365 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2366 # endif // KMP_MIC && REDUCE_TEAM_SIZE 2376 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2377 threadInfo[i][threadIdIndex] = threadIdCt++;
2385 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2386 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2392 if (index < threadIdIndex) {
2398 if ((threadInfo[i][threadIdIndex] != UINT_MAX)
2399 || assign_thread_ids) {
2404 CLEANUP_THREAD_INFO;
2405 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2414 assign_thread_ids =
true;
2415 goto restart_radix_check;
2419 # if KMP_MIC && REDUCE_TEAM_SIZE 2424 teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
2425 # endif // KMP_MIC && REDUCE_TEAM_SIZE 2427 for (index = threadIdIndex; index <= maxIndex; index++) {
2428 if (counts[index] > maxCt[index]) {
2429 maxCt[index] = counts[index];
2433 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2434 nCoresPerPkg = maxCt[coreIdIndex];
2435 nPackages = totals[pkgIdIndex];
2440 unsigned prod = totals[maxIndex];
2441 for (index = threadIdIndex; index < maxIndex; index++) {
2442 prod *= maxCt[index];
2444 bool uniform = (prod == totals[threadIdIndex]);
2452 __kmp_ncores = totals[coreIdIndex];
2454 if (__kmp_affinity_verbose) {
2455 if (! KMP_AFFINITY_CAPABLE()) {
2456 KMP_INFORM(AffNotCapableUseCpuinfo,
"KMP_AFFINITY");
2457 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2459 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2461 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
2465 char buf[KMP_AFFIN_MASK_PRINT_LEN];
2466 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
2467 KMP_INFORM(AffCapableUseCpuinfo,
"KMP_AFFINITY");
2468 if (__kmp_affinity_respect_mask) {
2469 KMP_INFORM(InitOSProcSetRespect,
"KMP_AFFINITY", buf);
2471 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_AFFINITY", buf);
2473 KMP_INFORM(AvailableOSProc,
"KMP_AFFINITY", __kmp_avail_proc);
2475 KMP_INFORM(Uniform,
"KMP_AFFINITY");
2477 KMP_INFORM(NonUniform,
"KMP_AFFINITY");
2481 __kmp_str_buf_init(&buf);
2483 __kmp_str_buf_print(&buf,
"%d", totals[maxIndex]);
2484 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2485 __kmp_str_buf_print(&buf,
" x %d", maxCt[index]);
2487 KMP_INFORM(TopologyExtra,
"KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2488 maxCt[threadIdIndex], __kmp_ncores);
2490 __kmp_str_buf_free(&buf);
2493 # if KMP_MIC && REDUCE_TEAM_SIZE 2497 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2498 __kmp_dflt_team_nth = teamSize;
2499 KA_TRACE(20, (
"__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
2500 __kmp_dflt_team_nth));
2502 # endif // KMP_MIC && REDUCE_TEAM_SIZE 2504 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
2505 KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
2506 __kmp_pu_os_idx = (
int*)__kmp_allocate(
sizeof(
int) * __kmp_avail_proc);
2507 for (i = 0; i < num_avail; ++i) {
2508 __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
2511 if (__kmp_affinity_type == affinity_none) {
2516 CLEANUP_THREAD_INFO;
2527 bool *inMap = (
bool *)__kmp_allocate((maxIndex + 1) *
sizeof(bool));
2529 for (index = threadIdIndex; index < maxIndex; index++) {
2530 KMP_ASSERT(totals[index] >= totals[index + 1]);
2531 inMap[index] = (totals[index] > totals[index + 1]);
2533 inMap[maxIndex] = (totals[maxIndex] > 1);
2534 inMap[pkgIdIndex] =
true;
2537 for (index = threadIdIndex; index <= maxIndex; index++) {
2542 KMP_ASSERT(depth > 0);
2547 *address2os = (AddrUnsPair*)
2548 __kmp_allocate(
sizeof(AddrUnsPair) * num_avail);
2551 int threadLevel = -1;
2553 for (i = 0; i < num_avail; ++i) {
2554 Address addr(depth);
2555 unsigned os = threadInfo[i][osIdIndex];
2559 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2560 if (! inMap[src_index]) {
2563 addr.labels[dst_index] = threadInfo[i][src_index];
2564 if (src_index == pkgIdIndex) {
2565 pkgLevel = dst_index;
2567 else if (src_index == coreIdIndex) {
2568 coreLevel = dst_index;
2570 else if (src_index == threadIdIndex) {
2571 threadLevel = dst_index;
2575 (*address2os)[i] = AddrUnsPair(addr, os);
2578 if (__kmp_affinity_gran_levels < 0) {
2584 __kmp_affinity_gran_levels = 0;
2585 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2586 if (! inMap[src_index]) {
2589 switch (src_index) {
2591 if (__kmp_affinity_gran > affinity_gran_thread) {
2592 __kmp_affinity_gran_levels++;
2597 if (__kmp_affinity_gran > affinity_gran_core) {
2598 __kmp_affinity_gran_levels++;
2603 if (__kmp_affinity_gran > affinity_gran_package) {
2604 __kmp_affinity_gran_levels++;
2611 if (__kmp_affinity_verbose) {
2612 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2613 coreLevel, threadLevel);
2621 CLEANUP_THREAD_INFO;
2631 static kmp_affin_mask_t *
2632 __kmp_create_masks(
unsigned *maxIndex,
unsigned *numUnique,
2633 AddrUnsPair *address2os,
unsigned numAddrs)
2642 KMP_ASSERT(numAddrs > 0);
2643 depth = address2os[0].first.depth;
2646 for (i = 0; i < numAddrs; i++) {
2647 unsigned osId = address2os[i].second;
2648 if (osId > maxOsId) {
2652 kmp_affin_mask_t *osId2Mask;
2653 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
2660 qsort(address2os, numAddrs,
sizeof(*address2os),
2661 __kmp_affinity_cmp_Address_labels);
2663 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2664 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2665 KMP_INFORM(ThreadsMigrate,
"KMP_AFFINITY", __kmp_affinity_gran_levels);
2667 if (__kmp_affinity_gran_levels >= (
int)depth) {
2668 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2669 && (__kmp_affinity_type != affinity_none))) {
2670 KMP_WARNING(AffThreadsMayMigrate);
2680 unsigned unique = 0;
2682 unsigned leader = 0;
2683 Address *leaderAddr = &(address2os[0].first);
2684 kmp_affin_mask_t *sum;
2685 KMP_CPU_ALLOC_ON_STACK(sum);
2687 KMP_CPU_SET(address2os[0].second, sum);
2688 for (i = 1; i < numAddrs; i++) {
2694 if (leaderAddr->isClose(address2os[i].first,
2695 __kmp_affinity_gran_levels)) {
2696 KMP_CPU_SET(address2os[i].second, sum);
2705 for (; j < i; j++) {
2706 unsigned osId = address2os[j].second;
2707 KMP_DEBUG_ASSERT(osId <= maxOsId);
2708 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2709 KMP_CPU_COPY(mask, sum);
2710 address2os[j].first.leader = (j == leader);
2718 leaderAddr = &(address2os[i].first);
2720 KMP_CPU_SET(address2os[i].second, sum);
2727 for (; j < i; j++) {
2728 unsigned osId = address2os[j].second;
2729 KMP_DEBUG_ASSERT(osId <= maxOsId);
2730 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2731 KMP_CPU_COPY(mask, sum);
2732 address2os[j].first.leader = (j == leader);
2735 KMP_CPU_FREE_FROM_STACK(sum);
2737 *maxIndex = maxOsId;
2738 *numUnique = unique;
2748 static kmp_affin_mask_t *newMasks;
2749 static int numNewMasks;
2750 static int nextNewMask;
2752 #define ADD_MASK(_mask) \ 2754 if (nextNewMask >= numNewMasks) { \ 2757 kmp_affin_mask_t* temp; \ 2758 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \ 2759 for(i=0;i<numNewMasks/2;i++) { \ 2760 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \ 2761 kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \ 2762 KMP_CPU_COPY(dest, src); \ 2764 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \ 2767 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \ 2771 #define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \ 2773 if (((_osId) > _maxOsId) || \ 2774 (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ 2775 if (__kmp_affinity_verbose || (__kmp_affinity_warnings \ 2776 && (__kmp_affinity_type != affinity_none))) { \ 2777 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ 2781 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ 2791 __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2792 unsigned int *out_numMasks,
const char *proclist,
2793 kmp_affin_mask_t *osId2Mask,
int maxOsId)
2796 const char *scan = proclist;
2797 const char *next = proclist;
2804 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
2806 kmp_affin_mask_t *sumMask;
2807 KMP_CPU_ALLOC(sumMask);
2811 int start, end, stride;
2815 if (*next ==
'\0') {
2829 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
2832 num = __kmp_str_to_int(scan, *next);
2833 KMP_ASSERT2(num >= 0,
"bad explicit proc list");
2838 if ((num > maxOsId) ||
2839 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2840 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2841 && (__kmp_affinity_type != affinity_none))) {
2842 KMP_WARNING(AffIgnoreInvalidProcID, num);
2844 KMP_CPU_ZERO(sumMask);
2847 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2873 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
2874 "bad explicit proc list");
2877 num = __kmp_str_to_int(scan, *next);
2878 KMP_ASSERT2(num >= 0,
"bad explicit proc list");
2883 if ((num > maxOsId) ||
2884 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2885 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
2886 && (__kmp_affinity_type != affinity_none))) {
2887 KMP_WARNING(AffIgnoreInvalidProcID, num);
2891 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2910 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2912 start = __kmp_str_to_int(scan, *next);
2913 KMP_ASSERT2(start >= 0,
"bad explicit proc list");
2920 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2938 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2940 end = __kmp_str_to_int(scan, *next);
2941 KMP_ASSERT2(end >= 0,
"bad explicit proc list");
2962 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
2963 "bad explicit proc list");
2965 stride = __kmp_str_to_int(scan, *next);
2966 KMP_ASSERT2(stride >= 0,
"bad explicit proc list");
2973 KMP_ASSERT2(stride != 0,
"bad explicit proc list");
2975 KMP_ASSERT2(start <= end,
"bad explicit proc list");
2978 KMP_ASSERT2(start >= end,
"bad explicit proc list");
2980 KMP_ASSERT2((end - start) / stride <= 65536,
"bad explicit proc list");
2987 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2989 }
while (start <= end);
2993 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2995 }
while (start >= end);
3008 *out_numMasks = nextNewMask;
3009 if (nextNewMask == 0) {
3011 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3014 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3015 for(i = 0; i < nextNewMask; i++) {
3016 kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
3017 kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
3018 KMP_CPU_COPY(dest, src);
3020 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3021 KMP_CPU_FREE(sumMask);
3051 __kmp_process_subplace_list(
const char **scan, kmp_affin_mask_t *osId2Mask,
3052 int maxOsId, kmp_affin_mask_t *tempMask,
int *setSize)
3057 int start, count, stride, i;
3063 KMP_ASSERT2((**scan >=
'0') && (**scan <=
'9'),
3064 "bad explicit places list");
3067 start = __kmp_str_to_int(*scan, *next);
3068 KMP_ASSERT(start >= 0);
3075 if (**scan ==
'}' || **scan ==
',') {
3076 if ((start > maxOsId) ||
3077 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3078 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3079 && (__kmp_affinity_type != affinity_none))) {
3080 KMP_WARNING(AffIgnoreInvalidProcID, start);
3084 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3087 if (**scan ==
'}') {
3093 KMP_ASSERT2(**scan ==
':',
"bad explicit places list");
3100 KMP_ASSERT2((**scan >=
'0') && (**scan <=
'9'),
3101 "bad explicit places list");
3104 count = __kmp_str_to_int(*scan, *next);
3105 KMP_ASSERT(count >= 0);
3112 if (**scan ==
'}' || **scan ==
',') {
3113 for (i = 0; i < count; i++) {
3114 if ((start > maxOsId) ||
3115 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3116 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3117 && (__kmp_affinity_type != affinity_none))) {
3118 KMP_WARNING(AffIgnoreInvalidProcID, start);
3123 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3128 if (**scan ==
'}') {
3134 KMP_ASSERT2(**scan ==
':',
"bad explicit places list");
3143 if (**scan ==
'+') {
3147 if (**scan ==
'-') {
3155 KMP_ASSERT2((**scan >=
'0') && (**scan <=
'9'),
3156 "bad explicit places list");
3159 stride = __kmp_str_to_int(*scan, *next);
3160 KMP_ASSERT(stride >= 0);
3168 if (**scan ==
'}' || **scan ==
',') {
3169 for (i = 0; i < count; i++) {
3170 if ((start > maxOsId) ||
3171 (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3172 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3173 && (__kmp_affinity_type != affinity_none))) {
3174 KMP_WARNING(AffIgnoreInvalidProcID, start);
3179 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3184 if (**scan ==
'}') {
3191 KMP_ASSERT2(0,
"bad explicit places list");
3197 __kmp_process_place(
const char **scan, kmp_affin_mask_t *osId2Mask,
3198 int maxOsId, kmp_affin_mask_t *tempMask,
int *setSize)
3206 if (**scan ==
'{') {
3208 __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
3210 KMP_ASSERT2(**scan ==
'}',
"bad explicit places list");
3213 else if (**scan ==
'!') {
3215 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3216 KMP_CPU_COMPLEMENT(maxOsId, tempMask);
3218 else if ((**scan >=
'0') && (**scan <=
'9')) {
3221 int num = __kmp_str_to_int(*scan, *next);
3222 KMP_ASSERT(num >= 0);
3223 if ((num > maxOsId) ||
3224 (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3225 if (__kmp_affinity_verbose || (__kmp_affinity_warnings
3226 && (__kmp_affinity_type != affinity_none))) {
3227 KMP_WARNING(AffIgnoreInvalidProcID, num);
3231 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3237 KMP_ASSERT2(0,
"bad explicit places list");
static void
__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *placelist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    int i,j,count,stride,sign;
    const char *scan = placelist;
    const char *next = placelist;

    unsigned numNewMasks = 2;
    kmp_affin_mask_t *newMasks;
    KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
    unsigned nextNewMask = 0;

    // tempMask is modified based on the previous or initial
    // place to form the current place
    // previousMask contains the previous place
    kmp_affin_mask_t *tempMask;
    kmp_affin_mask_t *previousMask;
    KMP_CPU_ALLOC(tempMask);
    KMP_CPU_ZERO(tempMask);
    KMP_CPU_ALLOC(previousMask);
    KMP_CPU_ZERO(previousMask);
    int setSize = 0;

    for (;;) {
        __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

        // valid follow sets are ',' ':' and EOL
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            if (setSize > 0) {
                ADD_MASK(tempMask);
            }
            KMP_CPU_ZERO(tempMask);
            setSize = 0;
            if (*scan == '\0') {
                break;
            }
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(*scan == ':', "bad explicit places list");
        scan++;         // skip ':'

        // Read count parameter
        SKIP_WS(scan);
        KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
          "bad explicit places list");
        next = scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(scan, *next);
        KMP_ASSERT(count >= 0);
        scan = next;

        // valid follow sets are ',' ':' and EOL
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            stride = +1;
        }
        else {
            KMP_ASSERT2(*scan == ':', "bad explicit places list");
            scan++;     // skip ':'

            // Read stride parameter
            sign = +1;
            for (;;) {
                SKIP_WS(scan);
                if (*scan == '+') {
                    scan++;     // skip '+'
                    continue;
                }
                if (*scan == '-') {
                    sign *= -1;
                    scan++;     // skip '-'
                    continue;
                }
                break;
            }
            SKIP_WS(scan);
            KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
              "bad explicit places list");
            next = scan;
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_DEBUG_ASSERT(stride >= 0);
            scan = next;
            stride *= sign;
        }

        // Add places determined by initial_place : count : stride
        for (i = 0; i < count; i++) {
            if (setSize == 0) {
                break;
            }
            // Add the current place, then build the next place (tempMask)
            // from that
            KMP_CPU_COPY(previousMask, tempMask);
            ADD_MASK(previousMask);
            KMP_CPU_ZERO(tempMask);
            setSize = 0;
            KMP_CPU_SET_ITERATE(j, previousMask) {
                if (! KMP_CPU_ISSET(j, previousMask)) {
                    continue;
                }
                if ((j+stride > maxOsId) || (j+stride < 0) ||
                  (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
                  (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
                    if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                        KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
                    }
                    continue;
                }
                KMP_CPU_SET(j+stride, tempMask);
                setSize++;
            }
        }
        KMP_CPU_ZERO(tempMask);
        setSize = 0;

        // valid follow sets are ',' and EOL
        SKIP_WS(scan);
        if (*scan == '\0') {
            break;
        }
        if (*scan == ',') {
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
        return;
    }
    KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
    KMP_CPU_FREE(tempMask);
    KMP_CPU_FREE(previousMask);
    for (i = 0; i < nextNewMask; i++) {
        kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
        kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
        KMP_CPU_COPY(dest, src);
    }
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
#undef ADD_MASK_OSID
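
//
// __kmp_apply_thread_places: restrict the machine topology to the subset
// requested via __kmp_place_num_sockets / __kmp_place_num_cores /
// __kmp_place_num_threads_per_core (and their offsets), rebuilding the
// address2os array and correcting the global topology counts.  Only a
// uniform, three-level (socket/core/thread) topology is handled; anything
// else produces a warning and leaves the topology unchanged.
//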
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
    int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
    if (__kmp_place_num_sockets == 0 &&
      __kmp_place_num_cores == 0 &&
      __kmp_place_num_threads_per_core == 0 )
        goto _exit;   // no topology limiting actions requested, exit
    if (__kmp_place_num_sockets == 0)
        __kmp_place_num_sockets = nPackages;    // use all available sockets
    if (__kmp_place_num_cores == 0)
        __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
    if (__kmp_place_num_threads_per_core == 0 ||
      __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
        __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts

    if ( !__kmp_affinity_uniform_topology() ) {
        KMP_WARNING( AffHWSubsetNonUniform );
        goto _exit;   // don't support non-uniform topology
    }
    if ( depth != 3 ) {
        KMP_WARNING( AffHWSubsetNonThreeLevel );
        goto _exit;   // don't support not-3-level topology
    }
    if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
        KMP_WARNING(AffHWSubsetManySockets);
        goto _exit;
    }
    if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
        KMP_WARNING( AffHWSubsetManyCores );
        goto _exit;
    }

    AddrUnsPair *newAddr;
    if (pAddr)  // pAddr is NULL in case of affinity_none
        newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
          __kmp_place_num_sockets * __kmp_place_num_cores *
          __kmp_place_num_threads_per_core);

    for (i = 0; i < nPackages; ++i) {
        if (i < __kmp_place_socket_offset ||
          i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
            n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
            if (__kmp_pu_os_idx != NULL) {
                for (j = 0; j < nCoresPerPkg; ++j) {
                    for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
                        KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                        ++proc_num;
                    }
                }
            }
        }
        else {
            for (j = 0; j < nCoresPerPkg; ++j) {   // walk through requested socket
                if (j < __kmp_place_core_offset ||
                  j >= __kmp_place_core_offset + __kmp_place_num_cores) {
                    n_old += __kmp_nThreadsPerCore;  // skip not-requested core
                    if (__kmp_pu_os_idx != NULL) {
                        for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
                            KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                            ++proc_num;
                        }
                    }
                }
                else {
                    for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
                        if (k < __kmp_place_num_threads_per_core) {
                            if (pAddr)
                                newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
                            n_new++;
                        }
                        else {
                            if (__kmp_pu_os_idx != NULL)
                                KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
                        }
                        n_old++;
                        ++proc_num;
                    }
                }
            }
        }
    }
    KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
    KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
      __kmp_place_num_threads_per_core);

    nPackages = __kmp_place_num_sockets;                       // correct nPackages
    nCoresPerPkg = __kmp_place_num_cores;                      // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core;  // correct nThreadsPerCore
    __kmp_avail_proc = n_new;                                  // correct avail_proc
    __kmp_ncores = nPackages * __kmp_place_num_cores;          // correct ncores

    if (pAddr) {
        __kmp_free( *pAddr );
        *pAddr = newAddr;       // replace old topology with new one
    }
_exit:
    if (__kmp_pu_os_idx != NULL) {
        __kmp_free(__kmp_pu_os_idx);
        __kmp_pu_os_idx = NULL;
    }
}
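
//
// __kmp_affinity_find_core_level: locate the topology level that represents
// cores - the level just above the deepest level at which any address label
// is nonzero (i.e. the deepest level that actually distinguishes units).
//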
static int
__kmp_affinity_find_core_level(const AddrUnsPair *address2os, int nprocs,
  int bottom_level)
{
    int core_level = 0;

    for( int i = 0; i < nprocs; i++ ) {
        for( int j = bottom_level; j > 0; j-- ) {
            if( address2os[i].first.labels[j] > 0 ) {
                if( core_level < ( j - 1 ) ) {
                    core_level = j - 1;
                }
            }
        }
    }
    return core_level;
}
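
//
// __kmp_affinity_compute_ncores: count the number of distinct cores among
// the first nprocs entries of the sorted address2os array, by detecting the
// positions where all labels below the core level reset (a core boundary).
//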
static int
__kmp_affinity_compute_ncores(const AddrUnsPair *address2os, int nprocs,
  int bottom_level, int core_level)
{
    int ncores = 0;
    int i, j;

    j = bottom_level;
    for( i = 0; i < nprocs; i++ ) {
        for ( j = bottom_level; j > core_level; j-- ) {
            if( ( i + 1 ) < nprocs ) {
                if( address2os[i + 1].first.labels[j] > 0 ) {
                    break;
                }
            }
        }
        if( j == core_level ) {
            ncores++;
        }
    }
    if( j > core_level ) {
        // in case of ( nprocs < __kmp_avail_proc ) we may end up in the
        // middle of a core and miss it; count that last core
        ncores++;
    }
    return ncores;
}
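
//
// __kmp_affinity_find_core: return the (zero-based) core number of a given
// proc, computed as the number of cores among entries 0..proc, minus one.
//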
static int
__kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
  int bottom_level, int core_level)
{
    return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
      core_level) - 1;
}
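
//
// __kmp_affinity_max_proc_per_core: return the maximum number of HW contexts
// found on any single core (1 if the core level is already the bottom
// level).
//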
static int
__kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, int nprocs,
  int bottom_level, int core_level)
{
    int maxprocpercore = 0;

    if( core_level < bottom_level ) {
        for( int i = 0; i < nprocs; i++ ) {
            int percore = address2os[i].first.labels[core_level + 1] + 1;

            if( percore > maxprocpercore ) {
                maxprocpercore = percore;
            }
        }
    }
    else {
        maxprocpercore = 1;
    }
    return maxprocpercore;
}
static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;

#define KMP_EXIT_AFF_NONE                              \
    KMP_ASSERT(__kmp_affinity_type == affinity_none);  \
    KMP_ASSERT(address2os == NULL);                    \
    __kmp_apply_thread_places(NULL, 0);                \
    return;
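
//
// qsort comparator used to re-order the address2os table: it compares the
// innermost __kmp_affinity_compact levels first (from the bottom of the
// topology tree upward), then the remaining levels from the top down.  This
// rotation is how the compact/scatter orderings are realized.
//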
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
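
//
// __kmp_aux_affinity_initialize: the main topology-detection driver.  It
// builds the full mask, picks a topology discovery method (hwloc, x2APIC
// ids, legacy APIC ids, /proc/cpuinfo, Windows processor groups, or a flat
// OS-proc map), builds the address2os table, and then constructs the
// gtid -> affinity-mask table according to __kmp_affinity_type.
//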
static void
__kmp_aux_affinity_initialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        KMP_ASSERT(__kmp_affin_fullMask != NULL);
        return;
    }
    // Create the "full" mask, i.e. all processors in the machine model.
    if (__kmp_affin_fullMask == NULL) {
        KMP_CPU_ALLOC(__kmp_affin_fullMask);
    }
    if (KMP_AFFINITY_CAPABLE()) {
        if (__kmp_affinity_respect_mask) {
            __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);

            // Count the number of available processors.
            unsigned i;
            __kmp_avail_proc = 0;
            KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
                if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
                    continue;
                }
                __kmp_avail_proc++;
            }
            if (__kmp_avail_proc > __kmp_xproc) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(ErrorInitializeAffinity);
                }
                __kmp_affinity_type = affinity_none;
                KMP_AFFINITY_DISABLE();
                return;
            }
        }
        else {
            __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
            __kmp_avail_proc = __kmp_xproc;
        }
    }

    int depth = -1;
    kmp_i18n_id_t msg_id = kmp_i18n_null;

    // For backward compatibility, setting KMP_CPUINFO_FILE implies
    // KMP_TOPOLOGY_METHOD=cpuinfo
    if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
        __kmp_affinity_top_method = affinity_top_method_cpuinfo;
    }

    if (__kmp_affinity_top_method == affinity_top_method_all) {
        // In the default code path, errors are not fatal - we just try using
        // another method.
        const char *file_name = NULL;
        int line = 0;
# if KMP_USE_HWLOC
        if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
            }
            if(!__kmp_hwloc_error) {
                depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
                if (depth == 0) {
                    KMP_EXIT_AFF_NONE;
                }
                else if(depth < 0 && __kmp_affinity_verbose) {
                    KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
                }
            }
            else if(__kmp_affinity_verbose) {
                KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
            }
        }
# endif

# if KMP_ARCH_X86 || KMP_ARCH_X86_64
        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
            }
            file_name = NULL;
            depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_EXIT_AFF_NONE;
            }
            if (depth < 0) {
                if (__kmp_affinity_verbose) {
                    if (msg_id != kmp_i18n_null) {
                        KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                          KMP_I18N_STR(DecodingLegacyAPIC));
                    }
                    else {
                        KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
                    }
                }
                file_name = NULL;
                depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
                if (depth == 0) {
                    KMP_EXIT_AFF_NONE;
                }
            }
        }
# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

# if KMP_OS_LINUX
        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                      __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
                }
                else {
                    KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
                }
            }

            FILE *f = fopen("/proc/cpuinfo", "r");
            if (f == NULL) {
                msg_id = kmp_i18n_str_CantOpenCpuinfo;
            }
            else {
                file_name = "/proc/cpuinfo";
                depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
                fclose(f);
                if (depth == 0) {
                    KMP_EXIT_AFF_NONE;
                }
            }
        }
# endif /* KMP_OS_LINUX */

# if KMP_GROUP_AFFINITY
        if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
            }

            depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
            KMP_ASSERT(depth != 0);
        }
# endif /* KMP_GROUP_AFFINITY */

        if (depth < 0) {
            if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
                if (file_name == NULL) {
                    KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
                }
                else if (line == 0) {
                    KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
                }
                else {
                    KMP_INFORM(UsingFlatOSFileLine, file_name, line,
                      __kmp_i18n_catgets(msg_id));
                }
            }

            file_name = "";
            depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_EXIT_AFF_NONE;
            }
            KMP_ASSERT(depth > 0);
            KMP_ASSERT(address2os != NULL);
        }
    }

    //
    // If the user has specified a particular topology discovery method, then
    // abort if that method fails.
    //
# if KMP_ARCH_X86 || KMP_ARCH_X86_64
    else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(Decodingx2APIC));
        }

        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_EXIT_AFF_NONE;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
    else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(DecodingLegacyAPIC));
        }

        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_EXIT_AFF_NONE;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
        const char *filename;
        if (__kmp_cpuinfo_file != NULL) {
            filename = __kmp_cpuinfo_file;
        }
        else {
            filename = "/proc/cpuinfo";
        }

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
        }

        FILE *f = fopen(filename, "r");
        if (f == NULL) {
            int code = errno;
            if (__kmp_cpuinfo_file != NULL) {
                __kmp_msg(kmp_ms_fatal,
                  KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code),
                  KMP_HNT(NameComesFrom_CPUINFO_FILE),
                  __kmp_msg_null);
            }
            else {
                __kmp_msg(kmp_ms_fatal,
                  KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code),
                  __kmp_msg_null);
            }
        }
        int line = 0;
        depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            if (line > 0) {
                KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
            }
            else {
                KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
            }
        }
        if (__kmp_affinity_type == affinity_none) {
            KMP_ASSERT(depth == 0);
            KMP_EXIT_AFF_NONE;
        }
    }

# if KMP_GROUP_AFFINITY
    else if (__kmp_affinity_top_method == affinity_top_method_group) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
        KMP_ASSERT(depth != 0);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
# endif /* KMP_GROUP_AFFINITY */

    else if (__kmp_affinity_top_method == affinity_top_method_flat) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_EXIT_AFF_NONE;
        }
        // should not fail
        KMP_ASSERT(depth > 0);
        KMP_ASSERT(address2os != NULL);
    }

# if KMP_USE_HWLOC
    else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
        KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
        }
        depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_EXIT_AFF_NONE;
        }
    }
# endif // KMP_USE_HWLOC

    if (address2os == NULL) {
        if (KMP_AFFINITY_CAPABLE()
          && (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none)))) {
            KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
    }

    __kmp_apply_thread_places(&address2os, depth);
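
    //
    // From here on the topology map (address2os) is valid: build the table
    // of masks indexed by OS proc id, then assign child numbers so the
    // entries can be sorted by __kmp_affinity_cmp_Address_child_num().
    //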
    unsigned maxIndex;
    unsigned numUnique;
    kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
      address2os, __kmp_avail_proc);
    if (__kmp_affinity_gran_levels == 0) {
        KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
    }

    __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
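
    //
    // Dispatch on the affinity type: explicit lists are parsed directly;
    // logical/physical/scatter/compact are implemented by choosing
    // __kmp_affinity_compact and sorting; balanced pre-computes a
    // core -> procs table (procarr) for non-uniform machines.
    //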
    switch (__kmp_affinity_type) {

        case affinity_explicit:
        KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
# if OMP_40_ENABLED
        if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
        {
            __kmp_affinity_process_proclist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# if OMP_40_ENABLED
        else {
            __kmp_affinity_process_placelist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
# endif
        if (__kmp_affinity_num_masks == 0) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffNoValidProcID);
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        break;

        case affinity_logical:
        __kmp_affinity_compact = 0;
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

        case affinity_physical:
        if (__kmp_nThreadsPerCore > 1) {
            __kmp_affinity_compact = 1;
            if (__kmp_affinity_compact >= depth) {
                __kmp_affinity_compact = 0;
            }
        } else {
            __kmp_affinity_compact = 0;
        }
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

        case affinity_scatter:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = 0;
        }
        else {
            __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
        }
        goto sortAddresses;

        case affinity_compact:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = depth - 1;
        }
        goto sortAddresses;

        case affinity_balanced:
        if( depth <= 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        else if( __kmp_affinity_uniform_topology() ) {
            break;
        }
        else { // Non-uniform topology

            // Save the depth for further usage
            __kmp_aff_depth = depth;

            int core_level = __kmp_affinity_find_core_level(address2os,
              __kmp_avail_proc, depth - 1);
            int ncores = __kmp_affinity_compute_ncores(address2os,
              __kmp_avail_proc, depth - 1, core_level);
            int maxprocpercore = __kmp_affinity_max_proc_per_core(address2os,
              __kmp_avail_proc, depth - 1, core_level);

            int nproc = ncores * maxprocpercore;
            if( ( nproc < 2 ) || ( nproc < __kmp_avail_proc ) ) {
                if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                    KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
                }
                __kmp_affinity_type = affinity_none;
                return;
            }

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            int lastcore = -1;
            int inlastcore = 0;
            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                int core = __kmp_affinity_find_core(address2os, i,
                  depth - 1, core_level);

                if ( core == lastcore ) {
                    inlastcore++;
                }
                else {
                    inlastcore = 0;
                }
                lastcore = core;

                procarr[ core * maxprocpercore + inlastcore ] = proc;
            }

            break;
        }

        sortAddresses:
        // Allocate the gtid->affinity mask table.
        if (__kmp_affinity_dups) {
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            __kmp_affinity_num_masks = numUnique;
        }

# if OMP_40_ENABLED
        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }
# endif

        KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);

        // Sort the address2os table according to the current setting of
        // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);

        {
            unsigned i;
            unsigned j;
            for (i = 0, j = 0; i < (unsigned)__kmp_avail_proc; i++) {
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

        default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }

    KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex+1);
    machine_hierarchy.init(address2os, __kmp_avail_proc);
}

#undef KMP_EXIT_AFF_NONE
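
//
// __kmp_affinity_initialize: public wrapper.  If affinity is disabled it is
// temporarily treated as affinity_none around the call to
// __kmp_aux_affinity_initialize(), since the code above only checks for
// affinity_none, and then restored to affinity_disabled.
//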
void
__kmp_affinity_initialize(void)
{
    int disabled = (__kmp_affinity_type == affinity_disabled);
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(disabled);
    }
    if (disabled) {
        __kmp_affinity_type = affinity_none;
    }
    __kmp_aux_affinity_initialize();
    if (disabled) {
        __kmp_affinity_type = affinity_disabled;
    }
}
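
//
// __kmp_affinity_uninitialize: release everything the initialization built:
// the per-place masks, the full mask, the proclist string, the topology map
// and procarr, the hwloc topology (if used), and the affinity dispatch
// object.
//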
void
__kmp_affinity_uninitialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
        __kmp_affinity_masks = NULL;
    }
    if (__kmp_affin_fullMask != NULL) {
        KMP_CPU_FREE(__kmp_affin_fullMask);
        __kmp_affin_fullMask = NULL;
    }
    __kmp_affinity_num_masks = 0;
# if OMP_40_ENABLED
    __kmp_affinity_num_places = 0;
# endif
    if (__kmp_affinity_proclist != NULL) {
        __kmp_free(__kmp_affinity_proclist);
        __kmp_affinity_proclist = NULL;
    }
    if( address2os != NULL ) {
        __kmp_free( address2os );
        address2os = NULL;
    }
    if( procarr != NULL ) {
        __kmp_free( procarr );
        procarr = NULL;
    }
# if KMP_USE_HWLOC
    if (__kmp_hwloc_topology != NULL) {
        hwloc_topology_destroy(__kmp_hwloc_topology);
        __kmp_hwloc_topology = NULL;
    }
# endif
    KMPAffinity::destroy_api();
}
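
//
// __kmp_affinity_set_init_mask: bind thread gtid to its initial place.  For
// affinity none/balanced (or an unbound proc-bind policy) the thread gets
// the full mask; otherwise it gets place (gtid + offset) mod num_masks.
//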
void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    }
    else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    //
    // Copy the thread mask to the kmp_info_t structure.  If
    // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
    // that has all of the OS proc ids set.
    //
    kmp_affin_mask_t *mask;
    int i;

# if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
    {
        if ((__kmp_affinity_type == affinity_none)
          || (__kmp_affinity_type == affinity_balanced)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(__kmp_affin_fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = __kmp_affin_fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# if OMP_40_ENABLED
    else {
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(__kmp_affin_fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = __kmp_affin_fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# endif

# if OMP_40_ENABLED
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
          gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
          gtid, i));
    }
# endif /* OMP_40_ENABLED */

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
          buf);
    }

# if KMP_OS_WINDOWS
    //
    // On Windows* OS, the process affinity mask might have changed.  If the
    // user didn't request affinity and this call fails, just continue
    // silently.
    //
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
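
//
// __kmp_affinity_set_place: move a thread to th_new_place, asserting that
// the new place lies within the thread's place partition, then install the
// corresponding mask.
//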
# if OMP_40_ENABLED

void
__kmp_affinity_set_place(int gtid)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

    KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
      gtid, th->th.th_new_place, th->th.th_current_place));

    //
    // Check that the new place is within this thread's partition.
    //
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    KMP_ASSERT(th->th.th_new_place >= 0);
    KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
    if (th->th.th_first_place <= th->th.th_last_place) {
        KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
          && (th->th.th_new_place <= th->th.th_last_place));
    }
    else {
        KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
          || (th->th.th_new_place >= th->th.th_last_place));
    }

    //
    // Copy the thread mask to the kmp_info_t structure,
    // and set this thread's affinity.
    //
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
      th->th.th_new_place);
    KMP_CPU_COPY(th->th.th_affin_mask, mask);
    th->th.th_current_place = th->th.th_new_place;

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
          gtid, buf);
    }
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

# endif /* OMP_40_ENABLED */
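
//
// __kmp_aux_set_affinity: implementation of the kmp_set_affinity() entry
// point.  When consistency checking is enabled, the mask is validated
// (non-NULL, at least one proc, and a subset of the full mask) before being
// installed, and the OMP 4.0 place bookkeeping is reset to "undefined".
//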
int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            unsigned proc;
            int num_procs = 0;

            KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
                if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                }
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }

# if KMP_GROUP_AFFINITY
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif /* KMP_GROUP_AFFINITY */
        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

# if OMP_40_ENABLED
    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;

    // Turn off 4.0 affinity for the current thread at this parallel level.
    th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
# endif

    return retval;
}
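
//
// __kmp_aux_get_affinity: implementation of kmp_get_affinity().  On
// non-Windows systems the mask is read back from the OS; on Windows the
// stored thread mask is returned instead (presumably because a mask that
// spans processor groups cannot simply be queried back from the OS).
//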
int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if !KMP_OS_WINDOWS

    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
          gtid, buf);
    });
    return retval;

# else

    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return 0;

# endif /* KMP_OS_WINDOWS */
}
int
__kmp_aux_get_affinity_max_proc() {
    if (! KMP_AFFINITY_CAPABLE()) {
        return 0;
    }
#if KMP_GROUP_AFFINITY
    if ( __kmp_num_proc_groups > 1 ) {
        return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT);
    }
#endif
    return __kmp_xproc;
}

int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
        return -2;
    }

    KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}

int
__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
        }
    }

    if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
        return -2;
    }

    KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}

int
__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
        }
    }

    if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
        return 0;
    }

    return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
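
//
// __kmp_balanced_affinity: bind thread tid for KMP_AFFINITY=balanced.  On a
// uniform topology the threads are spread over cores arithmetically: with
// nthreads threads and ncores cores, each core gets chunk = nthreads/ncores
// threads and the first (nthreads % ncores) "big" cores get one extra.  As
// a worked example (not taken from this file): nthreads = 10, ncores = 4
// gives chunk = 2, big_cores = 2, big_nth = 6, so tids 0-5 land on cores
// 0-1 (three each) and tids 6-9 on cores 2-3 (two each).
//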
void __kmp_balanced_affinity( int tid, int nthreads )
{
    bool fine_gran = true;

    switch (__kmp_affinity_gran) {
        case affinity_gran_fine:
        case affinity_gran_thread:
            break;
        case affinity_gran_core:
            if( __kmp_nThreadsPerCore > 1) {
                fine_gran = false;
            }
            break;
        case affinity_gran_package:
            if( nCoresPerPkg > 1) {
                fine_gran = false;
            }
            break;
        default:
            fine_gran = false;
    }

    if( __kmp_affinity_uniform_topology() ) {
        int coreID;
        int threadID;
        // Number of hyper threads per core in HT machine
        int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
        // Number of cores
        int ncores = __kmp_ncores;
        if( ( nPackages > 1 ) && ( __kmp_nth_per_core <= 1 ) ) {
            __kmp_nth_per_core = __kmp_avail_proc / nPackages;
            ncores = nPackages;
        }
        // How many threads will be bound to each core
        int chunk = nthreads / ncores;
        // How many cores will have an additional thread bound to it - "big cores"
        int big_cores = nthreads % ncores;
        // Number of threads on the big cores
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            coreID = tid / (chunk + 1 );
            threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
        } else { //tid >= big_nth
            coreID = ( tid - big_cores ) / chunk;
            threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
        }

        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);

        if( fine_gran ) {
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask);
        } else {
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID;
                osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask);
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
        KMP_CPU_FREE_FROM_STACK(mask);
    }
    else { // Non-uniform topology

        kmp_affin_mask_t *mask;
        KMP_CPU_ALLOC_ON_STACK(mask);
        KMP_CPU_ZERO(mask);

        int core_level = __kmp_affinity_find_core_level(address2os,
          __kmp_avail_proc, __kmp_aff_depth - 1);
        int ncores = __kmp_affinity_compute_ncores(address2os,
          __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
        int nth_per_core = __kmp_affinity_max_proc_per_core(address2os,
          __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

        // For performance, consider the special case nthreads == __kmp_avail_proc
        if( nthreads == __kmp_avail_proc ) {
            if( fine_gran ) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask);
            } else {
                int core = __kmp_affinity_find_core(address2os, tid,
                  __kmp_aff_depth - 1, core_level);
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    if( __kmp_affinity_find_core(address2os, i,
                      __kmp_aff_depth - 1, core_level) == core ) {
                        KMP_CPU_SET( osID, mask);
                    }
                }
            }
        } else if( nthreads <= ncores ) {

            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check if this core from procarr[] is in the mask
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != - 1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For fine granularity it is enough to set
                                // the first available osID for this core
                                if( fine_gran ) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }

        } else { // nthreads > ncores

            // Array to save the number of processors at each core
            int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
            // Array to save the number of cores with "x" available processors
            int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
            // Array to save the number of cores with # procs from x to nth_per_core
            int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }

            // Max number of processors
            int nproc = nth_per_core * ncores;
            // An array to keep number of threads per each context
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip the core with 0 processors
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ] ++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }
            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    if( fine_gran ) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask);
                    } else {
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask);
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }

        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
        KMP_CPU_FREE_FROM_STACK(mask);
    }
}
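
//
// kmp_set_thread_affinity_mask_initial: exported entry point that rebinds
// the calling OpenMP thread to the full initial mask.  Intended for code
// that wants to run non-OpenMP parallelism after OpenMP has pinned the
// thread: save the current mask, call this, run, then restore the saved
// mask.
//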
int
kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
//   -1 if we cannot bind the thread
{
    int gtid = __kmp_get_gtid();
    if (gtid < 0) {
        // Do not touch non-omp threads
        KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
          "non-omp thread, returning\n"));
        return -1;
    }
    if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
        KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
          "affinity not initialized, returning\n"));
        return -1;
    }
    KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
      "set full mask for thread %d\n", gtid));
    KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
    return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}

#endif // KMP_AFFINITY_SUPPORTED