/*********************  physical_processors.cpp   *****************************
* Author:        Agner Fog
* Date created:  2019-10-29
* Last modified: 2019-11-25
* Version:       1.02 beta
* Project:       vector class library
* Description:   Detect number of physical and logical processors on CPU chip.
*                Compile for C++11 or later
*
* (c) Copyright 2019 Agner Fog.
* Apache License version 2.0 or later.
*******************************************************************************
Some modern CPUs can run two threads in each CPU core when simultaneous
multithreading (SMT, called hyperthreading by Intel) is enabled.

The number of physical processors is the number of CPU cores.
The number of logical processors is the same number multiplied by the number
of threads that can run simultaneously in each CPU core.

Simultaneous multithreading will slow down performance when two CPU-intensive
threads running in the same physical processor (CPU core) are competing for the
same resources. Therefore, the optimal number of threads for CPU-intensive
tasks is most likely to be the number of physical processors.

Tasks that are less CPU-intensive but limited by RAM access, disk access,
network, etc. may get an advantage by running as many threads as the number of
logical processors. This will be double the number of physical processors when
simultaneous multithreading is enabled.

The physicalProcessors function detects the number of physical processors and
logical processors on an x86 computer. This is useful for determining the
optimal number of threads.

Note: There are several problems in detecting the number of physical processors:

1. The CPUID instruction on Intel CPUs will return a wrong number of logical
   processors when SMT (hyperthreading) is disabled. It may be necessary to
   compare the number of processors returned by the CPUID instruction with the
   number of processors reported by the operating system to detect if SMT is
   enabled (AMD processors do not have this problem).

2. It is necessary to rely on system functions to detect if there is more than
   one CPU chip installed. It is assumed that the status of SMT is the same on
   all CPU chips in a system.

3. The behavior of VIA processors is undocumented.

4. This function is not guaranteed to work on future CPUs. It may need updating
   when new CPUs with different configurations or different CPUID functionality
   appear.
******************************************************************************/

#include <thread>    // std::thread functions

#ifdef _MSC_VER
#include <intrin.h>  // __cpuidex intrinsic function available on microsoft compilers
#endif

// Define interface to CPUID instruction.
// input:  leaf = eax, subleaf = ecx
// output: output[0] = eax, output[1] = ebx, output[2] = ecx, output[3] = edx
//
// Three implementations are selected at compile time:
//  - Gnu and Clang compilers: extended inline assembly (AT&T syntax)
//  - Microsoft compilers:     the __cpuidex intrinsic from intrin.h
//  - anything else:           inline assembly in MASM/Intel syntax (uses 32-bit registers)
static inline void cpuid(int output[4], int leaf, int subleaf = 0) {
#if defined(__GNUC__) || defined(__clang__)
    // use inline assembly, Gnu/AT&T syntax
    int a, b, c, d;
    __asm("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "a"(leaf), "c"(subleaf) : );
    output[0] = a;
    output[1] = b;
    output[2] = c;
    output[3] = d;

#elif defined (_MSC_VER)
    // Microsoft compiler, intrin.h included
    __cpuidex(output, leaf, subleaf);      // intrinsic function for CPUID

#else
    // unknown platform. try inline assembly with masm/intel syntax
    __asm {
        mov eax, leaf
        mov ecx, subleaf
        cpuid;
        mov esi, output
        mov[esi], eax
        mov[esi + 4], ebx
        mov[esi + 8], ecx
        mov[esi + 12], edx
    }
#endif
}

// Function prototype:
int physicalProcessors(int * logical_processors = 0);

// Find the number of physical and logical processors supported by CPU
// Parameter:
// logical_processors: an optional pointer to an integer that will receive the number of logical processors.
// Return value: number of physical processors int physicalProcessors(int * logical_processors) { int vendor = 0; // CPU vendor: 1 = Intel, 2 = AMD, 3 = VIA, 0 = other int logicalProc = 1; // number of logical processor cores int physicalProc = 1; // number of physical processor cores int procPerCore = 1; // logical cores per physical core bool hyperthreadingSupported = false; // CPU supports hyperthreading / simultaneous multithreading int systemProcessors = std::thread::hardware_concurrency(); // number of processors reported by operating system int abcd[4] = { 0,0,0,0 }; // CPUID output cpuid(abcd, 0); // CPUID function 0 int maxLeaf = abcd[0]; // maximum eax input for CPUID if (abcd[2] == 0x6C65746E) { // last 4 chars of "GenuineIntel" vendor = 1; } else if (abcd[2] == 0x444D4163) { // last 4 chars of "AuthenticAMD" vendor = 2; } else if (abcd[2] == 0x736C7561) { // last 4 chars of "CentaurHauls" vendor = 3; } if (maxLeaf >= 1) { cpuid(abcd, 1); if (abcd[3] & (1 << 28)) { // hyperthreading supported hyperthreadingSupported = true; } } if (vendor == 1) { ////////////////// // Intel // ////////////////// int hyper = 0; // hyperthreading status: 0 = unknown, 1 = disabled, 2 = enabled if (maxLeaf >= 0xB) { // leaf 0xB or 0x1F: Extended Topology Enumeration int num = 0xB; // if (maxLeaf >= 0x1F) num = 0x1F; for (int c = 0; c < 5; c++) { cpuid(abcd, num, c); // enumeration level c int type = (abcd[2] >> 8) & 0xFF;// enumeration type at level c if (type == 1) { // SMT level procPerCore = abcd[1] & 0xFFFF; } else if (type >= 2) { // core level logicalProc = abcd[1] & 0xFFFF; } else if (type == 0) break; // There are more types/levels to consider if we use num = 0x1F. 
We may need // to fix this in the future if CPUs with more complex configurations appear } physicalProc = logicalProc / procPerCore; // The number of performance monitor registers depends on hyperthreading status // on Intel CPUs with performance monitoring version 3 or 4 cpuid(abcd, 0xA, 0); // performance monitor counters information int perfVersion = abcd[0] & 0xFF; // performance monitoring version int perfNum = (abcd[0] >> 8) & 0xFF; // number of performance monitoring registers if (perfVersion == 3 || perfVersion == 4) { if (perfNum == 4) { hyper = 2; // 4 performance registers when hyperthreading enabled } else if (perfNum == 8) { // 8 performance registers when hyperthreading disabled hyper = 1; procPerCore = 1; logicalProc = physicalProc; // reduce the number of logical processors when hyperthreading is disabled } // hyper remains 0 in all other cases, indicating unknown status } } else if (maxLeaf >= 4) { // CPUID function 4: cache parameters and cores cpuid(abcd, 4); logicalProc = (abcd[0] >> 26) + 1; if (hyperthreadingSupported) { // number of logical processors per core is not known. Assume 2 if hyperthreading supported procPerCore = 2; } physicalProc = logicalProc / procPerCore; } else { // no information. Assume 1 processor } if (systemProcessors > logicalProc) { // Multiple CPU chips. Assume that chips are identical with respect to hypethreading physicalProc = systemProcessors * physicalProc / logicalProc; logicalProc = systemProcessors; } else if (logicalProc > systemProcessors && systemProcessors > 0 && hyper == 0) { // Hyperthreading is disabled logicalProc = systemProcessors; physicalProc = systemProcessors; } } else if (vendor == 2) { ////////////////// // AMD // ////////////////// cpuid(abcd, 0x80000000); // AMD specific CPUID functions int maxLeaf8 = abcd[0] & 0xFFFF; // maximum eax 0x8000.... 
input for CPUID if (maxLeaf8 >= 8) { cpuid(abcd, 0x80000008); logicalProc = (abcd[2] & 0xFF) + 1; if (maxLeaf8 >= 0x1E) { cpuid(abcd, 0x8000001E); procPerCore = ((abcd[1] >> 8) & 0x03) + 1; // procPerCore = 2 if simultaneous multithreading is enabled, 1 if disabled } else { if (hyperthreadingSupported) { procPerCore = 2; } else { procPerCore = 1; } } physicalProc = logicalProc / procPerCore; } else if (hyperthreadingSupported) { // number of logical processors per core is not known. Assume 2 if SMT supported logicalProc = 2; physicalProc = 1; } if (systemProcessors > logicalProc) { // Multiple CPU chips. Assume that chips are identical with respect to SMT physicalProc = systemProcessors * physicalProc / logicalProc; logicalProc = systemProcessors; } } else { ////////////////////////////// // VIA or unknown CPU // ////////////////////////////// // The behavior of VIA processors is undocumented! It is not known how to detect threads on a VIA processor physicalProc = logicalProc = systemProcessors; if (hyperthreadingSupported && physicalProc > 1) { physicalProc /= 2; } } if (logical_processors) { // return logical_processors if pointer is not null *logical_processors = logicalProc; } return physicalProc; } /* Uncomment this for testing: #include int main() { int logicalProc = 0; int physicalProc = physicalProcessors(&logicalProc); printf("\nlogical processors: %i", logicalProc); printf("\nphysical processors: %i", physicalProc); printf("\nlogical processors per core: %i", logicalProc / physicalProc); int sysproc = std::thread::hardware_concurrency(); printf("\nsystem processors: %i", sysproc); return 0; } */