/**
* Runtime CPU detection
*
* Copyright:
* (C) 2009-2010,2013 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan.utils.cpuid;

import core.cpuid;
import botan.constants;
import botan.utils.types;
import botan.utils.types;
import botan.utils.get_byte;
import botan.utils.mem_ops;

/**
* A class handling runtime CPU feature detection.
*
* All members are static. The raw CPUID register values are collected once by
* the module-level `shared static this()` further down in this file; the
* class-level `static this()` then packs them into `m_x86_processor_flags`,
* which the `has*` accessors query bit by bit.
*/
class CPUID
{
public:
    /**
    * Probe the CPU and see what extensions are supported.
    *
    * This constructor is thread-local, so it runs after the module's shared
    * static constructor has filled in `max_cpuid`, `features`, etc.
    */
    static this()
    {
        // AltiVec detection does not depend on the x86 CPUID instruction, so
        // it must run before the `max_cpuid == 0` early return below;
        // otherwise the flag could never be set on a CPU without CPUID.
        version(PPC)
            if (altivecCheckSysctl() || altivecCheckPvrEmul())
                m_altivec_capable = true;

        if (max_cpuid == 0)
            return;

        // Leaf 1: EDX ends up in bits 0..31 and ECX in bits 32..63, which is
        // the layout the CPUID_*_BIT constants below are numbered against.
        m_x86_processor_flags[0] = (cast(ulong)(miscfeatures) << 32) | features;

        // l1cache holds ECX of extended leaf 0x8000_0005 (see the module
        // constructor). get_byte(3, ...) is presumably its least-significant
        // byte, i.e. the L1 data cache line size -- TODO confirm against
        // botan.utils.get_byte.
        m_cache_line_size = get_byte(3, l1cache);

        if (m_cache_line_size == 0)
        {
            // Leaf 0x8000_0005 was unavailable or reported nothing. Fall back
            // to the CLFLUSH line size from leaf 1 EBX bits 15..8 (counted in
            // 8-byte units), and finally to a conservative default so that
            // callers never see a zero-sized cache line.
            const size_t clflush_units = (apic >> 8) & 0xFF;
            m_cache_line_size = clflush_units ? clflush_units * 8 : 32;
        }

        // Leaf 7: EBX ends up in bits 64..95 and ECX in bits 96..127.
        if (max_cpuid >= 7)
            m_x86_processor_flags[1] = (cast(ulong)(extreserved) << 32) | extfeatures;

        // NOTE(review): this fallback is only reachable when max_cpuid != 0
        // and the vendor string is AMD's -- confirm that gating is intended.
        if (is_amd)
        {
            version(X86_64) {
                /*
                * If we don't have access to CPUID, we can still safely assume that
                * any x86-64 processor has SSE2 and RDTSC
                */
                if (m_x86_processor_flags[0] == 0)
                    m_x86_processor_flags[0] = (1 << CPUID_SSE2_BIT) | (1 << CPUID_RDTSC_BIT);
            }
        }

    }


    /**
    * Return a best guess of the cache line size
    */
    static size_t cacheLineSize() { return m_cache_line_size; }

    /**
    * Check if the processor supports RDTSC
    */
    static bool hasRdtsc()
    { return x86_processor_flags_has(CPUID_RDTSC_BIT); }

    /**
    * Check if the processor supports SSE2
    */
    static bool hasSse2()
    { return x86_processor_flags_has(CPUID_SSE2_BIT); }

    /**
    * Check if the processor supports SSSE3
    */
    static bool hasSsse3()
    { return x86_processor_flags_has(CPUID_SSSE3_BIT); }

    /**
    * Check if the processor supports SSE4.1
    */
    static bool hasSse41()
    { return x86_processor_flags_has(CPUID_SSE41_BIT); }

    /**
    * Check if the processor supports SSE4.2
    */
    static bool hasSse42()
    { return x86_processor_flags_has(CPUID_SSE42_BIT); }

    /**
    * Check if the processor supports AVX2
    */
    static bool hasAvx2()
    { return x86_processor_flags_has(CPUID_AVX2_BIT); }

    /**
    * Check if the processor supports AVX-512F
    */
    static bool hasAvx512f()
    { return x86_processor_flags_has(CPUID_AVX512F_BIT); }

    /**
    * Check if the processor supports BMI2
    */
    static bool hasBmi2()
    { return x86_processor_flags_has(CPUID_BMI2_BIT); }

    /**
    * Check if the processor supports AES-NI
    */
    static bool hasAesNi()
    { return x86_processor_flags_has(CPUID_AESNI_BIT); }

    /**
    * Check if the processor supports CLMUL
    */
    static bool hasClmul()
    { return x86_processor_flags_has(CPUID_CLMUL_BIT); }

    /**
    * Check if the processor supports Intel SHA extension
    */
    static bool hasIntelSha()
    { return x86_processor_flags_has(CPUID_SHA_BIT); }

    /**
    * Check if the processor supports ADX extension
    */
    static bool hasAdx()
    { return x86_processor_flags_has(CPUID_ADX_BIT); }

    /**
    * Check if the processor supports RDRAND
    */
    static bool hasRdrand()
    { return x86_processor_flags_has(CPUID_RDRAND_BIT); }

    /**
    * Check if the processor supports RDSEED
    */
    static bool hasRdseed()
    { return x86_processor_flags_has(CPUID_RDSEED_BIT); }

    /**
    * Check if the processor supports AltiVec/VMX
    */
    static bool hasAltivec() { return m_altivec_capable; }

    /**
    * Build a human-readable summary of the detected features.
    *
    * Returns: the prefix "CPUID flags: " followed by the name of every
    * supported feature, each name followed by a single space.
    */
    static string toString()
    {
        import std.array : Appender;
        Appender!string app;

        app ~= "CPUID flags: ";

        // Appending the bool results directly would not produce any readable
        // flag name, so emit the name of each supported feature instead.
        void addFlag(bool supported, string name)
        {
            if (!supported)
                return;
            app ~= name;
            app ~= ' ';
        }

        addFlag(hasSse2(), "sse2");
        addFlag(hasSsse3(), "ssse3");
        addFlag(hasSse41(), "sse41");
        addFlag(hasSse42(), "sse42");
        addFlag(hasAvx2(), "avx2");
        addFlag(hasAvx512f(), "avx512f");
        addFlag(hasAltivec(), "altivec");

        addFlag(hasRdtsc(), "rdtsc");
        addFlag(hasBmi2(), "bmi2");
        addFlag(hasClmul(), "clmul");
        addFlag(hasAesNi(), "aes_ni");
        addFlag(hasRdrand(), "rdrand");
        addFlag(hasRdseed(), "rdseed");
        addFlag(hasIntelSha(), "intel_sha");
        addFlag(hasAdx(), "adx");

        return app.data;
    }
private:
    alias CPUIDbits = int;
    // Bit positions inside m_x86_processor_flags, viewed as one 128-bit value:
    // 0..31 = leaf 1 EDX, 32..63 = leaf 1 ECX, 64..95 = leaf 7 EBX,
    // 96..127 = leaf 7 ECX (see the packing in `static this()`).
    enum : CPUIDbits {
        CPUID_RDTSC_BIT = 4,
        CPUID_SSE2_BIT = 26,
        CPUID_CLMUL_BIT = 33,
        CPUID_SSSE3_BIT = 41,
        CPUID_SSE41_BIT = 51,
        CPUID_SSE42_BIT = 52,
        CPUID_AESNI_BIT = 57,
        CPUID_RDRAND_BIT = 62,

        CPUID_AVX2_BIT = 64+5,
        CPUID_BMI2_BIT = 64+8,
        CPUID_AVX512F_BIT = 64+16,
        CPUID_RDSEED_BIT = 64+18,
        CPUID_ADX_BIT = 64+19,
        CPUID_SHA_BIT = 64+29,
    }

    /// Test a single bit of the packed flags; `bit` is one of the CPUID_*_BIT values.
    static bool x86_processor_flags_has(int bit)
    {
        return ((m_x86_processor_flags[bit/64] >> (bit % 64)) & 1);
    }

    static ulong[2] m_x86_processor_flags;
    static size_t m_cache_line_size;
    static bool m_altivec_capable;
}

package:

// Raw values captured once by the module constructor below.
private __gshared {
    bool is_intel; // true = _probably_ an Intel processor, might be faking
    bool is_amd; // true = _probably_ an AMD processor

    uint apic;               // EBX of leaf 1
    uint max_cpuid;          // EAX of leaf 0
    uint max_extended_cpuid; // EAX of leaf 0x8000_0000
    uint extfeatures;        // EBX of leaf 7
    uint extreserved;        // ECX of leaf 7
    uint miscfeatures;       // ECX of leaf 1
    uint amdmiscfeatures;    // ECX of leaf 0x8000_0001
    uint features;           // EDX of leaf 1
    uint amdfeatures;        // EDX of leaf 0x8000_0001
    uint l1cache;            // ECX of leaf 0x8000_0005
}
// EBX is used to store GOT's address in PIC on x86, so we must preserve its value
version(D_PIC)
    version(X86)
        version = PreserveEBX;

// todo: LDC/GDC
version(GNU)
{
    /// Execute CPUID with EAX=ain, ECX=cin and store the result registers in a, b, c, d.
    private void rawCpuid(uint ain, uint cin, ref uint a, ref uint b, ref uint c, ref uint d)
    {
        version(PreserveEBX)
        {
            asm pure nothrow {
                "xchg %1, %%ebx
                cpuid
                xchg %1, %%ebx"
                : "=a" a, "=r" b, "=c" c, "=d" d
                : "0" ain, "2" cin;
            }
        }
        else
        {
            asm pure nothrow {
                "cpuid"
                : "=a" a, "=b" b, "=c" c, "=d" d
                : "0" ain, "2" cin;
            }
        }
    }
}

// Disabled variant of rawCpuid, kept for the LDC/GDC todo above.
version(none) {
    private void rawCpuid(uint ain, uint cin, ref uint a, ref uint b, ref uint c, ref uint d)
    {
        version(PreserveEBX)
        {
            mixin( q{
                __asm pure nothrow {
                    "xchg %1, %%ebx
                    cpuid
                    xchg %1, %%ebx"
                    : "=a" a, "=r" b, "=c" c, "=d" d
                    : "0" ain, "2" cin;
                }
            } );
        }
        else
        {
            mixin( q{
                __asm pure nothrow {
                    "cpuid"
                    : "=a" a, "=b" b, "=c" c, "=d" d
                    : "0" ain, "2" cin;
                }
            });

        }
    }
}

/**
* Query the CPUID leaves this module needs and cache the raw register values
* in the __gshared variables above.
*/
shared static this() {

    logTrace("Loading CPUID ...");
    char[12] vendorID;
    uint unused;
    {
        uint a, a2;
        // View the 12-byte vendor string as three 32-bit registers. This must
        // be a uint* so that its elements can bind to rawCpuid's `ref uint`
        // parameters in the GNU path.
        uint* venptr = cast(uint*) vendorID.ptr;

        version(GNU)
        {
            // The vendor string is laid out as EBX, EDX, ECX.
            rawCpuid(0, 0, a, venptr[0], venptr[2], venptr[1]);
        }
        else version(none) rawCpuid(0, 0, a, venptr[0], venptr[2], venptr[1]);
        else {
            version(D_InlineAsm_X86)
            {
                asm pure nothrow {
                    mov EAX, 0;
                    cpuid;
                    mov a, EAX;
                    mov EAX, venptr;
                    mov [EAX], EBX;
                    mov [EAX + 4], EDX;
                    mov [EAX + 8], ECX;
                }
            }
            else version(D_InlineAsm_X86_64)
            {
                asm pure nothrow {
                    mov EAX, 0;
                    cpuid;
                    mov a, EAX;
                    mov RAX, venptr;
                    mov [RAX], EBX;
                    mov [RAX + 4], EDX;
                    mov [RAX + 8], ECX;
                }
            }
        }


        version(GNU)
        {
            rawCpuid(0x8000_0000, 0, a2, unused, unused, unused);
        }
        else version(none) rawCpuid(0x8000_0000, 0, a2, unused, unused, unused);
        else {
            asm pure nothrow {
                mov EAX, 0x8000_0000;
                cpuid;
                mov a2, EAX;
            }
        }
        max_cpuid = a;
        max_extended_cpuid = a2;

    }

    is_intel = vendorID == "GenuineIntel";
    is_amd = vendorID == "AuthenticAMD";

    {
        uint a, b, c, d;
        version(GNU)
        {
            // Read EBX into `b`: the assignment `apic = b` below would
            // otherwise overwrite a directly-written `apic` with zero.
            rawCpuid(1, 0, a, b, c, d);
        } else version(none) rawCpuid(1, 0, a, b, c, d);
        else
        {
            asm pure nothrow {
                mov EAX, 1; // model, stepping
                cpuid;
                mov a, EAX;
                mov b, EBX;
                mov c, ECX;
                mov d, EDX;
            }
        }
        /// EAX(a) contains stepping, model, family, processor type, extended model,
        /// extended family

        apic = b;
        miscfeatures = c;
        features = d;
    }

    if (max_cpuid >= 7)
    {
        uint ext, reserved;

        version(GNU) rawCpuid(7, 0, unused, ext, reserved, unused);
        else version (none) rawCpuid(7, 0, unused, ext, reserved, unused);
        else
        {
            asm
            {
                mov EAX, 7; // Structured extended feature leaf.
                mov ECX, 0; // Main leaf.
                cpuid;
                mov ext, EBX; // HLE, AVX2, RTM, etc.
                mov reserved, ECX;
            }
        }
        extreserved = reserved;
        extfeatures = ext;
    }

    /*if (miscfeatures & OSXSAVE_BIT)
    {
        uint a, d;
        version(GNU)
        {
            // xgetbv does not affect ebx
            asm pure nothrow {
                "mov $0, %%ecx
                xgetbv"
                : "=a" a, "=d" d
                :
                : "ecx";
            }
        }
        else {
            asm pure nothrow {
                mov ECX, 0;
                xgetbv;
                mov d, EDX;
                mov a, EAX;
            }
        }
        xfeatures = cast(ulong)d << 32 | a;
    }*/

    if (max_extended_cpuid >= 0x8000_0001) {
        uint c, d;
        version(GNU)
        {
            rawCpuid(0x8000_0001, 0, unused, unused, c, d);
        } else version(none) rawCpuid(0x8000_0001, 0, unused, unused, c, d);
        else
        {
            asm pure nothrow {
                mov EAX, 0x8000_0001;
                cpuid;
                mov c, ECX;
                mov d, EDX;
            }
        }
        amdmiscfeatures = c;
        amdfeatures = d;

    }
    if (max_extended_cpuid >= 0x8000_0005) {
        uint c;
        version(GNU)
        {
            rawCpuid(0x8000_0005, 0, unused, unused, c, unused);
        }
        else version(none) rawCpuid(0x8000_0005, 0, unused, unused, c, unused);
        else
        {
            asm pure nothrow {
                mov EAX, 0x8000_0005; // L1 cache
                cpuid;
                // EAX has L1_TLB_4M.
                // EBX has L1_TLB_4K
                // EDX has L1 instruction cache
                mov c, ECX;
            }
        }
        l1cache = c;

    }


    // Try to detect fraudulent vendorIDs
    // (`amd3dnow` is not defined in this file; presumably it resolves to the
    // property exported by core.cpuid.)
    if (amd3dnow) is_intel = false;


}


version (PPC) {
    /**
    * Ask the operating system through sysctl whether a vector unit is present.
    *
    * Returns: true when sysctl succeeds and reports a positive vector type;
    * false otherwise, including on systems where this check is not compiled in.
    *
    * NOTE(review): `sysctl` and the CTL_* / HW_* / CPU_* constants are not
    * imported anywhere in this file -- they must be made available by the
    * platform bindings before this path can compile.
    */
    bool altivecCheckSysctl()
    {
        version (OSX)
            enum supported = true;
        else version (BSD)
            enum supported = true;
        else enum supported = false;
        static if (supported) {
            // Exactly one selector pair must be declared; which one depends on
            // the platform.
            version (OSX)
                int[2] sels = [ CTL_HW, HW_VECTORUNIT ]; // From Apple's docs
            else
                int[2] sels = [ CTL_MACHDEP, CPU_ALTIVEC ];
            int vector_type = 0;
            size_t length = (vector_type).sizeof;
            int error = sysctl(sels.ptr, 2, &vector_type, &length, null, 0);

            if (error == 0 && vector_type > 0)
                return true;
        }
        return false;
    }

    /**
    * Identify AltiVec-capable CPUs from the Processor Version Register.
    *
    * Returns: true when the upper 16 bits of the PVR match one of the models
    * listed below; always false when not compiled for Linux.
    */
    bool altivecCheckPvrEmul()
    {
        bool altivec_capable = false;

        version(linux) {


            /*
            On PowerPC, MSR 287 is PVR, the Processor Version Number
            Normally it is only accessible to ring 0, but Linux and NetBSD
            (others, too, maybe?) will trap and emulate it for us.

            PVR identifiers for various AltiVec enabled CPUs. Taken from
            PearPC and Linux sources, mostly.
            */

            const ushort PVR_G4_7400 = 0x000C;
            const ushort PVR_G5_970 = 0x0039;
            const ushort PVR_G5_970FX = 0x003C;
            const ushort PVR_G5_970MP = 0x0044;
            const ushort PVR_G5_970GX = 0x0045;
            const ushort PVR_POWER6 = 0x003E;
            const ushort PVR_POWER7 = 0x003F;
            const ushort PVR_CELL_PPU = 0x0070;

            // Motorola produced G4s with PVR 0x800[0123C] (at least)
            const ushort PVR_G4_74xx_24 = 0x800;

            uint pvr = 0;

            mixin(`asm pure nothrow { mfspr [pvr], 287; }`); // not supported in DMD?

            // Top 16 bit suffice to identify model
            pvr >>= 16;

            altivec_capable |= (pvr == PVR_G4_7400);
            altivec_capable |= ((pvr >> 4) == PVR_G4_74xx_24);
            altivec_capable |= (pvr == PVR_G5_970);
            altivec_capable |= (pvr == PVR_G5_970FX);
            altivec_capable |= (pvr == PVR_G5_970MP);
            altivec_capable |= (pvr == PVR_G5_970GX);
            altivec_capable |= (pvr == PVR_POWER6);
            altivec_capable |= (pvr == PVR_POWER7);
            altivec_capable |= (pvr == PVR_CELL_PPU);

        }

        return altivec_capable;

    }

}