/**
* Runtime CPU detection
*
* Copyright:
* (C) 2009-2010,2013 Jack Lloyd
* (C) 2014-2015 Etienne Cimon
*
* License:
* Botan is released under the Simplified BSD License (see LICENSE.md)
*/
module botan.utils.cpuid;

import core.cpuid;
import botan.constants;
import botan.utils.types;
import botan.utils.get_byte;
import botan.utils.mem_ops;

/**
* A class handling runtime CPU feature detection.
*
* All state is static. The per-thread static constructor below derives the
* feature bitmap from the module-level registers captured by the
* `shared static this()` at the bottom of this module.
*/
class CPUID
{
public:
    /**
    * Probe the CPU and see what extensions are supported
    */
    static this()
    {
        // The AltiVec probe does not depend on the x86 CPUID instruction, so
        // it must not be skipped when max_cpuid is 0.
        version(PPC)
        {
            if (altivecCheckSysctl() || altivecCheckPvrEmul())
                m_altivec_capable = true;
        }

        if (max_cpuid != 0)
        {
            // Leaf 1: EDX goes to the low 32 bits, ECX to the high 32 bits.
            m_x86_processor_flags[0] = (cast(ulong)(miscfeatures) << 32) | features;

            // Leaf 0x8000_0005 ECX[7:0]: L1 data cache line size.
            m_cache_line_size = get_byte(3, l1cache);

            // Some vendors report nothing in leaf 0x8000_0005. Fall back to
            // the CLFLUSH line size (leaf 1, EBX[15:8], in 8-byte units),
            // which is valid whenever the CLFSH feature bit is set.
            if (m_cache_line_size == 0 && x86_processor_flags_has(CPUID_CLFSH_BIT))
                m_cache_line_size = 8 * ((apic >> 8) & 0xFF);

            // Leaf 7: EBX goes to the low 32 bits, ECX to the high 32 bits.
            if (max_cpuid >= 7)
                m_x86_processor_flags[1] = (cast(ulong)(extreserved) << 32) | extfeatures;
        }

        version(X86_64)
        {
            /*
            * If we don't have access to CPUID, we can still safely assume that
            * any x86-64 processor has SSE2 and RDTSC
            */
            if (m_x86_processor_flags[0] == 0)
                m_x86_processor_flags[0] = (1UL << CPUID_SSE2_BIT) | (1UL << CPUID_RDTSC_BIT);
        }
    }


    /**
    * Return a best guess of the cache line size
    */
    static size_t cacheLineSize() { return m_cache_line_size; }

    /**
    * Check if the processor supports RDTSC
    */
    static bool hasRdtsc()
    { return x86_processor_flags_has(CPUID_RDTSC_BIT); }

    /**
    * Check if the processor supports SSE2
    */
    static bool hasSse2()
    { return x86_processor_flags_has(CPUID_SSE2_BIT); }

    /**
    * Check if the processor supports SSSE3
    */
    static bool hasSsse3()
    { return x86_processor_flags_has(CPUID_SSSE3_BIT); }

    /**
    * Check if the processor supports SSE4.1
    */
    static bool hasSse41()
    { return x86_processor_flags_has(CPUID_SSE41_BIT); }

    /**
    * Check if the processor supports SSE4.2
    */
    static bool hasSse42()
    { return x86_processor_flags_has(CPUID_SSE42_BIT); }

    /**
    * Check if the processor supports AVX2
    */
    static bool hasAvx2()
    { return x86_processor_flags_has(CPUID_AVX2_BIT); }

    /**
    * Check if the processor supports AVX-512F
    */
    static bool hasAvx512f()
    { return x86_processor_flags_has(CPUID_AVX512F_BIT); }

    /**
    * Check if the processor supports BMI2
    */
    static bool hasBmi2()
    { return x86_processor_flags_has(CPUID_BMI2_BIT); }

    /**
    * Check if the processor supports AES-NI
    */
    static bool hasAesNi()
    { return x86_processor_flags_has(CPUID_AESNI_BIT); }

    /**
    * Check if the processor supports CLMUL
    */
    static bool hasClmul()
    { return x86_processor_flags_has(CPUID_CLMUL_BIT); }

    /**
    * Check if the processor supports Intel SHA extension
    */
    static bool hasIntelSha()
    { return x86_processor_flags_has(CPUID_SHA_BIT); }

    /**
    * Check if the processor supports ADX extension
    */
    static bool hasAdx()
    { return x86_processor_flags_has(CPUID_ADX_BIT); }

    /**
    * Check if the processor supports RDRAND
    */
    static bool hasRdrand()
    { return x86_processor_flags_has(CPUID_RDRAND_BIT); }

    /**
    * Check if the processor supports RDSEED
    */
    static bool hasRdseed()
    { return x86_processor_flags_has(CPUID_RDSEED_BIT); }

    /**
    * Check if the processor supports AltiVec/VMX
    */
    static bool hasAltivec() { return m_altivec_capable; }

    /**
    * Build a human-readable summary of the detected features.
    *
    * Returns: "CPUID flags: " followed by the space-terminated name of every
    * feature that was detected.
    */
    static string toString()
    {
        import std.array : Appender;
        Appender!string app;

        app ~= "CPUID flags: ";

        // Emit the label only for features that are present.
        void note(bool present, string label)
        {
            if (present)
            {
                app ~= label;
                app ~= ' ';
            }
        }

        note(hasSse2(), "sse2");
        note(hasSsse3(), "ssse3");
        note(hasSse41(), "sse41");
        note(hasSse42(), "sse42");
        note(hasAvx2(), "avx2");
        note(hasAvx512f(), "avx512f");
        note(hasAltivec(), "altivec");

        note(hasRdtsc(), "rdtsc");
        note(hasBmi2(), "bmi2");
        note(hasClmul(), "clmul");
        note(hasAesNi(), "aes_ni");
        note(hasRdrand(), "rdrand");
        note(hasRdseed(), "rdseed");
        note(hasIntelSha(), "intel_sha");
        note(hasAdx(), "adx");

        return app.data;
    }
private:
    alias CPUIDbits = int;

    // Bit positions inside m_x86_processor_flags, viewed as one 128-bit field:
    //   0 .. 31  leaf 1 EDX      32 .. 63  leaf 1 ECX
    //  64 .. 95  leaf 7 EBX      96 .. 127 leaf 7 ECX
    enum : CPUIDbits {
        CPUID_RDTSC_BIT = 4,
        CPUID_CLFSH_BIT = 19,
        CPUID_SSE2_BIT = 26,
        CPUID_CLMUL_BIT = 33,
        CPUID_SSSE3_BIT = 41,
        CPUID_SSE41_BIT = 51,
        CPUID_SSE42_BIT = 52,
        CPUID_AESNI_BIT = 57,
        CPUID_RDRAND_BIT = 62,

        CPUID_AVX2_BIT = 64+5,
        CPUID_BMI2_BIT = 64+8,
        CPUID_AVX512F_BIT = 64+16,
        CPUID_RDSEED_BIT = 64+18,
        CPUID_ADX_BIT = 64+19,
        CPUID_SHA_BIT = 64+29,
    }

    /// Test a single bit (one of the CPUID_*_BIT values) of the feature bitmap.
    static bool x86_processor_flags_has(int bit)
    {
        return ((m_x86_processor_flags[bit / 64] >> (bit % 64)) & 1) != 0;
    }

    static ulong[2] m_x86_processor_flags;
    static size_t m_cache_line_size;
    static bool m_altivec_capable;
}

package:

// Raw register values captured once by the shared static constructor below.
private __gshared {
    bool is_intel; // true = _probably_ an Intel processor, might be faking
    bool is_amd;   // true = _probably_ an AMD processor

    uint apic;                // leaf 1 EBX
    uint max_cpuid;           // leaf 0 EAX
    uint max_extended_cpuid;  // leaf 0x8000_0000 EAX (0 when never read)
    uint extfeatures;         // leaf 7 EBX
    uint extreserved;         // leaf 7 ECX
    uint miscfeatures;        // leaf 1 ECX
    uint amdmiscfeatures;     // leaf 0x8000_0001 ECX
    uint features;            // leaf 1 EDX
    uint amdfeatures;         // leaf 0x8000_0001 EDX
    uint l1cache;             // leaf 0x8000_0005 ECX
}
// EBX is used to store GOT's address in PIC on x86, so we must preserve its value
version(D_PIC)
    version(X86)
        version = PreserveEBX;

// todo: LDC/GDC
version(GNU)
{
    /// Execute CPUID with EAX = ain and ECX = cin, returning the four result registers.
    private void rawCpuid(uint ain, uint cin, ref uint a, ref uint b, ref uint c, ref uint d)
    {
        version(PreserveEBX)
        {
            asm pure nothrow {
                "xchg %1, %%ebx
                cpuid
                xchg %1, %%ebx"
                : "=a" a, "=r" b, "=c" c, "=d" d
                : "0" ain, "2" cin;
            }
        }
        else
        {
            asm pure nothrow {
                "cpuid"
                : "=a" a, "=b" b, "=c" c, "=d" d
                : "0" ain, "2" cin;
            }
        }
    }
}

version(LDC) version(AArch64) {
    import ldc.llvmasm : __asmtuple;
    /// Stub: the body is empty, so none of the output locations are written.
    private void rawCpuid(uint eax, uint ecx, uint* a, uint* b, uint* c, uint* d)
    {
    }
}

/**
* Read the CPUID leaves once per process and store the raw register values in
* the module-level __gshared variables consumed by CPUID's static constructor.
*/
shared static this() {

    logTrace("Loading CPUID ...");
    char[12] vendorID;
    uint unused;

    // Leaf 0 (highest basic leaf + vendor string) and leaf 0x8000_0000
    // (highest extended leaf).
    {
        uint a, a2;
        char* venptr = vendorID.ptr;

        version(LDC) {
            version(AArch64) rawCpuid(unused, unused, &a, cast(uint*) venptr, cast(uint*) (venptr+2*uint.sizeof),
                                      cast(uint*) (venptr+uint.sizeof));
        } else {
            version(D_InlineAsm_X86)
            {
                asm pure nothrow {
                    mov EAX, 0;
                    cpuid;
                    mov a, EAX;
                    mov EAX, venptr;
                    mov [EAX], EBX;
                    mov [EAX + 4], EDX;
                    mov [EAX + 8], ECX;
                }
            }
            else version(D_InlineAsm_X86_64)
            {
                asm pure nothrow {
                    mov EAX, 0;
                    cpuid;
                    mov a, EAX;
                    mov RAX, venptr;
                    mov [RAX], EBX;
                    mov [RAX + 4], EDX;
                    mov [RAX + 8], ECX;
                }
            }
        }

        version(LDC) {
            version(AArch64) rawCpuid(0x8000_0000U, 0U, &a2, &unused, &unused, &unused);
        } else {
            asm pure nothrow {
                mov EAX, 0x8000_0000;
                cpuid;
                mov a2, EAX;
            }
        }
        max_cpuid = a;
        max_extended_cpuid = a2;
    }

    is_intel = vendorID == "GenuineIntel";
    is_amd = vendorID == "AuthenticAMD";

    // Leaf 1: model/stepping, APIC/CLFLUSH info and the basic feature words.
    {
        uint a, b, c, d;
        version(LDC) {
            // Write EBX into b (not straight into apic) so the assignment
            // below does not clobber it.
            version(AArch64) rawCpuid(1U, 0U, &a, &b, &c, &d);
        } else
        {
            asm pure nothrow {
                mov EAX, 1; // model, stepping
                cpuid;
                mov a, EAX;
                mov b, EBX;
                mov c, ECX;
                mov d, EDX;
            }
        }
        /// EAX(a) contains stepping, model, family, processor type, extended model,
        /// extended family

        apic = b;
        miscfeatures = c;
        features = d;
    }

    if (max_cpuid >= 7)
    {
        uint ext, reserved;

        version (LDC) {
            version(AArch64) rawCpuid(7U, 0U, &unused, &ext, &reserved, &unused);
        } else
        {
            asm
            {
                mov EAX, 7; // Structured extended feature leaf.
                mov ECX, 0; // Main leaf.
                cpuid;
                mov ext, EBX; // HLE, AVX2, RTM, etc.
                mov reserved, ECX;
            }
        }
        extreserved = reserved;
        extfeatures = ext;
    }

    // Disabled: XGETBV-based query of the extended state enabled by the OS.
    /*if (miscfeatures & OSXSAVE_BIT)
    {
        uint a, d;
        version(GNU)
        {
            // xgetbv does not affect ebx
            asm pure nothrow {
                "mov $0, %%ecx
                xgetbv"
                : "=a" a, "=d" d
                :
                : "ecx";
            }
        }
        else {
            asm pure nothrow {
                mov ECX, 0;
                xgetbv;
                mov d, EDX;
                mov a, EAX;
            }
        }
        xfeatures = cast(ulong)d << 32 | a;
    }*/

    if (max_extended_cpuid >= 0x8000_0001) {
        uint c, d;
        version(LDC) {
            version(AArch64) rawCpuid(0x8000_0001U, 0U, &unused, &unused, &c, &d);
        } else
        {
            asm pure nothrow {
                mov EAX, 0x8000_0001;
                cpuid;
                mov c, ECX;
                mov d, EDX;
            }
        }
        amdmiscfeatures = c;
        amdfeatures = d;
    }

    if (max_extended_cpuid >= 0x8000_0005) {
        uint c;
        version(LDC) {
            version(AArch64) rawCpuid(0x8000_0005U, 0U, &unused, &unused, &c, &unused);
        } else
        {
            asm pure nothrow {
                mov EAX, 0x8000_0005; // L1 cache
                cpuid;
                // EAX has L1_TLB_4M.
                // EBX has L1_TLB_4K
                // EDX has L1 instruction cache
                mov c, ECX;
            }
        }
        l1cache = c;
    }


    // Try to detect fraudulent vendorIDs
    if (amd3dnow) is_intel = false;
}


version (PPC) {
    /**
    * Ask the kernel through sysctl whether a vector unit is present.
    *
    * Returns: true when the sysctl call succeeds and reports a positive
    * vector type; false otherwise, including on systems where this query is
    * not attempted.
    */
    bool altivecCheckSysctl()
    {
        // NOTE(review): sysctl and the CTL_*/HW_*/CPU_* constants are not
        // imported in this module; presumably they must come from the
        // platform's sysctl bindings. Confirm on a PPC build.
        version (OSX)
        {
            enum supported = true;
            // From Apple's docs
            int[2] sels = [ CTL_HW, HW_VECTORUNIT ];
        }
        else version (BSD)
        {
            enum supported = true;
            int[2] sels = [ CTL_MACHDEP, CPU_ALTIVEC ];
        }
        else
            enum supported = false;

        static if (supported) {
            int vector_type = 0;
            size_t length = (vector_type).sizeof;
            int error = sysctl(sels.ptr, 2, &vector_type, &length, null, 0);

            if (error == 0 && vector_type > 0)
                return true;
        }
        return false;
    }

    /**
    * Identify AltiVec-capable processors from the Processor Version Register.
    *
    * Returns: true on Linux when the upper 16 bits of the PVR match one of
    * the known AltiVec-enabled models; false on every other system.
    */
    bool altivecCheckPvrEmul()
    {
        bool altivec_capable = false;

        version(linux) {

            /*
            On PowerPC, MSR 287 is PVR, the Processor Version Number
            Normally it is only accessible to ring 0, but Linux and NetBSD
            (others, too, maybe?) will trap and emulate it for us.

            PVR identifiers for various AltiVec enabled CPUs. Taken from
            PearPC and Linux sources, mostly.
            */

            const ushort PVR_G4_7400  = 0x000C;
            const ushort PVR_G5_970   = 0x0039;
            const ushort PVR_G5_970FX = 0x003C;
            const ushort PVR_G5_970MP = 0x0044;
            const ushort PVR_G5_970GX = 0x0045;
            const ushort PVR_POWER6   = 0x003E;
            const ushort PVR_POWER7   = 0x003F;
            const ushort PVR_CELL_PPU = 0x0070;

            // Motorola produced G4s with PVR 0x800[0123C] (at least)
            const ushort PVR_G4_74xx_24  = 0x800;

            uint pvr = 0;

            mixin(`asm pure nothrow { mfspr [pvr], 287; }`); // not supported in DMD?

            // Top 16 bit suffice to identify model
            pvr >>= 16;

            altivec_capable |= (pvr == PVR_G4_7400);
            altivec_capable |= ((pvr >> 4) == PVR_G4_74xx_24);
            altivec_capable |= (pvr == PVR_G5_970);
            altivec_capable |= (pvr == PVR_G5_970FX);
            altivec_capable |= (pvr == PVR_G5_970MP);
            altivec_capable |= (pvr == PVR_G5_970GX);
            altivec_capable |= (pvr == PVR_POWER6);
            altivec_capable |= (pvr == PVR_POWER7);
            altivec_capable |= (pvr == PVR_CELL_PPU);

        }

        return altivec_capable;

    }

}