1 /**
2 * Runtime CPU detection
3 * 
4 * Copyright:
5 * (C) 2009-2010,2013 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *
8 * License:
9 * Botan is released under the Simplified BSD License (see LICENSE.md)
10 */
11 module botan.utils.cpuid;
12 
13 import core.cpuid;
14 import botan.constants;
15 import botan.utils.types;
16 import botan.utils.types;
17 import botan.utils.get_byte;
18 import botan.utils.mem_ops;
19 
/**
* A class handling runtime CPU feature detection.
*
* All members are static. The per-thread constructor copies the raw CPUID
* register values gathered by this module's `shared static this` into a
* 128-bit flag set that the `has*` accessors query.
*/
class CPUID
{
public:
    /**
    * Probe the CPU and see what extensions are supported
    *
    * Flag word 0 holds CPUID leaf 1 (EDX in the low half, ECX in the high half);
    * flag word 1 holds CPUID leaf 7 (EBX in the low half, ECX in the high half).
    */
    static this()
    {
        // AltiVec detection does not rely on x86 CPUID data, so it must run
        // before anything that is conditional on max_cpuid.
        version(PPC)
        {
            if (altivecCheckSysctl() || altivecCheckPvrEmul())
                m_altivec_capable = true;
        }

        if (max_cpuid != 0)
        {
            m_x86_processor_flags[0] = (cast(ulong)(miscfeatures) << 32) | features;

            if (max_cpuid >= 7)
                m_x86_processor_flags[1] = (cast(ulong)(extreserved) << 32) | extfeatures;

            // Low byte of leaf 0x8000_0005 ECX: L1 data cache line size in bytes.
            m_cache_line_size = get_byte(3, l1cache);

            // That leaf is not populated on every vendor. When it yields zero,
            // fall back to the CLFLUSH line size reported in leaf 1 EBX[15:8]
            // (in units of 8 bytes), which is valid when the CLFSH flag is set.
            if (m_cache_line_size == 0 && (features & CLFSH_FEATURE_MASK))
                m_cache_line_size = ((apic >> 8) & 0xFF) * 8;
        }

        version(X86_64)
        {
            /*
            * If we don't have access to CPUID, we can still safely assume that
            * any x86-64 processor has SSE2 and RDTSC
            */
            if (m_x86_processor_flags[0] == 0)
                m_x86_processor_flags[0] = (1UL << CPUID_SSE2_BIT) | (1UL << CPUID_RDTSC_BIT);
        }
    }


    /**
    * Return a best guess of the cache line size in bytes (0 if unknown)
    */
    static size_t cacheLineSize() { return m_cache_line_size; }

    /**
    * Check if the processor supports RDTSC
    */
    static bool hasRdtsc()
    { return x86_processor_flags_has(CPUID_RDTSC_BIT); }

    /**
    * Check if the processor supports SSE2
    */
    static bool hasSse2()
    { return x86_processor_flags_has(CPUID_SSE2_BIT); }

    /**
    * Check if the processor supports SSSE3
    */
    static bool hasSsse3()
    { return x86_processor_flags_has(CPUID_SSSE3_BIT); }

    /**
    * Check if the processor supports SSE4.1
    */
    static bool hasSse41()
    { return x86_processor_flags_has(CPUID_SSE41_BIT); }

    /**
    * Check if the processor supports SSE4.2
    */
    static bool hasSse42()
    { return x86_processor_flags_has(CPUID_SSE42_BIT); }

    /**
    * Check if the processor supports AVX2
    */
    static bool hasAvx2()
    { return x86_processor_flags_has(CPUID_AVX2_BIT); }

    /**
    * Check if the processor supports AVX-512F
    */
    static bool hasAvx512f()
    { return x86_processor_flags_has(CPUID_AVX512F_BIT); }

    /**
    * Check if the processor supports BMI2
    */
    static bool hasBmi2()
    { return x86_processor_flags_has(CPUID_BMI2_BIT); }

    /**
    * Check if the processor supports AES-NI
    */
    static bool hasAesNi()
    { return x86_processor_flags_has(CPUID_AESNI_BIT); }

    /**
    * Check if the processor supports CLMUL
    */
    static bool hasClmul()
    { return x86_processor_flags_has(CPUID_CLMUL_BIT); }

    /**
    * Check if the processor supports Intel SHA extension
    */
    static bool hasIntelSha()
    { return x86_processor_flags_has(CPUID_SHA_BIT); }

    /**
    * Check if the processor supports ADX extension
    */
    static bool hasAdx()
    { return x86_processor_flags_has(CPUID_ADX_BIT); }

    /**
    * Check if the processor supports RDRAND
    */
    static bool hasRdrand()
    { return x86_processor_flags_has(CPUID_RDRAND_BIT); }

    /**
    * Check if the processor supports RDSEED
    */
    static bool hasRdseed()
    { return x86_processor_flags_has(CPUID_RDSEED_BIT); }

    /**
    * Check if the processor supports AltiVec/VMX
    */
    static bool hasAltivec() { return m_altivec_capable; }

    /**
    * Build a human-readable summary of the detected features.
    *
    * Returns: "CPUID flags: " followed by the name of every supported
    *          feature, each name terminated by a single space.
    */
    static string toString()
    {
        import std.array : Appender;
        Appender!string app;

        app ~= "CPUID flags: ";

        // Appending a bool directly would emit the raw characters \x00/\x01,
        // so emit the feature's name instead, and only when it is present.
        void note(bool present, string name)
        {
            if (present)
            {
                app ~= name;
                app ~= ' ';
            }
        }

        note(CPUID.hasSse2, "sse2");
        note(CPUID.hasSsse3, "ssse3");
        note(CPUID.hasSse41, "sse41");
        note(CPUID.hasSse42, "sse42");
        note(CPUID.hasAvx2, "avx2");
        note(CPUID.hasAvx512f, "avx512f");
        note(CPUID.hasAltivec, "altivec");

        note(CPUID.hasRdtsc, "rdtsc");
        note(CPUID.hasBmi2, "bmi2");
        note(CPUID.hasClmul, "clmul");
        note(CPUID.hasAesNi, "aes_ni");
        note(CPUID.hasRdrand, "rdrand");
        note(CPUID.hasRdseed, "rdseed");
        note(CPUID.hasIntelSha, "intel_sha");
        note(CPUID.hasAdx, "adx");

        return app.data;
    }
private:
    // Bit positions inside the 128-bit flag set: 0..31 = leaf 1 EDX,
    // 32..63 = leaf 1 ECX, 64..95 = leaf 7 EBX, 96..127 = leaf 7 ECX.
    alias CPUIDbits = int;
    enum : CPUIDbits {
        CPUID_RDTSC_BIT = 4,
        CPUID_SSE2_BIT = 26,
        CPUID_CLMUL_BIT = 33,
        CPUID_SSSE3_BIT = 41,
        CPUID_SSE41_BIT = 51,
        CPUID_SSE42_BIT = 52,
        CPUID_AESNI_BIT = 57,
        CPUID_RDRAND_BIT = 62,

        CPUID_AVX2_BIT = 64+5,
        CPUID_BMI2_BIT = 64+8,
        CPUID_AVX512F_BIT = 64+16,
        CPUID_RDSEED_BIT = 64+18,
        CPUID_ADX_BIT = 64+19,
        CPUID_SHA_BIT = 64+29,
    }

    // Leaf 1 EDX bit 19 (CLFSH): the CLFLUSH line size field in EBX is valid.
    enum uint CLFSH_FEATURE_MASK = 1u << 19;

    /**
    * Test a single bit of the 128-bit processor flag set.
    *
    * Params:
    *   bit = one of the CPUID_*_BIT positions (0..127)
    */
    static bool x86_processor_flags_has(int bit)
    {
        return ((m_x86_processor_flags[bit/64] >> (bit % 64)) & 1) != 0;
    }

    static ulong[2] m_x86_processor_flags;
    static size_t m_cache_line_size;
    static bool m_altivec_capable;
}
210 
211 package:
212 
// Raw CPUID results shared by all threads. They are written by this module's
// `shared static this` and read by CPUID's static constructor.
private __gshared {
    bool is_intel; // true = _probably_ an Intel processor, might be faking
    bool is_amd; // true = _probably_ an AMD processor

    uint apic;               // leaf 1, EBX
    uint max_cpuid;          // leaf 0, EAX: highest supported standard leaf
    uint max_extended_cpuid; // leaf 0x8000_0000, EAX: highest supported extended leaf (stays 0 if never probed)
    uint extfeatures;        // leaf 7 (ECX = 0), EBX
    uint extreserved;        // leaf 7 (ECX = 0), ECX
    uint miscfeatures;       // leaf 1, ECX
    uint amdmiscfeatures;    // leaf 0x8000_0001, ECX
    uint features;           // leaf 1, EDX
    uint amdfeatures;        // leaf 0x8000_0001, EDX
    uint l1cache;            // leaf 0x8000_0005, ECX
}
// In position-independent code on 32-bit x86, EBX holds the GOT address, so
// its value must be preserved across CPUID. PreserveEBX selects the
// xchg-based variant of the GNU rawCpuid in this file.
version(D_PIC)
    version(X86)
        version = PreserveEBX;
232         
// todo: LDC/GDC
version(GNU)
{
    /**
    * Execute the CPUID instruction using GCC-style extended inline assembly.
    *
    * Params:
    *   ain = value loaded into EAX before CPUID (tied to output operand 0)
    *   cin = value loaded into ECX before CPUID (tied to output operand 2)
    *   a   = receives EAX
    *   b   = receives EBX
    *   c   = receives ECX
    *   d   = receives EDX
    */
    private void rawCpuid(uint ain, uint cin, ref uint a, ref uint b, ref uint c, ref uint d)
    {
        version(PreserveEBX)
        {
            // EBX must keep its value here, so operand 1 lives in an arbitrary
            // register ("=r") that is swapped with EBX around the CPUID: the
            // second xchg moves CPUID's EBX result into the operand and puts
            // the original EBX back.
            asm pure nothrow { 
                "xchg %1, %%ebx
                cpuid 
                xchg %1, %%ebx"
                    : "=a" a, "=r" b, "=c" c, "=d" d 
                        : "0" ain, "2" cin; 
            }
        }
        else
        {
            // EBX can be used directly as an output register ("=b").
            asm pure nothrow { 
                "cpuid"
                    : "=a" a, "=b" b, "=c" c, "=d" d 
                        : "0" ain, "2" cin; 
            }
        }
    }
}
258 
version(LDC) version(AArch64) {
    import ldc.llvmasm : __asmtuple;
    /**
    * Placeholder with a CPUID-like signature for LDC on AArch64.
    *
    * The body is empty: none of the output pointers is written, so every
    * caller keeps whatever values its variables held before the call.
    */
    private void rawCpuid(uint eax, uint ecx, uint* a, uint* b, uint* c, uint* d)
    {
    }
}
265 
/**
* Module constructor: queries CPUID once and stores the raw register values in
* the module-level __gshared variables.
*
* The DMD-style inline assembly path is chosen whenever the compiler defines
* D_InlineAsm_X86 or D_InlineAsm_X86_64, whichever compiler that is. Where no
* such assembler exists, LDC on AArch64 calls the rawCpuid stub and all other
* configurations leave the variables at their zero defaults.
*/
shared static this() {
    
    logTrace("Loading CPUID ...");

    // Version conditions do not open a scope, so this constant is visible to
    // every `static if` in the rest of the function.
    version(D_InlineAsm_X86)
        enum bool haveX86Asm = true;
    else version(D_InlineAsm_X86_64)
        enum bool haveX86Asm = true;
    else
        enum bool haveX86Asm = false;

    char[12] vendorID;
    uint unused;
    {
        uint a, a2;
        char * venptr = vendorID.ptr;

        // Leaf 0: highest standard leaf in EAX, vendor string in EBX:EDX:ECX.
        version(D_InlineAsm_X86)
        {
            asm pure nothrow {
                mov EAX, 0;
                cpuid;
                mov a, EAX;
                mov EAX, venptr;
                mov [EAX], EBX;
                mov [EAX + 4], EDX;
                mov [EAX + 8], ECX;
            }
        }
        else version(D_InlineAsm_X86_64)
        {
            asm pure nothrow {
                mov EAX, 0;
                cpuid;
                mov a, EAX;
                mov RAX, venptr;
                mov [RAX], EBX;
                mov [RAX + 4], EDX;
                mov [RAX + 8], ECX;
            }
        }
        else version(LDC)
        {
            version(AArch64) rawCpuid(unused, unused, &a, cast(uint*) venptr, cast(uint*) (venptr+2*uint.sizeof), 
                                cast(uint*) (venptr+uint.sizeof));
        }

        // Leaf 0x8000_0000: highest extended leaf in EAX.
        static if (haveX86Asm)
        {
            asm pure nothrow {
                mov EAX, 0x8000_0000;
                cpuid;
                mov a2, EAX;
            }
        }
        else version(LDC)
        {
            version(AArch64) rawCpuid(0x8000_0000U, 0U, &a2, &unused, &unused, &unused);
        }

        max_cpuid = a;
        max_extended_cpuid = a2;
    
    }
    
    is_intel = vendorID == "GenuineIntel";
    is_amd = vendorID == "AuthenticAMD";

    {
        uint a, b, c, d;
        static if (haveX86Asm)
        {
            asm pure nothrow {
                mov EAX, 1; // model, stepping
                cpuid;
                mov a, EAX;
                mov b, EBX;
                mov c, ECX;
                mov d, EDX;
            }
        }
        else version(LDC)
        {
            version(AArch64) rawCpuid(1U, 0U, &a, &apic, &c, &d);
        }
        /// EAX(a) contains stepping, model, family, processor type, extended model,
        /// extended family

        apic = b;
        miscfeatures = c;
        features = d;
    }

    if (max_cpuid >= 7)
    {
        uint ext, reserved;

        static if (haveX86Asm)
        {
            asm
            {
                mov EAX, 7; // Structured extended feature leaf.
                mov ECX, 0; // Main leaf.
                cpuid;
                mov ext, EBX; // HLE, AVX2, RTM, etc.
                mov reserved, ECX;
            }
        }
        else version(LDC)
        {
            version(AArch64) rawCpuid(7U, 0U, &unused, &ext, &reserved, &unused);
        }
        extreserved = reserved;
        extfeatures = ext;
    }
    
    /*if (miscfeatures & OSXSAVE_BIT)
    {
        uint a, d;
        version(GNU)
        {
            // xgetbv does not affect ebx
            asm pure nothrow {
                "mov $0, %%ecx
                xgetbv"
              : "=a" a, "=d" d
              :
              : "ecx";
            }    
        }
        else {
            asm pure nothrow {
                mov ECX, 0;
                xgetbv;
                mov d, EDX;
                mov a, EAX;
            }
        }
        xfeatures = cast(ulong)d << 32 | a;
    }*/

    if (max_extended_cpuid >= 0x8000_0001) {
        uint c, d;
        static if (haveX86Asm)
        {
            asm pure nothrow {
                mov EAX, 0x8000_0001;
                cpuid;
                mov c, ECX;
                mov d, EDX;
            }
        }
        else version(LDC)
        {
            version(AArch64) rawCpuid(0x8000_0001U, 0U, &unused, &unused, &c, &d);
        }
        amdmiscfeatures = c;
        amdfeatures = d;

    }
    if (max_extended_cpuid >= 0x8000_0005) {
        uint c;
        static if (haveX86Asm)
        {
            asm pure nothrow {
                mov EAX, 0x8000_0005; // L1 cache
                cpuid;
                // EAX has L1_TLB_4M.
                // EBX has L1_TLB_4K
                // EDX has L1 instruction cache
                mov c, ECX;
            }
        }
        else version(LDC)
        {
            version(AArch64) rawCpuid(0x8000_0005U, 0U, &unused, &unused, &c, &unused);
        }
        l1cache = c;

    }
    

    // Try to detect fraudulent vendorIDs
    // NOTE(review): amd3dnow is not declared in this module; presumably it is
    // the property from the imported core.cpuid - confirm.
    if (amd3dnow) is_intel = false;


}
433 
434 
435 version (PPC) {
436     bool altivecCheckSysctl()
437     {
438         version (OSX)
439             enum supported = true;
440         else version (BSD)
441             enum supported = true;
442         else enum supported = false;
443         static if (supported) {
444             int[2] sels = [ CTL_MACHDEP, CPU_ALTIVEC ];
445             // From Apple's docs
446             int[2] sels = [ CTL_HW, HW_VECTORUNIT ];
447             int vector_type = 0;
448             size_t length = (vector_type).sizeof;
449             int error = sysctl(sels, 2, &vector_type, &length, NULL, 0);
450             
451             if (error == 0 && vector_type > 0)
452                 return true;
453         }
454         return false;
455     }
456     
457     bool altivecCheckPvrEmul()
458     {
459         bool altivec_capable = false;
460         
461         version(linux) {
462             
463             
464             /*
465             On PowerPC, MSR 287 is PVR, the Processor Version Number
466             Normally it is only accessible to ring 0, but Linux and NetBSD
467             (others, too, maybe?) will trap and emulate it for us.
468 
469             PVR identifiers for various AltiVec enabled CPUs. Taken from
470             PearPC and Linux sources, mostly.
471             */
472             
473             const ushort PVR_G4_7400  = 0x000C;
474             const ushort PVR_G5_970    = 0x0039;
475             const ushort PVR_G5_970FX = 0x003C;
476             const ushort PVR_G5_970MP = 0x0044;
477             const ushort PVR_G5_970GX = 0x0045;
478             const ushort PVR_POWER6    = 0x003E;
479             const ushort PVR_POWER7    = 0x003F;
480             const ushort PVR_CELL_PPU = 0x0070;
481             
482             // Motorola produced G4s with PVR 0x800[0123C] (at least)
483             const ushort PVR_G4_74xx_24  = 0x800;
484             
485             uint pvr = 0;
486             
487             mixin(`asm pure nothrow { mfspr [pvr], 287; }`); // not supported in DMD?
488             
489             // Top 16 bit suffice to identify model
490             pvr >>= 16;
491             
492             altivec_capable |= (pvr == PVR_G4_7400);
493             altivec_capable |= ((pvr >> 4) == PVR_G4_74xx_24);
494             altivec_capable |= (pvr == PVR_G5_970);
495             altivec_capable |= (pvr == PVR_G5_970FX);
496             altivec_capable |= (pvr == PVR_G5_970MP);
497             altivec_capable |= (pvr == PVR_G5_970GX);
498             altivec_capable |= (pvr == PVR_POWER6);
499             altivec_capable |= (pvr == PVR_POWER7);
500             altivec_capable |= (pvr == PVR_CELL_PPU);
501             
502         }
503         
504         return altivec_capable;
505         
506     }
507     
508 }