1 /**
2 * Runtime CPU detection
3 * 
4 * Copyright:
5 * (C) 2009-2010,2013 Jack Lloyd
6 * (C) 2014-2015 Etienne Cimon
7 *
8 * License:
9 * Botan is released under the Simplified BSD License (see LICENSE.md)
10 */
11 module botan.utils.cpuid;
12 
13 import core.cpuid;
14 import botan.constants;
15 import botan.utils.types;
16 import botan.utils.types;
17 import botan.utils.get_byte;
18 import botan.utils.mem_ops;
19 
20 /**
21 * A class handling runtime CPU feature detection
22 */
class CPUID
{
public:
    /**
    * Probe the CPU and see what extensions are supported.
    *
    * Runs as a (thread-local) static constructor and consumes the raw CPUID
    * register values that the module constructor stored in the module-level
    * variables (max_cpuid, features, miscfeatures, ...).
    */
    static this()
    {
        // Start from a conservative guess; replaced below when CPUID reports a size.
        m_cache_line_size = DEFAULT_CACHE_LINE_SIZE;

        // The AltiVec probe does not rely on x86 CPUID data, so it must run
        // before the early return taken when no CPUID leaves are available.
        version(PPC)
            if (altivecCheckSysctl() || altivecCheckPvrEmul())
                m_altivec_capable = true;

        if (max_cpuid == 0)
            return;

        // Leaf 1: EDX (features) is the low half, ECX (miscfeatures) the high half.
        m_x86_processor_flags[0] = (cast(ulong)(miscfeatures) << 32) | features;

        // Low byte of ECX from leaf 0x8000_0005 (L1 data cache line size).
        size_t line_size = get_byte(3, l1cache);

        // That leaf is not populated on every processor; fall back to the
        // CLFLUSH line size (leaf 1, EBX bits 15..8, in units of 8 bytes).
        if (line_size == 0 && x86_processor_flags_has(CPUID_CLFSH_BIT))
            line_size = 8 * ((apic >> 8) & 0xFF);

        if (line_size != 0)
            m_cache_line_size = line_size;

        // Leaf 7: EBX (extfeatures) is the low half, ECX (extreserved) the high half.
        if (max_cpuid >= 7)
            m_x86_processor_flags[1] = (cast(ulong)(extreserved) << 32) | extfeatures;

        if (is_amd)
        {
            version(X86_64) {
                /*
                * If we don't have access to CPUID, we can still safely assume that
                * any x86-64 processor has SSE2 and RDTSC
                */
                // NOTE(review): only reached for AMD parts that reported
                // max_cpuid != 0 -- confirm this is the intended condition.
                if (m_x86_processor_flags[0] == 0)
                    m_x86_processor_flags[0] = (1UL << CPUID_SSE2_BIT) | (1UL << CPUID_RDTSC_BIT);
            }
        }
    }


    /**
    * Return a best guess of the cache line size
    */
    static size_t cacheLineSize() { return m_cache_line_size; }

    /**
    * Check if the processor supports RDTSC
    */
    static bool hasRdtsc()
    { return x86_processor_flags_has(CPUID_RDTSC_BIT); }

    /**
    * Check if the processor supports SSE2
    */
    static bool hasSse2()
    { return x86_processor_flags_has(CPUID_SSE2_BIT); }

    /**
    * Check if the processor supports SSSE3
    */
    static bool hasSsse3()
    { return x86_processor_flags_has(CPUID_SSSE3_BIT); }

    /**
    * Check if the processor supports SSE4.1
    */
    static bool hasSse41()
    { return x86_processor_flags_has(CPUID_SSE41_BIT); }

    /**
    * Check if the processor supports SSE4.2
    */
    static bool hasSse42()
    { return x86_processor_flags_has(CPUID_SSE42_BIT); }

    /**
    * Check if the processor supports AVX2
    */
    static bool hasAvx2()
    { return x86_processor_flags_has(CPUID_AVX2_BIT); }

    /**
    * Check if the processor supports AVX-512F
    */
    static bool hasAvx512f()
    { return x86_processor_flags_has(CPUID_AVX512F_BIT); }

    /**
    * Check if the processor supports BMI2
    */
    static bool hasBmi2()
    { return x86_processor_flags_has(CPUID_BMI2_BIT); }

    /**
    * Check if the processor supports AES-NI
    */
    static bool hasAesNi()
    { return x86_processor_flags_has(CPUID_AESNI_BIT); }

    /**
    * Check if the processor supports CLMUL
    */
    static bool hasClmul()
    { return x86_processor_flags_has(CPUID_CLMUL_BIT); }

    /**
    * Check if the processor supports Intel SHA extension
    */
    static bool hasIntelSha()
    { return x86_processor_flags_has(CPUID_SHA_BIT); }

    /**
    * Check if the processor supports ADX extension
    */
    static bool hasAdx()
    { return x86_processor_flags_has(CPUID_ADX_BIT); }

    /**
    * Check if the processor supports RDRAND
    */
    static bool hasRdrand()
    { return x86_processor_flags_has(CPUID_RDRAND_BIT); }

    /**
    * Check if the processor supports RDSEED
    */
    static bool hasRdseed()
    { return x86_processor_flags_has(CPUID_RDSEED_BIT); }

    /**
    * Check if the processor supports AltiVec/VMX
    */
    static bool hasAltivec() { return m_altivec_capable; }

    /**
    * Build a human-readable summary of the detected features.
    *
    * Returns: "CPUID flags: " followed by the name of every supported
    * feature, each name terminated by a single space.
    */
    static string toString()
    {
        import std.array : Appender;
        Appender!string app;

        app ~= "CPUID flags: ";

        // Appending the bool itself would emit a raw 0/1 code unit, so
        // translate each supported feature into its name instead.
        void note(bool supported, string name)
        {
            if (supported)
            {
                app ~= name;
                app ~= ' ';
            }
        }

        note(CPUID.hasSse2, "sse2");
        note(CPUID.hasSsse3, "ssse3");
        note(CPUID.hasSse41, "sse41");
        note(CPUID.hasSse42, "sse42");
        note(CPUID.hasAvx2, "avx2");
        note(CPUID.hasAvx512f, "avx512f");
        note(CPUID.hasAltivec, "altivec");

        note(CPUID.hasRdtsc, "rdtsc");
        note(CPUID.hasBmi2, "bmi2");
        note(CPUID.hasClmul, "clmul");
        note(CPUID.hasAesNi, "aes_ni");
        note(CPUID.hasRdrand, "rdrand");
        note(CPUID.hasRdseed, "rdseed");
        note(CPUID.hasIntelSha, "intel_sha");
        note(CPUID.hasAdx, "adx");

        return app.data;
    }
private:
    // Cache line size reported when CPUID offers no usable information.
    enum size_t DEFAULT_CACHE_LINE_SIZE = 32;

    /*
    * Bit positions inside m_x86_processor_flags, viewed as one 128-bit field:
    *   0..31   leaf 1 EDX,  32..63   leaf 1 ECX,
    *   64..95  leaf 7 EBX,  96..127  leaf 7 ECX.
    */
    alias CPUIDbits = int;
    enum : CPUIDbits {
        CPUID_RDTSC_BIT = 4,
        CPUID_CLFSH_BIT = 19,
        CPUID_SSE2_BIT = 26,
        CPUID_CLMUL_BIT = 33,
        CPUID_SSSE3_BIT = 41,
        CPUID_SSE41_BIT = 51,
        CPUID_SSE42_BIT = 52,
        CPUID_AESNI_BIT = 57,
        CPUID_RDRAND_BIT = 62,

        CPUID_AVX2_BIT = 64+5,
        CPUID_BMI2_BIT = 64+8,
        CPUID_AVX512F_BIT = 64+16,
        CPUID_RDSEED_BIT = 64+18,
        CPUID_ADX_BIT = 64+19,
        CPUID_SHA_BIT = 64+29,
    }

    /// Test a single bit (one of the CPUID_*_BIT positions) of the flag words.
    static bool x86_processor_flags_has(int bit)
    {
        return ((m_x86_processor_flags[bit/64] >> (bit % 64)) & 1) != 0;
    }

    static ulong[2] m_x86_processor_flags;
    static size_t m_cache_line_size;
    static bool m_altivec_capable;
}
210 
211 package:
212 
// Raw CPUID results, filled in once by this module's `shared static this`
// constructor and read afterwards by CPUID's static constructor.
private __gshared {
    bool is_intel; // true = _probably_ an Intel processor, might be faking; cleared again if amd3dnow is set
    bool is_amd; // true = _probably_ an AMD processor

    uint apic; // EBX of CPUID leaf 1
    uint max_cpuid; // EAX of CPUID leaf 0
    uint max_extended_cpuid; // EAX of CPUID leaf 0x8000_0000
    uint extfeatures; // EBX of CPUID leaf 7, sub-leaf 0 (only when max_cpuid >= 7)
    uint extreserved; // ECX of CPUID leaf 7, sub-leaf 0 (only when max_cpuid >= 7)
    uint miscfeatures; // ECX of CPUID leaf 1
    uint amdmiscfeatures; // ECX of CPUID leaf 0x8000_0001
    uint features; // EDX of CPUID leaf 1
    uint amdfeatures; // EDX of CPUID leaf 0x8000_0001
    uint l1cache; // ECX of CPUID leaf 0x8000_0005
}
// Position-independent x86 code keeps the GOT address in EBX, so that
// register has to be saved and restored around every CPUID instruction.
version(D_PIC)
{
    version(X86)
    {
        version = PreserveEBX;
    }
}
232         
233 // todo: LDC/GDC
/*
* GDC (extended asm) wrapper around the CPUID instruction.
*
* Params:
*   ain = value loaded into EAX before CPUID (tied to output operand 0)
*   cin = value loaded into ECX before CPUID (tied to output operand 2)
*   a, b, c, d = receive EAX, EBX, ECX and EDX after CPUID
*/
version(GNU)
{
    private void rawCpuid(uint ain, uint cin, ref uint a, ref uint b, ref uint c, ref uint d)
    {
        version(PreserveEBX)
        {
            // EBX must survive the call: swap it with a scratch register (%1)
            // before CPUID and swap back afterwards, so the scratch register
            // ends up holding CPUID's EBX result and EBX its original value.
            asm pure nothrow { 
                "xchg %1, %%ebx
                cpuid 
                xchg %1, %%ebx"
                    : "=a" a, "=r" b, "=c" c, "=d" d 
                        : "0" ain, "2" cin; 
            }
        }
        else
        {
            // EBX may be written directly; bind all four result registers.
            asm pure nothrow { 
                "cpuid"
                    : "=a" a, "=b" b, "=c" c, "=d" d 
                        : "0" ain, "2" cin; 
            }
        }
    }
}
258 
/*
* Disabled variant of rawCpuid: `version(none)` is never satisfied, so this
* code is not compiled. It mirrors the GDC wrapper but emits the asm through
* token-string mixins using the `__asm` spelling.
* NOTE(review): looks like a placeholder for another compiler's inline-asm
* syntax -- confirm before enabling.
*/
version(none) {
    private void rawCpuid(uint ain, uint cin, ref uint a, ref uint b, ref uint c, ref uint d)
    {
        version(PreserveEBX)
        {
            // Same EBX-preserving xchg/cpuid/xchg sequence as the GDC wrapper.
            mixin( q{
                __asm pure nothrow { 
                    "xchg %1, %%ebx
                    cpuid 
                    xchg %1, %%ebx"
                        : "=a" a, "=r" b, "=c" c, "=d" d 
                            : "0" ain, "2" cin; 
                }
            } );
        }
        else
        {
            // Plain CPUID with EBX bound directly as an output.
            mixin( q{
                __asm pure nothrow { 
                    "cpuid"
                        : "=a" a, "=b" b, "=c" c, "=d" d 
                            : "0" ain, "2" cin; 
                }
            });

        }
    }
}
287 
/*
* Module constructor: executes CPUID once per process and stores the raw
* register values in the module-level __gshared variables.
*
* Leaves queried: 0 (vendor string, max basic leaf), 0x8000_0000 (max
* extended leaf), 1 (feature flags), 7 (structured extended features, when
* available), 0x8000_0001 and 0x8000_0005 (extended features and L1 cache,
* when available).
*/
shared static this() {
    
    logTrace("Loading CPUID ...");
    char[12] vendorID;
    uint unused;
    {
        uint a, a2;
        // View the 12-byte vendor string as three 32-bit words so that the
        // wrapper-based paths can bind each word to a `ref uint` output; the
        // DMD asm below only uses the pointer value.
        uint* venptr = cast(uint*) vendorID.ptr;

        // Vendor string layout is EBX, EDX, ECX (words 0, 1, 2).
        version(GNU)
        {
            rawCpuid(0, 0, a, venptr[0], venptr[2], venptr[1]);     
        }
        else version(none) rawCpuid(0, 0, a, venptr[0], venptr[2], venptr[1]);
        else {
            version(D_InlineAsm_X86)
            {
                asm pure nothrow {
                    mov EAX, 0;
                    cpuid;
                    mov a, EAX;
                    mov EAX, venptr;
                    mov [EAX], EBX;
                    mov [EAX + 4], EDX;
                    mov [EAX + 8], ECX;
                }
            }
            else version(D_InlineAsm_X86_64)
            {
                asm pure nothrow {
                    mov EAX, 0;
                    cpuid;
                    mov a, EAX;
                    mov RAX, venptr;
                    mov [RAX], EBX;
                    mov [RAX + 4], EDX;
                    mov [RAX + 8], ECX;
                }
            }
        }

        
        version(GNU)
        {
            rawCpuid(0x8000_0000, 0, a2, unused, unused, unused);
        }
        else version(none) rawCpuid(0x8000_0000, 0, a2, unused, unused, unused);
        else {
            asm pure nothrow {
                mov EAX, 0x8000_0000;
                cpuid;
                mov a2, EAX;
            }
        }
        max_cpuid = a;
        max_extended_cpuid = a2;
    
    }

    is_intel = vendorID == "GenuineIntel";
    is_amd = vendorID == "AuthenticAMD";

    {
        uint a, b, c, d;
        // Every path fills the locals; EBX must land in `b`, because `apic`
        // is assigned from `b` below.
        version(GNU)
        {
            rawCpuid(1, 0, a, b, c, d);
        } else version(none) rawCpuid(1, 0, a, b, c, d);
        else
        {
            asm pure nothrow {
                mov EAX, 1; // model, stepping
                cpuid;
                mov a, EAX;
                mov b, EBX;
                mov c, ECX;
                mov d, EDX;
            }
        }
        /// EAX(a) contains stepping, model, family, processor type, extended model,
        /// extended family

        apic = b;
        miscfeatures = c;
        features = d;
    }

    if (max_cpuid >= 7)
    {
        uint ext, reserved;

        version(GNU) rawCpuid(7, 0, unused, ext, reserved, unused);
        else version (none) rawCpuid(7, 0, unused, ext, reserved, unused);
        else
        {
            asm
            {
                mov EAX, 7; // Structured extended feature leaf.
                mov ECX, 0; // Main leaf.
                cpuid;
                mov ext, EBX; // HLE, AVX2, RTM, etc.
                mov reserved, ECX;
            }
        }
        extreserved = reserved;
        extfeatures = ext;
    }
    
    /*if (miscfeatures & OSXSAVE_BIT)
    {
        uint a, d;
        version(GNU)
        {
            // xgetbv does not affect ebx
            asm pure nothrow {
                "mov $0, %%ecx
                xgetbv"
              : "=a" a, "=d" d
              :
              : "ecx";
            }    
        }
        else {
            asm pure nothrow {
                mov ECX, 0;
                xgetbv;
                mov d, EDX;
                mov a, EAX;
            }
        }
        xfeatures = cast(ulong)d << 32 | a;
    }*/

    if (max_extended_cpuid >= 0x8000_0001) {
        uint c, d;
        version(GNU)
        {
            rawCpuid(0x8000_0001, 0, unused, unused, c, d);
        } else version(none) rawCpuid(0x8000_0001, 0, unused, unused, c, d);
        else
        {
            asm pure nothrow {
                mov EAX, 0x8000_0001;
                cpuid;
                mov c, ECX;
                mov d, EDX;
            }
        }
        amdmiscfeatures = c;
        amdfeatures = d;

    }
    if (max_extended_cpuid >= 0x8000_0005) {
        uint c;
        version(GNU)
        {
            rawCpuid(0x8000_0005, 0, unused, unused, c, unused);
        }
        else version(none) rawCpuid(0x8000_0005, 0, unused, unused, c, unused);
        else
        {
            asm pure nothrow {
                mov EAX, 0x8000_0005; // L1 cache
                cpuid;
                // EAX has L1_TLB_4M.
                // EBX has L1_TLB_4K
                // EDX has L1 instruction cache
                mov c, ECX;
            }
        }
        l1cache = c;

    }
    

    // Try to detect fraudulent vendorIDs
    // NOTE(review): `amd3dnow` is not declared in this module; presumably it
    // resolves to the core.cpuid property -- confirm.
    if (amd3dnow) is_intel = false;


}
469 
470 
471 version (PPC) {
472     bool altivecCheckSysctl()
473     {
474         version (OSX)
475             enum supported = true;
476         else version (BSD)
477             enum supported = true;
478         else enum supported = false;
479         static if (supported) {
480             int[2] sels = [ CTL_MACHDEP, CPU_ALTIVEC ];
481             // From Apple's docs
482             int[2] sels = [ CTL_HW, HW_VECTORUNIT ];
483             int vector_type = 0;
484             size_t length = (vector_type).sizeof;
485             int error = sysctl(sels, 2, &vector_type, &length, NULL, 0);
486             
487             if (error == 0 && vector_type > 0)
488                 return true;
489         }
490         return false;
491     }
492     
493     bool altivecCheckPvrEmul()
494     {
495         bool altivec_capable = false;
496         
497         version(linux) {
498             
499             
500             /*
501             On PowerPC, MSR 287 is PVR, the Processor Version Number
502             Normally it is only accessible to ring 0, but Linux and NetBSD
503             (others, too, maybe?) will trap and emulate it for us.
504 
505             PVR identifiers for various AltiVec enabled CPUs. Taken from
506             PearPC and Linux sources, mostly.
507             */
508             
509             const ushort PVR_G4_7400  = 0x000C;
510             const ushort PVR_G5_970    = 0x0039;
511             const ushort PVR_G5_970FX = 0x003C;
512             const ushort PVR_G5_970MP = 0x0044;
513             const ushort PVR_G5_970GX = 0x0045;
514             const ushort PVR_POWER6    = 0x003E;
515             const ushort PVR_POWER7    = 0x003F;
516             const ushort PVR_CELL_PPU = 0x0070;
517             
518             // Motorola produced G4s with PVR 0x800[0123C] (at least)
519             const ushort PVR_G4_74xx_24  = 0x800;
520             
521             uint pvr = 0;
522             
523             mixin(`asm pure nothrow { mfspr [pvr], 287; }`); // not supported in DMD?
524             
525             // Top 16 bit suffice to identify model
526             pvr >>= 16;
527             
528             altivec_capable |= (pvr == PVR_G4_7400);
529             altivec_capable |= ((pvr >> 4) == PVR_G4_74xx_24);
530             altivec_capable |= (pvr == PVR_G5_970);
531             altivec_capable |= (pvr == PVR_G5_970FX);
532             altivec_capable |= (pvr == PVR_G5_970MP);
533             altivec_capable |= (pvr == PVR_G5_970GX);
534             altivec_capable |= (pvr == PVR_POWER6);
535             altivec_capable |= (pvr == PVR_POWER7);
536             altivec_capable |= (pvr == PVR_CELL_PPU);
537             
538         }
539         
540         return altivec_capable;
541         
542     }
543     
544 }