Botan  2.8.0
Crypto and TLS for C++11
cpuid_x86.cpp
Go to the documentation of this file.
1 /*
2 * Runtime CPU detection for x86
3 * (C) 2009,2010,2013,2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #include <botan/cpuid.h>
9 #include <botan/mem_ops.h>
10 #include <botan/loadstor.h>
11 
12 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13 
14 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15  #include <intrin.h>
16 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17  #include <ia32intrin.h>
18 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19  #include <cpuid.h>
20 #endif
21 
22 #endif
23 
24 namespace Botan {
25 
26 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27 
28 uint64_t CPUID::detect_cpu_features(size_t* cache_line_size)
29  {
30 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
31  #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
32  #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
33 
34 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35  #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
36  #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
37 
38 #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
39  #define X86_CPUID(type, out) \
40  asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
41  : "0" (type))
42 
43  #define X86_CPUID_SUBLEVEL(type, level, out) \
44  asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
45  : "0" (type), "2" (level))
46 
47 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
48  #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
49 
50  #define X86_CPUID_SUBLEVEL(type, level, out) \
51  do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
52 #else
53  #warning "No way of calling x86 cpuid instruction for this compiler"
54  #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
55  #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
56 #endif
57 
58  uint64_t features_detected = 0;
59  uint32_t cpuid[4] = { 0 };
60 
61  // CPUID 0: vendor identification, max sublevel
62  X86_CPUID(0, cpuid);
63 
64  const uint32_t max_supported_sublevel = cpuid[0];
65 
66  const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
67  const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
68  const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
69  const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
70 
71  if(max_supported_sublevel >= 1)
72  {
73  // CPUID 1: feature bits
74  X86_CPUID(1, cpuid);
75  const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
76 
77  enum x86_CPUID_1_bits : uint64_t {
78  RDTSC = (1ULL << 4),
79  SSE2 = (1ULL << 26),
80  CLMUL = (1ULL << 33),
81  SSSE3 = (1ULL << 41),
82  SSE41 = (1ULL << 51),
83  SSE42 = (1ULL << 52),
84  AESNI = (1ULL << 57),
85  RDRAND = (1ULL << 62)
86  };
87 
88  if(flags0 & x86_CPUID_1_bits::RDTSC)
89  features_detected |= CPUID::CPUID_RDTSC_BIT;
90  if(flags0 & x86_CPUID_1_bits::SSE2)
91  features_detected |= CPUID::CPUID_SSE2_BIT;
92  if(flags0 & x86_CPUID_1_bits::CLMUL)
93  features_detected |= CPUID::CPUID_CLMUL_BIT;
94  if(flags0 & x86_CPUID_1_bits::SSSE3)
95  features_detected |= CPUID::CPUID_SSSE3_BIT;
96  if(flags0 & x86_CPUID_1_bits::SSE41)
97  features_detected |= CPUID::CPUID_SSE41_BIT;
98  if(flags0 & x86_CPUID_1_bits::SSE42)
99  features_detected |= CPUID::CPUID_SSE42_BIT;
100  if(flags0 & x86_CPUID_1_bits::AESNI)
101  features_detected |= CPUID::CPUID_AESNI_BIT;
102  if(flags0 & x86_CPUID_1_bits::RDRAND)
103  features_detected |= CPUID::CPUID_RDRAND_BIT;
104  }
105 
106  if(is_intel)
107  {
108  // Intel cache line size is in cpuid(1) output
109  *cache_line_size = 8 * get_byte(2, cpuid[1]);
110  }
111  else if(is_amd)
112  {
113  // AMD puts it in vendor zone
114  X86_CPUID(0x80000005, cpuid);
115  *cache_line_size = get_byte(3, cpuid[2]);
116  }
117 
118  if(max_supported_sublevel >= 7)
119  {
120  clear_mem(cpuid, 4);
121  X86_CPUID_SUBLEVEL(7, 0, cpuid);
122 
123  enum x86_CPUID_7_bits : uint64_t {
124  BMI1 = (1ULL << 3),
125  AVX2 = (1ULL << 5),
126  BMI2 = (1ULL << 8),
127  AVX512F = (1ULL << 16),
128  RDSEED = (1ULL << 18),
129  ADX = (1ULL << 19),
130  SHA = (1ULL << 29),
131  };
132  uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
133 
134  if(flags7 & x86_CPUID_7_bits::AVX2)
135  features_detected |= CPUID::CPUID_AVX2_BIT;
136  if(flags7 & x86_CPUID_7_bits::BMI1)
137  {
138  features_detected |= CPUID::CPUID_BMI1_BIT;
139  /*
140  We only set the BMI2 bit if BMI1 is also supported, so BMI2
141  code can safely use both extensions. No known processor
142  implements BMI2 but not BMI1.
143  */
144  if(flags7 & x86_CPUID_7_bits::BMI2)
145  features_detected |= CPUID::CPUID_BMI2_BIT;
146  }
147 
148  if(flags7 & x86_CPUID_7_bits::AVX512F)
149  features_detected |= CPUID::CPUID_AVX512F_BIT;
150  if(flags7 & x86_CPUID_7_bits::RDSEED)
151  features_detected |= CPUID::CPUID_RDSEED_BIT;
152  if(flags7 & x86_CPUID_7_bits::ADX)
153  features_detected |= CPUID::CPUID_ADX_BIT;
154  if(flags7 & x86_CPUID_7_bits::SHA)
155  features_detected |= CPUID::CPUID_SHA_BIT;
156  }
157 
158 #undef X86_CPUID
159 #undef X86_CPUID_SUBLEVEL
160 
161  /*
162  * If we don't have access to CPUID, we can still safely assume that
163  * any x86-64 processor has SSE2 and RDTSC
164  */
165 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
166  if(features_detected == 0)
167  {
168  features_detected |= CPUID::CPUID_SSE2_BIT;
169  features_detected |= CPUID::CPUID_RDTSC_BIT;
170  }
171 #endif
172 
173  return features_detected;
174  }
175 
176 #endif
177 
178 }
bool same_mem(const T *p1, const T *p2, size_t n)
Definition: mem_ops.h:158
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:97
static size_t cache_line_size()
Definition: cpuid.h:66
Definition: alg_id.cpp:13
uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:39