Botan 3.4.0
Crypto and TLS for C&&
cpuid_x86.cpp
Go to the documentation of this file.
1/*
2* Runtime CPU detection for x86
3* (C) 2009,2010,2013,2017,2023 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/cpuid.h>
9
10#include <botan/mem_ops.h>
11#include <botan/internal/loadstor.h>
12
13#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
14
15 #include <immintrin.h>
16
17 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18 #include <intrin.h>
19 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
20 #include <ia32intrin.h>
21 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
22 #include <cpuid.h>
23 #endif
24
25#endif
26
27namespace Botan {
28
29#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
30
31namespace {
32
33void invoke_cpuid(uint32_t type, uint32_t out[4]) {
34 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC) || defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35 __cpuid((int*)out, type);
36
37 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
38 __get_cpuid(type, out, out + 1, out + 2, out + 3);
39
40 #elif defined(BOTAN_USE_GCC_INLINE_ASM)
41 asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
42
43 #else
44 #warning "No way of calling x86 cpuid instruction for this compiler"
45 clear_mem(out, 4);
46 #endif
47}
48
49BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
50 return _xgetbv(0);
51}
52
53void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
54 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
55 __cpuidex((int*)out, type, level);
56
57 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
58 __cpuid_count(type, level, out[0], out[1], out[2], out[3]);
59
60 #elif defined(BOTAN_USE_GCC_INLINE_ASM)
61 asm("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
62
63 #else
64 #warning "No way of calling x86 cpuid instruction for this compiler"
65 clear_mem(out, 4);
66 #endif
67}
68
69} // namespace
70
71uint32_t CPUID::CPUID_Data::detect_cpu_features() {
72 uint32_t features_detected = 0;
73 uint32_t cpuid[4] = {0};
74 bool has_os_ymm_support = false;
75 bool has_os_zmm_support = false;
76
77 // CPUID 0: vendor identification, max sublevel
78 invoke_cpuid(0, cpuid);
79
80 const uint32_t max_supported_sublevel = cpuid[0];
81
82 if(max_supported_sublevel >= 1) {
83 // CPUID 1: feature bits
84 invoke_cpuid(1, cpuid);
85 const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
86
87 enum x86_CPUID_1_bits : uint64_t {
88 RDTSC = (1ULL << 4),
89 SSE2 = (1ULL << 26),
90 CLMUL = (1ULL << 33),
91 SSSE3 = (1ULL << 41),
92 AESNI = (1ULL << 57),
93 OSXSAVE = (1ULL << 59),
94 AVX = (1ULL << 60),
95 RDRAND = (1ULL << 62)
96 };
97
98 if(flags0 & x86_CPUID_1_bits::RDTSC) {
99 features_detected |= CPUID::CPUID_RDTSC_BIT;
100 }
101 if(flags0 & x86_CPUID_1_bits::SSE2) {
102 features_detected |= CPUID::CPUID_SSE2_BIT;
103 }
104 if(flags0 & x86_CPUID_1_bits::CLMUL) {
105 features_detected |= CPUID::CPUID_CLMUL_BIT;
106 }
107 if(flags0 & x86_CPUID_1_bits::SSSE3) {
108 features_detected |= CPUID::CPUID_SSSE3_BIT;
109 }
110 if(flags0 & x86_CPUID_1_bits::AESNI) {
111 features_detected |= CPUID::CPUID_AESNI_BIT;
112 }
113 if(flags0 & x86_CPUID_1_bits::RDRAND) {
114 features_detected |= CPUID::CPUID_RDRAND_BIT;
115 }
116
117 if((flags0 & x86_CPUID_1_bits::AVX) && (flags0 & x86_CPUID_1_bits::OSXSAVE)) {
118 const uint64_t xcr_flags = xgetbv();
119 if((xcr_flags & 0x6) == 0x6) {
120 has_os_ymm_support = true;
121 has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
122 }
123 }
124 }
125
126 if(max_supported_sublevel >= 7) {
127 clear_mem(cpuid, 4);
128 invoke_cpuid_sublevel(7, 0, cpuid);
129
130 enum x86_CPUID_7_bits : uint64_t {
131 BMI1 = (1ULL << 3),
132 AVX2 = (1ULL << 5),
133 BMI2 = (1ULL << 8),
134 AVX512_F = (1ULL << 16),
135 AVX512_DQ = (1ULL << 17),
136 RDSEED = (1ULL << 18),
137 ADX = (1ULL << 19),
138 AVX512_IFMA = (1ULL << 21),
139 SHA = (1ULL << 29),
140 AVX512_BW = (1ULL << 30),
141 AVX512_VL = (1ULL << 31),
142 AVX512_VBMI = (1ULL << 33),
143 AVX512_VBMI2 = (1ULL << 38),
144 AVX512_VAES = (1ULL << 41),
145 AVX512_VCLMUL = (1ULL << 42),
146 AVX512_VBITALG = (1ULL << 44),
147 };
148
149 const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
150
151 if((flags7 & x86_CPUID_7_bits::AVX2) && has_os_ymm_support) {
152 features_detected |= CPUID::CPUID_AVX2_BIT;
153 }
154 if(flags7 & x86_CPUID_7_bits::RDSEED) {
155 features_detected |= CPUID::CPUID_RDSEED_BIT;
156 }
157 if(flags7 & x86_CPUID_7_bits::ADX) {
158 features_detected |= CPUID::CPUID_ADX_BIT;
159 }
160 if(flags7 & x86_CPUID_7_bits::SHA) {
161 features_detected |= CPUID::CPUID_SHA_BIT;
162 }
163
164 /*
165 We only set the BMI bit if both BMI1 and BMI2 are supported, since
166 typically we want to use both extensions in the same code.
167 */
168 if((flags7 & x86_CPUID_7_bits::BMI1) && (flags7 & x86_CPUID_7_bits::BMI2)) {
169 features_detected |= CPUID::CPUID_BMI_BIT;
170 }
171
172 if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_os_zmm_support) {
173 const uint64_t AVX512_PROFILE_FLAGS = x86_CPUID_7_bits::AVX512_F | x86_CPUID_7_bits::AVX512_DQ |
174 x86_CPUID_7_bits::AVX512_IFMA | x86_CPUID_7_bits::AVX512_BW |
175 x86_CPUID_7_bits::AVX512_VL | x86_CPUID_7_bits::AVX512_VBMI |
176 x86_CPUID_7_bits::AVX512_VBMI2 | x86_CPUID_7_bits::AVX512_VBITALG;
177
178 /*
179 We only enable AVX512 support if all of the above flags are available
180
181 This is more than we strictly need for most uses, however it also has
182 the effect of preventing execution of AVX512 codepaths on cores that
183 have serious downclocking problems when AVX512 code executes,
184 especially Intel Skylake.
185
186 VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
187 Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
188 executing AVX512.
189
190 There is nothing stopping some future processor from supporting the
191 above flags and having AVX512 penalties, but maybe you should not have
192 bought such a processor.
193 */
194 if((flags7 & AVX512_PROFILE_FLAGS) == AVX512_PROFILE_FLAGS) {
195 features_detected |= CPUID::CPUID_AVX512_BIT;
196
197 if(flags7 & x86_CPUID_7_bits::AVX512_VAES) {
198 features_detected |= CPUID::CPUID_AVX512_AES_BIT;
199 }
200 if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL) {
201 features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
202 }
203 }
204 }
205 }
206
207 /*
208 * If we don't have access to CPUID, we can still safely assume that
209 * any x86-64 processor has SSE2 and RDTSC
210 */
211 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
212 if(features_detected == 0) {
213 features_detected |= CPUID::CPUID_SSE2_BIT;
214 features_detected |= CPUID::CPUID_RDTSC_BIT;
215 }
216 #endif
217
218 return features_detected;
219}
220
221#endif
222
223} // namespace Botan
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:92
Internal Header.
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:120