Botan 3.5.0
Crypto and TLS for C++
cpuid_x86.cpp
Go to the documentation of this file.
1/*
2* Runtime CPU detection for x86
3* (C) 2009,2010,2013,2017,2023 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/cpuid.h>
9
10#include <botan/mem_ops.h>
11#include <botan/internal/loadstor.h>
12
13#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
14
15 #include <immintrin.h>
16
17 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18 #include <intrin.h>
19 #endif
20
21#endif
22
23namespace Botan {
24
25#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
26
27namespace {
28
29void invoke_cpuid(uint32_t type, uint32_t out[4]) {
30 clear_mem(out, 4);
31
32 #if defined(BOTAN_USE_GCC_INLINE_ASM)
33 asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
34
35 #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
36 __cpuid((int*)out, type);
37
38 #else
39 #warning "No way of calling x86 cpuid instruction for this compiler"
40 #endif
41}
42
43void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
44 clear_mem(out, 4);
45
46 #if defined(BOTAN_USE_GCC_INLINE_ASM)
47 asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
48
49 #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
50 __cpuidex((int*)out, type, level);
51
52 #else
53 #warning "No way of calling x86 cpuid instruction for this compiler"
54 #endif
55}
56
57BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
58 return _xgetbv(0);
59}
60
61} // namespace
62
63uint32_t CPUID::CPUID_Data::detect_cpu_features() {
64 uint32_t features_detected = 0;
65 uint32_t cpuid[4] = {0};
66 bool has_os_ymm_support = false;
67 bool has_os_zmm_support = false;
68
69 // CPUID 0: vendor identification, max sublevel
70 invoke_cpuid(0, cpuid);
71
72 const uint32_t max_supported_sublevel = cpuid[0];
73
74 if(max_supported_sublevel >= 1) {
75 // CPUID 1: feature bits
76 invoke_cpuid(1, cpuid);
77 const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
78
79 enum x86_CPUID_1_bits : uint64_t {
80 RDTSC = (1ULL << 4),
81 SSE2 = (1ULL << 26),
82 CLMUL = (1ULL << 33),
83 SSSE3 = (1ULL << 41),
84 AESNI = (1ULL << 57),
85 OSXSAVE = (1ULL << 59),
86 AVX = (1ULL << 60),
87 RDRAND = (1ULL << 62)
88 };
89
90 if(flags0 & x86_CPUID_1_bits::RDTSC) {
91 features_detected |= CPUID::CPUID_RDTSC_BIT;
92 }
93 if(flags0 & x86_CPUID_1_bits::SSE2) {
94 features_detected |= CPUID::CPUID_SSE2_BIT;
95 }
96 if(flags0 & x86_CPUID_1_bits::CLMUL) {
97 features_detected |= CPUID::CPUID_CLMUL_BIT;
98 }
99 if(flags0 & x86_CPUID_1_bits::SSSE3) {
100 features_detected |= CPUID::CPUID_SSSE3_BIT;
101 }
102 if(flags0 & x86_CPUID_1_bits::AESNI) {
103 features_detected |= CPUID::CPUID_AESNI_BIT;
104 }
105 if(flags0 & x86_CPUID_1_bits::RDRAND) {
106 features_detected |= CPUID::CPUID_RDRAND_BIT;
107 }
108
109 if((flags0 & x86_CPUID_1_bits::AVX) && (flags0 & x86_CPUID_1_bits::OSXSAVE)) {
110 const uint64_t xcr_flags = xgetbv();
111 if((xcr_flags & 0x6) == 0x6) {
112 has_os_ymm_support = true;
113 has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
114 }
115 }
116 }
117
118 if(max_supported_sublevel >= 7) {
119 clear_mem(cpuid, 4);
120 invoke_cpuid_sublevel(7, 0, cpuid);
121
122 enum x86_CPUID_7_bits : uint64_t {
123 BMI1 = (1ULL << 3),
124 AVX2 = (1ULL << 5),
125 BMI2 = (1ULL << 8),
126 AVX512_F = (1ULL << 16),
127 AVX512_DQ = (1ULL << 17),
128 RDSEED = (1ULL << 18),
129 ADX = (1ULL << 19),
130 AVX512_IFMA = (1ULL << 21),
131 SHA = (1ULL << 29),
132 AVX512_BW = (1ULL << 30),
133 AVX512_VL = (1ULL << 31),
134 AVX512_VBMI = (1ULL << 33),
135 AVX512_VBMI2 = (1ULL << 38),
136 AVX512_VAES = (1ULL << 41),
137 AVX512_VCLMUL = (1ULL << 42),
138 AVX512_VBITALG = (1ULL << 44),
139 };
140
141 const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
142
143 if((flags7 & x86_CPUID_7_bits::AVX2) && has_os_ymm_support) {
144 features_detected |= CPUID::CPUID_AVX2_BIT;
145 }
146 if(flags7 & x86_CPUID_7_bits::RDSEED) {
147 features_detected |= CPUID::CPUID_RDSEED_BIT;
148 }
149 if(flags7 & x86_CPUID_7_bits::ADX) {
150 features_detected |= CPUID::CPUID_ADX_BIT;
151 }
152 if(flags7 & x86_CPUID_7_bits::SHA) {
153 features_detected |= CPUID::CPUID_SHA_BIT;
154 }
155
156 /*
157 We only set the BMI bit if both BMI1 and BMI2 are supported, since
158 typically we want to use both extensions in the same code.
159 */
160 if((flags7 & x86_CPUID_7_bits::BMI1) && (flags7 & x86_CPUID_7_bits::BMI2)) {
161 features_detected |= CPUID::CPUID_BMI_BIT;
162 }
163
164 if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_os_zmm_support) {
165 const uint64_t AVX512_PROFILE_FLAGS = x86_CPUID_7_bits::AVX512_F | x86_CPUID_7_bits::AVX512_DQ |
166 x86_CPUID_7_bits::AVX512_IFMA | x86_CPUID_7_bits::AVX512_BW |
167 x86_CPUID_7_bits::AVX512_VL | x86_CPUID_7_bits::AVX512_VBMI |
168 x86_CPUID_7_bits::AVX512_VBMI2 | x86_CPUID_7_bits::AVX512_VBITALG;
169
170 /*
171 We only enable AVX512 support if all of the above flags are available
172
173 This is more than we strictly need for most uses, however it also has
174 the effect of preventing execution of AVX512 codepaths on cores that
175 have serious downclocking problems when AVX512 code executes,
176 especially Intel Skylake.
177
178 VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
179 Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
180 executing AVX512.
181
182 There is nothing stopping some future processor from supporting the
183 above flags and having AVX512 penalties, but maybe you should not have
184 bought such a processor.
185 */
186 if((flags7 & AVX512_PROFILE_FLAGS) == AVX512_PROFILE_FLAGS) {
187 features_detected |= CPUID::CPUID_AVX512_BIT;
188
189 if(flags7 & x86_CPUID_7_bits::AVX512_VAES) {
190 features_detected |= CPUID::CPUID_AVX512_AES_BIT;
191 }
192 if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL) {
193 features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
194 }
195 }
196 }
197 }
198
199 /*
200 * If we don't have access to CPUID, we can still safely assume that
201 * any x86-64 processor has SSE2 and RDTSC
202 */
203 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
204 if(features_detected == 0) {
205 features_detected |= CPUID::CPUID_SSE2_BIT;
206 features_detected |= CPUID::CPUID_RDTSC_BIT;
207 }
208 #endif
209
210 return features_detected;
211}
212
213#endif
214
215} // namespace Botan
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:92
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:120