Botan 3.6.1
Crypto and TLS for C++
cpuid_x86.cpp
/*
* Runtime CPU detection for x86
* (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/internal/cpuid.h>

#include <botan/mem_ops.h>
#include <botan/internal/loadstor.h>

#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
   #include <immintrin.h>
#endif

#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
   #include <intrin.h>
#endif

namespace Botan {

#if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)

namespace {

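// Invoke the CPUID instruction for the given leaf; out[0..3] receive EAX, EBX, ECX and EDX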
void invoke_cpuid(uint32_t type, uint32_t out[4]) {
   clear_mem(out, 4);

   #if defined(BOTAN_USE_GCC_INLINE_ASM)
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));

   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
   __cpuid((int*)out, type);

   #else
   BOTAN_UNUSED(type);
   #warning "No way of calling x86 cpuid instruction for this compiler"
   #endif
}

void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
   clear_mem(out, 4);

   #if defined(BOTAN_USE_GCC_INLINE_ASM)
   asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));

   #elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
   __cpuidex((int*)out, type, level);

   #else
   BOTAN_UNUSED(type, level);
   #warning "No way of calling x86 cpuid instruction for this compiler"
   #endif
}

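// Read XCR0 via xgetbv: CPUID reports what the CPU supports, but only XCR0
// shows which vector register state the OS actually saves and restores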
BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
   return _xgetbv(0);
}

} // namespace

uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) {
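   // Packed CPUID.1 flags: bits 0..31 come from EDX, bits 32..63 from ECX
   // (matching how flags0 is assembled below)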
   enum class x86_CPUID_1_bits : uint64_t {
      RDTSC = (1ULL << 4),
      SSE2 = (1ULL << 26),
      CLMUL = (1ULL << 33),
      SSSE3 = (1ULL << 41),
      SSE41 = (1ULL << 51),
      AESNI = (1ULL << 57),
      // AVX + OSXSAVE
      OSXSAVE = (1ULL << 59) | (1ULL << 60),
      RDRAND = (1ULL << 62)
   };

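   // Packed CPUID.7.0 flags: bits 0..31 come from EBX, bits 32..63 from ECX
   // (matching how flags7 is assembled below)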
   enum class x86_CPUID_7_bits : uint64_t {
      BMI1 = (1ULL << 3),
      AVX2 = (1ULL << 5),
      BMI2 = (1ULL << 8),
      BMI_1_AND_2 = BMI1 | BMI2,
      AVX512_F = (1ULL << 16),
      AVX512_DQ = (1ULL << 17),
      RDSEED = (1ULL << 18),
      ADX = (1ULL << 19),
      AVX512_IFMA = (1ULL << 21),
      SHA = (1ULL << 29),
      AVX512_BW = (1ULL << 30),
      AVX512_VL = (1ULL << 31),
      AVX512_VBMI = (1ULL << 33),
      AVX512_VBMI2 = (1ULL << 38),
      GFNI = (1ULL << 40),
      AVX512_VAES = (1ULL << 41),
      AVX512_VCLMUL = (1ULL << 42),
      AVX512_VBITALG = (1ULL << 44),

      /*
      We only enable AVX512 support if all of the below flags are available

      This is more than we strictly need for most uses, however it also has
      the effect of preventing execution of AVX512 codepaths on cores that
      have serious downclocking problems when AVX512 code executes,
      especially Intel Skylake.

      VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
      Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
      executing AVX512.

      There is nothing stopping some future processor from supporting the
      above flags and having AVX512 penalties, but maybe you should not have
      bought such a processor.
      */
      AVX512_PROFILE =
         AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG,
   };

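   // CPUID.7.1 feature flags, taken directly from EAX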
   // NOLINTNEXTLINE(performance-enum-size)
   enum class x86_CPUID_7_1_bits : uint64_t {
      SHA512 = (1 << 0),
      SM3 = (1 << 1),
      SM4 = (1 << 2),
   };

   uint32_t feat = 0;
   uint32_t cpuid[4] = {0};
   bool has_os_ymm_support = false;
   bool has_os_zmm_support = false;

   // CPUID 0: vendor identification, max sublevel
   invoke_cpuid(0, cpuid);

   const uint32_t max_supported_sublevel = cpuid[0];

   if(max_supported_sublevel >= 1) {
      // CPUID 1: feature bits
      invoke_cpuid(1, cpuid);
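      // Pack ECX into the high 32 bits and EDX into the low 32 bits to match x86_CPUID_1_bits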
      const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];

      feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUID::CPUID_RDTSC_BIT, allowed);

      feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUID::CPUID_RDRAND_BIT, allowed);

      feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUID::CPUID_SSE2_BIT, allowed);

      if(feat & CPUID::CPUID_SSE2_BIT) {
         feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUID::CPUID_SSSE3_BIT, allowed);

         if(feat & CPUID::CPUID_SSSE3_BIT) {
            feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUID::CPUID_CLMUL_BIT, allowed);
            feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUID::CPUID_AESNI_BIT, allowed);
         }

         const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE);
         if((flags0 & osxsave64) == osxsave64) {
            const uint64_t xcr_flags = xgetbv();
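            // XCR0 bit 1 = SSE (XMM) state, bit 2 = AVX (YMM) state,
            // bits 5..7 = AVX-512 opmask/ZMM state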
            if((xcr_flags & 0x6) == 0x6) {
               has_os_ymm_support = true;
               has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
            }
         }
      }
   }

   if(max_supported_sublevel >= 7) {
      clear_mem(cpuid, 4);
      invoke_cpuid_sublevel(7, 0, cpuid);

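      // Pack ECX into the high 32 bits and EBX into the low 32 bits to match x86_CPUID_7_bits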
      const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];

      clear_mem(cpuid, 4);
      invoke_cpuid_sublevel(7, 1, cpuid);
      const uint32_t flags7_1 = cpuid[0];

      feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUID::CPUID_RDSEED_BIT, allowed);
      feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUID::CPUID_ADX_BIT, allowed);

      /*
      We only set the BMI bit if both BMI1 and BMI2 are supported, since
      typically we want to use both extensions in the same code.
      */
      feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUID::CPUID_BMI_BIT, allowed);

      if(feat & CPUID::CPUID_SSSE3_BIT) {
         feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUID::CPUID_SHA_BIT, allowed);
         feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUID::CPUID_SM3_BIT, allowed);
      }

      if(has_os_ymm_support) {
         feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUID::CPUID_AVX2_BIT, allowed);

         if(feat & CPUID::CPUID_AVX2_BIT) {
            feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUID::CPUID_GFNI_BIT, allowed);
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX2_AES_BIT, allowed);
            feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX2_CLMUL_BIT, allowed);
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUID::CPUID_SHA512_BIT, allowed);
            feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUID::CPUID_SM4_BIT, allowed);

            if(has_os_zmm_support) {
               feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUID::CPUID_AVX512_BIT, allowed);

               if(feat & CPUID::CPUID_AVX512_BIT) {
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUID::CPUID_AVX512_AES_BIT, allowed);
                  feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUID::CPUID_AVX512_CLMUL_BIT, allowed);
               }
            }
         }
      }
   }

   /*
   * If we don't have access to CPUID, we can still safely assume that
   * any x86-64 processor has SSE2 and RDTSC
   */
   #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
   if(feat == 0) {
      feat |= CPUID::CPUID_SSE2_BIT & allowed;
      feat |= CPUID::CPUID_RDTSC_BIT & allowed;
   }
   #endif

   return feat;
}

#endif

} // namespace Botan
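For readers who want to experiment with the same detection pattern outside of Botan, the following is a minimal standalone sketch: CPUID leaf 1 and leaf 7 queries plus an XCR0 check before trusting the AVX bits. It assumes GCC or Clang on x86-64, compiled with -mxsave, and is illustrative only rather than part of the library.

// cpuid_sketch.cpp - illustrative only, not part of Botan
// Build (GCC/Clang, x86-64): g++ -mxsave cpuid_sketch.cpp

#include <cpuid.h>      // __get_cpuid, __get_cpuid_count
#include <immintrin.h>  // _xgetbv
#include <cstdio>

int main() {
   unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

   // CPUID leaf 1: basic feature flags in ECX/EDX
   if(__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0) {
      return 1;
   }
   const bool has_aesni = (ecx >> 25) & 1;
   const bool has_avx = (ecx >> 28) & 1;
   const bool has_osxsave = (ecx >> 27) & 1;

   // Only trust AVX-family bits if the OS saves XMM+YMM state (XCR0 bits 1 and 2)
   bool os_saves_ymm = false;
   if(has_osxsave) {
      const unsigned long long xcr0 = _xgetbv(0);
      os_saves_ymm = (xcr0 & 0x6) == 0x6;
   }

   // CPUID leaf 7, subleaf 0: AVX2 is EBX bit 5
   bool has_avx2 = false;
   if(__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) != 0) {
      has_avx2 = (ebx >> 5) & 1;
   }

   std::printf("AES-NI: %d  AVX2 usable: %d\n",
               has_aesni ? 1 : 0,
               (has_avx && has_avx2 && os_saves_ymm) ? 1 : 0);
   return 0;
}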