Botan 3.10.0
Crypto and TLS for C++
cpuid_x86.cpp
Go to the documentation of this file.
1/*
2* Runtime CPU detection for x86
3* (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/cpuid.h>
9
10#include <botan/compiler.h>
11#include <botan/mem_ops.h>
12#include <botan/internal/target_info.h>
13
14#include <immintrin.h>
15
16#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
17 #include <intrin.h>
18#endif
19
20namespace Botan {
21
22namespace {
23
24void invoke_cpuid(uint32_t type, uint32_t out[4]) {
25 clear_mem(out, 4);
26
27#if defined(BOTAN_USE_GCC_INLINE_ASM)
28 // NOLINTNEXTLINE(*-no-assembler)
29 asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
30
31#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
32 __cpuid((int*)out, type);
33
34#else
35 BOTAN_UNUSED(type);
36 #warning "No way of calling x86 cpuid instruction for this compiler"
37#endif
38}
39
40void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
41 clear_mem(out, 4);
42
43#if defined(BOTAN_USE_GCC_INLINE_ASM)
44 // NOLINTNEXTLINE(*-no-assembler)
45 asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
46
47#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
48 __cpuidex((int*)out, type, level);
49
50#else
51 BOTAN_UNUSED(type, level);
52 #warning "No way of calling x86 cpuid instruction for this compiler"
53#endif
54}
55
56BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
57 return _xgetbv(0);
58}
59
60} // namespace
61
62uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) {
63 enum class x86_CPUID_1_bits : uint64_t {
64 RDTSC = (1ULL << 4),
65 SSE2 = (1ULL << 26),
66 CLMUL = (1ULL << 33),
67 SSSE3 = (1ULL << 41),
68 SSE41 = (1ULL << 51),
69 AESNI = (1ULL << 57),
70 // AVX + OSXSAVE
71 OSXSAVE = (1ULL << 59) | (1ULL << 60),
72 RDRAND = (1ULL << 62)
73 };
74
75 enum class x86_CPUID_7_bits : uint64_t {
76 BMI1 = (1ULL << 3),
77 AVX2 = (1ULL << 5),
78 BMI2 = (1ULL << 8),
79 BMI_1_AND_2 = BMI1 | BMI2,
80 AVX512_F = (1ULL << 16),
81 AVX512_DQ = (1ULL << 17),
82 RDSEED = (1ULL << 18),
83 ADX = (1ULL << 19),
84 AVX512_IFMA = (1ULL << 21),
85 SHA = (1ULL << 29),
86 AVX512_BW = (1ULL << 30),
87 AVX512_VL = (1ULL << 31),
88 AVX512_VBMI = (1ULL << 33),
89 AVX512_VBMI2 = (1ULL << 38),
90 GFNI = (1ULL << 40),
91 AVX512_VAES = (1ULL << 41),
92 AVX512_VCLMUL = (1ULL << 42),
93 AVX512_VBITALG = (1ULL << 44),
94
95 /*
96 We only enable AVX512 support if all of the below flags are available
97
98 This is more than we strictly need for most uses, however it also has
99 the effect of preventing execution of AVX512 codepaths on cores that
100 have serious downclocking problems when AVX512 code executes,
101 especially Intel Skylake.
102
103 VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
104 Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
105 executing AVX512.
106
107 There is nothing stopping some future processor from supporting the
108 above flags and having AVX512 penalties, but maybe you should not have
109 bought such a processor.
110 */
111 AVX512_PROFILE =
112 AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG,
113 };
114
115 // NOLINTNEXTLINE(performance-enum-size)
116 enum class x86_CPUID_7_1_bits : uint64_t {
117 SHA512 = (1 << 0),
118 SM3 = (1 << 1),
119 SM4 = (1 << 2),
120 };
121
122 uint32_t feat = 0;
123 uint32_t cpuid[4] = {0};
124 bool has_os_ymm_support = false;
125 bool has_os_zmm_support = false;
126
127 // CPUID 0: vendor identification, max sublevel
128 invoke_cpuid(0, cpuid);
129
130 const uint32_t max_supported_sublevel = cpuid[0];
131
132 if(max_supported_sublevel >= 1) {
133 // CPUID 1: feature bits
134 invoke_cpuid(1, cpuid);
135 const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
136
137 feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUFeature::Bit::RDTSC, allowed);
138
139 feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUFeature::Bit::RDRAND, allowed);
140
141 feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUFeature::Bit::SSE2, allowed);
142
143 if(is_set(feat, CPUFeature::Bit::SSE2)) {
144 feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUFeature::Bit::SSSE3, allowed);
145
146 if(is_set(feat, CPUFeature::Bit::SSSE3)) {
147 feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUFeature::Bit::CLMUL, allowed);
148 feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUFeature::Bit::AESNI, allowed);
149 }
150
151 const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE);
152 if((flags0 & osxsave64) == osxsave64) {
153 const uint64_t xcr_flags = xgetbv();
154 if((xcr_flags & 0x6) == 0x6) {
155 has_os_ymm_support = true;
156 has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
157 }
158 }
159 }
160 }
161
162 if(max_supported_sublevel >= 7) {
163 clear_mem(cpuid, 4);
164 invoke_cpuid_sublevel(7, 0, cpuid);
165
166 const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
167
168 clear_mem(cpuid, 4);
169 invoke_cpuid_sublevel(7, 1, cpuid);
170 const uint32_t flags7_1 = cpuid[0];
171
172 feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUFeature::Bit::RDSEED, allowed);
173 feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUFeature::Bit::ADX, allowed);
174
175 /*
176 We only set the BMI bit if both BMI1 and BMI2 are supported, since
177 typically we want to use both extensions in the same code.
178 */
179 feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUFeature::Bit::BMI, allowed);
180
181 if(is_set(feat, CPUFeature::Bit::SSSE3)) {
182 feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUFeature::Bit::SHA, allowed);
183 feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUFeature::Bit::SM3, allowed);
184
185 // We only consider AVX2 if SSSE3 is supported
186 if(has_os_ymm_support) {
187 feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUFeature::Bit::AVX2, allowed);
188
189 if(is_set(feat, CPUFeature::Bit::AVX2)) {
190 feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUFeature::Bit::GFNI, allowed);
191 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUFeature::Bit::AVX2_AES, allowed);
192 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUFeature::Bit::AVX2_CLMUL, allowed);
193 feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUFeature::Bit::SHA512, allowed);
194 feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUFeature::Bit::SM4, allowed);
195
196 // Likewise we only consider AVX-512 if AVX2 is supported
197 if(has_os_zmm_support) {
198 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUFeature::Bit::AVX512, allowed);
199
200 if(is_set(feat, CPUFeature::Bit::AVX512)) {
201 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUFeature::Bit::AVX512_AES, allowed);
202 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUFeature::Bit::AVX512_CLMUL, allowed);
203 }
204 }
205 }
206 }
207 }
208 }
209
210/*
211 * If we don't have access to CPUID, we can still safely assume that
212 * any x86-64 processor has SSE2 and RDTSC
213 */
214#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
215 if(feat == 0) {
216 feat |= CPUFeature::Bit::SSE2 & allowed;
217 feat |= CPUFeature::Bit::RDTSC & allowed;
218 }
219#endif
220
221 return feat;
222}
223
224} // namespace Botan
#define BOTAN_UNUSED
Definition assert.h:144
static uint32_t if_set(uint64_t cpuid, T flag, CPUID::Feature bit, uint32_t allowed)
Definition cpuid.h:117
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:56
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:119