Botan 3.8.1
Crypto and TLS for C++
cpuid_x86.cpp
Go to the documentation of this file.
1/*
2* Runtime CPU detection for x86
3* (C) 2009,2010,2013,2017,2023,2024 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/cpuid.h>
9
10#include <botan/assert.h>
11#include <botan/mem_ops.h>
12#include <botan/internal/loadstor.h>
13#include <botan/internal/target_info.h>
14
15#include <immintrin.h>
16
17#if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
18 #include <intrin.h>
19#endif
20
21namespace Botan {
22
23namespace {
24
25void invoke_cpuid(uint32_t type, uint32_t out[4]) {
26 clear_mem(out, 4);
27
28#if defined(BOTAN_USE_GCC_INLINE_ASM)
29 asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type));
30
31#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
32 __cpuid((int*)out, type);
33
34#else
35 BOTAN_UNUSED(type);
36 #warning "No way of calling x86 cpuid instruction for this compiler"
37#endif
38}
39
40void invoke_cpuid_sublevel(uint32_t type, uint32_t level, uint32_t out[4]) {
41 clear_mem(out, 4);
42
43#if defined(BOTAN_USE_GCC_INLINE_ASM)
44 asm volatile("cpuid\n\t" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "0"(type), "2"(level));
45
46#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC)
47 __cpuidex((int*)out, type, level);
48
49#else
50 BOTAN_UNUSED(type, level);
51 #warning "No way of calling x86 cpuid instruction for this compiler"
52#endif
53}
54
55BOTAN_FUNC_ISA("xsave") uint64_t xgetbv() {
56 return _xgetbv(0);
57}
58
59} // namespace
60
61uint32_t CPUID::CPUID_Data::detect_cpu_features(uint32_t allowed) {
62 enum class x86_CPUID_1_bits : uint64_t {
63 RDTSC = (1ULL << 4),
64 SSE2 = (1ULL << 26),
65 CLMUL = (1ULL << 33),
66 SSSE3 = (1ULL << 41),
67 SSE41 = (1ULL << 51),
68 AESNI = (1ULL << 57),
69 // AVX + OSXSAVE
70 OSXSAVE = (1ULL << 59) | (1ULL << 60),
71 RDRAND = (1ULL << 62)
72 };
73
74 enum class x86_CPUID_7_bits : uint64_t {
75 BMI1 = (1ULL << 3),
76 AVX2 = (1ULL << 5),
77 BMI2 = (1ULL << 8),
78 BMI_1_AND_2 = BMI1 | BMI2,
79 AVX512_F = (1ULL << 16),
80 AVX512_DQ = (1ULL << 17),
81 RDSEED = (1ULL << 18),
82 ADX = (1ULL << 19),
83 AVX512_IFMA = (1ULL << 21),
84 SHA = (1ULL << 29),
85 AVX512_BW = (1ULL << 30),
86 AVX512_VL = (1ULL << 31),
87 AVX512_VBMI = (1ULL << 33),
88 AVX512_VBMI2 = (1ULL << 38),
89 GFNI = (1ULL << 40),
90 AVX512_VAES = (1ULL << 41),
91 AVX512_VCLMUL = (1ULL << 42),
92 AVX512_VBITALG = (1ULL << 44),
93
94 /*
95 We only enable AVX512 support if all of the below flags are available
96
97 This is more than we strictly need for most uses, however it also has
98 the effect of preventing execution of AVX512 codepaths on cores that
99 have serious downclocking problems when AVX512 code executes,
100 especially Intel Skylake.
101
102 VBMI2/VBITALG are the key flags here as they restrict us to Intel Ice
103 Lake/Rocket Lake, or AMD Zen4, all of which do not have penalties for
104 executing AVX512.
105
106 There is nothing stopping some future processor from supporting the
107 above flags and having AVX512 penalties, but maybe you should not have
108 bought such a processor.
109 */
110 AVX512_PROFILE =
111 AVX512_F | AVX512_DQ | AVX512_IFMA | AVX512_BW | AVX512_VL | AVX512_VBMI | AVX512_VBMI2 | AVX512_VBITALG,
112 };
113
114 // NOLINTNEXTLINE(performance-enum-size)
115 enum class x86_CPUID_7_1_bits : uint64_t {
116 SHA512 = (1 << 0),
117 SM3 = (1 << 1),
118 SM4 = (1 << 2),
119 };
120
121 uint32_t feat = 0;
122 uint32_t cpuid[4] = {0};
123 bool has_os_ymm_support = false;
124 bool has_os_zmm_support = false;
125
126 // CPUID 0: vendor identification, max sublevel
127 invoke_cpuid(0, cpuid);
128
129 const uint32_t max_supported_sublevel = cpuid[0];
130
131 if(max_supported_sublevel >= 1) {
132 // CPUID 1: feature bits
133 invoke_cpuid(1, cpuid);
134 const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
135
136 feat |= if_set(flags0, x86_CPUID_1_bits::RDTSC, CPUFeature::Bit::RDTSC, allowed);
137
138 feat |= if_set(flags0, x86_CPUID_1_bits::RDRAND, CPUFeature::Bit::RDRAND, allowed);
139
140 feat |= if_set(flags0, x86_CPUID_1_bits::SSE2, CPUFeature::Bit::SSE2, allowed);
141
142 if(feat & CPUFeature::Bit::SSE2) {
143 feat |= if_set(flags0, x86_CPUID_1_bits::SSSE3, CPUFeature::Bit::SSSE3, allowed);
144
145 if(feat & CPUFeature::Bit::SSSE3) {
146 feat |= if_set(flags0, x86_CPUID_1_bits::CLMUL, CPUFeature::Bit::CLMUL, allowed);
147 feat |= if_set(flags0, x86_CPUID_1_bits::AESNI, CPUFeature::Bit::AESNI, allowed);
148 }
149
150 const uint64_t osxsave64 = static_cast<uint64_t>(x86_CPUID_1_bits::OSXSAVE);
151 if((flags0 & osxsave64) == osxsave64) {
152 const uint64_t xcr_flags = xgetbv();
153 if((xcr_flags & 0x6) == 0x6) {
154 has_os_ymm_support = true;
155 has_os_zmm_support = (xcr_flags & 0xE0) == 0xE0;
156 }
157 }
158 }
159 }
160
161 if(max_supported_sublevel >= 7) {
162 clear_mem(cpuid, 4);
163 invoke_cpuid_sublevel(7, 0, cpuid);
164
165 const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
166
167 clear_mem(cpuid, 4);
168 invoke_cpuid_sublevel(7, 1, cpuid);
169 const uint32_t flags7_1 = cpuid[0];
170
171 feat |= if_set(flags7, x86_CPUID_7_bits::RDSEED, CPUFeature::Bit::RDSEED, allowed);
172 feat |= if_set(flags7, x86_CPUID_7_bits::ADX, CPUFeature::Bit::ADX, allowed);
173
174 /*
175 We only set the BMI bit if both BMI1 and BMI2 are supported, since
176 typically we want to use both extensions in the same code.
177 */
178 feat |= if_set(flags7, x86_CPUID_7_bits::BMI_1_AND_2, CPUFeature::Bit::BMI, allowed);
179
180 if(feat & CPUFeature::Bit::SSSE3) {
181 feat |= if_set(flags7, x86_CPUID_7_bits::SHA, CPUFeature::Bit::SHA, allowed);
182 feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM3, CPUFeature::Bit::SM3, allowed);
183 }
184
185 if(has_os_ymm_support) {
186 feat |= if_set(flags7, x86_CPUID_7_bits::AVX2, CPUFeature::Bit::AVX2, allowed);
187
188 if(feat & CPUFeature::Bit::AVX2) {
189 feat |= if_set(flags7, x86_CPUID_7_bits::GFNI, CPUFeature::Bit::GFNI, allowed);
190 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUFeature::Bit::AVX2_AES, allowed);
191 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUFeature::Bit::AVX2_CLMUL, allowed);
192 feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SHA512, CPUFeature::Bit::SHA512, allowed);
193 feat |= if_set(flags7_1, x86_CPUID_7_1_bits::SM4, CPUFeature::Bit::SM4, allowed);
194
195 if(has_os_zmm_support) {
196 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_PROFILE, CPUFeature::Bit::AVX512, allowed);
197
198 if(feat & CPUFeature::Bit::AVX512) {
199 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VAES, CPUFeature::Bit::AVX512_AES, allowed);
200 feat |= if_set(flags7, x86_CPUID_7_bits::AVX512_VCLMUL, CPUFeature::Bit::AVX512_CLMUL, allowed);
201 }
202 }
203 }
204 }
205 }
206
207/*
208 * If we don't have access to CPUID, we can still safely assume that
209 * any x86-64 processor has SSE2 and RDTSC
210 */
211#if defined(BOTAN_TARGET_ARCH_IS_X86_64)
212 if(feat == 0) {
213 feat |= CPUFeature::Bit::SSE2 & allowed;
214 feat |= CPUFeature::Bit::RDTSC & allowed;
215 }
216#endif
217
218 return feat;
219}
220
221} // namespace Botan
#define BOTAN_UNUSED
Definition assert.h:120
static uint32_t if_set(uint64_t cpuid, T flag, CPUID::Feature bit, uint32_t allowed)
Definition cpuid.h:117
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:54
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:123