Botan 3.11.0
Crypto and TLS for C++
aes_vaes.cpp
Go to the documentation of this file.
1/*
2* (C) 2024 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/aes.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/simd_avx2.h>
11#include <immintrin.h>
12
13namespace Botan {
14
15namespace {
16
17BOTAN_FORCE_INLINE void BOTAN_FN_ISA_AVX2_VAES
18keyxor(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
19 B0 ^= K;
20 B1 ^= K;
21 B2 ^= K;
22 B3 ^= K;
23}
24
25// NOLINTBEGIN(portability-simd-intrinsics)
26
27BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(SIMD_8x32 K, SIMD_8x32& B) {
28 B = SIMD_8x32(_mm256_aesenc_epi128(B.raw(), K.raw()));
29}
30
31BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(
32 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
33 B0 = SIMD_8x32(_mm256_aesenc_epi128(B0.raw(), K.raw()));
34 B1 = SIMD_8x32(_mm256_aesenc_epi128(B1.raw(), K.raw()));
35 B2 = SIMD_8x32(_mm256_aesenc_epi128(B2.raw(), K.raw()));
36 B3 = SIMD_8x32(_mm256_aesenc_epi128(B3.raw(), K.raw()));
37}
38
39BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(SIMD_8x32 K, SIMD_8x32& B) {
40 B = SIMD_8x32(_mm256_aesenclast_epi128(B.raw(), K.raw()));
41}
42
43BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(
44 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
45 B0 = SIMD_8x32(_mm256_aesenclast_epi128(B0.raw(), K.raw()));
46 B1 = SIMD_8x32(_mm256_aesenclast_epi128(B1.raw(), K.raw()));
47 B2 = SIMD_8x32(_mm256_aesenclast_epi128(B2.raw(), K.raw()));
48 B3 = SIMD_8x32(_mm256_aesenclast_epi128(B3.raw(), K.raw()));
49}
50
51BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(SIMD_8x32 K, SIMD_8x32& B) {
52 B = SIMD_8x32(_mm256_aesdec_epi128(B.raw(), K.raw()));
53}
54
55BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(
56 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
57 B0 = SIMD_8x32(_mm256_aesdec_epi128(B0.raw(), K.raw()));
58 B1 = SIMD_8x32(_mm256_aesdec_epi128(B1.raw(), K.raw()));
59 B2 = SIMD_8x32(_mm256_aesdec_epi128(B2.raw(), K.raw()));
60 B3 = SIMD_8x32(_mm256_aesdec_epi128(B3.raw(), K.raw()));
61}
62
63BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(SIMD_8x32 K, SIMD_8x32& B) {
64 B = SIMD_8x32(_mm256_aesdeclast_epi128(B.raw(), K.raw()));
65}
66
67BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(
68 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
69 B0 = SIMD_8x32(_mm256_aesdeclast_epi128(B0.raw(), K.raw()));
70 B1 = SIMD_8x32(_mm256_aesdeclast_epi128(B1.raw(), K.raw()));
71 B2 = SIMD_8x32(_mm256_aesdeclast_epi128(B2.raw(), K.raw()));
72 B3 = SIMD_8x32(_mm256_aesdeclast_epi128(B3.raw(), K.raw()));
73}
74
75// NOLINTEND(portability-simd-intrinsics)
76
77} // namespace
78
79/*
80* AES-128 Encryption
81*/
82BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
83 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
84 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
85 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
86 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
87 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
88 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
89 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
90 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
91 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
92 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
93 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
94
95 while(blocks >= 8) {
96 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
97 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
98 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
99 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
100
101 keyxor(K0, B0, B1, B2, B3);
102 aesenc(K1, B0, B1, B2, B3);
103 aesenc(K2, B0, B1, B2, B3);
104 aesenc(K3, B0, B1, B2, B3);
105 aesenc(K4, B0, B1, B2, B3);
106 aesenc(K5, B0, B1, B2, B3);
107 aesenc(K6, B0, B1, B2, B3);
108 aesenc(K7, B0, B1, B2, B3);
109 aesenc(K8, B0, B1, B2, B3);
110 aesenc(K9, B0, B1, B2, B3);
111 aesenclast(K10, B0, B1, B2, B3);
112
113 B0.store_le(out);
114 B1.store_le(out + 16 * 2);
115 B2.store_le(out + 16 * 4);
116 B3.store_le(out + 16 * 6);
117
118 blocks -= 8;
119 in += 8 * 16;
120 out += 8 * 16;
121 }
122
123 while(blocks >= 2) {
124 SIMD_8x32 B = SIMD_8x32::load_le(in);
125
126 B ^= K0;
127 aesenc(K1, B);
128 aesenc(K2, B);
129 aesenc(K3, B);
130 aesenc(K4, B);
131 aesenc(K5, B);
132 aesenc(K6, B);
133 aesenc(K7, B);
134 aesenc(K8, B);
135 aesenc(K9, B);
136 aesenclast(K10, B);
137
138 B.store_le(out);
139
140 in += 2 * 16;
141 out += 2 * 16;
142 blocks -= 2;
143 }
144
145 if(blocks > 0) {
146 SIMD_8x32 B = SIMD_8x32::load_le128(in);
147
148 B ^= K0;
149 aesenc(K1, B);
150 aesenc(K2, B);
151 aesenc(K3, B);
152 aesenc(K4, B);
153 aesenc(K5, B);
154 aesenc(K6, B);
155 aesenc(K7, B);
156 aesenc(K8, B);
157 aesenc(K9, B);
158 aesenclast(K10, B);
159
160 B.store_le128(out);
161 }
162}
163
164/*
165* AES-128 Decryption
166*/
167BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
168 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
169 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
170 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
171 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
172 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
173 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
174 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
175 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
176 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
177 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
178 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
179
180 while(blocks >= 8) {
181 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
182 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
183 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
184 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
185
186 keyxor(K0, B0, B1, B2, B3);
187 aesdec(K1, B0, B1, B2, B3);
188 aesdec(K2, B0, B1, B2, B3);
189 aesdec(K3, B0, B1, B2, B3);
190 aesdec(K4, B0, B1, B2, B3);
191 aesdec(K5, B0, B1, B2, B3);
192 aesdec(K6, B0, B1, B2, B3);
193 aesdec(K7, B0, B1, B2, B3);
194 aesdec(K8, B0, B1, B2, B3);
195 aesdec(K9, B0, B1, B2, B3);
196 aesdeclast(K10, B0, B1, B2, B3);
197
198 B0.store_le(out + 16 * 0);
199 B1.store_le(out + 16 * 2);
200 B2.store_le(out + 16 * 4);
201 B3.store_le(out + 16 * 6);
202
203 blocks -= 8;
204 in += 8 * 16;
205 out += 8 * 16;
206 }
207
208 while(blocks >= 2) {
209 SIMD_8x32 B = SIMD_8x32::load_le(in);
210
211 B ^= K0;
212 aesdec(K1, B);
213 aesdec(K2, B);
214 aesdec(K3, B);
215 aesdec(K4, B);
216 aesdec(K5, B);
217 aesdec(K6, B);
218 aesdec(K7, B);
219 aesdec(K8, B);
220 aesdec(K9, B);
221 aesdeclast(K10, B);
222
223 B.store_le(out);
224
225 in += 2 * 16;
226 out += 2 * 16;
227 blocks -= 2;
228 }
229
230 if(blocks > 0) {
231 SIMD_8x32 B = SIMD_8x32::load_le128(in);
232
233 B ^= K0;
234 aesdec(K1, B);
235 aesdec(K2, B);
236 aesdec(K3, B);
237 aesdec(K4, B);
238 aesdec(K5, B);
239 aesdec(K6, B);
240 aesdec(K7, B);
241 aesdec(K8, B);
242 aesdec(K9, B);
243 aesdeclast(K10, B);
244
245 B.store_le128(out);
246 }
247}
248
249/*
250* AES-192 Encryption
251*/
252BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
253 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
254 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
255 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
256 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
257 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
258 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
259 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
260 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
261 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
262 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
263 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
264 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
265 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
266
267 while(blocks >= 8) {
268 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
269 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
270 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
271 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
272
273 keyxor(K0, B0, B1, B2, B3);
274 aesenc(K1, B0, B1, B2, B3);
275 aesenc(K2, B0, B1, B2, B3);
276 aesenc(K3, B0, B1, B2, B3);
277 aesenc(K4, B0, B1, B2, B3);
278 aesenc(K5, B0, B1, B2, B3);
279 aesenc(K6, B0, B1, B2, B3);
280 aesenc(K7, B0, B1, B2, B3);
281 aesenc(K8, B0, B1, B2, B3);
282 aesenc(K9, B0, B1, B2, B3);
283 aesenc(K10, B0, B1, B2, B3);
284 aesenc(K11, B0, B1, B2, B3);
285 aesenclast(K12, B0, B1, B2, B3);
286
287 B0.store_le(out + 16 * 0);
288 B1.store_le(out + 16 * 2);
289 B2.store_le(out + 16 * 4);
290 B3.store_le(out + 16 * 6);
291
292 blocks -= 8;
293 in += 8 * 16;
294 out += 8 * 16;
295 }
296
297 while(blocks >= 2) {
298 SIMD_8x32 B = SIMD_8x32::load_le(in);
299
300 B ^= K0;
301 aesenc(K1, B);
302 aesenc(K2, B);
303 aesenc(K3, B);
304 aesenc(K4, B);
305 aesenc(K5, B);
306 aesenc(K6, B);
307 aesenc(K7, B);
308 aesenc(K8, B);
309 aesenc(K9, B);
310 aesenc(K10, B);
311 aesenc(K11, B);
312 aesenclast(K12, B);
313
314 B.store_le(out);
315
316 in += 2 * 16;
317 out += 2 * 16;
318 blocks -= 2;
319 }
320
321 if(blocks > 0) {
322 SIMD_8x32 B = SIMD_8x32::load_le128(in);
323
324 B ^= K0;
325 aesenc(K1, B);
326 aesenc(K2, B);
327 aesenc(K3, B);
328 aesenc(K4, B);
329 aesenc(K5, B);
330 aesenc(K6, B);
331 aesenc(K7, B);
332 aesenc(K8, B);
333 aesenc(K9, B);
334 aesenc(K10, B);
335 aesenc(K11, B);
336 aesenclast(K12, B);
337
338 B.store_le128(out);
339 }
340}
341
342/*
343* AES-192 Decryption
344*/
345BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
346 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
347 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
348 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
349 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
350 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
351 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
352 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
353 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
354 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
355 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
356 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
357 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
358 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
359
360 while(blocks >= 8) {
361 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
362 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
363 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
364 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
365
366 keyxor(K0, B0, B1, B2, B3);
367 aesdec(K1, B0, B1, B2, B3);
368 aesdec(K2, B0, B1, B2, B3);
369 aesdec(K3, B0, B1, B2, B3);
370 aesdec(K4, B0, B1, B2, B3);
371 aesdec(K5, B0, B1, B2, B3);
372 aesdec(K6, B0, B1, B2, B3);
373 aesdec(K7, B0, B1, B2, B3);
374 aesdec(K8, B0, B1, B2, B3);
375 aesdec(K9, B0, B1, B2, B3);
376 aesdec(K10, B0, B1, B2, B3);
377 aesdec(K11, B0, B1, B2, B3);
378 aesdeclast(K12, B0, B1, B2, B3);
379
380 B0.store_le(out + 16 * 0);
381 B1.store_le(out + 16 * 2);
382 B2.store_le(out + 16 * 4);
383 B3.store_le(out + 16 * 6);
384
385 blocks -= 8;
386 in += 8 * 16;
387 out += 8 * 16;
388 }
389
390 while(blocks >= 2) {
391 SIMD_8x32 B = SIMD_8x32::load_le(in);
392
393 B ^= K0;
394 aesdec(K1, B);
395 aesdec(K2, B);
396 aesdec(K3, B);
397 aesdec(K4, B);
398 aesdec(K5, B);
399 aesdec(K6, B);
400 aesdec(K7, B);
401 aesdec(K8, B);
402 aesdec(K9, B);
403 aesdec(K10, B);
404 aesdec(K11, B);
405 aesdeclast(K12, B);
406
407 B.store_le(out);
408
409 in += 2 * 16;
410 out += 2 * 16;
411 blocks -= 2;
412 }
413
414 if(blocks > 0) {
415 SIMD_8x32 B = SIMD_8x32::load_le128(in);
416
417 B ^= K0;
418 aesdec(K1, B);
419 aesdec(K2, B);
420 aesdec(K3, B);
421 aesdec(K4, B);
422 aesdec(K5, B);
423 aesdec(K6, B);
424 aesdec(K7, B);
425 aesdec(K8, B);
426 aesdec(K9, B);
427 aesdec(K10, B);
428 aesdec(K11, B);
429 aesdeclast(K12, B);
430
431 B.store_le128(out);
432 }
433}
434
435BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
436 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
437 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
438 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
439 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
440 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
441 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
442 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
443 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
444 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
445 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
446 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
447 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
448 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
449 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_EK[4 * 13]);
450 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_EK[4 * 14]);
451
452 while(blocks >= 8) {
453 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
454 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
455 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
456 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
457
458 keyxor(K0, B0, B1, B2, B3);
459 aesenc(K1, B0, B1, B2, B3);
460 aesenc(K2, B0, B1, B2, B3);
461 aesenc(K3, B0, B1, B2, B3);
462 aesenc(K4, B0, B1, B2, B3);
463 aesenc(K5, B0, B1, B2, B3);
464 aesenc(K6, B0, B1, B2, B3);
465 aesenc(K7, B0, B1, B2, B3);
466 aesenc(K8, B0, B1, B2, B3);
467 aesenc(K9, B0, B1, B2, B3);
468 aesenc(K10, B0, B1, B2, B3);
469 aesenc(K11, B0, B1, B2, B3);
470 aesenc(K12, B0, B1, B2, B3);
471 aesenc(K13, B0, B1, B2, B3);
472 aesenclast(K14, B0, B1, B2, B3);
473
474 B0.store_le(out + 16 * 0);
475 B1.store_le(out + 16 * 2);
476 B2.store_le(out + 16 * 4);
477 B3.store_le(out + 16 * 6);
478
479 blocks -= 8;
480 in += 8 * 16;
481 out += 8 * 16;
482 }
483
484 while(blocks >= 2) {
485 SIMD_8x32 B = SIMD_8x32::load_le(in);
486
487 B ^= K0;
488 aesenc(K1, B);
489 aesenc(K2, B);
490 aesenc(K3, B);
491 aesenc(K4, B);
492 aesenc(K5, B);
493 aesenc(K6, B);
494 aesenc(K7, B);
495 aesenc(K8, B);
496 aesenc(K9, B);
497 aesenc(K10, B);
498 aesenc(K11, B);
499 aesenc(K12, B);
500 aesenc(K13, B);
501 aesenclast(K14, B);
502
503 B.store_le(out);
504
505 in += 2 * 16;
506 out += 2 * 16;
507 blocks -= 2;
508 }
509
510 if(blocks > 0) {
511 SIMD_8x32 B = SIMD_8x32::load_le128(in);
512
513 B ^= K0;
514 aesenc(K1, B);
515 aesenc(K2, B);
516 aesenc(K3, B);
517 aesenc(K4, B);
518 aesenc(K5, B);
519 aesenc(K6, B);
520 aesenc(K7, B);
521 aesenc(K8, B);
522 aesenc(K9, B);
523 aesenc(K10, B);
524 aesenc(K11, B);
525 aesenc(K12, B);
526 aesenc(K13, B);
527 aesenclast(K14, B);
528
529 B.store_le128(out);
530 }
531}
532
533/*
534* AES-256 Decryption
535*/
536BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
537 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
538 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
539 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
540 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
541 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
542 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
543 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
544 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
545 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
546 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
547 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
548 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
549 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
550 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_DK[4 * 13]);
551 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_DK[4 * 14]);
552
553 while(blocks >= 8) {
554 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
555 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
556 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
557 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
558
559 keyxor(K0, B0, B1, B2, B3);
560 aesdec(K1, B0, B1, B2, B3);
561 aesdec(K2, B0, B1, B2, B3);
562 aesdec(K3, B0, B1, B2, B3);
563 aesdec(K4, B0, B1, B2, B3);
564 aesdec(K5, B0, B1, B2, B3);
565 aesdec(K6, B0, B1, B2, B3);
566 aesdec(K7, B0, B1, B2, B3);
567 aesdec(K8, B0, B1, B2, B3);
568 aesdec(K9, B0, B1, B2, B3);
569 aesdec(K10, B0, B1, B2, B3);
570 aesdec(K11, B0, B1, B2, B3);
571 aesdec(K12, B0, B1, B2, B3);
572 aesdec(K13, B0, B1, B2, B3);
573 aesdeclast(K14, B0, B1, B2, B3);
574
575 B0.store_le(out + 16 * 0);
576 B1.store_le(out + 16 * 2);
577 B2.store_le(out + 16 * 4);
578 B3.store_le(out + 16 * 6);
579
580 blocks -= 8;
581 in += 8 * 16;
582 out += 8 * 16;
583 }
584
585 while(blocks >= 2) {
586 SIMD_8x32 B = SIMD_8x32::load_le(in);
587
588 B ^= K0;
589 aesdec(K1, B);
590 aesdec(K2, B);
591 aesdec(K3, B);
592 aesdec(K4, B);
593 aesdec(K5, B);
594 aesdec(K6, B);
595 aesdec(K7, B);
596 aesdec(K8, B);
597 aesdec(K9, B);
598 aesdec(K10, B);
599 aesdec(K11, B);
600 aesdec(K12, B);
601 aesdec(K13, B);
602 aesdeclast(K14, B);
603
604 B.store_le(out);
605
606 in += 2 * 16;
607 out += 2 * 16;
608 blocks -= 2;
609 }
610
611 if(blocks > 0) {
612 SIMD_8x32 B = SIMD_8x32::load_le128(in);
613
614 B ^= K0;
615 aesdec(K1, B);
616 aesdec(K2, B);
617 aesdec(K3, B);
618 aesdec(K4, B);
619 aesdec(K5, B);
620 aesdec(K6, B);
621 aesdec(K7, B);
622 aesdec(K8, B);
623 aesdec(K9, B);
624 aesdec(K10, B);
625 aesdec(K11, B);
626 aesdec(K12, B);
627 aesdec(K13, B);
628 aesdeclast(K14, B);
629
630 B.store_le128(out);
631 }
632}
633
634} // namespace Botan
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le128(const uint8_t *in) noexcept
Definition simd_avx2.h:71
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le(const uint8_t *in) noexcept
Definition simd_avx2.h:61
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
constexpr uint32_t K1
Definition sha1_f.h:16
constexpr uint32_t K4
Definition sha1_f.h:19
constexpr uint32_t K3
Definition sha1_f.h:18
constexpr uint32_t K2
Definition sha1_f.h:17