7#include <botan/internal/aes.h>
9#include <botan/internal/loadstor.h>
10#include <botan/internal/simd_avx2.h>
// Four-register key-mixing helper: takes the round key K by value and the
// block registers B0..B3 by mutable reference.
// Only the signature is visible in this excerpt; the body is not shown.
// NOTE(review): presumably XORs K into each register (the AES initial
// AddRoundKey step) - confirm against the full file.
17BOTAN_FORCE_INLINE void keyxor(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
// Single-register form: replaces B with _mm256_aesenc_epi128(B, K), i.e. one
// AES encryption round applied to each 128-bit lane of B using round key K.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the body of a one-register aesenc overload - confirm.
25 B = SIMD_8x32(_mm256_aesenc_epi128(B.raw(),
K.raw()));
// Four-register form: applies one AES encryption round with the same round
// key K to each of B0..B3, overwriting each register in place.
// K is passed by value; B0..B3 are in/out parameters.
// The closing brace of this function is not visible in this excerpt.
29void aesenc(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
30 B0 = SIMD_8x32(_mm256_aesenc_epi128(B0.raw(),
K.raw()));
31 B1 = SIMD_8x32(_mm256_aesenc_epi128(B1.raw(),
K.raw()));
32 B2 = SIMD_8x32(_mm256_aesenc_epi128(B2.raw(),
K.raw()));
33 B3 = SIMD_8x32(_mm256_aesenc_epi128(B3.raw(),
K.raw()));
// Single-register form: replaces B with _mm256_aesenclast_epi128(B, K), i.e.
// the final AES encryption round applied to each 128-bit lane of B.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the body of a one-register aesenclast overload - confirm.
37 B = SIMD_8x32(_mm256_aesenclast_epi128(B.raw(),
K.raw()));
// Four-register form: applies the final AES encryption round with the same
// round key K to each of B0..B3, overwriting each register in place.
// The closing brace of this function is not visible in this excerpt.
41void aesenclast(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
42 B0 = SIMD_8x32(_mm256_aesenclast_epi128(B0.raw(),
K.raw()));
43 B1 = SIMD_8x32(_mm256_aesenclast_epi128(B1.raw(),
K.raw()));
44 B2 = SIMD_8x32(_mm256_aesenclast_epi128(B2.raw(),
K.raw()));
45 B3 = SIMD_8x32(_mm256_aesenclast_epi128(B3.raw(),
K.raw()));
// Single-register form: replaces B with _mm256_aesdec_epi128(B, K), i.e. one
// AES decryption round applied to each 128-bit lane of B using round key K.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the body of a one-register aesdec overload - confirm.
49 B = SIMD_8x32(_mm256_aesdec_epi128(B.raw(),
K.raw()));
// Four-register form: applies one AES decryption round with the same round
// key K to each of B0..B3, overwriting each register in place.
// The closing brace of this function is not visible in this excerpt.
53void aesdec(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
54 B0 = SIMD_8x32(_mm256_aesdec_epi128(B0.raw(),
K.raw()));
55 B1 = SIMD_8x32(_mm256_aesdec_epi128(B1.raw(),
K.raw()));
56 B2 = SIMD_8x32(_mm256_aesdec_epi128(B2.raw(),
K.raw()));
57 B3 = SIMD_8x32(_mm256_aesdec_epi128(B3.raw(),
K.raw()));
// Single-register form: replaces B with _mm256_aesdeclast_epi128(B, K), i.e.
// the final AES decryption round applied to each 128-bit lane of B.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the body of a one-register aesdeclast overload - confirm.
61 B = SIMD_8x32(_mm256_aesdeclast_epi128(B.raw(),
K.raw()));
// Four-register form: applies the final AES decryption round with the same
// round key K to each of B0..B3, overwriting each register in place.
// The closing brace of this function is not visible in this excerpt.
65void aesdeclast(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
66 B0 = SIMD_8x32(_mm256_aesdeclast_epi128(B0.raw(),
K.raw()));
67 B1 = SIMD_8x32(_mm256_aesdeclast_epi128(B1.raw(),
K.raw()));
68 B2 = SIMD_8x32(_mm256_aesdeclast_epi128(B2.raw(),
K.raw()));
69 B3 = SIMD_8x32(_mm256_aesdeclast_epi128(B3.raw(),
K.raw()));
// AES-128 encryption from `in` to `out` using 256-bit VAES operations.
// Only the round-key loads, the four-register round sequence and two
// single-register loads are visible in this excerpt; the loop/branch
// structure, the use of `blocks`, and pointer advancement are not shown.
77BOTAN_FUNC_ISA(
"vaes,avx2") void AES_128::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[],
size_t blocks)
const {
// Round keys 0..10 of the encryption schedule m_EK, one per 4 elements.
// NOTE(review): presumably m_EK holds 32-bit words (4 words = one 128-bit
// round key) and load_le128 replicates that key into both 128-bit lanes -
// confirm against SIMD_8x32.
78 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
79 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
80 const SIMD_8x32
K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
81 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
82 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
83 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
84 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
85 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
86 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
87 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
88 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
// Four registers loaded at byte offsets 0, 32, 64 and 96 of `in`; the
// 32-byte stride means each register covers two 16-byte AES blocks
// (assuming load_le reads a full 256-bit register - TODO confirm).
91 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
92 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
93 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
94 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
// Round sequence: keyxor with K0, nine aesenc rounds (K1..K9), then the
// final round with K10.
96 keyxor(K0, B0, B1, B2, B3);
97 aesenc(K1, B0, B1, B2, B3);
98 aesenc(K2, B0, B1, B2, B3);
99 aesenc(K3, B0, B1, B2, B3);
100 aesenc(K4, B0, B1, B2, B3);
101 aesenc(K5, B0, B1, B2, B3);
102 aesenc(K6, B0, B1, B2, B3);
103 aesenc(K7, B0, B1, B2, B3);
104 aesenc(K8, B0, B1, B2, B3);
105 aesenc(K9, B0, B1, B2, B3);
106 aesenclast(K10, B0, B1, B2, B3);
// Results are written to `out` at the same offsets they were read from `in`.
// NOTE(review): no store of B0 is visible in this excerpt, whereas the other
// *_n functions store B0 at out + 16 * 0 - confirm it exists in the full file.
109 B1.store_le(out + 16 * 2);
110 B2.store_le(out + 16 * 4);
111 B3.store_le(out + 16 * 6);
// Single-register path: one register loaded from `in` with load_le; the
// surrounding control flow and round calls are not visible here.
119 SIMD_8x32 B = SIMD_8x32::load_le(in);
// Single-register load via load_le128 instead of load_le.
// NOTE(review): presumably handles a final lone 16-byte block - confirm.
141 SIMD_8x32 B = SIMD_8x32::load_le128(in);
// AES-128 decryption from `in` to `out` using 256-bit VAES operations.
// Only the round-key loads, the four-register round sequence and two
// single-register loads are visible in this excerpt; the loop/branch
// structure, the use of `blocks`, and pointer advancement are not shown.
162BOTAN_FUNC_ISA(
"vaes,avx2") void AES_128::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[],
size_t blocks)
const {
// Round keys 0..10 of the decryption schedule m_DK, one per 4 elements.
// NOTE(review): presumably m_DK holds 32-bit words (4 words = one 128-bit
// round key) and load_le128 replicates that key into both 128-bit lanes -
// confirm against SIMD_8x32.
163 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
164 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
165 const SIMD_8x32
K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
166 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
167 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
168 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
169 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
170 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
171 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
172 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
173 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
// Four registers loaded at byte offsets 0, 32, 64 and 96 of `in`; the
// 32-byte stride means each register covers two 16-byte AES blocks
// (assuming load_le reads a full 256-bit register - TODO confirm).
176 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
177 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
178 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
179 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
// Round sequence: keyxor with K0, nine aesdec rounds (K1..K9), then the
// final round with K10.
181 keyxor(K0, B0, B1, B2, B3);
182 aesdec(K1, B0, B1, B2, B3);
183 aesdec(K2, B0, B1, B2, B3);
184 aesdec(K3, B0, B1, B2, B3);
185 aesdec(K4, B0, B1, B2, B3);
186 aesdec(K5, B0, B1, B2, B3);
187 aesdec(K6, B0, B1, B2, B3);
188 aesdec(K7, B0, B1, B2, B3);
189 aesdec(K8, B0, B1, B2, B3);
190 aesdec(K9, B0, B1, B2, B3);
191 aesdeclast(K10, B0, B1, B2, B3);
// Results are written to `out` at the same offsets they were read from `in`.
193 B0.store_le(out + 16 * 0);
194 B1.store_le(out + 16 * 2);
195 B2.store_le(out + 16 * 4);
196 B3.store_le(out + 16 * 6);
// Single-register path: one register loaded from `in` with load_le; the
// surrounding control flow and round calls are not visible here.
204 SIMD_8x32 B = SIMD_8x32::load_le(in);
// Single-register load via load_le128 instead of load_le.
// NOTE(review): presumably handles a final lone 16-byte block - confirm.
226 SIMD_8x32 B = SIMD_8x32::load_le128(in);
// AES-192 encryption from `in` to `out` using 256-bit VAES operations.
// Only the round-key loads, the four-register round sequence and two
// single-register loads are visible in this excerpt; the loop/branch
// structure, the use of `blocks`, and pointer advancement are not shown.
247BOTAN_FUNC_ISA(
"vaes,avx2") void AES_192::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[],
size_t blocks)
const {
// Round keys 0..12 of the encryption schedule m_EK, one per 4 elements.
// NOTE(review): presumably m_EK holds 32-bit words (4 words = one 128-bit
// round key) and load_le128 replicates that key into both 128-bit lanes -
// confirm against SIMD_8x32.
248 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
249 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
250 const SIMD_8x32
K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
251 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
252 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
253 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
254 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
255 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
256 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
257 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
258 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
259 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
260 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
// Four registers loaded at byte offsets 0, 32, 64 and 96 of `in`; the
// 32-byte stride means each register covers two 16-byte AES blocks
// (assuming load_le reads a full 256-bit register - TODO confirm).
263 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
264 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
265 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
266 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
// Round sequence: keyxor with K0, eleven aesenc rounds (K1..K11), then the
// final round with K12.
268 keyxor(K0, B0, B1, B2, B3);
269 aesenc(K1, B0, B1, B2, B3);
270 aesenc(K2, B0, B1, B2, B3);
271 aesenc(K3, B0, B1, B2, B3);
272 aesenc(K4, B0, B1, B2, B3);
273 aesenc(K5, B0, B1, B2, B3);
274 aesenc(K6, B0, B1, B2, B3);
275 aesenc(K7, B0, B1, B2, B3);
276 aesenc(K8, B0, B1, B2, B3);
277 aesenc(K9, B0, B1, B2, B3);
278 aesenc(K10, B0, B1, B2, B3);
279 aesenc(K11, B0, B1, B2, B3);
280 aesenclast(K12, B0, B1, B2, B3);
// Results are written to `out` at the same offsets they were read from `in`.
282 B0.store_le(out + 16 * 0);
283 B1.store_le(out + 16 * 2);
284 B2.store_le(out + 16 * 4);
285 B3.store_le(out + 16 * 6);
// Single-register path: one register loaded from `in` with load_le; the
// surrounding control flow and round calls are not visible here.
293 SIMD_8x32 B = SIMD_8x32::load_le(in);
// Single-register load via load_le128 instead of load_le.
// NOTE(review): presumably handles a final lone 16-byte block - confirm.
317 SIMD_8x32 B = SIMD_8x32::load_le128(in);
// AES-192 decryption from `in` to `out` using 256-bit VAES operations.
// Only the round-key loads, the four-register round sequence and two
// single-register loads are visible in this excerpt; the loop/branch
// structure, the use of `blocks`, and pointer advancement are not shown.
340BOTAN_FUNC_ISA(
"vaes,avx2") void AES_192::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[],
size_t blocks)
const {
// Round keys 0..12 of the decryption schedule m_DK, one per 4 elements.
// NOTE(review): presumably m_DK holds 32-bit words (4 words = one 128-bit
// round key) and load_le128 replicates that key into both 128-bit lanes -
// confirm against SIMD_8x32.
341 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
342 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
343 const SIMD_8x32
K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
344 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
345 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
346 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
347 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
348 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
349 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
350 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
351 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
352 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
353 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
// Four registers loaded at byte offsets 0, 32, 64 and 96 of `in`; the
// 32-byte stride means each register covers two 16-byte AES blocks
// (assuming load_le reads a full 256-bit register - TODO confirm).
356 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
357 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
358 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
359 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
// Round sequence: keyxor with K0, eleven aesdec rounds (K1..K11), then the
// final round with K12.
361 keyxor(K0, B0, B1, B2, B3);
362 aesdec(K1, B0, B1, B2, B3);
363 aesdec(K2, B0, B1, B2, B3);
364 aesdec(K3, B0, B1, B2, B3);
365 aesdec(K4, B0, B1, B2, B3);
366 aesdec(K5, B0, B1, B2, B3);
367 aesdec(K6, B0, B1, B2, B3);
368 aesdec(K7, B0, B1, B2, B3);
369 aesdec(K8, B0, B1, B2, B3);
370 aesdec(K9, B0, B1, B2, B3);
371 aesdec(K10, B0, B1, B2, B3);
372 aesdec(K11, B0, B1, B2, B3);
373 aesdeclast(K12, B0, B1, B2, B3);
// Results are written to `out` at the same offsets they were read from `in`.
375 B0.store_le(out + 16 * 0);
376 B1.store_le(out + 16 * 2);
377 B2.store_le(out + 16 * 4);
378 B3.store_le(out + 16 * 6);
// Single-register path: one register loaded from `in` with load_le; the
// surrounding control flow and round calls are not visible here.
386 SIMD_8x32 B = SIMD_8x32::load_le(in);
// Single-register load via load_le128 instead of load_le.
// NOTE(review): presumably handles a final lone 16-byte block - confirm.
410 SIMD_8x32 B = SIMD_8x32::load_le128(in);
// AES-256 encryption from `in` to `out` using 256-bit VAES operations.
// Only the round-key loads, the four-register round sequence and two
// single-register loads are visible in this excerpt; the loop/branch
// structure, the use of `blocks`, and pointer advancement are not shown.
430BOTAN_FUNC_ISA(
"vaes,avx2") void AES_256::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[],
size_t blocks)
const {
// Round keys 0..14 of the encryption schedule m_EK, one per 4 elements.
// NOTE(review): presumably m_EK holds 32-bit words (4 words = one 128-bit
// round key) and load_le128 replicates that key into both 128-bit lanes -
// confirm against SIMD_8x32.
431 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
432 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
433 const SIMD_8x32
K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
434 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
435 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
436 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
437 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
438 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
439 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
440 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
441 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
442 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
443 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
444 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_EK[4 * 13]);
445 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_EK[4 * 14]);
// Four registers loaded at byte offsets 0, 32, 64 and 96 of `in`; the
// 32-byte stride means each register covers two 16-byte AES blocks
// (assuming load_le reads a full 256-bit register - TODO confirm).
448 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
449 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
450 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
451 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
// Round sequence: keyxor with K0, thirteen aesenc rounds (K1..K13), then the
// final round with K14.
453 keyxor(K0, B0, B1, B2, B3);
454 aesenc(K1, B0, B1, B2, B3);
455 aesenc(K2, B0, B1, B2, B3);
456 aesenc(K3, B0, B1, B2, B3);
457 aesenc(K4, B0, B1, B2, B3);
458 aesenc(K5, B0, B1, B2, B3);
459 aesenc(K6, B0, B1, B2, B3);
460 aesenc(K7, B0, B1, B2, B3);
461 aesenc(K8, B0, B1, B2, B3);
462 aesenc(K9, B0, B1, B2, B3);
463 aesenc(K10, B0, B1, B2, B3);
464 aesenc(K11, B0, B1, B2, B3);
465 aesenc(K12, B0, B1, B2, B3);
466 aesenc(K13, B0, B1, B2, B3);
467 aesenclast(K14, B0, B1, B2, B3);
// Results are written to `out` at the same offsets they were read from `in`.
469 B0.store_le(out + 16 * 0);
470 B1.store_le(out + 16 * 2);
471 B2.store_le(out + 16 * 4);
472 B3.store_le(out + 16 * 6);
// Single-register path: one register loaded from `in` with load_le; the
// surrounding control flow and round calls are not visible here.
480 SIMD_8x32 B = SIMD_8x32::load_le(in);
// Single-register load via load_le128 instead of load_le.
// NOTE(review): presumably handles a final lone 16-byte block - confirm.
506 SIMD_8x32 B = SIMD_8x32::load_le128(in);
// AES-256 decryption from `in` to `out` using 256-bit VAES operations.
// Only the round-key loads, the four-register round sequence and two
// single-register loads are visible in this excerpt; the loop/branch
// structure, the use of `blocks`, and pointer advancement are not shown.
531BOTAN_FUNC_ISA(
"vaes,avx2") void AES_256::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[],
size_t blocks)
const {
// Round keys 0..14 of the decryption schedule m_DK, one per 4 elements.
// NOTE(review): presumably m_DK holds 32-bit words (4 words = one 128-bit
// round key) and load_le128 replicates that key into both 128-bit lanes -
// confirm against SIMD_8x32.
532 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
533 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
534 const SIMD_8x32
K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
535 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
536 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
537 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
538 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
539 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
540 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
541 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
542 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
543 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
544 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
545 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_DK[4 * 13]);
546 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_DK[4 * 14]);
// Four registers loaded at byte offsets 0, 32, 64 and 96 of `in`; the
// 32-byte stride means each register covers two 16-byte AES blocks
// (assuming load_le reads a full 256-bit register - TODO confirm).
549 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
550 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
551 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
552 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
// Round sequence: keyxor with K0, thirteen aesdec rounds (K1..K13), then the
// final round with K14.
554 keyxor(K0, B0, B1, B2, B3);
555 aesdec(K1, B0, B1, B2, B3);
556 aesdec(K2, B0, B1, B2, B3);
557 aesdec(K3, B0, B1, B2, B3);
558 aesdec(K4, B0, B1, B2, B3);
559 aesdec(K5, B0, B1, B2, B3);
560 aesdec(K6, B0, B1, B2, B3);
561 aesdec(K7, B0, B1, B2, B3);
562 aesdec(K8, B0, B1, B2, B3);
563 aesdec(K9, B0, B1, B2, B3);
564 aesdec(K10, B0, B1, B2, B3);
565 aesdec(K11, B0, B1, B2, B3);
566 aesdec(K12, B0, B1, B2, B3);
567 aesdec(K13, B0, B1, B2, B3);
568 aesdeclast(K14, B0, B1, B2, B3);
// Results are written to `out` at the same offsets they were read from `in`.
570 B0.store_le(out + 16 * 0);
571 B1.store_le(out + 16 * 2);
572 B2.store_le(out + 16 * 4);
573 B3.store_le(out + 16 * 6);
// Single-register path: one register loaded from `in` with load_le; the
// surrounding control flow and round calls are not visible here.
581 SIMD_8x32 B = SIMD_8x32::load_le(in);
// Single-register load via load_le128 instead of load_le.
// NOTE(review): presumably handles a final lone 16-byte block - confirm.
607 SIMD_8x32 B = SIMD_8x32::load_le128(in);
// Stub definitions: each macro below has an empty replacement list, so any
// use that follows these lines expands to nothing.
// NOTE(review): these appear after every use of BOTAN_FUNC_ISA and
// BOTAN_FORCE_INLINE visible in this excerpt, and the real definitions
// presumably come from a Botan header - confirm whether these stubs belong in
// the file at all.
#define BOTAN_FUNC_ISA(isa)
#define BOTAN_FORCE_INLINE
#define BOTAN_FUNC_ISA_INLINE(isa)