Botan 3.10.0
Crypto and TLS for C&&
aes_vaes.cpp
Go to the documentation of this file.
1/*
2* (C) 2024 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/aes.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/simd_avx2.h>
11#include <wmmintrin.h>
12
13namespace Botan {
14
15namespace {
16
17BOTAN_FORCE_INLINE void keyxor(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
18 B0 ^= K;
19 B1 ^= K;
20 B2 ^= K;
21 B3 ^= K;
22}
23
24// NOLINTBEGIN(portability-simd-intrinsics)
25
26BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(SIMD_8x32 K, SIMD_8x32& B) {
27 B = SIMD_8x32(_mm256_aesenc_epi128(B.raw(), K.raw()));
28}
29
30BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(
31 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
32 B0 = SIMD_8x32(_mm256_aesenc_epi128(B0.raw(), K.raw()));
33 B1 = SIMD_8x32(_mm256_aesenc_epi128(B1.raw(), K.raw()));
34 B2 = SIMD_8x32(_mm256_aesenc_epi128(B2.raw(), K.raw()));
35 B3 = SIMD_8x32(_mm256_aesenc_epi128(B3.raw(), K.raw()));
36}
37
38BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(SIMD_8x32 K, SIMD_8x32& B) {
39 B = SIMD_8x32(_mm256_aesenclast_epi128(B.raw(), K.raw()));
40}
41
42BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(
43 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
44 B0 = SIMD_8x32(_mm256_aesenclast_epi128(B0.raw(), K.raw()));
45 B1 = SIMD_8x32(_mm256_aesenclast_epi128(B1.raw(), K.raw()));
46 B2 = SIMD_8x32(_mm256_aesenclast_epi128(B2.raw(), K.raw()));
47 B3 = SIMD_8x32(_mm256_aesenclast_epi128(B3.raw(), K.raw()));
48}
49
50BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(SIMD_8x32 K, SIMD_8x32& B) {
51 B = SIMD_8x32(_mm256_aesdec_epi128(B.raw(), K.raw()));
52}
53
54BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(
55 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
56 B0 = SIMD_8x32(_mm256_aesdec_epi128(B0.raw(), K.raw()));
57 B1 = SIMD_8x32(_mm256_aesdec_epi128(B1.raw(), K.raw()));
58 B2 = SIMD_8x32(_mm256_aesdec_epi128(B2.raw(), K.raw()));
59 B3 = SIMD_8x32(_mm256_aesdec_epi128(B3.raw(), K.raw()));
60}
61
62BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(SIMD_8x32 K, SIMD_8x32& B) {
63 B = SIMD_8x32(_mm256_aesdeclast_epi128(B.raw(), K.raw()));
64}
65
66BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(
67 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
68 B0 = SIMD_8x32(_mm256_aesdeclast_epi128(B0.raw(), K.raw()));
69 B1 = SIMD_8x32(_mm256_aesdeclast_epi128(B1.raw(), K.raw()));
70 B2 = SIMD_8x32(_mm256_aesdeclast_epi128(B2.raw(), K.raw()));
71 B3 = SIMD_8x32(_mm256_aesdeclast_epi128(B3.raw(), K.raw()));
72}
73
74// NOLINTEND(portability-simd-intrinsics)
75
76} // namespace
77
78/*
79* AES-128 Encryption
80*/
81BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
82 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
83 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
84 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
85 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
86 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
87 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
88 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
89 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
90 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
91 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
92 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
93
94 while(blocks >= 8) {
95 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
96 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
97 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
98 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
99
100 keyxor(K0, B0, B1, B2, B3);
101 aesenc(K1, B0, B1, B2, B3);
102 aesenc(K2, B0, B1, B2, B3);
103 aesenc(K3, B0, B1, B2, B3);
104 aesenc(K4, B0, B1, B2, B3);
105 aesenc(K5, B0, B1, B2, B3);
106 aesenc(K6, B0, B1, B2, B3);
107 aesenc(K7, B0, B1, B2, B3);
108 aesenc(K8, B0, B1, B2, B3);
109 aesenc(K9, B0, B1, B2, B3);
110 aesenclast(K10, B0, B1, B2, B3);
111
112 B0.store_le(out);
113 B1.store_le(out + 16 * 2);
114 B2.store_le(out + 16 * 4);
115 B3.store_le(out + 16 * 6);
116
117 blocks -= 8;
118 in += 8 * 16;
119 out += 8 * 16;
120 }
121
122 while(blocks >= 2) {
123 SIMD_8x32 B = SIMD_8x32::load_le(in);
124
125 B ^= K0;
126 aesenc(K1, B);
127 aesenc(K2, B);
128 aesenc(K3, B);
129 aesenc(K4, B);
130 aesenc(K5, B);
131 aesenc(K6, B);
132 aesenc(K7, B);
133 aesenc(K8, B);
134 aesenc(K9, B);
135 aesenclast(K10, B);
136
137 B.store_le(out);
138
139 in += 2 * 16;
140 out += 2 * 16;
141 blocks -= 2;
142 }
143
144 if(blocks > 0) {
145 SIMD_8x32 B = SIMD_8x32::load_le128(in);
146
147 B ^= K0;
148 aesenc(K1, B);
149 aesenc(K2, B);
150 aesenc(K3, B);
151 aesenc(K4, B);
152 aesenc(K5, B);
153 aesenc(K6, B);
154 aesenc(K7, B);
155 aesenc(K8, B);
156 aesenc(K9, B);
157 aesenclast(K10, B);
158
159 B.store_le128(out);
160 }
161}
162
163/*
164* AES-128 Decryption
165*/
166BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
167 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
168 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
169 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
170 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
171 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
172 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
173 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
174 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
175 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
176 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
177 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
178
179 while(blocks >= 8) {
180 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
181 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
182 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
183 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
184
185 keyxor(K0, B0, B1, B2, B3);
186 aesdec(K1, B0, B1, B2, B3);
187 aesdec(K2, B0, B1, B2, B3);
188 aesdec(K3, B0, B1, B2, B3);
189 aesdec(K4, B0, B1, B2, B3);
190 aesdec(K5, B0, B1, B2, B3);
191 aesdec(K6, B0, B1, B2, B3);
192 aesdec(K7, B0, B1, B2, B3);
193 aesdec(K8, B0, B1, B2, B3);
194 aesdec(K9, B0, B1, B2, B3);
195 aesdeclast(K10, B0, B1, B2, B3);
196
197 B0.store_le(out + 16 * 0);
198 B1.store_le(out + 16 * 2);
199 B2.store_le(out + 16 * 4);
200 B3.store_le(out + 16 * 6);
201
202 blocks -= 8;
203 in += 8 * 16;
204 out += 8 * 16;
205 }
206
207 while(blocks >= 2) {
208 SIMD_8x32 B = SIMD_8x32::load_le(in);
209
210 B ^= K0;
211 aesdec(K1, B);
212 aesdec(K2, B);
213 aesdec(K3, B);
214 aesdec(K4, B);
215 aesdec(K5, B);
216 aesdec(K6, B);
217 aesdec(K7, B);
218 aesdec(K8, B);
219 aesdec(K9, B);
220 aesdeclast(K10, B);
221
222 B.store_le(out);
223
224 in += 2 * 16;
225 out += 2 * 16;
226 blocks -= 2;
227 }
228
229 if(blocks > 0) {
230 SIMD_8x32 B = SIMD_8x32::load_le128(in);
231
232 B ^= K0;
233 aesdec(K1, B);
234 aesdec(K2, B);
235 aesdec(K3, B);
236 aesdec(K4, B);
237 aesdec(K5, B);
238 aesdec(K6, B);
239 aesdec(K7, B);
240 aesdec(K8, B);
241 aesdec(K9, B);
242 aesdeclast(K10, B);
243
244 B.store_le128(out);
245 }
246}
247
248/*
249* AES-192 Encryption
250*/
251BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
252 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
253 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
254 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
255 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
256 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
257 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
258 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
259 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
260 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
261 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
262 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
263 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
264 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
265
266 while(blocks >= 8) {
267 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
268 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
269 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
270 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
271
272 keyxor(K0, B0, B1, B2, B3);
273 aesenc(K1, B0, B1, B2, B3);
274 aesenc(K2, B0, B1, B2, B3);
275 aesenc(K3, B0, B1, B2, B3);
276 aesenc(K4, B0, B1, B2, B3);
277 aesenc(K5, B0, B1, B2, B3);
278 aesenc(K6, B0, B1, B2, B3);
279 aesenc(K7, B0, B1, B2, B3);
280 aesenc(K8, B0, B1, B2, B3);
281 aesenc(K9, B0, B1, B2, B3);
282 aesenc(K10, B0, B1, B2, B3);
283 aesenc(K11, B0, B1, B2, B3);
284 aesenclast(K12, B0, B1, B2, B3);
285
286 B0.store_le(out + 16 * 0);
287 B1.store_le(out + 16 * 2);
288 B2.store_le(out + 16 * 4);
289 B3.store_le(out + 16 * 6);
290
291 blocks -= 8;
292 in += 8 * 16;
293 out += 8 * 16;
294 }
295
296 while(blocks >= 2) {
297 SIMD_8x32 B = SIMD_8x32::load_le(in);
298
299 B ^= K0;
300 aesenc(K1, B);
301 aesenc(K2, B);
302 aesenc(K3, B);
303 aesenc(K4, B);
304 aesenc(K5, B);
305 aesenc(K6, B);
306 aesenc(K7, B);
307 aesenc(K8, B);
308 aesenc(K9, B);
309 aesenc(K10, B);
310 aesenc(K11, B);
311 aesenclast(K12, B);
312
313 B.store_le(out);
314
315 in += 2 * 16;
316 out += 2 * 16;
317 blocks -= 2;
318 }
319
320 if(blocks > 0) {
321 SIMD_8x32 B = SIMD_8x32::load_le128(in);
322
323 B ^= K0;
324 aesenc(K1, B);
325 aesenc(K2, B);
326 aesenc(K3, B);
327 aesenc(K4, B);
328 aesenc(K5, B);
329 aesenc(K6, B);
330 aesenc(K7, B);
331 aesenc(K8, B);
332 aesenc(K9, B);
333 aesenc(K10, B);
334 aesenc(K11, B);
335 aesenclast(K12, B);
336
337 B.store_le128(out);
338 }
339}
340
341/*
342* AES-192 Decryption
343*/
344BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
345 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
346 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
347 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
348 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
349 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
350 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
351 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
352 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
353 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
354 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
355 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
356 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
357 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
358
359 while(blocks >= 8) {
360 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
361 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
362 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
363 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
364
365 keyxor(K0, B0, B1, B2, B3);
366 aesdec(K1, B0, B1, B2, B3);
367 aesdec(K2, B0, B1, B2, B3);
368 aesdec(K3, B0, B1, B2, B3);
369 aesdec(K4, B0, B1, B2, B3);
370 aesdec(K5, B0, B1, B2, B3);
371 aesdec(K6, B0, B1, B2, B3);
372 aesdec(K7, B0, B1, B2, B3);
373 aesdec(K8, B0, B1, B2, B3);
374 aesdec(K9, B0, B1, B2, B3);
375 aesdec(K10, B0, B1, B2, B3);
376 aesdec(K11, B0, B1, B2, B3);
377 aesdeclast(K12, B0, B1, B2, B3);
378
379 B0.store_le(out + 16 * 0);
380 B1.store_le(out + 16 * 2);
381 B2.store_le(out + 16 * 4);
382 B3.store_le(out + 16 * 6);
383
384 blocks -= 8;
385 in += 8 * 16;
386 out += 8 * 16;
387 }
388
389 while(blocks >= 2) {
390 SIMD_8x32 B = SIMD_8x32::load_le(in);
391
392 B ^= K0;
393 aesdec(K1, B);
394 aesdec(K2, B);
395 aesdec(K3, B);
396 aesdec(K4, B);
397 aesdec(K5, B);
398 aesdec(K6, B);
399 aesdec(K7, B);
400 aesdec(K8, B);
401 aesdec(K9, B);
402 aesdec(K10, B);
403 aesdec(K11, B);
404 aesdeclast(K12, B);
405
406 B.store_le(out);
407
408 in += 2 * 16;
409 out += 2 * 16;
410 blocks -= 2;
411 }
412
413 if(blocks > 0) {
414 SIMD_8x32 B = SIMD_8x32::load_le128(in);
415
416 B ^= K0;
417 aesdec(K1, B);
418 aesdec(K2, B);
419 aesdec(K3, B);
420 aesdec(K4, B);
421 aesdec(K5, B);
422 aesdec(K6, B);
423 aesdec(K7, B);
424 aesdec(K8, B);
425 aesdec(K9, B);
426 aesdec(K10, B);
427 aesdec(K11, B);
428 aesdeclast(K12, B);
429
430 B.store_le128(out);
431 }
432}
433
434BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
435 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
436 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
437 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
438 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
439 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
440 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
441 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
442 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
443 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
444 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
445 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
446 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
447 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
448 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_EK[4 * 13]);
449 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_EK[4 * 14]);
450
451 while(blocks >= 8) {
452 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
453 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
454 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
455 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
456
457 keyxor(K0, B0, B1, B2, B3);
458 aesenc(K1, B0, B1, B2, B3);
459 aesenc(K2, B0, B1, B2, B3);
460 aesenc(K3, B0, B1, B2, B3);
461 aesenc(K4, B0, B1, B2, B3);
462 aesenc(K5, B0, B1, B2, B3);
463 aesenc(K6, B0, B1, B2, B3);
464 aesenc(K7, B0, B1, B2, B3);
465 aesenc(K8, B0, B1, B2, B3);
466 aesenc(K9, B0, B1, B2, B3);
467 aesenc(K10, B0, B1, B2, B3);
468 aesenc(K11, B0, B1, B2, B3);
469 aesenc(K12, B0, B1, B2, B3);
470 aesenc(K13, B0, B1, B2, B3);
471 aesenclast(K14, B0, B1, B2, B3);
472
473 B0.store_le(out + 16 * 0);
474 B1.store_le(out + 16 * 2);
475 B2.store_le(out + 16 * 4);
476 B3.store_le(out + 16 * 6);
477
478 blocks -= 8;
479 in += 8 * 16;
480 out += 8 * 16;
481 }
482
483 while(blocks >= 2) {
484 SIMD_8x32 B = SIMD_8x32::load_le(in);
485
486 B ^= K0;
487 aesenc(K1, B);
488 aesenc(K2, B);
489 aesenc(K3, B);
490 aesenc(K4, B);
491 aesenc(K5, B);
492 aesenc(K6, B);
493 aesenc(K7, B);
494 aesenc(K8, B);
495 aesenc(K9, B);
496 aesenc(K10, B);
497 aesenc(K11, B);
498 aesenc(K12, B);
499 aesenc(K13, B);
500 aesenclast(K14, B);
501
502 B.store_le(out);
503
504 in += 2 * 16;
505 out += 2 * 16;
506 blocks -= 2;
507 }
508
509 if(blocks > 0) {
510 SIMD_8x32 B = SIMD_8x32::load_le128(in);
511
512 B ^= K0;
513 aesenc(K1, B);
514 aesenc(K2, B);
515 aesenc(K3, B);
516 aesenc(K4, B);
517 aesenc(K5, B);
518 aesenc(K6, B);
519 aesenc(K7, B);
520 aesenc(K8, B);
521 aesenc(K9, B);
522 aesenc(K10, B);
523 aesenc(K11, B);
524 aesenc(K12, B);
525 aesenc(K13, B);
526 aesenclast(K14, B);
527
528 B.store_le128(out);
529 }
530}
531
532/*
533* AES-256 Decryption
534*/
535BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
536 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
537 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
538 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
539 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
540 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
541 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
542 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
543 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
544 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
545 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
546 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
547 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
548 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
549 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_DK[4 * 13]);
550 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_DK[4 * 14]);
551
552 while(blocks >= 8) {
553 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
554 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
555 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
556 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
557
558 keyxor(K0, B0, B1, B2, B3);
559 aesdec(K1, B0, B1, B2, B3);
560 aesdec(K2, B0, B1, B2, B3);
561 aesdec(K3, B0, B1, B2, B3);
562 aesdec(K4, B0, B1, B2, B3);
563 aesdec(K5, B0, B1, B2, B3);
564 aesdec(K6, B0, B1, B2, B3);
565 aesdec(K7, B0, B1, B2, B3);
566 aesdec(K8, B0, B1, B2, B3);
567 aesdec(K9, B0, B1, B2, B3);
568 aesdec(K10, B0, B1, B2, B3);
569 aesdec(K11, B0, B1, B2, B3);
570 aesdec(K12, B0, B1, B2, B3);
571 aesdec(K13, B0, B1, B2, B3);
572 aesdeclast(K14, B0, B1, B2, B3);
573
574 B0.store_le(out + 16 * 0);
575 B1.store_le(out + 16 * 2);
576 B2.store_le(out + 16 * 4);
577 B3.store_le(out + 16 * 6);
578
579 blocks -= 8;
580 in += 8 * 16;
581 out += 8 * 16;
582 }
583
584 while(blocks >= 2) {
585 SIMD_8x32 B = SIMD_8x32::load_le(in);
586
587 B ^= K0;
588 aesdec(K1, B);
589 aesdec(K2, B);
590 aesdec(K3, B);
591 aesdec(K4, B);
592 aesdec(K5, B);
593 aesdec(K6, B);
594 aesdec(K7, B);
595 aesdec(K8, B);
596 aesdec(K9, B);
597 aesdec(K10, B);
598 aesdec(K11, B);
599 aesdec(K12, B);
600 aesdec(K13, B);
601 aesdeclast(K14, B);
602
603 B.store_le(out);
604
605 in += 2 * 16;
606 out += 2 * 16;
607 blocks -= 2;
608 }
609
610 if(blocks > 0) {
611 SIMD_8x32 B = SIMD_8x32::load_le128(in);
612
613 B ^= K0;
614 aesdec(K1, B);
615 aesdec(K2, B);
616 aesdec(K3, B);
617 aesdec(K4, B);
618 aesdec(K5, B);
619 aesdec(K6, B);
620 aesdec(K7, B);
621 aesdec(K8, B);
622 aesdec(K9, B);
623 aesdec(K10, B);
624 aesdec(K11, B);
625 aesdec(K12, B);
626 aesdec(K13, B);
627 aesdeclast(K14, B);
628
629 B.store_le128(out);
630 }
631}
632
633} // namespace Botan
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le128(const uint8_t *in) noexcept
Definition simd_avx2.h:71
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le(const uint8_t *in) noexcept
Definition simd_avx2.h:61
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
constexpr uint32_t K1
Definition sha1_f.h:16
constexpr uint32_t K4
Definition sha1_f.h:19
constexpr uint32_t K3
Definition sha1_f.h:18
constexpr uint32_t K2
Definition sha1_f.h:17