Botan 3.9.0
Crypto and TLS for C++
aes_vaes.cpp
Go to the documentation of this file.
1/*
2* (C) 2024 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/aes.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/loadstor.h>
11#include <botan/internal/simd_avx2.h>
12#include <wmmintrin.h>
13
14namespace Botan {
15
16namespace {
17
18BOTAN_FORCE_INLINE void keyxor(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
19 B0 ^= K;
20 B1 ^= K;
21 B2 ^= K;
22 B3 ^= K;
23}
24
25BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(SIMD_8x32 K, SIMD_8x32& B) {
26 B = SIMD_8x32(_mm256_aesenc_epi128(B.raw(), K.raw()));
27}
28
29BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenc(
30 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
31 B0 = SIMD_8x32(_mm256_aesenc_epi128(B0.raw(), K.raw()));
32 B1 = SIMD_8x32(_mm256_aesenc_epi128(B1.raw(), K.raw()));
33 B2 = SIMD_8x32(_mm256_aesenc_epi128(B2.raw(), K.raw()));
34 B3 = SIMD_8x32(_mm256_aesenc_epi128(B3.raw(), K.raw()));
35}
36
37BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(SIMD_8x32 K, SIMD_8x32& B) {
38 B = SIMD_8x32(_mm256_aesenclast_epi128(B.raw(), K.raw()));
39}
40
41BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesenclast(
42 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
43 B0 = SIMD_8x32(_mm256_aesenclast_epi128(B0.raw(), K.raw()));
44 B1 = SIMD_8x32(_mm256_aesenclast_epi128(B1.raw(), K.raw()));
45 B2 = SIMD_8x32(_mm256_aesenclast_epi128(B2.raw(), K.raw()));
46 B3 = SIMD_8x32(_mm256_aesenclast_epi128(B3.raw(), K.raw()));
47}
48
49BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(SIMD_8x32 K, SIMD_8x32& B) {
50 B = SIMD_8x32(_mm256_aesdec_epi128(B.raw(), K.raw()));
51}
52
53BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdec(
54 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
55 B0 = SIMD_8x32(_mm256_aesdec_epi128(B0.raw(), K.raw()));
56 B1 = SIMD_8x32(_mm256_aesdec_epi128(B1.raw(), K.raw()));
57 B2 = SIMD_8x32(_mm256_aesdec_epi128(B2.raw(), K.raw()));
58 B3 = SIMD_8x32(_mm256_aesdec_epi128(B3.raw(), K.raw()));
59}
60
61BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(SIMD_8x32 K, SIMD_8x32& B) {
62 B = SIMD_8x32(_mm256_aesdeclast_epi128(B.raw(), K.raw()));
63}
64
65BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_VAES void aesdeclast(
66 SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
67 B0 = SIMD_8x32(_mm256_aesdeclast_epi128(B0.raw(), K.raw()));
68 B1 = SIMD_8x32(_mm256_aesdeclast_epi128(B1.raw(), K.raw()));
69 B2 = SIMD_8x32(_mm256_aesdeclast_epi128(B2.raw(), K.raw()));
70 B3 = SIMD_8x32(_mm256_aesdeclast_epi128(B3.raw(), K.raw()));
71}
72
73} // namespace
74
75/*
76* AES-128 Encryption
77*/
78BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
79 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
80 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
81 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
82 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
83 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
84 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
85 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
86 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
87 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
88 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
89 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
90
91 while(blocks >= 8) {
92 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
93 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
94 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
95 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
96
97 keyxor(K0, B0, B1, B2, B3);
98 aesenc(K1, B0, B1, B2, B3);
99 aesenc(K2, B0, B1, B2, B3);
100 aesenc(K3, B0, B1, B2, B3);
101 aesenc(K4, B0, B1, B2, B3);
102 aesenc(K5, B0, B1, B2, B3);
103 aesenc(K6, B0, B1, B2, B3);
104 aesenc(K7, B0, B1, B2, B3);
105 aesenc(K8, B0, B1, B2, B3);
106 aesenc(K9, B0, B1, B2, B3);
107 aesenclast(K10, B0, B1, B2, B3);
108
109 B0.store_le(out);
110 B1.store_le(out + 16 * 2);
111 B2.store_le(out + 16 * 4);
112 B3.store_le(out + 16 * 6);
113
114 blocks -= 8;
115 in += 8 * 16;
116 out += 8 * 16;
117 }
118
119 while(blocks >= 2) {
120 SIMD_8x32 B = SIMD_8x32::load_le(in);
121
122 B ^= K0;
123 aesenc(K1, B);
124 aesenc(K2, B);
125 aesenc(K3, B);
126 aesenc(K4, B);
127 aesenc(K5, B);
128 aesenc(K6, B);
129 aesenc(K7, B);
130 aesenc(K8, B);
131 aesenc(K9, B);
132 aesenclast(K10, B);
133
134 B.store_le(out);
135
136 in += 2 * 16;
137 out += 2 * 16;
138 blocks -= 2;
139 }
140
141 if(blocks > 0) {
142 SIMD_8x32 B = SIMD_8x32::load_le128(in);
143
144 B ^= K0;
145 aesenc(K1, B);
146 aesenc(K2, B);
147 aesenc(K3, B);
148 aesenc(K4, B);
149 aesenc(K5, B);
150 aesenc(K6, B);
151 aesenc(K7, B);
152 aesenc(K8, B);
153 aesenc(K9, B);
154 aesenclast(K10, B);
155
156 B.store_le128(out);
157 }
158}
159
160/*
161* AES-128 Decryption
162*/
163BOTAN_FN_ISA_AVX2_VAES void AES_128::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
164 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
165 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
166 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
167 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
168 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
169 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
170 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
171 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
172 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
173 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
174 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
175
176 while(blocks >= 8) {
177 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
178 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
179 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
180 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
181
182 keyxor(K0, B0, B1, B2, B3);
183 aesdec(K1, B0, B1, B2, B3);
184 aesdec(K2, B0, B1, B2, B3);
185 aesdec(K3, B0, B1, B2, B3);
186 aesdec(K4, B0, B1, B2, B3);
187 aesdec(K5, B0, B1, B2, B3);
188 aesdec(K6, B0, B1, B2, B3);
189 aesdec(K7, B0, B1, B2, B3);
190 aesdec(K8, B0, B1, B2, B3);
191 aesdec(K9, B0, B1, B2, B3);
192 aesdeclast(K10, B0, B1, B2, B3);
193
194 B0.store_le(out + 16 * 0);
195 B1.store_le(out + 16 * 2);
196 B2.store_le(out + 16 * 4);
197 B3.store_le(out + 16 * 6);
198
199 blocks -= 8;
200 in += 8 * 16;
201 out += 8 * 16;
202 }
203
204 while(blocks >= 2) {
205 SIMD_8x32 B = SIMD_8x32::load_le(in);
206
207 B ^= K0;
208 aesdec(K1, B);
209 aesdec(K2, B);
210 aesdec(K3, B);
211 aesdec(K4, B);
212 aesdec(K5, B);
213 aesdec(K6, B);
214 aesdec(K7, B);
215 aesdec(K8, B);
216 aesdec(K9, B);
217 aesdeclast(K10, B);
218
219 B.store_le(out);
220
221 in += 2 * 16;
222 out += 2 * 16;
223 blocks -= 2;
224 }
225
226 if(blocks > 0) {
227 SIMD_8x32 B = SIMD_8x32::load_le128(in);
228
229 B ^= K0;
230 aesdec(K1, B);
231 aesdec(K2, B);
232 aesdec(K3, B);
233 aesdec(K4, B);
234 aesdec(K5, B);
235 aesdec(K6, B);
236 aesdec(K7, B);
237 aesdec(K8, B);
238 aesdec(K9, B);
239 aesdeclast(K10, B);
240
241 B.store_le128(out);
242 }
243}
244
245/*
246* AES-192 Encryption
247*/
248BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
249 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
250 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
251 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
252 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
253 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
254 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
255 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
256 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
257 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
258 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
259 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
260 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
261 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
262
263 while(blocks >= 8) {
264 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
265 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
266 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
267 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
268
269 keyxor(K0, B0, B1, B2, B3);
270 aesenc(K1, B0, B1, B2, B3);
271 aesenc(K2, B0, B1, B2, B3);
272 aesenc(K3, B0, B1, B2, B3);
273 aesenc(K4, B0, B1, B2, B3);
274 aesenc(K5, B0, B1, B2, B3);
275 aesenc(K6, B0, B1, B2, B3);
276 aesenc(K7, B0, B1, B2, B3);
277 aesenc(K8, B0, B1, B2, B3);
278 aesenc(K9, B0, B1, B2, B3);
279 aesenc(K10, B0, B1, B2, B3);
280 aesenc(K11, B0, B1, B2, B3);
281 aesenclast(K12, B0, B1, B2, B3);
282
283 B0.store_le(out + 16 * 0);
284 B1.store_le(out + 16 * 2);
285 B2.store_le(out + 16 * 4);
286 B3.store_le(out + 16 * 6);
287
288 blocks -= 8;
289 in += 8 * 16;
290 out += 8 * 16;
291 }
292
293 while(blocks >= 2) {
294 SIMD_8x32 B = SIMD_8x32::load_le(in);
295
296 B ^= K0;
297 aesenc(K1, B);
298 aesenc(K2, B);
299 aesenc(K3, B);
300 aesenc(K4, B);
301 aesenc(K5, B);
302 aesenc(K6, B);
303 aesenc(K7, B);
304 aesenc(K8, B);
305 aesenc(K9, B);
306 aesenc(K10, B);
307 aesenc(K11, B);
308 aesenclast(K12, B);
309
310 B.store_le(out);
311
312 in += 2 * 16;
313 out += 2 * 16;
314 blocks -= 2;
315 }
316
317 if(blocks > 0) {
318 SIMD_8x32 B = SIMD_8x32::load_le128(in);
319
320 B ^= K0;
321 aesenc(K1, B);
322 aesenc(K2, B);
323 aesenc(K3, B);
324 aesenc(K4, B);
325 aesenc(K5, B);
326 aesenc(K6, B);
327 aesenc(K7, B);
328 aesenc(K8, B);
329 aesenc(K9, B);
330 aesenc(K10, B);
331 aesenc(K11, B);
332 aesenclast(K12, B);
333
334 B.store_le128(out);
335 }
336}
337
338/*
339* AES-192 Decryption
340*/
341BOTAN_FN_ISA_AVX2_VAES void AES_192::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
342 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
343 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
344 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
345 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
346 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
347 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
348 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
349 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
350 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
351 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
352 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
353 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
354 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
355
356 while(blocks >= 8) {
357 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
358 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
359 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
360 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
361
362 keyxor(K0, B0, B1, B2, B3);
363 aesdec(K1, B0, B1, B2, B3);
364 aesdec(K2, B0, B1, B2, B3);
365 aesdec(K3, B0, B1, B2, B3);
366 aesdec(K4, B0, B1, B2, B3);
367 aesdec(K5, B0, B1, B2, B3);
368 aesdec(K6, B0, B1, B2, B3);
369 aesdec(K7, B0, B1, B2, B3);
370 aesdec(K8, B0, B1, B2, B3);
371 aesdec(K9, B0, B1, B2, B3);
372 aesdec(K10, B0, B1, B2, B3);
373 aesdec(K11, B0, B1, B2, B3);
374 aesdeclast(K12, B0, B1, B2, B3);
375
376 B0.store_le(out + 16 * 0);
377 B1.store_le(out + 16 * 2);
378 B2.store_le(out + 16 * 4);
379 B3.store_le(out + 16 * 6);
380
381 blocks -= 8;
382 in += 8 * 16;
383 out += 8 * 16;
384 }
385
386 while(blocks >= 2) {
387 SIMD_8x32 B = SIMD_8x32::load_le(in);
388
389 B ^= K0;
390 aesdec(K1, B);
391 aesdec(K2, B);
392 aesdec(K3, B);
393 aesdec(K4, B);
394 aesdec(K5, B);
395 aesdec(K6, B);
396 aesdec(K7, B);
397 aesdec(K8, B);
398 aesdec(K9, B);
399 aesdec(K10, B);
400 aesdec(K11, B);
401 aesdeclast(K12, B);
402
403 B.store_le(out);
404
405 in += 2 * 16;
406 out += 2 * 16;
407 blocks -= 2;
408 }
409
410 if(blocks > 0) {
411 SIMD_8x32 B = SIMD_8x32::load_le128(in);
412
413 B ^= K0;
414 aesdec(K1, B);
415 aesdec(K2, B);
416 aesdec(K3, B);
417 aesdec(K4, B);
418 aesdec(K5, B);
419 aesdec(K6, B);
420 aesdec(K7, B);
421 aesdec(K8, B);
422 aesdec(K9, B);
423 aesdec(K10, B);
424 aesdec(K11, B);
425 aesdeclast(K12, B);
426
427 B.store_le128(out);
428 }
429}
430
431BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
432 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
433 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
434 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
435 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
436 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
437 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
438 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
439 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
440 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
441 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
442 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
443 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
444 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
445 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_EK[4 * 13]);
446 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_EK[4 * 14]);
447
448 while(blocks >= 8) {
449 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
450 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
451 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
452 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
453
454 keyxor(K0, B0, B1, B2, B3);
455 aesenc(K1, B0, B1, B2, B3);
456 aesenc(K2, B0, B1, B2, B3);
457 aesenc(K3, B0, B1, B2, B3);
458 aesenc(K4, B0, B1, B2, B3);
459 aesenc(K5, B0, B1, B2, B3);
460 aesenc(K6, B0, B1, B2, B3);
461 aesenc(K7, B0, B1, B2, B3);
462 aesenc(K8, B0, B1, B2, B3);
463 aesenc(K9, B0, B1, B2, B3);
464 aesenc(K10, B0, B1, B2, B3);
465 aesenc(K11, B0, B1, B2, B3);
466 aesenc(K12, B0, B1, B2, B3);
467 aesenc(K13, B0, B1, B2, B3);
468 aesenclast(K14, B0, B1, B2, B3);
469
470 B0.store_le(out + 16 * 0);
471 B1.store_le(out + 16 * 2);
472 B2.store_le(out + 16 * 4);
473 B3.store_le(out + 16 * 6);
474
475 blocks -= 8;
476 in += 8 * 16;
477 out += 8 * 16;
478 }
479
480 while(blocks >= 2) {
481 SIMD_8x32 B = SIMD_8x32::load_le(in);
482
483 B ^= K0;
484 aesenc(K1, B);
485 aesenc(K2, B);
486 aesenc(K3, B);
487 aesenc(K4, B);
488 aesenc(K5, B);
489 aesenc(K6, B);
490 aesenc(K7, B);
491 aesenc(K8, B);
492 aesenc(K9, B);
493 aesenc(K10, B);
494 aesenc(K11, B);
495 aesenc(K12, B);
496 aesenc(K13, B);
497 aesenclast(K14, B);
498
499 B.store_le(out);
500
501 in += 2 * 16;
502 out += 2 * 16;
503 blocks -= 2;
504 }
505
506 if(blocks > 0) {
507 SIMD_8x32 B = SIMD_8x32::load_le128(in);
508
509 B ^= K0;
510 aesenc(K1, B);
511 aesenc(K2, B);
512 aesenc(K3, B);
513 aesenc(K4, B);
514 aesenc(K5, B);
515 aesenc(K6, B);
516 aesenc(K7, B);
517 aesenc(K8, B);
518 aesenc(K9, B);
519 aesenc(K10, B);
520 aesenc(K11, B);
521 aesenc(K12, B);
522 aesenc(K13, B);
523 aesenclast(K14, B);
524
525 B.store_le128(out);
526 }
527}
528
529/*
530* AES-256 Decryption
531*/
532BOTAN_FN_ISA_AVX2_VAES void AES_256::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
533 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
534 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
535 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
536 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
537 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
538 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
539 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
540 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
541 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
542 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
543 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
544 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
545 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
546 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_DK[4 * 13]);
547 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_DK[4 * 14]);
548
549 while(blocks >= 8) {
550 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
551 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
552 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
553 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
554
555 keyxor(K0, B0, B1, B2, B3);
556 aesdec(K1, B0, B1, B2, B3);
557 aesdec(K2, B0, B1, B2, B3);
558 aesdec(K3, B0, B1, B2, B3);
559 aesdec(K4, B0, B1, B2, B3);
560 aesdec(K5, B0, B1, B2, B3);
561 aesdec(K6, B0, B1, B2, B3);
562 aesdec(K7, B0, B1, B2, B3);
563 aesdec(K8, B0, B1, B2, B3);
564 aesdec(K9, B0, B1, B2, B3);
565 aesdec(K10, B0, B1, B2, B3);
566 aesdec(K11, B0, B1, B2, B3);
567 aesdec(K12, B0, B1, B2, B3);
568 aesdec(K13, B0, B1, B2, B3);
569 aesdeclast(K14, B0, B1, B2, B3);
570
571 B0.store_le(out + 16 * 0);
572 B1.store_le(out + 16 * 2);
573 B2.store_le(out + 16 * 4);
574 B3.store_le(out + 16 * 6);
575
576 blocks -= 8;
577 in += 8 * 16;
578 out += 8 * 16;
579 }
580
581 while(blocks >= 2) {
582 SIMD_8x32 B = SIMD_8x32::load_le(in);
583
584 B ^= K0;
585 aesdec(K1, B);
586 aesdec(K2, B);
587 aesdec(K3, B);
588 aesdec(K4, B);
589 aesdec(K5, B);
590 aesdec(K6, B);
591 aesdec(K7, B);
592 aesdec(K8, B);
593 aesdec(K9, B);
594 aesdec(K10, B);
595 aesdec(K11, B);
596 aesdec(K12, B);
597 aesdec(K13, B);
598 aesdeclast(K14, B);
599
600 B.store_le(out);
601
602 in += 2 * 16;
603 out += 2 * 16;
604 blocks -= 2;
605 }
606
607 if(blocks > 0) {
608 SIMD_8x32 B = SIMD_8x32::load_le128(in);
609
610 B ^= K0;
611 aesdec(K1, B);
612 aesdec(K2, B);
613 aesdec(K3, B);
614 aesdec(K4, B);
615 aesdec(K5, B);
616 aesdec(K6, B);
617 aesdec(K7, B);
618 aesdec(K8, B);
619 aesdec(K9, B);
620 aesdec(K10, B);
621 aesdec(K11, B);
622 aesdec(K12, B);
623 aesdec(K13, B);
624 aesdeclast(K14, B);
625
626 B.store_le128(out);
627 }
628}
629
630} // namespace Botan
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le128(const uint8_t *in) noexcept
Definition simd_avx2.h:71
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le(const uint8_t *in) noexcept
Definition simd_avx2.h:61
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
constexpr uint32_t K1
Definition sha1_f.h:16
constexpr uint32_t K4
Definition sha1_f.h:19
constexpr uint32_t K3
Definition sha1_f.h:18
constexpr uint32_t K2
Definition sha1_f.h:17