Botan 3.6.0
Crypto and TLS for C&&
aes_vaes.cpp
Go to the documentation of this file.
1/*
2* (C) 2024 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/aes.h>
8
9#include <botan/internal/loadstor.h>
10#include <botan/internal/simd_avx2.h>
11#include <wmmintrin.h>
12
13namespace Botan {
14
15namespace {
16
17BOTAN_FORCE_INLINE void keyxor(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
18 B0 ^= K;
19 B1 ^= K;
20 B2 ^= K;
21 B3 ^= K;
22}
23
24BOTAN_FUNC_ISA_INLINE("vaes,avx2") void aesenc(SIMD_8x32 K, SIMD_8x32& B) {
25 B = SIMD_8x32(_mm256_aesenc_epi128(B.raw(), K.raw()));
26}
27
28BOTAN_FUNC_ISA_INLINE("vaes,avx2")
29void aesenc(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
30 B0 = SIMD_8x32(_mm256_aesenc_epi128(B0.raw(), K.raw()));
31 B1 = SIMD_8x32(_mm256_aesenc_epi128(B1.raw(), K.raw()));
32 B2 = SIMD_8x32(_mm256_aesenc_epi128(B2.raw(), K.raw()));
33 B3 = SIMD_8x32(_mm256_aesenc_epi128(B3.raw(), K.raw()));
34}
35
36BOTAN_FUNC_ISA_INLINE("vaes,avx2") void aesenclast(SIMD_8x32 K, SIMD_8x32& B) {
37 B = SIMD_8x32(_mm256_aesenclast_epi128(B.raw(), K.raw()));
38}
39
40BOTAN_FUNC_ISA_INLINE("vaes,avx2")
41void aesenclast(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
42 B0 = SIMD_8x32(_mm256_aesenclast_epi128(B0.raw(), K.raw()));
43 B1 = SIMD_8x32(_mm256_aesenclast_epi128(B1.raw(), K.raw()));
44 B2 = SIMD_8x32(_mm256_aesenclast_epi128(B2.raw(), K.raw()));
45 B3 = SIMD_8x32(_mm256_aesenclast_epi128(B3.raw(), K.raw()));
46}
47
48BOTAN_FUNC_ISA_INLINE("vaes,avx2") void aesdec(SIMD_8x32 K, SIMD_8x32& B) {
49 B = SIMD_8x32(_mm256_aesdec_epi128(B.raw(), K.raw()));
50}
51
52BOTAN_FUNC_ISA_INLINE("vaes,avx2")
53void aesdec(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
54 B0 = SIMD_8x32(_mm256_aesdec_epi128(B0.raw(), K.raw()));
55 B1 = SIMD_8x32(_mm256_aesdec_epi128(B1.raw(), K.raw()));
56 B2 = SIMD_8x32(_mm256_aesdec_epi128(B2.raw(), K.raw()));
57 B3 = SIMD_8x32(_mm256_aesdec_epi128(B3.raw(), K.raw()));
58}
59
60BOTAN_FUNC_ISA_INLINE("vaes,avx2") void aesdeclast(SIMD_8x32 K, SIMD_8x32& B) {
61 B = SIMD_8x32(_mm256_aesdeclast_epi128(B.raw(), K.raw()));
62}
63
64BOTAN_FUNC_ISA_INLINE("vaes,avx2")
65void aesdeclast(SIMD_8x32 K, SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) {
66 B0 = SIMD_8x32(_mm256_aesdeclast_epi128(B0.raw(), K.raw()));
67 B1 = SIMD_8x32(_mm256_aesdeclast_epi128(B1.raw(), K.raw()));
68 B2 = SIMD_8x32(_mm256_aesdeclast_epi128(B2.raw(), K.raw()));
69 B3 = SIMD_8x32(_mm256_aesdeclast_epi128(B3.raw(), K.raw()));
70}
71
72} // namespace
73
74/*
75* AES-128 Encryption
76*/
77BOTAN_FUNC_ISA("vaes,avx2") void AES_128::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
78 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
79 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
80 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
81 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
82 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
83 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
84 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
85 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
86 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
87 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
88 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
89
90 while(blocks >= 8) {
91 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
92 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
93 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
94 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
95
96 keyxor(K0, B0, B1, B2, B3);
97 aesenc(K1, B0, B1, B2, B3);
98 aesenc(K2, B0, B1, B2, B3);
99 aesenc(K3, B0, B1, B2, B3);
100 aesenc(K4, B0, B1, B2, B3);
101 aesenc(K5, B0, B1, B2, B3);
102 aesenc(K6, B0, B1, B2, B3);
103 aesenc(K7, B0, B1, B2, B3);
104 aesenc(K8, B0, B1, B2, B3);
105 aesenc(K9, B0, B1, B2, B3);
106 aesenclast(K10, B0, B1, B2, B3);
107
108 B0.store_le(out);
109 B1.store_le(out + 16 * 2);
110 B2.store_le(out + 16 * 4);
111 B3.store_le(out + 16 * 6);
112
113 blocks -= 8;
114 in += 8 * 16;
115 out += 8 * 16;
116 }
117
118 while(blocks >= 2) {
119 SIMD_8x32 B = SIMD_8x32::load_le(in);
120
121 B ^= K0;
122 aesenc(K1, B);
123 aesenc(K2, B);
124 aesenc(K3, B);
125 aesenc(K4, B);
126 aesenc(K5, B);
127 aesenc(K6, B);
128 aesenc(K7, B);
129 aesenc(K8, B);
130 aesenc(K9, B);
131 aesenclast(K10, B);
132
133 B.store_le(out);
134
135 in += 2 * 16;
136 out += 2 * 16;
137 blocks -= 2;
138 }
139
140 if(blocks > 0) {
141 SIMD_8x32 B = SIMD_8x32::load_le128(in);
142
143 B ^= K0;
144 aesenc(K1, B);
145 aesenc(K2, B);
146 aesenc(K3, B);
147 aesenc(K4, B);
148 aesenc(K5, B);
149 aesenc(K6, B);
150 aesenc(K7, B);
151 aesenc(K8, B);
152 aesenc(K9, B);
153 aesenclast(K10, B);
154
155 B.store_le128(out);
156 }
157}
158
159/*
160* AES-128 Decryption
161*/
162BOTAN_FUNC_ISA("vaes,avx2") void AES_128::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
163 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
164 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
165 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
166 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
167 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
168 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
169 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
170 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
171 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
172 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
173 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
174
175 while(blocks >= 8) {
176 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
177 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
178 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
179 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
180
181 keyxor(K0, B0, B1, B2, B3);
182 aesdec(K1, B0, B1, B2, B3);
183 aesdec(K2, B0, B1, B2, B3);
184 aesdec(K3, B0, B1, B2, B3);
185 aesdec(K4, B0, B1, B2, B3);
186 aesdec(K5, B0, B1, B2, B3);
187 aesdec(K6, B0, B1, B2, B3);
188 aesdec(K7, B0, B1, B2, B3);
189 aesdec(K8, B0, B1, B2, B3);
190 aesdec(K9, B0, B1, B2, B3);
191 aesdeclast(K10, B0, B1, B2, B3);
192
193 B0.store_le(out + 16 * 0);
194 B1.store_le(out + 16 * 2);
195 B2.store_le(out + 16 * 4);
196 B3.store_le(out + 16 * 6);
197
198 blocks -= 8;
199 in += 8 * 16;
200 out += 8 * 16;
201 }
202
203 while(blocks >= 2) {
204 SIMD_8x32 B = SIMD_8x32::load_le(in);
205
206 B ^= K0;
207 aesdec(K1, B);
208 aesdec(K2, B);
209 aesdec(K3, B);
210 aesdec(K4, B);
211 aesdec(K5, B);
212 aesdec(K6, B);
213 aesdec(K7, B);
214 aesdec(K8, B);
215 aesdec(K9, B);
216 aesdeclast(K10, B);
217
218 B.store_le(out);
219
220 in += 2 * 16;
221 out += 2 * 16;
222 blocks -= 2;
223 }
224
225 if(blocks > 0) {
226 SIMD_8x32 B = SIMD_8x32::load_le128(in);
227
228 B ^= K0;
229 aesdec(K1, B);
230 aesdec(K2, B);
231 aesdec(K3, B);
232 aesdec(K4, B);
233 aesdec(K5, B);
234 aesdec(K6, B);
235 aesdec(K7, B);
236 aesdec(K8, B);
237 aesdec(K9, B);
238 aesdeclast(K10, B);
239
240 B.store_le128(out);
241 }
242}
243
244/*
245* AES-192 Encryption
246*/
247BOTAN_FUNC_ISA("vaes,avx2") void AES_192::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
248 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
249 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
250 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
251 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
252 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
253 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
254 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
255 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
256 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
257 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
258 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
259 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
260 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
261
262 while(blocks >= 8) {
263 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
264 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
265 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
266 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
267
268 keyxor(K0, B0, B1, B2, B3);
269 aesenc(K1, B0, B1, B2, B3);
270 aesenc(K2, B0, B1, B2, B3);
271 aesenc(K3, B0, B1, B2, B3);
272 aesenc(K4, B0, B1, B2, B3);
273 aesenc(K5, B0, B1, B2, B3);
274 aesenc(K6, B0, B1, B2, B3);
275 aesenc(K7, B0, B1, B2, B3);
276 aesenc(K8, B0, B1, B2, B3);
277 aesenc(K9, B0, B1, B2, B3);
278 aesenc(K10, B0, B1, B2, B3);
279 aesenc(K11, B0, B1, B2, B3);
280 aesenclast(K12, B0, B1, B2, B3);
281
282 B0.store_le(out + 16 * 0);
283 B1.store_le(out + 16 * 2);
284 B2.store_le(out + 16 * 4);
285 B3.store_le(out + 16 * 6);
286
287 blocks -= 8;
288 in += 8 * 16;
289 out += 8 * 16;
290 }
291
292 while(blocks >= 2) {
293 SIMD_8x32 B = SIMD_8x32::load_le(in);
294
295 B ^= K0;
296 aesenc(K1, B);
297 aesenc(K2, B);
298 aesenc(K3, B);
299 aesenc(K4, B);
300 aesenc(K5, B);
301 aesenc(K6, B);
302 aesenc(K7, B);
303 aesenc(K8, B);
304 aesenc(K9, B);
305 aesenc(K10, B);
306 aesenc(K11, B);
307 aesenclast(K12, B);
308
309 B.store_le(out);
310
311 in += 2 * 16;
312 out += 2 * 16;
313 blocks -= 2;
314 }
315
316 if(blocks > 0) {
317 SIMD_8x32 B = SIMD_8x32::load_le128(in);
318
319 B ^= K0;
320 aesenc(K1, B);
321 aesenc(K2, B);
322 aesenc(K3, B);
323 aesenc(K4, B);
324 aesenc(K5, B);
325 aesenc(K6, B);
326 aesenc(K7, B);
327 aesenc(K8, B);
328 aesenc(K9, B);
329 aesenc(K10, B);
330 aesenc(K11, B);
331 aesenclast(K12, B);
332
333 B.store_le128(out);
334 }
335}
336
337/*
338* AES-192 Decryption
339*/
340BOTAN_FUNC_ISA("vaes,avx2") void AES_192::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
341 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
342 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
343 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
344 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
345 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
346 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
347 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
348 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
349 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
350 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
351 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
352 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
353 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
354
355 while(blocks >= 8) {
356 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
357 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
358 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
359 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
360
361 keyxor(K0, B0, B1, B2, B3);
362 aesdec(K1, B0, B1, B2, B3);
363 aesdec(K2, B0, B1, B2, B3);
364 aesdec(K3, B0, B1, B2, B3);
365 aesdec(K4, B0, B1, B2, B3);
366 aesdec(K5, B0, B1, B2, B3);
367 aesdec(K6, B0, B1, B2, B3);
368 aesdec(K7, B0, B1, B2, B3);
369 aesdec(K8, B0, B1, B2, B3);
370 aesdec(K9, B0, B1, B2, B3);
371 aesdec(K10, B0, B1, B2, B3);
372 aesdec(K11, B0, B1, B2, B3);
373 aesdeclast(K12, B0, B1, B2, B3);
374
375 B0.store_le(out + 16 * 0);
376 B1.store_le(out + 16 * 2);
377 B2.store_le(out + 16 * 4);
378 B3.store_le(out + 16 * 6);
379
380 blocks -= 8;
381 in += 8 * 16;
382 out += 8 * 16;
383 }
384
385 while(blocks >= 2) {
386 SIMD_8x32 B = SIMD_8x32::load_le(in);
387
388 B ^= K0;
389 aesdec(K1, B);
390 aesdec(K2, B);
391 aesdec(K3, B);
392 aesdec(K4, B);
393 aesdec(K5, B);
394 aesdec(K6, B);
395 aesdec(K7, B);
396 aesdec(K8, B);
397 aesdec(K9, B);
398 aesdec(K10, B);
399 aesdec(K11, B);
400 aesdeclast(K12, B);
401
402 B.store_le(out);
403
404 in += 2 * 16;
405 out += 2 * 16;
406 blocks -= 2;
407 }
408
409 if(blocks > 0) {
410 SIMD_8x32 B = SIMD_8x32::load_le128(in);
411
412 B ^= K0;
413 aesdec(K1, B);
414 aesdec(K2, B);
415 aesdec(K3, B);
416 aesdec(K4, B);
417 aesdec(K5, B);
418 aesdec(K6, B);
419 aesdec(K7, B);
420 aesdec(K8, B);
421 aesdec(K9, B);
422 aesdec(K10, B);
423 aesdec(K11, B);
424 aesdeclast(K12, B);
425
426 B.store_le128(out);
427 }
428}
429
430BOTAN_FUNC_ISA("vaes,avx2") void AES_256::x86_vaes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
431 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_EK[4 * 0]);
432 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_EK[4 * 1]);
433 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_EK[4 * 2]);
434 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_EK[4 * 3]);
435 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_EK[4 * 4]);
436 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_EK[4 * 5]);
437 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_EK[4 * 6]);
438 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_EK[4 * 7]);
439 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_EK[4 * 8]);
440 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_EK[4 * 9]);
441 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_EK[4 * 10]);
442 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_EK[4 * 11]);
443 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_EK[4 * 12]);
444 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_EK[4 * 13]);
445 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_EK[4 * 14]);
446
447 while(blocks >= 8) {
448 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
449 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
450 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
451 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
452
453 keyxor(K0, B0, B1, B2, B3);
454 aesenc(K1, B0, B1, B2, B3);
455 aesenc(K2, B0, B1, B2, B3);
456 aesenc(K3, B0, B1, B2, B3);
457 aesenc(K4, B0, B1, B2, B3);
458 aesenc(K5, B0, B1, B2, B3);
459 aesenc(K6, B0, B1, B2, B3);
460 aesenc(K7, B0, B1, B2, B3);
461 aesenc(K8, B0, B1, B2, B3);
462 aesenc(K9, B0, B1, B2, B3);
463 aesenc(K10, B0, B1, B2, B3);
464 aesenc(K11, B0, B1, B2, B3);
465 aesenc(K12, B0, B1, B2, B3);
466 aesenc(K13, B0, B1, B2, B3);
467 aesenclast(K14, B0, B1, B2, B3);
468
469 B0.store_le(out + 16 * 0);
470 B1.store_le(out + 16 * 2);
471 B2.store_le(out + 16 * 4);
472 B3.store_le(out + 16 * 6);
473
474 blocks -= 8;
475 in += 8 * 16;
476 out += 8 * 16;
477 }
478
479 while(blocks >= 2) {
480 SIMD_8x32 B = SIMD_8x32::load_le(in);
481
482 B ^= K0;
483 aesenc(K1, B);
484 aesenc(K2, B);
485 aesenc(K3, B);
486 aesenc(K4, B);
487 aesenc(K5, B);
488 aesenc(K6, B);
489 aesenc(K7, B);
490 aesenc(K8, B);
491 aesenc(K9, B);
492 aesenc(K10, B);
493 aesenc(K11, B);
494 aesenc(K12, B);
495 aesenc(K13, B);
496 aesenclast(K14, B);
497
498 B.store_le(out);
499
500 in += 2 * 16;
501 out += 2 * 16;
502 blocks -= 2;
503 }
504
505 if(blocks > 0) {
506 SIMD_8x32 B = SIMD_8x32::load_le128(in);
507
508 B ^= K0;
509 aesenc(K1, B);
510 aesenc(K2, B);
511 aesenc(K3, B);
512 aesenc(K4, B);
513 aesenc(K5, B);
514 aesenc(K6, B);
515 aesenc(K7, B);
516 aesenc(K8, B);
517 aesenc(K9, B);
518 aesenc(K10, B);
519 aesenc(K11, B);
520 aesenc(K12, B);
521 aesenc(K13, B);
522 aesenclast(K14, B);
523
524 B.store_le128(out);
525 }
526}
527
528/*
529* AES-256 Decryption
530*/
531BOTAN_FUNC_ISA("vaes,avx2") void AES_256::x86_vaes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
532 const SIMD_8x32 K0 = SIMD_8x32::load_le128(&m_DK[4 * 0]);
533 const SIMD_8x32 K1 = SIMD_8x32::load_le128(&m_DK[4 * 1]);
534 const SIMD_8x32 K2 = SIMD_8x32::load_le128(&m_DK[4 * 2]);
535 const SIMD_8x32 K3 = SIMD_8x32::load_le128(&m_DK[4 * 3]);
536 const SIMD_8x32 K4 = SIMD_8x32::load_le128(&m_DK[4 * 4]);
537 const SIMD_8x32 K5 = SIMD_8x32::load_le128(&m_DK[4 * 5]);
538 const SIMD_8x32 K6 = SIMD_8x32::load_le128(&m_DK[4 * 6]);
539 const SIMD_8x32 K7 = SIMD_8x32::load_le128(&m_DK[4 * 7]);
540 const SIMD_8x32 K8 = SIMD_8x32::load_le128(&m_DK[4 * 8]);
541 const SIMD_8x32 K9 = SIMD_8x32::load_le128(&m_DK[4 * 9]);
542 const SIMD_8x32 K10 = SIMD_8x32::load_le128(&m_DK[4 * 10]);
543 const SIMD_8x32 K11 = SIMD_8x32::load_le128(&m_DK[4 * 11]);
544 const SIMD_8x32 K12 = SIMD_8x32::load_le128(&m_DK[4 * 12]);
545 const SIMD_8x32 K13 = SIMD_8x32::load_le128(&m_DK[4 * 13]);
546 const SIMD_8x32 K14 = SIMD_8x32::load_le128(&m_DK[4 * 14]);
547
548 while(blocks >= 8) {
549 SIMD_8x32 B0 = SIMD_8x32::load_le(in + 16 * 0);
550 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 16 * 2);
551 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 16 * 4);
552 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 16 * 6);
553
554 keyxor(K0, B0, B1, B2, B3);
555 aesdec(K1, B0, B1, B2, B3);
556 aesdec(K2, B0, B1, B2, B3);
557 aesdec(K3, B0, B1, B2, B3);
558 aesdec(K4, B0, B1, B2, B3);
559 aesdec(K5, B0, B1, B2, B3);
560 aesdec(K6, B0, B1, B2, B3);
561 aesdec(K7, B0, B1, B2, B3);
562 aesdec(K8, B0, B1, B2, B3);
563 aesdec(K9, B0, B1, B2, B3);
564 aesdec(K10, B0, B1, B2, B3);
565 aesdec(K11, B0, B1, B2, B3);
566 aesdec(K12, B0, B1, B2, B3);
567 aesdec(K13, B0, B1, B2, B3);
568 aesdeclast(K14, B0, B1, B2, B3);
569
570 B0.store_le(out + 16 * 0);
571 B1.store_le(out + 16 * 2);
572 B2.store_le(out + 16 * 4);
573 B3.store_le(out + 16 * 6);
574
575 blocks -= 8;
576 in += 8 * 16;
577 out += 8 * 16;
578 }
579
580 while(blocks >= 2) {
581 SIMD_8x32 B = SIMD_8x32::load_le(in);
582
583 B ^= K0;
584 aesdec(K1, B);
585 aesdec(K2, B);
586 aesdec(K3, B);
587 aesdec(K4, B);
588 aesdec(K5, B);
589 aesdec(K6, B);
590 aesdec(K7, B);
591 aesdec(K8, B);
592 aesdec(K9, B);
593 aesdec(K10, B);
594 aesdec(K11, B);
595 aesdec(K12, B);
596 aesdec(K13, B);
597 aesdeclast(K14, B);
598
599 B.store_le(out);
600
601 in += 2 * 16;
602 out += 2 * 16;
603 blocks -= 2;
604 }
605
606 if(blocks > 0) {
607 SIMD_8x32 B = SIMD_8x32::load_le128(in);
608
609 B ^= K0;
610 aesdec(K1, B);
611 aesdec(K2, B);
612 aesdec(K3, B);
613 aesdec(K4, B);
614 aesdec(K5, B);
615 aesdec(K6, B);
616 aesdec(K7, B);
617 aesdec(K8, B);
618 aesdec(K9, B);
619 aesdec(K10, B);
620 aesdec(K11, B);
621 aesdec(K12, B);
622 aesdec(K13, B);
623 aesdeclast(K14, B);
624
625 B.store_le128(out);
626 }
627}
628
629} // namespace Botan
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:92
#define BOTAN_FORCE_INLINE
Definition compiler.h:165
#define BOTAN_FUNC_ISA_INLINE(isa)
Definition compiler.h:98
uint8x16_t uint8x16_t K2
Definition aes_armv8.cpp:32