#include <botan/internal/aes.h>

#include <botan/internal/loadstor.h>
#include <arm_neon.h>

namespace Botan {

namespace AES_AARCH64 {
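
// Round helpers built on the ARMv8 Crypto Extension AES instructions.
// enc/enc4 perform one encryption round (AESE then AESMC) on one or four
// blocks; dec/dec4 perform one decryption round (AESD then AESIMC).
// The *_last variants do the final round, which skips MixColumns and
// XORs in the last round key (K2).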
BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void enc(uint8x16_t& B, uint8x16_t K) {
   B = vaesmcq_u8(vaeseq_u8(B, K));
}

BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void enc4(uint8x16_t& B0, uint8x16_t& B1, uint8x16_t& B2, uint8x16_t& B3, uint8x16_t K) {
   B0 = vaesmcq_u8(vaeseq_u8(B0, K));
   B1 = vaesmcq_u8(vaeseq_u8(B1, K));
   B2 = vaesmcq_u8(vaeseq_u8(B2, K));
   B3 = vaesmcq_u8(vaeseq_u8(B3, K));
}

BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void enc_last(uint8x16_t& B, uint8x16_t K, uint8x16_t K2) {
   B = veorq_u8(vaeseq_u8(B, K), K2);
}

BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void enc4_last(uint8x16_t& B0, uint8x16_t& B1, uint8x16_t& B2, uint8x16_t& B3, uint8x16_t K, uint8x16_t K2) {
   B0 = veorq_u8(vaeseq_u8(B0, K), K2);
   B1 = veorq_u8(vaeseq_u8(B1, K), K2);
   B2 = veorq_u8(vaeseq_u8(B2, K), K2);
   B3 = veorq_u8(vaeseq_u8(B3, K), K2);
}

BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void dec(uint8x16_t& B, uint8x16_t K) {
   B = vaesimcq_u8(vaesdq_u8(B, K));
}

BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void dec4(uint8x16_t& B0, uint8x16_t& B1, uint8x16_t& B2, uint8x16_t& B3, uint8x16_t K) {
   B0 = vaesimcq_u8(vaesdq_u8(B0, K));
   B1 = vaesimcq_u8(vaesdq_u8(B1, K));
   B2 = vaesimcq_u8(vaesdq_u8(B2, K));
   B3 = vaesimcq_u8(vaesdq_u8(B3, K));
}

BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void dec_last(uint8x16_t& B, uint8x16_t K, uint8x16_t K2) {
   B = veorq_u8(vaesdq_u8(B, K), K2);
}

BOTAN_FUNC_ISA_INLINE("+crypto+aes")
void dec4_last(uint8x16_t& B0, uint8x16_t& B1, uint8x16_t& B2, uint8x16_t& B3, uint8x16_t K, uint8x16_t K2) {
   B0 = veorq_u8(vaesdq_u8(B0, K), K2);
   B1 = veorq_u8(vaesdq_u8(B1, K), K2);
   B2 = veorq_u8(vaesdq_u8(B2, K), K2);
   B3 = veorq_u8(vaesdq_u8(B3, K), K2);
}

}  // namespace AES_AARCH64
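
/*
* AES-128 Encryption
*/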
BOTAN_FUNC_ISA("+crypto+aes") void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   const uint8_t* skey = reinterpret_cast<const uint8_t*>(m_EK.data());
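
   // AES-128 has 10 rounds, so the key schedule holds 11 round keys of 16 bytes each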
   const uint8x16_t K0 = vld1q_u8(skey + 0 * 16);
   const uint8x16_t K1 = vld1q_u8(skey + 1 * 16);
   const uint8x16_t K2 = vld1q_u8(skey + 2 * 16);
   const uint8x16_t K3 = vld1q_u8(skey + 3 * 16);
   const uint8x16_t K4 = vld1q_u8(skey + 4 * 16);
   const uint8x16_t K5 = vld1q_u8(skey + 5 * 16);
   const uint8x16_t K6 = vld1q_u8(skey + 6 * 16);
   const uint8x16_t K7 = vld1q_u8(skey + 7 * 16);
   const uint8x16_t K8 = vld1q_u8(skey + 8 * 16);
   const uint8x16_t K9 = vld1q_u8(skey + 9 * 16);
   const uint8x16_t K10 = vld1q_u8(skey + 10 * 16);

   using namespace AES_AARCH64;

   // Process four blocks in parallel while at least four remain
   while(blocks >= 4) {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in + 16);
      uint8x16_t B2 = vld1q_u8(in + 32);
      uint8x16_t B3 = vld1q_u8(in + 48);

      enc4(B0, B1, B2, B3, K0);
      enc4(B0, B1, B2, B3, K1);
      enc4(B0, B1, B2, B3, K2);
      enc4(B0, B1, B2, B3, K3);
      enc4(B0, B1, B2, B3, K4);
      enc4(B0, B1, B2, B3, K5);
      enc4(B0, B1, B2, B3, K6);
      enc4(B0, B1, B2, B3, K7);
      enc4(B0, B1, B2, B3, K8);
      enc4_last(B0, B1, B2, B3, K9, K10);

      vst1q_u8(out, B0);
      vst1q_u8(out + 16, B1);
      vst1q_u8(out + 32, B2);
      vst1q_u8(out + 48, B3);

      in += 16 * 4;
      out += 16 * 4;
      blocks -= 4;
   }

   // Handle any remaining blocks one at a time
   for(size_t i = 0; i != blocks; ++i) {
      uint8x16_t B = vld1q_u8(in + 16 * i);
      enc(B, K0);
      enc(B, K1);
      enc(B, K2);
      enc(B, K3);
      enc(B, K4);
      enc(B, K5);
      enc(B, K6);
      enc(B, K7);
      enc(B, K8);
      enc_last(B, K9, K10);
      vst1q_u8(out + 16 * i, B);
   }
}
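
/*
* AES-128 Decryption
*/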
BOTAN_FUNC_ISA("+crypto+aes") void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   const uint8_t* skey = reinterpret_cast<const uint8_t*>(m_DK.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0 * 16);
   const uint8x16_t K1 = vld1q_u8(skey + 1 * 16);
   const uint8x16_t K2 = vld1q_u8(skey + 2 * 16);
   const uint8x16_t K3 = vld1q_u8(skey + 3 * 16);
   const uint8x16_t K4 = vld1q_u8(skey + 4 * 16);
   const uint8x16_t K5 = vld1q_u8(skey + 5 * 16);
   const uint8x16_t K6 = vld1q_u8(skey + 6 * 16);
   const uint8x16_t K7 = vld1q_u8(skey + 7 * 16);
   const uint8x16_t K8 = vld1q_u8(skey + 8 * 16);
   const uint8x16_t K9 = vld1q_u8(skey + 9 * 16);
   const uint8x16_t K10 = vld1q_u8(skey + 10 * 16);

   using namespace AES_AARCH64;

   while(blocks >= 4) {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in + 16);
      uint8x16_t B2 = vld1q_u8(in + 32);
      uint8x16_t B3 = vld1q_u8(in + 48);

      dec4(B0, B1, B2, B3, K0);
      dec4(B0, B1, B2, B3, K1);
      dec4(B0, B1, B2, B3, K2);
      dec4(B0, B1, B2, B3, K3);
      dec4(B0, B1, B2, B3, K4);
      dec4(B0, B1, B2, B3, K5);
      dec4(B0, B1, B2, B3, K6);
      dec4(B0, B1, B2, B3, K7);
      dec4(B0, B1, B2, B3, K8);
      dec4_last(B0, B1, B2, B3, K9, K10);

      vst1q_u8(out, B0);
      vst1q_u8(out + 16, B1);
      vst1q_u8(out + 32, B2);
      vst1q_u8(out + 48, B3);

      in += 16 * 4;
      out += 16 * 4;
      blocks -= 4;
   }

   for(size_t i = 0; i != blocks; ++i) {
      uint8x16_t B = vld1q_u8(in + 16 * i);
      dec(B, K0);
      dec(B, K1);
      dec(B, K2);
      dec(B, K3);
      dec(B, K4);
      dec(B, K5);
      dec(B, K6);
      dec(B, K7);
      dec(B, K8);
      B = veorq_u8(vaesdq_u8(B, K9), K10);
      vst1q_u8(out + 16 * i, B);
   }
}
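
/*
* AES-192 Encryption
*/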
BOTAN_FUNC_ISA("+crypto+aes") void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   const uint8_t* skey = reinterpret_cast<const uint8_t*>(m_EK.data());
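
   // AES-192 has 12 rounds, so the key schedule holds 13 round keys of 16 bytes each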
   const uint8x16_t K0 = vld1q_u8(skey + 0 * 16);
   const uint8x16_t K1 = vld1q_u8(skey + 1 * 16);
   const uint8x16_t K2 = vld1q_u8(skey + 2 * 16);
   const uint8x16_t K3 = vld1q_u8(skey + 3 * 16);
   const uint8x16_t K4 = vld1q_u8(skey + 4 * 16);
   const uint8x16_t K5 = vld1q_u8(skey + 5 * 16);
   const uint8x16_t K6 = vld1q_u8(skey + 6 * 16);
   const uint8x16_t K7 = vld1q_u8(skey + 7 * 16);
   const uint8x16_t K8 = vld1q_u8(skey + 8 * 16);
   const uint8x16_t K9 = vld1q_u8(skey + 9 * 16);
   const uint8x16_t K10 = vld1q_u8(skey + 10 * 16);
   const uint8x16_t K11 = vld1q_u8(skey + 11 * 16);
   const uint8x16_t K12 = vld1q_u8(skey + 12 * 16);

   using namespace AES_AARCH64;

   while(blocks >= 4) {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in + 16);
      uint8x16_t B2 = vld1q_u8(in + 32);
      uint8x16_t B3 = vld1q_u8(in + 48);

      enc4(B0, B1, B2, B3, K0);
      enc4(B0, B1, B2, B3, K1);
      enc4(B0, B1, B2, B3, K2);
      enc4(B0, B1, B2, B3, K3);
      enc4(B0, B1, B2, B3, K4);
      enc4(B0, B1, B2, B3, K5);
      enc4(B0, B1, B2, B3, K6);
      enc4(B0, B1, B2, B3, K7);
      enc4(B0, B1, B2, B3, K8);
      enc4(B0, B1, B2, B3, K9);
      enc4(B0, B1, B2, B3, K10);
      enc4_last(B0, B1, B2, B3, K11, K12);

      vst1q_u8(out, B0);
      vst1q_u8(out + 16, B1);
      vst1q_u8(out + 32, B2);
      vst1q_u8(out + 48, B3);

      in += 16 * 4;
      out += 16 * 4;
      blocks -= 4;
   }

   for(size_t i = 0; i != blocks; ++i) {
      uint8x16_t B = vld1q_u8(in + 16 * i);
      enc(B, K0);
      enc(B, K1);
      enc(B, K2);
      enc(B, K3);
      enc(B, K4);
      enc(B, K5);
      enc(B, K6);
      enc(B, K7);
      enc(B, K8);
      enc(B, K9);
      enc(B, K10);
      B = veorq_u8(vaeseq_u8(B, K11), K12);
      vst1q_u8(out + 16 * i, B);
   }
}
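
/*
* AES-192 Decryption
*/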
BOTAN_FUNC_ISA("+crypto+aes") void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   const uint8_t* skey = reinterpret_cast<const uint8_t*>(m_DK.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0 * 16);
   const uint8x16_t K1 = vld1q_u8(skey + 1 * 16);
   const uint8x16_t K2 = vld1q_u8(skey + 2 * 16);
   const uint8x16_t K3 = vld1q_u8(skey + 3 * 16);
   const uint8x16_t K4 = vld1q_u8(skey + 4 * 16);
   const uint8x16_t K5 = vld1q_u8(skey + 5 * 16);
   const uint8x16_t K6 = vld1q_u8(skey + 6 * 16);
   const uint8x16_t K7 = vld1q_u8(skey + 7 * 16);
   const uint8x16_t K8 = vld1q_u8(skey + 8 * 16);
   const uint8x16_t K9 = vld1q_u8(skey + 9 * 16);
   const uint8x16_t K10 = vld1q_u8(skey + 10 * 16);
   const uint8x16_t K11 = vld1q_u8(skey + 11 * 16);
   const uint8x16_t K12 = vld1q_u8(skey + 12 * 16);

   using namespace AES_AARCH64;

   while(blocks >= 4) {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in + 16);
      uint8x16_t B2 = vld1q_u8(in + 32);
      uint8x16_t B3 = vld1q_u8(in + 48);

      dec4(B0, B1, B2, B3, K0);
      dec4(B0, B1, B2, B3, K1);
      dec4(B0, B1, B2, B3, K2);
      dec4(B0, B1, B2, B3, K3);
      dec4(B0, B1, B2, B3, K4);
      dec4(B0, B1, B2, B3, K5);
      dec4(B0, B1, B2, B3, K6);
      dec4(B0, B1, B2, B3, K7);
      dec4(B0, B1, B2, B3, K8);
      dec4(B0, B1, B2, B3, K9);
      dec4(B0, B1, B2, B3, K10);
      dec4_last(B0, B1, B2, B3, K11, K12);

      vst1q_u8(out, B0);
      vst1q_u8(out + 16, B1);
      vst1q_u8(out + 32, B2);
      vst1q_u8(out + 48, B3);

      in += 16 * 4;
      out += 16 * 4;
      blocks -= 4;
   }

   for(size_t i = 0; i != blocks; ++i) {
      uint8x16_t B = vld1q_u8(in + 16 * i);
      dec(B, K0);
      dec(B, K1);
      dec(B, K2);
      dec(B, K3);
      dec(B, K4);
      dec(B, K5);
      dec(B, K6);
      dec(B, K7);
      dec(B, K8);
      dec(B, K9);
      dec(B, K10);
      B = veorq_u8(vaesdq_u8(B, K11), K12);
      vst1q_u8(out + 16 * i, B);
   }
}
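
/*
* AES-256 Encryption
*/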
BOTAN_FUNC_ISA("+crypto+aes") void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   const uint8_t* skey = reinterpret_cast<const uint8_t*>(m_EK.data());
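
   // AES-256 has 14 rounds, so the key schedule holds 15 round keys of 16 bytes each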
   const uint8x16_t K0 = vld1q_u8(skey + 0 * 16);
   const uint8x16_t K1 = vld1q_u8(skey + 1 * 16);
   const uint8x16_t K2 = vld1q_u8(skey + 2 * 16);
   const uint8x16_t K3 = vld1q_u8(skey + 3 * 16);
   const uint8x16_t K4 = vld1q_u8(skey + 4 * 16);
   const uint8x16_t K5 = vld1q_u8(skey + 5 * 16);
   const uint8x16_t K6 = vld1q_u8(skey + 6 * 16);
   const uint8x16_t K7 = vld1q_u8(skey + 7 * 16);
   const uint8x16_t K8 = vld1q_u8(skey + 8 * 16);
   const uint8x16_t K9 = vld1q_u8(skey + 9 * 16);
   const uint8x16_t K10 = vld1q_u8(skey + 10 * 16);
   const uint8x16_t K11 = vld1q_u8(skey + 11 * 16);
   const uint8x16_t K12 = vld1q_u8(skey + 12 * 16);
   const uint8x16_t K13 = vld1q_u8(skey + 13 * 16);
   const uint8x16_t K14 = vld1q_u8(skey + 14 * 16);

   using namespace AES_AARCH64;

   while(blocks >= 4) {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in + 16);
      uint8x16_t B2 = vld1q_u8(in + 32);
      uint8x16_t B3 = vld1q_u8(in + 48);

      enc4(B0, B1, B2, B3, K0);
      enc4(B0, B1, B2, B3, K1);
      enc4(B0, B1, B2, B3, K2);
      enc4(B0, B1, B2, B3, K3);
      enc4(B0, B1, B2, B3, K4);
      enc4(B0, B1, B2, B3, K5);
      enc4(B0, B1, B2, B3, K6);
      enc4(B0, B1, B2, B3, K7);
      enc4(B0, B1, B2, B3, K8);
      enc4(B0, B1, B2, B3, K9);
      enc4(B0, B1, B2, B3, K10);
      enc4(B0, B1, B2, B3, K11);
      enc4(B0, B1, B2, B3, K12);
      enc4_last(B0, B1, B2, B3, K13, K14);

      vst1q_u8(out, B0);
      vst1q_u8(out + 16, B1);
      vst1q_u8(out + 32, B2);
      vst1q_u8(out + 48, B3);

      in += 16 * 4;
      out += 16 * 4;
      blocks -= 4;
   }

   for(size_t i = 0; i != blocks; ++i) {
      uint8x16_t B = vld1q_u8(in + 16 * i);
      enc(B, K0);
      enc(B, K1);
      enc(B, K2);
      enc(B, K3);
      enc(B, K4);
      enc(B, K5);
      enc(B, K6);
      enc(B, K7);
      enc(B, K8);
      enc(B, K9);
      enc(B, K10);
      enc(B, K11);
      enc(B, K12);
      B = veorq_u8(vaeseq_u8(B, K13), K14);
      vst1q_u8(out + 16 * i, B);
   }
}
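
/*
* AES-256 Decryption
*/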
BOTAN_FUNC_ISA("+crypto+aes") void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
   const uint8_t* skey = reinterpret_cast<const uint8_t*>(m_DK.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0 * 16);
   const uint8x16_t K1 = vld1q_u8(skey + 1 * 16);
   const uint8x16_t K2 = vld1q_u8(skey + 2 * 16);
   const uint8x16_t K3 = vld1q_u8(skey + 3 * 16);
   const uint8x16_t K4 = vld1q_u8(skey + 4 * 16);
   const uint8x16_t K5 = vld1q_u8(skey + 5 * 16);
   const uint8x16_t K6 = vld1q_u8(skey + 6 * 16);
   const uint8x16_t K7 = vld1q_u8(skey + 7 * 16);
   const uint8x16_t K8 = vld1q_u8(skey + 8 * 16);
   const uint8x16_t K9 = vld1q_u8(skey + 9 * 16);
   const uint8x16_t K10 = vld1q_u8(skey + 10 * 16);
   const uint8x16_t K11 = vld1q_u8(skey + 11 * 16);
   const uint8x16_t K12 = vld1q_u8(skey + 12 * 16);
   const uint8x16_t K13 = vld1q_u8(skey + 13 * 16);
   const uint8x16_t K14 = vld1q_u8(skey + 14 * 16);

   using namespace AES_AARCH64;

   while(blocks >= 4) {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in + 16);
      uint8x16_t B2 = vld1q_u8(in + 32);
      uint8x16_t B3 = vld1q_u8(in + 48);

      dec4(B0, B1, B2, B3, K0);
      dec4(B0, B1, B2, B3, K1);
      dec4(B0, B1, B2, B3, K2);
      dec4(B0, B1, B2, B3, K3);
      dec4(B0, B1, B2, B3, K4);
      dec4(B0, B1, B2, B3, K5);
      dec4(B0, B1, B2, B3, K6);
      dec4(B0, B1, B2, B3, K7);
      dec4(B0, B1, B2, B3, K8);
      dec4(B0, B1, B2, B3, K9);
      dec4(B0, B1, B2, B3, K10);
      dec4(B0, B1, B2, B3, K11);
      dec4(B0, B1, B2, B3, K12);
      dec4_last(B0, B1, B2, B3, K13, K14);

      vst1q_u8(out, B0);
      vst1q_u8(out + 16, B1);
      vst1q_u8(out + 32, B2);
      vst1q_u8(out + 48, B3);

      in += 16 * 4;
      out += 16 * 4;
      blocks -= 4;
   }

   for(size_t i = 0; i != blocks; ++i) {
      uint8x16_t B = vld1q_u8(in + 16 * i);
      dec(B, K0);
      dec(B, K1);
      dec(B, K2);
      dec(B, K3);
      dec(B, K4);
      dec(B, K5);
      dec(B, K6);
      dec(B, K7);
      dec(B, K8);
      dec(B, K9);
      dec(B, K10);
      dec(B, K11);
      dec(B, K12);
      B = veorq_u8(vaesdq_u8(B, K13), K14);
      vst1q_u8(out + 16 * i, B);
   }
}

}  // namespace Botan