aes_armv8.cpp
/*
* AES using ARMv8
* Contributed by Jeffrey Walton
*
* Further changes
* (C) 2017,2018 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/aes.h>
#include <botan/loadstor.h>
#include <arm_neon.h>

namespace Botan {

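/*
* On AArch64, vaeseq_u8 performs AddRoundKey+SubBytes+ShiftRows and
* vaesmcq_u8 performs MixColumns, so composing them computes one full AES
* encryption round. The macros below apply one round to four blocks at a
* time. The final round omits MixColumns; the last round key is applied
* with a plain XOR (veorq_u8) instead.
*/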
#define AES_ENC_4_ROUNDS(K)                \
   do                                      \
      {                                    \
      B0 = vaesmcq_u8(vaeseq_u8(B0, K));   \
      B1 = vaesmcq_u8(vaeseq_u8(B1, K));   \
      B2 = vaesmcq_u8(vaeseq_u8(B2, K));   \
      B3 = vaesmcq_u8(vaeseq_u8(B3, K));   \
      } while(0)

#define AES_ENC_4_LAST_ROUNDS(K, K2)       \
   do                                      \
      {                                    \
      B0 = veorq_u8(vaeseq_u8(B0, K), K2); \
      B1 = veorq_u8(vaeseq_u8(B1, K), K2); \
      B2 = veorq_u8(vaeseq_u8(B2, K), K2); \
      B3 = veorq_u8(vaeseq_u8(B3, K), K2); \
      } while(0)

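/*
* For decryption, vaesdq_u8 performs AddRoundKey+InvShiftRows+InvSubBytes
* and vaesimcq_u8 performs InvMixColumns, matching the "equivalent
* inverse cipher" round ordering.
*/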
#define AES_DEC_4_ROUNDS(K)                \
   do                                      \
      {                                    \
      B0 = vaesimcq_u8(vaesdq_u8(B0, K));  \
      B1 = vaesimcq_u8(vaesdq_u8(B1, K));  \
      B2 = vaesimcq_u8(vaesdq_u8(B2, K));  \
      B3 = vaesimcq_u8(vaesdq_u8(B3, K));  \
      } while(0)

#define AES_DEC_4_LAST_ROUNDS(K, K2)       \
   do                                      \
      {                                    \
      B0 = veorq_u8(vaesdq_u8(B0, K), K2); \
      B1 = veorq_u8(vaesdq_u8(B1, K), K2); \
      B2 = veorq_u8(vaesdq_u8(B2, K), K2); \
      B3 = veorq_u8(vaesdq_u8(B3, K), K2); \
      } while(0)

/*
* AES-128 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_128::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
   const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());

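   // K0..K9 come from the main schedule (m_EK); the final round key K10
   // is stored separately (m_ME)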
   const uint8x16_t K0 = vld1q_u8(skey + 0);
   const uint8x16_t K1 = vld1q_u8(skey + 16);
   const uint8x16_t K2 = vld1q_u8(skey + 32);
   const uint8x16_t K3 = vld1q_u8(skey + 48);
   const uint8x16_t K4 = vld1q_u8(skey + 64);
   const uint8x16_t K5 = vld1q_u8(skey + 80);
   const uint8x16_t K6 = vld1q_u8(skey + 96);
   const uint8x16_t K7 = vld1q_u8(skey + 112);
   const uint8x16_t K8 = vld1q_u8(skey + 128);
   const uint8x16_t K9 = vld1q_u8(skey + 144);
   const uint8x16_t K10 = vld1q_u8(mkey);

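   // Four blocks are processed together; the per-block AESE/AESMC chains
   // are independent, giving the CPU instruction-level parallelism to
   // hide the latency of the AES instructions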
   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_ENC_4_ROUNDS(K0);
      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_LAST_ROUNDS(K9, K10);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

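   // Process any remaining blocks one at a time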
   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesmcq_u8(vaeseq_u8(B, K0));
      B = vaesmcq_u8(vaeseq_u8(B, K1));
      B = vaesmcq_u8(vaeseq_u8(B, K2));
      B = vaesmcq_u8(vaeseq_u8(B, K3));
      B = vaesmcq_u8(vaeseq_u8(B, K4));
      B = vaesmcq_u8(vaeseq_u8(B, K5));
      B = vaesmcq_u8(vaeseq_u8(B, K6));
      B = vaesmcq_u8(vaeseq_u8(B, K7));
      B = vaesmcq_u8(vaeseq_u8(B, K8));
      B = veorq_u8(vaeseq_u8(B, K9), K10);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-128 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_128::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
   const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());

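   // vaesdq_u8 follows the equivalent inverse cipher, which assumes the
   // decryption schedule (m_DK) already has InvMixColumns applied to its
   // middle round keys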
   const uint8x16_t K0 = vld1q_u8(skey + 0);
   const uint8x16_t K1 = vld1q_u8(skey + 16);
   const uint8x16_t K2 = vld1q_u8(skey + 32);
   const uint8x16_t K3 = vld1q_u8(skey + 48);
   const uint8x16_t K4 = vld1q_u8(skey + 64);
   const uint8x16_t K5 = vld1q_u8(skey + 80);
   const uint8x16_t K6 = vld1q_u8(skey + 96);
   const uint8x16_t K7 = vld1q_u8(skey + 112);
   const uint8x16_t K8 = vld1q_u8(skey + 128);
   const uint8x16_t K9 = vld1q_u8(skey + 144);
   const uint8x16_t K10 = vld1q_u8(mkey);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_DEC_4_ROUNDS(K0);
      AES_DEC_4_ROUNDS(K1);
      AES_DEC_4_ROUNDS(K2);
      AES_DEC_4_ROUNDS(K3);
      AES_DEC_4_ROUNDS(K4);
      AES_DEC_4_ROUNDS(K5);
      AES_DEC_4_ROUNDS(K6);
      AES_DEC_4_ROUNDS(K7);
      AES_DEC_4_ROUNDS(K8);
      AES_DEC_4_LAST_ROUNDS(K9, K10);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesimcq_u8(vaesdq_u8(B, K0));
      B = vaesimcq_u8(vaesdq_u8(B, K1));
      B = vaesimcq_u8(vaesdq_u8(B, K2));
      B = vaesimcq_u8(vaesdq_u8(B, K3));
      B = vaesimcq_u8(vaesdq_u8(B, K4));
      B = vaesimcq_u8(vaesdq_u8(B, K5));
      B = vaesimcq_u8(vaesdq_u8(B, K6));
      B = vaesimcq_u8(vaesdq_u8(B, K7));
      B = vaesimcq_u8(vaesdq_u8(B, K8));
      B = veorq_u8(vaesdq_u8(B, K9), K10);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-192 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_192::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
   const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());

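   // AES-192 has 12 rounds, so there are two more round keys than for
   // AES-128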
   const uint8x16_t K0 = vld1q_u8(skey + 0);
   const uint8x16_t K1 = vld1q_u8(skey + 16);
   const uint8x16_t K2 = vld1q_u8(skey + 32);
   const uint8x16_t K3 = vld1q_u8(skey + 48);
   const uint8x16_t K4 = vld1q_u8(skey + 64);
   const uint8x16_t K5 = vld1q_u8(skey + 80);
   const uint8x16_t K6 = vld1q_u8(skey + 96);
   const uint8x16_t K7 = vld1q_u8(skey + 112);
   const uint8x16_t K8 = vld1q_u8(skey + 128);
   const uint8x16_t K9 = vld1q_u8(skey + 144);
   const uint8x16_t K10 = vld1q_u8(skey + 160);
   const uint8x16_t K11 = vld1q_u8(skey + 176);
   const uint8x16_t K12 = vld1q_u8(mkey);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_ENC_4_ROUNDS(K0);
      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_ROUNDS(K9);
      AES_ENC_4_ROUNDS(K10);
      AES_ENC_4_LAST_ROUNDS(K11, K12);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesmcq_u8(vaeseq_u8(B, K0));
      B = vaesmcq_u8(vaeseq_u8(B, K1));
      B = vaesmcq_u8(vaeseq_u8(B, K2));
      B = vaesmcq_u8(vaeseq_u8(B, K3));
      B = vaesmcq_u8(vaeseq_u8(B, K4));
      B = vaesmcq_u8(vaeseq_u8(B, K5));
      B = vaesmcq_u8(vaeseq_u8(B, K6));
      B = vaesmcq_u8(vaeseq_u8(B, K7));
      B = vaesmcq_u8(vaeseq_u8(B, K8));
      B = vaesmcq_u8(vaeseq_u8(B, K9));
      B = vaesmcq_u8(vaeseq_u8(B, K10));
      B = veorq_u8(vaeseq_u8(B, K11), K12);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-192 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_192::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
   const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0);
   const uint8x16_t K1 = vld1q_u8(skey + 16);
   const uint8x16_t K2 = vld1q_u8(skey + 32);
   const uint8x16_t K3 = vld1q_u8(skey + 48);
   const uint8x16_t K4 = vld1q_u8(skey + 64);
   const uint8x16_t K5 = vld1q_u8(skey + 80);
   const uint8x16_t K6 = vld1q_u8(skey + 96);
   const uint8x16_t K7 = vld1q_u8(skey + 112);
   const uint8x16_t K8 = vld1q_u8(skey + 128);
   const uint8x16_t K9 = vld1q_u8(skey + 144);
   const uint8x16_t K10 = vld1q_u8(skey + 160);
   const uint8x16_t K11 = vld1q_u8(skey + 176);
   const uint8x16_t K12 = vld1q_u8(mkey);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_DEC_4_ROUNDS(K0);
      AES_DEC_4_ROUNDS(K1);
      AES_DEC_4_ROUNDS(K2);
      AES_DEC_4_ROUNDS(K3);
      AES_DEC_4_ROUNDS(K4);
      AES_DEC_4_ROUNDS(K5);
      AES_DEC_4_ROUNDS(K6);
      AES_DEC_4_ROUNDS(K7);
      AES_DEC_4_ROUNDS(K8);
      AES_DEC_4_ROUNDS(K9);
      AES_DEC_4_ROUNDS(K10);
      AES_DEC_4_LAST_ROUNDS(K11, K12);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesimcq_u8(vaesdq_u8(B, K0));
      B = vaesimcq_u8(vaesdq_u8(B, K1));
      B = vaesimcq_u8(vaesdq_u8(B, K2));
      B = vaesimcq_u8(vaesdq_u8(B, K3));
      B = vaesimcq_u8(vaesdq_u8(B, K4));
      B = vaesimcq_u8(vaesdq_u8(B, K5));
      B = vaesimcq_u8(vaesdq_u8(B, K6));
      B = vaesimcq_u8(vaesdq_u8(B, K7));
      B = vaesimcq_u8(vaesdq_u8(B, K8));
      B = vaesimcq_u8(vaesdq_u8(B, K9));
      B = vaesimcq_u8(vaesdq_u8(B, K10));
      B = veorq_u8(vaesdq_u8(B, K11), K12);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-256 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_256::armv8_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());
   const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_ME.data());

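   // AES-256 has 14 rounds, so there are four more round keys than for
   // AES-128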
   const uint8x16_t K0 = vld1q_u8(skey + 0);
   const uint8x16_t K1 = vld1q_u8(skey + 16);
   const uint8x16_t K2 = vld1q_u8(skey + 32);
   const uint8x16_t K3 = vld1q_u8(skey + 48);
   const uint8x16_t K4 = vld1q_u8(skey + 64);
   const uint8x16_t K5 = vld1q_u8(skey + 80);
   const uint8x16_t K6 = vld1q_u8(skey + 96);
   const uint8x16_t K7 = vld1q_u8(skey + 112);
   const uint8x16_t K8 = vld1q_u8(skey + 128);
   const uint8x16_t K9 = vld1q_u8(skey + 144);
   const uint8x16_t K10 = vld1q_u8(skey + 160);
   const uint8x16_t K11 = vld1q_u8(skey + 176);
   const uint8x16_t K12 = vld1q_u8(skey + 192);
   const uint8x16_t K13 = vld1q_u8(skey + 208);
   const uint8x16_t K14 = vld1q_u8(mkey);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_ENC_4_ROUNDS(K0);
      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_ROUNDS(K9);
      AES_ENC_4_ROUNDS(K10);
      AES_ENC_4_ROUNDS(K11);
      AES_ENC_4_ROUNDS(K12);
      AES_ENC_4_LAST_ROUNDS(K13, K14);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesmcq_u8(vaeseq_u8(B, K0));
      B = vaesmcq_u8(vaeseq_u8(B, K1));
      B = vaesmcq_u8(vaeseq_u8(B, K2));
      B = vaesmcq_u8(vaeseq_u8(B, K3));
      B = vaesmcq_u8(vaeseq_u8(B, K4));
      B = vaesmcq_u8(vaeseq_u8(B, K5));
      B = vaesmcq_u8(vaeseq_u8(B, K6));
      B = vaesmcq_u8(vaeseq_u8(B, K7));
      B = vaesmcq_u8(vaeseq_u8(B, K8));
      B = vaesmcq_u8(vaeseq_u8(B, K9));
      B = vaesmcq_u8(vaeseq_u8(B, K10));
      B = vaesmcq_u8(vaeseq_u8(B, K11));
      B = vaesmcq_u8(vaeseq_u8(B, K12));
      B = veorq_u8(vaeseq_u8(B, K13), K14);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-256 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_256::armv8_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());
   const uint8_t *mkey = reinterpret_cast<const uint8_t*>(m_MD.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0);
   const uint8x16_t K1 = vld1q_u8(skey + 16);
   const uint8x16_t K2 = vld1q_u8(skey + 32);
   const uint8x16_t K3 = vld1q_u8(skey + 48);
   const uint8x16_t K4 = vld1q_u8(skey + 64);
   const uint8x16_t K5 = vld1q_u8(skey + 80);
   const uint8x16_t K6 = vld1q_u8(skey + 96);
   const uint8x16_t K7 = vld1q_u8(skey + 112);
   const uint8x16_t K8 = vld1q_u8(skey + 128);
   const uint8x16_t K9 = vld1q_u8(skey + 144);
   const uint8x16_t K10 = vld1q_u8(skey + 160);
   const uint8x16_t K11 = vld1q_u8(skey + 176);
   const uint8x16_t K12 = vld1q_u8(skey + 192);
   const uint8x16_t K13 = vld1q_u8(skey + 208);
   const uint8x16_t K14 = vld1q_u8(mkey);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_DEC_4_ROUNDS(K0);
      AES_DEC_4_ROUNDS(K1);
      AES_DEC_4_ROUNDS(K2);
      AES_DEC_4_ROUNDS(K3);
      AES_DEC_4_ROUNDS(K4);
      AES_DEC_4_ROUNDS(K5);
      AES_DEC_4_ROUNDS(K6);
      AES_DEC_4_ROUNDS(K7);
      AES_DEC_4_ROUNDS(K8);
      AES_DEC_4_ROUNDS(K9);
      AES_DEC_4_ROUNDS(K10);
      AES_DEC_4_ROUNDS(K11);
      AES_DEC_4_ROUNDS(K12);
      AES_DEC_4_LAST_ROUNDS(K13, K14);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesimcq_u8(vaesdq_u8(B, K0));
      B = vaesimcq_u8(vaesdq_u8(B, K1));
      B = vaesimcq_u8(vaesdq_u8(B, K2));
      B = vaesimcq_u8(vaesdq_u8(B, K3));
      B = vaesimcq_u8(vaesdq_u8(B, K4));
      B = vaesimcq_u8(vaesdq_u8(B, K5));
      B = vaesimcq_u8(vaesdq_u8(B, K6));
      B = vaesimcq_u8(vaesdq_u8(B, K7));
      B = vaesimcq_u8(vaesdq_u8(B, K8));
      B = vaesimcq_u8(vaesdq_u8(B, K9));
      B = vaesimcq_u8(vaesdq_u8(B, K10));
      B = vaesimcq_u8(vaesdq_u8(B, K11));
      B = vaesimcq_u8(vaesdq_u8(B, K12));
      B = veorq_u8(vaesdq_u8(B, K13), K14);
      vst1q_u8(out+16*i, B);
      }
   }

#undef AES_ENC_4_ROUNDS
#undef AES_ENC_4_LAST_ROUNDS
#undef AES_DEC_4_ROUNDS
#undef AES_DEC_4_LAST_ROUNDS

}