102apply_aria_sbox(SIMD_16x32 x, __m512i pre_mat, __m512i pre_const, __m512i post_mat, __m512i post_const) {
117 const __m512i fwd_perm = _mm512_set_epi64(0x3F3B37332F2B2723,
126 const __m512i inv_perm = _mm512_set_epi64(0x3F2F1F0F3E2E1E0E,
136 __m512i v = _mm512_permutexvar_epi8(fwd_perm, x.
raw());
139 v = _mm512_xor_si512(_mm512_gf2p8affine_epi64_epi8(v, pre_mat, 0), pre_const);
140 v = _mm512_xor_si512(_mm512_gf2p8affineinv_epi64_epi8(v, post_mat, 0), post_const);
143 v = _mm512_permutexvar_epi8(inv_perm, v);
155 const __m512i fo_pre_mat = _mm512_set_epi64(IDENTITY, IDENTITY, IDENTITY, IDENTITY, AFF_X1, AFF_X1, AFF_X2, AFF_X2);
157 const __m512i fo_post_mat = _mm512_set_epi64(AFF_S1, AFF_S1, AFF_S2, AFF_S2, IDENTITY, IDENTITY, IDENTITY, IDENTITY);
159 const __m512i fo_pre_const = _mm512_set_epi64(0x0000000000000000,
168 const __m512i fo_post_const = _mm512_set_epi64(0x6363636363636363,
177 return apply_aria_sbox(x, fo_pre_mat, fo_pre_const, fo_post_mat, fo_post_const);
181 const __m512i fe_pre_mat = _mm512_set_epi64(AFF_X1, AFF_X1, AFF_X2, AFF_X2, IDENTITY, IDENTITY, IDENTITY, IDENTITY);
183 const __m512i fe_post_mat = _mm512_set_epi64(IDENTITY, IDENTITY, IDENTITY, IDENTITY, AFF_S1, AFF_S1, AFF_S2, AFF_S2);
185 const __m512i fe_pre_const = _mm512_set_epi64(0x0505050505050505,
194 const __m512i fe_post_const = _mm512_set_epi64(0x0000000000000000,
203 return apply_aria_sbox(x, fe_pre_mat, fe_pre_const, fe_post_mat, fe_post_const);
209 const __m512i rol16 = _mm512_set_epi64(0x0E0F0C0D0A0B0809,
236 return x ^ x.
rotl<8>() ^ x.
rotl<24>();
252 B0 = aria_fo_m(apply_fo_sbox(B0));
253 B1 = aria_fo_m(apply_fo_sbox(B1));
254 B2 = aria_fo_m(apply_fo_sbox(B2));
255 B3 = aria_fo_m(apply_fo_sbox(B3));
257 aria_mix(B0, B1, B2, B3);
259 B1 = swap_abcd_badc(B1);
263 aria_mix(B0, B1, B2, B3);
270 B0 = aria_fe_m(apply_fe_sbox(B0));
271 B1 = aria_fe_m(apply_fe_sbox(B1));
272 B2 = aria_fe_m(apply_fe_sbox(B2));
273 B3 = aria_fe_m(apply_fe_sbox(B3));
275 aria_mix(B0, B1, B2, B3);
277 B3 = swap_abcd_badc(B3);
281 aria_mix(B0, B1, B2, B3);
287BOTAN_FN_ISA_AVX512_GFNI
288void transform_16(
const uint8_t in[], uint8_t out[], std::span<const uint32_t> KS) {
289 const size_t ROUNDS = (KS.size() / 4) - 1;
300 for(
size_t r = 0; r != ROUNDS; r += 2) {
305 aria_fo(B0, B1, B2, B3);
312 if(r != ROUNDS - 2) {
313 aria_fe(B0, B1, B2, B3);
330void BOTAN_FN_ISA_AVX512_GFNI aria_transform(
const uint8_t in[],
333 std::span<const uint32_t> KS) {
334 while(blocks >= 16) {
335 ARIA_AVX512::transform_16(in, out, KS);
342 uint8_t ibuf[16 * 16] = {0};
343 uint8_t obuf[16 * 16] = {0};
345 ARIA_AVX512::transform_16(ibuf, obuf, KS);