// Per-S-box 8-bit constants, one for S0 and one for S1.
// NOTE(review): presumably the constant ("post") term added after the
// affine/inversion step when computing each SEED S-box; the use site is
// not visible in this excerpt - confirm.
65 constexpr uint8_t seed_s0_post_c = 0xA9;
66 constexpr uint8_t seed_s1_post_c = 0x38;
// 0xAA... has every odd-numbered bit set. _mm512_mask_blend_epi8 takes the
// byte from its second source where the mask bit is 1 and from its first
// source where it is 0, so sbox holds the s0 result in even-numbered bytes
// and the s1 result in odd-numbered bytes.
74 constexpr uint64_t blend_mask = 0xAAAAAAAAAAAAAAAA;
75 const auto sbox =
SIMD_16x32(_mm512_mask_blend_epi8(blend_mask, s0.raw(), s1.raw()));
// Byte-shuffle control table: the same 16-entry pattern is repeated for each
// of the four 128-bit lanes. Within a lane, every 32-bit word position is
// filled with four copies of index 0/4/8/12, i.e. byte 0 of that word.
// 64-byte alignment is required because the table is read with the aligned
// load _mm512_load_si512.
86 alignas(64)
constexpr uint8_t SHUF_BYTE0[64] = {
87 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12,
88 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12,
89 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12,
90 0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12,
// Byte-shuffle control table: the same 16-entry pattern is repeated for each
// of the four 128-bit lanes. Within a lane, every 32-bit word position is
// filled with four copies of index 1/5/9/13, i.e. byte 1 of that word.
// 64-byte alignment is required because the table is read with the aligned
// load _mm512_load_si512.
92 alignas(64)
constexpr uint8_t SHUF_BYTE1[64] = {
93 1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13,
94 1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13,
95 1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13,
96 1, 1, 1, 1, 5, 5, 5, 5, 9, 9, 9, 9, 13, 13, 13, 13,
// Byte-shuffle control table: the same 16-entry pattern is repeated for each
// of the four 128-bit lanes. Within a lane, every 32-bit word position is
// filled with four copies of index 2/6/10/14, i.e. byte 2 of that word.
// 64-byte alignment is required because the table is read with the aligned
// load _mm512_load_si512.
98 alignas(64)
constexpr uint8_t SHUF_BYTE2[64] = {
99 2, 2, 2, 2, 6, 6, 6, 6, 10, 10, 10, 10, 14, 14, 14, 14,
100 2, 2, 2, 2, 6, 6, 6, 6, 10, 10, 10, 10, 14, 14, 14, 14,
101 2, 2, 2, 2, 6, 6, 6, 6, 10, 10, 10, 10, 14, 14, 14, 14,
102 2, 2, 2, 2, 6, 6, 6, 6, 10, 10, 10, 10, 14, 14, 14, 14,
// Byte-shuffle control table: the same 16-entry pattern is repeated for each
// of the four 128-bit lanes. Within a lane, every 32-bit word position is
// filled with four copies of index 3/7/11/15, i.e. byte 3 of that word.
// 64-byte alignment is required because the table is read with the aligned
// load _mm512_load_si512.
104 alignas(64)
constexpr uint8_t SHUF_BYTE3[64] = {
105 3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15,
106 3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15,
107 3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15,
108 3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15,
// Broadcast one S-box output byte across each 32-bit word: bN holds, in every
// word, four copies of byte N of the matching word of sbox.
// _mm512_shuffle_epi8 selects bytes within each 128-bit lane, which is why
// each SHUF_BYTEn table repeats the same 16 indices for all four lanes.
112 const auto b0 =
SIMD_16x32(_mm512_shuffle_epi8(sbox.raw(), _mm512_load_si512(SHUF_BYTE0)));
113 const auto b1 =
SIMD_16x32(_mm512_shuffle_epi8(sbox.raw(), _mm512_load_si512(SHUF_BYTE1)));
114 const auto b2 =
SIMD_16x32(_mm512_shuffle_epi8(sbox.raw(), _mm512_load_si512(SHUF_BYTE2)));
115 const auto b3 =
SIMD_16x32(_mm512_shuffle_epi8(sbox.raw(), _mm512_load_si512(SHUF_BYTE3)));
// Statement order matters: T0 is overwritten first, so the T1 + T0 on the
// next line already uses the updated T0.
131 T0 = seed_g(T1 + T0);
132 T1 = seed_g(T1 + T0);
// Same dependent update pair as above (T0 first, then T1 using the new T0).
138 T0 = seed_g(T1 + T0);
139 T1 = seed_g(T1 + T0);
/*
* Encryption over a batch declared as 16 * 4 * 4 = 256 bytes of input and
* output.
*
* ptext: input bytes; ctext: output bytes; RK: round key words. The loop
* reads RK[0] .. RK[31], so RK must contain at least 32 words.
*
* NOTE(review): how ptext is loaded into B0..B3 and how the result is stored
* to ctext is not visible in this excerpt.
*/
144BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX512_GFNI
void encrypt(
const uint8_t ptext[16 * 4 * 4],
145 uint8_t ctext[16 * 4 * 4],
146 std::span<const uint32_t> RK) {
// 8 iterations, each consuming four consecutive round key words in
// forward order.
154 for(
size_t j = 0; j != 8; ++j) {
155 const uint32_t K0 = RK[4 * j];
156 const uint32_t K1 = RK[4 * j + 1];
157 const uint32_t K2 = RK[4 * j + 2];
158 const uint32_t K3 = RK[4 * j + 3];
160 seed_round(B0, B1, B2, B3, K0, K1, K2, K3);
/*
* Decryption over a batch declared as 16 * 4 * 4 = 256 bytes of input and
* output. Calls the same seed_round as encryption, but walks the round keys
* from the end of RK toward the start.
*
* ctext: input bytes; ptext: output bytes; RK: round key words. The loop
* reads RK[0] .. RK[31], so RK must contain at least 32 words.
*
* NOTE(review): how ctext is loaded into B0..B3 and how the result is stored
* to ptext is not visible in this excerpt.
*/
171BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX512_GFNI
void decrypt(
const uint8_t ctext[16 * 4 * 4],
172 uint8_t ptext[16 * 4 * 4],
173 std::span<const uint32_t> RK) {
// Iteration j uses the group RK[28 - 4*j .. 31 - 4*j]; within the group the
// upper pair (offsets 2,3) is passed as K0,K1 and the lower pair (offsets
// 0,1) as K2,K3. j = 0 reads RK[30],RK[31],RK[28],RK[29]; j = 7 reads
// RK[2],RK[3],RK[0],RK[1].
181 for(
size_t j = 0; j != 8; ++j) {
182 const uint32_t K0 = RK[30 - 4 * j];
183 const uint32_t K1 = RK[31 - 4 * j];
184 const uint32_t K2 = RK[28 - 4 * j];
185 const uint32_t K3 = RK[29 - 4 * j];
187 seed_round(B0, B1, B2, B3, K0, K1, K2, K3);
/*
* Double-width encryption over a batch declared as 32 * 4 * 4 = 512 bytes of
* input and output. Two independent state sets (B0..B3 and B4..B7) are
* advanced with the same round keys in each loop iteration.
*
* ptext: input bytes; ctext: output bytes; RK: round key words. The loop
* reads RK[0] .. RK[31], so RK must contain at least 32 words.
*
* NOTE(review): how ptext is loaded into B0..B7 and how the result is stored
* to ctext is not visible in this excerpt.
*/
197BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX512_GFNI
void encrypt_x2(
const uint8_t ptext[32 * 4 * 4],
198 uint8_t ctext[32 * 4 * 4],
199 std::span<const uint32_t> RK) {
// 8 iterations, each consuming four consecutive round key words in
// forward order; the keys are fetched once and shared by both halves.
213 for(
size_t j = 0; j != 8; ++j) {
214 const uint32_t K0 = RK[4 * j];
215 const uint32_t K1 = RK[4 * j + 1];
216 const uint32_t K2 = RK[4 * j + 2];
217 const uint32_t K3 = RK[4 * j + 3];
219 seed_round(B0, B1, B2, B3, K0, K1, K2, K3);
220 seed_round(B4, B5, B6, B7, K0, K1, K2, K3);
/*
* Double-width decryption over a batch declared as 32 * 4 * 4 = 512 bytes of
* input and output. Two independent state sets (B0..B3 and B4..B7) are
* advanced with the same round keys in each loop iteration, walking RK from
* the end toward the start.
*
* ctext: input bytes; ptext: output bytes; RK: round key words. The loop
* reads RK[0] .. RK[31], so RK must contain at least 32 words.
*
* NOTE(review): how ctext is loaded into B0..B7 and how the result is stored
* to ptext is not visible in this excerpt.
*/
237BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX512_GFNI
void decrypt_x2(
const uint8_t ctext[32 * 4 * 4],
238 uint8_t ptext[32 * 4 * 4],
239 std::span<const uint32_t> RK) {
// Iteration j uses the group RK[28 - 4*j .. 31 - 4*j]; within the group the
// upper pair (offsets 2,3) is passed as K0,K1 and the lower pair (offsets
// 0,1) as K2,K3. The keys are fetched once and shared by both halves.
253 for(
size_t j = 0; j != 8; ++j) {
254 const uint32_t K0 = RK[30 - 4 * j];
255 const uint32_t K1 = RK[31 - 4 * j];
256 const uint32_t K2 = RK[28 - 4 * j];
257 const uint32_t K3 = RK[29 - 4 * j];
259 seed_round(B0, B1, B2, B3, K0, K1, K2, K3);
260 seed_round(B4, B5, B6, B7, K0, K1, K2, K3);