7#include <botan/internal/serpent.h>
9#include <botan/internal/serpent_sbox.h>
10#include <botan/internal/simd_avx2.h>
14#if defined(__GNUG__) && !defined(__clang__)
20 #define transform(B0, B1, B2, B3) \
25 B3 ^= B2 ^ B0.shl<3>(); \
29 B2 ^= B3 ^ B1.shl<7>(); \
34 #define i_transform(B0, B1, B2, B3) \
38 B2 ^= B3 ^ B1.shl<7>(); \
42 B3 ^= B2 ^ B0.shl<3>(); \
51void Serpent::avx2_encrypt_8(
const uint8_t in[128], uint8_t out[128])
const {
54 SIMD_8x32::reset_registers();
57 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
58 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
59 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
61 SIMD_8x32::transpose(B0, B1, B2, B3);
65 key_xor(0, B0, B1, B2, B3);
68 key_xor(1, B0, B1, B2, B3);
71 key_xor(2, B0, B1, B2, B3);
74 key_xor(3, B0, B1, B2, B3);
77 key_xor(4, B0, B1, B2, B3);
80 key_xor(5, B0, B1, B2, B3);
83 key_xor(6, B0, B1, B2, B3);
86 key_xor(7, B0, B1, B2, B3);
90 key_xor(8, B0, B1, B2, B3);
93 key_xor(9, B0, B1, B2, B3);
96 key_xor(10, B0, B1, B2, B3);
99 key_xor(11, B0, B1, B2, B3);
102 key_xor(12, B0, B1, B2, B3);
105 key_xor(13, B0, B1, B2, B3);
108 key_xor(14, B0, B1, B2, B3);
111 key_xor(15, B0, B1, B2, B3);
115 key_xor(16, B0, B1, B2, B3);
118 key_xor(17, B0, B1, B2, B3);
121 key_xor(18, B0, B1, B2, B3);
124 key_xor(19, B0, B1, B2, B3);
127 key_xor(20, B0, B1, B2, B3);
130 key_xor(21, B0, B1, B2, B3);
133 key_xor(22, B0, B1, B2, B3);
136 key_xor(23, B0, B1, B2, B3);
140 key_xor(24, B0, B1, B2, B3);
143 key_xor(25, B0, B1, B2, B3);
146 key_xor(26, B0, B1, B2, B3);
149 key_xor(27, B0, B1, B2, B3);
152 key_xor(28, B0, B1, B2, B3);
155 key_xor(29, B0, B1, B2, B3);
158 key_xor(30, B0, B1, B2, B3);
161 key_xor(31, B0, B1, B2, B3);
163 key_xor(32, B0, B1, B2, B3);
165 SIMD_8x32::transpose(B0, B1, B2, B3);
167 B1.store_le(out + 32);
168 B2.store_le(out + 64);
169 B3.store_le(out + 96);
171 SIMD_8x32::zero_registers();
175void Serpent::avx2_decrypt_8(
const uint8_t in[128], uint8_t out[128])
const {
178 SIMD_8x32::reset_registers();
181 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
182 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
183 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
185 SIMD_8x32::transpose(B0, B1, B2, B3);
189 key_xor(32, B0, B1, B2, B3);
191 key_xor(31, B0, B1, B2, B3);
194 key_xor(30, B0, B1, B2, B3);
197 key_xor(29, B0, B1, B2, B3);
200 key_xor(28, B0, B1, B2, B3);
203 key_xor(27, B0, B1, B2, B3);
206 key_xor(26, B0, B1, B2, B3);
209 key_xor(25, B0, B1, B2, B3);
212 key_xor(24, B0, B1, B2, B3);
216 key_xor(23, B0, B1, B2, B3);
219 key_xor(22, B0, B1, B2, B3);
222 key_xor(21, B0, B1, B2, B3);
225 key_xor(20, B0, B1, B2, B3);
228 key_xor(19, B0, B1, B2, B3);
231 key_xor(18, B0, B1, B2, B3);
234 key_xor(17, B0, B1, B2, B3);
237 key_xor(16, B0, B1, B2, B3);
241 key_xor(15, B0, B1, B2, B3);
244 key_xor(14, B0, B1, B2, B3);
247 key_xor(13, B0, B1, B2, B3);
250 key_xor(12, B0, B1, B2, B3);
253 key_xor(11, B0, B1, B2, B3);
256 key_xor(10, B0, B1, B2, B3);
259 key_xor(9, B0, B1, B2, B3);
262 key_xor(8, B0, B1, B2, B3);
266 key_xor(7, B0, B1, B2, B3);
269 key_xor(6, B0, B1, B2, B3);
272 key_xor(5, B0, B1, B2, B3);
275 key_xor(4, B0, B1, B2, B3);
278 key_xor(3, B0, B1, B2, B3);
281 key_xor(2, B0, B1, B2, B3);
284 key_xor(1, B0, B1, B2, B3);
287 key_xor(0, B0, B1, B2, B3);
289 SIMD_8x32::transpose(B0, B1, B2, B3);
292 B1.store_le(out + 32);
293 B2.store_le(out + 64);
294 B3.store_le(out + 96);
296 SIMD_8x32::zero_registers();
BOTAN_FORCE_INLINE void transform(T &B0, T &B1, T &B2, T &B3)
BOTAN_FORCE_INLINE void i_transform(T &B0, T &B1, T &B2, T &B3)
BOTAN_FORCE_INLINE void SBoxE6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)