Botan 3.4.0
Crypto and TLS for C&&
serpent_avx2.cpp
Go to the documentation of this file.
1/*
2* (C) 2018 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/serpent.h>
8
9#include <botan/internal/serpent_sbox.h>
10#include <botan/internal/simd_avx2.h>
11
12namespace Botan {
13
14#if defined(__GNUG__) && !defined(__clang__)
15
16// These macros duplicate the transform()/i_transform() functions defined
17// in serpent_fn.h, but unfortunately removing them seems to trigger a bug
18// in GCC when building in amalgamation mode
19
// transform(B0, B1, B2, B3): in-place mixing step over four values.
//
// Used in avx2_encrypt_8 after every S-box call except the last one.
// The sequence is: rotate B0/B2 left (13, 3), fold them into B1/B3 (B3 also
// takes B0 shifted left by 3), rotate B1/B3 left (1, 7), fold those into
// B0/B2 (B2 also takes B1 shifted left by 7), then rotate B0/B2 left (5, 22).
//
// The operands must provide rotl<N>(), shl<N>() and the ^ / ^= operators;
// in this file they are SIMD_8x32 variables. Each macro argument is expanded
// several times, so pass plain variables, not expressions with side effects.
//
// This macro only exists under the enclosing GCC-only #if; on other
// compilers the name presumably resolves to the function of the same name
// brought in via `using namespace Botan::Serpent_F` -- confirm in serpent_fn.h.
20 #define transform(B0, B1, B2, B3) \
21 do { \
22 B0 = B0.rotl<13>(); \
23 B2 = B2.rotl<3>(); \
24 B1 ^= B0 ^ B2; \
25 B3 ^= B2 ^ B0.shl<3>(); \
26 B1 = B1.rotl<1>(); \
27 B3 = B3.rotl<7>(); \
28 B0 ^= B1 ^ B3; \
29 B2 ^= B3 ^ B1.shl<7>(); \
30 B0 = B0.rotl<5>(); \
31 B2 = B2.rotl<22>(); \
32 } while(0)
33
// i_transform(B0, B1, B2, B3): in-place inverse of the transform() macro.
//
// It undoes transform() one step at a time, in reverse order: the final
// left rotations of B0/B2 (5, 22) are reversed first with right rotations,
// then the XOR folds are re-applied (XOR is its own inverse), then the
// rotations of B1/B3 (1, 7) and finally those of B0/B2 (13, 3) are reversed.
// Used in avx2_decrypt_8 before every S-box call except the first one.
//
// The operands must provide rotr<N>(), shl<N>() and the ^ / ^= operators;
// in this file they are SIMD_8x32 variables. Each macro argument is expanded
// several times, so pass plain variables, not expressions with side effects.
//
// This macro only exists under the enclosing GCC-only #if; on other
// compilers the name presumably resolves to the function of the same name
// brought in via `using namespace Botan::Serpent_F` -- confirm in serpent_fn.h.
34 #define i_transform(B0, B1, B2, B3) \
35 do { \
36 B2 = B2.rotr<22>(); \
37 B0 = B0.rotr<5>(); \
38 B2 ^= B3 ^ B1.shl<7>(); \
39 B0 ^= B1 ^ B3; \
40 B3 = B3.rotr<7>(); \
41 B1 = B1.rotr<1>(); \
42 B3 ^= B2 ^ B0.shl<3>(); \
43 B1 ^= B0 ^ B2; \
44 B2 = B2.rotr<3>(); \
45 B0 = B0.rotr<13>(); \
46 } while(0)
47
48#endif
49
// Encrypts 128 bytes from `in` into `out` using four SIMD_8x32 values.
//
// Flow: load four 32-byte chunks, transpose, run 32 fully unrolled rounds
// keyed from m_round_key, transpose back, store. Rounds 0..30 are
//   key_xor(r) -> SBoxE(r % 8) -> transform
// and round 31 replaces the transform with a final key_xor(32), so 33 round
// keys (indices 0..32) are consumed in total.
//
// @param in  128 input bytes (presumably eight 16-byte blocks, per the _8
//            suffix -- confirm against the caller)
// @param out 128 output bytes, written at the same offsets the input is read
//
// NOTE(review): the embedded listing numbers jump from 49 to 51 at this
// definition and the symbol index at the end of the page lists BOTAN_AVX2_FN,
// so an attribute line presumably preceded this function upstream -- confirm
// before compiling from this listing.
51void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const {
52 using namespace Botan::Serpent_F;
53
// NOTE(review): reset_registers() / zero_registers() are opaque from here;
// presumably they manage SIMD register state on entry/exit -- confirm in
// simd_avx2.h.
54 SIMD_8x32::reset_registers();
55
// Read the input as four consecutive 32-byte chunks.
56 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
57 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
58 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
59 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
60
// Rearranged before the rounds and again before the store; presumably this
// groups the same word of every block into one register -- TODO confirm.
61 SIMD_8x32::transpose(B0, B1, B2, B3);
62
// key_xor(r, ...) mixes round key r (taken from m_round_key) into B0..B3.
63 const Key_Inserter key_xor(m_round_key.data());
64
// Rounds 0-7: S-boxes E0..E7 in order.
65 key_xor(0, B0, B1, B2, B3);
66 SBoxE0(B0, B1, B2, B3);
67 transform(B0, B1, B2, B3);
68 key_xor(1, B0, B1, B2, B3);
69 SBoxE1(B0, B1, B2, B3);
70 transform(B0, B1, B2, B3);
71 key_xor(2, B0, B1, B2, B3);
72 SBoxE2(B0, B1, B2, B3);
73 transform(B0, B1, B2, B3);
74 key_xor(3, B0, B1, B2, B3);
75 SBoxE3(B0, B1, B2, B3);
76 transform(B0, B1, B2, B3);
77 key_xor(4, B0, B1, B2, B3);
78 SBoxE4(B0, B1, B2, B3);
79 transform(B0, B1, B2, B3);
80 key_xor(5, B0, B1, B2, B3);
81 SBoxE5(B0, B1, B2, B3);
82 transform(B0, B1, B2, B3);
83 key_xor(6, B0, B1, B2, B3);
84 SBoxE6(B0, B1, B2, B3);
85 transform(B0, B1, B2, B3);
86 key_xor(7, B0, B1, B2, B3);
87 SBoxE7(B0, B1, B2, B3);
88 transform(B0, B1, B2, B3);
89
// Rounds 8-15: the same S-box cycle E0..E7 with the next eight round keys.
90 key_xor(8, B0, B1, B2, B3);
91 SBoxE0(B0, B1, B2, B3);
92 transform(B0, B1, B2, B3);
93 key_xor(9, B0, B1, B2, B3);
94 SBoxE1(B0, B1, B2, B3);
95 transform(B0, B1, B2, B3);
96 key_xor(10, B0, B1, B2, B3);
97 SBoxE2(B0, B1, B2, B3);
98 transform(B0, B1, B2, B3);
99 key_xor(11, B0, B1, B2, B3);
100 SBoxE3(B0, B1, B2, B3);
101 transform(B0, B1, B2, B3);
102 key_xor(12, B0, B1, B2, B3);
103 SBoxE4(B0, B1, B2, B3);
104 transform(B0, B1, B2, B3);
105 key_xor(13, B0, B1, B2, B3);
106 SBoxE5(B0, B1, B2, B3);
107 transform(B0, B1, B2, B3);
108 key_xor(14, B0, B1, B2, B3);
109 SBoxE6(B0, B1, B2, B3);
110 transform(B0, B1, B2, B3);
111 key_xor(15, B0, B1, B2, B3);
112 SBoxE7(B0, B1, B2, B3);
113 transform(B0, B1, B2, B3);
114
// Rounds 16-23.
115 key_xor(16, B0, B1, B2, B3);
116 SBoxE0(B0, B1, B2, B3);
117 transform(B0, B1, B2, B3);
118 key_xor(17, B0, B1, B2, B3);
119 SBoxE1(B0, B1, B2, B3);
120 transform(B0, B1, B2, B3);
121 key_xor(18, B0, B1, B2, B3);
122 SBoxE2(B0, B1, B2, B3);
123 transform(B0, B1, B2, B3);
124 key_xor(19, B0, B1, B2, B3);
125 SBoxE3(B0, B1, B2, B3);
126 transform(B0, B1, B2, B3);
127 key_xor(20, B0, B1, B2, B3);
128 SBoxE4(B0, B1, B2, B3);
129 transform(B0, B1, B2, B3);
130 key_xor(21, B0, B1, B2, B3);
131 SBoxE5(B0, B1, B2, B3);
132 transform(B0, B1, B2, B3);
133 key_xor(22, B0, B1, B2, B3);
134 SBoxE6(B0, B1, B2, B3);
135 transform(B0, B1, B2, B3);
136 key_xor(23, B0, B1, B2, B3);
137 SBoxE7(B0, B1, B2, B3);
138 transform(B0, B1, B2, B3);
139
// Rounds 24-31. The last round (31) has no transform; it is followed
// directly by the extra key_xor(32).
140 key_xor(24, B0, B1, B2, B3);
141 SBoxE0(B0, B1, B2, B3);
142 transform(B0, B1, B2, B3);
143 key_xor(25, B0, B1, B2, B3);
144 SBoxE1(B0, B1, B2, B3);
145 transform(B0, B1, B2, B3);
146 key_xor(26, B0, B1, B2, B3);
147 SBoxE2(B0, B1, B2, B3);
148 transform(B0, B1, B2, B3);
149 key_xor(27, B0, B1, B2, B3);
150 SBoxE3(B0, B1, B2, B3);
151 transform(B0, B1, B2, B3);
152 key_xor(28, B0, B1, B2, B3);
153 SBoxE4(B0, B1, B2, B3);
154 transform(B0, B1, B2, B3);
155 key_xor(29, B0, B1, B2, B3);
156 SBoxE5(B0, B1, B2, B3);
157 transform(B0, B1, B2, B3);
158 key_xor(30, B0, B1, B2, B3);
159 SBoxE6(B0, B1, B2, B3);
160 transform(B0, B1, B2, B3);
161 key_xor(31, B0, B1, B2, B3);
162 SBoxE7(B0, B1, B2, B3);
163 key_xor(32, B0, B1, B2, B3);
164
// Undo the initial rearrangement, then write the four 32-byte chunks back
// at the offsets they were read from.
165 SIMD_8x32::transpose(B0, B1, B2, B3);
166 B0.store_le(out);
167 B1.store_le(out + 32);
168 B2.store_le(out + 64);
169 B3.store_le(out + 96);
170
171 SIMD_8x32::zero_registers();
172}
173
// Decrypts 128 bytes from `in` into `out` using four SIMD_8x32 values.
//
// This walks the steps of avx2_encrypt_8 backwards: load four 32-byte
// chunks, transpose, apply key_xor(32), then for r = 31 down to 0 run
//   [i_transform ->] SBoxD(r % 8) -> key_xor(r)
// where the i_transform is skipped only for r = 31 (the encryption round
// that had no transform). Finally transpose back and store.
//
// @param in  128 input bytes (presumably eight 16-byte blocks, per the _8
//            suffix -- confirm against the caller)
// @param out 128 output bytes, written at the same offsets the input is read
//
// NOTE(review): the embedded listing numbers jump from 173 to 175 at this
// definition and the symbol index at the end of the page lists BOTAN_AVX2_FN,
// so an attribute line presumably preceded this function upstream -- confirm
// before compiling from this listing.
175void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const {
176 using namespace Botan::Serpent_F;
177
// NOTE(review): reset_registers() / zero_registers() are opaque from here;
// presumably they manage SIMD register state on entry/exit -- confirm in
// simd_avx2.h.
178 SIMD_8x32::reset_registers();
179
// Read the input as four consecutive 32-byte chunks.
180 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
181 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
182 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
183 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
184
// Rearranged before the rounds and again before the store; presumably this
// groups the same word of every block into one register -- TODO confirm.
185 SIMD_8x32::transpose(B0, B1, B2, B3);
186
// key_xor(r, ...) mixes round key r (taken from m_round_key) into B0..B3.
187 const Key_Inserter key_xor(m_round_key.data());
188
// Rounds 31-24: start with the extra key 32; round 31 has no i_transform.
// S-boxes run D7 down to D0.
189 key_xor(32, B0, B1, B2, B3);
190 SBoxD7(B0, B1, B2, B3);
191 key_xor(31, B0, B1, B2, B3);
192 i_transform(B0, B1, B2, B3);
193 SBoxD6(B0, B1, B2, B3);
194 key_xor(30, B0, B1, B2, B3);
195 i_transform(B0, B1, B2, B3);
196 SBoxD5(B0, B1, B2, B3);
197 key_xor(29, B0, B1, B2, B3);
198 i_transform(B0, B1, B2, B3);
199 SBoxD4(B0, B1, B2, B3);
200 key_xor(28, B0, B1, B2, B3);
201 i_transform(B0, B1, B2, B3);
202 SBoxD3(B0, B1, B2, B3);
203 key_xor(27, B0, B1, B2, B3);
204 i_transform(B0, B1, B2, B3);
205 SBoxD2(B0, B1, B2, B3);
206 key_xor(26, B0, B1, B2, B3);
207 i_transform(B0, B1, B2, B3);
208 SBoxD1(B0, B1, B2, B3);
209 key_xor(25, B0, B1, B2, B3);
210 i_transform(B0, B1, B2, B3);
211 SBoxD0(B0, B1, B2, B3);
212 key_xor(24, B0, B1, B2, B3);
213
// Rounds 23-16.
214 i_transform(B0, B1, B2, B3);
215 SBoxD7(B0, B1, B2, B3);
216 key_xor(23, B0, B1, B2, B3);
217 i_transform(B0, B1, B2, B3);
218 SBoxD6(B0, B1, B2, B3);
219 key_xor(22, B0, B1, B2, B3);
220 i_transform(B0, B1, B2, B3);
221 SBoxD5(B0, B1, B2, B3);
222 key_xor(21, B0, B1, B2, B3);
223 i_transform(B0, B1, B2, B3);
224 SBoxD4(B0, B1, B2, B3);
225 key_xor(20, B0, B1, B2, B3);
226 i_transform(B0, B1, B2, B3);
227 SBoxD3(B0, B1, B2, B3);
228 key_xor(19, B0, B1, B2, B3);
229 i_transform(B0, B1, B2, B3);
230 SBoxD2(B0, B1, B2, B3);
231 key_xor(18, B0, B1, B2, B3);
232 i_transform(B0, B1, B2, B3);
233 SBoxD1(B0, B1, B2, B3);
234 key_xor(17, B0, B1, B2, B3);
235 i_transform(B0, B1, B2, B3);
236 SBoxD0(B0, B1, B2, B3);
237 key_xor(16, B0, B1, B2, B3);
238
// Rounds 15-8.
239 i_transform(B0, B1, B2, B3);
240 SBoxD7(B0, B1, B2, B3);
241 key_xor(15, B0, B1, B2, B3);
242 i_transform(B0, B1, B2, B3);
243 SBoxD6(B0, B1, B2, B3);
244 key_xor(14, B0, B1, B2, B3);
245 i_transform(B0, B1, B2, B3);
246 SBoxD5(B0, B1, B2, B3);
247 key_xor(13, B0, B1, B2, B3);
248 i_transform(B0, B1, B2, B3);
249 SBoxD4(B0, B1, B2, B3);
250 key_xor(12, B0, B1, B2, B3);
251 i_transform(B0, B1, B2, B3);
252 SBoxD3(B0, B1, B2, B3);
253 key_xor(11, B0, B1, B2, B3);
254 i_transform(B0, B1, B2, B3);
255 SBoxD2(B0, B1, B2, B3);
256 key_xor(10, B0, B1, B2, B3);
257 i_transform(B0, B1, B2, B3);
258 SBoxD1(B0, B1, B2, B3);
259 key_xor(9, B0, B1, B2, B3);
260 i_transform(B0, B1, B2, B3);
261 SBoxD0(B0, B1, B2, B3);
262 key_xor(8, B0, B1, B2, B3);
263
// Rounds 7-0: ends with round key 0, the first key used by encryption.
264 i_transform(B0, B1, B2, B3);
265 SBoxD7(B0, B1, B2, B3);
266 key_xor(7, B0, B1, B2, B3);
267 i_transform(B0, B1, B2, B3);
268 SBoxD6(B0, B1, B2, B3);
269 key_xor(6, B0, B1, B2, B3);
270 i_transform(B0, B1, B2, B3);
271 SBoxD5(B0, B1, B2, B3);
272 key_xor(5, B0, B1, B2, B3);
273 i_transform(B0, B1, B2, B3);
274 SBoxD4(B0, B1, B2, B3);
275 key_xor(4, B0, B1, B2, B3);
276 i_transform(B0, B1, B2, B3);
277 SBoxD3(B0, B1, B2, B3);
278 key_xor(3, B0, B1, B2, B3);
279 i_transform(B0, B1, B2, B3);
280 SBoxD2(B0, B1, B2, B3);
281 key_xor(2, B0, B1, B2, B3);
282 i_transform(B0, B1, B2, B3);
283 SBoxD1(B0, B1, B2, B3);
284 key_xor(1, B0, B1, B2, B3);
285 i_transform(B0, B1, B2, B3);
286 SBoxD0(B0, B1, B2, B3);
287 key_xor(0, B0, B1, B2, B3);
288
// Undo the initial rearrangement, then write the four 32-byte chunks back
// at the offsets they were read from.
289 SIMD_8x32::transpose(B0, B1, B2, B3);
290
291 B0.store_le(out);
292 B1.store_le(out + 32);
293 B2.store_le(out + 64);
294 B3.store_le(out + 96);
295
296 SIMD_8x32::zero_registers();
297}
298
299#undef transform
300#undef i_transform
301
302} // namespace Botan
BOTAN_FORCE_INLINE void transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:24
BOTAN_FORCE_INLINE void i_transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:41
BOTAN_FORCE_INLINE void SBoxE6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
#define BOTAN_AVX2_FN
Definition simd_avx2.h:15