Botan 3.11.0
Crypto and TLS for C++
serpent_avx512.cpp
Go to the documentation of this file.
1/*
2* (C) 2023 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/serpent.h>
8
9#include <botan/internal/serpent_fn.h>
10#include <botan/internal/simd_avx512.h>
11
12namespace Botan {
13
14// TODO(Botan4) if minimum GCC is increased we can remove this
15#if defined(__GNUG__) && !defined(__clang__) && (__GNUG__ < 14)
16
17// These macros are redundant with the versions in serpent_sbox.h
18// but unfortunately removing them seems to trigger a bug in GCC
19// when building in amalgamation mode
20
// Function-like macro form of transform(): mixes four words in place using
// only the rotl<>/shl<> member templates and XOR. It is compiled only for
// GCC older than 14 (see the enclosing #if). In avx512_encrypt_16 it follows
// every SBoxE call except the final one.
// Each argument is expanded several times, so pass plain lvalues only.
21 #define transform(B0, B1, B2, B3) \
22 do { \
23 B0 = B0.rotl<13>(); \
24 B2 = B2.rotl<3>(); \
25 B1 ^= B0 ^ B2; \
26 B3 ^= B2 ^ B0.shl<3>(); \
27 B1 = B1.rotl<1>(); \
28 B3 = B3.rotl<7>(); \
29 B0 ^= B1 ^ B3; \
30 B2 ^= B3 ^ B1.shl<7>(); \
31 B0 = B0.rotl<5>(); \
32 B2 = B2.rotl<22>(); \
33 } while(0)
34
// Function-like macro form of i_transform(): undoes the transform() macro
// by replaying its steps in reverse order with rotr<> in place of rotl<>
// (the XOR steps are their own inverse). It is compiled only for GCC older
// than 14 (see the enclosing #if). In avx512_decrypt_16 it precedes every
// SBoxD call except the first one.
// Each argument is expanded several times, so pass plain lvalues only.
35 #define i_transform(B0, B1, B2, B3) \
36 do { \
37 B2 = B2.rotr<22>(); \
38 B0 = B0.rotr<5>(); \
39 B2 ^= B3 ^ B1.shl<7>(); \
40 B0 ^= B1 ^ B3; \
41 B3 = B3.rotr<7>(); \
42 B1 = B1.rotr<1>(); \
43 B3 ^= B2 ^ B0.shl<3>(); \
44 B1 ^= B0 ^ B2; \
45 B2 = B2.rotr<3>(); \
46 B0 = B0.rotr<13>(); \
47 } while(0)
48
49#endif
50
51namespace {
52
54 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xb9>(b, d, c);
55 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xe2>(a, b, d);
56 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x36>(a, b, d);
57 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x26>(t0, d, b);
58 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
59 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, c, o0);
60 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xa9>(o0, o1, t2);
61 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
62 a = o0;
63 b = o1;
64 c = o2;
65 d = o3;
66}
67
69 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xe5>(d, b, c);
70 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x26>(c, d, b);
71 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa6>(a, b, c);
72 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2b>(a, b, d);
73 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
74 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x83>(t2, d, t0);
75 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o1);
76 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x65>(o3, o1, t2);
77 a = o0;
78 b = o1;
79 c = o2;
80 d = o3;
81}
82
84 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x96>(c, b, d);
85 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xda>(a, b, c);
86 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x66>(d, t0, c);
87 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(a, b, t0);
88 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xa1>(a, d, t0);
89 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t2);
90 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xd2>(t3, d, o0);
91 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x2d>(t4, b, c);
92 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x2d>(t1, d, t2);
93 a = o0;
94 b = o1;
95 c = o2;
96 d = o3;
97}
98
100 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x92>(d, c, b);
101 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3b>(d, b, c);
102 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xbc>(a, c, t0);
103 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x68>(t2, d, t1);
104 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6e>(a, c, o2);
105 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xb9>(a, d, t3);
106 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
107 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x6d>(t4, b, t2);
108 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x38>(t3, b, t0);
109 a = o0;
110 b = o1;
111 c = o2;
112 d = o3;
113}
114
116 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc2>(c, b, d);
117 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x79>(b, c, d);
118 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x71>(a, b, d);
119 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6b>(a, b, d);
120 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xc2>(a, t0, t3);
121 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
122 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
123 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x49>(t3, c, t0);
124 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd6>(t4, b, t1);
125 a = o0;
126 b = o1;
127 c = o2;
128 d = o3;
129}
130
132 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(b, d, c);
133 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x93>(b, c, d);
134 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc3>(a, b, c);
135 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x27>(a, b, d);
136 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x85>(a, c, t1);
137 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
138 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x2d>(t2, d, o0);
139 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x7a>(t4, b, t0);
140 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t3, t0, o0);
141 a = o0;
142 b = o1;
143 c = o2;
144 d = o3;
145}
146
148 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x61>(d, c, b);
149 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(b, d, t0);
150 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x93>(a, b, d);
151 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0xb5>(a, b, c);
152 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
153 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
154 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7c>(a, b, o1);
155 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x1e>(t4, d, t0);
156 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x29>(t3, t0, t1);
157 a = o0;
158 b = o1;
159 c = o2;
160 d = o3;
161}
162
164 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x9b>(b, c, d);
165 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x61>(c, b, d);
166 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe3>(a, d, t1);
167 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x83>(b, c, d);
168 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x49>(a, b, c);
169 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
170 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xe1>(t2, b, c);
171 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t3, a, t1);
172 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t4, d, t2);
173 a = o0;
174 b = o1;
175 c = o2;
176 d = o3;
177}
178
180 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(c, d, b);
181 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x49>(b, d, c);
182 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
183 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa9>(a, b, c);
184 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x3c>(t2, d, t0);
185 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x4d>(a, b, d);
186 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o0);
187 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x56>(o3, o0, t2);
188 a = o0;
189 b = o1;
190 c = o2;
191 d = o3;
192}
193
195 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x73>(d, b, c);
196 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x68>(c, d, b);
197 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc5>(a, b, d);
198 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2d>(a, b, d);
199 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
200 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t2, c, o0);
201 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t3, o0, o1);
202 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x99>(o0, t3, c);
203 a = o0;
204 b = o1;
205 c = o2;
206 d = o3;
207}
208
210 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc6>(d, b, c);
211 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(d, c, b);
212 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe1>(a, b, c);
213 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, d, t0);
214 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
215 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t0, a, t1);
216 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x9b>(a, c, o2);
217 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5b>(t3, b, d);
218 a = o0;
219 b = o1;
220 c = o2;
221 d = o3;
222}
223
225 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x94>(c, d, b);
226 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x99>(b, d, t0);
227 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
228 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x97>(a, b, d);
229 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t2, c, o0);
230 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(c, d, t2);
231 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x0e>(t3, b, t0);
232 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x1c>(a, b, t0);
233 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xb4>(t4, c, d);
234 a = o0;
235 b = o1;
236 c = o2;
237 d = o3;
238}
239
241 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(d, c, b);
242 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xa6>(d, b, c);
243 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xb5>(a, b, d);
244 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x5e>(a, b, d);
245 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x50>(a, b, t0);
246 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
247 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t4, c, d);
248 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t4);
249 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5a>(t2, c, t0);
250 a = o0;
251 b = o1;
252 c = o2;
253 d = o3;
254}
255
257 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc9>(a, b, c);
258 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x65>(a, b, c);
259 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x25>(a, b, d);
260 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x63>(c, d, t0);
261 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x86>(a, b, t3);
262 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, c, t0);
263 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xc3>(t4, c, d);
264 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x47>(t1, d, t0);
265 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
266 a = o0;
267 b = o1;
268 c = o2;
269 d = o3;
270}
271
273 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x07>(d, b, c);
274 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9e>(c, d, b);
275 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc6>(a, b, c);
276 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x34>(a, b, d);
277 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x2b>(a, c, d);
278 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
279 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xcb>(t2, d, t0);
280 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t0);
281 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t4, b, o0);
282 a = o0;
283 b = o1;
284 c = o2;
285 d = o3;
286}
287
289 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(b, d, c);
290 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3e>(a, c, d);
291 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x1c>(a, b, d);
292 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x87>(t0, d, b);
293 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7d>(a, b, t1);
294 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
295 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, b, t0);
296 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t2, c, t1);
297 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x6d>(t4, c, d);
298 a = o0;
299 b = o1;
300 c = o2;
301 d = o3;
302}
303
304} // namespace
305
// Encrypts 16 * 16 = 256 bytes from `in` into `out` in one pass.
//
// in:  256 input bytes, read as four 64-byte little-endian vector loads.
// out: 256 output bytes, written as four 64-byte little-endian vector stores.
//
// Structure: for r = 0..30 the sequence is key_xor(r), SBoxE<r mod 8>,
// transform. The last round is key_xor(31), SBoxE7, then key_xor(32) with no
// transform. Round keys come from m_round_key.
//
// NOTE(review): listing line 424 (just before the closing brace) is absent
// from this extract; check the upstream file for a statement there.
306void BOTAN_FN_ISA_AVX512 Serpent::avx512_encrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const {
307 using namespace Botan::Serpent_F;
308
// Load the input as four 64-byte vectors.
309 SIMD_16x32 B0 = SIMD_16x32::load_le(in);
310 SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64);
311 SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
312 SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);
313
// Presumably regroups the data so that B<i> holds word i of every block -
// TODO confirm against SIMD_16x32::transpose.
314 SIMD_16x32::transpose(B0, B1, B2, B3);
315
316 const Key_Inserter key_xor(m_round_key.data());
317
// Rounds 0-7
318 key_xor(0, B0, B1, B2, B3);
319 SBoxE0(B0, B1, B2, B3);
320 transform(B0, B1, B2, B3);
321 key_xor(1, B0, B1, B2, B3);
322 SBoxE1(B0, B1, B2, B3);
323 transform(B0, B1, B2, B3);
324 key_xor(2, B0, B1, B2, B3);
325 SBoxE2(B0, B1, B2, B3);
326 transform(B0, B1, B2, B3);
327 key_xor(3, B0, B1, B2, B3);
328 SBoxE3(B0, B1, B2, B3);
329 transform(B0, B1, B2, B3);
330 key_xor(4, B0, B1, B2, B3);
331 SBoxE4(B0, B1, B2, B3);
332 transform(B0, B1, B2, B3);
333 key_xor(5, B0, B1, B2, B3);
334 SBoxE5(B0, B1, B2, B3);
335 transform(B0, B1, B2, B3);
336 key_xor(6, B0, B1, B2, B3);
337 SBoxE6(B0, B1, B2, B3);
338 transform(B0, B1, B2, B3);
339 key_xor(7, B0, B1, B2, B3);
340 SBoxE7(B0, B1, B2, B3);
341 transform(B0, B1, B2, B3);
342
// Rounds 8-15
343 key_xor(8, B0, B1, B2, B3);
344 SBoxE0(B0, B1, B2, B3);
345 transform(B0, B1, B2, B3);
346 key_xor(9, B0, B1, B2, B3);
347 SBoxE1(B0, B1, B2, B3);
348 transform(B0, B1, B2, B3);
349 key_xor(10, B0, B1, B2, B3);
350 SBoxE2(B0, B1, B2, B3);
351 transform(B0, B1, B2, B3);
352 key_xor(11, B0, B1, B2, B3);
353 SBoxE3(B0, B1, B2, B3);
354 transform(B0, B1, B2, B3);
355 key_xor(12, B0, B1, B2, B3);
356 SBoxE4(B0, B1, B2, B3);
357 transform(B0, B1, B2, B3);
358 key_xor(13, B0, B1, B2, B3);
359 SBoxE5(B0, B1, B2, B3);
360 transform(B0, B1, B2, B3);
361 key_xor(14, B0, B1, B2, B3);
362 SBoxE6(B0, B1, B2, B3);
363 transform(B0, B1, B2, B3);
364 key_xor(15, B0, B1, B2, B3);
365 SBoxE7(B0, B1, B2, B3);
366 transform(B0, B1, B2, B3);
367
// Rounds 16-23
368 key_xor(16, B0, B1, B2, B3);
369 SBoxE0(B0, B1, B2, B3);
370 transform(B0, B1, B2, B3);
371 key_xor(17, B0, B1, B2, B3);
372 SBoxE1(B0, B1, B2, B3);
373 transform(B0, B1, B2, B3);
374 key_xor(18, B0, B1, B2, B3);
375 SBoxE2(B0, B1, B2, B3);
376 transform(B0, B1, B2, B3);
377 key_xor(19, B0, B1, B2, B3);
378 SBoxE3(B0, B1, B2, B3);
379 transform(B0, B1, B2, B3);
380 key_xor(20, B0, B1, B2, B3);
381 SBoxE4(B0, B1, B2, B3);
382 transform(B0, B1, B2, B3);
383 key_xor(21, B0, B1, B2, B3);
384 SBoxE5(B0, B1, B2, B3);
385 transform(B0, B1, B2, B3);
386 key_xor(22, B0, B1, B2, B3);
387 SBoxE6(B0, B1, B2, B3);
388 transform(B0, B1, B2, B3);
389 key_xor(23, B0, B1, B2, B3);
390 SBoxE7(B0, B1, B2, B3);
391 transform(B0, B1, B2, B3);
392
// Rounds 24-31; the last round swaps the transform for a final key_xor(32).
393 key_xor(24, B0, B1, B2, B3);
394 SBoxE0(B0, B1, B2, B3);
395 transform(B0, B1, B2, B3);
396 key_xor(25, B0, B1, B2, B3);
397 SBoxE1(B0, B1, B2, B3);
398 transform(B0, B1, B2, B3);
399 key_xor(26, B0, B1, B2, B3);
400 SBoxE2(B0, B1, B2, B3);
401 transform(B0, B1, B2, B3);
402 key_xor(27, B0, B1, B2, B3);
403 SBoxE3(B0, B1, B2, B3);
404 transform(B0, B1, B2, B3);
405 key_xor(28, B0, B1, B2, B3);
406 SBoxE4(B0, B1, B2, B3);
407 transform(B0, B1, B2, B3);
408 key_xor(29, B0, B1, B2, B3);
409 SBoxE5(B0, B1, B2, B3);
410 transform(B0, B1, B2, B3);
411 key_xor(30, B0, B1, B2, B3);
412 SBoxE6(B0, B1, B2, B3);
413 transform(B0, B1, B2, B3);
414 key_xor(31, B0, B1, B2, B3);
415 SBoxE7(B0, B1, B2, B3);
416 key_xor(32, B0, B1, B2, B3);
417
// Transpose again and write the four vectors out at the same offsets used
// for loading.
418 SIMD_16x32::transpose(B0, B1, B2, B3);
419 B0.store_le(out);
420 B1.store_le(out + 64);
421 B2.store_le(out + 128);
422 B3.store_le(out + 192);
423
425}
426
// Decrypts 16 * 16 = 256 bytes from `in` into `out` in one pass.
//
// in:  256 input bytes, read as four 64-byte little-endian vector loads.
// out: 256 output bytes, written as four 64-byte little-endian vector stores.
//
// Structure: the sequence opens with key_xor(32), SBoxD7, key_xor(31). Then
// for r = 30 down to 0 it is i_transform, SBoxD<r mod 8>, key_xor(r). This
// mirrors the encryption round order in reverse. Round keys come from
// m_round_key.
//
// NOTE(review): listing line 546 (just before the closing brace) is absent
// from this extract; check the upstream file for a statement there.
427void BOTAN_FN_ISA_AVX512 Serpent::avx512_decrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const {
428 using namespace Botan::Serpent_F;
429
// Load the input as four 64-byte vectors.
430 SIMD_16x32 B0 = SIMD_16x32::load_le(in);
431 SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64);
432 SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
433 SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);
434
// Presumably regroups the data so that B<i> holds word i of every block -
// TODO confirm against SIMD_16x32::transpose.
435 SIMD_16x32::transpose(B0, B1, B2, B3);
436
437 const Key_Inserter key_xor(m_round_key.data());
438
// Rounds 31-24; the first step has no i_transform because the final
// encryption round applies none.
439 key_xor(32, B0, B1, B2, B3);
440 SBoxD7(B0, B1, B2, B3);
441 key_xor(31, B0, B1, B2, B3);
442 i_transform(B0, B1, B2, B3);
443 SBoxD6(B0, B1, B2, B3);
444 key_xor(30, B0, B1, B2, B3);
445 i_transform(B0, B1, B2, B3);
446 SBoxD5(B0, B1, B2, B3);
447 key_xor(29, B0, B1, B2, B3);
448 i_transform(B0, B1, B2, B3);
449 SBoxD4(B0, B1, B2, B3);
450 key_xor(28, B0, B1, B2, B3);
451 i_transform(B0, B1, B2, B3);
452 SBoxD3(B0, B1, B2, B3);
453 key_xor(27, B0, B1, B2, B3);
454 i_transform(B0, B1, B2, B3);
455 SBoxD2(B0, B1, B2, B3);
456 key_xor(26, B0, B1, B2, B3);
457 i_transform(B0, B1, B2, B3);
458 SBoxD1(B0, B1, B2, B3);
459 key_xor(25, B0, B1, B2, B3);
460 i_transform(B0, B1, B2, B3);
461 SBoxD0(B0, B1, B2, B3);
462 key_xor(24, B0, B1, B2, B3);
463
// Rounds 23-16
464 i_transform(B0, B1, B2, B3);
465 SBoxD7(B0, B1, B2, B3);
466 key_xor(23, B0, B1, B2, B3);
467 i_transform(B0, B1, B2, B3);
468 SBoxD6(B0, B1, B2, B3);
469 key_xor(22, B0, B1, B2, B3);
470 i_transform(B0, B1, B2, B3);
471 SBoxD5(B0, B1, B2, B3);
472 key_xor(21, B0, B1, B2, B3);
473 i_transform(B0, B1, B2, B3);
474 SBoxD4(B0, B1, B2, B3);
475 key_xor(20, B0, B1, B2, B3);
476 i_transform(B0, B1, B2, B3);
477 SBoxD3(B0, B1, B2, B3);
478 key_xor(19, B0, B1, B2, B3);
479 i_transform(B0, B1, B2, B3);
480 SBoxD2(B0, B1, B2, B3);
481 key_xor(18, B0, B1, B2, B3);
482 i_transform(B0, B1, B2, B3);
483 SBoxD1(B0, B1, B2, B3);
484 key_xor(17, B0, B1, B2, B3);
485 i_transform(B0, B1, B2, B3);
486 SBoxD0(B0, B1, B2, B3);
487 key_xor(16, B0, B1, B2, B3);
488
// Rounds 15-8
489 i_transform(B0, B1, B2, B3);
490 SBoxD7(B0, B1, B2, B3);
491 key_xor(15, B0, B1, B2, B3);
492 i_transform(B0, B1, B2, B3);
493 SBoxD6(B0, B1, B2, B3);
494 key_xor(14, B0, B1, B2, B3);
495 i_transform(B0, B1, B2, B3);
496 SBoxD5(B0, B1, B2, B3);
497 key_xor(13, B0, B1, B2, B3);
498 i_transform(B0, B1, B2, B3);
499 SBoxD4(B0, B1, B2, B3);
500 key_xor(12, B0, B1, B2, B3);
501 i_transform(B0, B1, B2, B3);
502 SBoxD3(B0, B1, B2, B3);
503 key_xor(11, B0, B1, B2, B3);
504 i_transform(B0, B1, B2, B3);
505 SBoxD2(B0, B1, B2, B3);
506 key_xor(10, B0, B1, B2, B3);
507 i_transform(B0, B1, B2, B3);
508 SBoxD1(B0, B1, B2, B3);
509 key_xor(9, B0, B1, B2, B3);
510 i_transform(B0, B1, B2, B3);
511 SBoxD0(B0, B1, B2, B3);
512 key_xor(8, B0, B1, B2, B3);
513
// Rounds 7-0
514 i_transform(B0, B1, B2, B3);
515 SBoxD7(B0, B1, B2, B3);
516 key_xor(7, B0, B1, B2, B3);
517 i_transform(B0, B1, B2, B3);
518 SBoxD6(B0, B1, B2, B3);
519 key_xor(6, B0, B1, B2, B3);
520 i_transform(B0, B1, B2, B3);
521 SBoxD5(B0, B1, B2, B3);
522 key_xor(5, B0, B1, B2, B3);
523 i_transform(B0, B1, B2, B3);
524 SBoxD4(B0, B1, B2, B3);
525 key_xor(4, B0, B1, B2, B3);
526 i_transform(B0, B1, B2, B3);
527 SBoxD3(B0, B1, B2, B3);
528 key_xor(3, B0, B1, B2, B3);
529 i_transform(B0, B1, B2, B3);
530 SBoxD2(B0, B1, B2, B3);
531 key_xor(2, B0, B1, B2, B3);
532 i_transform(B0, B1, B2, B3);
533 SBoxD1(B0, B1, B2, B3);
534 key_xor(1, B0, B1, B2, B3);
535 i_transform(B0, B1, B2, B3);
536 SBoxD0(B0, B1, B2, B3);
537 key_xor(0, B0, B1, B2, B3);
538
// Transpose again and write the four vectors out at the same offsets used
// for loading.
539 SIMD_16x32::transpose(B0, B1, B2, B3);
540
541 B0.store_le(out);
542 B1.store_le(out + 64);
543 B2.store_le(out + 128);
544 B3.store_le(out + 192);
545
547}
548
549// TODO(Botan4) remove when compiler hack above is removed
550#undef transform
551#undef i_transform
552
553} // namespace Botan
static BOTAN_FN_ISA_AVX512 void transpose(SIMD_16x32 &B0, SIMD_16x32 &B1, SIMD_16x32 &B2, SIMD_16x32 &B3)
BOTAN_FN_ISA_AVX512 void store_le(uint8_t out[]) const
Definition simd_avx512.h:71
static BOTAN_FN_ISA_AVX512 SIMD_16x32 ternary_fn(const SIMD_16x32 &a, const SIMD_16x32 &b, const SIMD_16x32 &c)
static BOTAN_FN_ISA_AVX512 SIMD_16x32 load_le(const uint8_t *in)
Definition simd_avx512.h:63
static BOTAN_FN_ISA_AVX512 void zero_registers()
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
BOTAN_FORCE_INLINE void transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:35
BOTAN_FORCE_INLINE void SBoxD5(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD4(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE0(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE1(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE3(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD0(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE5(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD6(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD3(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void i_transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:52
BOTAN_FORCE_INLINE void SBoxD2(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE6(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE2(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE4(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD1(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE7(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD7(T &a, T &b, T &c, T &d)