7#include <botan/internal/serpent.h>
8#include <botan/internal/serpent_sbox.h>
9#include <botan/internal/simd_avx512.h>
// Body fragment of a 4-input bitwise network over a, b, c, d (the enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t2 are computed from the inputs only, t3 reuses t0. Outputs are produced
// in the order o0, o1, o2, o3; o1 depends on o0 and o2 depends on o0 and o1.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" and 0x96 is x ^ y ^ z - confirm.
14 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xb9>(b, d, c);
15 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xe2>(a, b, d);
16 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x36>(a, b, d);
17 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x26>(t0, d, b);
18 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
19 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, c, o0);
20 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xa9>(o0, o1, t2);
21 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t3 are computed from the inputs only. o0 is evaluated last because it
// consumes o3 and o1; o3 in turn consumes o1.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
29 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xe5>(d, b, c);
30 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x26>(c, d, b);
31 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa6>(a, b, c);
32 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2b>(a, b, d);
33 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
34 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x83>(t2, d, t0);
35 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o1);
36 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x65>(o3, o1, t2);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0 and t1 come from the inputs only; t2, t3 and t4 each reuse t0.
// o1 depends on o0; o2 and o3 depend only on temporaries and inputs.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
44 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x96>(c, b, d);
45 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xda>(a, b, c);
46 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x66>(d, t0, c);
47 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(a, b, t0);
48 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xa1>(a, d, t0);
49 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t2);
50 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xd2>(t3, d, o0);
51 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x2d>(t4, b, c);
52 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x2d>(t1, d, t2);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// o2 is computed early because t3 consumes it, and t4 consumes t3; the
// remaining outputs o0, o1, o3 follow once all temporaries exist.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
60 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x92>(d, c, b);
61 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3b>(d, b, c);
62 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xbc>(a, c, t0);
63 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x68>(t2, d, t1);
64 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6e>(a, c, o2);
65 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xb9>(a, d, t3);
66 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
67 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x6d>(t4, b, t2);
68 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x38>(t3, b, t0);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t3 come from the inputs only; t4 combines a with t0 and t3.
// No output depends on another output here; they are emitted in the order
// o1, o0, o3, o2.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
76 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc2>(c, b, d);
77 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x79>(b, c, d);
78 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x71>(a, b, d);
79 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6b>(a, b, d);
80 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xc2>(a, t0, t3);
81 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
82 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
83 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x49>(t3, c, t0);
84 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd6>(t4, b, t1);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t3 come from the inputs only; t4 reuses t1.
// o1 and o3 both consume o0; o2 depends only on t4, b and t0.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
92 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(b, d, c);
93 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x93>(b, c, d);
94 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc3>(a, b, c);
95 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x27>(a, b, d);
96 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x85>(a, c, t1);
97 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
98 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x2d>(t2, d, o0);
99 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x7a>(t4, b, t0);
100 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t3, t0, o0);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t1 reuses t0; t4 is computed after o1 because it consumes o1, and o2 then
// consumes t4. o0 and o3 depend only on a/t3 and the temporaries t0, t1.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
108 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x61>(d, c, b);
109 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(b, d, t0);
110 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x93>(a, b, d);
111 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0xb5>(a, b, c);
112 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
113 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
114 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7c>(a, b, o1);
115 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x1e>(t4, d, t0);
116 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x29>(t3, t0, t1);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0, t1, t3, t4 come from the inputs only; t2 reuses t1.
// No output depends on another output; each is built from temporaries and
// raw inputs.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
124 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x9b>(b, c, d);
125 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x61>(c, b, d);
126 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe3>(a, d, t1);
127 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x83>(b, c, d);
128 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x49>(a, b, c);
129 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
130 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xe1>(t2, b, c);
131 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t3, a, t1);
132 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t4, d, t2);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// Temporaries and outputs are interleaved: o0 is ready after t0/t1, o2 after
// t2, o3 after t3 (and o0), and o1 is last because it consumes o3 and o0.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
140 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(c, d, b);
141 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x49>(b, d, c);
142 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
143 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa9>(a, b, c);
144 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x3c>(t2, d, t0);
145 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x4d>(a, b, d);
146 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o0);
147 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x56>(o3, o0, t2);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t3 come from the inputs only. The outputs form a chain: o1 consumes o0,
// o2 consumes o0 and o1, and o3 consumes o0.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" and 0x96 is x ^ y ^ z - confirm.
155 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x73>(d, b, c);
156 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x68>(c, d, b);
157 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc5>(a, b, d);
158 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2d>(a, b, d);
159 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
160 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t2, c, o0);
161 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t3, o0, o1);
162 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x99>(o0, t3, c);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t2 come from the inputs only. t3 is computed after o2 because it
// consumes o2, and o3 is then derived from t3.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
170 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc6>(d, b, c);
171 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(d, c, b);
172 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe1>(a, b, c);
173 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, d, t0);
174 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
175 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t0, a, t1);
176 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x9b>(a, c, o2);
177 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5b>(t3, b, d);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// Temporaries and outputs are interleaved: t1 reuses t0, o2 consumes o0,
// t3 reuses t2, and o1/o3 are derived from t3/t4 respectively.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
185 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x94>(c, d, b);
186 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x99>(b, d, t0);
187 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
188 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x97>(a, b, d);
189 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t2, c, o0);
190 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(c, d, t2);
191 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x0e>(t3, b, t0);
192 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x1c>(a, b, t0);
193 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xb4>(t4, c, d);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t3 come from the inputs only; t4 reuses t0 and feeds both o1 and o2.
// No output depends on another output.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" and 0x96 is x ^ y ^ z - confirm.
201 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(d, c, b);
202 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xa6>(d, b, c);
203 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xb5>(a, b, d);
204 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x5e>(a, b, d);
205 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x50>(a, b, t0);
206 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
207 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t4, c, d);
208 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t4);
209 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5a>(t2, c, t0);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t2 come from the inputs only; t3 reuses t0 and t4 reuses t3.
// No output depends on another output; the 0xac step produces o3 here.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
217 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc9>(a, b, c);
218 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x65>(a, b, c);
219 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x25>(a, b, d);
220 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x63>(c, d, t0);
221 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x86>(a, b, t3);
222 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, c, t0);
223 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xc3>(t4, c, d);
224 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x47>(t1, d, t0);
225 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// All five temporaries t0..t4 come from the inputs only. o1 and o2 reuse t0;
// o3 is the only output that consumes another output (o0).
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" - confirm against simd_avx512.h.
233 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x07>(d, b, c);
234 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9e>(c, d, b);
235 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc6>(a, b, c);
236 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x34>(a, b, d);
237 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x2b>(a, c, d);
238 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
239 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xcb>(t2, d, t0);
240 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t0);
241 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t4, b, o0);
// Body fragment of a 4-input bitwise network over a, b, c, d (enclosing
// signature and the write-back of o0..o3 are not visible in this excerpt).
// t0..t2 come from the inputs only; t3 reuses t0 and t4 reuses t1.
// No output depends on another output.
// NOTE(review): if ternary_fn<imm>(x, y, z) uses the VPTERNLOG truth-table
// convention, 0xac is the select "x ? z : y" and 0x96 is x ^ y ^ z - confirm.
249 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(b, d, c);
250 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3e>(a, c, d);
251 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x1c>(a, b, d);
252 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x87>(t0, d, b);
253 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7d>(a, b, t1);
254 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
255 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, b, t0);
256 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t2, c, t1);
257 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x6d>(t4, c, d);
/*
* Const member of Serpent taking a 256-byte input buffer `in` and a 256-byte
* output buffer `out` (both declared as uint8_t[16 * 16]).
*
* Visible flow: load vectors from `in`, transpose the four vectors, apply
* key_xor with round indices 0 through 32 in ascending order with a
* transform() call before each of key_xor(1)..key_xor(31), transpose again,
* store to `out`, then call SIMD_16x32::zero_registers().
*
* NOTE(review): this excerpt is incomplete. The declarations/loads of B0 and
* B1, the store of B0, whatever per-round step sits between each key_xor and
* the following transform (presumably the S-box calls), and the closing brace
* are not visible here - consult the full file before changing anything.
*/
265void Serpent::avx512_encrypt_16(
const uint8_t in[16 * 16], uint8_t out[16 * 16])
const {
// B2 and B3 are read from byte offsets 128 and 192 of `in`.
270 SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
271 SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);
273 SIMD_16x32::transpose(B0, B1, B2, B3);
// Rounds: key_xor(r), then transform, then key_xor(r + 1), and so on.
277 key_xor(0, B0, B1, B2, B3);
279 transform(B0, B1, B2, B3);
280 key_xor(1, B0, B1, B2, B3);
282 transform(B0, B1, B2, B3);
283 key_xor(2, B0, B1, B2, B3);
285 transform(B0, B1, B2, B3);
286 key_xor(3, B0, B1, B2, B3);
288 transform(B0, B1, B2, B3);
289 key_xor(4, B0, B1, B2, B3);
291 transform(B0, B1, B2, B3);
292 key_xor(5, B0, B1, B2, B3);
294 transform(B0, B1, B2, B3);
295 key_xor(6, B0, B1, B2, B3);
297 transform(B0, B1, B2, B3);
298 key_xor(7, B0, B1, B2, B3);
300 transform(B0, B1, B2, B3);
302 key_xor(8, B0, B1, B2, B3);
304 transform(B0, B1, B2, B3);
305 key_xor(9, B0, B1, B2, B3);
307 transform(B0, B1, B2, B3);
308 key_xor(10, B0, B1, B2, B3);
310 transform(B0, B1, B2, B3);
311 key_xor(11, B0, B1, B2, B3);
313 transform(B0, B1, B2, B3);
314 key_xor(12, B0, B1, B2, B3);
316 transform(B0, B1, B2, B3);
317 key_xor(13, B0, B1, B2, B3);
319 transform(B0, B1, B2, B3);
320 key_xor(14, B0, B1, B2, B3);
322 transform(B0, B1, B2, B3);
323 key_xor(15, B0, B1, B2, B3);
325 transform(B0, B1, B2, B3);
327 key_xor(16, B0, B1, B2, B3);
329 transform(B0, B1, B2, B3);
330 key_xor(17, B0, B1, B2, B3);
332 transform(B0, B1, B2, B3);
333 key_xor(18, B0, B1, B2, B3);
335 transform(B0, B1, B2, B3);
336 key_xor(19, B0, B1, B2, B3);
338 transform(B0, B1, B2, B3);
339 key_xor(20, B0, B1, B2, B3);
341 transform(B0, B1, B2, B3);
342 key_xor(21, B0, B1, B2, B3);
344 transform(B0, B1, B2, B3);
345 key_xor(22, B0, B1, B2, B3);
347 transform(B0, B1, B2, B3);
348 key_xor(23, B0, B1, B2, B3);
350 transform(B0, B1, B2, B3);
352 key_xor(24, B0, B1, B2, B3);
354 transform(B0, B1, B2, B3);
355 key_xor(25, B0, B1, B2, B3);
357 transform(B0, B1, B2, B3);
358 key_xor(26, B0, B1, B2, B3);
360 transform(B0, B1, B2, B3);
361 key_xor(27, B0, B1, B2, B3);
363 transform(B0, B1, B2, B3);
364 key_xor(28, B0, B1, B2, B3);
366 transform(B0, B1, B2, B3);
367 key_xor(29, B0, B1, B2, B3);
369 transform(B0, B1, B2, B3);
370 key_xor(30, B0, B1, B2, B3);
372 transform(B0, B1, B2, B3);
373 key_xor(31, B0, B1, B2, B3);
// No transform() between the last two key additions: round index 32 is
// applied directly after 31.
375 key_xor(32, B0, B1, B2, B3);
377 SIMD_16x32::transpose(B0, B1, B2, B3);
// B1..B3 are written to byte offsets 64, 128 and 192 of `out`.
379 B1.store_le(out + 64);
380 B2.store_le(out + 128);
381 B3.store_le(out + 192);
// NOTE(review): presumably clears vector registers so that cipher state does
// not linger after return - confirm against SIMD_16x32's definition.
383 SIMD_16x32::zero_registers();
/*
* Const member of Serpent taking a 256-byte input buffer `in` and a 256-byte
* output buffer `out` (both declared as uint8_t[16 * 16]).
*
* Visible flow: load vectors from `in`, transpose the four vectors, apply
* key_xor with round indices 32 down to 0 in descending order, transpose
* again, store to `out`, then call SIMD_16x32::zero_registers().
*
* NOTE(review): this excerpt is incomplete. The declarations/loads of B0 and
* B1, the store of B0, the per-round steps between consecutive key_xor calls
* (presumably the inverse transform and inverse S-box calls), and the closing
* brace are not visible here - consult the full file before changing anything.
*/
387void Serpent::avx512_decrypt_16(
const uint8_t in[16 * 16], uint8_t out[16 * 16])
const {
// B2 and B3 are read from byte offsets 128 and 192 of `in`.
392 SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
393 SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);
395 SIMD_16x32::transpose(B0, B1, B2, B3);
// Round indices run in the reverse order of avx512_encrypt_16: 32 first,
// 0 last.
399 key_xor(32, B0, B1, B2, B3);
401 key_xor(31, B0, B1, B2, B3);
404 key_xor(30, B0, B1, B2, B3);
407 key_xor(29, B0, B1, B2, B3);
410 key_xor(28, B0, B1, B2, B3);
413 key_xor(27, B0, B1, B2, B3);
416 key_xor(26, B0, B1, B2, B3);
419 key_xor(25, B0, B1, B2, B3);
422 key_xor(24, B0, B1, B2, B3);
426 key_xor(23, B0, B1, B2, B3);
429 key_xor(22, B0, B1, B2, B3);
432 key_xor(21, B0, B1, B2, B3);
435 key_xor(20, B0, B1, B2, B3);
438 key_xor(19, B0, B1, B2, B3);
441 key_xor(18, B0, B1, B2, B3);
444 key_xor(17, B0, B1, B2, B3);
447 key_xor(16, B0, B1, B2, B3);
451 key_xor(15, B0, B1, B2, B3);
454 key_xor(14, B0, B1, B2, B3);
457 key_xor(13, B0, B1, B2, B3);
460 key_xor(12, B0, B1, B2, B3);
463 key_xor(11, B0, B1, B2, B3);
466 key_xor(10, B0, B1, B2, B3);
469 key_xor(9, B0, B1, B2, B3);
472 key_xor(8, B0, B1, B2, B3);
476 key_xor(7, B0, B1, B2, B3);
479 key_xor(6, B0, B1, B2, B3);
482 key_xor(5, B0, B1, B2, B3);
485 key_xor(4, B0, B1, B2, B3);
488 key_xor(3, B0, B1, B2, B3);
491 key_xor(2, B0, B1, B2, B3);
494 key_xor(1, B0, B1, B2, B3);
497 key_xor(0, B0, B1, B2, B3);
499 SIMD_16x32::transpose(B0, B1, B2, B3);
// B1..B3 are written to byte offsets 64, 128 and 192 of `out`.
502 B1.store_le(out + 64);
503 B2.store_le(out + 128);
504 B3.store_le(out + 192);
// NOTE(review): presumably clears vector registers so that cipher state does
// not linger after return - confirm against SIMD_16x32's definition.
506 SIMD_16x32::zero_registers();
// As written here, BOTAN_FORCE_INLINE expands to nothing, so the signatures
// below carry no inlining attribute from this definition.
// NOTE(review): the included project headers may already define this macro;
// check for a redefinition conflict in the full file.
#define BOTAN_FORCE_INLINE
// Signature lines only: no bodies or terminating semicolons are visible in
// this excerpt, and the order below does not follow the S-box numbering.
// i_transform is written in terms of a type T whose template header is not
// visible; it takes the four state words by non-const reference.
BOTAN_FORCE_INLINE void i_transform(T &B0, T &B1, T &B2, T &B3)
// Each SBox* helper takes four SIMD_16x32 values by non-const reference and
// returns void, so results are presumably delivered by overwriting a, b, c, d.
// NOTE(review): the E/D prefixes look like encrypt/decrypt variants numbered
// 0 through 7 - confirm against the full file.
BOTAN_FORCE_INLINE void SBoxE6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)