Botan 3.7.1
Crypto and TLS for C++
serpent_avx512.cpp
Go to the documentation of this file.
1/*
2* (C) 2023 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/serpent.h>
8#include <botan/internal/serpent_sbox.h>
9#include <botan/internal/simd_avx512.h>
10
11namespace Botan {
12
13namespace {
14
15BOTAN_FORCE_INLINE void SBoxE0(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
16 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xb9>(b, d, c);
17 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xe2>(a, b, d);
18 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x36>(a, b, d);
19 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x26>(t0, d, b);
20 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
21 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, c, o0);
22 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xa9>(o0, o1, t2);
23 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
24 a = o0;
25 b = o1;
26 c = o2;
27 d = o3;
28}
29
30BOTAN_FORCE_INLINE void SBoxE1(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
31 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xe5>(d, b, c);
32 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x26>(c, d, b);
33 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa6>(a, b, c);
34 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2b>(a, b, d);
35 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
36 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x83>(t2, d, t0);
37 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o1);
38 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x65>(o3, o1, t2);
39 a = o0;
40 b = o1;
41 c = o2;
42 d = o3;
43}
44
45BOTAN_FORCE_INLINE void SBoxE2(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
46 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x96>(c, b, d);
47 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xda>(a, b, c);
48 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x66>(d, t0, c);
49 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(a, b, t0);
50 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xa1>(a, d, t0);
51 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t2);
52 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xd2>(t3, d, o0);
53 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x2d>(t4, b, c);
54 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x2d>(t1, d, t2);
55 a = o0;
56 b = o1;
57 c = o2;
58 d = o3;
59}
60
61BOTAN_FORCE_INLINE void SBoxE3(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
62 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x92>(d, c, b);
63 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3b>(d, b, c);
64 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xbc>(a, c, t0);
65 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x68>(t2, d, t1);
66 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6e>(a, c, o2);
67 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xb9>(a, d, t3);
68 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
69 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x6d>(t4, b, t2);
70 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x38>(t3, b, t0);
71 a = o0;
72 b = o1;
73 c = o2;
74 d = o3;
75}
76
77BOTAN_FORCE_INLINE void SBoxE4(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
78 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc2>(c, b, d);
79 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x79>(b, c, d);
80 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x71>(a, b, d);
81 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x6b>(a, b, d);
82 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0xc2>(a, t0, t3);
83 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
84 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
85 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x49>(t3, c, t0);
86 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd6>(t4, b, t1);
87 a = o0;
88 b = o1;
89 c = o2;
90 d = o3;
91}
92
93BOTAN_FORCE_INLINE void SBoxE5(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
94 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(b, d, c);
95 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x93>(b, c, d);
96 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc3>(a, b, c);
97 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x27>(a, b, d);
98 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x85>(a, c, t1);
99 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
100 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x2d>(t2, d, o0);
101 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x7a>(t4, b, t0);
102 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t3, t0, o0);
103 a = o0;
104 b = o1;
105 c = o2;
106 d = o3;
107}
108
109BOTAN_FORCE_INLINE void SBoxE6(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
110 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x61>(d, c, b);
111 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(b, d, t0);
112 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x93>(a, b, d);
113 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0xb5>(a, b, c);
114 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
115 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x3c>(t2, c, t0);
116 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7c>(a, b, o1);
117 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x1e>(t4, d, t0);
118 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x29>(t3, t0, t1);
119 a = o0;
120 b = o1;
121 c = o2;
122 d = o3;
123}
124
125BOTAN_FORCE_INLINE void SBoxE7(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
126 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x9b>(b, c, d);
127 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x61>(c, b, d);
128 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe3>(a, d, t1);
129 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x83>(b, c, d);
130 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x49>(a, b, c);
131 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
132 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xe1>(t2, b, c);
133 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t3, a, t1);
134 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x87>(t4, d, t2);
135 a = o0;
136 b = o1;
137 c = o2;
138 d = o3;
139}
140
141BOTAN_FORCE_INLINE void SBoxD0(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
142 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(c, d, b);
143 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x49>(b, d, c);
144 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
145 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xa9>(a, b, c);
146 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x3c>(t2, d, t0);
147 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x4d>(a, b, d);
148 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t3, c, o0);
149 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x56>(o3, o0, t2);
150 a = o0;
151 b = o1;
152 c = o2;
153 d = o3;
154}
155
156BOTAN_FORCE_INLINE void SBoxD1(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
157 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x73>(d, b, c);
158 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x68>(c, d, b);
159 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc5>(a, b, d);
160 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x2d>(a, b, d);
161 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
162 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t2, c, o0);
163 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t3, o0, o1);
164 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x99>(o0, t3, c);
165 a = o0;
166 b = o1;
167 c = o2;
168 d = o3;
169}
170
171BOTAN_FORCE_INLINE void SBoxD2(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
172 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc6>(d, b, c);
173 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9c>(d, c, b);
174 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xe1>(a, b, c);
175 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, d, t0);
176 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
177 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd1>(t0, a, t1);
178 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x9b>(a, c, o2);
179 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5b>(t3, b, d);
180 a = o0;
181 b = o1;
182 c = o2;
183 d = o3;
184}
185
186BOTAN_FORCE_INLINE void SBoxD3(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
187 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x94>(c, d, b);
188 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x99>(b, d, t0);
189 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
190 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x97>(a, b, d);
191 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t2, c, o0);
192 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x94>(c, d, t2);
193 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x0e>(t3, b, t0);
194 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x1c>(a, b, t0);
195 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xb4>(t4, c, d);
196 a = o0;
197 b = o1;
198 c = o2;
199 d = o3;
200}
201
202BOTAN_FORCE_INLINE void SBoxD4(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
203 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xa9>(d, c, b);
204 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0xa6>(d, b, c);
205 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xb5>(a, b, d);
206 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x5e>(a, b, d);
207 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x50>(a, b, t0);
208 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
209 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t4, c, d);
210 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t4);
211 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x5a>(t2, c, t0);
212 a = o0;
213 b = o1;
214 c = o2;
215 d = o3;
216}
217
218BOTAN_FORCE_INLINE void SBoxD5(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
219 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0xc9>(a, b, c);
220 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x65>(a, b, c);
221 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x25>(a, b, d);
222 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x63>(c, d, t0);
223 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x86>(a, b, t3);
224 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0x87>(t2, c, t0);
225 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xc3>(t4, c, d);
226 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x47>(t1, d, t0);
227 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
228 a = o0;
229 b = o1;
230 c = o2;
231 d = o3;
232}
233
234BOTAN_FORCE_INLINE void SBoxD6(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
235 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x07>(d, b, c);
236 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x9e>(c, d, b);
237 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0xc6>(a, b, c);
238 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x34>(a, b, d);
239 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x2b>(a, c, d);
240 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t1);
241 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0xcb>(t2, d, t0);
242 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0x4b>(t3, c, t0);
243 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x69>(t4, b, o0);
244 a = o0;
245 b = o1;
246 c = o2;
247 d = o3;
248}
249
250BOTAN_FORCE_INLINE void SBoxD7(SIMD_16x32& a, SIMD_16x32& b, SIMD_16x32& c, SIMD_16x32& d) {
251 const SIMD_16x32 t0 = SIMD_16x32::ternary_fn<0x67>(b, d, c);
252 const SIMD_16x32 t1 = SIMD_16x32::ternary_fn<0x3e>(a, c, d);
253 const SIMD_16x32 t2 = SIMD_16x32::ternary_fn<0x1c>(a, b, d);
254 const SIMD_16x32 t3 = SIMD_16x32::ternary_fn<0x87>(t0, d, b);
255 const SIMD_16x32 t4 = SIMD_16x32::ternary_fn<0x7d>(a, b, t1);
256 const SIMD_16x32 o0 = SIMD_16x32::ternary_fn<0xac>(a, t0, t3);
257 const SIMD_16x32 o1 = SIMD_16x32::ternary_fn<0x96>(t1, b, t0);
258 const SIMD_16x32 o2 = SIMD_16x32::ternary_fn<0xd2>(t2, c, t1);
259 const SIMD_16x32 o3 = SIMD_16x32::ternary_fn<0x6d>(t4, c, d);
260 a = o0;
261 b = o1;
262 c = o2;
263 d = o3;
264}
265
266} // namespace
267
269void Serpent::avx512_encrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const {
270 using namespace Botan::Serpent_F;
271
272 SIMD_16x32 B0 = SIMD_16x32::load_le(in);
273 SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64);
274 SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
275 SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);
276
277 SIMD_16x32::transpose(B0, B1, B2, B3);
278
279 const Key_Inserter key_xor(m_round_key.data());
280
281 key_xor(0, B0, B1, B2, B3);
282 SBoxE0(B0, B1, B2, B3);
283 transform(B0, B1, B2, B3);
284 key_xor(1, B0, B1, B2, B3);
285 SBoxE1(B0, B1, B2, B3);
286 transform(B0, B1, B2, B3);
287 key_xor(2, B0, B1, B2, B3);
288 SBoxE2(B0, B1, B2, B3);
289 transform(B0, B1, B2, B3);
290 key_xor(3, B0, B1, B2, B3);
291 SBoxE3(B0, B1, B2, B3);
292 transform(B0, B1, B2, B3);
293 key_xor(4, B0, B1, B2, B3);
294 SBoxE4(B0, B1, B2, B3);
295 transform(B0, B1, B2, B3);
296 key_xor(5, B0, B1, B2, B3);
297 SBoxE5(B0, B1, B2, B3);
298 transform(B0, B1, B2, B3);
299 key_xor(6, B0, B1, B2, B3);
300 SBoxE6(B0, B1, B2, B3);
301 transform(B0, B1, B2, B3);
302 key_xor(7, B0, B1, B2, B3);
303 SBoxE7(B0, B1, B2, B3);
304 transform(B0, B1, B2, B3);
305
306 key_xor(8, B0, B1, B2, B3);
307 SBoxE0(B0, B1, B2, B3);
308 transform(B0, B1, B2, B3);
309 key_xor(9, B0, B1, B2, B3);
310 SBoxE1(B0, B1, B2, B3);
311 transform(B0, B1, B2, B3);
312 key_xor(10, B0, B1, B2, B3);
313 SBoxE2(B0, B1, B2, B3);
314 transform(B0, B1, B2, B3);
315 key_xor(11, B0, B1, B2, B3);
316 SBoxE3(B0, B1, B2, B3);
317 transform(B0, B1, B2, B3);
318 key_xor(12, B0, B1, B2, B3);
319 SBoxE4(B0, B1, B2, B3);
320 transform(B0, B1, B2, B3);
321 key_xor(13, B0, B1, B2, B3);
322 SBoxE5(B0, B1, B2, B3);
323 transform(B0, B1, B2, B3);
324 key_xor(14, B0, B1, B2, B3);
325 SBoxE6(B0, B1, B2, B3);
326 transform(B0, B1, B2, B3);
327 key_xor(15, B0, B1, B2, B3);
328 SBoxE7(B0, B1, B2, B3);
329 transform(B0, B1, B2, B3);
330
331 key_xor(16, B0, B1, B2, B3);
332 SBoxE0(B0, B1, B2, B3);
333 transform(B0, B1, B2, B3);
334 key_xor(17, B0, B1, B2, B3);
335 SBoxE1(B0, B1, B2, B3);
336 transform(B0, B1, B2, B3);
337 key_xor(18, B0, B1, B2, B3);
338 SBoxE2(B0, B1, B2, B3);
339 transform(B0, B1, B2, B3);
340 key_xor(19, B0, B1, B2, B3);
341 SBoxE3(B0, B1, B2, B3);
342 transform(B0, B1, B2, B3);
343 key_xor(20, B0, B1, B2, B3);
344 SBoxE4(B0, B1, B2, B3);
345 transform(B0, B1, B2, B3);
346 key_xor(21, B0, B1, B2, B3);
347 SBoxE5(B0, B1, B2, B3);
348 transform(B0, B1, B2, B3);
349 key_xor(22, B0, B1, B2, B3);
350 SBoxE6(B0, B1, B2, B3);
351 transform(B0, B1, B2, B3);
352 key_xor(23, B0, B1, B2, B3);
353 SBoxE7(B0, B1, B2, B3);
354 transform(B0, B1, B2, B3);
355
356 key_xor(24, B0, B1, B2, B3);
357 SBoxE0(B0, B1, B2, B3);
358 transform(B0, B1, B2, B3);
359 key_xor(25, B0, B1, B2, B3);
360 SBoxE1(B0, B1, B2, B3);
361 transform(B0, B1, B2, B3);
362 key_xor(26, B0, B1, B2, B3);
363 SBoxE2(B0, B1, B2, B3);
364 transform(B0, B1, B2, B3);
365 key_xor(27, B0, B1, B2, B3);
366 SBoxE3(B0, B1, B2, B3);
367 transform(B0, B1, B2, B3);
368 key_xor(28, B0, B1, B2, B3);
369 SBoxE4(B0, B1, B2, B3);
370 transform(B0, B1, B2, B3);
371 key_xor(29, B0, B1, B2, B3);
372 SBoxE5(B0, B1, B2, B3);
373 transform(B0, B1, B2, B3);
374 key_xor(30, B0, B1, B2, B3);
375 SBoxE6(B0, B1, B2, B3);
376 transform(B0, B1, B2, B3);
377 key_xor(31, B0, B1, B2, B3);
378 SBoxE7(B0, B1, B2, B3);
379 key_xor(32, B0, B1, B2, B3);
380
381 SIMD_16x32::transpose(B0, B1, B2, B3);
382 B0.store_le(out);
383 B1.store_le(out + 64);
384 B2.store_le(out + 128);
385 B3.store_le(out + 192);
386
387 SIMD_16x32::zero_registers();
388}
389
391void Serpent::avx512_decrypt_16(const uint8_t in[16 * 16], uint8_t out[16 * 16]) const {
392 using namespace Botan::Serpent_F;
393
394 SIMD_16x32 B0 = SIMD_16x32::load_le(in);
395 SIMD_16x32 B1 = SIMD_16x32::load_le(in + 64);
396 SIMD_16x32 B2 = SIMD_16x32::load_le(in + 128);
397 SIMD_16x32 B3 = SIMD_16x32::load_le(in + 192);
398
399 SIMD_16x32::transpose(B0, B1, B2, B3);
400
401 const Key_Inserter key_xor(m_round_key.data());
402
403 key_xor(32, B0, B1, B2, B3);
404 SBoxD7(B0, B1, B2, B3);
405 key_xor(31, B0, B1, B2, B3);
406 i_transform(B0, B1, B2, B3);
407 SBoxD6(B0, B1, B2, B3);
408 key_xor(30, B0, B1, B2, B3);
409 i_transform(B0, B1, B2, B3);
410 SBoxD5(B0, B1, B2, B3);
411 key_xor(29, B0, B1, B2, B3);
412 i_transform(B0, B1, B2, B3);
413 SBoxD4(B0, B1, B2, B3);
414 key_xor(28, B0, B1, B2, B3);
415 i_transform(B0, B1, B2, B3);
416 SBoxD3(B0, B1, B2, B3);
417 key_xor(27, B0, B1, B2, B3);
418 i_transform(B0, B1, B2, B3);
419 SBoxD2(B0, B1, B2, B3);
420 key_xor(26, B0, B1, B2, B3);
421 i_transform(B0, B1, B2, B3);
422 SBoxD1(B0, B1, B2, B3);
423 key_xor(25, B0, B1, B2, B3);
424 i_transform(B0, B1, B2, B3);
425 SBoxD0(B0, B1, B2, B3);
426 key_xor(24, B0, B1, B2, B3);
427
428 i_transform(B0, B1, B2, B3);
429 SBoxD7(B0, B1, B2, B3);
430 key_xor(23, B0, B1, B2, B3);
431 i_transform(B0, B1, B2, B3);
432 SBoxD6(B0, B1, B2, B3);
433 key_xor(22, B0, B1, B2, B3);
434 i_transform(B0, B1, B2, B3);
435 SBoxD5(B0, B1, B2, B3);
436 key_xor(21, B0, B1, B2, B3);
437 i_transform(B0, B1, B2, B3);
438 SBoxD4(B0, B1, B2, B3);
439 key_xor(20, B0, B1, B2, B3);
440 i_transform(B0, B1, B2, B3);
441 SBoxD3(B0, B1, B2, B3);
442 key_xor(19, B0, B1, B2, B3);
443 i_transform(B0, B1, B2, B3);
444 SBoxD2(B0, B1, B2, B3);
445 key_xor(18, B0, B1, B2, B3);
446 i_transform(B0, B1, B2, B3);
447 SBoxD1(B0, B1, B2, B3);
448 key_xor(17, B0, B1, B2, B3);
449 i_transform(B0, B1, B2, B3);
450 SBoxD0(B0, B1, B2, B3);
451 key_xor(16, B0, B1, B2, B3);
452
453 i_transform(B0, B1, B2, B3);
454 SBoxD7(B0, B1, B2, B3);
455 key_xor(15, B0, B1, B2, B3);
456 i_transform(B0, B1, B2, B3);
457 SBoxD6(B0, B1, B2, B3);
458 key_xor(14, B0, B1, B2, B3);
459 i_transform(B0, B1, B2, B3);
460 SBoxD5(B0, B1, B2, B3);
461 key_xor(13, B0, B1, B2, B3);
462 i_transform(B0, B1, B2, B3);
463 SBoxD4(B0, B1, B2, B3);
464 key_xor(12, B0, B1, B2, B3);
465 i_transform(B0, B1, B2, B3);
466 SBoxD3(B0, B1, B2, B3);
467 key_xor(11, B0, B1, B2, B3);
468 i_transform(B0, B1, B2, B3);
469 SBoxD2(B0, B1, B2, B3);
470 key_xor(10, B0, B1, B2, B3);
471 i_transform(B0, B1, B2, B3);
472 SBoxD1(B0, B1, B2, B3);
473 key_xor(9, B0, B1, B2, B3);
474 i_transform(B0, B1, B2, B3);
475 SBoxD0(B0, B1, B2, B3);
476 key_xor(8, B0, B1, B2, B3);
477
478 i_transform(B0, B1, B2, B3);
479 SBoxD7(B0, B1, B2, B3);
480 key_xor(7, B0, B1, B2, B3);
481 i_transform(B0, B1, B2, B3);
482 SBoxD6(B0, B1, B2, B3);
483 key_xor(6, B0, B1, B2, B3);
484 i_transform(B0, B1, B2, B3);
485 SBoxD5(B0, B1, B2, B3);
486 key_xor(5, B0, B1, B2, B3);
487 i_transform(B0, B1, B2, B3);
488 SBoxD4(B0, B1, B2, B3);
489 key_xor(4, B0, B1, B2, B3);
490 i_transform(B0, B1, B2, B3);
491 SBoxD3(B0, B1, B2, B3);
492 key_xor(3, B0, B1, B2, B3);
493 i_transform(B0, B1, B2, B3);
494 SBoxD2(B0, B1, B2, B3);
495 key_xor(2, B0, B1, B2, B3);
496 i_transform(B0, B1, B2, B3);
497 SBoxD1(B0, B1, B2, B3);
498 key_xor(1, B0, B1, B2, B3);
499 i_transform(B0, B1, B2, B3);
500 SBoxD0(B0, B1, B2, B3);
501 key_xor(0, B0, B1, B2, B3);
502
503 SIMD_16x32::transpose(B0, B1, B2, B3);
504
505 B0.store_le(out);
506 B1.store_le(out + 64);
507 B2.store_le(out + 128);
508 B3.store_le(out + 192);
509
510 SIMD_16x32::zero_registers();
511}
512
513} // namespace Botan
#define BOTAN_FORCE_INLINE
Definition compiler.h:71
BOTAN_FORCE_INLINE void transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:25
BOTAN_FORCE_INLINE void SBoxD5(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD4(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE0(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE1(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE3(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD0(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE5(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD6(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD3(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void i_transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:42
BOTAN_FORCE_INLINE void SBoxD2(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE6(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE2(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE4(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD1(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxE7(T &a, T &b, T &c, T &d)
BOTAN_FORCE_INLINE void SBoxD7(T &a, T &b, T &c, T &d)
const SIMD_8x32 & b
#define BOTAN_AVX512_FN
Definition simd_avx512.h:16