Botan 3.6.1
Crypto and TLS for C&&
serpent_simd.cpp
Go to the documentation of this file.
1/*
2* Serpent (SIMD)
3* (C) 2009,2013 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/serpent.h>
9
10#include <botan/internal/serpent_sbox.h>
11#include <botan/internal/simd_32.h>
12
13namespace Botan {
14
15/*
16* SIMD Serpent Encryption of 4 blocks in parallel
17*/
18void Serpent::simd_encrypt_4(const uint8_t in[64], uint8_t out[64]) const {
19 using namespace Botan::Serpent_F;
20
22 SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16);
23 SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32);
24 SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48);
25
26 SIMD_4x32::transpose(B0, B1, B2, B3);
27
28 const Key_Inserter key_xor(m_round_key.data());
29
30 key_xor(0, B0, B1, B2, B3);
31 SBoxE0(B0, B1, B2, B3);
32 transform(B0, B1, B2, B3);
33 key_xor(1, B0, B1, B2, B3);
34 SBoxE1(B0, B1, B2, B3);
35 transform(B0, B1, B2, B3);
36 key_xor(2, B0, B1, B2, B3);
37 SBoxE2(B0, B1, B2, B3);
38 transform(B0, B1, B2, B3);
39 key_xor(3, B0, B1, B2, B3);
40 SBoxE3(B0, B1, B2, B3);
41 transform(B0, B1, B2, B3);
42 key_xor(4, B0, B1, B2, B3);
43 SBoxE4(B0, B1, B2, B3);
44 transform(B0, B1, B2, B3);
45 key_xor(5, B0, B1, B2, B3);
46 SBoxE5(B0, B1, B2, B3);
47 transform(B0, B1, B2, B3);
48 key_xor(6, B0, B1, B2, B3);
49 SBoxE6(B0, B1, B2, B3);
50 transform(B0, B1, B2, B3);
51 key_xor(7, B0, B1, B2, B3);
52 SBoxE7(B0, B1, B2, B3);
53 transform(B0, B1, B2, B3);
54
55 key_xor(8, B0, B1, B2, B3);
56 SBoxE0(B0, B1, B2, B3);
57 transform(B0, B1, B2, B3);
58 key_xor(9, B0, B1, B2, B3);
59 SBoxE1(B0, B1, B2, B3);
60 transform(B0, B1, B2, B3);
61 key_xor(10, B0, B1, B2, B3);
62 SBoxE2(B0, B1, B2, B3);
63 transform(B0, B1, B2, B3);
64 key_xor(11, B0, B1, B2, B3);
65 SBoxE3(B0, B1, B2, B3);
66 transform(B0, B1, B2, B3);
67 key_xor(12, B0, B1, B2, B3);
68 SBoxE4(B0, B1, B2, B3);
69 transform(B0, B1, B2, B3);
70 key_xor(13, B0, B1, B2, B3);
71 SBoxE5(B0, B1, B2, B3);
72 transform(B0, B1, B2, B3);
73 key_xor(14, B0, B1, B2, B3);
74 SBoxE6(B0, B1, B2, B3);
75 transform(B0, B1, B2, B3);
76 key_xor(15, B0, B1, B2, B3);
77 SBoxE7(B0, B1, B2, B3);
78 transform(B0, B1, B2, B3);
79
80 key_xor(16, B0, B1, B2, B3);
81 SBoxE0(B0, B1, B2, B3);
82 transform(B0, B1, B2, B3);
83 key_xor(17, B0, B1, B2, B3);
84 SBoxE1(B0, B1, B2, B3);
85 transform(B0, B1, B2, B3);
86 key_xor(18, B0, B1, B2, B3);
87 SBoxE2(B0, B1, B2, B3);
88 transform(B0, B1, B2, B3);
89 key_xor(19, B0, B1, B2, B3);
90 SBoxE3(B0, B1, B2, B3);
91 transform(B0, B1, B2, B3);
92 key_xor(20, B0, B1, B2, B3);
93 SBoxE4(B0, B1, B2, B3);
94 transform(B0, B1, B2, B3);
95 key_xor(21, B0, B1, B2, B3);
96 SBoxE5(B0, B1, B2, B3);
97 transform(B0, B1, B2, B3);
98 key_xor(22, B0, B1, B2, B3);
99 SBoxE6(B0, B1, B2, B3);
100 transform(B0, B1, B2, B3);
101 key_xor(23, B0, B1, B2, B3);
102 SBoxE7(B0, B1, B2, B3);
103 transform(B0, B1, B2, B3);
104
105 key_xor(24, B0, B1, B2, B3);
106 SBoxE0(B0, B1, B2, B3);
107 transform(B0, B1, B2, B3);
108 key_xor(25, B0, B1, B2, B3);
109 SBoxE1(B0, B1, B2, B3);
110 transform(B0, B1, B2, B3);
111 key_xor(26, B0, B1, B2, B3);
112 SBoxE2(B0, B1, B2, B3);
113 transform(B0, B1, B2, B3);
114 key_xor(27, B0, B1, B2, B3);
115 SBoxE3(B0, B1, B2, B3);
116 transform(B0, B1, B2, B3);
117 key_xor(28, B0, B1, B2, B3);
118 SBoxE4(B0, B1, B2, B3);
119 transform(B0, B1, B2, B3);
120 key_xor(29, B0, B1, B2, B3);
121 SBoxE5(B0, B1, B2, B3);
122 transform(B0, B1, B2, B3);
123 key_xor(30, B0, B1, B2, B3);
124 SBoxE6(B0, B1, B2, B3);
125 transform(B0, B1, B2, B3);
126 key_xor(31, B0, B1, B2, B3);
127 SBoxE7(B0, B1, B2, B3);
128 key_xor(32, B0, B1, B2, B3);
129
130 SIMD_4x32::transpose(B0, B1, B2, B3);
131
132 B0.store_le(out);
133 B1.store_le(out + 16);
134 B2.store_le(out + 32);
135 B3.store_le(out + 48);
136}
137
138/*
139* SIMD Serpent Decryption of 4 blocks in parallel
140*/
141void Serpent::simd_decrypt_4(const uint8_t in[64], uint8_t out[64]) const {
142 using namespace Botan::Serpent_F;
143
145 SIMD_4x32 B1 = SIMD_4x32::load_le(in + 16);
146 SIMD_4x32 B2 = SIMD_4x32::load_le(in + 32);
147 SIMD_4x32 B3 = SIMD_4x32::load_le(in + 48);
148
149 SIMD_4x32::transpose(B0, B1, B2, B3);
150
151 const Key_Inserter key_xor(m_round_key.data());
152
153 key_xor(32, B0, B1, B2, B3);
154 SBoxD7(B0, B1, B2, B3);
155 key_xor(31, B0, B1, B2, B3);
156 i_transform(B0, B1, B2, B3);
157 SBoxD6(B0, B1, B2, B3);
158 key_xor(30, B0, B1, B2, B3);
159 i_transform(B0, B1, B2, B3);
160 SBoxD5(B0, B1, B2, B3);
161 key_xor(29, B0, B1, B2, B3);
162 i_transform(B0, B1, B2, B3);
163 SBoxD4(B0, B1, B2, B3);
164 key_xor(28, B0, B1, B2, B3);
165 i_transform(B0, B1, B2, B3);
166 SBoxD3(B0, B1, B2, B3);
167 key_xor(27, B0, B1, B2, B3);
168 i_transform(B0, B1, B2, B3);
169 SBoxD2(B0, B1, B2, B3);
170 key_xor(26, B0, B1, B2, B3);
171 i_transform(B0, B1, B2, B3);
172 SBoxD1(B0, B1, B2, B3);
173 key_xor(25, B0, B1, B2, B3);
174 i_transform(B0, B1, B2, B3);
175 SBoxD0(B0, B1, B2, B3);
176 key_xor(24, B0, B1, B2, B3);
177
178 i_transform(B0, B1, B2, B3);
179 SBoxD7(B0, B1, B2, B3);
180 key_xor(23, B0, B1, B2, B3);
181 i_transform(B0, B1, B2, B3);
182 SBoxD6(B0, B1, B2, B3);
183 key_xor(22, B0, B1, B2, B3);
184 i_transform(B0, B1, B2, B3);
185 SBoxD5(B0, B1, B2, B3);
186 key_xor(21, B0, B1, B2, B3);
187 i_transform(B0, B1, B2, B3);
188 SBoxD4(B0, B1, B2, B3);
189 key_xor(20, B0, B1, B2, B3);
190 i_transform(B0, B1, B2, B3);
191 SBoxD3(B0, B1, B2, B3);
192 key_xor(19, B0, B1, B2, B3);
193 i_transform(B0, B1, B2, B3);
194 SBoxD2(B0, B1, B2, B3);
195 key_xor(18, B0, B1, B2, B3);
196 i_transform(B0, B1, B2, B3);
197 SBoxD1(B0, B1, B2, B3);
198 key_xor(17, B0, B1, B2, B3);
199 i_transform(B0, B1, B2, B3);
200 SBoxD0(B0, B1, B2, B3);
201 key_xor(16, B0, B1, B2, B3);
202
203 i_transform(B0, B1, B2, B3);
204 SBoxD7(B0, B1, B2, B3);
205 key_xor(15, B0, B1, B2, B3);
206 i_transform(B0, B1, B2, B3);
207 SBoxD6(B0, B1, B2, B3);
208 key_xor(14, B0, B1, B2, B3);
209 i_transform(B0, B1, B2, B3);
210 SBoxD5(B0, B1, B2, B3);
211 key_xor(13, B0, B1, B2, B3);
212 i_transform(B0, B1, B2, B3);
213 SBoxD4(B0, B1, B2, B3);
214 key_xor(12, B0, B1, B2, B3);
215 i_transform(B0, B1, B2, B3);
216 SBoxD3(B0, B1, B2, B3);
217 key_xor(11, B0, B1, B2, B3);
218 i_transform(B0, B1, B2, B3);
219 SBoxD2(B0, B1, B2, B3);
220 key_xor(10, B0, B1, B2, B3);
221 i_transform(B0, B1, B2, B3);
222 SBoxD1(B0, B1, B2, B3);
223 key_xor(9, B0, B1, B2, B3);
224 i_transform(B0, B1, B2, B3);
225 SBoxD0(B0, B1, B2, B3);
226 key_xor(8, B0, B1, B2, B3);
227
228 i_transform(B0, B1, B2, B3);
229 SBoxD7(B0, B1, B2, B3);
230 key_xor(7, B0, B1, B2, B3);
231 i_transform(B0, B1, B2, B3);
232 SBoxD6(B0, B1, B2, B3);
233 key_xor(6, B0, B1, B2, B3);
234 i_transform(B0, B1, B2, B3);
235 SBoxD5(B0, B1, B2, B3);
236 key_xor(5, B0, B1, B2, B3);
237 i_transform(B0, B1, B2, B3);
238 SBoxD4(B0, B1, B2, B3);
239 key_xor(4, B0, B1, B2, B3);
240 i_transform(B0, B1, B2, B3);
241 SBoxD3(B0, B1, B2, B3);
242 key_xor(3, B0, B1, B2, B3);
243 i_transform(B0, B1, B2, B3);
244 SBoxD2(B0, B1, B2, B3);
245 key_xor(2, B0, B1, B2, B3);
246 i_transform(B0, B1, B2, B3);
247 SBoxD1(B0, B1, B2, B3);
248 key_xor(1, B0, B1, B2, B3);
249 i_transform(B0, B1, B2, B3);
250 SBoxD0(B0, B1, B2, B3);
251 key_xor(0, B0, B1, B2, B3);
252
253 SIMD_4x32::transpose(B0, B1, B2, B3);
254
255 B0.store_le(out);
256 B1.store_le(out + 16);
257 B2.store_le(out + 32);
258 B3.store_le(out + 48);
259}
260
261} // namespace Botan
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_32.h:554
void store_le(uint32_t out[4]) const noexcept
Definition simd_32.h:194
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_32.h:159
BOTAN_FORCE_INLINE void transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:24
BOTAN_FORCE_INLINE void i_transform(T &B0, T &B1, T &B2, T &B3)
Definition serpent_fn.h:41
BOTAN_FORCE_INLINE void SBoxE6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE4(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD5(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD0(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxE1(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD6(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD2(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD3(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)
BOTAN_FORCE_INLINE void SBoxD7(SIMD_16x32 &a, SIMD_16x32 &b, SIMD_16x32 &c, SIMD_16x32 &d)