Botan 3.10.0
Crypto and TLS for C&&
camellia_gfni.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/camellia_gfni.h>
8
9#include <botan/internal/loadstor.h>
10#include <botan/internal/rotate.h>
11#include <botan/internal/simd_avx2_gfni.h>
12
13namespace Botan {
14
15namespace {
16
17namespace Camellia_GFNI {
18
19// NOLINTBEGIN(portability-simd-intrinsics)
20
21inline BOTAN_FN_ISA_AVX2_GFNI __m256i camellia_s1234(__m256i x) {
22 constexpr uint64_t pre123_a = gfni_matrix(R"(
23 1 1 1 0 1 1 0 1
24 0 0 1 1 0 0 1 0
25 1 1 0 1 0 0 0 0
26 1 0 1 1 0 0 1 1
27 0 0 0 0 1 1 0 0
28 1 0 1 0 0 1 0 0
29 0 0 1 0 1 1 0 0
30 1 0 0 0 0 1 1 0)");
31
32 constexpr uint64_t pre4_a = gfni_matrix(R"(
33 1 1 0 1 1 0 1 1
34 0 1 1 0 0 1 0 0
35 1 0 1 0 0 0 0 1
36 0 1 1 0 0 1 1 1
37 0 0 0 1 1 0 0 0
38 0 1 0 0 1 0 0 1
39 0 1 0 1 1 0 0 0
40 0 0 0 0 1 1 0 1)");
41
42 constexpr uint8_t pre_c = 0b01000101;
43 const auto pre = _mm256_set_epi64x(pre4_a, pre123_a, pre123_a, pre123_a);
44
45 constexpr uint64_t post2_a = gfni_matrix(R"(
46 0 0 0 1 1 1 0 0
47 0 0 0 0 0 0 0 1
48 0 1 1 0 0 1 1 0
49 1 0 1 1 1 1 1 0
50 0 0 0 1 1 0 1 1
51 1 0 0 0 1 1 1 0
52 0 1 0 1 1 1 1 0
53 0 1 1 1 1 1 1 1)");
54
55 constexpr uint64_t post3_a = gfni_matrix(R"(
56 0 1 1 0 0 1 1 0
57 1 0 1 1 1 1 1 0
58 0 0 0 1 1 0 1 1
59 1 0 0 0 1 1 1 0
60 0 1 0 1 1 1 1 0
61 0 1 1 1 1 1 1 1
62 0 0 0 1 1 1 0 0
63 0 0 0 0 0 0 0 1)");
64
65 constexpr uint64_t post14_a = gfni_matrix(R"(
66 0 0 0 0 0 0 0 1
67 0 1 1 0 0 1 1 0
68 1 0 1 1 1 1 1 0
69 0 0 0 1 1 0 1 1
70 1 0 0 0 1 1 1 0
71 0 1 0 1 1 1 1 0
72 0 1 1 1 1 1 1 1
73 0 0 0 1 1 1 0 0)");
74
75 const auto post_a = _mm256_set_epi64x(post14_a, post3_a, post2_a, post14_a);
76
77 const auto post_c =
78 _mm256_set_epi64x(0x6E6E6E6E6E6E6E6E, 0x3737373737373737, 0xDCDCDCDCDCDCDCDC, 0x6E6E6E6E6E6E6E6E);
79
80 auto y = _mm256_gf2p8affine_epi64_epi8(x, pre, pre_c);
81 return _mm256_xor_si256(post_c, _mm256_gf2p8affineinv_epi64_epi8(y, post_a, 0));
82}
83
84inline BOTAN_FN_ISA_AVX2_GFNI uint64_t F(uint64_t x) {
85 // All 4 Camellia Sboxes in parallel
86 auto s_vec = camellia_s1234(_mm256_set1_epi64x(x));
87
88 // The linear transformation just sprays bytes about which can be done with two byte shuffles
89 auto Z0 = _mm256_shuffle_epi8(
90 s_vec, _mm256_set_epi64x(0x0C0CFF0CFFFF0C0C, 0x05FF0505FF0505FF, 0xFF0E0E0E0E0EFFFF, 0x070707FF07FFFF07));
91
92 auto Z1 = _mm256_shuffle_epi8(
93 s_vec, _mm256_set_epi64x(0x0909FF090909FF09, 0x02FF020202FF0202, 0xFF0B0B0BFF0B0B0B, 0x000000FF000000FF));
94
95 Z0 = _mm256_xor_si256(Z0, Z1);
96
97 uint64_t Z[4];
98 _mm256_store_si256(reinterpret_cast<__m256i*>(Z), Z0);
99
100 // My kingdom for a horizontal XOR (even AVX-512 doesn't have this, only OR/AND)
101 return Z[0] ^ Z[1] ^ Z[2] ^ Z[3];
102}
103
104// NOLINTEND(portability-simd-intrinsics)
105
106inline uint64_t FL(uint64_t v, uint64_t K) {
107 uint32_t x1 = static_cast<uint32_t>(v >> 32);
108 uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF);
109
110 const uint32_t k1 = static_cast<uint32_t>(K >> 32);
111 const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);
112
113 x2 ^= rotl<1>(x1 & k1);
114 x1 ^= (x2 | k2);
115
116 return ((static_cast<uint64_t>(x1) << 32) | x2);
117}
118
119inline uint64_t FLINV(uint64_t v, uint64_t K) {
120 uint32_t x1 = static_cast<uint32_t>(v >> 32);
121 uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF);
122
123 const uint32_t k1 = static_cast<uint32_t>(K >> 32);
124 const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);
125
126 x1 ^= (x2 | k2);
127 x2 ^= rotl<1>(x1 & k1);
128
129 return ((static_cast<uint64_t>(x1) << 32) | x2);
130}
131
132} // namespace Camellia_GFNI
133
134} // namespace
135
136BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt9(const uint8_t in[],
137 uint8_t out[],
138 size_t blocks,
139 std::span<const uint64_t> SK) {
140 using namespace Camellia_GFNI;
141
142 for(size_t i = 0; i < blocks; ++i) {
143 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
144 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
145
146 D1 ^= SK[0];
147 D2 ^= SK[1];
148
149 D2 ^= F(D1 ^ SK[2]);
150 D1 ^= F(D2 ^ SK[3]);
151 D2 ^= F(D1 ^ SK[4]);
152 D1 ^= F(D2 ^ SK[5]);
153 D2 ^= F(D1 ^ SK[6]);
154 D1 ^= F(D2 ^ SK[7]);
155
156 D1 = FL(D1, SK[8]);
157 D2 = FLINV(D2, SK[9]);
158
159 D2 ^= F(D1 ^ SK[10]);
160 D1 ^= F(D2 ^ SK[11]);
161 D2 ^= F(D1 ^ SK[12]);
162 D1 ^= F(D2 ^ SK[13]);
163 D2 ^= F(D1 ^ SK[14]);
164 D1 ^= F(D2 ^ SK[15]);
165
166 D1 = FL(D1, SK[16]);
167 D2 = FLINV(D2, SK[17]);
168
169 D2 ^= F(D1 ^ SK[18]);
170 D1 ^= F(D2 ^ SK[19]);
171 D2 ^= F(D1 ^ SK[20]);
172 D1 ^= F(D2 ^ SK[21]);
173 D2 ^= F(D1 ^ SK[22]);
174 D1 ^= F(D2 ^ SK[23]);
175
176 D2 ^= SK[24];
177 D1 ^= SK[25];
178
179 store_be(out + 16 * i, D2, D1);
180 }
181}
182
183BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt9(const uint8_t in[],
184 uint8_t out[],
185 size_t blocks,
186 std::span<const uint64_t> SK) {
187 using namespace Camellia_GFNI;
188
189 for(size_t i = 0; i < blocks; ++i) {
190 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
191 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
192
193 D2 ^= SK[25];
194 D1 ^= SK[24];
195
196 D2 ^= F(D1 ^ SK[23]);
197 D1 ^= F(D2 ^ SK[22]);
198
199 D2 ^= F(D1 ^ SK[21]);
200 D1 ^= F(D2 ^ SK[20]);
201
202 D2 ^= F(D1 ^ SK[19]);
203 D1 ^= F(D2 ^ SK[18]);
204
205 D1 = FL(D1, SK[17]);
206 D2 = FLINV(D2, SK[16]);
207
208 D2 ^= F(D1 ^ SK[15]);
209 D1 ^= F(D2 ^ SK[14]);
210 D2 ^= F(D1 ^ SK[13]);
211 D1 ^= F(D2 ^ SK[12]);
212 D2 ^= F(D1 ^ SK[11]);
213 D1 ^= F(D2 ^ SK[10]);
214
215 D1 = FL(D1, SK[9]);
216 D2 = FLINV(D2, SK[8]);
217
218 D2 ^= F(D1 ^ SK[7]);
219 D1 ^= F(D2 ^ SK[6]);
220 D2 ^= F(D1 ^ SK[5]);
221 D1 ^= F(D2 ^ SK[4]);
222 D2 ^= F(D1 ^ SK[3]);
223 D1 ^= F(D2 ^ SK[2]);
224
225 D1 ^= SK[1];
226 D2 ^= SK[0];
227
228 store_be(out + 16 * i, D2, D1);
229 }
230}
231
232BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt12(const uint8_t in[],
233 uint8_t out[],
234 size_t blocks,
235 std::span<const uint64_t> SK) {
236 using namespace Camellia_GFNI;
237
238 for(size_t i = 0; i < blocks; ++i) {
239 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
240 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
241
242 D1 ^= SK[0];
243 D2 ^= SK[1];
244
245 D2 ^= F(D1 ^ SK[2]);
246 D1 ^= F(D2 ^ SK[3]);
247 D2 ^= F(D1 ^ SK[4]);
248 D1 ^= F(D2 ^ SK[5]);
249 D2 ^= F(D1 ^ SK[6]);
250 D1 ^= F(D2 ^ SK[7]);
251
252 D1 = FL(D1, SK[8]);
253 D2 = FLINV(D2, SK[9]);
254
255 D2 ^= F(D1 ^ SK[10]);
256 D1 ^= F(D2 ^ SK[11]);
257 D2 ^= F(D1 ^ SK[12]);
258 D1 ^= F(D2 ^ SK[13]);
259 D2 ^= F(D1 ^ SK[14]);
260 D1 ^= F(D2 ^ SK[15]);
261
262 D1 = FL(D1, SK[16]);
263 D2 = FLINV(D2, SK[17]);
264
265 D2 ^= F(D1 ^ SK[18]);
266 D1 ^= F(D2 ^ SK[19]);
267 D2 ^= F(D1 ^ SK[20]);
268 D1 ^= F(D2 ^ SK[21]);
269 D2 ^= F(D1 ^ SK[22]);
270 D1 ^= F(D2 ^ SK[23]);
271
272 D1 = FL(D1, SK[24]);
273 D2 = FLINV(D2, SK[25]);
274
275 D2 ^= F(D1 ^ SK[26]);
276 D1 ^= F(D2 ^ SK[27]);
277 D2 ^= F(D1 ^ SK[28]);
278 D1 ^= F(D2 ^ SK[29]);
279 D2 ^= F(D1 ^ SK[30]);
280 D1 ^= F(D2 ^ SK[31]);
281
282 D2 ^= SK[32];
283 D1 ^= SK[33];
284
285 store_be(out + 16 * i, D2, D1);
286 }
287}
288
289BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt12(const uint8_t in[],
290 uint8_t out[],
291 size_t blocks,
292 std::span<const uint64_t> SK) {
293 using namespace Camellia_GFNI;
294
295 for(size_t i = 0; i < blocks; ++i) {
296 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
297 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
298
299 D2 ^= SK[33];
300 D1 ^= SK[32];
301
302 D2 ^= F(D1 ^ SK[31]);
303 D1 ^= F(D2 ^ SK[30]);
304
305 D2 ^= F(D1 ^ SK[29]);
306 D1 ^= F(D2 ^ SK[28]);
307
308 D2 ^= F(D1 ^ SK[27]);
309 D1 ^= F(D2 ^ SK[26]);
310
311 D1 = FL(D1, SK[25]);
312 D2 = FLINV(D2, SK[24]);
313 D2 ^= F(D1 ^ SK[23]);
314 D1 ^= F(D2 ^ SK[22]);
315 D2 ^= F(D1 ^ SK[21]);
316 D1 ^= F(D2 ^ SK[20]);
317 D2 ^= F(D1 ^ SK[19]);
318 D1 ^= F(D2 ^ SK[18]);
319
320 D1 = FL(D1, SK[17]);
321 D2 = FLINV(D2, SK[16]);
322 D2 ^= F(D1 ^ SK[15]);
323 D1 ^= F(D2 ^ SK[14]);
324 D2 ^= F(D1 ^ SK[13]);
325 D1 ^= F(D2 ^ SK[12]);
326 D2 ^= F(D1 ^ SK[11]);
327 D1 ^= F(D2 ^ SK[10]);
328
329 D1 = FL(D1, SK[9]);
330 D2 = FLINV(D2, SK[8]);
331 D2 ^= F(D1 ^ SK[7]);
332 D1 ^= F(D2 ^ SK[6]);
333 D2 ^= F(D1 ^ SK[5]);
334 D1 ^= F(D2 ^ SK[4]);
335 D2 ^= F(D1 ^ SK[3]);
336 D1 ^= F(D2 ^ SK[2]);
337
338 D1 ^= SK[1];
339 D2 ^= SK[0];
340
341 store_be(out + 16 * i, D2, D1);
342 }
343}
344
345} // namespace Botan
consteval uint64_t gfni_matrix(std::string_view s)
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt12(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
BOTAN_FORCE_INLINE constexpr T rotl(T input)
Definition rotate.h:23
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt12(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt9(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt9(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:745
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:504