Botan 3.9.0
Crypto and TLS for C++
camellia_gfni.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/camellia_gfni.h>
8
9#include <botan/internal/loadstor.h>
10#include <botan/internal/rotate.h>
11#include <botan/internal/simd_avx2_gfni.h>
12
13namespace Botan {
14
15namespace {
16
17namespace Camellia_GFNI {
18
19inline BOTAN_FN_ISA_AVX2_GFNI __m256i camellia_s1234(__m256i x) {
20 constexpr uint64_t pre123_a = gfni_matrix(R"(
21 1 1 1 0 1 1 0 1
22 0 0 1 1 0 0 1 0
23 1 1 0 1 0 0 0 0
24 1 0 1 1 0 0 1 1
25 0 0 0 0 1 1 0 0
26 1 0 1 0 0 1 0 0
27 0 0 1 0 1 1 0 0
28 1 0 0 0 0 1 1 0)");
29
30 constexpr uint64_t pre4_a = gfni_matrix(R"(
31 1 1 0 1 1 0 1 1
32 0 1 1 0 0 1 0 0
33 1 0 1 0 0 0 0 1
34 0 1 1 0 0 1 1 1
35 0 0 0 1 1 0 0 0
36 0 1 0 0 1 0 0 1
37 0 1 0 1 1 0 0 0
38 0 0 0 0 1 1 0 1)");
39
40 constexpr uint8_t pre_c = 0b01000101;
41 const auto pre = _mm256_set_epi64x(pre4_a, pre123_a, pre123_a, pre123_a);
42
43 constexpr uint64_t post2_a = gfni_matrix(R"(
44 0 0 0 1 1 1 0 0
45 0 0 0 0 0 0 0 1
46 0 1 1 0 0 1 1 0
47 1 0 1 1 1 1 1 0
48 0 0 0 1 1 0 1 1
49 1 0 0 0 1 1 1 0
50 0 1 0 1 1 1 1 0
51 0 1 1 1 1 1 1 1)");
52
53 constexpr uint64_t post3_a = gfni_matrix(R"(
54 0 1 1 0 0 1 1 0
55 1 0 1 1 1 1 1 0
56 0 0 0 1 1 0 1 1
57 1 0 0 0 1 1 1 0
58 0 1 0 1 1 1 1 0
59 0 1 1 1 1 1 1 1
60 0 0 0 1 1 1 0 0
61 0 0 0 0 0 0 0 1)");
62
63 constexpr uint64_t post14_a = gfni_matrix(R"(
64 0 0 0 0 0 0 0 1
65 0 1 1 0 0 1 1 0
66 1 0 1 1 1 1 1 0
67 0 0 0 1 1 0 1 1
68 1 0 0 0 1 1 1 0
69 0 1 0 1 1 1 1 0
70 0 1 1 1 1 1 1 1
71 0 0 0 1 1 1 0 0)");
72
73 const auto post_a = _mm256_set_epi64x(post14_a, post3_a, post2_a, post14_a);
74
75 const auto post_c =
76 _mm256_set_epi64x(0x6E6E6E6E6E6E6E6E, 0x3737373737373737, 0xDCDCDCDCDCDCDCDC, 0x6E6E6E6E6E6E6E6E);
77
78 auto y = _mm256_gf2p8affine_epi64_epi8(x, pre, pre_c);
79 return _mm256_xor_si256(post_c, _mm256_gf2p8affineinv_epi64_epi8(y, post_a, 0));
80}
81
82inline BOTAN_FN_ISA_AVX2_GFNI uint64_t F(uint64_t x) {
83 // All 4 Camellia Sboxes in parallel
84 auto s_vec = camellia_s1234(_mm256_set1_epi64x(x));
85
86 // The linear transformation just sprays bytes about which can be done with two byte shuffles
87 auto Z0 = _mm256_shuffle_epi8(
88 s_vec, _mm256_set_epi64x(0x0C0CFF0CFFFF0C0C, 0x05FF0505FF0505FF, 0xFF0E0E0E0E0EFFFF, 0x070707FF07FFFF07));
89
90 auto Z1 = _mm256_shuffle_epi8(
91 s_vec, _mm256_set_epi64x(0x0909FF090909FF09, 0x02FF020202FF0202, 0xFF0B0B0BFF0B0B0B, 0x000000FF000000FF));
92
93 Z0 = _mm256_xor_si256(Z0, Z1);
94
95 uint64_t Z[4];
96 _mm256_store_si256(reinterpret_cast<__m256i*>(Z), Z0);
97
98 // My kingdom for a horizontal XOR (even AVX-512 doesn't have this, only OR/AND)
99 return Z[0] ^ Z[1] ^ Z[2] ^ Z[3];
100}
101
102inline uint64_t FL(uint64_t v, uint64_t K) {
103 uint32_t x1 = static_cast<uint32_t>(v >> 32);
104 uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF);
105
106 const uint32_t k1 = static_cast<uint32_t>(K >> 32);
107 const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);
108
109 x2 ^= rotl<1>(x1 & k1);
110 x1 ^= (x2 | k2);
111
112 return ((static_cast<uint64_t>(x1) << 32) | x2);
113}
114
115inline uint64_t FLINV(uint64_t v, uint64_t K) {
116 uint32_t x1 = static_cast<uint32_t>(v >> 32);
117 uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF);
118
119 const uint32_t k1 = static_cast<uint32_t>(K >> 32);
120 const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);
121
122 x1 ^= (x2 | k2);
123 x2 ^= rotl<1>(x1 & k1);
124
125 return ((static_cast<uint64_t>(x1) << 32) | x2);
126}
127
128} // namespace Camellia_GFNI
129
130} // namespace
131
132BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt9(const uint8_t in[],
133 uint8_t out[],
134 size_t blocks,
135 std::span<const uint64_t> SK) {
136 using namespace Camellia_GFNI;
137
138 for(size_t i = 0; i < blocks; ++i) {
139 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
140 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
141
142 D1 ^= SK[0];
143 D2 ^= SK[1];
144
145 D2 ^= F(D1 ^ SK[2]);
146 D1 ^= F(D2 ^ SK[3]);
147 D2 ^= F(D1 ^ SK[4]);
148 D1 ^= F(D2 ^ SK[5]);
149 D2 ^= F(D1 ^ SK[6]);
150 D1 ^= F(D2 ^ SK[7]);
151
152 D1 = FL(D1, SK[8]);
153 D2 = FLINV(D2, SK[9]);
154
155 D2 ^= F(D1 ^ SK[10]);
156 D1 ^= F(D2 ^ SK[11]);
157 D2 ^= F(D1 ^ SK[12]);
158 D1 ^= F(D2 ^ SK[13]);
159 D2 ^= F(D1 ^ SK[14]);
160 D1 ^= F(D2 ^ SK[15]);
161
162 D1 = FL(D1, SK[16]);
163 D2 = FLINV(D2, SK[17]);
164
165 D2 ^= F(D1 ^ SK[18]);
166 D1 ^= F(D2 ^ SK[19]);
167 D2 ^= F(D1 ^ SK[20]);
168 D1 ^= F(D2 ^ SK[21]);
169 D2 ^= F(D1 ^ SK[22]);
170 D1 ^= F(D2 ^ SK[23]);
171
172 D2 ^= SK[24];
173 D1 ^= SK[25];
174
175 store_be(out + 16 * i, D2, D1);
176 }
177}
178
179BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt9(const uint8_t in[],
180 uint8_t out[],
181 size_t blocks,
182 std::span<const uint64_t> SK) {
183 using namespace Camellia_GFNI;
184
185 for(size_t i = 0; i < blocks; ++i) {
186 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
187 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
188
189 D2 ^= SK[25];
190 D1 ^= SK[24];
191
192 D2 ^= F(D1 ^ SK[23]);
193 D1 ^= F(D2 ^ SK[22]);
194
195 D2 ^= F(D1 ^ SK[21]);
196 D1 ^= F(D2 ^ SK[20]);
197
198 D2 ^= F(D1 ^ SK[19]);
199 D1 ^= F(D2 ^ SK[18]);
200
201 D1 = FL(D1, SK[17]);
202 D2 = FLINV(D2, SK[16]);
203
204 D2 ^= F(D1 ^ SK[15]);
205 D1 ^= F(D2 ^ SK[14]);
206 D2 ^= F(D1 ^ SK[13]);
207 D1 ^= F(D2 ^ SK[12]);
208 D2 ^= F(D1 ^ SK[11]);
209 D1 ^= F(D2 ^ SK[10]);
210
211 D1 = FL(D1, SK[9]);
212 D2 = FLINV(D2, SK[8]);
213
214 D2 ^= F(D1 ^ SK[7]);
215 D1 ^= F(D2 ^ SK[6]);
216 D2 ^= F(D1 ^ SK[5]);
217 D1 ^= F(D2 ^ SK[4]);
218 D2 ^= F(D1 ^ SK[3]);
219 D1 ^= F(D2 ^ SK[2]);
220
221 D1 ^= SK[1];
222 D2 ^= SK[0];
223
224 store_be(out + 16 * i, D2, D1);
225 }
226}
227
228BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt12(const uint8_t in[],
229 uint8_t out[],
230 size_t blocks,
231 std::span<const uint64_t> SK) {
232 using namespace Camellia_GFNI;
233
234 for(size_t i = 0; i < blocks; ++i) {
235 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
236 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
237
238 D1 ^= SK[0];
239 D2 ^= SK[1];
240
241 D2 ^= F(D1 ^ SK[2]);
242 D1 ^= F(D2 ^ SK[3]);
243 D2 ^= F(D1 ^ SK[4]);
244 D1 ^= F(D2 ^ SK[5]);
245 D2 ^= F(D1 ^ SK[6]);
246 D1 ^= F(D2 ^ SK[7]);
247
248 D1 = FL(D1, SK[8]);
249 D2 = FLINV(D2, SK[9]);
250
251 D2 ^= F(D1 ^ SK[10]);
252 D1 ^= F(D2 ^ SK[11]);
253 D2 ^= F(D1 ^ SK[12]);
254 D1 ^= F(D2 ^ SK[13]);
255 D2 ^= F(D1 ^ SK[14]);
256 D1 ^= F(D2 ^ SK[15]);
257
258 D1 = FL(D1, SK[16]);
259 D2 = FLINV(D2, SK[17]);
260
261 D2 ^= F(D1 ^ SK[18]);
262 D1 ^= F(D2 ^ SK[19]);
263 D2 ^= F(D1 ^ SK[20]);
264 D1 ^= F(D2 ^ SK[21]);
265 D2 ^= F(D1 ^ SK[22]);
266 D1 ^= F(D2 ^ SK[23]);
267
268 D1 = FL(D1, SK[24]);
269 D2 = FLINV(D2, SK[25]);
270
271 D2 ^= F(D1 ^ SK[26]);
272 D1 ^= F(D2 ^ SK[27]);
273 D2 ^= F(D1 ^ SK[28]);
274 D1 ^= F(D2 ^ SK[29]);
275 D2 ^= F(D1 ^ SK[30]);
276 D1 ^= F(D2 ^ SK[31]);
277
278 D2 ^= SK[32];
279 D1 ^= SK[33];
280
281 store_be(out + 16 * i, D2, D1);
282 }
283}
284
285BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt12(const uint8_t in[],
286 uint8_t out[],
287 size_t blocks,
288 std::span<const uint64_t> SK) {
289 using namespace Camellia_GFNI;
290
291 for(size_t i = 0; i < blocks; ++i) {
292 uint64_t D1 = load_be<uint64_t>(in, 2 * i + 0);
293 uint64_t D2 = load_be<uint64_t>(in, 2 * i + 1);
294
295 D2 ^= SK[33];
296 D1 ^= SK[32];
297
298 D2 ^= F(D1 ^ SK[31]);
299 D1 ^= F(D2 ^ SK[30]);
300
301 D2 ^= F(D1 ^ SK[29]);
302 D1 ^= F(D2 ^ SK[28]);
303
304 D2 ^= F(D1 ^ SK[27]);
305 D1 ^= F(D2 ^ SK[26]);
306
307 D1 = FL(D1, SK[25]);
308 D2 = FLINV(D2, SK[24]);
309 D2 ^= F(D1 ^ SK[23]);
310 D1 ^= F(D2 ^ SK[22]);
311 D2 ^= F(D1 ^ SK[21]);
312 D1 ^= F(D2 ^ SK[20]);
313 D2 ^= F(D1 ^ SK[19]);
314 D1 ^= F(D2 ^ SK[18]);
315
316 D1 = FL(D1, SK[17]);
317 D2 = FLINV(D2, SK[16]);
318 D2 ^= F(D1 ^ SK[15]);
319 D1 ^= F(D2 ^ SK[14]);
320 D2 ^= F(D1 ^ SK[13]);
321 D1 ^= F(D2 ^ SK[12]);
322 D2 ^= F(D1 ^ SK[11]);
323 D1 ^= F(D2 ^ SK[10]);
324
325 D1 = FL(D1, SK[9]);
326 D2 = FLINV(D2, SK[8]);
327 D2 ^= F(D1 ^ SK[7]);
328 D1 ^= F(D2 ^ SK[6]);
329 D2 ^= F(D1 ^ SK[5]);
330 D1 ^= F(D2 ^ SK[4]);
331 D2 ^= F(D1 ^ SK[3]);
332 D1 ^= F(D2 ^ SK[2]);
333
334 D1 ^= SK[1];
335 D2 ^= SK[0];
336
337 store_be(out + 16 * i, D2, D1);
338 }
339}
340
341} // namespace Botan
consteval uint64_t gfni_matrix(std::string_view s)
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt12(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
BOTAN_FORCE_INLINE constexpr T rotl(T input)
Definition rotate.h:23
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt12(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_encrypt9(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
BOTAN_FN_ISA_AVX2_GFNI void camellia_gfni_decrypt9(const uint8_t in[], uint8_t out[], size_t blocks, std::span< const uint64_t > SK)
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:745
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:504