Botan 3.11.1
Crypto and TLS for C&
twofish.cpp
Go to the documentation of this file.
1/*
2* Twofish
3* (C) 1999-2007,2017,2026 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/twofish.h>
9
10#include <botan/internal/ct_utils.h>
11#include <botan/internal/loadstor.h>
12#include <botan/internal/rotate.h>
13
14#if defined(BOTAN_HAS_CPUID)
15 #include <botan/internal/cpuid.h>
16#endif
17
18namespace Botan {
19
20namespace {
21
22namespace Twofish_KS {
23
24// Twofish q-permutation derived from four 4-bit sboxes
25// ("Twofish: A 128-Bit Block Cipher", section 4.3.5)
26consteval std::array<uint8_t, 256> twofish_q_perm(std::array<uint8_t, 16> t0,
27 std::array<uint8_t, 16> t1,
28 std::array<uint8_t, 16> t2,
29 std::array<uint8_t, 16> t3) noexcept {
30 std::array<uint8_t, 256> Q = {};
31 for(size_t x = 0; x != 256; ++x) {
32 const uint8_t a0 = static_cast<uint8_t>((x >> 4) & 0x0F);
33 const uint8_t b0 = static_cast<uint8_t>(x & 0x0F);
34
35 const uint8_t a1 = a0 ^ b0;
36 const uint8_t b1 = a0 ^ ((b0 >> 1) | ((b0 & 1) << 3)) ^ ((8 * a0) & 0x0F);
37
38 const uint8_t a2 = t0[a1];
39 const uint8_t b2 = t1[b1];
40
41 const uint8_t a3 = a2 ^ b2;
42 const uint8_t b3 = a2 ^ ((b2 >> 1) | ((b2 & 1) << 3)) ^ ((8 * a2) & 0x0F);
43
44 const uint8_t a4 = t2[a3];
45 const uint8_t b4 = t3[b3];
46
47 Q[x] = static_cast<uint8_t>((b4 << 4) | a4);
48 }
49 return Q;
50}
51
52// clang-format off
53alignas(256) constexpr auto Q0 = twofish_q_perm(
54 {8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4},
55 {14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13},
56 {11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1},
57 {13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10});
58
59alignas(256) constexpr auto Q1 = twofish_q_perm(
60 {2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5},
61 {1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8},
62 {4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15},
63 {11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10});
64
65// clang-format on
66
67/*
68* MDS matrix multiplication (Twofish paper Section 4.2)
69*
70* MDS = [01, EF, 5B, 5B]
71* [5B, EF, EF, 01]
72* [EF, 5B, 01, EF]
73* [EF, 01, EF, 5B]
74*
75* The MDS coefficients are 01, 5B, and EF. These were chosen so that
76*
77* 5B = 1 + 1/x^2
78* EF = 1 + 1/x + 1/x^2
79*
80* in GF(2^8) mod x^8+x^6+x^5+x^3+1, where 1/x is computed by shifting
81* right and conditionally XORing with 0xB4 (which is itself just the
82* irreducible polynomial 0x169 shifted right by 1).
83*
84* This property of the MDS constants is described (briefly) in Section 7.3
85* of the Twofish paper.
86*/
87
88inline uint8_t mds_div_x(uint8_t q) {
89 return (q >> 1) ^ (CT::value_barrier<uint8_t>(q & 1) * 0xB4);
90}
91
92inline uint32_t mds0(uint8_t q) {
93 const uint8_t q_div_x = mds_div_x(q);
94 const uint8_t q5b = q ^ mds_div_x(q_div_x);
95 const uint8_t qef = q5b ^ q_div_x;
96 return make_uint32(qef, qef, q5b, q);
97}
98
99inline uint32_t mds1(uint8_t q) {
100 const uint8_t q_div_x = mds_div_x(q);
101 const uint8_t q5b = q ^ mds_div_x(q_div_x);
102 const uint8_t qef = q5b ^ q_div_x;
103 return make_uint32(q, q5b, qef, qef);
104}
105
106inline uint32_t mds2(uint8_t q) {
107 const uint8_t q_div_x = mds_div_x(q);
108 const uint8_t q5b = q ^ mds_div_x(q_div_x);
109 const uint8_t qef = q5b ^ q_div_x;
110 return make_uint32(qef, q, qef, q5b);
111}
112
113inline uint32_t mds3(uint8_t q) {
114 const uint8_t q_div_x = mds_div_x(q);
115 const uint8_t q5b = q ^ mds_div_x(q_div_x);
116 const uint8_t qef = q5b ^ q_div_x;
117 return make_uint32(q5b, qef, q, q5b);
118}
119
120// Constant-time GF(2^8) multiply in the RS field (irreducible polynomial 0x14D)
121inline uint32_t gf_mul_rs32(uint32_t rs, uint8_t k) {
122 constexpr uint32_t lo_bit = 0x01010101;
123 constexpr uint32_t mask = 0x7F7F7F7F;
124 constexpr uint32_t poly = 0x4D;
125
126 uint32_t r = 0;
127 for(size_t i = 0; i != 8; ++i) {
128 const auto k_lo = CT::Mask<uint32_t>::expand(k & 1);
129 r ^= k_lo.if_set_return(rs);
130 rs = ((rs & mask) << 1) ^ (((rs >> 7) & lo_bit) * poly);
131 k >>= 1;
132 }
133 return r;
134}
135
136} // namespace Twofish_KS
137
138inline void TF_E(
139 uint32_t A, uint32_t B, uint32_t& C, uint32_t& D, uint32_t RK1, uint32_t RK2, const secure_vector<uint32_t>& SB) {
140 uint32_t X = SB[get_byte<3>(A)] ^ SB[256 + get_byte<2>(A)] ^ SB[512 + get_byte<1>(A)] ^ SB[768 + get_byte<0>(A)];
141 uint32_t Y = SB[get_byte<0>(B)] ^ SB[256 + get_byte<3>(B)] ^ SB[512 + get_byte<2>(B)] ^ SB[768 + get_byte<1>(B)];
142
143 X += Y;
144 Y += X;
145
146 X += RK1;
147 Y += RK2;
148
149 C = rotr<1>(C ^ X);
150 D = rotl<1>(D) ^ Y;
151}
152
153inline void TF_D(
154 uint32_t A, uint32_t B, uint32_t& C, uint32_t& D, uint32_t RK1, uint32_t RK2, const secure_vector<uint32_t>& SB) {
155 uint32_t X = SB[get_byte<3>(A)] ^ SB[256 + get_byte<2>(A)] ^ SB[512 + get_byte<1>(A)] ^ SB[768 + get_byte<0>(A)];
156 uint32_t Y = SB[get_byte<0>(B)] ^ SB[256 + get_byte<3>(B)] ^ SB[512 + get_byte<2>(B)] ^ SB[768 + get_byte<1>(B)];
157
158 X += Y;
159 Y += X;
160
161 X += RK1;
162 Y += RK2;
163
164 C = rotl<1>(C) ^ X;
165 D = rotr<1>(D ^ Y);
166}
167
168} // namespace
169
170/*
171* Twofish Encryption
172*/
173void Twofish::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
175
176#if defined(BOTAN_HAS_TWOFISH_AVX512)
177 if(!m_QS.empty()) {
178 while(blocks >= 16) {
179 avx512_encrypt_16(in, out);
180 in += 16 * BLOCK_SIZE;
181 out += 16 * BLOCK_SIZE;
182 blocks -= 16;
183 }
184 }
185#endif
186
187 while(blocks >= 2) {
188 uint32_t A0 = 0;
189 uint32_t B0 = 0;
190 uint32_t C0 = 0;
191 uint32_t D0 = 0;
192 uint32_t A1 = 0;
193 uint32_t B1 = 0;
194 uint32_t C1 = 0;
195 uint32_t D1 = 0;
196 load_le(in, A0, B0, C0, D0, A1, B1, C1, D1);
197
198 A0 ^= m_RK[0];
199 A1 ^= m_RK[0];
200 B0 ^= m_RK[1];
201 B1 ^= m_RK[1];
202 C0 ^= m_RK[2];
203 C1 ^= m_RK[2];
204 D0 ^= m_RK[3];
205 D1 ^= m_RK[3];
206
207 for(size_t k = 8; k != 40; k += 4) {
208 TF_E(A0, B0, C0, D0, m_RK[k + 0], m_RK[k + 1], m_SB);
209 TF_E(A1, B1, C1, D1, m_RK[k + 0], m_RK[k + 1], m_SB);
210
211 TF_E(C0, D0, A0, B0, m_RK[k + 2], m_RK[k + 3], m_SB);
212 TF_E(C1, D1, A1, B1, m_RK[k + 2], m_RK[k + 3], m_SB);
213 }
214
215 C0 ^= m_RK[4];
216 C1 ^= m_RK[4];
217 D0 ^= m_RK[5];
218 D1 ^= m_RK[5];
219 A0 ^= m_RK[6];
220 A1 ^= m_RK[6];
221 B0 ^= m_RK[7];
222 B1 ^= m_RK[7];
223
224 store_le(out, C0, D0, A0, B0, C1, D1, A1, B1);
225
226 blocks -= 2;
227 out += 2 * BLOCK_SIZE;
228 in += 2 * BLOCK_SIZE;
229 }
230
231 if(blocks > 0) {
232 uint32_t A = 0;
233 uint32_t B = 0;
234 uint32_t C = 0;
235 uint32_t D = 0;
236 load_le(in, A, B, C, D);
237
238 A ^= m_RK[0];
239 B ^= m_RK[1];
240 C ^= m_RK[2];
241 D ^= m_RK[3];
242
243 for(size_t k = 8; k != 40; k += 4) {
244 TF_E(A, B, C, D, m_RK[k], m_RK[k + 1], m_SB);
245 TF_E(C, D, A, B, m_RK[k + 2], m_RK[k + 3], m_SB);
246 }
247
248 C ^= m_RK[4];
249 D ^= m_RK[5];
250 A ^= m_RK[6];
251 B ^= m_RK[7];
252
253 store_le(out, C, D, A, B);
254 }
255}
256
257/*
258* Twofish Decryption
259*/
260void Twofish::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const {
262
263#if defined(BOTAN_HAS_TWOFISH_AVX512)
264 if(!m_QS.empty()) {
265 while(blocks >= 16) {
266 avx512_decrypt_16(in, out);
267 in += 16 * BLOCK_SIZE;
268 out += 16 * BLOCK_SIZE;
269 blocks -= 16;
270 }
271 }
272#endif
273
274 while(blocks >= 2) {
275 uint32_t A0 = 0;
276 uint32_t B0 = 0;
277 uint32_t C0 = 0;
278 uint32_t D0 = 0;
279 uint32_t A1 = 0;
280 uint32_t B1 = 0;
281 uint32_t C1 = 0;
282 uint32_t D1 = 0;
283 load_le(in, A0, B0, C0, D0, A1, B1, C1, D1);
284
285 A0 ^= m_RK[4];
286 A1 ^= m_RK[4];
287 B0 ^= m_RK[5];
288 B1 ^= m_RK[5];
289 C0 ^= m_RK[6];
290 C1 ^= m_RK[6];
291 D0 ^= m_RK[7];
292 D1 ^= m_RK[7];
293
294 for(size_t k = 40; k != 8; k -= 4) {
295 TF_D(A0, B0, C0, D0, m_RK[k - 2], m_RK[k - 1], m_SB);
296 TF_D(A1, B1, C1, D1, m_RK[k - 2], m_RK[k - 1], m_SB);
297
298 TF_D(C0, D0, A0, B0, m_RK[k - 4], m_RK[k - 3], m_SB);
299 TF_D(C1, D1, A1, B1, m_RK[k - 4], m_RK[k - 3], m_SB);
300 }
301
302 C0 ^= m_RK[0];
303 C1 ^= m_RK[0];
304 D0 ^= m_RK[1];
305 D1 ^= m_RK[1];
306 A0 ^= m_RK[2];
307 A1 ^= m_RK[2];
308 B0 ^= m_RK[3];
309 B1 ^= m_RK[3];
310
311 store_le(out, C0, D0, A0, B0, C1, D1, A1, B1);
312
313 blocks -= 2;
314 out += 2 * BLOCK_SIZE;
315 in += 2 * BLOCK_SIZE;
316 }
317
318 if(blocks > 0) {
319 uint32_t A = 0;
320 uint32_t B = 0;
321 uint32_t C = 0;
322 uint32_t D = 0;
323 load_le(in, A, B, C, D);
324
325 A ^= m_RK[4];
326 B ^= m_RK[5];
327 C ^= m_RK[6];
328 D ^= m_RK[7];
329
330 for(size_t k = 40; k != 8; k -= 4) {
331 TF_D(A, B, C, D, m_RK[k - 2], m_RK[k - 1], m_SB);
332 TF_D(C, D, A, B, m_RK[k - 4], m_RK[k - 3], m_SB);
333 }
334
335 C ^= m_RK[0];
336 D ^= m_RK[1];
337 A ^= m_RK[2];
338 B ^= m_RK[3];
339
340 store_le(out, C, D, A, B);
341 }
342}
343
345 return !m_SB.empty();
346}
347
348std::string Twofish::provider() const {
349#if defined(BOTAN_HAS_TWOFISH_AVX512)
351 return "avx512";
352 }
353#endif
354 return "base";
355}
356
357size_t Twofish::parallelism() const {
358#if defined(BOTAN_HAS_TWOFISH_AVX512)
360 return 16;
361 }
362#endif
363 return 1;
364}
365
366/*
367* Twofish Key Schedule
368*/
369void Twofish::key_schedule(std::span<const uint8_t> key) {
370 using namespace Twofish_KS;
371
372 // Reed-Solomon matrix for key schedule (Twofish paper Section 4.3)
373 // in column-major form
374
375 // clang-format off
376 constexpr uint32_t RS32[8] = {
377 0x01A402A4,
378 0xA456A155,
379 0x5582FC87,
380 0x87F3C15A,
381 0x5A1E4758,
382 0x58C6AEDB,
383 0xDB683D9E,
384 0x9EE51903
385 };
386 // clang-format on
387
388 m_RK.resize(40);
389
391
392 for(size_t i = 0; i != key.size(); ++i) {
393 const uint8_t ki = key[i];
394 const size_t s_off = 4 * (i / 8);
395
396 const uint32_t p = gf_mul_rs32(RS32[i % 8], ki);
397
398 S[s_off + 0] ^= get_byte<0>(p);
399 S[s_off + 1] ^= get_byte<1>(p);
400 S[s_off + 2] ^= get_byte<2>(p);
401 S[s_off + 3] ^= get_byte<3>(p);
402 }
403
404 secure_vector<uint8_t> QS(1024);
405
406 if(key.size() == 16) {
407 for(size_t i = 0; i != 256; ++i) {
408 QS[i] = Q1[Q0[Q0[i] ^ S[0]] ^ S[4]];
409 QS[256 + i] = Q0[Q0[Q1[i] ^ S[1]] ^ S[5]];
410 QS[512 + i] = Q1[Q1[Q0[i] ^ S[2]] ^ S[6]];
411 QS[768 + i] = Q0[Q1[Q1[i] ^ S[3]] ^ S[7]];
412 }
413
414 for(size_t i = 0; i < 40; i += 2) {
415 uint32_t X = mds0(Q1[Q0[Q0[i] ^ key[8]] ^ key[0]]) ^ mds1(Q0[Q0[Q1[i] ^ key[9]] ^ key[1]]) ^
416 mds2(Q1[Q1[Q0[i] ^ key[10]] ^ key[2]]) ^ mds3(Q0[Q1[Q1[i] ^ key[11]] ^ key[3]]);
417 uint32_t Y = mds0(Q1[Q0[Q0[i + 1] ^ key[12]] ^ key[4]]) ^ mds1(Q0[Q0[Q1[i + 1] ^ key[13]] ^ key[5]]) ^
418 mds2(Q1[Q1[Q0[i + 1] ^ key[14]] ^ key[6]]) ^ mds3(Q0[Q1[Q1[i + 1] ^ key[15]] ^ key[7]]);
419 Y = rotl<8>(Y);
420 X += Y;
421 Y += X;
422
423 m_RK[i] = X;
424 m_RK[i + 1] = rotl<9>(Y);
425 }
426 } else if(key.size() == 24) {
427 for(size_t i = 0; i != 256; ++i) {
428 QS[i] = Q1[Q0[Q0[Q1[i] ^ S[0]] ^ S[4]] ^ S[8]];
429 QS[256 + i] = Q0[Q0[Q1[Q1[i] ^ S[1]] ^ S[5]] ^ S[9]];
430 QS[512 + i] = Q1[Q1[Q0[Q0[i] ^ S[2]] ^ S[6]] ^ S[10]];
431 QS[768 + i] = Q0[Q1[Q1[Q0[i] ^ S[3]] ^ S[7]] ^ S[11]];
432 }
433
434 for(size_t i = 0; i < 40; i += 2) {
435 uint32_t X =
436 mds0(Q1[Q0[Q0[Q1[i] ^ key[16]] ^ key[8]] ^ key[0]]) ^ mds1(Q0[Q0[Q1[Q1[i] ^ key[17]] ^ key[9]] ^ key[1]]) ^
437 mds2(Q1[Q1[Q0[Q0[i] ^ key[18]] ^ key[10]] ^ key[2]]) ^ mds3(Q0[Q1[Q1[Q0[i] ^ key[19]] ^ key[11]] ^ key[3]]);
438 uint32_t Y = mds0(Q1[Q0[Q0[Q1[i + 1] ^ key[20]] ^ key[12]] ^ key[4]]) ^
439 mds1(Q0[Q0[Q1[Q1[i + 1] ^ key[21]] ^ key[13]] ^ key[5]]) ^
440 mds2(Q1[Q1[Q0[Q0[i + 1] ^ key[22]] ^ key[14]] ^ key[6]]) ^
441 mds3(Q0[Q1[Q1[Q0[i + 1] ^ key[23]] ^ key[15]] ^ key[7]]);
442 Y = rotl<8>(Y);
443 X += Y;
444 Y += X;
445
446 m_RK[i] = X;
447 m_RK[i + 1] = rotl<9>(Y);
448 }
449 } else if(key.size() == 32) {
450 for(size_t i = 0; i != 256; ++i) {
451 QS[i] = Q1[Q0[Q0[Q1[Q1[i] ^ S[0]] ^ S[4]] ^ S[8]] ^ S[12]];
452 QS[256 + i] = Q0[Q0[Q1[Q1[Q0[i] ^ S[1]] ^ S[5]] ^ S[9]] ^ S[13]];
453 QS[512 + i] = Q1[Q1[Q0[Q0[Q0[i] ^ S[2]] ^ S[6]] ^ S[10]] ^ S[14]];
454 QS[768 + i] = Q0[Q1[Q1[Q0[Q1[i] ^ S[3]] ^ S[7]] ^ S[11]] ^ S[15]];
455 }
456
457 for(size_t i = 0; i < 40; i += 2) {
458 uint32_t X = mds0(Q1[Q0[Q0[Q1[Q1[i] ^ key[24]] ^ key[16]] ^ key[8]] ^ key[0]]) ^
459 mds1(Q0[Q0[Q1[Q1[Q0[i] ^ key[25]] ^ key[17]] ^ key[9]] ^ key[1]]) ^
460 mds2(Q1[Q1[Q0[Q0[Q0[i] ^ key[26]] ^ key[18]] ^ key[10]] ^ key[2]]) ^
461 mds3(Q0[Q1[Q1[Q0[Q1[i] ^ key[27]] ^ key[19]] ^ key[11]] ^ key[3]]);
462 uint32_t Y = mds0(Q1[Q0[Q0[Q1[Q1[i + 1] ^ key[28]] ^ key[20]] ^ key[12]] ^ key[4]]) ^
463 mds1(Q0[Q0[Q1[Q1[Q0[i + 1] ^ key[29]] ^ key[21]] ^ key[13]] ^ key[5]]) ^
464 mds2(Q1[Q1[Q0[Q0[Q0[i + 1] ^ key[30]] ^ key[22]] ^ key[14]] ^ key[6]]) ^
465 mds3(Q0[Q1[Q1[Q0[Q1[i + 1] ^ key[31]] ^ key[23]] ^ key[15]] ^ key[7]]);
466 Y = rotl<8>(Y);
467 X += Y;
468 Y += X;
469
470 m_RK[i] = X;
471 m_RK[i + 1] = rotl<9>(Y);
472 }
473 }
474
475 m_SB.resize(1024);
476 for(size_t i = 0; i != 256; ++i) {
477 m_SB[i] = mds0(QS[i]);
478 m_SB[256 + i] = mds1(QS[256 + i]);
479 m_SB[512 + i] = mds2(QS[512 + i]);
480 m_SB[768 + i] = mds3(QS[768 + i]);
481 }
482
483#if defined(BOTAN_HAS_TWOFISH_AVX512)
485 m_QS = std::move(QS);
486 }
487#endif
488}
489
490/*
491* Clear memory of sensitive data
492*/
494 zap(m_SB);
495 zap(m_RK);
496 zap(m_QS);
497}
498
499} // namespace Botan
static bool has(CPUID::Feature feat)
Definition cpuid.h:94
static constexpr Mask< T > expand(T v)
Definition ct_utils.h:392
std::string provider() const override
Definition twofish.cpp:348
bool has_keying_material() const override
Definition twofish.cpp:344
void clear() override
Definition twofish.cpp:493
void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override
Definition twofish.cpp:173
void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override
Definition twofish.cpp:260
size_t parallelism() const override
Definition twofish.cpp:357
constexpr T value_barrier(T x)
constexpr uint8_t get_byte(T input)
Definition loadstor.h:79
void zap(std::vector< T, Alloc > &vec)
Definition secmem.h:133
BOTAN_FORCE_INLINE constexpr T rotr(T input)
Definition rotate.h:35
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition loadstor.h:104
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:736
BOTAN_FORCE_INLINE constexpr T rotl(T input)
Definition rotate.h:23
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:495
std::vector< T, secure_allocator< T > > secure_vector
Definition secmem.h:68