// Compile-time (consteval) construction of a 256-entry byte table Q from four
// 16-entry tables t0..t3. For every byte value x, the two nibbles of x are
// mixed, substituted through t0/t1, mixed a second time, substituted through
// t2/t3, and the two resulting nibbles are packed into Q[x].
26consteval std::array<uint8_t, 256> twofish_q_perm(std::array<uint8_t, 16> t0,
27 std::array<uint8_t, 16> t1,
28 std::array<uint8_t, 16> t2,
29 std::array<uint8_t, 16> t3)
noexcept {
30 std::array<uint8_t, 256> Q = {};
31 for(
size_t x = 0; x != 256; ++x) {
// a0 = high nibble of x, b0 = low nibble of x.
32 const uint8_t a0 =
static_cast<uint8_t
>((x >> 4) & 0x0F);
33 const uint8_t b0 =
static_cast<uint8_t
>(x & 0x0F);
// First mixing step. (b0 >> 1) | ((b0 & 1) << 3) rotates the 4-bit value b0
// right by one bit; (8 * a0) & 0x0F is 8*a0 reduced mod 16.
35 const uint8_t a1 = a0 ^ b0;
36 const uint8_t b1 = a0 ^ ((b0 >> 1) | ((b0 & 1) << 3)) ^ ((8 * a0) & 0x0F);
// First substitution through t0 and t1.
38 const uint8_t a2 = t0[a1];
39 const uint8_t b2 = t1[b1];
// Second mixing step, same shape as the first (a 4-bit rotation provided the
// table entry b2 is below 16, as it is for the tables passed in this file).
41 const uint8_t a3 = a2 ^ b2;
42 const uint8_t b3 = a2 ^ ((b2 >> 1) | ((b2 & 1) << 3)) ^ ((8 * a2) & 0x0F);
// Second substitution through t2 and t3.
44 const uint8_t a4 = t2[a3];
45 const uint8_t b4 = t3[b3];
// Pack the result: b4 becomes the high nibble, a4 the low nibble.
47 Q[x] =
static_cast<uint8_t
>((b4 << 4) | a4);
53alignas(256)
constexpr auto Q0 = twofish_q_perm(
54 {8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4},
55 {14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13},
56 {11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1},
57 {13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10});
59alignas(256)
constexpr auto Q1 = twofish_q_perm(
60 {2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5},
61 {1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8},
62 {4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15},
63 {11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10});
// Byte-to-byte helper used by mds0..mds3 to derive their q_div_x, q5b and qef
// terms.
// NOTE(review): by its name this presumably divides q by x in the GF(2^8)
// field of the MDS matrix - confirm against the function body.
88inline uint8_t mds_div_x(uint8_t q) {
// Maps one byte q to a 32-bit word built from three derived bytes.
// NOTE(review): the names q5b / qef suggest q multiplied by 0x5B and 0xEF in
// the MDS field - confirm against the MDS matrix definition.
92inline uint32_t mds0(uint8_t q) {
// q_div_x: one application of mds_div_x to q.
93 const uint8_t q_div_x = mds_div_x(q);
// q5b: q XOR mds_div_x applied twice to q.
94 const uint8_t q5b = q ^ mds_div_x(q_div_x);
// qef: q5b XOR q_div_x.
95 const uint8_t qef = q5b ^ q_div_x;
// Maps one byte q to a 32-bit word built from three derived bytes.
// NOTE(review): the names q5b / qef suggest q multiplied by 0x5B and 0xEF in
// the MDS field - confirm against the MDS matrix definition.
99inline uint32_t mds1(uint8_t q) {
// q_div_x: one application of mds_div_x to q.
100 const uint8_t q_div_x = mds_div_x(q);
// q5b: q XOR mds_div_x applied twice to q.
101 const uint8_t q5b = q ^ mds_div_x(q_div_x);
// qef: q5b XOR q_div_x.
102 const uint8_t qef = q5b ^ q_div_x;
// Maps one byte q to a 32-bit word built from three derived bytes.
// NOTE(review): the names q5b / qef suggest q multiplied by 0x5B and 0xEF in
// the MDS field - confirm against the MDS matrix definition.
106inline uint32_t mds2(uint8_t q) {
// q_div_x: one application of mds_div_x to q.
107 const uint8_t q_div_x = mds_div_x(q);
// q5b: q XOR mds_div_x applied twice to q.
108 const uint8_t q5b = q ^ mds_div_x(q_div_x);
// qef: q5b XOR q_div_x.
109 const uint8_t qef = q5b ^ q_div_x;
// Maps one byte q to a 32-bit word built from three derived bytes.
// NOTE(review): the names q5b / qef suggest q multiplied by 0x5B and 0xEF in
// the MDS field - confirm against the MDS matrix definition.
113inline uint32_t mds3(uint8_t q) {
// q_div_x: one application of mds_div_x to q.
114 const uint8_t q_div_x = mds_div_x(q);
// q5b: q XOR mds_div_x applied twice to q.
115 const uint8_t q5b = q ^ mds_div_x(q_div_x);
// qef: q5b XOR q_div_x.
116 const uint8_t qef = q5b ^ q_div_x;
// Shift-and-add style multiply over 8 iterations: rs is treated as four
// independent bytes packed in a 32-bit word, and each iteration conditionally
// XORs rs into the accumulator r and then doubles every byte of rs.
// NOTE(review): presumably multiplies each byte of rs by k in the GF(2^8)
// field of the RS matrix, with 0x4D as the low byte of the reduction
// polynomial - confirm.
121inline uint32_t gf_mul_rs32(uint32_t rs, uint8_t k) {
// lo_bit selects bit 0 of each byte; mask clears bit 7 of each byte.
122 constexpr uint32_t lo_bit = 0x01010101;
123 constexpr uint32_t mask = 0x7F7F7F7F;
// Value XORed into a byte whose top bit was set before the doubling.
124 constexpr uint32_t poly = 0x4D;
127 for(
size_t i = 0; i != 8; ++i) {
// NOTE(review): k_lo.if_set_return(rs) presumably yields rs when the mask
// k_lo is set and 0 otherwise, keeping the update branch-free - confirm.
129 r ^= k_lo.if_set_return(rs);
// Double all four bytes at once: clear each top bit before shifting so no bit
// carries into the neighbouring byte, then move each old top bit down to bit 0
// of its byte and multiply by poly to apply the reduction per byte.
130 rs = ((rs & mask) << 1) ^ (((rs >> 7) & lo_bit) * poly);
// When the AVX-512 implementation is compiled in, consume input 16 blocks at a
// time for as long as at least 16 blocks remain.
176#if defined(BOTAN_HAS_TWOFISH_AVX512)
178 while(blocks >= 16) {
179 avx512_encrypt_16(in, out);
// Two-blocks-at-a-time path: load eight little-endian words, four per block.
196 load_le(in, A0, B0, C0, D0, A1, B1, C1, D1);
// Round loop over m_RK[8..39], four round-key words per iteration. Each
// iteration applies TF_E twice per block, the second time with the (A,B) and
// (C,D) word pairs swapped; both blocks use the same round keys.
207 for(
size_t k = 8; k != 40; k += 4) {
208 TF_E(A0, B0, C0, D0, m_RK[k + 0], m_RK[k + 1], m_SB);
209 TF_E(A1, B1, C1, D1, m_RK[k + 0], m_RK[k + 1], m_SB);
211 TF_E(C0, D0, A0, B0, m_RK[k + 2], m_RK[k + 3], m_SB);
212 TF_E(C1, D1, A1, B1, m_RK[k + 2], m_RK[k + 3], m_SB);
// Output is written with the word pairs swapped relative to the load order
// (C,D before A,B) for each block.
224 store_le(out, C0, D0, A0, B0, C1, D1, A1, B1);
// Single-block path: same round structure on one set of four words.
243 for(
size_t k = 8; k != 40; k += 4) {
244 TF_E(A, B, C, D, m_RK[k], m_RK[k + 1], m_SB);
245 TF_E(C, D, A, B, m_RK[k + 2], m_RK[k + 3], m_SB);
// When the AVX-512 implementation is compiled in, consume input 16 blocks at a
// time for as long as at least 16 blocks remain.
263#if defined(BOTAN_HAS_TWOFISH_AVX512)
265 while(blocks >= 16) {
266 avx512_decrypt_16(in, out);
// Two-blocks-at-a-time path: load eight little-endian words, four per block.
283 load_le(in, A0, B0, C0, D0, A1, B1, C1, D1);
// Round loop walking the round keys downwards: k runs 40, 36, ..., 12, and
// each iteration uses m_RK[k-2], m_RK[k-1] and then m_RK[k-4], m_RK[k-3],
// covering m_RK[39] down to m_RK[8]. TF_D is applied twice per block, the
// second time with the (A,B) and (C,D) word pairs swapped.
294 for(
size_t k = 40; k != 8; k -= 4) {
295 TF_D(A0, B0, C0, D0, m_RK[k - 2], m_RK[k - 1], m_SB);
296 TF_D(A1, B1, C1, D1, m_RK[k - 2], m_RK[k - 1], m_SB);
298 TF_D(C0, D0, A0, B0, m_RK[k - 4], m_RK[k - 3], m_SB);
299 TF_D(C1, D1, A1, B1, m_RK[k - 4], m_RK[k - 3], m_SB);
// Output is written with the word pairs swapped relative to the load order
// (C,D before A,B) for each block.
311 store_le(out, C0, D0, A0, B0, C1, D1, A1, B1);
// Single-block path: same round structure on one set of four words.
330 for(
size_t k = 40; k != 8; k -= 4) {
331 TF_D(A, B, C, D, m_RK[k - 2], m_RK[k - 1], m_SB);
332 TF_D(C, D, A, B, m_RK[k - 4], m_RK[k - 3], m_SB);