// Evaluate Camellia s-boxes on every byte of x using GFNI.
//
// Each Camellia s-box is an affine transform, followed by inversion in
// GF(2^8), followed by another affine transform; GFNI computes exactly
// this shape: gf2p8affine (pre), then gf2p8affineinv (inverse + post
// matrix), with the post-transform's additive constant applied as a
// separate XOR.
//
// NOTE(review): this excerpt is incomplete — the 8x8 affine matrix
// constants (pre123_a, pre4_a, post14_a, post2_a, post3_a) are defined
// on lines not visible here. The per-64-bit-lane matrix selection below
// (pre4_a in one lane, pre123_a in the others; post14/post3/post2/post14)
// presumably maps one s-box variant (s1..s4) per lane — confirm against
// those constant definitions.
21inline BOTAN_FN_ISA_AVX2_GFNI __m256i camellia_s1234(__m256i x) {
   // Additive constant of the input affine transform (shared by all lanes).
42 constexpr uint8_t pre_c = 0b01000101;
   // Pre-transform matrices, one 8x8 bit-matrix per 64-bit lane.
43 const auto pre = _mm256_set_epi64x(pre4_a, pre123_a, pre123_a, pre123_a);
   // Post-transform matrices, one per 64-bit lane.
75 const auto post_a = _mm256_set_epi64x(post14_a, post3_a, post2_a, post14_a);
   // Per-lane additive constants of the post-transform (applied via XOR
   // below). NOTE(review): the declaration introducing this initializer
   // (presumably `const auto post_c =`) is on a line not shown here.
78 _mm256_set_epi64x(0x6E6E6E6E6E6E6E6E, 0x3737373737373737, 0xDCDCDCDCDCDCDCDC, 0x6E6E6E6E6E6E6E6E);
   // y = pre * x + pre_c (bitwise affine transform in GF(2), per byte)
80 auto y = _mm256_gf2p8affine_epi64_epi8(x, pre, pre_c);
   // result = post_a * inverse(y) + post_c; the intrinsic's constant is 0
   // because the per-lane constants differ and are XORed in explicitly.
81 return _mm256_xor_si256(post_c, _mm256_gf2p8affineinv_epi64_epi8(y, post_a, 0));
// Camellia F (round) function: s-box substitution layer followed by the
// byte-level linear diffusion, collapsed to a single 64-bit result.
84inline BOTAN_FN_ISA_AVX2_GFNI uint64_t F(uint64_t x) {
   // Broadcast x into all four 64-bit lanes and run all four s-box
   // variants on it in parallel (one variant per lane).
86 auto s_vec = camellia_s1234(_mm256_set1_epi64x(x));
   // Byte shuffles select/duplicate s-box output bytes per result byte.
   // A 0xFF selector byte has its high bit set, which makes
   // _mm256_shuffle_epi8 emit a zero byte in that position.
   // NOTE(review): presumably these two masks together implement
   // Camellia's P permutation (each output byte = XOR of a subset of
   // s-box output bytes) — confirm against the specification.
89 auto Z0 = _mm256_shuffle_epi8(
90 s_vec, _mm256_set_epi64x(0x0C0CFF0CFFFF0C0C, 0x05FF0505FF0505FF, 0xFF0E0E0E0E0EFFFF, 0x070707FF07FFFF07));
92 auto Z1 = _mm256_shuffle_epi8(
93 s_vec, _mm256_set_epi64x(0x0909FF090909FF09, 0x02FF020202FF0202, 0xFF0B0B0BFF0B0B0B, 0x000000FF000000FF));
   // Combine the two partial selections.
95 Z0 = _mm256_xor_si256(Z0, Z1);
   // Spill the four 64-bit lanes to memory so they can be XOR-folded.
   // NOTE(review): Z's declaration is on a line not visible in this
   // excerpt; the aligned store below requires it to be 32-byte aligned
   // (presumably alignas(32) uint64_t Z[4]) — confirm.
98 _mm256_store_si256(
reinterpret_cast<__m256i*
>(Z), Z0);
   // Horizontal XOR of the four lanes yields the 64-bit F output.
101 return Z[0] ^ Z[1] ^ Z[2] ^ Z[3];
// Camellia FL function: key-dependent linear mixing of the two 32-bit
// halves of v under subkey K.
// NOTE(review): this excerpt omits the core mixing statements (original
// lines ~112-115, between the k2 extraction and the return); presumably
// the standard Camellia FL steps (x2 ^= rotl<1>(x1 & k1); x1 ^= (x2 | k2))
// — confirm against the full source.
106inline uint64_t FL(uint64_t v, uint64_t K) {
   // Split v into high (x1) and low (x2) 32-bit halves.
107 uint32_t x1 =
static_cast<uint32_t
>(v >> 32);
108 uint32_t x2 =
static_cast<uint32_t
>(v & 0xFFFFFFFF);
   // Split the 64-bit subkey the same way.
110 const uint32_t k1 =
static_cast<uint32_t
>(K >> 32);
111 const uint32_t k2 =
static_cast<uint32_t
>(K & 0xFFFFFFFF);
   // Recombine the (mixed) halves into a 64-bit result.
116 return ((
static_cast<uint64_t
>(x1) << 32) | x2);
// Inverse of the Camellia FL function (used on the decryption path):
// undoes FL's mixing of the two 32-bit halves of v under subkey K.
// NOTE(review): this excerpt omits the core mixing statements (original
// lines ~125-128, between the k2 extraction and the return); presumably
// FL's steps applied in reverse order (x1 ^= (x2 | k2); x2 ^= rotl<1>(x1 & k1))
// — confirm against the full source.
119inline uint64_t FLINV(uint64_t v, uint64_t K) {
   // Split v into high (x1) and low (x2) 32-bit halves.
120 uint32_t x1 =
static_cast<uint32_t
>(v >> 32);
121 uint32_t x2 =
static_cast<uint32_t
>(v & 0xFFFFFFFF);
   // Split the 64-bit subkey the same way.
123 const uint32_t k1 =
static_cast<uint32_t
>(K >> 32);
124 const uint32_t k2 =
static_cast<uint32_t
>(K & 0xFFFFFFFF);
   // Recombine the (un-mixed) halves into a 64-bit result.
129 return ((
static_cast<uint64_t
>(x1) << 32) | x2);