// Evaluate all four Camellia s-boxes at once with GFNI. Camellia's s1 is an
// affine transform of inversion in GF(2^8), and s2-s4 are bit rotations of
// s1's input or output, so a per-lane pre-affine step, a combined
// inverse+post-affine step, and a per-lane output constant compute one s-box
// per 64-bit lane.
inline BOTAN_FN_ISA_AVX2_GFNI __m256i camellia_s1234(__m256i x) {
   constexpr uint8_t pre_c = 0b01000101;
   const auto pre = _mm256_set_epi64x(pre4_a, pre123_a, pre123_a, pre123_a);

   const auto post_a = _mm256_set_epi64x(post14_a, post3_a, post2_a, post14_a);

   const auto post_c =
      _mm256_set_epi64x(0x6E6E6E6E6E6E6E6E, 0x3737373737373737, 0xDCDCDCDCDCDCDCDC, 0x6E6E6E6E6E6E6E6E);

   // Pre-affine transform (y = A_pre * x ^ pre_c per byte), then GF(2^8)
   // inversion with the per-lane post matrix; XORing post_c adds each box's
   // output constant.
   auto y = _mm256_gf2p8affine_epi64_epi8(x, pre, pre_c);
   return _mm256_xor_si256(post_c, _mm256_gf2p8affineinv_epi64_epi8(y, post_a, 0));
}
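// F is Camellia's round function minus the key addition (callers XOR the
// subkey into x first). The input is broadcast to every lane, all four
// s-boxes are evaluated on every byte, and two byte shuffles select which
// s-box output feeds which term of the P permutation (0xFF selector bytes
// produce zero). XORing the shuffle results and then folding the four 64-bit
// lanes together yields P(S(x)).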
inline BOTAN_FN_ISA_AVX2_GFNI uint64_t F(uint64_t x) {
   auto s_vec = camellia_s1234(_mm256_set1_epi64x(x));

   auto Z0 = _mm256_shuffle_epi8(
      s_vec, _mm256_set_epi64x(0x0C0CFF0CFFFF0C0C, 0x05FF0505FF0505FF, 0xFF0E0E0E0E0EFFFF, 0x070707FF07FFFF07));

   auto Z1 = _mm256_shuffle_epi8(
      s_vec, _mm256_set_epi64x(0x0909FF090909FF09, 0x02FF020202FF0202, 0xFF0B0B0BFF0B0B0B, 0x000000FF000000FF));

   Z0 = _mm256_xor_si256(Z0, Z1);
   // Z must be 32-byte aligned for the aligned store below.
   alignas(32) uint64_t Z[4];
   _mm256_store_si256(reinterpret_cast<__m256i*>(Z), Z0);

   return Z[0] ^ Z[1] ^ Z[2] ^ Z[3];
}
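// FL and FLINV are Camellia's keyed mixing functions (RFC 3713), applied
// between the 6-round Feistel blocks. Each works on the 32-bit halves of the
// input and subkey using AND, OR, XOR and a 1-bit rotation.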
inline uint64_t FL(uint64_t v, uint64_t K) {
   uint32_t x1 = static_cast<uint32_t>(v >> 32);
   uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF);

   const uint32_t k1 = static_cast<uint32_t>(K >> 32);
   const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);

   x2 ^= rotl<1>(x1 & k1);
   x1 ^= (x2 | k2);

   return ((static_cast<uint64_t>(x1) << 32) | x2);
}
inline uint64_t FLINV(uint64_t v, uint64_t K) {
   uint32_t x1 = static_cast<uint32_t>(v >> 32);
   uint32_t x2 = static_cast<uint32_t>(v & 0xFFFFFFFF);

   const uint32_t k1 = static_cast<uint32_t>(K >> 32);
   const uint32_t k2 = static_cast<uint32_t>(K & 0xFFFFFFFF);

   x1 ^= (x2 | k2);
   x2 ^= rotl<1>(x1 & k1);

   return ((static_cast<uint64_t>(x1) << 32) | x2);
}