8#include <botan/internal/ghash.h>
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/simd_4x32.h>
12#include <botan/internal/target_info.h>
15#if defined(BOTAN_SIMD_USE_SSSE3)
16 #include <immintrin.h>
17 #include <wmmintrin.h>
// Reverses the byte order of the 128-bit vector `in`, with one implementation
// per SIMD backend.
// NOTE(review): the enclosing function header and the closing #endif are not
// visible in this excerpt; presumably this is the body of the reverse_vector()
// helper that is called further down - confirm.
25#if defined(BOTAN_SIMD_USE_SSSE3)
// _mm_set_epi8 lists its arguments from byte 15 down to byte 0, so the mask
// holds index 15 in byte 0 ... index 0 in byte 15: output byte i = input byte 15-i.
26 const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
27 return SIMD_4x32(_mm_shuffle_epi8(in.raw(), BSWAP_MASK));
28#elif defined(BOTAN_SIMD_USE_NEON)
// Same reversal expressed as a byte table lookup with indices 15..0.
29 const uint8_t maskb[16] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
30 const uint8x16_t mask = vld1q_u8(maskb);
31 return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(in.raw()), mask)));
32#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// vec_perm is given `in` for both source operands, so only the 15..0 index
// pattern matters.
33 const __vector
unsigned char mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
34 return SIMD_4x32(vec_perm(in.raw(), in.raw(), mask));
// 64x64 -> 128 bit carry-less multiply of one 64-bit half of `x` with one
// 64-bit half of `H`; the compile-time mode M picks the halves.
// NOTE(review): the template/function header, several #else/#endif lines and
// the final return are not visible in this excerpt.
//
// Only the four mode values accepted by the assertion below are allowed.
40 static_assert(M == 0x00 || M == 0x01 || M == 0x10 || M == 0x11,
"Valid clmul mode");
42#if defined(BOTAN_SIMD_USE_SSSE3)
// x86: M is forwarded unchanged as the immediate of _mm_clmulepi64_si128.
43 return SIMD_4x32(_mm_clmulepi64_si128(x.raw(), H.raw(), M));
44#elif defined(BOTAN_SIMD_USE_NEON)
// NEON: bit 0 of M selects the 64-bit lane taken from x, bit 4 of M selects
// the 64-bit lane taken from H.
45 const uint64_t a = vgetq_lane_u64(vreinterpretq_u64_u32(x.raw()), M & 0x01);
46 const uint64_t b = vgetq_lane_u64(vreinterpretq_u64_u32(H.raw()), (M & 0x10) >> 4);
48 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
// MSVC path wraps the scalars in __n64 values. NOTE(review): the statement
// that consumes a1/b1 and the matching #else are not visible here.
49 __n64 a1 = {a}, b1 = {b};
// Non-MSVC path (presumably): vmull_p64 on the two scalars, reinterpreted as
// four 32-bit lanes.
52 return SIMD_4x32(
reinterpret_cast<uint32x4_t
>(vmull_p64(a, b)));
55#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// `flip` is XORed into every mode comparison below: 0x11 on big-endian
// targets (inverting both selector bits), 0x00 on little-endian (no change).
57 constexpr uint8_t flip = (std::endian::native == std::endian::big) ? 0x11 : 0x00;
// On big-endian targets both operands are byte-reversed and then bswap'd
// before the multiply; the same transform is applied to the result below.
62 if constexpr(std::endian::native == std::endian::big) {
63 i1 = reverse_vector(i1).bswap();
64 i2 = reverse_vector(i2).bswap();
// Prepare each operand according to the (possibly flipped) mode before it is
// handed to vpmsumd. NOTE(review): the body of the first branch and the
// definition of mask_lo are not visible in this excerpt.
67 if constexpr(M == (0x11 ^ flip)) {
70 }
else if constexpr(M == (0x10 ^ flip)) {
71 i1 = i1.shift_elems_left<2>();
72 }
else if constexpr(M == (0x01 ^ flip)) {
73 i2 = i2.shift_elems_left<2>();
74 }
else if constexpr(M == (0x00 ^ flip)) {
75 i1 = mask_lo.andc(i1);
76 i2 = mask_lo.andc(i2);
// View both operands as vectors of 64-bit elements, the operand type passed
// to the vpmsumd builtins below.
79 auto i1v =
reinterpret_cast<__vector
unsigned long long>(i1.raw());
80 auto i2v =
reinterpret_cast<__vector
unsigned long long>(i2.raw());
// The vpmsumd builtin is spelled differently depending on the compiler; the
// available spelling is chosen with BOTAN_COMPILER_HAS_BUILTIN.
// NOTE(review): the #else/#endif between the two calls are not visible here.
82 #if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vpmsumd)
83 auto rv = __builtin_crypto_vpmsumd(i1v, i2v);
85 auto rv = __builtin_altivec_crypto_vpmsumd(i1v, i2v);
// Back to the 4x32 representation used by the rest of the file.
88 auto z =
SIMD_4x32(
reinterpret_cast<__vector
unsigned int>(rv));
// Undo the big-endian input transform on the product.
90 if constexpr(std::endian::native == std::endian::big) {
91 z = reverse_vector(z).bswap();
// NOTE(review): fragment of a reduction routine whose header and remaining
// statements are not visible; presumably the gcm_reduce() helper called
// further down. The shift amounts (31/30/25 here, 7/2/1 below) look like the
// terms of the GHASH reduction polynomial - confirm against the full file.
//
// X0 is overwritten with three left-shifted copies of X1 XORed together.
108 X0 = X1.
shl<31>() ^ X1.
shl<30>() ^ X1.
shl<25>();
// Three right-shifted copies of X1 are then folded into X0.
113 X0 ^= X1.
shr<7>() ^ X1.
shr<2>() ^ X1.
shr<1>();
// NOTE(review): tail of a multiply routine (presumably gcm_multiply, which is
// called further down); the computation of T0, T1 and T3 is not visible here.
//
// T1 is shifted by two 32-bit elements in each direction and folded into T0
// and T3, which are then passed to gcm_reduce() as a pair.
124 T0 ^= T1.shift_elems_right<2>();
125 T3 ^= T1.shift_elems_left<2>();
127 return gcm_reduce(T0, T3);
// NOTE(review): fragment of the four-block multiply (presumably
// gcm_multiply_x4, which is called further down); its header and the
// declaration of T are not visible here.
//
// `lo` accumulates the four clmul<0x00> products and `hi` the four
// clmul<0x11> products of the (Hi, Xi) pairs.
143 const SIMD_4x32 lo = (clmul<0x00>(H1, X1) ^ clmul<0x00>(H2, X2)) ^ (clmul<0x00>(H3, X3) ^ clmul<0x00>(H4, X4));
145 const SIMD_4x32 hi = (clmul<0x11>(H1, X1) ^ clmul<0x11>(H2, X2)) ^ (clmul<0x11>(H3, X3) ^ clmul<0x11>(H4, X4));
// For every pair, each operand is XORed with a copy of itself shifted right
// by two 32-bit elements and the results are multiplied with clmul<0x00>;
// the four products are accumulated into T.
// NOTE(review): this looks like a Karatsuba-style middle term - confirm.
149 T ^= clmul<0x00>(H1 ^ H1.shift_elems_right<2>(), X1 ^ X1.shift_elems_right<2>());
150 T ^= clmul<0x00>(H2 ^ H2.shift_elems_right<2>(), X2 ^ X2.shift_elems_right<2>());
151 T ^= clmul<0x00>(H3 ^ H3.shift_elems_right<2>(), X3 ^ X3.shift_elems_right<2>());
152 T ^= clmul<0x00>(H4 ^ H4.shift_elems_right<2>(), X4 ^ X4.shift_elems_right<2>());
// T is split by two-element shifts and folded into hi and lo, then a single
// reduction is applied to the combined result.
156 return gcm_reduce(hi ^ T.shift_elems_right<2>(), lo ^ T.shift_elems_left<2>());
// Computes further powers of the hash key with gcm_multiply() and stores them
// into H_pow.
//
// H_bytes: 16 key bytes. NOTE(review): the line that loads H1 from H_bytes is
//          not visible in this excerpt.
// H_pow:   output array of 4 * 2 64-bit words; H2, H3 and H4 are written at
//          word offsets 2, 4 and 6. NOTE(review): presumably H1 occupies
//          offset 0 - that store is not visible here.
161void BOTAN_FN_ISA_CLMUL GHASH::ghash_precompute_cpu(
const uint8_t H_bytes[16], uint64_t H_pow[4 * 2]) {
// H2 = H1*H1, H3 = H1*H2, H4 = H2*H2.
163 const SIMD_4x32 H2 = gcm_multiply(H1, H1);
164 const SIMD_4x32 H3 = gcm_multiply(H1, H2);
165 const SIMD_4x32 H4 = gcm_multiply(H2, H2);
168 H2.store_le(H_pow + 2);
169 H3.store_le(H_pow + 4);
170 H4.store_le(H_pow + 6);
// Updates the 16-byte state `x` from `input`, using the values in H_pow.
// NOTE(review): the rest of the parameter list, the loads of H1..H4, m1..m3
// and `a`, and the final store back to `x` are not visible in this excerpt.
173void BOTAN_FN_ISA_CLMUL GHASH::ghash_multiply_cpu(uint8_t x[16],
174 const uint64_t H_pow[8],
175 const uint8_t input[],
// Wide path: a single gcm_multiply_x4() call combines m3, m2, m1 and the
// running value `a` with H1..H4.
196 a = gcm_multiply_x4(H1, H2, H3, H4, m3, m2, m1, a);
// Per-block path: one gcm_multiply() by H1 per loop iteration.
// NOTE(review): presumably this handles the blocks left over after the wide
// path - confirm against the full file.
203 for(
size_t i = 0; i != blocks; ++i) {
207 a = gcm_multiply(H1, a);
// The accumulator is byte-reversed after the loops.
210 a = reverse_vector(a);
static SIMD_4x32 load_le(const void *in) noexcept
SIMD_4x32 shift_elems_left() const noexcept
SIMD_4x32 shr() const noexcept
SIMD_4x32 shl() const noexcept
SIMD_4x32 shift_elems_right() const noexcept
#define BOTAN_FORCE_INLINE