Botan 3.11.0
Crypto and TLS for C++
polyval_fn.h
Go to the documentation of this file.
1/*
2* (C) 2026 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_POLYVAL_FN_H_
8#define BOTAN_POLYVAL_FN_H_
9
10#include <botan/internal/simd_4x32.h>
11
12namespace Botan {
13
14// NOLINTBEGIN(portability-simd-intrinsics)
15
16BOTAN_FORCE_INLINE BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 reverse_vector(const SIMD_4x32& in) {
17#if defined(BOTAN_SIMD_USE_SSSE3)
18 const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19 return SIMD_4x32(_mm_shuffle_epi8(in.raw(), BSWAP_MASK));
20#elif defined(BOTAN_SIMD_USE_NEON)
21 const uint8_t maskb[16] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
22 const uint8x16_t mask = vld1q_u8(maskb);
23 return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(in.raw()), mask)));
24#elif defined(BOTAN_SIMD_USE_ALTIVEC)
25 const __vector unsigned char mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
26 return SIMD_4x32(vec_perm(in.raw(), in.raw(), mask));
27#endif
28}
29
30template <int M>
31BOTAN_FORCE_INLINE BOTAN_FN_ISA_CLMUL SIMD_4x32 clmul(const SIMD_4x32& H, const SIMD_4x32& x) {
32 static_assert(M == 0x00 || M == 0x01 || M == 0x10 || M == 0x11, "Valid clmul mode");
33
34#if defined(BOTAN_SIMD_USE_SSSE3)
35 return SIMD_4x32(_mm_clmulepi64_si128(x.raw(), H.raw(), M));
36#elif defined(BOTAN_SIMD_USE_NEON)
37 const uint64_t a = vgetq_lane_u64(vreinterpretq_u64_u32(x.raw()), M & 0x01);
38 const uint64_t b = vgetq_lane_u64(vreinterpretq_u64_u32(H.raw()), (M & 0x10) >> 4);
39
40 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
41 __n64 a1 = {a}, b1 = {b};
42 return SIMD_4x32(vmull_p64(a1, b1));
43 #else
44 return SIMD_4x32(reinterpret_cast<uint32x4_t>(vmull_p64(a, b)));
45 #endif
46
47#elif defined(BOTAN_SIMD_USE_ALTIVEC)
48 const SIMD_4x32 mask_lo = SIMD_4x32(0, 0, 0xFFFFFFFF, 0xFFFFFFFF);
49 constexpr uint8_t flip = (std::endian::native == std::endian::big) ? 0x11 : 0x00;
50
51 SIMD_4x32 i1 = x;
52 SIMD_4x32 i2 = H;
53
54 if constexpr(std::endian::native == std::endian::big) {
55 i1 = reverse_vector(i1).bswap();
56 i2 = reverse_vector(i2).bswap();
57 }
58
59 if constexpr(M == (0x11 ^ flip)) {
60 i1 &= mask_lo;
61 i2 &= mask_lo;
62 } else if constexpr(M == (0x10 ^ flip)) {
63 i1 = i1.shift_elems_left<2>();
64 } else if constexpr(M == (0x01 ^ flip)) {
65 i2 = i2.shift_elems_left<2>();
66 } else if constexpr(M == (0x00 ^ flip)) {
67 i1 = mask_lo.andc(i1);
68 i2 = mask_lo.andc(i2);
69 }
70
71 auto i1v = reinterpret_cast<__vector unsigned long long>(i1.raw());
72 auto i2v = reinterpret_cast<__vector unsigned long long>(i2.raw());
73
74 #if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vpmsumd)
75 auto rv = __builtin_crypto_vpmsumd(i1v, i2v);
76 #else
77 auto rv = __builtin_altivec_crypto_vpmsumd(i1v, i2v);
78 #endif
79
80 auto z = SIMD_4x32(reinterpret_cast<__vector unsigned int>(rv));
81
82 if constexpr(std::endian::native == std::endian::big) {
83 z = reverse_vector(z).bswap();
84 }
85
86 return z;
87#endif
88}
89
90// NOLINTEND(portability-simd-intrinsics)
91
92BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 mulx_polyval(const SIMD_4x32& h) {
93 const auto V = SIMD_4x32(0x00000001, 0x00000000, 0x00000000, 0xc2000000);
94
95 // Bitmask set iff the top bit of h is set
96 const auto mask = h.top_bit_mask();
97
98 // Extract the top bits of the words and move them into place as the low bit of the next word
99 auto top_bits = h.shr<31>().shift_elems_left<1>();
100
101 // The main shift, adding back in the top bits that are otherwise lost
102 auto shifted_h = h.shl<1>() | top_bits;
103
104 return shifted_h ^ (mask & V);
105}
106
107BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_reduce(const SIMD_4x32& hi, const SIMD_4x32& lo) {
108 const SIMD_4x32 V(0, 0xC2000000, 0, 0);
109
110 /*
111 Montgomery reduction
112 Input: 256-bit operand [X3 : X2 : X1 : X0]
113 [A1 : A0] = X0 • 0xc200000000000000
114 [B1 : B0] = [X0 ⨁ A1 : X1 ⨁ A0]
115 [C1 : C0] = B0 • 0xc200000000000000
116 [D1 : D0] = [B0 ⨁ C1 : B1 ⨁ C0]
117 Output: [D1 ⨁ X3 : D0 ⨁ X2]
118 */
119
120 const auto A = clmul<0x00>(lo, V);
121 const auto B = A ^ lo.swap_halves();
122 const auto C = clmul<0x00>(B, V);
123 const auto D = C ^ B.swap_halves();
124
125 return D ^ hi;
126}
127
128BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_multiply(const SIMD_4x32& H, const SIMD_4x32& x) {
129 SIMD_4x32 hi = clmul<0x11>(H, x);
130 const SIMD_4x32 mid = clmul<0x10>(H, x) ^ clmul<0x01>(H, x);
131 SIMD_4x32 lo = clmul<0x00>(H, x);
132
133 hi ^= mid.shift_elems_right<2>();
134 lo ^= mid.shift_elems_left<2>();
135
136 return polyval_reduce(hi, lo);
137}
138
139} // namespace Botan
140
141#endif
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 top_bit_mask() const
Definition simd_4x32.h:883
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shr() const noexcept
Definition simd_4x32.h:520
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shl() const noexcept
Definition simd_4x32.h:500
native_simd_type BOTAN_FN_ISA_SIMD_4X32 raw() const noexcept
Definition simd_4x32.h:916
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_right() const noexcept
Definition simd_4x32.h:639
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 andc(const SIMD_4x32 &other) const noexcept
Definition simd_4x32.h:552
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 bswap() const noexcept
Definition simd_4x32.h:576
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_left() const noexcept
Definition simd_4x32.h:602
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 swap_halves() const
Definition simd_4x32.h:908
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 mulx_polyval(const SIMD_4x32 &h)
Definition polyval_fn.h:92
BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_multiply(const SIMD_4x32 &H, const SIMD_4x32 &x)
Definition polyval_fn.h:128
BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_reduce(const SIMD_4x32 &hi, const SIMD_4x32 &lo)
Definition polyval_fn.h:107
BOTAN_FORCE_INLINE BOTAN_FN_ISA_CLMUL SIMD_4x32 clmul(const SIMD_4x32 &H, const SIMD_4x32 &x)
Definition polyval_fn.h:31
BOTAN_FORCE_INLINE BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 reverse_vector(const SIMD_4x32 &in)
Definition polyval_fn.h:16