Botan 3.6.0
Crypto and TLS for C&&
simd_avx2.h
Go to the documentation of this file.
1/*
2* (C) 2018 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_SIMD_AVX2_H_
8#define BOTAN_SIMD_AVX2_H_
9
10#include <botan/types.h>
11#include <immintrin.h>
12
13namespace Botan {
14
15#define BOTAN_AVX2_FN BOTAN_FUNC_ISA("avx2")
16
18 public:
// Copy assignment, copy construction, move assignment, move construction and
// destruction are all explicitly defaulted; the class adds no custom logic for them.
19 SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20 SIMD_8x32(const SIMD_8x32& other) = default;
21
22 SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23 SIMD_8x32(SIMD_8x32&& other) = default;
24
25 ~SIMD_8x32() = default;
26
28 BOTAN_FORCE_INLINE SIMD_8x32() noexcept { m_avx2 = _mm256_setzero_si256(); }
29
31 explicit SIMD_8x32(const uint32_t B[8]) noexcept {
32 m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
33 }
34
36 explicit SIMD_8x32(uint32_t B0,
37 uint32_t B1,
38 uint32_t B2,
39 uint32_t B3,
40 uint32_t B4,
41 uint32_t B5,
42 uint32_t B6,
43 uint32_t B7) noexcept {
44 m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
45 }
46
48 static SIMD_8x32 splat(uint32_t B) noexcept { return SIMD_8x32(_mm256_set1_epi32(B)); }
49
51 static SIMD_8x32 load_le(const uint8_t* in) noexcept {
52 return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
53 }
54
56 static SIMD_8x32 load_le128(const uint8_t* in) noexcept {
57 return SIMD_8x32(_mm256_broadcastsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))));
58 }
59
61 static SIMD_8x32 load_le128(const uint32_t* in) noexcept {
62 return SIMD_8x32(_mm256_broadcastsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))));
63 }
64
66 static SIMD_8x32 load_be(const uint8_t* in) noexcept { return load_le(in).bswap(); }
67
69 void store_le(uint8_t out[]) const noexcept { _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2); }
70
72 void store_le128(uint8_t out[]) const noexcept {
73 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), _mm256_extracti128_si256(raw(), 0));
74 }
75
77 void store_be(uint8_t out[]) const noexcept { bswap().store_le(out); }
78
// Rotate every 32-bit element left by the compile-time constant ROT.
// The requires-clause limits ROT to the range 1..31.
// NOTE(review): the declarator line between the template header and the
// requires-clause is not visible in this listing (numbering jumps 79 -> 81);
// other code in this file invokes this member as rotl<N>() - confirm the signature.
79 template <size_t ROT>
81 requires(ROT > 0 && ROT < 32)
82 {
// When AVX-512VL is enabled at compile time, use the dedicated rotate intrinsic.
83#if defined(__AVX512VL__)
84 return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
85#else
// Rotations by a whole number of bytes (8, 16, 24) are done with a single byte
// shuffle; the same 16-byte index pattern is supplied for both 128-bit halves.
86 if constexpr(ROT == 8) {
87 const __m256i shuf_rotl_8 =
88 _mm256_set_epi64x(0x0e0d0c0f'0a09080b, 0x06050407'02010003, 0x0e0d0c0f'0a09080b, 0x06050407'02010003);
89
90 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
91 } else if constexpr(ROT == 16) {
92 const __m256i shuf_rotl_16 =
93 _mm256_set_epi64x(0x0d0c0f0e'09080b0a, 0x05040706'01000302, 0x0d0c0f0e'09080b0a, 0x05040706'01000302);
94
95 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96 } else if constexpr(ROT == 24) {
97 const __m256i shuf_rotl_24 =
98 _mm256_set_epi64x(0x0c0f0e0d'080b0a09, 0x04070605'00030201, 0x0c0f0e0d'080b0a09, 0x04070605'00030201);
99
100 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_24));
101 } else {
// Generic case: OR together (x << ROT) and (x >> (32 - ROT)).
102 return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
103 _mm256_srli_epi32(m_avx2, static_cast<int>(32 - ROT))));
104 }
105#endif
106 }
107
// Rotate right by ROT, implemented as a rotate-left by 32 - ROT.
// NOTE(review): the declarator line is not visible in this listing (numbering
// jumps 108 -> 110); other code in this file invokes this member as rotr<N>() - confirm.
108 template <size_t ROT>
110 return this->rotl<32 - ROT>();
111 }
112
// Returns the XOR of this value rotated right by 2, by 13 and by 22 bits.
// NOTE(review): the signature line is missing from this listing; the rotation
// amounts 2/13/22 match SHA-256's big-sigma-0 function - confirm the name.
114 const SIMD_8x32 rot1 = this->rotr<2>();
115 const SIMD_8x32 rot2 = this->rotr<13>();
116 const SIMD_8x32 rot3 = this->rotr<22>();
117 return rot1 ^ rot2 ^ rot3;
118 }
119
// Returns the XOR of this value rotated right by 6, by 11 and by 25 bits.
// NOTE(review): the signature line is missing from this listing; the rotation
// amounts 6/11/25 match SHA-256's big-sigma-1 function - confirm the name.
121 const SIMD_8x32 rot1 = this->rotr<6>();
122 const SIMD_8x32 rot2 = this->rotr<11>();
123 const SIMD_8x32 rot3 = this->rotr<25>();
124 return rot1 ^ rot2 ^ rot3;
125 }
126
128 SIMD_8x32 operator+(const SIMD_8x32& other) const noexcept {
129 SIMD_8x32 retval(*this);
130 retval += other;
131 return retval;
132 }
133
135 SIMD_8x32 operator-(const SIMD_8x32& other) const noexcept {
136 SIMD_8x32 retval(*this);
137 retval -= other;
138 return retval;
139 }
140
142 SIMD_8x32 operator^(const SIMD_8x32& other) const noexcept {
143 SIMD_8x32 retval(*this);
144 retval ^= other;
145 return retval;
146 }
147
149 SIMD_8x32 operator|(const SIMD_8x32& other) const noexcept {
150 SIMD_8x32 retval(*this);
151 retval |= other;
152 return retval;
153 }
154
156 SIMD_8x32 operator&(const SIMD_8x32& other) const noexcept {
157 SIMD_8x32 retval(*this);
158 retval &= other;
159 return retval;
160 }
161
163 void operator+=(const SIMD_8x32& other) { m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2); }
164
166 void operator-=(const SIMD_8x32& other) { m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2); }
167
169 void operator^=(const SIMD_8x32& other) { m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2); }
170
172 void operator^=(uint32_t other) { *this ^= SIMD_8x32::splat(other); }
173
175 void operator|=(const SIMD_8x32& other) { m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2); }
176
178 void operator&=(const SIMD_8x32& other) { m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2); }
179
// Shift every 32-bit element left by the compile-time constant SHIFT
// using _mm256_slli_epi32.
// NOTE(review): the declarator line is not visible in this listing (numbering
// jumps 180 -> 182); other code in this file invokes this member as shl<N>() - confirm.
180 template <int SHIFT>
182 return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
183 }
184
185 template <int SHIFT>
186 BOTAN_AVX2_FN SIMD_8x32 shr() const noexcept {
187 return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
188 }
189
191 SIMD_8x32 operator~() const noexcept {
192 return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
193 }
194
195 // (~reg) & other
197 SIMD_8x32 andc(const SIMD_8x32& other) const noexcept {
198 return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
199 }
200
202 SIMD_8x32 bswap() const noexcept {
203 const uint8_t BSWAP_MASK[32] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
204 19, 18, 17, 16, 23, 22, 21, 20, 27, 26, 25, 24, 31, 30, 29, 28};
205
206 const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
207
208 const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
209
210 return SIMD_8x32(output);
211 }
212
214 SIMD_8x32 rev_words() const noexcept { return SIMD_8x32(_mm256_shuffle_epi32(raw(), 0b00011011)); }
215
217 static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) noexcept {
218 const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
219 const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
220 const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
221 const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
222
223 B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
224 B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
225 B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
226 B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
227 }
228
230 static void transpose(SIMD_8x32& B0,
231 SIMD_8x32& B1,
232 SIMD_8x32& B2,
233 SIMD_8x32& B3,
234 SIMD_8x32& B4,
235 SIMD_8x32& B5,
236 SIMD_8x32& B6,
237 SIMD_8x32& B7) noexcept {
238 transpose(B0, B1, B2, B3);
239 transpose(B4, B5, B6, B7);
240
241 swap_tops(B0, B4);
242 swap_tops(B1, B5);
243 swap_tops(B2, B6);
244 swap_tops(B3, B7);
245 }
246
248 static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b) noexcept {
249#if defined(__AVX512VL__)
250 return _mm256_ternarylogic_epi32(mask.raw(), a.raw(), b.raw(), 0xca);
251#else
252 return (mask & a) ^ mask.andc(b);
253#endif
254 }
255
257 static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z) noexcept {
258#if defined(__AVX512VL__)
259 return _mm256_ternarylogic_epi32(x.raw(), y.raw(), z.raw(), 0xe8);
260#else
261 return SIMD_8x32::choose(x ^ y, z, y);
262#endif
263 }
264
/// Issue _mm256_zeroupper().
static void reset_registers() noexcept {
   _mm256_zeroupper();
}
267
/// Issue _mm256_zeroall().
static void zero_registers() noexcept {
   _mm256_zeroall();
}
270
271 __m256i BOTAN_AVX2_FN raw() const noexcept { return m_avx2; }
272
274 SIMD_8x32(__m256i x) noexcept : m_avx2(x) {}
275
276 private:
278 static void swap_tops(SIMD_8x32& A, SIMD_8x32& B) {
279 SIMD_8x32 T0 = _mm256_permute2x128_si256(A.raw(), B.raw(), 0 + (2 << 4));
280 SIMD_8x32 T1 = _mm256_permute2x128_si256(A.raw(), B.raw(), 1 + (3 << 4));
281 A = T0;
282 B = T1;
283 }
284
285 __m256i m_avx2;
286};
287
288template <size_t R>
289inline SIMD_8x32 rotl(SIMD_8x32 input) {
290 return input.rotl<R>();
291}
292
293template <size_t R>
294inline SIMD_8x32 rotr(SIMD_8x32 input) {
295 return input.rotr<R>();
296}
297
298// For Serpent:
299template <size_t S>
300inline SIMD_8x32 shl(SIMD_8x32 input) {
301 return input.shl<S>();
302}
303
304} // namespace Botan
305
306#endif
SIMD_8x32 & operator=(SIMD_8x32 &&other)=default
SIMD_8x32(const SIMD_8x32 &other)=default
uint32_t uint32_t B2
Definition simd_avx2.h:38
uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t B6
Definition simd_avx2.h:42
~SIMD_8x32()=default
uint32_t uint32_t uint32_t uint32_t uint32_t B5
Definition simd_avx2.h:41
SIMD_8x32(SIMD_8x32 &&other)=default
uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t B7 noexcept
Definition simd_avx2.h:43
SIMD_8x32 & operator=(const SIMD_8x32 &other)=default
uint32_t uint32_t uint32_t B3
Definition simd_avx2.h:39
uint32_t uint32_t uint32_t uint32_t B4
Definition simd_avx2.h:40
int(* final)(unsigned char *, CTX *)
#define BOTAN_FORCE_INLINE
Definition compiler.h:165
ASN1_Type operator|(ASN1_Type x, ASN1_Type y)
Definition asn1_obj.h:75
constexpr T rotl(T input)
Definition rotate.h:21
OctetString operator^(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:109
OctetString operator+(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:99
constexpr T choose(T mask, T a, T b)
Definition bit_ops.h:193
constexpr T rotr(T input)
Definition rotate.h:33
BigInt operator-(const BigInt &x, const BigInt &y)
Definition bigint.h:1094
constexpr auto operator|=(Strong< T1, Tags... > &a, T2 b)
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:764
constexpr auto operator&=(Strong< T1, Tags... > &a, T2 b)
constexpr T majority(T a, T b, T c)
Definition bit_ops.h:199
std::vector< uint8_t, Alloc > & operator^=(std::vector< uint8_t, Alloc > &out, const std::vector< uint8_t, Alloc2 > &in)
Definition mem_ops.h:445
std::vector< T, Alloc > & operator+=(std::vector< T, Alloc > &out, const std::vector< T, Alloc2 > &in)
Definition secmem.h:80
SIMD_4x32 shl(SIMD_4x32 input)
Definition simd_32.h:634
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:521
constexpr auto operator-=(Strong< T1, Tags... > &a, T2 b)
const SIMD_8x32 & b
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:773
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:530
ECIES_Flags operator&(ECIES_Flags a, ECIES_Flags b)
Definition ecies.h:50
#define BOTAN_AVX2_FN
Definition simd_avx2.h:15