Botan 3.7.1
Crypto and TLS for C&&
simd_avx2.h
Go to the documentation of this file.
1/*
2* (C) 2018 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_SIMD_AVX2_H_
8#define BOTAN_SIMD_AVX2_H_
9
10#include <botan/compiler.h>
11#include <botan/types.h>
12#include <immintrin.h>
13
14namespace Botan {
15
16#define BOTAN_AVX2_FN BOTAN_FUNC_ISA("avx2")
17
19 public:
// Copy/move construction, copy/move assignment and destruction are all
// explicitly defaulted, i.e. compiler-generated member-wise operations.
20 SIMD_8x32& operator=(const SIMD_8x32& other) = default;
21 SIMD_8x32(const SIMD_8x32& other) = default;
22
23 SIMD_8x32& operator=(SIMD_8x32&& other) = default;
24 SIMD_8x32(SIMD_8x32&& other) = default;
25
26 ~SIMD_8x32() = default;
27
29 BOTAN_FORCE_INLINE SIMD_8x32() noexcept { m_avx2 = _mm256_setzero_si256(); }
30
32 explicit SIMD_8x32(const uint32_t B[8]) noexcept {
33 m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
34 }
35
37 explicit SIMD_8x32(uint32_t B0,
38 uint32_t B1,
39 uint32_t B2,
40 uint32_t B3,
41 uint32_t B4,
42 uint32_t B5,
43 uint32_t B6,
44 uint32_t B7) noexcept {
45 m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
46 }
47
49 static SIMD_8x32 splat(uint32_t B) noexcept { return SIMD_8x32(_mm256_set1_epi32(B)); }
50
52 static SIMD_8x32 load_le(const uint8_t* in) noexcept {
53 return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54 }
55
57 static SIMD_8x32 load_le128(const uint8_t* in) noexcept {
58 return SIMD_8x32(_mm256_broadcastsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))));
59 }
60
62 static SIMD_8x32 load_le128(const uint32_t* in) noexcept {
63 return SIMD_8x32(_mm256_broadcastsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))));
64 }
65
67 static SIMD_8x32 load_be(const uint8_t* in) noexcept { return load_le(in).bswap(); }
68
70 void store_le(uint8_t out[]) const noexcept { _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2); }
71
73 void store_le128(uint8_t out[]) const noexcept {
74 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), _mm256_extracti128_si256(raw(), 0));
75 }
76
78 void store_be(uint8_t out[]) const noexcept { bswap().store_le(out); }
79
// Rotate every 32-bit lane left by ROT bits (ROT is constrained to 1..31).
// NOTE(review): the declaration line between the template header and the
// requires-clause is missing from this listing; the name is presumably rotl
// (the shuffle constants below are named shuf_rotl_*) - confirm against the header.
80 template <size_t ROT>
82 requires(ROT > 0 && ROT < 32)
83 {
84#if defined(__AVX512VL__)
// With AVX-512VL a dedicated 32-bit rotate instruction is used.
85 return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
86#else
// Rotations by a whole number of bytes (8, 16, 24) are pure byte permutations
// inside each word, so they are done with one byte shuffle. The same 128-bit
// pattern is repeated for both halves because the shuffle works per 128-bit half.
87 if constexpr(ROT == 8) {
88 const __m256i shuf_rotl_8 =
89 _mm256_set_epi64x(0x0e0d0c0f'0a09080b, 0x06050407'02010003, 0x0e0d0c0f'0a09080b, 0x06050407'02010003);
90
91 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
92 } else if constexpr(ROT == 16) {
93 const __m256i shuf_rotl_16 =
94 _mm256_set_epi64x(0x0d0c0f0e'09080b0a, 0x05040706'01000302, 0x0d0c0f0e'09080b0a, 0x05040706'01000302);
95
96 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
97 } else if constexpr(ROT == 24) {
98 const __m256i shuf_rotl_24 =
99 _mm256_set_epi64x(0x0c0f0e0d'080b0a09, 0x04070605'00030201, 0x0c0f0e0d'080b0a09, 0x04070605'00030201);
100
101 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_24));
102 } else {
// Generic case: (x << ROT) | (x >> (32 - ROT)) in every lane.
103 return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
104 _mm256_srli_epi32(m_avx2, static_cast<int>(32 - ROT))));
105 }
106#endif
107 }
108
// Delegates to a left rotation by the complementary amount (32 - ROT).
// NOTE(review): the declaration line is missing from this listing; presumably
// this is the right-rotate counterpart (rotr) - confirm against the header.
109 template <size_t ROT>
111 return this->rotl<32 - ROT>();
112 }
113
// Returns the XOR of this vector rotated right by 2, 13 and 22 bits.
// NOTE(review): the declaration line is missing from this listing; the rotation
// amounts match SHA-256's big-sigma-0, so this is presumably sigma0 - confirm.
115 const SIMD_8x32 rot1 = this->rotr<2>();
116 const SIMD_8x32 rot2 = this->rotr<13>();
117 const SIMD_8x32 rot3 = this->rotr<22>();
118 return rot1 ^ rot2 ^ rot3;
119 }
120
// Returns the XOR of this vector rotated right by 6, 11 and 25 bits.
// NOTE(review): the declaration line is missing from this listing; the rotation
// amounts match SHA-256's big-sigma-1, so this is presumably sigma1 - confirm.
122 const SIMD_8x32 rot1 = this->rotr<6>();
123 const SIMD_8x32 rot2 = this->rotr<11>();
124 const SIMD_8x32 rot3 = this->rotr<25>();
125 return rot1 ^ rot2 ^ rot3;
126 }
127
129 SIMD_8x32 operator+(const SIMD_8x32& other) const noexcept {
130 SIMD_8x32 retval(*this);
131 retval += other;
132 return retval;
133 }
134
136 SIMD_8x32 operator-(const SIMD_8x32& other) const noexcept {
137 SIMD_8x32 retval(*this);
138 retval -= other;
139 return retval;
140 }
141
143 SIMD_8x32 operator^(const SIMD_8x32& other) const noexcept {
144 SIMD_8x32 retval(*this);
145 retval ^= other;
146 return retval;
147 }
148
150 SIMD_8x32 operator|(const SIMD_8x32& other) const noexcept {
151 SIMD_8x32 retval(*this);
152 retval |= other;
153 return retval;
154 }
155
157 SIMD_8x32 operator&(const SIMD_8x32& other) const noexcept {
158 SIMD_8x32 retval(*this);
159 retval &= other;
160 return retval;
161 }
162
164 void operator+=(const SIMD_8x32& other) { m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2); }
165
167 void operator-=(const SIMD_8x32& other) { m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2); }
168
170 void operator^=(const SIMD_8x32& other) { m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2); }
171
173 void operator^=(uint32_t other) { *this ^= SIMD_8x32::splat(other); }
174
176 void operator|=(const SIMD_8x32& other) { m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2); }
177
179 void operator&=(const SIMD_8x32& other) { m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2); }
180
// Shift every 32-bit lane left by SHIFT bits, filling with zeros.
// NOTE(review): the declaration line is missing from this listing; presumably
// this is the member shl<SHIFT>() mirroring shr<SHIFT>() - confirm against the header.
181 template <int SHIFT>
183 return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
184 }
185
186 template <int SHIFT>
187 BOTAN_AVX2_FN SIMD_8x32 shr() const noexcept {
188 return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
189 }
190
192 SIMD_8x32 operator~() const noexcept {
193 return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
194 }
195
196 // (~reg) & other
198 SIMD_8x32 andc(const SIMD_8x32& other) const noexcept {
199 return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
200 }
201
203 SIMD_8x32 bswap() const noexcept {
204 const uint8_t BSWAP_MASK[32] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
205 19, 18, 17, 16, 23, 22, 21, 20, 27, 26, 25, 24, 31, 30, 29, 28};
206
207 const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
208
209 const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
210
211 return SIMD_8x32(output);
212 }
213
215 SIMD_8x32 rev_words() const noexcept { return SIMD_8x32(_mm256_shuffle_epi32(raw(), 0b00011011)); }
216
218 static void transpose(SIMD_8x32& B0, SIMD_8x32& B1, SIMD_8x32& B2, SIMD_8x32& B3) noexcept {
219 const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
220 const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
221 const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
222 const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
223
224 B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
225 B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
226 B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
227 B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
228 }
229
231 static void transpose(SIMD_8x32& B0,
232 SIMD_8x32& B1,
233 SIMD_8x32& B2,
234 SIMD_8x32& B3,
235 SIMD_8x32& B4,
236 SIMD_8x32& B5,
237 SIMD_8x32& B6,
238 SIMD_8x32& B7) noexcept {
239 transpose(B0, B1, B2, B3);
240 transpose(B4, B5, B6, B7);
241
242 swap_tops(B0, B4);
243 swap_tops(B1, B5);
244 swap_tops(B2, B6);
245 swap_tops(B3, B7);
246 }
247
249 static SIMD_8x32 choose(const SIMD_8x32& mask, const SIMD_8x32& a, const SIMD_8x32& b) noexcept {
250#if defined(__AVX512VL__)
251 return _mm256_ternarylogic_epi32(mask.raw(), a.raw(), b.raw(), 0xca);
252#else
253 return (mask & a) ^ mask.andc(b);
254#endif
255 }
256
258 static SIMD_8x32 majority(const SIMD_8x32& x, const SIMD_8x32& y, const SIMD_8x32& z) noexcept {
259#if defined(__AVX512VL__)
260 return _mm256_ternarylogic_epi32(x.raw(), y.raw(), z.raw(), 0xe8);
261#else
262 return SIMD_8x32::choose(x ^ y, z, y);
263#endif
264 }
265
/// Issue VZEROUPPER, which clears the upper 128 bits of the YMM registers.
static void reset_registers() noexcept {
   _mm256_zeroupper();
}
268
/// Issue VZEROALL, which zeroes the full contents of the YMM registers.
static void zero_registers() noexcept {
   _mm256_zeroall();
}
271
272 __m256i BOTAN_AVX2_FN raw() const noexcept { return m_avx2; }
273
275 SIMD_8x32(__m256i x) noexcept : m_avx2(x) {}
276
277 private:
279 static void swap_tops(SIMD_8x32& A, SIMD_8x32& B) {
280 SIMD_8x32 T0 = _mm256_permute2x128_si256(A.raw(), B.raw(), 0 + (2 << 4));
281 SIMD_8x32 T1 = _mm256_permute2x128_si256(A.raw(), B.raw(), 1 + (3 << 4));
282 A = T0;
283 B = T1;
284 }
285
286 __m256i m_avx2;
287};
288
289template <size_t R>
290inline SIMD_8x32 rotl(SIMD_8x32 input) {
291 return input.rotl<R>();
292}
293
294template <size_t R>
295inline SIMD_8x32 rotr(SIMD_8x32 input) {
296 return input.rotr<R>();
297}
298
299// For Serpent:
300template <size_t S>
301inline SIMD_8x32 shl(SIMD_8x32 input) {
302 return input.shl<S>();
303}
304
305} // namespace Botan
306
307#endif
SIMD_8x32 & operator=(SIMD_8x32 &&other)=default
SIMD_8x32(const SIMD_8x32 &other)=default
uint32_t uint32_t B2
Definition simd_avx2.h:39
uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t B6
Definition simd_avx2.h:43
~SIMD_8x32()=default
uint32_t uint32_t uint32_t uint32_t uint32_t B5
Definition simd_avx2.h:42
SIMD_8x32(SIMD_8x32 &&other)=default
uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t uint32_t B7 noexcept
Definition simd_avx2.h:44
SIMD_8x32 & operator=(const SIMD_8x32 &other)=default
uint32_t uint32_t uint32_t B3
Definition simd_avx2.h:40
uint32_t uint32_t uint32_t uint32_t B4
Definition simd_avx2.h:41
int(* final)(unsigned char *, CTX *)
#define BOTAN_FORCE_INLINE
Definition compiler.h:71
ASN1_Type operator|(ASN1_Type x, ASN1_Type y)
Definition asn1_obj.h:75
constexpr T rotl(T input)
Definition rotate.h:21
OctetString operator^(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:109
OctetString operator+(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:99
constexpr T choose(T mask, T a, T b)
Definition bit_ops.h:204
constexpr T rotr(T input)
Definition rotate.h:33
BigInt operator-(const BigInt &x, const BigInt &y)
Definition bigint.h:1094
constexpr auto operator|=(Strong< T1, Tags... > &a, T2 b)
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:764
constexpr auto operator&=(Strong< T1, Tags... > &a, T2 b)
constexpr T majority(T a, T b, T c)
Definition bit_ops.h:210
std::vector< uint8_t, Alloc > & operator^=(std::vector< uint8_t, Alloc > &out, const std::vector< uint8_t, Alloc2 > &in)
Definition mem_ops.h:446
std::vector< T, Alloc > & operator+=(std::vector< T, Alloc > &out, const std::vector< T, Alloc2 > &in)
Definition secmem.h:80
SIMD_4x32 shl(SIMD_4x32 input)
Definition simd_32.h:634
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:521
constexpr auto operator-=(Strong< T1, Tags... > &a, T2 b)
const SIMD_8x32 & b
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:773
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:530
ECIES_Flags operator&(ECIES_Flags a, ECIES_Flags b)
Definition ecies.h:55
#define BOTAN_AVX2_FN
Definition simd_avx2.h:16