Botan 2.19.2
Crypto and TLS for C++
simd_avx2.h
Go to the documentation of this file.
1/*
2* (C) 2018 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_SIMD_AVX2_H_
8#define BOTAN_SIMD_AVX2_H_
9
10#include <botan/types.h>
11#include <immintrin.h>
12
13namespace Botan {
14
15class SIMD_8x32 final
16 {
17 public:
18
// Copy/move assignment and copy/move construction all use the
// compiler-generated defaults.
19 SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20 SIMD_8x32(const SIMD_8x32& other) = default;
21
22 SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23 SIMD_8x32(SIMD_8x32&& other) = default;
24
// Default constructor: sets the wrapped 256-bit value to all zero bits.
25 BOTAN_FUNC_ISA("avx2")
26 BOTAN_FORCE_INLINE SIMD_8x32()
27 {
28 m_avx2 = _mm256_setzero_si256();
29 }
30
// Construct from an array of eight 32-bit words. The 32 bytes at B are read
// with an unaligned 256-bit load, so B needs no particular alignment.
31 BOTAN_FUNC_ISA("avx2")
32 explicit SIMD_8x32(const uint32_t B[8])
33 {
34 m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35 }
36
// Construct from eight individual 32-bit words.
// _mm256_set_epi32 takes its arguments highest element first, so passing
// B7..B0 places B0 in the lowest 32-bit element and B7 in the highest.
37 BOTAN_FUNC_ISA("avx2")
38 explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39 uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40 {
41 m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42 }
43
// Return a vector with B copied into each of the eight 32-bit elements.
44 BOTAN_FUNC_ISA("avx2")
45 static SIMD_8x32 splat(uint32_t B)
46 {
47 return SIMD_8x32(_mm256_set1_epi32(B));
48 }
49
// Load 32 bytes starting at `in` with an unaligned 256-bit load.
// The bytes are taken as-is; no byte reordering is applied.
50 BOTAN_FUNC_ISA("avx2")
51 static SIMD_8x32 load_le(const uint8_t* in)
52 {
53 return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54 }
55
// Load 32 bytes starting at `in` as load_le() does, then apply bswap()
// to the loaded value.
56 BOTAN_FUNC_ISA("avx2")
57 static SIMD_8x32 load_be(const uint8_t* in)
58 {
59 return load_le(in).bswap();
60 }
61
// Write the 32 bytes of this vector to `out` with an unaligned 256-bit store.
// The bytes are written as-is; no byte reordering is applied.
62 BOTAN_FUNC_ISA("avx2")
63 void store_le(uint8_t out[]) const
64 {
65 _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66 }
67
// Write bswap() of this vector to `out` via store_le(); this object itself
// is not modified.
68 BOTAN_FUNC_ISA("avx2")
69 void store_be(uint8_t out[]) const
70 {
71 bswap().store_le(out);
72 }
73
// Rotate every 32-bit element left by ROT bits and return the result.
// ROT is a compile-time constant and must satisfy 0 < ROT < 32.
74 template<size_t ROT>
75 BOTAN_FUNC_ISA("avx2")
76 SIMD_8x32 rotl() const
77 {
78 static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79
// When AVX-512VL is enabled at compile time, use the rotate intrinsic directly.
80#if defined(__AVX512VL__)
81 return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82#else
// Rotations by a whole number of bytes are done with a single byte shuffle.
83 BOTAN_IF_CONSTEXPR(ROT == 8)
84 {
// Within each 32-bit word: byte 0 <- byte 3, byte 1 <- byte 0, byte 2 <- byte 1,
// byte 3 <- byte 2 (the same pattern is repeated for both 128-bit halves).
85 const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87
88 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89 }
90 else BOTAN_IF_CONSTEXPR(ROT == 16)
91 {
// Within each 32-bit word: swap the two 16-bit halves.
92 const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94
95 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96 }
97 else
98 {
// General case: (x << ROT) | (x >> (32 - ROT)) on each 32-bit element.
99 return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100 _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101 }
102#endif
103 }
104
// Rotate every 32-bit element right by ROT bits and return the result.
// Implemented as a left rotation by 32-ROT, so rotl's static_assert
// restricts ROT to 0 < ROT < 32.
105 template<size_t ROT>
106 BOTAN_FUNC_ISA("avx2")
107 SIMD_8x32 rotr() const
108 {
109 return this->rotl<32-ROT>();
110 }
111
// Return rotr<ROT1>() ^ rotr<ROT2>() ^ rotr<ROT3>() of this vector.
// Each of ROT1, ROT2 and ROT3 must be a valid rotr constant.
112 template<size_t ROT1, size_t ROT2, size_t ROT3>
113 SIMD_8x32 BOTAN_FUNC_ISA("avx2") rho() const
114 {
117 const SIMD_8x32 rot1 = this->rotr<ROT1>();
118 const SIMD_8x32 rot2 = this->rotr<ROT2>();
119 const SIMD_8x32 rot3 = this->rotr<ROT3>();
120
121 return rot1 ^ rot2 ^ rot3;
122 }
123
// Return the element-wise sum of this vector and `other`.
// Works on a copy and delegates to operator+=, leaving both operands unchanged.
124 BOTAN_FUNC_ISA("avx2")
125 SIMD_8x32 operator+(const SIMD_8x32& other) const
126 {
127 SIMD_8x32 retval(*this);
128 retval += other;
129 return retval;
130 }
131
// Return the element-wise difference of this vector and `other`.
// Works on a copy and delegates to operator-=, leaving both operands unchanged.
132 BOTAN_FUNC_ISA("avx2")
133 SIMD_8x32 operator-(const SIMD_8x32& other) const
134 {
135 SIMD_8x32 retval(*this);
136 retval -= other;
137 return retval;
138 }
139
// Return the bitwise XOR of this vector and `other`.
// Works on a copy and delegates to operator^=, leaving both operands unchanged.
140 BOTAN_FUNC_ISA("avx2")
141 SIMD_8x32 operator^(const SIMD_8x32& other) const
142 {
143 SIMD_8x32 retval(*this);
144 retval ^= other;
145 return retval;
146 }
147
// Return the bitwise OR of this vector and `other`.
// Works on a copy and delegates to operator|=, leaving both operands unchanged.
148 BOTAN_FUNC_ISA("avx2")
149 SIMD_8x32 operator|(const SIMD_8x32& other) const
150 {
151 SIMD_8x32 retval(*this);
152 retval |= other;
153 return retval;
154 }
155
// Return the bitwise AND of this vector and `other`.
// Works on a copy and delegates to operator&=, leaving both operands unchanged.
156 BOTAN_FUNC_ISA("avx2")
157 SIMD_8x32 operator&(const SIMD_8x32& other) const
158 {
159 SIMD_8x32 retval(*this);
160 retval &= other;
161 return retval;
162 }
163
// Add `other` to this vector in place, treating both as eight 32-bit elements
// (_mm256_add_epi32).
164 BOTAN_FUNC_ISA("avx2")
165 void operator+=(const SIMD_8x32& other)
166 {
167 m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
168 }
169
// Subtract `other` from this vector in place, treating both as eight 32-bit
// elements (_mm256_sub_epi32).
170 BOTAN_FUNC_ISA("avx2")
171 void operator-=(const SIMD_8x32& other)
172 {
173 m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
174 }
175
// XOR all 256 bits of `other` into this vector in place.
176 BOTAN_FUNC_ISA("avx2")
177 void operator^=(const SIMD_8x32& other)
178 {
179 m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
180 }
181
// OR all 256 bits of `other` into this vector in place.
182 BOTAN_FUNC_ISA("avx2")
183 void operator|=(const SIMD_8x32& other)
184 {
185 m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
186 }
187
// AND all 256 bits of `other` into this vector in place.
188 BOTAN_FUNC_ISA("avx2")
189 void operator&=(const SIMD_8x32& other)
190 {
191 m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
192 }
193
// Return this vector with every 32-bit element shifted left by SHIFT bits
// (_mm256_slli_epi32, which shifts in zeros). SHIFT is a compile-time constant.
194 template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
195 {
196 return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
197 }
198
// Return this vector with every 32-bit element logically shifted right by
// SHIFT bits (_mm256_srli_epi32, which shifts in zeros). SHIFT is a
// compile-time constant.
199 template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
200 {
201 return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
202 }
203
// Return the bitwise complement of this vector, computed by XORing with a
// vector whose every 32-bit element is 0xFFFFFFFF.
204 BOTAN_FUNC_ISA("avx2")
205 SIMD_8x32 operator~() const
206 {
207 return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
208 }
209
// AND-with-complement: the complement is applied to THIS vector, not to
// `other` (_mm256_andnot_si256 negates its first argument).
210 // (~reg) & other
211 BOTAN_FUNC_ISA("avx2")
212 SIMD_8x32 andc(const SIMD_8x32& other) const
213 {
214 return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
215 }
216
// Return a copy of this vector with the byte order reversed inside each of
// the eight 32-bit words. This object is not modified.
217 BOTAN_FUNC_ISA("avx2")
218 SIMD_8x32 bswap() const
219 {
// Shuffle control: for each group of four bytes, pick the source bytes in
// reverse order. _mm256_shuffle_epi8 indexes within each 128-bit half using
// only the low four bits of a control byte, so entries 16..31 select the
// same in-half positions as entries 0..15.
220 const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
221 7, 6, 5, 4,
222 11, 10, 9, 8,
223 15, 14, 13, 12,
224 19, 18, 17, 16,
225 23, 22, 21, 20,
226 27, 26, 25, 24,
227 31, 30, 29, 28 };
228
229 const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
230
231 const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
232
233 return SIMD_8x32(output);
234 }
235
// In-place 4-way transpose of B0..B3, viewed as rows of 32-bit words.
// The unpack intrinsics used here operate on each 128-bit half separately,
// so the result is a 4x4 transpose carried out independently in the low
// halves and in the high halves of the four inputs.
236 BOTAN_FUNC_ISA("avx2")
237 static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
238 SIMD_8x32& B2, SIMD_8x32& B3)
239 {
// Step 1: interleave 32-bit words of (B0,B1) and of (B2,B3).
240 const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
241 const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
242 const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
243 const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
244
// Step 2: interleave 64-bit pairs of the intermediates and write back.
245 B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
246 B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
247 B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
248 B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
249 }
250
// In-place 8-way transpose of B0..B7, viewed as rows of 32-bit words.
// Runs the 4-way transpose on B0..B3 and on B4..B7, then uses swap_tops()
// on the pairs (B0,B4), (B1,B5), (B2,B6), (B3,B7) to exchange 128-bit halves
// between the two groups.
251 BOTAN_FUNC_ISA("avx2")
252 static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
253 SIMD_8x32& B2, SIMD_8x32& B3,
254 SIMD_8x32& B4, SIMD_8x32& B5,
255 SIMD_8x32& B6, SIMD_8x32& B7)
256 {
257 transpose(B0, B1, B2, B3);
258 transpose(B4, B5, B6, B7);
259
260 swap_tops(B0, B4);
261 swap_tops(B1, B5);
262 swap_tops(B2, B6);
263 swap_tops(B3, B7);
264 }
265
// Issue _mm256_zeroupper(), which clears the upper 128 bits of the YMM
// registers and leaves the lower halves untouched.
266 BOTAN_FUNC_ISA("avx2")
267 static void reset_registers()
268 {
269 _mm256_zeroupper();
270 }
271
// Issue _mm256_zeroall(), which clears the full contents of the YMM registers.
272 BOTAN_FUNC_ISA("avx2")
273 static void zero_registers()
274 {
275 _mm256_zeroall();
276 }
277
// Return the wrapped raw __m256i value (by value).
278 __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
279
// Wrap a raw __m256i value. This constructor is not marked explicit, so a
// __m256i converts implicitly to SIMD_8x32 (swap_tops() initializes its
// temporaries that way).
280 BOTAN_FUNC_ISA("avx2")
281 SIMD_8x32(__m256i x) : m_avx2(x) {}
282
283 private:
284
// Regroup the 128-bit halves of A and B in place:
//   A <- [low half of A, low half of B]
//   B <- [high half of A, high half of B]
// (written as [low 128 bits, high 128 bits]). Both results are computed from
// the original A and B before either is overwritten.
285 BOTAN_FUNC_ISA("avx2")
286 static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
287 {
// Control 0 + (2 << 4): low half of result = A.low, high half = B.low.
288 SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
// Control 1 + (3 << 4): low half of result = A.high, high half = B.high.
289 SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
290 A = T0;
291 B = T1;
292 }
293
// The wrapped 256-bit integer vector; all member functions operate on this value.
294 __m256i m_avx2;
295 };
296
297}
298
299#endif
SIMD_8x32 & operator=(SIMD_8x32 &&other)=default
SIMD_8x32(const SIMD_8x32 &other)=default
__m256i BOTAN_FUNC_ISA("avx2") handle() const
Definition: simd_avx2.h:278
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
Definition: simd_avx2.h:199
BOTAN_FORCE_INLINE SIMD_8x32()
Definition: simd_avx2.h:26
SIMD_8x32(SIMD_8x32 &&other)=default
static SIMD_8x32 splat(uint32_t B)
Definition: simd_avx2.h:45
static SIMD_8x32 load_be(const uint8_t *in)
Definition: simd_avx2.h:57
static void reset_registers()
Definition: simd_avx2.h:267
void store_le(uint8_t out[]) const
Definition: simd_avx2.h:63
SIMD_8x32 rotl() const
Definition: simd_avx2.h:76
SIMD_8x32 andc(const SIMD_8x32 &other) const
Definition: simd_avx2.h:212
static SIMD_8x32 load_le(const uint8_t *in)
Definition: simd_avx2.h:51
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
Definition: simd_avx2.h:194
SIMD_8x32 bswap() const
Definition: simd_avx2.h:218
SIMD_8x32 & operator=(const SIMD_8x32 &other)=default
SIMD_8x32 BOTAN_FUNC_ISA("avx2") rho() const
Definition: simd_avx2.h:113
SIMD_8x32 rotr() const
Definition: simd_avx2.h:107
static void zero_registers()
Definition: simd_avx2.h:273
void store_be(uint8_t out[]) const
Definition: simd_avx2.h:69
static void transpose(SIMD_8x32 &B0, SIMD_8x32 &B1, SIMD_8x32 &B2, SIMD_8x32 &B3)
Definition: simd_avx2.h:237
int(* final)(unsigned char *, CTX *)
#define BOTAN_IF_CONSTEXPR
Definition: compiler.h:176
#define BOTAN_FORCE_INLINE
Definition: compiler.h:205
Definition: alg_id.cpp:13