Botan  2.15.0
Crypto and TLS for C++11
simd_avx2.h
Go to the documentation of this file.
1 /*
2 * (C) 2018 Jack Lloyd
3 *
4 * Botan is released under the Simplified BSD License (see license.txt)
5 */
6 
7 #ifndef BOTAN_SIMD_AVX2_H_
8 #define BOTAN_SIMD_AVX2_H_
9 
10 #include <botan/types.h>
11 #include <immintrin.h>
12 
13 namespace Botan {
14 
16  {
17  public:
18 
19  SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20  SIMD_8x32(const SIMD_8x32& other) = default;
21 
22  SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23  SIMD_8x32(SIMD_8x32&& other) = default;
24 
25  BOTAN_FUNC_ISA("avx2")
27  {
28  m_avx2 = _mm256_setzero_si256();
29  }
30 
31  BOTAN_FUNC_ISA("avx2")
32  explicit SIMD_8x32(const uint32_t B[8])
33  {
34  m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35  }
36 
37  BOTAN_FUNC_ISA("avx2")
38  explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39  uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40  {
41  m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42  }
43 
44  BOTAN_FUNC_ISA("avx2")
45  static SIMD_8x32 splat(uint32_t B)
46  {
47  return SIMD_8x32(_mm256_set1_epi32(B));
48  }
49 
50  BOTAN_FUNC_ISA("avx2")
51  static SIMD_8x32 load_le(const uint8_t* in)
52  {
53  return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54  }
55 
56  BOTAN_FUNC_ISA("avx2")
57  static SIMD_8x32 load_be(const uint8_t* in)
58  {
59  return load_le(in).bswap();
60  }
61 
62  BOTAN_FUNC_ISA("avx2")
63  void store_le(uint8_t out[]) const
64  {
65  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66  }
67 
68  BOTAN_FUNC_ISA("avx2")
69  void store_be(uint8_t out[]) const
70  {
71  bswap().store_le(out);
72  }
73 
74  template<size_t ROT>
75  BOTAN_FUNC_ISA("avx2")
76  SIMD_8x32 rotl() const
77  {
78  static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79 
80 #if defined(__AVX512VL__)
81  return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
82 #else
83  BOTAN_IF_CONSTEXPR(ROT == 8)
84  {
85  const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86  14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
87 
88  return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
89  }
90  else BOTAN_IF_CONSTEXPR(ROT == 16)
91  {
92  const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93  13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
94 
95  return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
96  }
97  else
98  {
99  return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100  _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
101  }
102 #endif
103  }
104 
105  template<size_t ROT>
106  BOTAN_FUNC_ISA("avx2")
107  SIMD_8x32 rotr() const
108  {
109  return this->rotl<32-ROT>();
110  }
111 
112  template<size_t ROT1, size_t ROT2, size_t ROT3>
113  SIMD_8x32 BOTAN_FUNC_ISA("avx2") rho() const
114  {
115  SIMD_8x32 res;
116 
117  const SIMD_8x32 rot1 = this->rotr<ROT1>();
118  const SIMD_8x32 rot2 = this->rotr<ROT2>();
119  const SIMD_8x32 rot3 = this->rotr<ROT3>();
120 
121  return rot1 ^ rot2 ^ rot3;
122  }
123 
124  BOTAN_FUNC_ISA("avx2")
125  SIMD_8x32 operator+(const SIMD_8x32& other) const
126  {
127  SIMD_8x32 retval(*this);
128  retval += other;
129  return retval;
130  }
131 
132  BOTAN_FUNC_ISA("avx2")
133  SIMD_8x32 operator-(const SIMD_8x32& other) const
134  {
135  SIMD_8x32 retval(*this);
136  retval -= other;
137  return retval;
138  }
139 
140  BOTAN_FUNC_ISA("avx2")
141  SIMD_8x32 operator^(const SIMD_8x32& other) const
142  {
143  SIMD_8x32 retval(*this);
144  retval ^= other;
145  return retval;
146  }
147 
148  BOTAN_FUNC_ISA("avx2")
149  SIMD_8x32 operator|(const SIMD_8x32& other) const
150  {
151  SIMD_8x32 retval(*this);
152  retval |= other;
153  return retval;
154  }
155 
156  BOTAN_FUNC_ISA("avx2")
157  SIMD_8x32 operator&(const SIMD_8x32& other) const
158  {
159  SIMD_8x32 retval(*this);
160  retval &= other;
161  return retval;
162  }
163 
164  BOTAN_FUNC_ISA("avx2")
165  void operator+=(const SIMD_8x32& other)
166  {
167  m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
168  }
169 
170  BOTAN_FUNC_ISA("avx2")
171  void operator-=(const SIMD_8x32& other)
172  {
173  m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
174  }
175 
176  BOTAN_FUNC_ISA("avx2")
177  void operator^=(const SIMD_8x32& other)
178  {
179  m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
180  }
181 
182  BOTAN_FUNC_ISA("avx2")
183  void operator|=(const SIMD_8x32& other)
184  {
185  m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
186  }
187 
188  BOTAN_FUNC_ISA("avx2")
189  void operator&=(const SIMD_8x32& other)
190  {
191  m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
192  }
193 
194  template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
195  {
196  return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
197  }
198 
199  template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
200  {
201  return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
202  }
203 
204  BOTAN_FUNC_ISA("avx2")
205  SIMD_8x32 operator~() const
206  {
207  return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
208  }
209 
210  // (~reg) & other
211  BOTAN_FUNC_ISA("avx2")
212  SIMD_8x32 andc(const SIMD_8x32& other) const
213  {
214  return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
215  }
216 
217  BOTAN_FUNC_ISA("avx2")
218  SIMD_8x32 bswap() const
219  {
220  const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
221  7, 6, 5, 4,
222  11, 10, 9, 8,
223  15, 14, 13, 12,
224  19, 18, 17, 16,
225  23, 22, 21, 20,
226  27, 26, 25, 24,
227  31, 30, 29, 28 };
228 
229  const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
230 
231  const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
232 
233  return SIMD_8x32(output);
234  }
235 
236  BOTAN_FUNC_ISA("avx2")
237  static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
238  SIMD_8x32& B2, SIMD_8x32& B3)
239  {
240  const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
241  const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
242  const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
243  const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
244 
245  B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
246  B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
247  B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
248  B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
249  }
250 
251  BOTAN_FUNC_ISA("avx2")
252  static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
253  SIMD_8x32& B2, SIMD_8x32& B3,
254  SIMD_8x32& B4, SIMD_8x32& B5,
255  SIMD_8x32& B6, SIMD_8x32& B7)
256  {
257  transpose(B0, B1, B2, B3);
258  transpose(B4, B5, B6, B7);
259 
260  swap_tops(B0, B4);
261  swap_tops(B1, B5);
262  swap_tops(B2, B6);
263  swap_tops(B3, B7);
264  }
265 
266  BOTAN_FUNC_ISA("avx2")
267  static void reset_registers()
268  {
269  _mm256_zeroupper();
270  }
271 
272  BOTAN_FUNC_ISA("avx2")
273  static void zero_registers()
274  {
275  _mm256_zeroall();
276  }
277 
278  __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
279 
280  BOTAN_FUNC_ISA("avx2")
281  SIMD_8x32(__m256i x) : m_avx2(x) {}
282 
283  private:
284 
285  BOTAN_FUNC_ISA("avx2")
286  static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
287  {
288  SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
289  SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
290  A = T0;
291  B = T1;
292  }
293 
294  __m256i m_avx2;
295  };
296 
297 }
298 
299 #endif
BOTAN_FORCE_INLINE SIMD_8x32()
Definition: simd_avx2.h:26
void store_le(uint8_t out[]) const
Definition: simd_avx2.h:63
SIMD_8x32 BOTAN_FUNC_ISA("avx2") rho() const
Definition: simd_avx2.h:113
#define BOTAN_IF_CONSTEXPR
Definition: compiler.h:185
static SIMD_8x32 splat(uint32_t B)
Definition: simd_avx2.h:45
#define BOTAN_FORCE_INLINE
Definition: compiler.h:214
int(* final)(unsigned char *, CTX *)
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
Definition: simd_avx2.h:194
static SIMD_8x32 load_le(const uint8_t *in)
Definition: simd_avx2.h:51
__m256i BOTAN_FUNC_ISA("avx2") handle() const
Definition: simd_avx2.h:278
static void reset_registers()
Definition: simd_avx2.h:267
SIMD_8x32 rotr() const
Definition: simd_avx2.h:107
SIMD_8x32 bswap() const
Definition: simd_avx2.h:218
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
Definition: simd_avx2.h:199
Definition: alg_id.cpp:13
SIMD_8x32 andc(const SIMD_8x32 &other) const
Definition: simd_avx2.h:212
static void transpose(SIMD_8x32 &B0, SIMD_8x32 &B1, SIMD_8x32 &B2, SIMD_8x32 &B3)
Definition: simd_avx2.h:237
void store_be(uint8_t out[]) const
Definition: simd_avx2.h:69
SIMD_8x32 rotl() const
Definition: simd_avx2.h:76
SIMD_8x32 & operator=(const SIMD_8x32 &other)=default
static SIMD_8x32 load_be(const uint8_t *in)
Definition: simd_avx2.h:57
static void zero_registers()
Definition: simd_avx2.h:273