Botan  2.13.0
Crypto and TLS for C++11
simd_avx2.h
Go to the documentation of this file.
1 /*
2 * (C) 2018 Jack Lloyd
3 *
4 * Botan is released under the Simplified BSD License (see license.txt)
5 */
6 
7 #ifndef BOTAN_SIMD_AVX2_H_
8 #define BOTAN_SIMD_AVX2_H_
9 
10 #include <botan/types.h>
11 #include <immintrin.h>
12 
13 namespace Botan {
14 
16  {
17  public:
18 
19  SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20  SIMD_8x32(const SIMD_8x32& other) = default;
21 
22  SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23  SIMD_8x32(SIMD_8x32&& other) = default;
24 
25  BOTAN_FUNC_ISA("avx2")
27  {
28  m_avx2 = _mm256_setzero_si256();
29  }
30 
31  BOTAN_FUNC_ISA("avx2")
32  explicit SIMD_8x32(const uint32_t B[8])
33  {
34  m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35  }
36 
37  BOTAN_FUNC_ISA("avx2")
38  explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39  uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40  {
41  m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42  }
43 
44  BOTAN_FUNC_ISA("avx2")
45  static SIMD_8x32 splat(uint32_t B)
46  {
47  return SIMD_8x32(_mm256_set1_epi32(B));
48  }
49 
50  BOTAN_FUNC_ISA("avx2")
51  static SIMD_8x32 load_le(const uint8_t* in)
52  {
53  return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54  }
55 
56  BOTAN_FUNC_ISA("avx2")
57  static SIMD_8x32 load_be(const uint8_t* in)
58  {
59  return load_le(in).bswap();
60  }
61 
62  BOTAN_FUNC_ISA("avx2")
63  void store_le(uint8_t out[]) const
64  {
65  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66  }
67 
68  BOTAN_FUNC_ISA("avx2")
69  void store_be(uint8_t out[]) const
70  {
71  bswap().store_le(out);
72  }
73 
74  template<size_t ROT>
75  BOTAN_FUNC_ISA("avx2")
76  SIMD_8x32 rotl() const
77  {
78  static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79 
80  BOTAN_IF_CONSTEXPR(ROT == 8)
81  {
82  const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
83  14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
84 
85  return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
86  }
87  else BOTAN_IF_CONSTEXPR(ROT == 16)
88  {
89  const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
90  13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
91 
92  return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
93  }
94  else
95  {
96  return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
97  _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
98  }
99  }
100 
101  template<size_t ROT>
102  BOTAN_FUNC_ISA("avx2")
103  SIMD_8x32 rotr() const
104  {
105  return this->rotl<32-ROT>();
106  }
107 
108  template<size_t ROT1, size_t ROT2, size_t ROT3>
109  SIMD_8x32 rho() const
110  {
111  SIMD_8x32 res;
112 
113  const SIMD_8x32 rot1 = this->rotr<ROT1>();
114  const SIMD_8x32 rot2 = this->rotr<ROT2>();
115  const SIMD_8x32 rot3 = this->rotr<ROT3>();
116 
117  return rot1 ^ rot2 ^ rot3;
118  }
119 
120  SIMD_8x32 operator+(const SIMD_8x32& other) const
121  {
122  SIMD_8x32 retval(*this);
123  retval += other;
124  return retval;
125  }
126 
127  SIMD_8x32 operator-(const SIMD_8x32& other) const
128  {
129  SIMD_8x32 retval(*this);
130  retval -= other;
131  return retval;
132  }
133 
134  SIMD_8x32 operator^(const SIMD_8x32& other) const
135  {
136  SIMD_8x32 retval(*this);
137  retval ^= other;
138  return retval;
139  }
140 
141  SIMD_8x32 operator|(const SIMD_8x32& other) const
142  {
143  SIMD_8x32 retval(*this);
144  retval |= other;
145  return retval;
146  }
147 
148  SIMD_8x32 operator&(const SIMD_8x32& other) const
149  {
150  SIMD_8x32 retval(*this);
151  retval &= other;
152  return retval;
153  }
154 
155  BOTAN_FUNC_ISA("avx2")
156  void operator+=(const SIMD_8x32& other)
157  {
158  m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
159  }
160 
161  BOTAN_FUNC_ISA("avx2")
162  void operator-=(const SIMD_8x32& other)
163  {
164  m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
165  }
166 
167  BOTAN_FUNC_ISA("avx2")
168  void operator^=(const SIMD_8x32& other)
169  {
170  m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
171  }
172 
173  BOTAN_FUNC_ISA("avx2")
174  void operator|=(const SIMD_8x32& other)
175  {
176  m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
177  }
178 
179  BOTAN_FUNC_ISA("avx2")
180  void operator&=(const SIMD_8x32& other)
181  {
182  m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
183  }
184 
185  template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
186  {
187  return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
188  }
189 
190  template<int SHIFT> BOTAN_FUNC_ISA("avx2")SIMD_8x32 shr() const
191  {
192  return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
193  }
194 
195  BOTAN_FUNC_ISA("avx2")
196  SIMD_8x32 operator~() const
197  {
198  return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
199  }
200 
201  // (~reg) & other
202  BOTAN_FUNC_ISA("avx2")
203  SIMD_8x32 andc(const SIMD_8x32& other) const
204  {
205  return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
206  }
207 
208  BOTAN_FUNC_ISA("avx2")
209  SIMD_8x32 bswap() const
210  {
211  const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
212  7, 6, 5, 4,
213  11, 10, 9, 8,
214  15, 14, 13, 12,
215  19, 18, 17, 16,
216  23, 22, 21, 20,
217  27, 26, 25, 24,
218  31, 30, 29, 28 };
219 
220  const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
221 
222  const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
223 
224  return SIMD_8x32(output);
225  }
226 
227  BOTAN_FUNC_ISA("avx2")
228  static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
229  SIMD_8x32& B2, SIMD_8x32& B3)
230  {
231  const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
232  const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
233  const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
234  const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
235 
236  B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
237  B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
238  B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
239  B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
240  }
241 
242  BOTAN_FUNC_ISA("avx2")
243  static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
244  SIMD_8x32& B2, SIMD_8x32& B3,
245  SIMD_8x32& B4, SIMD_8x32& B5,
246  SIMD_8x32& B6, SIMD_8x32& B7)
247  {
248  transpose(B0, B1, B2, B3);
249  transpose(B4, B5, B6, B7);
250 
251  swap_tops(B0, B4);
252  swap_tops(B1, B5);
253  swap_tops(B2, B6);
254  swap_tops(B3, B7);
255  }
256 
257  BOTAN_FUNC_ISA("avx2")
258  static void reset_registers()
259  {
260  _mm256_zeroupper();
261  }
262 
263  BOTAN_FUNC_ISA("avx2")
264  static void zero_registers()
265  {
266  _mm256_zeroall();
267  }
268 
269  __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
270 
271  BOTAN_FUNC_ISA("avx2")
272  SIMD_8x32(__m256i x) : m_avx2(x) {}
273 
274  private:
275 
276  BOTAN_FUNC_ISA("avx2")
277  static void swap_tops(SIMD_8x32& A, SIMD_8x32& B)
278  {
279  SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
280  SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
281  A = T0;
282  B = T1;
283  }
284 
285  __m256i m_avx2;
286  };
287 
288 }
289 
290 #endif
SIMD_8x32 operator+(const SIMD_8x32 &other) const
Definition: simd_avx2.h:120
void store_le(uint8_t out[]) const
Definition: simd_avx2.h:63
#define BOTAN_IF_CONSTEXPR
Definition: compiler.h:179
static SIMD_8x32 splat(uint32_t B)
Definition: simd_avx2.h:45
int(* final)(unsigned char *, CTX *)
SIMD_8x32 operator-(const SIMD_8x32 &other) const
Definition: simd_avx2.h:127
SIMD_8x32 operator^(const SIMD_8x32 &other) const
Definition: simd_avx2.h:134
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
Definition: simd_avx2.h:185
static SIMD_8x32 load_le(const uint8_t *in)
Definition: simd_avx2.h:51
__m256i BOTAN_FUNC_ISA("avx2") handle() const
Definition: simd_avx2.h:269
static void reset_registers()
Definition: simd_avx2.h:258
SIMD_8x32 operator|(const SIMD_8x32 &other) const
Definition: simd_avx2.h:141
SIMD_8x32 rotr() const
Definition: simd_avx2.h:103
SIMD_8x32 bswap() const
Definition: simd_avx2.h:209
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
Definition: simd_avx2.h:190
Definition: alg_id.cpp:13
SIMD_8x32 andc(const SIMD_8x32 &other) const
Definition: simd_avx2.h:203
SIMD_8x32 rho() const
Definition: simd_avx2.h:109
static void transpose(SIMD_8x32 &B0, SIMD_8x32 &B1, SIMD_8x32 &B2, SIMD_8x32 &B3)
Definition: simd_avx2.h:228
void store_be(uint8_t out[]) const
Definition: simd_avx2.h:69
SIMD_8x32 rotl() const
Definition: simd_avx2.h:76
SIMD_8x32 & operator=(const SIMD_8x32 &other)=default
static SIMD_8x32 load_be(const uint8_t *in)
Definition: simd_avx2.h:57
static void zero_registers()
Definition: simd_avx2.h:264
SIMD_8x32 operator &(const SIMD_8x32 &other) const
Definition: simd_avx2.h:148