Botan  2.11.0
Crypto and TLS for C++11
simd_avx2.h
Go to the documentation of this file.
1 /*
2 * (C) 2018 Jack Lloyd
3 *
4 * Botan is released under the Simplified BSD License (see license.txt)
5 */
6 
7 #ifndef BOTAN_SIMD_AVX2_H_
8 #define BOTAN_SIMD_AVX2_H_
9 
10 #include <botan/types.h>
11 #include <immintrin.h>
12 
13 namespace Botan {
14 
16  {
17  public:
18 
19  SIMD_8x32& operator=(const SIMD_8x32& other) = default;
20  SIMD_8x32(const SIMD_8x32& other) = default;
21 
22  SIMD_8x32& operator=(SIMD_8x32&& other) = default;
23  SIMD_8x32(SIMD_8x32&& other) = default;
24 
25  BOTAN_FUNC_ISA("avx2")
27  {
28  m_avx2 = _mm256_setzero_si256();
29  }
30 
31  BOTAN_FUNC_ISA("avx2")
32  explicit SIMD_8x32(const uint32_t B[8])
33  {
34  m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
35  }
36 
37  BOTAN_FUNC_ISA("avx2")
38  explicit SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39  uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
40  {
41  m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
42  }
43 
44  BOTAN_FUNC_ISA("avx2")
45  static SIMD_8x32 splat(uint32_t B)
46  {
47  return SIMD_8x32(_mm256_set1_epi32(B));
48  }
49 
50  BOTAN_FUNC_ISA("avx2")
51  static SIMD_8x32 load_le(const uint8_t* in)
52  {
53  return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
54  }
55 
56  BOTAN_FUNC_ISA("avx2")
57  static SIMD_8x32 load_be(const uint8_t* in)
58  {
59  return load_le(in).bswap();
60  }
61 
62  BOTAN_FUNC_ISA("avx2")
63  void store_le(uint8_t out[]) const
64  {
65  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
66  }
67 
68  BOTAN_FUNC_ISA("avx2")
69  void store_be(uint8_t out[]) const
70  {
71  bswap().store_le(out);
72  }
73 
74  template<size_t ROT>
75  BOTAN_FUNC_ISA("avx2")
76  SIMD_8x32 rotl() const
77  {
78  static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");
79 
80  BOTAN_IF_CONSTEXPR(ROT == 8)
81  {
82  const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
83  14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
84 
85  return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
86  }
87  else BOTAN_IF_CONSTEXPR(ROT == 16)
88  {
89  const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
90  13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
91 
92  return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
93  }
94  else
95  {
96  return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
97  _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
98  }
99  }
100 
101  template<size_t ROT>
102  BOTAN_FUNC_ISA("avx2")
103  SIMD_8x32 rotr() const
104  {
105  return this->rotl<32-ROT>();
106  }
107 
108  SIMD_8x32 operator+(const SIMD_8x32& other) const
109  {
110  SIMD_8x32 retval(*this);
111  retval += other;
112  return retval;
113  }
114 
115  SIMD_8x32 operator-(const SIMD_8x32& other) const
116  {
117  SIMD_8x32 retval(*this);
118  retval -= other;
119  return retval;
120  }
121 
122  SIMD_8x32 operator^(const SIMD_8x32& other) const
123  {
124  SIMD_8x32 retval(*this);
125  retval ^= other;
126  return retval;
127  }
128 
129  SIMD_8x32 operator|(const SIMD_8x32& other) const
130  {
131  SIMD_8x32 retval(*this);
132  retval |= other;
133  return retval;
134  }
135 
136  SIMD_8x32 operator&(const SIMD_8x32& other) const
137  {
138  SIMD_8x32 retval(*this);
139  retval &= other;
140  return retval;
141  }
142 
143  BOTAN_FUNC_ISA("avx2")
144  void operator+=(const SIMD_8x32& other)
145  {
146  m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
147  }
148 
149  BOTAN_FUNC_ISA("avx2")
150  void operator-=(const SIMD_8x32& other)
151  {
152  m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
153  }
154 
155  BOTAN_FUNC_ISA("avx2")
156  void operator^=(const SIMD_8x32& other)
157  {
158  m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
159  }
160 
161  BOTAN_FUNC_ISA("avx2")
162  void operator|=(const SIMD_8x32& other)
163  {
164  m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
165  }
166 
167  BOTAN_FUNC_ISA("avx2")
168  void operator&=(const SIMD_8x32& other)
169  {
170  m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
171  }
172 
173  template<int SHIFT> BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
174  {
175  return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
176  }
177 
178  template<int SHIFT> BOTAN_FUNC_ISA("avx2")SIMD_8x32 shr() const
179  {
180  return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
181  }
182 
183  BOTAN_FUNC_ISA("avx2")
184  SIMD_8x32 operator~() const
185  {
186  return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
187  }
188 
189  // (~reg) & other
190  BOTAN_FUNC_ISA("avx2")
191  SIMD_8x32 andc(const SIMD_8x32& other) const
192  {
193  return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
194  }
195 
196  BOTAN_FUNC_ISA("avx2")
197  SIMD_8x32 bswap() const
198  {
199  const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
200  7, 6, 5, 4,
201  11, 10, 9, 8,
202  15, 14, 13, 12,
203  19, 18, 17, 16,
204  23, 22, 21, 20,
205  27, 26, 25, 24,
206  31, 30, 29, 28 };
207 
208  const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
209 
210  const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
211 
212  return SIMD_8x32(output);
213  }
214 
215  BOTAN_FUNC_ISA("avx2")
216  static void transpose(SIMD_8x32& B0, SIMD_8x32& B1,
217  SIMD_8x32& B2, SIMD_8x32& B3)
218  {
219  const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
220  const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
221  const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
222  const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
223 
224  B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
225  B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
226  B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
227  B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
228  }
229 
230  BOTAN_FUNC_ISA("avx2")
231  static void reset_registers()
232  {
233  _mm256_zeroupper();
234  }
235 
236  BOTAN_FUNC_ISA("avx2")
237  static void zero_registers()
238  {
239  _mm256_zeroall();
240  }
241 
242  __m256i BOTAN_FUNC_ISA("avx2") handle() const { return m_avx2; }
243 
244  private:
245 
246  BOTAN_FUNC_ISA("avx2")
247  SIMD_8x32(__m256i x) : m_avx2(x) {}
248 
249  __m256i m_avx2;
250  };
251 
252 }
253 
254 #endif
BigInt const BigInt & x
Definition: numthry.h:139
SIMD_8x32 operator+(const SIMD_8x32 &other) const
Definition: simd_avx2.h:108
void store_le(uint8_t out[]) const
Definition: simd_avx2.h:63
#define BOTAN_IF_CONSTEXPR
Definition: compiler.h:161
static SIMD_8x32 splat(uint32_t B)
Definition: simd_avx2.h:45
int(* final)(unsigned char *, CTX *)
SIMD_8x32 operator-(const SIMD_8x32 &other) const
Definition: simd_avx2.h:115
SIMD_8x32 operator^(const SIMD_8x32 &other) const
Definition: simd_avx2.h:122
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
Definition: simd_avx2.h:173
void const uint8_t in[]
Definition: mgf1.h:26
static SIMD_8x32 load_le(const uint8_t *in)
Definition: simd_avx2.h:51
__m256i BOTAN_FUNC_ISA("avx2") handle() const
Definition: simd_avx2.h:242
static void reset_registers()
Definition: simd_avx2.h:231
SIMD_8x32 operator|(const SIMD_8x32 &other) const
Definition: simd_avx2.h:129
SIMD_8x32 rotr() const
Definition: simd_avx2.h:103
SIMD_8x32 bswap() const
Definition: simd_avx2.h:197
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
Definition: simd_avx2.h:178
SIMD_8x32 operator&(const SIMD_8x32 &other) const
Definition: simd_avx2.h:136
Definition: alg_id.cpp:13
uint8_t out[]
Definition: pbkdf2.h:19
SIMD_8x32 andc(const SIMD_8x32 &other) const
Definition: simd_avx2.h:191
static void transpose(SIMD_8x32 &B0, SIMD_8x32 &B1, SIMD_8x32 &B2, SIMD_8x32 &B3)
Definition: simd_avx2.h:216
void BlockCipher const uint8_t size_t uint8_t output[]
Definition: package.h:29
void store_be(uint8_t out[]) const
Definition: simd_avx2.h:69
SIMD_8x32 rotl() const
Definition: simd_avx2.h:76
SIMD_8x32 & operator=(const SIMD_8x32 &other)=default
static SIMD_8x32 load_be(const uint8_t *in)
Definition: simd_avx2.h:57
static void zero_registers()
Definition: simd_avx2.h:237