Botan  2.15.0
Crypto and TLS for C++11
simd_32.h
Go to the documentation of this file.
1 /*
2 * Lightweight wrappers for SIMD operations
3 * (C) 2009,2011,2016,2017,2019 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #ifndef BOTAN_SIMD_32_H_
9 #define BOTAN_SIMD_32_H_
10 
11 #include <botan/types.h>
12 
13 #if defined(BOTAN_TARGET_SUPPORTS_SSE2)
14  #include <emmintrin.h>
15  #define BOTAN_SIMD_USE_SSE2
16 
17 #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
18  #include <botan/bswap.h>
19  #include <botan/loadstor.h>
20  #include <altivec.h>
21  #undef vector
22  #undef bool
23  #define BOTAN_SIMD_USE_ALTIVEC
24 
25 #elif defined(BOTAN_TARGET_SUPPORTS_NEON)
26  #include <botan/cpuid.h>
27  #include <arm_neon.h>
28  #define BOTAN_SIMD_USE_NEON
29 
30 #else
31  #error "No SIMD instruction set enabled"
32 #endif
33 
34 #if defined(BOTAN_SIMD_USE_SSE2)
35  #define BOTAN_SIMD_ISA "sse2"
36  #define BOTAN_VPERM_ISA "ssse3"
37  #define BOTAN_CLMUL_ISA "pclmul"
38 #elif defined(BOTAN_SIMD_USE_NEON)
39  #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
40  #define BOTAN_SIMD_ISA "+simd"
41  #define BOTAN_CLMUL_ISA "+crypto"
42  #else
43  #define BOTAN_SIMD_ISA "fpu=neon"
44  #endif
45  #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
46 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
47  #define BOTAN_SIMD_ISA "altivec"
48  #define BOTAN_VPERM_ISA "altivec"
49  #define BOTAN_CLMUL_ISA "crypto"
50 #endif
51 
namespace Botan {

// The platform-specific 4x32 vector register type wrapped by SIMD_4x32
#if defined(BOTAN_SIMD_USE_SSE2)
   typedef __m128i native_simd_type;
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
   typedef __vector unsigned int native_simd_type;
#elif defined(BOTAN_SIMD_USE_NEON)
   typedef uint32x4_t native_simd_type;
#endif
61 
62 /**
63 * 4x32 bit SIMD register
64 *
65 * This class is not a general purpose SIMD type, and only offers
66 * instructions needed for evaluation of specific crypto primitives.
67 * For example it does not currently have equality operators of any
68 * kind.
69 *
70 * Implemented for SSE2, VMX (Altivec), and NEON.
71 */
73  {
74  public:
75 
76  SIMD_4x32& operator=(const SIMD_4x32& other) = default;
77  SIMD_4x32(const SIMD_4x32& other) = default;
78 
79  SIMD_4x32& operator=(SIMD_4x32&& other) = default;
80  SIMD_4x32(SIMD_4x32&& other) = default;
81 
      /**
      * Zero initialize SIMD register with 4 32-bit elements
      */
      SIMD_4x32() // zero initialized
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_setzero_si128();
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // splat of the 5-bit literal zero across all four lanes
         m_simd = vec_splat_u32(0);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vdupq_n_u32(0);
#endif
         }
95 
      /**
      * Load SIMD register with 4 32-bit elements
      *
      * @param B pointer to 4 words in native word order; no alignment
      *          requirement (SSE2 path uses an unaligned load)
      */
      explicit SIMD_4x32(const uint32_t B[4])
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         __vector unsigned int val = { B[0], B[1], B[2], B[3]};
         m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vld1q_u32(B);
#endif
         }
110 
      /**
      * Load SIMD register with 4 32-bit elements
      *
      * B0 becomes element 0, B3 element 3
      */
      SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // _mm_set_epi32 takes its arguments in high-to-low element order
         m_simd = _mm_set_epi32(B3, B2, B1, B0);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         __vector unsigned int val = {B0, B1, B2, B3};
         m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
         // Better way to do this?
         const uint32_t B[4] = { B0, B1, B2, B3 };
         m_simd = vld1q_u32(B);
#endif
         }
127 
      /**
      * Load SIMD register with one 32-bit element repeated
      */
      static SIMD_4x32 splat(uint32_t B)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_set1_epi32(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vdupq_n_u32(B));
#else
         // AltiVec splat intrinsics only take small literals, so build directly
         return SIMD_4x32(B, B, B, B);
#endif
         }
141 
      /**
      * Load SIMD register with one 8-bit element repeated in every byte
      */
      static SIMD_4x32 splat_u8(uint8_t B)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_set1_epi8(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
#else
         // Build one word of four copies of B, then splat that word
         const uint32_t B4 = make_uint32(B, B, B, B);
         return SIMD_4x32(B4, B4, B4, B4);
#endif
         }
156 
      /**
      * Load a SIMD register with little-endian convention
      *
      * @param in pointer to 16 bytes, no alignment requirement
      */
      static SIMD_4x32 load_le(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // x86 is little-endian, so a plain unaligned load suffices
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         uint32_t R[4];
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);
#elif defined(BOTAN_SIMD_USE_NEON)
         // ARM may run big-endian; byteswap after the load if so
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_big_endian() ? l.bswap() : l;
#endif
         }
173 
      /**
      * Load a SIMD register with big-endian convention
      *
      * @param in pointer to 16 bytes, no alignment requirement
      */
      static SIMD_4x32 load_be(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // Little-endian host: load then byteswap each word
         return load_le(in).bswap();

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         uint32_t R[4];
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);

#elif defined(BOTAN_SIMD_USE_NEON)
         // Swap only when the host is little-endian
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_little_endian() ? l.bswap() : l;
#endif
         }
192 
      // Store to out with little-endian convention (word-array overload;
      // delegates to the byte-pointer overload)
      void store_le(uint32_t out[4]) const
         {
         this->store_le(reinterpret_cast<uint8_t*>(out));
         }

      // Store to out with little-endian convention (64-bit-array overload)
      void store_le(uint64_t out[2]) const
         {
         this->store_le(reinterpret_cast<uint8_t*>(out));
         }
202 
203  /**
204  * Load a SIMD register with little-endian convention
205  */
206  void store_le(uint8_t out[]) const
207  {
208 #if defined(BOTAN_SIMD_USE_SSE2)
209 
210  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
211 
212 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
213 
214  union {
215  __vector unsigned int V;
216  uint32_t R[4];
217  } vec;
218  vec.V = raw();
219  Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
220 
221 #elif defined(BOTAN_SIMD_USE_NEON)
223  {
224  vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
225  }
226  else
227  {
228  vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
229  }
230 #endif
231  }
232 
233  /**
234  * Load a SIMD register with big-endian convention
235  */
236  void store_be(uint8_t out[]) const
237  {
238 #if defined(BOTAN_SIMD_USE_SSE2)
239 
240  bswap().store_le(out);
241 
242 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
243 
244  union {
245  __vector unsigned int V;
246  uint32_t R[4];
247  } vec;
248  vec.V = m_simd;
249  Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
250 
251 #elif defined(BOTAN_SIMD_USE_NEON)
253  {
254  vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
255  }
256  else
257  {
258  vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
259  }
260 #endif
261  }
262 
263  /*
264  * This is used for SHA-2/SHACAL2
265  * Return rotr(ROT1) ^ rotr(ROT2) ^ rotr(ROT3)
266  */
267  template<size_t ROT1, size_t ROT2, size_t ROT3>
268  SIMD_4x32 rho() const
269  {
270  const SIMD_4x32 rot1 = this->rotr<ROT1>();
271  const SIMD_4x32 rot2 = this->rotr<ROT2>();
272  const SIMD_4x32 rot3 = this->rotr<ROT3>();
273  return (rot1 ^ rot2 ^ rot3);
274  }
275 
      /**
      * Left rotation by a compile time constant
      *
      * @tparam ROT rotation amount, must be in [1,31]
      */
      template<size_t ROT>
      SIMD_4x32 rotl() const
         {
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");

#if defined(BOTAN_SIMD_USE_SSE2)

         // No rotate instruction in SSE2; combine left and right shifts
         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
                                       _mm_srli_epi32(m_simd, static_cast<int>(32-ROT))));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // AltiVec has a native per-element rotate
         const unsigned int r = static_cast<unsigned int>(ROT);
         __vector unsigned int rot = {r, r, r, r};
         return SIMD_4x32(vec_rl(m_simd, rot));

#elif defined(BOTAN_SIMD_USE_NEON)

#if defined(BOTAN_TARGET_ARCH_IS_ARM64)

         // On AArch64 rotations by 8 and 16 have cheaper byte-permute forms
         BOTAN_IF_CONSTEXPR(ROT == 8)
            {
            const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
            const uint8x16_t mask = vld1q_u8(maskb);
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
            }
         else BOTAN_IF_CONSTEXPR(ROT == 16)
            {
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
            }
#endif
         // Generic NEON path: combine left and right shifts
         return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)),
                                    vshrq_n_u32(m_simd, static_cast<int>(32-ROT))));
#endif
         }
314 
      /**
      * Right rotation by a compile time constant
      *
      * Implemented as a left rotation by (32 - ROT)
      */
      template<size_t ROT>
      SIMD_4x32 rotr() const
         {
         return this->rotl<32-ROT>();
         }
323 
324  /**
325  * Add elements of a SIMD vector
326  */
327  SIMD_4x32 operator+(const SIMD_4x32& other) const
328  {
329  SIMD_4x32 retval(*this);
330  retval += other;
331  return retval;
332  }
333 
334  /**
335  * Subtract elements of a SIMD vector
336  */
337  SIMD_4x32 operator-(const SIMD_4x32& other) const
338  {
339  SIMD_4x32 retval(*this);
340  retval -= other;
341  return retval;
342  }
343 
344  /**
345  * XOR elements of a SIMD vector
346  */
347  SIMD_4x32 operator^(const SIMD_4x32& other) const
348  {
349  SIMD_4x32 retval(*this);
350  retval ^= other;
351  return retval;
352  }
353 
354  /**
355  * Binary OR elements of a SIMD vector
356  */
357  SIMD_4x32 operator|(const SIMD_4x32& other) const
358  {
359  SIMD_4x32 retval(*this);
360  retval |= other;
361  return retval;
362  }
363 
364  /**
365  * Binary AND elements of a SIMD vector
366  */
367  SIMD_4x32 operator&(const SIMD_4x32& other) const
368  {
369  SIMD_4x32 retval(*this);
370  retval &= other;
371  return retval;
372  }
373 
      // Lane-wise addition (mod 2^32) in place
      void operator+=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_add_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_add(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vaddq_u32(m_simd, other.m_simd);
#endif
         }
384 
      // Lane-wise subtraction (mod 2^32) in place
      void operator-=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_sub_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_sub(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vsubq_u32(m_simd, other.m_simd);
#endif
         }
395 
      // Bitwise XOR in place
      void operator^=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_xor_si128(m_simd, other.m_simd);

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_xor(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = veorq_u32(m_simd, other.m_simd);
#endif
         }
407 
      // Bitwise OR in place
      void operator|=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_or_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_or(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vorrq_u32(m_simd, other.m_simd);
#endif
         }
418 
      // Bitwise AND in place
      void operator&=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_and_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_and(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vandq_u32(m_simd, other.m_simd);
#endif
         }
429 
430 
      /**
      * Logical left shift of each 32-bit element by SHIFT bits
      *
      * @tparam SHIFT shift amount, must be in [1,31]
      */
      template<int SHIFT> SIMD_4x32 shl() const
         {
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");

#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         const __vector unsigned int shifts = {s, s, s, s};
         return SIMD_4x32(vec_sl(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
#endif
         }
446 
447  template<int SHIFT> SIMD_4x32 shr() const
448  {
449 #if defined(BOTAN_SIMD_USE_SSE2)
450  return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
451 
452 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
453  const unsigned int s = static_cast<unsigned int>(SHIFT);
454  const __vector unsigned int shifts = {s, s, s, s};
455  return SIMD_4x32(vec_sr(m_simd, shifts));
456 #elif defined(BOTAN_SIMD_USE_NEON)
457  return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
458 #endif
459  }
460 
462  {
463 #if defined(BOTAN_SIMD_USE_SSE2)
464  return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
465 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
466  return SIMD_4x32(vec_nor(m_simd, m_simd));
467 #elif defined(BOTAN_SIMD_USE_NEON)
468  return SIMD_4x32(vmvnq_u32(m_simd));
469 #endif
470  }
471 
      // Return (~*this) & other
      SIMD_4x32 andc(const SIMD_4x32& other) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // SSE2's andnot computes ~arg1 & arg2 directly
         return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         /*
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
         so swap the arguments
         */
         return SIMD_4x32(vec_andc(other.m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON is also a & ~b
         return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
#endif
         }
488 
      /**
      * Return copy *this with each word byte swapped
      */
      SIMD_4x32 bswap() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         // Swap the 16-bit halves of each word, then swap the bytes
         // within each 16-bit half
         __m128i T = m_simd;
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // Extract the words, swap them with the scalar helper, reload
         union {
            __vector unsigned int V;
            uint32_t R[4];
            } vec;

         vec.V = m_simd;
         bswap_4(vec.R);
         return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
#endif
         }
516 
517  template<size_t I>
519  {
520  static_assert(I <= 3, "Invalid shift count");
521 
522 #if defined(BOTAN_SIMD_USE_SSE2)
523  return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
524 #elif defined(BOTAN_SIMD_USE_NEON)
525  return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
526 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
527  const __vector unsigned int zero = vec_splat_u32(0);
528 
529  const __vector unsigned char shuf[3] = {
530  { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
531  { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
532  { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
533  };
534 
535  return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
536 #endif
537  }
538 
539  template<size_t I>
541  {
542  static_assert(I <= 3, "Invalid shift count");
543 
544 #if defined(BOTAN_SIMD_USE_SSE2)
545  return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
546 #elif defined(BOTAN_SIMD_USE_NEON)
547  return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
548 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
549  const __vector unsigned int zero = vec_splat_u32(0);
550 
551  const __vector unsigned char shuf[3] = {
552  { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
553  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
554  { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
555  };
556 
557  return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
558 #endif
559  }
560 
      /**
      * 4x4 Transposition on SIMD registers
      *
      * Treats B0..B3 as the rows of a 4x4 matrix of 32-bit words and
      * transposes it in place.
      */
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1,
                            SIMD_4x32& B2, SIMD_4x32& B3)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // Interleave 32-bit words, then recombine as 64-bit halves
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);

         B0.m_simd = _mm_unpacklo_epi64(T0, T1);
         B1.m_simd = _mm_unpackhi_epi64(T0, T1);
         B2.m_simd = _mm_unpacklo_epi64(T2, T3);
         B3.m_simd = _mm_unpackhi_epi64(T2, T3);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // Two rounds of merge-high/merge-low interleaves
         const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
         const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
         const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
         const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);

         B0.m_simd = vec_mergeh(T0, T1);
         B1.m_simd = vec_mergel(T0, T1);
         B2.m_simd = vec_mergeh(T2, T3);
         B3.m_simd = vec_mergel(T2, T3);

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
         // ARM32 vzip returns both interleaved halves at once
         const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
         const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);

         B0.m_simd = O0.val[0];
         B1.m_simd = O0.val[1];
         B2.m_simd = O1.val[0];
         B3.m_simd = O1.val[1];

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
         // AArch64 has separate low/high zip instructions
         const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
         const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);

         B0.m_simd = vzip1q_u32(T0, T1);
         B1.m_simd = vzip2q_u32(T0, T1);
         B2.m_simd = vzip1q_u32(T2, T3);
         B3.m_simd = vzip2q_u32(T2, T3);
#endif
         }
611 
      // Access the underlying platform vector, for interop with intrinsics
      native_simd_type raw() const BOTAN_FUNC_ISA(BOTAN_SIMD_ISA) { return m_simd; }

      // Wrap an existing platform vector value
      explicit SIMD_4x32(native_simd_type x) : m_simd(x) {}
   private:
      native_simd_type m_simd;
   };
618 
619 }
620 
621 #endif
SIMD_4x32(const uint32_t B[4])
Definition: simd_32.h:99
SIMD_4x32 operator-(const SIMD_4x32 &other) const
Definition: simd_32.h:337
void store_le(uint64_t out[2]) const
Definition: simd_32.h:198
SIMD_4x32 operator|(const SIMD_4x32 &other) const
Definition: simd_32.h:357
SIMD_4x32 shl() const
Definition: simd_32.h:431
SIMD_4x32 rotr() const
Definition: simd_32.h:319
SIMD_4x32 bswap() const
Definition: simd_32.h:492
void operator&=(const SIMD_4x32 &other)
Definition: simd_32.h:419
SIMD_4x32 operator~() const
Definition: simd_32.h:461
void store_le(uint8_t out[]) const
Definition: simd_32.h:206
void store_be(uint16_t in, uint8_t out[2])
Definition: loadstor.h:438
void store_be(uint8_t out[]) const
Definition: simd_32.h:236
SIMD_4x32 shift_elems_right() const
Definition: simd_32.h:540
#define BOTAN_IF_CONSTEXPR
Definition: compiler.h:185
SIMD_4x32 andc(const SIMD_4x32 &other) const
Definition: simd_32.h:473
static SIMD_4x32 load_le(const void *in)
Definition: simd_32.h:160
int(* final)(unsigned char *, CTX *)
SIMD_4x32 operator^(const SIMD_4x32 &other) const
Definition: simd_32.h:347
native_simd_type raw() const BOTAN_FUNC_ISA(BOTAN_SIMD_ISA)
Definition: simd_32.h:612
SIMD_4x32 shift_elems_left() const
Definition: simd_32.h:518
void operator^=(const SIMD_4x32 &other)
Definition: simd_32.h:396
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3)
Definition: simd_32.h:564
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition: loadstor.h:67
void operator+=(const SIMD_4x32 &other)
Definition: simd_32.h:374
T load_be(const uint8_t in[], size_t off)
Definition: loadstor.h:107
static SIMD_4x32 load_be(const void *in)
Definition: simd_32.h:177
SIMD_4x32 operator&(const SIMD_4x32 &other) const
Definition: simd_32.h:367
#define BOTAN_FUNC_ISA(isa)
Definition: compiler.h:77
SIMD_4x32 rho() const
Definition: simd_32.h:268
T load_le(const uint8_t in[], size_t off)
Definition: loadstor.h:123
static SIMD_4x32 splat_u8(uint8_t B)
Definition: simd_32.h:145
static bool is_little_endian()
Definition: cpuid.h:73
Definition: alg_id.cpp:13
void bswap_4(T x[4])
Definition: bswap.h:98
static SIMD_4x32 splat(uint32_t B)
Definition: simd_32.h:131
void store_le(uint32_t out[4]) const
Definition: simd_32.h:193
SIMD_4x32(native_simd_type x)
Definition: simd_32.h:614
SIMD_4x32 operator+(const SIMD_4x32 &other) const
Definition: simd_32.h:327
void operator|=(const SIMD_4x32 &other)
Definition: simd_32.h:408
void operator-=(const SIMD_4x32 &other)
Definition: simd_32.h:385
fe T
Definition: ge.cpp:37
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
Definition: simd_32.h:114
static bool is_big_endian()
Definition: cpuid.h:84
SIMD_4x32 rotl() const
Definition: simd_32.h:280
SIMD_4x32 shr() const
Definition: simd_32.h:447
void store_le(uint16_t in, uint8_t out[2])
Definition: loadstor.h:454