Botan  2.13.0
Crypto and TLS for C++11
simd_32.h
Go to the documentation of this file.
1 /*
2 * Lightweight wrappers for SIMD operations
3 * (C) 2009,2011,2016,2017,2019 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #ifndef BOTAN_SIMD_32_H_
9 #define BOTAN_SIMD_32_H_
10 
11 #include <botan/types.h>
12 
13 #if defined(BOTAN_TARGET_SUPPORTS_SSE2)
14  #include <emmintrin.h>
15  #define BOTAN_SIMD_USE_SSE2
16 
17 #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
18  #include <botan/bswap.h>
19  #include <botan/loadstor.h>
20  #include <altivec.h>
21  #undef vector
22  #undef bool
23  #define BOTAN_SIMD_USE_ALTIVEC
24 
25 #elif defined(BOTAN_TARGET_SUPPORTS_NEON)
26  #include <botan/cpuid.h>
27  #include <arm_neon.h>
28  #define BOTAN_SIMD_USE_NEON
29 
30 #else
31  #error "No SIMD instruction set enabled"
32 #endif
33 
34 #if defined(BOTAN_SIMD_USE_SSE2)
35  #define BOTAN_SIMD_ISA "sse2"
36  #define BOTAN_VPERM_ISA "ssse3"
37  #define BOTAN_CLMUL_ISA "pclmul"
38 #elif defined(BOTAN_SIMD_USE_NEON)
39  #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
40  #define BOTAN_SIMD_ISA "+simd"
41  #define BOTAN_CLMUL_ISA "+crypto"
42  #else
43  #define BOTAN_SIMD_ISA "fpu=neon"
44  #endif
45  #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
46 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
47  #define BOTAN_SIMD_ISA "altivec"
48  #define BOTAN_VPERM_ISA "altivec"
49 #endif
50 
51 namespace Botan {
52 
// The native 128-bit (4x32) vector type of whichever SIMD backend was
// selected above: SSE2, AltiVec/VMX, or NEON.
#if defined(BOTAN_SIMD_USE_SSE2)
   typedef __m128i native_simd_type;
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
   typedef __vector unsigned int native_simd_type;
#elif defined(BOTAN_SIMD_USE_NEON)
   typedef uint32x4_t native_simd_type;
#endif
60 
61 /**
62 * 4x32 bit SIMD register
63 *
64 * This class is not a general purpose SIMD type, and only offers
65 * instructions needed for evaluation of specific crypto primitives.
66 * For example it does not currently have equality operators of any
67 * kind.
68 *
69 * Implemented for SSE2, VMX (Altivec), and NEON.
70 */
72  {
73  public:
74 
75  SIMD_4x32& operator=(const SIMD_4x32& other) = default;
76  SIMD_4x32(const SIMD_4x32& other) = default;
77 
78  SIMD_4x32& operator=(SIMD_4x32&& other) = default;
79  SIMD_4x32(SIMD_4x32&& other) = default;
80 
      /**
      * Zero initialize SIMD register with 4 32-bit elements
      */
      SIMD_4x32() // zero initialized
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_setzero_si128();
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // splat of the immediate 0 into all four lanes
         m_simd = vec_splat_u32(0);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vdupq_n_u32(0);
#endif
         }
94 
      /**
      * Load SIMD register with 4 32-bit elements from an array
      * (no alignment requirement on B)
      */
      explicit SIMD_4x32(const uint32_t B[4])
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // unaligned load is used intentionally; callers pass arbitrary buffers
         m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         __vector unsigned int val = { B[0], B[1], B[2], B[3]};
         m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vld1q_u32(B);
#endif
         }
109 
      /**
      * Load SIMD register with 4 32-bit elements, B0 being element 0
      */
      SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // _mm_set_epi32 takes its arguments in high-to-low order,
         // hence the reversed argument order here
         m_simd = _mm_set_epi32(B3, B2, B1, B0);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         __vector unsigned int val = {B0, B1, B2, B3};
         m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
         // Better way to do this?
         const uint32_t B[4] = { B0, B1, B2, B3 };
         m_simd = vld1q_u32(B);
#endif
         }
126 
      /**
      * Load SIMD register with one 32-bit element repeated in all 4 lanes
      */
      static SIMD_4x32 splat(uint32_t B)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_set1_epi32(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vdupq_n_u32(B));
#else
         // AltiVec: fall back on the 4-element constructor
         return SIMD_4x32(B, B, B, B);
#endif
         }
140 
      /**
      * Load SIMD register with one 8-bit element repeated in all 16 bytes
      */
      static SIMD_4x32 splat_u8(uint8_t B)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_set1_epi8(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
#else
         // AltiVec: build a 32-bit word of four copies of B, then splat it
         const uint32_t B4 = make_uint32(B, B, B, B);
         return SIMD_4x32(B4, B4, B4, B4);
#endif
         }
155 
      /**
      * Load a SIMD register from memory with little-endian convention
      */
      static SIMD_4x32 load_le(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // x86 is little-endian; a plain unaligned load suffices
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         uint32_t R[4];
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON may run either endian; byteswap after load when big-endian
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_big_endian() ? l.bswap() : l;
#endif
         }
172 
      /**
      * Load a SIMD register from memory with big-endian convention
      */
      static SIMD_4x32 load_be(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // x86 is little-endian; load then byteswap each word
         return load_le(in).bswap();

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         uint32_t R[4];
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);

#elif defined(BOTAN_SIMD_USE_NEON)
         // byteswap after load when running little-endian
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_little_endian() ? l.bswap() : l;
#endif
         }
191 
      /**
      * Store to out[0..3] with little-endian convention
      */
      void store_le(uint32_t out[4]) const
         {
         this->store_le(reinterpret_cast<uint8_t*>(out));
         }
196 
      /**
      * Store to out[0..1] with little-endian convention
      */
      void store_le(uint64_t out[2]) const
         {
         this->store_le(reinterpret_cast<uint8_t*>(out));
         }
201 
202  /**
203  * Load a SIMD register with little-endian convention
204  */
205  void store_le(uint8_t out[]) const
206  {
207 #if defined(BOTAN_SIMD_USE_SSE2)
208 
209  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
210 
211 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
212 
213  union {
214  __vector unsigned int V;
215  uint32_t R[4];
216  } vec;
217  vec.V = raw();
218  Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
219 
220 #elif defined(BOTAN_SIMD_USE_NEON)
222  {
223  vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
224  }
225  else
226  {
227  vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
228  }
229 #endif
230  }
231 
232  /**
233  * Load a SIMD register with big-endian convention
234  */
235  void store_be(uint8_t out[]) const
236  {
237 #if defined(BOTAN_SIMD_USE_SSE2)
238 
239  bswap().store_le(out);
240 
241 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
242 
243  union {
244  __vector unsigned int V;
245  uint32_t R[4];
246  } vec;
247  vec.V = m_simd;
248  Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
249 
250 #elif defined(BOTAN_SIMD_USE_NEON)
252  {
253  vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
254  }
255  else
256  {
257  vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
258  }
259 #endif
260  }
261 
262  /*
263  * This is used for SHA-2/SHACAL2
264  * Return rotr(ROT1) ^ rotr(ROT2) ^ rotr(ROT3)
265  */
266  template<size_t ROT1, size_t ROT2, size_t ROT3>
267  SIMD_4x32 rho() const
268  {
269  const SIMD_4x32 rot1 = this->rotr<ROT1>();
270  const SIMD_4x32 rot2 = this->rotr<ROT2>();
271  const SIMD_4x32 rot3 = this->rotr<ROT3>();
272  return (rot1 ^ rot2 ^ rot3);
273  }
274 
      /**
      * Left rotation of each 32-bit element by a compile time constant
      */
      template<size_t ROT>
      SIMD_4x32 rotl() const
         {
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");

#if defined(BOTAN_SIMD_USE_SSE2)

         // no rotate instruction in SSE2; combine two shifts
         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
                                       _mm_srli_epi32(m_simd, static_cast<int>(32-ROT))));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // vec_rl rotates each element by the per-lane count
         const unsigned int r = static_cast<unsigned int>(ROT);
         __vector unsigned int rot = {r, r, r, r};
         return SIMD_4x32(vec_rl(m_simd, rot));

#elif defined(BOTAN_SIMD_USE_NEON)

#if defined(BOTAN_TARGET_ARCH_IS_ARM64)

         // Rotations by a multiple of 8 are plain byte permutations,
         // which are cheaper than the shift+or fallback below
         BOTAN_IF_CONSTEXPR(ROT == 8)
            {
            const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
            const uint8x16_t mask = vld1q_u8(maskb);
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
            }
         else BOTAN_IF_CONSTEXPR(ROT == 16)
            {
            // rotate by 16 == swap the 16-bit halves of each word
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
            }
#endif
         return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)),
                                    vshrq_n_u32(m_simd, static_cast<int>(32-ROT))));
#endif
         }
313 
      /**
      * Right rotation of each 32-bit element by a compile time constant;
      * implemented as a left rotation by the complement (ROT must be 1..31)
      */
      template<size_t ROT>
      SIMD_4x32 rotr() const
         {
         return this->rotl<32-ROT>();
         }
322 
323  /**
324  * Add elements of a SIMD vector
325  */
326  SIMD_4x32 operator+(const SIMD_4x32& other) const
327  {
328  SIMD_4x32 retval(*this);
329  retval += other;
330  return retval;
331  }
332 
333  /**
334  * Subtract elements of a SIMD vector
335  */
336  SIMD_4x32 operator-(const SIMD_4x32& other) const
337  {
338  SIMD_4x32 retval(*this);
339  retval -= other;
340  return retval;
341  }
342 
343  /**
344  * XOR elements of a SIMD vector
345  */
346  SIMD_4x32 operator^(const SIMD_4x32& other) const
347  {
348  SIMD_4x32 retval(*this);
349  retval ^= other;
350  return retval;
351  }
352 
353  /**
354  * Binary OR elements of a SIMD vector
355  */
356  SIMD_4x32 operator|(const SIMD_4x32& other) const
357  {
358  SIMD_4x32 retval(*this);
359  retval |= other;
360  return retval;
361  }
362 
363  /**
364  * Binary AND elements of a SIMD vector
365  */
366  SIMD_4x32 operator&(const SIMD_4x32& other) const
367  {
368  SIMD_4x32 retval(*this);
369  retval &= other;
370  return retval;
371  }
372 
      // Wordwise in-place addition (mod 2^32 per lane)
      void operator+=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_add_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_add(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vaddq_u32(m_simd, other.m_simd);
#endif
         }
383 
      // Wordwise in-place subtraction (mod 2^32 per lane)
      void operator-=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_sub_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_sub(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vsubq_u32(m_simd, other.m_simd);
#endif
         }
394 
      // Bitwise in-place XOR
      void operator^=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_xor_si128(m_simd, other.m_simd);

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_xor(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = veorq_u32(m_simd, other.m_simd);
#endif
         }
406 
      // Bitwise in-place OR
      void operator|=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_or_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_or(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vorrq_u32(m_simd, other.m_simd);
#endif
         }
417 
      // Bitwise in-place AND
      void operator&=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_and_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_and(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vandq_u32(m_simd, other.m_simd);
#endif
         }
428 
429 
      // Logical left shift of each 32-bit element by a compile time constant
      template<int SHIFT> SIMD_4x32 shl() const
         {
         static_assert(SHIFT > 0 && SHIFT <= 31, "Invalid shift count");

#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // vec_sl needs a per-lane shift count vector
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         const __vector unsigned int shifts = {s, s, s, s};
         return SIMD_4x32(vec_sl(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
#endif
         }
445 
446  template<int SHIFT> SIMD_4x32 shr() const
447  {
448 #if defined(BOTAN_SIMD_USE_SSE2)
449  return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
450 
451 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
452  const unsigned int s = static_cast<unsigned int>(SHIFT);
453  const __vector unsigned int shifts = {s, s, s, s};
454  return SIMD_4x32(vec_sr(m_simd, shifts));
455 #elif defined(BOTAN_SIMD_USE_NEON)
456  return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
457 #endif
458  }
459 
461  {
462 #if defined(BOTAN_SIMD_USE_SSE2)
463  return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
464 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
465  return SIMD_4x32(vec_nor(m_simd, m_simd));
466 #elif defined(BOTAN_SIMD_USE_NEON)
467  return SIMD_4x32(vmvnq_u32(m_simd));
468 #endif
469  }
470 
      // Returns (~*this) & other
      SIMD_4x32 andc(const SIMD_4x32& other) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         /*
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
         so swap the arguments
         */
         return SIMD_4x32(vec_andc(other.m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON vbic is also a & ~b, so swap arguments as for AltiVec
         return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
#endif
         }
487 
      /**
      * Return a copy of *this with the bytes of each 32-bit word reversed
      */
      SIMD_4x32 bswap() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         // SSE2 lacks a byte shuffle: first swap the 16-bit halves of
         // each word, then swap the bytes within each 16-bit half
         __m128i T = m_simd;
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // swap each word through scalar memory
         union {
            __vector unsigned int V;
            uint32_t R[4];
         } vec;

         vec.V = m_simd;
         bswap_4(vec.R);
         return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
#endif
         }
515 
516  template<size_t I>
518  {
519  static_assert(I <= 3, "Invalid shift count");
520 
521 #if defined(BOTAN_SIMD_USE_SSE2)
522  return SIMD_4x32(_mm_slli_si128(raw(), 4*I));
523 #elif defined(BOTAN_SIMD_USE_NEON)
524  return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4-I));
525 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
526  const __vector unsigned int zero = vec_splat_u32(0);
527 
528  const __vector unsigned char shuf[3] = {
529  { 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
530  { 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7 },
531  { 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3 },
532  };
533 
534  return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
535 #endif
536  }
537 
538  template<size_t I>
540  {
541  static_assert(I <= 3, "Invalid shift count");
542 
543 #if defined(BOTAN_SIMD_USE_SSE2)
544  return SIMD_4x32(_mm_srli_si128(raw(), 4*I));
545 #elif defined(BOTAN_SIMD_USE_NEON)
546  return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
547 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
548  const __vector unsigned int zero = vec_splat_u32(0);
549 
550  const __vector unsigned char shuf[3] = {
551  { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
552  { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
553  { 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
554  };
555 
556  return SIMD_4x32(vec_perm(raw(), zero, shuf[I-1]));
557 #endif
558  }
559 
      /**
      * 4x4 transposition on SIMD registers: treating B0..B3 as the rows
      * of a 4x4 matrix of 32-bit words, replace them with the columns.
      */
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1,
                            SIMD_4x32& B2, SIMD_4x32& B3)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // interleave 32-bit words, then 64-bit halves
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);

         B0.m_simd = _mm_unpacklo_epi64(T0, T1);
         B1.m_simd = _mm_unpackhi_epi64(T0, T1);
         B2.m_simd = _mm_unpacklo_epi64(T2, T3);
         B3.m_simd = _mm_unpackhi_epi64(T2, T3);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // two rounds of high/low merges achieve the transpose
         const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
         const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
         const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
         const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);

         B0.m_simd = vec_mergeh(T0, T1);
         B1.m_simd = vec_mergel(T0, T1);
         B2.m_simd = vec_mergeh(T2, T3);
         B3.m_simd = vec_mergel(T2, T3);

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
         // ARM32 NEON: vzipq returns both interleaved halves at once
         const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
         const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);

         B0.m_simd = O0.val[0];
         B1.m_simd = O0.val[1];
         B2.m_simd = O1.val[0];
         B3.m_simd = O1.val[1];

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
         // ARM64: separate low/high zip instructions
         const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
         const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);

         B0.m_simd = vzip1q_u32(T0, T1);
         B1.m_simd = vzip2q_u32(T0, T1);
         B2.m_simd = vzip1q_u32(T2, T3);
         B3.m_simd = vzip2q_u32(T2, T3);
#endif
         }
610 
      // Access the underlying platform vector type
      native_simd_type raw() const BOTAN_FUNC_ISA(BOTAN_SIMD_ISA) { return m_simd; }

      // Wrap a raw platform vector value
      explicit SIMD_4x32(native_simd_type x) : m_simd(x) {}
   private:
      native_simd_type m_simd;
   };
617 
618 }
619 
620 #endif
SIMD_4x32(const uint32_t B[4])
Definition: simd_32.h:98
SIMD_4x32 operator-(const SIMD_4x32 &other) const
Definition: simd_32.h:336
void store_le(uint64_t out[2]) const
Definition: simd_32.h:197
SIMD_4x32 operator|(const SIMD_4x32 &other) const
Definition: simd_32.h:356
SIMD_4x32 shl() const
Definition: simd_32.h:430
SIMD_4x32 rotr() const
Definition: simd_32.h:318
SIMD_4x32 bswap() const
Definition: simd_32.h:491
void operator &=(const SIMD_4x32 &other)
Definition: simd_32.h:418
SIMD_4x32 operator~() const
Definition: simd_32.h:460
void store_le(uint8_t out[]) const
Definition: simd_32.h:205
void store_be(uint16_t in, uint8_t out[2])
Definition: loadstor.h:438
void store_be(uint8_t out[]) const
Definition: simd_32.h:235
SIMD_4x32 shift_elems_right() const
Definition: simd_32.h:539
#define BOTAN_IF_CONSTEXPR
Definition: compiler.h:179
SIMD_4x32 andc(const SIMD_4x32 &other) const
Definition: simd_32.h:472
static SIMD_4x32 load_le(const void *in)
Definition: simd_32.h:159
int(* final)(unsigned char *, CTX *)
SIMD_4x32 operator^(const SIMD_4x32 &other) const
Definition: simd_32.h:346
native_simd_type raw() const BOTAN_FUNC_ISA(BOTAN_SIMD_ISA)
Definition: simd_32.h:611
SIMD_4x32 shift_elems_left() const
Definition: simd_32.h:517
void operator^=(const SIMD_4x32 &other)
Definition: simd_32.h:395
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3)
Definition: simd_32.h:563
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition: loadstor.h:67
void operator+=(const SIMD_4x32 &other)
Definition: simd_32.h:373
T load_be(const uint8_t in[], size_t off)
Definition: loadstor.h:107
static SIMD_4x32 load_be(const void *in)
Definition: simd_32.h:176
SIMD_4x32 operator &(const SIMD_4x32 &other) const
Definition: simd_32.h:366
#define BOTAN_FUNC_ISA(isa)
Definition: compiler.h:71
SIMD_4x32 rho() const
Definition: simd_32.h:267
T load_le(const uint8_t in[], size_t off)
Definition: loadstor.h:123
static SIMD_4x32 splat_u8(uint8_t B)
Definition: simd_32.h:144
static bool is_little_endian()
Definition: cpuid.h:73
Definition: alg_id.cpp:13
void bswap_4(T x[4])
Definition: bswap.h:98
static SIMD_4x32 splat(uint32_t B)
Definition: simd_32.h:130
void store_le(uint32_t out[4]) const
Definition: simd_32.h:192
SIMD_4x32(native_simd_type x)
Definition: simd_32.h:613
SIMD_4x32 operator+(const SIMD_4x32 &other) const
Definition: simd_32.h:326
void operator|=(const SIMD_4x32 &other)
Definition: simd_32.h:407
void operator-=(const SIMD_4x32 &other)
Definition: simd_32.h:384
fe T
Definition: ge.cpp:37
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
Definition: simd_32.h:113
static bool is_big_endian()
Definition: cpuid.h:84
SIMD_4x32 rotl() const
Definition: simd_32.h:279
SIMD_4x32 shr() const
Definition: simd_32.h:446
void store_le(uint16_t in, uint8_t out[2])
Definition: loadstor.h:454