Botan  2.8.0
Crypto and TLS for C++11
simd_32.h
/*
* Lightweight wrappers for SIMD operations
* (C) 2009,2011,2016,2017 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#ifndef BOTAN_SIMD_32_H_
#define BOTAN_SIMD_32_H_

#include <botan/types.h>
#include <botan/loadstor.h>
#include <botan/bswap.h>
#include <botan/cpuid.h>
#include <cstring> // std::memcpy (used by the NEON load paths)

#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
   #include <emmintrin.h>
   #define BOTAN_SIMD_USE_SSE2

#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
   #include <altivec.h>
   #undef vector
   #undef bool
   #define BOTAN_SIMD_USE_ALTIVEC

#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
   #include <arm_neon.h>
   #define BOTAN_SIMD_USE_NEON
#endif

namespace Botan {

/**
* 4x32 bit SIMD register
*
* This class is not a general purpose SIMD type, and only offers
* instructions needed for evaluation of specific crypto primitives.
* For example it does not currently have equality operators of any
* kind.
*
* Implemented for SSE2, VMX (Altivec), and NEON.
*/
class SIMD_4x32 final
   {
   public:

      SIMD_4x32& operator=(const SIMD_4x32& other) = default;
      SIMD_4x32(const SIMD_4x32& other) = default;

#if !defined(BOTAN_BUILD_COMPILER_IS_MSVC_2013)
      SIMD_4x32& operator=(SIMD_4x32&& other) = default;
      SIMD_4x32(SIMD_4x32&& other) = default;
#endif
54 
55  /**
56  * Zero initialize SIMD register with 4 32-bit elements
57  */
58  SIMD_4x32() // zero initialized
59  {
60 #if defined(BOTAN_SIMD_USE_SSE2)
61  m_sse = _mm_setzero_si128();
62 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
63  m_vmx = vec_splat_u32(0);
64 #elif defined(BOTAN_SIMD_USE_NEON)
65  m_neon = vdupq_n_u32(0);
66 #else
67  m_scalar[0] = 0;
68  m_scalar[1] = 0;
69  m_scalar[2] = 0;
70  m_scalar[3] = 0;
71 #endif
72  }
73 
74  /**
75  * Load SIMD register with 4 32-bit elements
76  */
77  explicit SIMD_4x32(const uint32_t B[4])
78  {
79 #if defined(BOTAN_SIMD_USE_SSE2)
80  m_sse = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
81 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
82  m_vmx = (__vector unsigned int){B[0], B[1], B[2], B[3]};
83 #elif defined(BOTAN_SIMD_USE_NEON)
84  m_neon = vld1q_u32(B);
85 #else
86  m_scalar[0] = B[0];
87  m_scalar[1] = B[1];
88  m_scalar[2] = B[2];
89  m_scalar[3] = B[3];
90 #endif
91  }
92 
93  /**
94  * Load SIMD register with 4 32-bit elements
95  */
96  SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
97  {
98 #if defined(BOTAN_SIMD_USE_SSE2)
99  m_sse = _mm_set_epi32(B3, B2, B1, B0);
100 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
101  m_vmx = (__vector unsigned int){B0, B1, B2, B3};
102 #elif defined(BOTAN_SIMD_USE_NEON)
103  // Better way to do this?
104  const uint32_t B[4] = { B0, B1, B2, B3 };
105  m_neon = vld1q_u32(B);
106 #else
107  m_scalar[0] = B0;
108  m_scalar[1] = B1;
109  m_scalar[2] = B2;
110  m_scalar[3] = B3;
111 #endif
112  }
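
      /*
      * Usage sketch (editor's illustration, not part of the original
      * header): the constructors above behave as follows.
      *
      *   SIMD_4x32 zero;                          // lanes { 0, 0, 0, 0 }
      *   SIMD_4x32 vals(1, 2, 3, 4);              // lanes { 1, 2, 3, 4 }
      *   const uint32_t arr[4] = { 5, 6, 7, 8 };
      *   SIMD_4x32 from_mem(arr);                 // lanes { 5, 6, 7, 8 }
      */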

      /**
      * Load SIMD register with one 32-bit element repeated
      */
      static SIMD_4x32 splat(uint32_t B)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_set1_epi32(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vdupq_n_u32(B));
#else
         return SIMD_4x32(B, B, B, B);
#endif
         }
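
      /*
      * Example (editor's illustration): splat(0x01020304) yields the
      * vector { 0x01020304, 0x01020304, 0x01020304, 0x01020304 }; the
      * byte-selection masks in bswap() below are built this way.
      */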

      /**
      * Load a SIMD register with little-endian convention
      */
      static SIMD_4x32 load_le(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         uint32_t R[4];
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);

#elif defined(BOTAN_SIMD_USE_NEON)

         uint32_t in32[4];
         std::memcpy(in32, in, 16);
         if(CPUID::is_big_endian())
            {
            bswap_4(in32);
            }
         return SIMD_4x32(vld1q_u32(in32));

#else
         SIMD_4x32 out;
         Botan::load_le(out.m_scalar, static_cast<const uint8_t*>(in), 4);
         return out;
#endif
         }
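
      /*
      * Example (editor's illustration): load_le() reads 16 bytes as four
      * little-endian words on any host, so the bytes
      *
      *   01 00 00 00 02 00 00 00 03 00 00 00 04 00 00 00
      *
      * always load as lanes { 1, 2, 3, 4 }.
      */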

      /**
      * Load a SIMD register with big-endian convention
      */
      static SIMD_4x32 load_be(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         return load_le(in).bswap();

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         uint32_t R[4];
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);

#elif defined(BOTAN_SIMD_USE_NEON)

         uint32_t in32[4];
         std::memcpy(in32, in, 16);
         if(CPUID::is_little_endian())
            {
            bswap_4(in32);
            }
         return SIMD_4x32(vld1q_u32(in32));

#else
         SIMD_4x32 out;
         Botan::load_be(out.m_scalar, static_cast<const uint8_t*>(in), 4);
         return out;
#endif
         }

      /**
      * Store a SIMD register with little-endian convention
      */
      void store_le(uint8_t out[]) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_sse);

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         union {
            __vector unsigned int V;
            uint32_t R[4];
            } vec;
         vec.V = m_vmx;
         Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)

         if(CPUID::is_big_endian())
            {
            SIMD_4x32 swap = bswap();
            swap.store_be(out);
            }
         else
            {
            uint32_t out32[4] = { 0 };
            vst1q_u32(out32, m_neon);
            copy_out_le(out, 16, out32);
            }
#else
         Botan::store_le(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
#endif
         }

      /**
      * Store a SIMD register with big-endian convention
      */
      void store_be(uint8_t out[]) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         bswap().store_le(out);

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         union {
            __vector unsigned int V;
            uint32_t R[4];
            } vec;
         vec.V = m_vmx;
         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)

         if(CPUID::is_little_endian())
            {
            SIMD_4x32 swap = bswap();
            swap.store_le(out);
            }
         else
            {
            uint32_t out32[4] = { 0 };
            vst1q_u32(out32, m_neon);
            copy_out_be(out, 16, out32);
            }

#else
         Botan::store_be(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
#endif
         }
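
      /*
      * Round-trip sketch (editor's illustration): load_le()/store_le()
      * are inverses, as are load_be()/store_be():
      *
      *   uint8_t buf[16];
      *   // ... fill buf with 16 bytes ...
      *   SIMD_4x32::load_le(buf).store_le(buf); // buf is unchanged
      */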

      /*
      * This is used for SHA-2/SHACAL2
      * Return rotr(ROT1) ^ rotr(ROT2) ^ rotr(ROT3)
      */
      template<size_t ROT1, size_t ROT2, size_t ROT3>
      SIMD_4x32 rho() const
         {
         const SIMD_4x32 rot1 = this->rotr<ROT1>();
         const SIMD_4x32 rot2 = this->rotr<ROT2>();
         const SIMD_4x32 rot3 = this->rotr<ROT3>();
         return (rot1 ^ rot2 ^ rot3);
         }
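
      /*
      * Example (editor's illustration): the SHA-256 big-sigma functions
      * map directly onto rho():
      *
      *   Sigma0(x) = rotr(x,2) ^ rotr(x,13) ^ rotr(x,22) --> x.rho<2,13,22>()
      *   Sigma1(x) = rotr(x,6) ^ rotr(x,11) ^ rotr(x,25) --> x.rho<6,11,25>()
      */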

      /**
      * Left rotation by a compile time constant
      */
      template<size_t ROT>
      SIMD_4x32 rotl() const
         {
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");

#if defined(BOTAN_SIMD_USE_SSE2)

         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(ROT)),
                                       _mm_srli_epi32(m_sse, static_cast<int>(32-ROT))));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         const unsigned int r = static_cast<unsigned int>(ROT);
         return SIMD_4x32(vec_rl(m_vmx, (__vector unsigned int){r, r, r, r}));

#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(ROT)),
                                    vshrq_n_u32(m_neon, static_cast<int>(32-ROT))));

#else
         return SIMD_4x32(Botan::rotl<ROT>(m_scalar[0]),
                          Botan::rotl<ROT>(m_scalar[1]),
                          Botan::rotl<ROT>(m_scalar[2]),
                          Botan::rotl<ROT>(m_scalar[3]));
#endif
         }

      /**
      * Right rotation by a compile time constant
      */
      template<size_t ROT>
      SIMD_4x32 rotr() const
         {
         return this->rotl<32-ROT>();
         }
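
      /*
      * Example (editor's illustration): rotation wraps the shifted-out
      * bits around, so for a lane holding 0x80000001:
      *
      *   v.rotl<1>() // lane becomes 0x00000003
      *   v.rotr<1>() // lane becomes 0xC0000000 (same as rotl<31>())
      */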

      /**
      * Add elements of a SIMD vector
      */
      SIMD_4x32 operator+(const SIMD_4x32& other) const
         {
         SIMD_4x32 retval(*this);
         retval += other;
         return retval;
         }

      /**
      * Subtract elements of a SIMD vector
      */
      SIMD_4x32 operator-(const SIMD_4x32& other) const
         {
         SIMD_4x32 retval(*this);
         retval -= other;
         return retval;
         }

      /**
      * XOR elements of a SIMD vector
      */
      SIMD_4x32 operator^(const SIMD_4x32& other) const
         {
         SIMD_4x32 retval(*this);
         retval ^= other;
         return retval;
         }

      /**
      * Binary OR elements of a SIMD vector
      */
      SIMD_4x32 operator|(const SIMD_4x32& other) const
         {
         SIMD_4x32 retval(*this);
         retval |= other;
         return retval;
         }

      /**
      * Binary AND elements of a SIMD vector
      */
      SIMD_4x32 operator&(const SIMD_4x32& other) const
         {
         SIMD_4x32 retval(*this);
         retval &= other;
         return retval;
         }

      void operator+=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_add_epi32(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_add(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vaddq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] += other.m_scalar[0];
         m_scalar[1] += other.m_scalar[1];
         m_scalar[2] += other.m_scalar[2];
         m_scalar[3] += other.m_scalar[3];
#endif
         }

      void operator-=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_sub_epi32(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_sub(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vsubq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] -= other.m_scalar[0];
         m_scalar[1] -= other.m_scalar[1];
         m_scalar[2] -= other.m_scalar[2];
         m_scalar[3] -= other.m_scalar[3];
#endif
         }

      void operator^=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_xor_si128(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_xor(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = veorq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] ^= other.m_scalar[0];
         m_scalar[1] ^= other.m_scalar[1];
         m_scalar[2] ^= other.m_scalar[2];
         m_scalar[3] ^= other.m_scalar[3];
#endif
         }

      void operator|=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_or_si128(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_or(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vorrq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] |= other.m_scalar[0];
         m_scalar[1] |= other.m_scalar[1];
         m_scalar[2] |= other.m_scalar[2];
         m_scalar[3] |= other.m_scalar[3];
#endif
         }

      void operator&=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_and_si128(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_and(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vandq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] &= other.m_scalar[0];
         m_scalar[1] &= other.m_scalar[1];
         m_scalar[2] &= other.m_scalar[2];
         m_scalar[3] &= other.m_scalar[3];
#endif
         }
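
      /*
      * Example (editor's illustration): addition is modulo 2**32 per
      * lane, and together with rotl/XOR this is enough to express
      * ARX-style rounds, e.g. one ChaCha-like quarter-round step:
      *
      *   a += b;
      *   d ^= a;
      *   d = d.rotl<16>();
      */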

      template<int SHIFT> SIMD_4x32 shl() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_slli_epi32(m_sse, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         return SIMD_4x32(vec_sl(m_vmx, (__vector unsigned int){s, s, s, s}));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshlq_n_u32(m_neon, SHIFT));
#else
         return SIMD_4x32(m_scalar[0] << SHIFT,
                          m_scalar[1] << SHIFT,
                          m_scalar[2] << SHIFT,
                          m_scalar[3] << SHIFT);
#endif
         }

      template<int SHIFT> SIMD_4x32 shr() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_srli_epi32(m_sse, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         return SIMD_4x32(vec_sr(m_vmx, (__vector unsigned int){s, s, s, s}));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshrq_n_u32(m_neon, SHIFT));
#else
         return SIMD_4x32(m_scalar[0] >> SHIFT, m_scalar[1] >> SHIFT,
                          m_scalar[2] >> SHIFT, m_scalar[3] >> SHIFT);
#endif
         }
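
      /*
      * Example (editor's illustration): unlike rotl/rotr, these shifts
      * discard the bits shifted out, so for a lane holding 0x80000001:
      *
      *   v.shl<1>() // lane becomes 0x00000002 (top bit lost)
      *   v.shr<1>() // lane becomes 0x40000000 (low bit lost)
      */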

      SIMD_4x32 operator~() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_xor_si128(m_sse, _mm_set1_epi32(0xFFFFFFFF)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         return SIMD_4x32(vec_nor(m_vmx, m_vmx));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vmvnq_u32(m_neon));
#else
         return SIMD_4x32(~m_scalar[0], ~m_scalar[1], ~m_scalar[2], ~m_scalar[3]);
#endif
         }

      // (~reg) & other
      SIMD_4x32 andc(const SIMD_4x32& other) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_andnot_si128(m_sse, other.m_sse));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         /*
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
         so swap the arguments
         */
         return SIMD_4x32(vec_andc(other.m_vmx, m_vmx));
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON is also a & ~b
         return SIMD_4x32(vbicq_u32(other.m_neon, m_neon));
#else
         return SIMD_4x32((~m_scalar[0]) & other.m_scalar[0],
                          (~m_scalar[1]) & other.m_scalar[1],
                          (~m_scalar[2]) & other.m_scalar[2],
                          (~m_scalar[3]) & other.m_scalar[3]);
#endif
         }
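
      /*
      * Example (editor's illustration): a.andc(b) computes (~a) & b,
      * matching SSE's _mm_andnot_si128 argument order, so
      *
      *   mask.andc(x) // keeps the bits of x where mask is 0
      *
      * clears exactly the bits of x that mask selects.
      */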

      /**
      * Return copy *this with each word byte swapped
      */
      SIMD_4x32 bswap() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         __m128i T = m_sse;
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         union {
            __vector unsigned int V;
            uint32_t R[4];
            } vec;

         vec.V = m_vmx;
         bswap_4(vec.R);
         return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)

         //return SIMD_4x32(vrev64q_u32(m_neon));

         // FIXME this is really slow
         SIMD_4x32 ror8 = this->rotr<8>();
         SIMD_4x32 rol8 = this->rotl<8>();

         const SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00);
         const SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF);
         return (ror8 & mask1) | (rol8 & mask2);
#else
         // scalar
         return SIMD_4x32(reverse_bytes(m_scalar[0]),
                          reverse_bytes(m_scalar[1]),
                          reverse_bytes(m_scalar[2]),
                          reverse_bytes(m_scalar[3]));
#endif
         }
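
      /*
      * Example (editor's illustration): bswap() reverses the bytes of
      * each lane independently, so 0x01020304 becomes 0x04030201; this
      * is why load_be() on SSE2 is simply load_le(in).bswap().
      */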

      /**
      * 4x4 Transposition on SIMD registers
      */
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1,
                            SIMD_4x32& B2, SIMD_4x32& B3)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_sse, B1.m_sse);
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_sse, B3.m_sse);
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_sse, B1.m_sse);
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_sse, B3.m_sse);

         B0.m_sse = _mm_unpacklo_epi64(T0, T1);
         B1.m_sse = _mm_unpackhi_epi64(T0, T1);
         B2.m_sse = _mm_unpacklo_epi64(T2, T3);
         B3.m_sse = _mm_unpackhi_epi64(T2, T3);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const __vector unsigned int T0 = vec_mergeh(B0.m_vmx, B2.m_vmx);
         const __vector unsigned int T1 = vec_mergeh(B1.m_vmx, B3.m_vmx);
         const __vector unsigned int T2 = vec_mergel(B0.m_vmx, B2.m_vmx);
         const __vector unsigned int T3 = vec_mergel(B1.m_vmx, B3.m_vmx);

         B0.m_vmx = vec_mergeh(T0, T1);
         B1.m_vmx = vec_mergel(T0, T1);
         B2.m_vmx = vec_mergeh(T2, T3);
         B3.m_vmx = vec_mergel(T2, T3);
#elif defined(BOTAN_SIMD_USE_NEON)

#if defined(BOTAN_TARGET_ARCH_IS_ARM32)

         const uint32x4x2_t T0 = vzipq_u32(B0.m_neon, B2.m_neon);
         const uint32x4x2_t T1 = vzipq_u32(B1.m_neon, B3.m_neon);
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);

         B0.m_neon = O0.val[0];
         B1.m_neon = O0.val[1];
         B2.m_neon = O1.val[0];
         B3.m_neon = O1.val[1];

#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
         const uint32x4_t T0 = vzip1q_u32(B0.m_neon, B2.m_neon);
         const uint32x4_t T2 = vzip2q_u32(B0.m_neon, B2.m_neon);

         const uint32x4_t T1 = vzip1q_u32(B1.m_neon, B3.m_neon);
         const uint32x4_t T3 = vzip2q_u32(B1.m_neon, B3.m_neon);

         B0.m_neon = vzip1q_u32(T0, T1);
         B1.m_neon = vzip2q_u32(T0, T1);

         B2.m_neon = vzip1q_u32(T2, T3);
         B3.m_neon = vzip2q_u32(T2, T3);
#endif

#else
         // scalar
         SIMD_4x32 T0(B0.m_scalar[0], B1.m_scalar[0], B2.m_scalar[0], B3.m_scalar[0]);
         SIMD_4x32 T1(B0.m_scalar[1], B1.m_scalar[1], B2.m_scalar[1], B3.m_scalar[1]);
         SIMD_4x32 T2(B0.m_scalar[2], B1.m_scalar[2], B2.m_scalar[2], B3.m_scalar[2]);
         SIMD_4x32 T3(B0.m_scalar[3], B1.m_scalar[3], B2.m_scalar[3], B3.m_scalar[3]);

         B0 = T0;
         B1 = T1;
         B2 = T2;
         B3 = T3;
#endif
         }
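
      /*
      * Example (editor's illustration): transpose() treats B0..B3 as the
      * rows of a 4x4 matrix of 32-bit words and swaps rows with columns:
      *
      *   B0 = {  0,  1,  2,  3 }         B0 = { 0, 4,  8, 12 }
      *   B1 = {  4,  5,  6,  7 }   -->   B1 = { 1, 5,  9, 13 }
      *   B2 = {  8,  9, 10, 11 }         B2 = { 2, 6, 10, 14 }
      *   B3 = { 12, 13, 14, 15 }         B3 = { 3, 7, 11, 15 }
      */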

   private:

#if defined(BOTAN_SIMD_USE_SSE2)
      explicit SIMD_4x32(__m128i in) : m_sse(in) {}
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
      explicit SIMD_4x32(__vector unsigned int in) : m_vmx(in) {}
#elif defined(BOTAN_SIMD_USE_NEON)
      explicit SIMD_4x32(uint32x4_t in) : m_neon(in) {}
#endif

#if defined(BOTAN_SIMD_USE_SSE2)
      __m128i m_sse;
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
      __vector unsigned int m_vmx;
#elif defined(BOTAN_SIMD_USE_NEON)
      uint32x4_t m_neon;
#else
      uint32_t m_scalar[4];
#endif
   };

typedef SIMD_4x32 SIMD_32;

}

#endif