Botan  2.11.0
Crypto and TLS for C++11
simd_32.h
Go to the documentation of this file.
1 /*
2 * Lightweight wrappers for SIMD operations
3 * (C) 2009,2011,2016,2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #ifndef BOTAN_SIMD_32_H_
9 #define BOTAN_SIMD_32_H_
10 
11 #include <botan/types.h>
12 #include <botan/loadstor.h>
13 #include <botan/bswap.h>
14 #include <botan/cpuid.h>
15 
16 #if defined(BOTAN_TARGET_SUPPORTS_SSE2)
17  #include <emmintrin.h>
18  #define BOTAN_SIMD_USE_SSE2
19 
20 #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
21  #include <altivec.h>
22  #undef vector
23  #undef bool
24  #define BOTAN_SIMD_USE_ALTIVEC
25 
26 #elif defined(BOTAN_TARGET_SUPPORTS_NEON)
27  #include <arm_neon.h>
28  #define BOTAN_SIMD_USE_NEON
29 
30 #else
31  #include <botan/rotate.h>
32 #endif
33 
34 namespace Botan {
35 
36 /**
37 * 4x32 bit SIMD register
38 *
39 * This class is not a general purpose SIMD type, and only offers
40 * instructions needed for evaluation of specific crypto primitives.
41 * For example it does not currently have equality operators of any
42 * kind.
43 *
44 * Implemented for SSE2, VMX (Altivec), and NEON.
45 */
47  {
48  public:
49 
50  SIMD_4x32& operator=(const SIMD_4x32& other) = default;
51  SIMD_4x32(const SIMD_4x32& other) = default;
52 
53  SIMD_4x32& operator=(SIMD_4x32&& other) = default;
54  SIMD_4x32(SIMD_4x32&& other) = default;
55 
      /**
      * Zero initialize SIMD register with 4 32-bit elements
      */
      SIMD_4x32() // zero initialized
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_setzero_si128();
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_splat_u32(0);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vdupq_n_u32(0);
#else
         // Scalar fallback: clear each lane individually
         m_scalar[0] = 0;
         m_scalar[1] = 0;
         m_scalar[2] = 0;
         m_scalar[3] = 0;
#endif
         }
74 
      /**
      * Load SIMD register with 4 32-bit elements, in host word order
      * @param B array of four words; B[0] becomes lane 0
      */
      explicit SIMD_4x32(const uint32_t B[4])
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // Unaligned load; no alignment requirement on B
         m_sse = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = (__vector unsigned int){B[0], B[1], B[2], B[3]};
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vld1q_u32(B);
#else
         m_scalar[0] = B[0];
         m_scalar[1] = B[1];
         m_scalar[2] = B[2];
         m_scalar[3] = B[3];
#endif
         }
93 
      /**
      * Load SIMD register with 4 32-bit elements
      * @param B0 value for lane 0 (lowest lane)
      * @param B1 value for lane 1
      * @param B2 value for lane 2
      * @param B3 value for lane 3
      */
      SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // _mm_set_epi32 takes arguments highest-lane first, hence reversed
         m_sse = _mm_set_epi32(B3, B2, B1, B0);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = (__vector unsigned int){B0, B1, B2, B3};
#elif defined(BOTAN_SIMD_USE_NEON)
         // Better way to do this?
         const uint32_t B[4] = { B0, B1, B2, B3 };
         m_neon = vld1q_u32(B);
#else
         m_scalar[0] = B0;
         m_scalar[1] = B1;
         m_scalar[2] = B2;
         m_scalar[3] = B3;
#endif
         }
114 
115  /**
116  * Load SIMD register with one 32-bit element repeated
117  */
118  static SIMD_4x32 splat(uint32_t B)
119  {
120 #if defined(BOTAN_SIMD_USE_SSE2)
121  return SIMD_4x32(_mm_set1_epi32(B));
122 #elif defined(BOTAN_SIMD_USE_ARM)
123  return SIMD_4x32(vdupq_n_u32(B));
124 #else
125  return SIMD_4x32(B, B, B, B);
126 #endif
127  }
128 
      /**
      * Load a SIMD register from memory, little-endian convention
      * @param in pointer to 16 bytes; no alignment requirement
      */
      static SIMD_4x32 load_le(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // x86 is little-endian, so a plain unaligned load is already correct
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // Decode byte-wise via the generic helper, then pack the words
         uint32_t R[4];
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);

#elif defined(BOTAN_SIMD_USE_NEON)

         // NEON loads in host byte order; swap if running big-endian
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_big_endian() ? l.bswap() : l;
#else
         SIMD_4x32 out;
         Botan::load_le(out.m_scalar, static_cast<const uint8_t*>(in), 4);
         return out;
#endif
         }
152 
      /**
      * Load a SIMD register from memory, big-endian convention
      * @param in pointer to 16 bytes; no alignment requirement
      */
      static SIMD_4x32 load_be(const void* in)
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         // Little-endian load followed by a per-word byte swap
         return load_le(in).bswap();

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         uint32_t R[4];
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);

#elif defined(BOTAN_SIMD_USE_NEON)

         // NEON loads in host byte order; swap if running little-endian
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_little_endian() ? l.bswap() : l;

#else
         SIMD_4x32 out;
         Botan::load_be(out.m_scalar, static_cast<const uint8_t*>(in), 4);
         return out;
#endif
         }
179 
      /**
      * Store the four 32-bit elements to memory, little-endian convention.
      * (Comment previously said "Load"; this function stores.)
      * @param out pointer to at least 16 writable bytes
      */
      void store_le(uint8_t out[]) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         // x86 is little-endian; unaligned store is already correct
         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_sse);

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // Extract the words through a union, then serialize byte-wise
         union {
            __vector unsigned int V;
            uint32_t R[4];
            } vec;
         vec.V = m_vmx;
         Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)

         if(CPUID::is_big_endian())
            {
            // Swap to little-endian word layout first
            bswap().store_le(out);
            }
         else
            {
            vst1q_u8(out, vreinterpretq_u8_u32(m_neon));
            }
#else
         Botan::store_le(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
#endif
         }
212 
      /**
      * Store the four 32-bit elements to memory, big-endian convention.
      * (Comment previously said "Load"; this function stores.)
      * @param out pointer to at least 16 writable bytes
      */
      void store_be(uint8_t out[]) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         // Byte-swap each word, then store little-endian
         bswap().store_le(out);

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         union {
            __vector unsigned int V;
            uint32_t R[4];
            } vec;
         vec.V = m_vmx;
         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)

         if(CPUID::is_little_endian())
            {
            bswap().store_le(out);
            }
         else
            {
            vst1q_u8(out, vreinterpretq_u8_u32(m_neon));
            }

#else
         Botan::store_be(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
#endif
         }
246 
247 
      /*
      * This is used for SHA-2/SHACAL2
      * Return rotr<ROT1>() ^ rotr<ROT2>() ^ rotr<ROT3>()
      * (the sigma/Sigma functions of SHA-256 have this shape)
      */
      template<size_t ROT1, size_t ROT2, size_t ROT3>
      SIMD_4x32 rho() const
         {
         const SIMD_4x32 rot1 = this->rotr<ROT1>();
         const SIMD_4x32 rot2 = this->rotr<ROT2>();
         const SIMD_4x32 rot3 = this->rotr<ROT3>();
         return (rot1 ^ rot2 ^ rot3);
         }
260 
      /**
      * Left rotation of each 32-bit lane by a compile time constant
      * @tparam ROT rotation amount; must satisfy 0 < ROT < 32
      */
      template<size_t ROT>
      SIMD_4x32 rotl() const
         {
         static_assert(ROT > 0 && ROT < 32, "Invalid rotation constant");

#if defined(BOTAN_SIMD_USE_SSE2)

         // No rotate instruction in SSE2: combine left and right shifts
         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(ROT)),
                                       _mm_srli_epi32(m_sse, static_cast<int>(32-ROT))));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // AltiVec has a native per-lane rotate (vec_rl)
         const unsigned int r = static_cast<unsigned int>(ROT);
         return SIMD_4x32(vec_rl(m_vmx, (__vector unsigned int){r, r, r, r}));

#elif defined(BOTAN_SIMD_USE_NEON)

  #if defined(BOTAN_TARGET_ARCH_IS_ARM32)

         return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(ROT)),
                                    vshrq_n_u32(m_neon, static_cast<int>(32-ROT))));

  #else

         // AArch64: rotations by whole bytes are byte permutations, which are
         // cheaper than the generic shift+shift+or sequence below
         BOTAN_IF_CONSTEXPR(ROT == 8)
            {
            // Table lookup moving each byte up one position within its lane
            const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
            const uint8x16_t mask = vld1q_u8(maskb);
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_neon), mask)));
            }
         else BOTAN_IF_CONSTEXPR(ROT == 16)
            {
            // Swapping the 16-bit halves of each lane == rotation by 16
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_neon))));
            }
         else
            {
            return SIMD_4x32(vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(ROT)),
                                       vshrq_n_u32(m_neon, static_cast<int>(32-ROT))));
            }

  #endif

#else
         return SIMD_4x32(Botan::rotl<ROT>(m_scalar[0]),
                          Botan::rotl<ROT>(m_scalar[1]),
                          Botan::rotl<ROT>(m_scalar[2]),
                          Botan::rotl<ROT>(m_scalar[3]));
#endif
         }
313 
      /**
      * Right rotation of each 32-bit lane by a compile time constant.
      * Implemented as a left rotation by (32 - ROT), so rotl's static_assert
      * constrains ROT to 0 < ROT < 32 here as well.
      */
      template<size_t ROT>
      SIMD_4x32 rotr() const
         {
         return this->rotl<32-ROT>();
         }
322 
323  /**
324  * Add elements of a SIMD vector
325  */
326  SIMD_4x32 operator+(const SIMD_4x32& other) const
327  {
328  SIMD_4x32 retval(*this);
329  retval += other;
330  return retval;
331  }
332 
333  /**
334  * Subtract elements of a SIMD vector
335  */
336  SIMD_4x32 operator-(const SIMD_4x32& other) const
337  {
338  SIMD_4x32 retval(*this);
339  retval -= other;
340  return retval;
341  }
342 
343  /**
344  * XOR elements of a SIMD vector
345  */
346  SIMD_4x32 operator^(const SIMD_4x32& other) const
347  {
348  SIMD_4x32 retval(*this);
349  retval ^= other;
350  return retval;
351  }
352 
353  /**
354  * Binary OR elements of a SIMD vector
355  */
356  SIMD_4x32 operator|(const SIMD_4x32& other) const
357  {
358  SIMD_4x32 retval(*this);
359  retval |= other;
360  return retval;
361  }
362 
363  /**
364  * Binary AND elements of a SIMD vector
365  */
366  SIMD_4x32 operator&(const SIMD_4x32& other) const
367  {
368  SIMD_4x32 retval(*this);
369  retval &= other;
370  return retval;
371  }
372 
      // Lane-wise 32-bit addition in place (wraps mod 2^32)
      void operator+=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_add_epi32(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_add(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vaddq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] += other.m_scalar[0];
         m_scalar[1] += other.m_scalar[1];
         m_scalar[2] += other.m_scalar[2];
         m_scalar[3] += other.m_scalar[3];
#endif
         }
388 
      // Lane-wise 32-bit subtraction in place (wraps mod 2^32)
      void operator-=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_sub_epi32(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_sub(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vsubq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] -= other.m_scalar[0];
         m_scalar[1] -= other.m_scalar[1];
         m_scalar[2] -= other.m_scalar[2];
         m_scalar[3] -= other.m_scalar[3];
#endif
         }
404 
      // Bitwise XOR in place
      void operator^=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_xor_si128(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_xor(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = veorq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] ^= other.m_scalar[0];
         m_scalar[1] ^= other.m_scalar[1];
         m_scalar[2] ^= other.m_scalar[2];
         m_scalar[3] ^= other.m_scalar[3];
#endif
         }
421 
      // Bitwise OR in place
      void operator|=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_or_si128(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_or(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vorrq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] |= other.m_scalar[0];
         m_scalar[1] |= other.m_scalar[1];
         m_scalar[2] |= other.m_scalar[2];
         m_scalar[3] |= other.m_scalar[3];
#endif
         }
437 
      // Bitwise AND in place
      void operator&=(const SIMD_4x32& other)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_sse = _mm_and_si128(m_sse, other.m_sse);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_vmx = vec_and(m_vmx, other.m_vmx);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_neon = vandq_u32(m_neon, other.m_neon);
#else
         m_scalar[0] &= other.m_scalar[0];
         m_scalar[1] &= other.m_scalar[1];
         m_scalar[2] &= other.m_scalar[2];
         m_scalar[3] &= other.m_scalar[3];
#endif
         }
453 
454 
      // Logical left shift of each 32-bit lane by the compile-time constant
      // SHIFT (vacated low bits become zero)
      template<int SHIFT> SIMD_4x32 shl() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_slli_epi32(m_sse, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         return SIMD_4x32(vec_sl(m_vmx, (__vector unsigned int){s, s, s, s}));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshlq_n_u32(m_neon, SHIFT));
#else
         return SIMD_4x32(m_scalar[0] << SHIFT,
                          m_scalar[1] << SHIFT,
                          m_scalar[2] << SHIFT,
                          m_scalar[3] << SHIFT);
#endif
         }
472 
      // Logical right shift of each 32-bit lane by the compile-time constant
      // SHIFT (vacated high bits become zero)
      template<int SHIFT> SIMD_4x32 shr() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_srli_epi32(m_sse, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         return SIMD_4x32(vec_sr(m_vmx, (__vector unsigned int){s, s, s, s}));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshrq_n_u32(m_neon, SHIFT));
#else
         return SIMD_4x32(m_scalar[0] >> SHIFT, m_scalar[1] >> SHIFT,
                          m_scalar[2] >> SHIFT, m_scalar[3] >> SHIFT);
#endif
         }
489 
491  {
492 #if defined(BOTAN_SIMD_USE_SSE2)
493  return SIMD_4x32(_mm_xor_si128(m_sse, _mm_set1_epi32(0xFFFFFFFF)));
494 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
495  return SIMD_4x32(vec_nor(m_vmx, m_vmx));
496 #elif defined(BOTAN_SIMD_USE_NEON)
497  return SIMD_4x32(vmvnq_u32(m_neon));
498 #else
499  return SIMD_4x32(~m_scalar[0], ~m_scalar[1], ~m_scalar[2], ~m_scalar[3]);
500 #endif
501  }
502 
      // Return (~*this) & other ("and with complement")
      SIMD_4x32 andc(const SIMD_4x32& other) const
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // _mm_andnot_si128 computes (~arg1) & arg2, matching our convention
         return SIMD_4x32(_mm_andnot_si128(m_sse, other.m_sse));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         /*
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
         so swap the arguments
         */
         return SIMD_4x32(vec_andc(other.m_vmx, m_vmx));
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON is also a & ~b, so swap the arguments
         return SIMD_4x32(vbicq_u32(other.m_neon, m_neon));
#else
         return SIMD_4x32((~m_scalar[0]) & other.m_scalar[0],
                          (~m_scalar[1]) & other.m_scalar[1],
                          (~m_scalar[2]) & other.m_scalar[2],
                          (~m_scalar[3]) & other.m_scalar[3]);
#endif
         }
524 
      /**
      * Return a copy of *this with the bytes of each 32-bit word reversed
      */
      SIMD_4x32 bswap() const
         {
#if defined(BOTAN_SIMD_USE_SSE2)

         // SSE2 has no byte shuffle: first swap the 16-bit halves of each
         // word, then swap the two bytes within each 16-bit unit via shift+or
         __m128i T = m_sse;
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // Round-trip through scalar words and reuse the generic bswap_4
         union {
            __vector unsigned int V;
            uint32_t R[4];
            } vec;

         vec.V = m_vmx;
         bswap_4(vec.R);
         return SIMD_4x32(vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)

         // vrev32q_u8 reverses the byte order within each 32-bit element
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_neon))));

#else
         // scalar
         return SIMD_4x32(reverse_bytes(m_scalar[0]),
                          reverse_bytes(m_scalar[1]),
                          reverse_bytes(m_scalar[2]),
                          reverse_bytes(m_scalar[3]));
#endif
         }
560 
      /**
      * 4x4 transposition on SIMD registers: viewing B0..B3 as the four rows
      * of a 4x4 matrix of 32-bit words, rewrite them in place so that the
      * rows become the columns.
      */
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1,
                            SIMD_4x32& B2, SIMD_4x32& B3)
         {
#if defined(BOTAN_SIMD_USE_SSE2)
         // Standard two-stage unpack: interleave 32-bit then 64-bit halves
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_sse, B1.m_sse);
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_sse, B3.m_sse);
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_sse, B1.m_sse);
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_sse, B3.m_sse);

         B0.m_sse = _mm_unpacklo_epi64(T0, T1);
         B1.m_sse = _mm_unpackhi_epi64(T0, T1);
         B2.m_sse = _mm_unpacklo_epi64(T2, T3);
         B3.m_sse = _mm_unpackhi_epi64(T2, T3);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // Two rounds of merge-high/merge-low interleaving
         const __vector unsigned int T0 = vec_mergeh(B0.m_vmx, B2.m_vmx);
         const __vector unsigned int T1 = vec_mergeh(B1.m_vmx, B3.m_vmx);
         const __vector unsigned int T2 = vec_mergel(B0.m_vmx, B2.m_vmx);
         const __vector unsigned int T3 = vec_mergel(B1.m_vmx, B3.m_vmx);

         B0.m_vmx = vec_mergeh(T0, T1);
         B1.m_vmx = vec_mergel(T0, T1);
         B2.m_vmx = vec_mergeh(T2, T3);
         B3.m_vmx = vec_mergel(T2, T3);
#elif defined(BOTAN_SIMD_USE_NEON)

#if defined(BOTAN_TARGET_ARCH_IS_ARM32)

         // ARMv7 vzip produces both interleaved halves as a pair
         const uint32x4x2_t T0 = vzipq_u32(B0.m_neon, B2.m_neon);
         const uint32x4x2_t T1 = vzipq_u32(B1.m_neon, B3.m_neon);
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);

         B0.m_neon = O0.val[0];
         B1.m_neon = O0.val[1];
         B2.m_neon = O1.val[0];
         B3.m_neon = O1.val[1];

#elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
         // AArch64 vzip1/vzip2 give the low/high interleaves separately
         const uint32x4_t T0 = vzip1q_u32(B0.m_neon, B2.m_neon);
         const uint32x4_t T2 = vzip2q_u32(B0.m_neon, B2.m_neon);

         const uint32x4_t T1 = vzip1q_u32(B1.m_neon, B3.m_neon);
         const uint32x4_t T3 = vzip2q_u32(B1.m_neon, B3.m_neon);

         B0.m_neon = vzip1q_u32(T0, T1);
         B1.m_neon = vzip2q_u32(T0, T1);

         B2.m_neon = vzip1q_u32(T2, T3);
         B3.m_neon = vzip2q_u32(T2, T3);
#endif

#else
         // scalar: gather column i of the input rows into output row i
         SIMD_4x32 T0(B0.m_scalar[0], B1.m_scalar[0], B2.m_scalar[0], B3.m_scalar[0]);
         SIMD_4x32 T1(B0.m_scalar[1], B1.m_scalar[1], B2.m_scalar[1], B3.m_scalar[1]);
         SIMD_4x32 T2(B0.m_scalar[2], B1.m_scalar[2], B2.m_scalar[2], B3.m_scalar[2]);
         SIMD_4x32 T3(B0.m_scalar[3], B1.m_scalar[3], B2.m_scalar[3], B3.m_scalar[3]);

         B0 = T0;
         B1 = T1;
         B2 = T2;
         B3 = T3;
#endif
         }
628 
   private:

      // Private raw-register constructors used by the platform-specific
      // code paths above to wrap intrinsic results
#if defined(BOTAN_SIMD_USE_SSE2)
      explicit SIMD_4x32(__m128i in) : m_sse(in) {}
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
      explicit SIMD_4x32(__vector unsigned int in) : m_vmx(in) {}
#elif defined(BOTAN_SIMD_USE_NEON)
      explicit SIMD_4x32(uint32x4_t in) : m_neon(in) {}
#endif

      // Exactly one representation is compiled in, selected by the
      // BOTAN_SIMD_USE_* macros at the top of the file
#if defined(BOTAN_SIMD_USE_SSE2)
      __m128i m_sse;
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
      __vector unsigned int m_vmx;
#elif defined(BOTAN_SIMD_USE_NEON)
      uint32x4_t m_neon;
#else
      uint32_t m_scalar[4];
#endif
   };
649 
650 }
651 
652 #endif
SIMD_4x32(const uint32_t B[4])
Definition: simd_32.h:78
SIMD_4x32 operator-(const SIMD_4x32 &other) const
Definition: simd_32.h:336
SIMD_4x32 operator|(const SIMD_4x32 &other) const
Definition: simd_32.h:356
SIMD_4x32 shl() const
Definition: simd_32.h:455
SIMD_4x32 rotr() const
Definition: simd_32.h:318
SIMD_4x32 bswap() const
Definition: simd_32.h:528
SIMD_4x32 operator~() const
Definition: simd_32.h:490
void store_le(uint8_t out[]) const
Definition: simd_32.h:183
void store_be(uint16_t in, uint8_t out[2])
Definition: loadstor.h:436
void store_be(uint8_t out[]) const
Definition: simd_32.h:216
#define BOTAN_IF_CONSTEXPR
Definition: compiler.h:161
SIMD_4x32 andc(const SIMD_4x32 &other) const
Definition: simd_32.h:504
static SIMD_4x32 load_le(const void *in)
Definition: simd_32.h:132
int(* final)(unsigned char *, CTX *)
SIMD_4x32 operator^(const SIMD_4x32 &other) const
Definition: simd_32.h:346
void const BigInt BigInt BigInt & r
Definition: divide.h:23
SIMD_4x32 operator &(const SIMD_4x32 &other) const
Definition: simd_32.h:366
void const uint8_t in[]
Definition: mgf1.h:26
void operator^=(const SIMD_4x32 &other)
Definition: simd_32.h:405
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3)
Definition: simd_32.h:564
void operator+=(const SIMD_4x32 &other)
Definition: simd_32.h:373
T load_be(const uint8_t in[], size_t off)
Definition: loadstor.h:105
static SIMD_4x32 load_be(const void *in)
Definition: simd_32.h:156
SIMD_4x32 rho() const
Definition: simd_32.h:253
T load_le(const uint8_t in[], size_t off)
Definition: loadstor.h:121
uint8_t vec[]
Definition: ffi.h:790
Definition: alg_id.cpp:13
void bswap_4(T x[4])
Definition: bswap.h:96
uint8_t out[]
Definition: pbkdf2.h:19
uint16_t reverse_bytes(uint16_t val)
Definition: bswap.h:23
static SIMD_4x32 splat(uint32_t B)
Definition: simd_32.h:118
SIMD_4x32 operator+(const SIMD_4x32 &other) const
Definition: simd_32.h:326
void operator|=(const SIMD_4x32 &other)
Definition: simd_32.h:422
void operator-=(const SIMD_4x32 &other)
Definition: simd_32.h:389
fe T
Definition: ge.cpp:37
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
Definition: simd_32.h:97
void operator&=(const SIMD_4x32 &other)
Definition: simd_32.h:438
SIMD_4x32 rotl() const
Definition: simd_32.h:265
SIMD_4x32 shr() const
Definition: simd_32.h:473
void store_le(uint16_t in, uint8_t out[2])
Definition: loadstor.h:452