Botan 3.5.0
Crypto and TLS for C++
simd_32.h
Go to the documentation of this file.
1/*
2* Lightweight wrappers for SIMD operations
3* (C) 2009,2011,2016,2017,2019 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#ifndef BOTAN_SIMD_32_H_
9#define BOTAN_SIMD_32_H_
10
11#include <botan/types.h>
12
13#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
14 #include <emmintrin.h>
15 #define BOTAN_SIMD_USE_SSE2
16
17#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
18 #include <botan/internal/loadstor.h>
19 #include <altivec.h>
20 #undef vector
21 #undef bool
22 #define BOTAN_SIMD_USE_ALTIVEC
23 #ifdef __VSX__
24 #define BOTAN_SIMD_USE_VSX
25 #endif
26
27#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
28 #include <botan/internal/cpuid.h>
29 #include <arm_neon.h>
30 #define BOTAN_SIMD_USE_NEON
31
32#else
33 #error "No SIMD instruction set enabled"
34#endif
35
36#if defined(BOTAN_SIMD_USE_SSE2)
37 #define BOTAN_SIMD_ISA "sse2"
38 #define BOTAN_VPERM_ISA "ssse3"
39 #define BOTAN_CLMUL_ISA "pclmul"
40#elif defined(BOTAN_SIMD_USE_NEON)
41 #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
42 #define BOTAN_SIMD_ISA "+simd"
43 #define BOTAN_CLMUL_ISA "+crypto+aes"
44 #else
45 #define BOTAN_SIMD_ISA "fpu=neon"
46 #endif
47 #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
48#elif defined(BOTAN_SIMD_USE_ALTIVEC)
49 #define BOTAN_SIMD_ISA "altivec"
50 #define BOTAN_VPERM_ISA "altivec"
51 #define BOTAN_CLMUL_ISA "crypto"
52#endif
53
54namespace Botan {
55
56#if defined(BOTAN_SIMD_USE_SSE2)
57using native_simd_type = __m128i;
58#elif defined(BOTAN_SIMD_USE_ALTIVEC)
59using native_simd_type = __vector unsigned int;
60#elif defined(BOTAN_SIMD_USE_NEON)
61using native_simd_type = uint32x4_t;
62#endif
63
64/**
65* 4x32 bit SIMD register
66*
67* This class is not a general purpose SIMD type, and only offers
68* instructions needed for evaluation of specific crypto primitives.
69* For example it does not currently have equality operators of any
70* kind.
71*
72* Implemented for SSE2, VMX (Altivec), and NEON.
73*/
75 public:
76 SIMD_4x32& operator=(const SIMD_4x32& other) = default;
77 SIMD_4x32(const SIMD_4x32& other) = default;
78
79 SIMD_4x32& operator=(SIMD_4x32&& other) = default;
80 SIMD_4x32(SIMD_4x32&& other) = default;
81
82 ~SIMD_4x32() = default;
83
      /**
      * Zero initialize SIMD register with 4 32-bit elements
      */
      SIMD_4x32() noexcept  // zero initialized
      {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_setzero_si128();
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_splat_u32(0);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vdupq_n_u32(0);
#endif
      }
97
      /**
      * Load SIMD register with 4 32-bit elements, taken from the array B
      * in order (B[0] becomes the first lane)
      */
      explicit SIMD_4x32(const uint32_t B[4]) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         __vector unsigned int val = {B[0], B[1], B[2], B[3]};
         m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vld1q_u32(B);
#endif
      }
111
      /**
      * Load SIMD register with 4 32-bit elements, B0 in the first lane
      */
      SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         // _mm_set_epi32 takes arguments from high lane to low, hence reversed
         m_simd = _mm_set_epi32(B3, B2, B1, B0);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         __vector unsigned int val = {B0, B1, B2, B3};
         m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
         // Better way to do this?
         const uint32_t B[4] = {B0, B1, B2, B3};
         m_simd = vld1q_u32(B);
#endif
      }
127
      /**
      * Load SIMD register with one 32-bit element repeated in all 4 lanes
      */
      static SIMD_4x32 splat(uint32_t B) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_set1_epi32(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vdupq_n_u32(B));
#else
         // AltiVec: no direct splat-from-GPR here, go through the 4-word ctor
         return SIMD_4x32(B, B, B, B);
#endif
      }
140
      /**
      * Load SIMD register with one 8-bit element repeated in all 16 bytes
      */
      static SIMD_4x32 splat_u8(uint8_t B) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_set1_epi8(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
#else
         // Build one 32-bit word of four copies, then splat it across the lanes
         const uint32_t B4 = make_uint32(B, B, B, B);
         return SIMD_4x32(B4, B4, B4, B4);
#endif
      }
154
      /**
      * Load a SIMD register from memory using little-endian convention
      */
      static SIMD_4x32 load_le(const void* in) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         // x86 is little-endian, so an unaligned load is already correct
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         uint32_t R[4];
         Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON may run big- or little-endian; byteswap only when big-endian
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_big_endian() ? l.bswap() : l;
#endif
      }
170
      /**
      * Load a SIMD register from memory using big-endian convention
      */
      static SIMD_4x32 load_be(const void* in) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         // x86 is little-endian: load then byteswap each word
         return load_le(in).bswap();

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         uint32_t R[4];
         Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
         return SIMD_4x32(R);

#elif defined(BOTAN_SIMD_USE_NEON)
         // Byteswap only when the CPU runs little-endian
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         return CPUID::is_little_endian() ? l.bswap() : l;
#endif
      }
188
      /// Store as 4 little-endian 32-bit words (delegates to the byte-array overload)
      void store_le(uint32_t out[4]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }

      /// Store as 4 big-endian 32-bit words (delegates to the byte-array overload)
      void store_be(uint32_t out[4]) const noexcept { this->store_be(reinterpret_cast<uint8_t*>(out)); }

      /// Store as 16 little-endian bytes into a 2x64-bit array (delegates to the byte-array overload)
      void store_le(uint64_t out[2]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }
194
195 /**
196 * Load a SIMD register with little-endian convention
197 */
198 void store_le(uint8_t out[]) const noexcept {
199#if defined(BOTAN_SIMD_USE_SSE2)
200
201 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
202
203#elif defined(BOTAN_SIMD_USE_ALTIVEC)
204
205 union {
206 __vector unsigned int V;
207 uint32_t R[4];
208 } vec;
209
210 vec.V = raw();
211 Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
212
213#elif defined(BOTAN_SIMD_USE_NEON)
215 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
216 } else {
217 vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
218 }
219#endif
220 }
221
222 /**
223 * Load a SIMD register with big-endian convention
224 */
225 void store_be(uint8_t out[]) const noexcept {
226#if defined(BOTAN_SIMD_USE_SSE2)
227
228 bswap().store_le(out);
229
230#elif defined(BOTAN_SIMD_USE_ALTIVEC)
231
232 union {
233 __vector unsigned int V;
234 uint32_t R[4];
235 } vec;
236
237 vec.V = m_simd;
238 Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
239
240#elif defined(BOTAN_SIMD_USE_NEON)
242 vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
243 } else {
244 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
245 }
246#endif
247 }
248
      /*
      * This is used for SHA-2/SHACAL2
      * Computes rotr<2> ^ rotr<13> ^ rotr<22> of each word
      * (SHA-256's "big" Sigma_0 function)
      */
      SIMD_4x32 sigma0() const noexcept {
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
         // POWER8+ has a dedicated SHA-256 sigma instruction
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0));
#else
         const SIMD_4x32 rot1 = this->rotr<2>();
         const SIMD_4x32 rot2 = this->rotr<13>();
         const SIMD_4x32 rot3 = this->rotr<22>();
         return (rot1 ^ rot2 ^ rot3);
#endif
      }
262
      /*
      * This is used for SHA-2/SHACAL2
      * Computes rotr<6> ^ rotr<11> ^ rotr<25> of each word
      * (SHA-256's "big" Sigma_1 function)
      */
      SIMD_4x32 sigma1() const noexcept {
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
         // POWER8+ has a dedicated SHA-256 sigma instruction
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0xF));
#else
         const SIMD_4x32 rot1 = this->rotr<6>();
         const SIMD_4x32 rot2 = this->rotr<11>();
         const SIMD_4x32 rot3 = this->rotr<25>();
         return (rot1 ^ rot2 ^ rot3);
#endif
      }
276
      /**
      * Left rotation of each 32-bit element by a compile time constant
      */
      template <size_t ROT>
      SIMD_4x32 rotl() const noexcept
         requires(ROT > 0 && ROT < 32)
      {
#if defined(BOTAN_SIMD_USE_SSE2)

         // No rotate instruction in SSE2; combine left and right shifts
         return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
                                       _mm_srli_epi32(m_simd, static_cast<int>(32 - ROT))));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         const unsigned int r = static_cast<unsigned int>(ROT);
         __vector unsigned int rot = {r, r, r, r};
         return SIMD_4x32(vec_rl(m_simd, rot));

#elif defined(BOTAN_SIMD_USE_NEON)

   #if defined(BOTAN_TARGET_ARCH_IS_ARM64)

         if constexpr(ROT == 8) {
            // Rotation by 8 is a byte permutation; a single table lookup suffices
            const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14};
            const uint8x16_t mask = vld1q_u8(maskb);
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
         } else if constexpr(ROT == 16) {
            // Rotation by 16 swaps the 16-bit halves of each word
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
         }
   #endif
         // Generic NEON path: combine left and right shifts
         return SIMD_4x32(
            vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)), vshrq_n_u32(m_simd, static_cast<int>(32 - ROT))));
#endif
      }
311
      /**
      * Right rotation of each 32-bit element by a compile time constant,
      * implemented as a left rotation by (32 - ROT)
      */
      template <size_t ROT>
      SIMD_4x32 rotr() const noexcept {
         return this->rotl<32 - ROT>();
      }
319
320 /**
321 * Add elements of a SIMD vector
322 */
323 SIMD_4x32 operator+(const SIMD_4x32& other) const noexcept {
324 SIMD_4x32 retval(*this);
325 retval += other;
326 return retval;
327 }
328
329 /**
330 * Subtract elements of a SIMD vector
331 */
332 SIMD_4x32 operator-(const SIMD_4x32& other) const noexcept {
333 SIMD_4x32 retval(*this);
334 retval -= other;
335 return retval;
336 }
337
338 /**
339 * XOR elements of a SIMD vector
340 */
341 SIMD_4x32 operator^(const SIMD_4x32& other) const noexcept {
342 SIMD_4x32 retval(*this);
343 retval ^= other;
344 return retval;
345 }
346
347 /**
348 * Binary OR elements of a SIMD vector
349 */
350 SIMD_4x32 operator|(const SIMD_4x32& other) const noexcept {
351 SIMD_4x32 retval(*this);
352 retval |= other;
353 return retval;
354 }
355
356 /**
357 * Binary AND elements of a SIMD vector
358 */
359 SIMD_4x32 operator&(const SIMD_4x32& other) const noexcept {
360 SIMD_4x32 retval(*this);
361 retval &= other;
362 return retval;
363 }
364
      /// In-place element-wise addition
      void operator+=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_add_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_add(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vaddq_u32(m_simd, other.m_simd);
#endif
      }
374
      /// In-place element-wise subtraction
      void operator-=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_sub_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_sub(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vsubq_u32(m_simd, other.m_simd);
#endif
      }
384
      /// In-place bitwise XOR
      void operator^=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_xor_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_xor(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = veorq_u32(m_simd, other.m_simd);
#endif
      }
394
      /// XOR the same 32-bit value into every lane
      void operator^=(uint32_t other) noexcept { *this ^= SIMD_4x32::splat(other); }
396
      /// In-place bitwise OR
      void operator|=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_or_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_or(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vorrq_u32(m_simd, other.m_simd);
#endif
      }
406
      /// In-place bitwise AND
      void operator&=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         m_simd = _mm_and_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_and(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vandq_u32(m_simd, other.m_simd);
#endif
      }
416
      /// Logical left shift of each 32-bit element by a compile time constant
      template <int SHIFT>
      SIMD_4x32 shl() const noexcept
         requires(SHIFT > 0 && SHIFT < 32)
      {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         const __vector unsigned int shifts = {s, s, s, s};
         return SIMD_4x32(vec_sl(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
#endif
      }
432
      /// Logical right shift of each 32-bit element by a compile time constant
      // NOTE(review): unlike shl, no requires(SHIFT > 0 && SHIFT < 32) clause here;
      // out-of-range shifts rely on the intrinsics' own constraints — confirm intended
      template <int SHIFT>
      SIMD_4x32 shr() const noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         const __vector unsigned int shifts = {s, s, s, s};
         return SIMD_4x32(vec_sr(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
#endif
      }
446
      /// Bitwise complement of all 128 bits
      SIMD_4x32 operator~() const noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         // SSE2 has no NOT instruction; XOR against all-ones
         return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // NOR of a value with itself is its complement
         return SIMD_4x32(vec_nor(m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vmvnq_u32(m_simd));
#endif
      }
456
      // Computes (~*this) & other
      SIMD_4x32 andc(const SIMD_4x32& other) const noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         // _mm_andnot_si128 computes ~arg1 & arg2, matching our convention
         return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         /*
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
         so swap the arguments
         */
         return SIMD_4x32(vec_andc(other.m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON is also a & ~b
         return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
#endif
      }
472
      /**
      * Return copy *this with each 32-bit word byte swapped
      */
      SIMD_4x32 bswap() const noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)

         // Swap the 16-bit halves of each word, then swap bytes within each half
         __m128i T = m_simd;
         T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
         return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
   #ifdef BOTAN_SIMD_USE_VSX
         // VSX provides a direct byte-reverse instruction
         return SIMD_4x32(vec_revb(m_simd));
   #else
         // Byte permutation reversing each 4-byte group
         const __vector unsigned char rev[1] = {
            {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
         };

         return SIMD_4x32(vec_perm(m_simd, m_simd, rev[0]));
   #endif

#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
#endif
      }
499
500 template <size_t I>
502 requires(I <= 3)
503 {
504#if defined(BOTAN_SIMD_USE_SSE2)
505 return SIMD_4x32(_mm_slli_si128(raw(), 4 * I));
506#elif defined(BOTAN_SIMD_USE_NEON)
507 return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4 - I));
508#elif defined(BOTAN_SIMD_USE_ALTIVEC)
509 const __vector unsigned int zero = vec_splat_u32(0);
510
511 const __vector unsigned char shuf[3] = {
512 {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
513 {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7},
514 {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3},
515 };
516
517 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
518#endif
519 }
520
521 template <size_t I>
523 requires(I <= 3)
524 {
525#if defined(BOTAN_SIMD_USE_SSE2)
526 return SIMD_4x32(_mm_srli_si128(raw(), 4 * I));
527#elif defined(BOTAN_SIMD_USE_NEON)
528 return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
529#elif defined(BOTAN_SIMD_USE_ALTIVEC)
530 const __vector unsigned int zero = vec_splat_u32(0);
531
532 const __vector unsigned char shuf[3] = {
533 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
534 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
535 {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
536 };
537
538 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
539#endif
540 }
541
      /**
      * 4x4 transposition on SIMD registers: treats B0..B3 as the rows of a
      * 4x4 matrix of 32-bit words and transposes it in place
      */
      static void transpose(SIMD_4x32& B0, SIMD_4x32& B1, SIMD_4x32& B2, SIMD_4x32& B3) noexcept {
#if defined(BOTAN_SIMD_USE_SSE2)
         // Interleave 32-bit words, then 64-bit halves
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);

         B0.m_simd = _mm_unpacklo_epi64(T0, T1);
         B1.m_simd = _mm_unpackhi_epi64(T0, T1);
         B2.m_simd = _mm_unpacklo_epi64(T2, T3);
         B3.m_simd = _mm_unpackhi_epi64(T2, T3);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // Two rounds of merge-high/merge-low interleaving
         const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
         const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
         const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
         const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);

         B0.m_simd = vec_mergeh(T0, T1);
         B1.m_simd = vec_mergel(T0, T1);
         B2.m_simd = vec_mergeh(T2, T3);
         B3.m_simd = vec_mergel(T2, T3);

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
         // ARM32 NEON: vzipq returns both interleaved halves at once
         const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
         const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);

         B0.m_simd = O0.val[0];
         B1.m_simd = O0.val[1];
         B2.m_simd = O1.val[0];
         B3.m_simd = O1.val[1];

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
         // AArch64: separate zip1/zip2 instructions for low/high halves
         const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
         const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);

         B0.m_simd = vzip1q_u32(T0, T1);
         B1.m_simd = vzip2q_u32(T0, T1);
         B2.m_simd = vzip1q_u32(T2, T3);
         B3.m_simd = vzip2q_u32(T2, T3);
#endif
      }
590
      /// Bitwise select: for each bit, returns a's bit where mask is set, else b's bit
      static inline SIMD_4x32 choose(const SIMD_4x32& mask, const SIMD_4x32& a, const SIMD_4x32& b) noexcept {
#if defined(BOTAN_SIMD_USE_ALTIVEC)
         return SIMD_4x32(vec_sel(b.raw(), a.raw(), mask.raw()));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vbslq_u32(mask.raw(), a.raw(), b.raw()));
#else
         // Generic fallback: (mask & a) ^ (~mask & b)
         return (mask & a) ^ mask.andc(b);
#endif
      }
600
      /// Bitwise majority of three inputs (SHA-2's Maj function),
      /// expressed via choose: maj(x,y,z) = choose(x ^ y, z, y)
      static inline SIMD_4x32 majority(const SIMD_4x32& x, const SIMD_4x32& y, const SIMD_4x32& z) noexcept {
         return SIMD_4x32::choose(x ^ y, z, y);
      }
604
      /// Access the underlying platform-specific register value
      native_simd_type raw() const noexcept { return m_simd; }

      /// Wrap a raw platform-specific register value
      explicit SIMD_4x32(native_simd_type x) noexcept : m_simd(x) {}

   private:
      // The single wrapped 4x32-bit vector register
      native_simd_type m_simd;
};
612
/// Free-function form of SIMD_4x32::rotl, mirroring Botan's scalar rotl<R>()
template <size_t R>
inline SIMD_4x32 rotl(SIMD_4x32 input) {
   return input.rotl<R>();
}
617
/// Free-function form of SIMD_4x32::rotr, mirroring Botan's scalar rotr<R>()
template <size_t R>
inline SIMD_4x32 rotr(SIMD_4x32 input) {
   return input.rotr<R>();
}
622
// For Serpent:
/// Free-function form of SIMD_4x32::shl (logical left shift of each word)
template <size_t S>
inline SIMD_4x32 shl(SIMD_4x32 input) {
   return input.shl<S>();
}
628
629} // namespace Botan
630
631#endif
static bool is_little_endian()
Definition cpuid.h:59
static bool is_big_endian()
Definition cpuid.h:69
static SIMD_4x32 load_be(const void *in) noexcept
Definition simd_32.h:174
SIMD_4x32 andc(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:458
void store_le(uint8_t out[]) const noexcept
Definition simd_32.h:198
SIMD_4x32(SIMD_4x32 &&other)=default
SIMD_4x32 & operator=(SIMD_4x32 &&other)=default
SIMD_4x32 operator|(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:350
SIMD_4x32 operator^(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:341
SIMD_4x32(native_simd_type x) noexcept
Definition simd_32.h:607
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_32.h:545
SIMD_4x32 bswap() const noexcept
Definition simd_32.h:476
SIMD_4x32 operator+(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:323
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept
Definition simd_32.h:115
native_simd_type raw() const noexcept
Definition simd_32.h:605
SIMD_4x32() noexcept
Definition simd_32.h:87
void store_le(uint32_t out[4]) const noexcept
Definition simd_32.h:189
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_32.h:158
SIMD_4x32 sigma1() const noexcept
Definition simd_32.h:266
void store_be(uint8_t out[]) const noexcept
Definition simd_32.h:225
SIMD_4x32(const SIMD_4x32 &other)=default
~SIMD_4x32()=default
void operator^=(uint32_t other) noexcept
Definition simd_32.h:395
void operator^=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:385
void operator+=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:365
SIMD_4x32 operator~() const noexcept
Definition simd_32.h:447
void store_le(uint64_t out[2]) const noexcept
Definition simd_32.h:193
void operator|=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:397
SIMD_4x32 shift_elems_left() const noexcept
Definition simd_32.h:501
void store_be(uint32_t out[4]) const noexcept
Definition simd_32.h:191
SIMD_4x32(const uint32_t B[4]) noexcept
Definition simd_32.h:101
SIMD_4x32 sigma0() const noexcept
Definition simd_32.h:252
SIMD_4x32 operator-(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:332
SIMD_4x32 shr() const noexcept
Definition simd_32.h:434
static SIMD_4x32 splat_u8(uint8_t B) noexcept
Definition simd_32.h:144
SIMD_4x32 rotr() const noexcept
Definition simd_32.h:316
SIMD_4x32 shl() const noexcept
Definition simd_32.h:418
SIMD_4x32 shift_elems_right() const noexcept
Definition simd_32.h:522
SIMD_4x32 rotl() const noexcept
Definition simd_32.h:281
void operator&=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:407
SIMD_4x32 operator&(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:359
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
static SIMD_4x32 choose(const SIMD_4x32 &mask, const SIMD_4x32 &a, const SIMD_4x32 &b) noexcept
Definition simd_32.h:591
void operator-=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:375
static SIMD_4x32 majority(const SIMD_4x32 &x, const SIMD_4x32 &y, const SIMD_4x32 &z) noexcept
Definition simd_32.h:601
static SIMD_4x32 splat(uint32_t B) noexcept
Definition simd_32.h:131
int(* final)(unsigned char *, CTX *)
FE_25519 T
Definition ge.cpp:34
constexpr T rotl(T input)
Definition rotate.h:21
constexpr T rotr(T input)
Definition rotate.h:33
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition loadstor.h:100
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:698
SIMD_4x32 shl(SIMD_4x32 input)
Definition simd_32.h:625
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:458
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:707
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:467