Botan 3.4.0
Crypto and TLS for C++
simd_32.h
Go to the documentation of this file.
1/*
2* Lightweight wrappers for SIMD operations
3* (C) 2009,2011,2016,2017,2019 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#ifndef BOTAN_SIMD_32_H_
9#define BOTAN_SIMD_32_H_
10
11#include <botan/types.h>
12
13#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
14 #include <emmintrin.h>
15 #define BOTAN_SIMD_USE_SSE2
16
17#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
18 #include <botan/internal/bswap.h>
19 #include <botan/internal/loadstor.h>
20 #include <altivec.h>
21 #undef vector
22 #undef bool
23 #define BOTAN_SIMD_USE_ALTIVEC
24 #ifdef __VSX__
25 #define BOTAN_SIMD_USE_VSX
26 #endif
27
28#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
29 #include <botan/internal/cpuid.h>
30 #include <arm_neon.h>
31 #define BOTAN_SIMD_USE_NEON
32
33#else
34 #error "No SIMD instruction set enabled"
35#endif
36
37#if defined(BOTAN_SIMD_USE_SSE2)
38 #define BOTAN_SIMD_ISA "sse2"
39 #define BOTAN_VPERM_ISA "ssse3"
40 #define BOTAN_CLMUL_ISA "pclmul"
41#elif defined(BOTAN_SIMD_USE_NEON)
42 #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
43 #define BOTAN_SIMD_ISA "+simd"
44 #define BOTAN_CLMUL_ISA "+crypto+aes"
45 #else
46 #define BOTAN_SIMD_ISA "fpu=neon"
47 #endif
48 #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
49#elif defined(BOTAN_SIMD_USE_ALTIVEC)
50 #define BOTAN_SIMD_ISA "altivec"
51 #define BOTAN_VPERM_ISA "altivec"
52 #define BOTAN_CLMUL_ISA "crypto"
53#endif
54
55namespace Botan {
56
57#if defined(BOTAN_SIMD_USE_SSE2)
58using native_simd_type = __m128i;
59#elif defined(BOTAN_SIMD_USE_ALTIVEC)
60using native_simd_type = __vector unsigned int;
61#elif defined(BOTAN_SIMD_USE_NEON)
62using native_simd_type = uint32x4_t;
63#endif
64
65/**
66* 4x32 bit SIMD register
67*
68* This class is not a general purpose SIMD type, and only offers
69* instructions needed for evaluation of specific crypto primitives.
70* For example it does not currently have equality operators of any
71* kind.
72*
73* Implemented for SSE2, VMX (Altivec), and NEON.
74*/
76 public:
77 SIMD_4x32& operator=(const SIMD_4x32& other) = default;
78 SIMD_4x32(const SIMD_4x32& other) = default;
79
80 SIMD_4x32& operator=(SIMD_4x32&& other) = default;
81 SIMD_4x32(SIMD_4x32&& other) = default;
82
83 ~SIMD_4x32() = default;
84
85 /**
86 * Zero initialize SIMD register with 4 32-bit elements
87 */
88 SIMD_4x32() noexcept // zero initialized
89 {
90#if defined(BOTAN_SIMD_USE_SSE2)
91 m_simd = _mm_setzero_si128();
92#elif defined(BOTAN_SIMD_USE_ALTIVEC)
93 m_simd = vec_splat_u32(0);
94#elif defined(BOTAN_SIMD_USE_NEON)
95 m_simd = vdupq_n_u32(0);
96#endif
97 }
98
99 /**
100 * Load SIMD register with 4 32-bit elements
101 */
102 explicit SIMD_4x32(const uint32_t B[4]) noexcept {
103#if defined(BOTAN_SIMD_USE_SSE2)
104 m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
105#elif defined(BOTAN_SIMD_USE_ALTIVEC)
106 __vector unsigned int val = {B[0], B[1], B[2], B[3]};
107 m_simd = val;
108#elif defined(BOTAN_SIMD_USE_NEON)
109 m_simd = vld1q_u32(B);
110#endif
111 }
112
113 /**
114 * Load SIMD register with 4 32-bit elements
115 */
116 SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept {
117#if defined(BOTAN_SIMD_USE_SSE2)
118 m_simd = _mm_set_epi32(B3, B2, B1, B0);
119#elif defined(BOTAN_SIMD_USE_ALTIVEC)
120 __vector unsigned int val = {B0, B1, B2, B3};
121 m_simd = val;
122#elif defined(BOTAN_SIMD_USE_NEON)
123 // Better way to do this?
124 const uint32_t B[4] = {B0, B1, B2, B3};
125 m_simd = vld1q_u32(B);
126#endif
127 }
128
129 /**
130 * Load SIMD register with one 32-bit element repeated
131 */
132 static SIMD_4x32 splat(uint32_t B) noexcept {
133#if defined(BOTAN_SIMD_USE_SSE2)
134 return SIMD_4x32(_mm_set1_epi32(B));
135#elif defined(BOTAN_SIMD_USE_NEON)
136 return SIMD_4x32(vdupq_n_u32(B));
137#else
138 return SIMD_4x32(B, B, B, B);
139#endif
140 }
141
142 /**
143 * Load SIMD register with one 8-bit element repeated
144 */
145 static SIMD_4x32 splat_u8(uint8_t B) noexcept {
146#if defined(BOTAN_SIMD_USE_SSE2)
147 return SIMD_4x32(_mm_set1_epi8(B));
148#elif defined(BOTAN_SIMD_USE_NEON)
149 return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
150#else
151 const uint32_t B4 = make_uint32(B, B, B, B);
152 return SIMD_4x32(B4, B4, B4, B4);
153#endif
154 }
155
156 /**
157 * Load a SIMD register with little-endian convention
158 */
159 static SIMD_4x32 load_le(const void* in) noexcept {
160#if defined(BOTAN_SIMD_USE_SSE2)
161 return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
162#elif defined(BOTAN_SIMD_USE_ALTIVEC)
163 uint32_t R[4];
164 Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
165 return SIMD_4x32(R);
166#elif defined(BOTAN_SIMD_USE_NEON)
167 SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
168 return CPUID::is_big_endian() ? l.bswap() : l;
169#endif
170 }
171
172 /**
173 * Load a SIMD register with big-endian convention
174 */
175 static SIMD_4x32 load_be(const void* in) noexcept {
176#if defined(BOTAN_SIMD_USE_SSE2)
177 return load_le(in).bswap();
178
179#elif defined(BOTAN_SIMD_USE_ALTIVEC)
180 uint32_t R[4];
181 Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
182 return SIMD_4x32(R);
183
184#elif defined(BOTAN_SIMD_USE_NEON)
185 SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
186 return CPUID::is_little_endian() ? l.bswap() : l;
187#endif
188 }
189
190 void store_le(uint32_t out[4]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }
191
192 void store_be(uint32_t out[4]) const noexcept { this->store_be(reinterpret_cast<uint8_t*>(out)); }
193
194 void store_le(uint64_t out[2]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }
195
196 /**
197 * Load a SIMD register with little-endian convention
198 */
199 void store_le(uint8_t out[]) const noexcept {
200#if defined(BOTAN_SIMD_USE_SSE2)
201
202 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
203
204#elif defined(BOTAN_SIMD_USE_ALTIVEC)
205
206 union {
207 __vector unsigned int V;
208 uint32_t R[4];
209 } vec;
210
211 vec.V = raw();
212 Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
213
214#elif defined(BOTAN_SIMD_USE_NEON)
216 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
217 } else {
218 vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
219 }
220#endif
221 }
222
223 /**
224 * Load a SIMD register with big-endian convention
225 */
226 void store_be(uint8_t out[]) const noexcept {
227#if defined(BOTAN_SIMD_USE_SSE2)
228
229 bswap().store_le(out);
230
231#elif defined(BOTAN_SIMD_USE_ALTIVEC)
232
233 union {
234 __vector unsigned int V;
235 uint32_t R[4];
236 } vec;
237
238 vec.V = m_simd;
239 Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
240
241#elif defined(BOTAN_SIMD_USE_NEON)
243 vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
244 } else {
245 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
246 }
247#endif
248 }
249
250 /*
251 * This is used for SHA-2/SHACAL2
252 */
253 SIMD_4x32 sigma0() const noexcept {
254#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
255 return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0));
256#else
257 const SIMD_4x32 rot1 = this->rotr<2>();
258 const SIMD_4x32 rot2 = this->rotr<13>();
259 const SIMD_4x32 rot3 = this->rotr<22>();
260 return (rot1 ^ rot2 ^ rot3);
261#endif
262 }
263
264 /*
265 * This is used for SHA-2/SHACAL2
266 */
267 SIMD_4x32 sigma1() const noexcept {
268#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
269 return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0xF));
270#else
271 const SIMD_4x32 rot1 = this->rotr<6>();
272 const SIMD_4x32 rot2 = this->rotr<11>();
273 const SIMD_4x32 rot3 = this->rotr<25>();
274 return (rot1 ^ rot2 ^ rot3);
275#endif
276 }
277
278 /**
279 * Left rotation by a compile time constant
280 */
281 template <size_t ROT>
282 SIMD_4x32 rotl() const noexcept
283 requires(ROT > 0 && ROT < 32)
284 {
285#if defined(BOTAN_SIMD_USE_SSE2)
286
287 return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
288 _mm_srli_epi32(m_simd, static_cast<int>(32 - ROT))));
289
290#elif defined(BOTAN_SIMD_USE_ALTIVEC)
291
292 const unsigned int r = static_cast<unsigned int>(ROT);
293 __vector unsigned int rot = {r, r, r, r};
294 return SIMD_4x32(vec_rl(m_simd, rot));
295
296#elif defined(BOTAN_SIMD_USE_NEON)
297
298 #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
299
300 if constexpr(ROT == 8) {
301 const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14};
302 const uint8x16_t mask = vld1q_u8(maskb);
303 return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
304 } else if constexpr(ROT == 16) {
305 return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
306 }
307 #endif
308 return SIMD_4x32(
309 vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)), vshrq_n_u32(m_simd, static_cast<int>(32 - ROT))));
310#endif
311 }
312
313 /**
314 * Right rotation by a compile time constant
315 */
316 template <size_t ROT>
317 SIMD_4x32 rotr() const noexcept {
318 return this->rotl<32 - ROT>();
319 }
320
321 /**
322 * Add elements of a SIMD vector
323 */
324 SIMD_4x32 operator+(const SIMD_4x32& other) const noexcept {
325 SIMD_4x32 retval(*this);
326 retval += other;
327 return retval;
328 }
329
330 /**
331 * Subtract elements of a SIMD vector
332 */
333 SIMD_4x32 operator-(const SIMD_4x32& other) const noexcept {
334 SIMD_4x32 retval(*this);
335 retval -= other;
336 return retval;
337 }
338
339 /**
340 * XOR elements of a SIMD vector
341 */
342 SIMD_4x32 operator^(const SIMD_4x32& other) const noexcept {
343 SIMD_4x32 retval(*this);
344 retval ^= other;
345 return retval;
346 }
347
348 /**
349 * Binary OR elements of a SIMD vector
350 */
351 SIMD_4x32 operator|(const SIMD_4x32& other) const noexcept {
352 SIMD_4x32 retval(*this);
353 retval |= other;
354 return retval;
355 }
356
357 /**
358 * Binary AND elements of a SIMD vector
359 */
360 SIMD_4x32 operator&(const SIMD_4x32& other) const noexcept {
361 SIMD_4x32 retval(*this);
362 retval &= other;
363 return retval;
364 }
365
366 void operator+=(const SIMD_4x32& other) noexcept {
367#if defined(BOTAN_SIMD_USE_SSE2)
368 m_simd = _mm_add_epi32(m_simd, other.m_simd);
369#elif defined(BOTAN_SIMD_USE_ALTIVEC)
370 m_simd = vec_add(m_simd, other.m_simd);
371#elif defined(BOTAN_SIMD_USE_NEON)
372 m_simd = vaddq_u32(m_simd, other.m_simd);
373#endif
374 }
375
376 void operator-=(const SIMD_4x32& other) noexcept {
377#if defined(BOTAN_SIMD_USE_SSE2)
378 m_simd = _mm_sub_epi32(m_simd, other.m_simd);
379#elif defined(BOTAN_SIMD_USE_ALTIVEC)
380 m_simd = vec_sub(m_simd, other.m_simd);
381#elif defined(BOTAN_SIMD_USE_NEON)
382 m_simd = vsubq_u32(m_simd, other.m_simd);
383#endif
384 }
385
386 void operator^=(const SIMD_4x32& other) noexcept {
387#if defined(BOTAN_SIMD_USE_SSE2)
388 m_simd = _mm_xor_si128(m_simd, other.m_simd);
389#elif defined(BOTAN_SIMD_USE_ALTIVEC)
390 m_simd = vec_xor(m_simd, other.m_simd);
391#elif defined(BOTAN_SIMD_USE_NEON)
392 m_simd = veorq_u32(m_simd, other.m_simd);
393#endif
394 }
395
396 void operator^=(uint32_t other) noexcept { *this ^= SIMD_4x32::splat(other); }
397
398 void operator|=(const SIMD_4x32& other) noexcept {
399#if defined(BOTAN_SIMD_USE_SSE2)
400 m_simd = _mm_or_si128(m_simd, other.m_simd);
401#elif defined(BOTAN_SIMD_USE_ALTIVEC)
402 m_simd = vec_or(m_simd, other.m_simd);
403#elif defined(BOTAN_SIMD_USE_NEON)
404 m_simd = vorrq_u32(m_simd, other.m_simd);
405#endif
406 }
407
408 void operator&=(const SIMD_4x32& other) noexcept {
409#if defined(BOTAN_SIMD_USE_SSE2)
410 m_simd = _mm_and_si128(m_simd, other.m_simd);
411#elif defined(BOTAN_SIMD_USE_ALTIVEC)
412 m_simd = vec_and(m_simd, other.m_simd);
413#elif defined(BOTAN_SIMD_USE_NEON)
414 m_simd = vandq_u32(m_simd, other.m_simd);
415#endif
416 }
417
418 template <int SHIFT>
419 SIMD_4x32 shl() const noexcept
420 requires(SHIFT > 0 && SHIFT < 32)
421 {
422#if defined(BOTAN_SIMD_USE_SSE2)
423 return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
424
425#elif defined(BOTAN_SIMD_USE_ALTIVEC)
426 const unsigned int s = static_cast<unsigned int>(SHIFT);
427 const __vector unsigned int shifts = {s, s, s, s};
428 return SIMD_4x32(vec_sl(m_simd, shifts));
429#elif defined(BOTAN_SIMD_USE_NEON)
430 return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
431#endif
432 }
433
434 template <int SHIFT>
435 SIMD_4x32 shr() const noexcept {
436#if defined(BOTAN_SIMD_USE_SSE2)
437 return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
438
439#elif defined(BOTAN_SIMD_USE_ALTIVEC)
440 const unsigned int s = static_cast<unsigned int>(SHIFT);
441 const __vector unsigned int shifts = {s, s, s, s};
442 return SIMD_4x32(vec_sr(m_simd, shifts));
443#elif defined(BOTAN_SIMD_USE_NEON)
444 return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
445#endif
446 }
447
448 SIMD_4x32 operator~() const noexcept {
449#if defined(BOTAN_SIMD_USE_SSE2)
450 return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
451#elif defined(BOTAN_SIMD_USE_ALTIVEC)
452 return SIMD_4x32(vec_nor(m_simd, m_simd));
453#elif defined(BOTAN_SIMD_USE_NEON)
454 return SIMD_4x32(vmvnq_u32(m_simd));
455#endif
456 }
457
458 // (~reg) & other
459 SIMD_4x32 andc(const SIMD_4x32& other) const noexcept {
460#if defined(BOTAN_SIMD_USE_SSE2)
461 return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
462#elif defined(BOTAN_SIMD_USE_ALTIVEC)
463 /*
464 AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
465 so swap the arguments
466 */
467 return SIMD_4x32(vec_andc(other.m_simd, m_simd));
468#elif defined(BOTAN_SIMD_USE_NEON)
469 // NEON is also a & ~b
470 return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
471#endif
472 }
473
474 /**
475 * Return copy *this with each word byte swapped
476 */
477 SIMD_4x32 bswap() const noexcept {
478#if defined(BOTAN_SIMD_USE_SSE2)
479
480 __m128i T = m_simd;
481 T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
482 T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
483 return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));
484
485#elif defined(BOTAN_SIMD_USE_ALTIVEC)
486 #ifdef BOTAN_SIMD_USE_VSX
487 return SIMD_4x32(vec_revb(m_simd));
488 #else
489 const __vector unsigned char rev[1] = {
490 {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
491 };
492
493 return SIMD_4x32(vec_perm(m_simd, m_simd, rev[0]));
494 #endif
495
496#elif defined(BOTAN_SIMD_USE_NEON)
497 return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
498#endif
499 }
500
501 template <size_t I>
503 requires(I <= 3)
504 {
505#if defined(BOTAN_SIMD_USE_SSE2)
506 return SIMD_4x32(_mm_slli_si128(raw(), 4 * I));
507#elif defined(BOTAN_SIMD_USE_NEON)
508 return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4 - I));
509#elif defined(BOTAN_SIMD_USE_ALTIVEC)
510 const __vector unsigned int zero = vec_splat_u32(0);
511
512 const __vector unsigned char shuf[3] = {
513 {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
514 {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7},
515 {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3},
516 };
517
518 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
519#endif
520 }
521
522 template <size_t I>
524 requires(I <= 3)
525 {
526#if defined(BOTAN_SIMD_USE_SSE2)
527 return SIMD_4x32(_mm_srli_si128(raw(), 4 * I));
528#elif defined(BOTAN_SIMD_USE_NEON)
529 return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
530#elif defined(BOTAN_SIMD_USE_ALTIVEC)
531 const __vector unsigned int zero = vec_splat_u32(0);
532
533 const __vector unsigned char shuf[3] = {
534 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
535 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
536 {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
537 };
538
539 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
540#endif
541 }
542
543 /**
544 * 4x4 Transposition on SIMD registers
545 */
546 static void transpose(SIMD_4x32& B0, SIMD_4x32& B1, SIMD_4x32& B2, SIMD_4x32& B3) noexcept {
547#if defined(BOTAN_SIMD_USE_SSE2)
548 const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
549 const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
550 const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
551 const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);
552
553 B0.m_simd = _mm_unpacklo_epi64(T0, T1);
554 B1.m_simd = _mm_unpackhi_epi64(T0, T1);
555 B2.m_simd = _mm_unpacklo_epi64(T2, T3);
556 B3.m_simd = _mm_unpackhi_epi64(T2, T3);
557#elif defined(BOTAN_SIMD_USE_ALTIVEC)
558 const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
559 const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
560 const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
561 const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);
562
563 B0.m_simd = vec_mergeh(T0, T1);
564 B1.m_simd = vec_mergel(T0, T1);
565 B2.m_simd = vec_mergeh(T2, T3);
566 B3.m_simd = vec_mergel(T2, T3);
567
568#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
569 const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
570 const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
571 const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
572 const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
573
574 B0.m_simd = O0.val[0];
575 B1.m_simd = O0.val[1];
576 B2.m_simd = O1.val[0];
577 B3.m_simd = O1.val[1];
578
579#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
580 const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
581 const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
582 const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
583 const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
584
585 B0.m_simd = vzip1q_u32(T0, T1);
586 B1.m_simd = vzip2q_u32(T0, T1);
587 B2.m_simd = vzip1q_u32(T2, T3);
588 B3.m_simd = vzip2q_u32(T2, T3);
589#endif
590 }
591
592 static inline SIMD_4x32 choose(const SIMD_4x32& mask, const SIMD_4x32& a, const SIMD_4x32& b) noexcept {
593#if defined(BOTAN_SIMD_USE_ALTIVEC)
594 return SIMD_4x32(vec_sel(b.raw(), a.raw(), mask.raw()));
595#elif defined(BOTAN_SIMD_USE_NEON)
596 return SIMD_4x32(vbslq_u32(mask.raw(), a.raw(), b.raw()));
597#else
598 return (mask & a) ^ mask.andc(b);
599#endif
600 }
601
602 static inline SIMD_4x32 majority(const SIMD_4x32& x, const SIMD_4x32& y, const SIMD_4x32& z) noexcept {
603 return SIMD_4x32::choose(x ^ y, z, y);
604 }
605
606 native_simd_type raw() const noexcept { return m_simd; }
607
608 explicit SIMD_4x32(native_simd_type x) noexcept : m_simd(x) {}
609
610 private:
611 native_simd_type m_simd;
612};
613
614template <size_t R>
615inline SIMD_4x32 rotl(SIMD_4x32 input) {
616 return input.rotl<R>();
617}
618
619template <size_t R>
620inline SIMD_4x32 rotr(SIMD_4x32 input) {
621 return input.rotr<R>();
622}
623
624// For Serpent:
625template <size_t S>
626inline SIMD_4x32 shl(SIMD_4x32 input) {
627 return input.shl<S>();
628}
629
630} // namespace Botan
631
632#endif
static bool is_little_endian()
Definition cpuid.h:59
static bool is_big_endian()
Definition cpuid.h:69
static SIMD_4x32 load_be(const void *in) noexcept
Definition simd_32.h:175
SIMD_4x32 andc(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:459
void store_le(uint8_t out[]) const noexcept
Definition simd_32.h:199
SIMD_4x32(SIMD_4x32 &&other)=default
SIMD_4x32 & operator=(SIMD_4x32 &&other)=default
SIMD_4x32 operator|(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:351
SIMD_4x32 operator^(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:342
SIMD_4x32(native_simd_type x) noexcept
Definition simd_32.h:608
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_32.h:546
SIMD_4x32 bswap() const noexcept
Definition simd_32.h:477
SIMD_4x32 operator+(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:324
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept
Definition simd_32.h:116
native_simd_type raw() const noexcept
Definition simd_32.h:606
SIMD_4x32() noexcept
Definition simd_32.h:88
void store_le(uint32_t out[4]) const noexcept
Definition simd_32.h:190
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_32.h:159
SIMD_4x32 sigma1() const noexcept
Definition simd_32.h:267
void store_be(uint8_t out[]) const noexcept
Definition simd_32.h:226
SIMD_4x32(const SIMD_4x32 &other)=default
~SIMD_4x32()=default
void operator^=(uint32_t other) noexcept
Definition simd_32.h:396
void operator^=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:386
void operator+=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:366
SIMD_4x32 operator~() const noexcept
Definition simd_32.h:448
void store_le(uint64_t out[2]) const noexcept
Definition simd_32.h:194
void operator|=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:398
SIMD_4x32 shift_elems_left() const noexcept
Definition simd_32.h:502
void store_be(uint32_t out[4]) const noexcept
Definition simd_32.h:192
SIMD_4x32(const uint32_t B[4]) noexcept
Definition simd_32.h:102
SIMD_4x32 sigma0() const noexcept
Definition simd_32.h:253
SIMD_4x32 operator-(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:333
SIMD_4x32 shr() const noexcept
Definition simd_32.h:435
static SIMD_4x32 splat_u8(uint8_t B) noexcept
Definition simd_32.h:145
SIMD_4x32 rotr() const noexcept
Definition simd_32.h:317
SIMD_4x32 shl() const noexcept
Definition simd_32.h:419
SIMD_4x32 shift_elems_right() const noexcept
Definition simd_32.h:523
SIMD_4x32 rotl() const noexcept
Definition simd_32.h:282
void operator&=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:408
SIMD_4x32 operator&(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:360
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
static SIMD_4x32 choose(const SIMD_4x32 &mask, const SIMD_4x32 &a, const SIMD_4x32 &b) noexcept
Definition simd_32.h:592
void operator-=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:376
static SIMD_4x32 majority(const SIMD_4x32 &x, const SIMD_4x32 &y, const SIMD_4x32 &z) noexcept
Definition simd_32.h:602
static SIMD_4x32 splat(uint32_t B) noexcept
Definition simd_32.h:132
int(* final)(unsigned char *, CTX *)
FE_25519 T
Definition ge.cpp:34
constexpr T rotl(T input)
Definition rotate.h:21
constexpr T rotr(T input)
Definition rotate.h:33
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition loadstor.h:100
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:702
SIMD_4x32 shl(SIMD_4x32 input)
Definition simd_32.h:626
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:462
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:711
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:471