Botan 3.6.1
Crypto and TLS for C++
simd_32.h
Go to the documentation of this file.
1/*
2* Lightweight wrappers for SIMD operations
3* (C) 2009,2011,2016,2017,2019 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#ifndef BOTAN_SIMD_32_H_
9#define BOTAN_SIMD_32_H_
10
11#include <botan/types.h>
12
13#include <span>
14
15#if defined(BOTAN_TARGET_SUPPORTS_SSE2)
16 #include <emmintrin.h>
17 #define BOTAN_SIMD_USE_SSE2
18
19#elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
20 #include <botan/internal/loadstor.h>
21 #include <altivec.h>
22 #undef vector
23 #undef bool
24 #define BOTAN_SIMD_USE_ALTIVEC
25 #ifdef __VSX__
26 #define BOTAN_SIMD_USE_VSX
27 #endif
28
29#elif defined(BOTAN_TARGET_SUPPORTS_NEON)
30 #include <botan/internal/cpuid.h>
31 #include <arm_neon.h>
32 #define BOTAN_SIMD_USE_NEON
33
34#else
35 #error "No SIMD instruction set enabled"
36#endif
37
38#if defined(BOTAN_SIMD_USE_SSE2)
39 #define BOTAN_SIMD_ISA "sse2"
40 #define BOTAN_VPERM_ISA "ssse3"
41 #define BOTAN_CLMUL_ISA "pclmul"
42#elif defined(BOTAN_SIMD_USE_NEON)
43 #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
44 #define BOTAN_SIMD_ISA "+simd"
45 #define BOTAN_CLMUL_ISA "+crypto+aes"
46 #else
47 #define BOTAN_SIMD_ISA "fpu=neon"
48 #endif
49 #define BOTAN_VPERM_ISA BOTAN_SIMD_ISA
50#elif defined(BOTAN_SIMD_USE_ALTIVEC)
51 #define BOTAN_SIMD_ISA "altivec"
52 #define BOTAN_VPERM_ISA "altivec"
53 #define BOTAN_CLMUL_ISA "crypto"
54#endif
55
56namespace Botan {
57
58#if defined(BOTAN_SIMD_USE_SSE2)
59using native_simd_type = __m128i;
60#elif defined(BOTAN_SIMD_USE_ALTIVEC)
61using native_simd_type = __vector unsigned int;
62#elif defined(BOTAN_SIMD_USE_NEON)
63using native_simd_type = uint32x4_t;
64#endif
65
66/**
67* 4x32 bit SIMD register
68*
69* This class is not a general purpose SIMD type, and only offers
70* instructions needed for evaluation of specific crypto primitives.
71* For example it does not currently have equality operators of any
72* kind.
73*
74* Implemented for SSE2, VMX (Altivec), and NEON.
75*/
77 public:
78 SIMD_4x32& operator=(const SIMD_4x32& other) = default;
79 SIMD_4x32(const SIMD_4x32& other) = default;
80
81 SIMD_4x32& operator=(SIMD_4x32&& other) = default;
82 SIMD_4x32(SIMD_4x32&& other) = default;
83
84 ~SIMD_4x32() = default;
85
86 /**
87 * Zero initialize SIMD register with 4 32-bit elements
88 */
89 SIMD_4x32() noexcept {
90#if defined(BOTAN_SIMD_USE_SSE2)
91 m_simd = _mm_setzero_si128();
92#elif defined(BOTAN_SIMD_USE_ALTIVEC)
93 m_simd = vec_splat_u32(0);
94#elif defined(BOTAN_SIMD_USE_NEON)
95 m_simd = vdupq_n_u32(0);
96#endif
97 }
98
99 /**
100 * Load SIMD register with 4 32-bit elements
101 */
102 explicit SIMD_4x32(const uint32_t B[4]) noexcept {
103#if defined(BOTAN_SIMD_USE_SSE2)
104 m_simd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
105#elif defined(BOTAN_SIMD_USE_ALTIVEC)
106 __vector unsigned int val = {B[0], B[1], B[2], B[3]};
107 m_simd = val;
108#elif defined(BOTAN_SIMD_USE_NEON)
109 m_simd = vld1q_u32(B);
110#endif
111 }
112
113 /**
114 * Load SIMD register with 4 32-bit elements
115 */
116 SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept {
117#if defined(BOTAN_SIMD_USE_SSE2)
118 m_simd = _mm_set_epi32(B3, B2, B1, B0);
119#elif defined(BOTAN_SIMD_USE_ALTIVEC)
120 __vector unsigned int val = {B0, B1, B2, B3};
121 m_simd = val;
122#elif defined(BOTAN_SIMD_USE_NEON)
123 // Better way to do this?
124 const uint32_t B[4] = {B0, B1, B2, B3};
125 m_simd = vld1q_u32(B);
126#endif
127 }
128
129 /**
130 * Load SIMD register with one 32-bit element repeated
131 */
132 static SIMD_4x32 splat(uint32_t B) noexcept {
133#if defined(BOTAN_SIMD_USE_SSE2)
134 return SIMD_4x32(_mm_set1_epi32(B));
135#elif defined(BOTAN_SIMD_USE_NEON)
136 return SIMD_4x32(vdupq_n_u32(B));
137#else
138 return SIMD_4x32(B, B, B, B);
139#endif
140 }
141
142 /**
143 * Load SIMD register with one 8-bit element repeated
144 */
145 static SIMD_4x32 splat_u8(uint8_t B) noexcept {
146#if defined(BOTAN_SIMD_USE_SSE2)
147 return SIMD_4x32(_mm_set1_epi8(B));
148#elif defined(BOTAN_SIMD_USE_NEON)
149 return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
150#else
151 const uint32_t B4 = make_uint32(B, B, B, B);
152 return SIMD_4x32(B4, B4, B4, B4);
153#endif
154 }
155
156 /**
157 * Load a SIMD register with little-endian convention
158 */
159 static SIMD_4x32 load_le(const void* in) noexcept {
160#if defined(BOTAN_SIMD_USE_SSE2)
161 return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
162#elif defined(BOTAN_SIMD_USE_ALTIVEC)
163 uint32_t R[4];
164 Botan::load_le(R, static_cast<const uint8_t*>(in), 4);
165 return SIMD_4x32(R);
166#elif defined(BOTAN_SIMD_USE_NEON)
167 SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
168 return CPUID::is_big_endian() ? l.bswap() : l;
169#endif
170 }
171
172 /**
173 * Load a SIMD register with big-endian convention
174 */
175 static SIMD_4x32 load_be(const void* in) noexcept {
176#if defined(BOTAN_SIMD_USE_SSE2)
177 return load_le(in).bswap();
178
179#elif defined(BOTAN_SIMD_USE_ALTIVEC)
180 uint32_t R[4];
181 Botan::load_be(R, static_cast<const uint8_t*>(in), 4);
182 return SIMD_4x32(R);
183
184#elif defined(BOTAN_SIMD_USE_NEON)
185 SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
186 return CPUID::is_little_endian() ? l.bswap() : l;
187#endif
188 }
189
190 static SIMD_4x32 load_le(std::span<const uint8_t, 16> in) { return SIMD_4x32::load_le(in.data()); }
191
192 static SIMD_4x32 load_be(std::span<const uint8_t, 16> in) { return SIMD_4x32::load_be(in.data()); }
193
194 void store_le(uint32_t out[4]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }
195
196 void store_be(uint32_t out[4]) const noexcept { this->store_be(reinterpret_cast<uint8_t*>(out)); }
197
198 void store_le(uint64_t out[2]) const noexcept { this->store_le(reinterpret_cast<uint8_t*>(out)); }
199
200 /**
201 * Load a SIMD register with little-endian convention
202 */
203 void store_le(uint8_t out[]) const noexcept {
204#if defined(BOTAN_SIMD_USE_SSE2)
205
206 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());
207
208#elif defined(BOTAN_SIMD_USE_ALTIVEC)
209
210 union {
211 __vector unsigned int V;
212 uint32_t R[4];
213 } vec;
214
215 vec.V = raw();
216 Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
217
218#elif defined(BOTAN_SIMD_USE_NEON)
220 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
221 } else {
222 vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
223 }
224#endif
225 }
226
227 /**
228 * Load a SIMD register with big-endian convention
229 */
230 void store_be(uint8_t out[]) const noexcept {
231#if defined(BOTAN_SIMD_USE_SSE2)
232
233 bswap().store_le(out);
234
235#elif defined(BOTAN_SIMD_USE_ALTIVEC)
236
237 union {
238 __vector unsigned int V;
239 uint32_t R[4];
240 } vec;
241
242 vec.V = m_simd;
243 Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
244
245#elif defined(BOTAN_SIMD_USE_NEON)
247 vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
248 } else {
249 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
250 }
251#endif
252 }
253
254 void store_be(std::span<uint8_t, 16> out) const { this->store_be(out.data()); }
255
256 void store_le(std::span<uint8_t, 16> out) const { this->store_le(out.data()); }
257
258 /*
259 * This is used for SHA-2/SHACAL2
260 */
261 SIMD_4x32 sigma0() const noexcept {
262#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
263 return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0));
264#else
265 const SIMD_4x32 rot1 = this->rotr<2>();
266 const SIMD_4x32 rot2 = this->rotr<13>();
267 const SIMD_4x32 rot3 = this->rotr<22>();
268 return (rot1 ^ rot2 ^ rot3);
269#endif
270 }
271
272 /*
273 * This is used for SHA-2/SHACAL2
274 */
275 SIMD_4x32 sigma1() const noexcept {
276#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
277 return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0xF));
278#else
279 const SIMD_4x32 rot1 = this->rotr<6>();
280 const SIMD_4x32 rot2 = this->rotr<11>();
281 const SIMD_4x32 rot3 = this->rotr<25>();
282 return (rot1 ^ rot2 ^ rot3);
283#endif
284 }
285
286 /**
287 * Left rotation by a compile time constant
288 */
289 template <size_t ROT>
290 SIMD_4x32 rotl() const noexcept
291 requires(ROT > 0 && ROT < 32)
292 {
293#if defined(BOTAN_SIMD_USE_SSE2)
294
295 return SIMD_4x32(_mm_or_si128(_mm_slli_epi32(m_simd, static_cast<int>(ROT)),
296 _mm_srli_epi32(m_simd, static_cast<int>(32 - ROT))));
297
298#elif defined(BOTAN_SIMD_USE_ALTIVEC)
299
300 const unsigned int r = static_cast<unsigned int>(ROT);
301 __vector unsigned int rot = {r, r, r, r};
302 return SIMD_4x32(vec_rl(m_simd, rot));
303
304#elif defined(BOTAN_SIMD_USE_NEON)
305
306 #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
307
308 if constexpr(ROT == 8) {
309 const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14};
310 const uint8x16_t mask = vld1q_u8(maskb);
311 return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
312 } else if constexpr(ROT == 16) {
313 return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
314 }
315 #endif
316 return SIMD_4x32(
317 vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)), vshrq_n_u32(m_simd, static_cast<int>(32 - ROT))));
318#endif
319 }
320
321 /**
322 * Right rotation by a compile time constant
323 */
324 template <size_t ROT>
325 SIMD_4x32 rotr() const noexcept {
326 return this->rotl<32 - ROT>();
327 }
328
329 /**
330 * Add elements of a SIMD vector
331 */
332 SIMD_4x32 operator+(const SIMD_4x32& other) const noexcept {
333 SIMD_4x32 retval(*this);
334 retval += other;
335 return retval;
336 }
337
338 /**
339 * Subtract elements of a SIMD vector
340 */
341 SIMD_4x32 operator-(const SIMD_4x32& other) const noexcept {
342 SIMD_4x32 retval(*this);
343 retval -= other;
344 return retval;
345 }
346
347 /**
348 * XOR elements of a SIMD vector
349 */
350 SIMD_4x32 operator^(const SIMD_4x32& other) const noexcept {
351 SIMD_4x32 retval(*this);
352 retval ^= other;
353 return retval;
354 }
355
356 /**
357 * Binary OR elements of a SIMD vector
358 */
359 SIMD_4x32 operator|(const SIMD_4x32& other) const noexcept {
360 SIMD_4x32 retval(*this);
361 retval |= other;
362 return retval;
363 }
364
365 /**
366 * Binary AND elements of a SIMD vector
367 */
368 SIMD_4x32 operator&(const SIMD_4x32& other) const noexcept {
369 SIMD_4x32 retval(*this);
370 retval &= other;
371 return retval;
372 }
373
374 void operator+=(const SIMD_4x32& other) noexcept {
375#if defined(BOTAN_SIMD_USE_SSE2)
376 m_simd = _mm_add_epi32(m_simd, other.m_simd);
377#elif defined(BOTAN_SIMD_USE_ALTIVEC)
378 m_simd = vec_add(m_simd, other.m_simd);
379#elif defined(BOTAN_SIMD_USE_NEON)
380 m_simd = vaddq_u32(m_simd, other.m_simd);
381#endif
382 }
383
384 void operator-=(const SIMD_4x32& other) noexcept {
385#if defined(BOTAN_SIMD_USE_SSE2)
386 m_simd = _mm_sub_epi32(m_simd, other.m_simd);
387#elif defined(BOTAN_SIMD_USE_ALTIVEC)
388 m_simd = vec_sub(m_simd, other.m_simd);
389#elif defined(BOTAN_SIMD_USE_NEON)
390 m_simd = vsubq_u32(m_simd, other.m_simd);
391#endif
392 }
393
394 void operator^=(const SIMD_4x32& other) noexcept {
395#if defined(BOTAN_SIMD_USE_SSE2)
396 m_simd = _mm_xor_si128(m_simd, other.m_simd);
397#elif defined(BOTAN_SIMD_USE_ALTIVEC)
398 m_simd = vec_xor(m_simd, other.m_simd);
399#elif defined(BOTAN_SIMD_USE_NEON)
400 m_simd = veorq_u32(m_simd, other.m_simd);
401#endif
402 }
403
404 void operator^=(uint32_t other) noexcept { *this ^= SIMD_4x32::splat(other); }
405
406 void operator|=(const SIMD_4x32& other) noexcept {
407#if defined(BOTAN_SIMD_USE_SSE2)
408 m_simd = _mm_or_si128(m_simd, other.m_simd);
409#elif defined(BOTAN_SIMD_USE_ALTIVEC)
410 m_simd = vec_or(m_simd, other.m_simd);
411#elif defined(BOTAN_SIMD_USE_NEON)
412 m_simd = vorrq_u32(m_simd, other.m_simd);
413#endif
414 }
415
416 void operator&=(const SIMD_4x32& other) noexcept {
417#if defined(BOTAN_SIMD_USE_SSE2)
418 m_simd = _mm_and_si128(m_simd, other.m_simd);
419#elif defined(BOTAN_SIMD_USE_ALTIVEC)
420 m_simd = vec_and(m_simd, other.m_simd);
421#elif defined(BOTAN_SIMD_USE_NEON)
422 m_simd = vandq_u32(m_simd, other.m_simd);
423#endif
424 }
425
426 template <int SHIFT>
427 SIMD_4x32 shl() const noexcept
428 requires(SHIFT > 0 && SHIFT < 32)
429 {
430#if defined(BOTAN_SIMD_USE_SSE2)
431 return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
432
433#elif defined(BOTAN_SIMD_USE_ALTIVEC)
434 const unsigned int s = static_cast<unsigned int>(SHIFT);
435 const __vector unsigned int shifts = {s, s, s, s};
436 return SIMD_4x32(vec_sl(m_simd, shifts));
437#elif defined(BOTAN_SIMD_USE_NEON)
438 return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
439#endif
440 }
441
442 template <int SHIFT>
443 SIMD_4x32 shr() const noexcept {
444#if defined(BOTAN_SIMD_USE_SSE2)
445 return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
446
447#elif defined(BOTAN_SIMD_USE_ALTIVEC)
448 const unsigned int s = static_cast<unsigned int>(SHIFT);
449 const __vector unsigned int shifts = {s, s, s, s};
450 return SIMD_4x32(vec_sr(m_simd, shifts));
451#elif defined(BOTAN_SIMD_USE_NEON)
452 return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
453#endif
454 }
455
456 SIMD_4x32 operator~() const noexcept {
457#if defined(BOTAN_SIMD_USE_SSE2)
458 return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
459#elif defined(BOTAN_SIMD_USE_ALTIVEC)
460 return SIMD_4x32(vec_nor(m_simd, m_simd));
461#elif defined(BOTAN_SIMD_USE_NEON)
462 return SIMD_4x32(vmvnq_u32(m_simd));
463#endif
464 }
465
466 // (~reg) & other
467 SIMD_4x32 andc(const SIMD_4x32& other) const noexcept {
468#if defined(BOTAN_SIMD_USE_SSE2)
469 return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
470#elif defined(BOTAN_SIMD_USE_ALTIVEC)
471 /*
472 AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
473 so swap the arguments
474 */
475 return SIMD_4x32(vec_andc(other.m_simd, m_simd));
476#elif defined(BOTAN_SIMD_USE_NEON)
477 // NEON is also a & ~b
478 return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
479#endif
480 }
481
482 /**
483 * Return copy *this with each word byte swapped
484 */
485 SIMD_4x32 bswap() const noexcept {
486#if defined(BOTAN_SIMD_USE_SSE2)
487
488 __m128i T = m_simd;
489 T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
490 T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
491 return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));
492
493#elif defined(BOTAN_SIMD_USE_ALTIVEC)
494 #ifdef BOTAN_SIMD_USE_VSX
495 return SIMD_4x32(vec_revb(m_simd));
496 #else
497 const __vector unsigned char rev[1] = {
498 {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
499 };
500
501 return SIMD_4x32(vec_perm(m_simd, m_simd, rev[0]));
502 #endif
503
504#elif defined(BOTAN_SIMD_USE_NEON)
505 return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
506#endif
507 }
508
509 template <size_t I>
511 requires(I <= 3)
512 {
513#if defined(BOTAN_SIMD_USE_SSE2)
514 return SIMD_4x32(_mm_slli_si128(raw(), 4 * I));
515#elif defined(BOTAN_SIMD_USE_NEON)
516 return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4 - I));
517#elif defined(BOTAN_SIMD_USE_ALTIVEC)
518 const __vector unsigned int zero = vec_splat_u32(0);
519
520 const __vector unsigned char shuf[3] = {
521 {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
522 {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7},
523 {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3},
524 };
525
526 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
527#endif
528 }
529
530 template <size_t I>
532 requires(I <= 3)
533 {
534#if defined(BOTAN_SIMD_USE_SSE2)
535 return SIMD_4x32(_mm_srli_si128(raw(), 4 * I));
536#elif defined(BOTAN_SIMD_USE_NEON)
537 return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
538#elif defined(BOTAN_SIMD_USE_ALTIVEC)
539 const __vector unsigned int zero = vec_splat_u32(0);
540
541 const __vector unsigned char shuf[3] = {
542 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
543 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
544 {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
545 };
546
547 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
548#endif
549 }
550
551 /**
552 * 4x4 Transposition on SIMD registers
553 */
554 static void transpose(SIMD_4x32& B0, SIMD_4x32& B1, SIMD_4x32& B2, SIMD_4x32& B3) noexcept {
555#if defined(BOTAN_SIMD_USE_SSE2)
556 const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
557 const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
558 const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
559 const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);
560
561 B0.m_simd = _mm_unpacklo_epi64(T0, T1);
562 B1.m_simd = _mm_unpackhi_epi64(T0, T1);
563 B2.m_simd = _mm_unpacklo_epi64(T2, T3);
564 B3.m_simd = _mm_unpackhi_epi64(T2, T3);
565#elif defined(BOTAN_SIMD_USE_ALTIVEC)
566 const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
567 const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
568 const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
569 const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);
570
571 B0.m_simd = vec_mergeh(T0, T1);
572 B1.m_simd = vec_mergel(T0, T1);
573 B2.m_simd = vec_mergeh(T2, T3);
574 B3.m_simd = vec_mergel(T2, T3);
575
576#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
577 const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
578 const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
579 const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
580 const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
581
582 B0.m_simd = O0.val[0];
583 B1.m_simd = O0.val[1];
584 B2.m_simd = O1.val[0];
585 B3.m_simd = O1.val[1];
586
587#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
588 const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
589 const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
590 const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
591 const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
592
593 B0.m_simd = vzip1q_u32(T0, T1);
594 B1.m_simd = vzip2q_u32(T0, T1);
595 B2.m_simd = vzip1q_u32(T2, T3);
596 B3.m_simd = vzip2q_u32(T2, T3);
597#endif
598 }
599
600 static inline SIMD_4x32 choose(const SIMD_4x32& mask, const SIMD_4x32& a, const SIMD_4x32& b) noexcept {
601#if defined(BOTAN_SIMD_USE_ALTIVEC)
602 return SIMD_4x32(vec_sel(b.raw(), a.raw(), mask.raw()));
603#elif defined(BOTAN_SIMD_USE_NEON)
604 return SIMD_4x32(vbslq_u32(mask.raw(), a.raw(), b.raw()));
605#else
606 return (mask & a) ^ mask.andc(b);
607#endif
608 }
609
610 static inline SIMD_4x32 majority(const SIMD_4x32& x, const SIMD_4x32& y, const SIMD_4x32& z) noexcept {
611 return SIMD_4x32::choose(x ^ y, z, y);
612 }
613
614 native_simd_type raw() const noexcept { return m_simd; }
615
616 explicit SIMD_4x32(native_simd_type x) noexcept : m_simd(x) {}
617
618 private:
619 native_simd_type m_simd;
620};
621
622template <size_t R>
623inline SIMD_4x32 rotl(SIMD_4x32 input) {
624 return input.rotl<R>();
625}
626
627template <size_t R>
628inline SIMD_4x32 rotr(SIMD_4x32 input) {
629 return input.rotr<R>();
630}
631
632// For Serpent:
633template <size_t S>
634inline SIMD_4x32 shl(SIMD_4x32 input) {
635 return input.shl<S>();
636}
637
638} // namespace Botan
639
640#endif
static bool is_little_endian()
Definition cpuid.h:60
static bool is_big_endian()
Definition cpuid.h:70
static SIMD_4x32 load_be(const void *in) noexcept
Definition simd_32.h:175
SIMD_4x32 andc(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:467
void store_le(uint8_t out[]) const noexcept
Definition simd_32.h:203
SIMD_4x32(SIMD_4x32 &&other)=default
static SIMD_4x32 load_le(std::span< const uint8_t, 16 > in)
Definition simd_32.h:190
SIMD_4x32 & operator=(SIMD_4x32 &&other)=default
SIMD_4x32 operator|(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:359
SIMD_4x32 operator^(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:350
SIMD_4x32(native_simd_type x) noexcept
Definition simd_32.h:616
void store_be(std::span< uint8_t, 16 > out) const
Definition simd_32.h:254
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_32.h:554
SIMD_4x32 bswap() const noexcept
Definition simd_32.h:485
SIMD_4x32 operator+(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:332
void store_le(std::span< uint8_t, 16 > out) const
Definition simd_32.h:256
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept
Definition simd_32.h:116
native_simd_type raw() const noexcept
Definition simd_32.h:614
SIMD_4x32() noexcept
Definition simd_32.h:89
void store_le(uint32_t out[4]) const noexcept
Definition simd_32.h:194
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_32.h:159
SIMD_4x32 sigma1() const noexcept
Definition simd_32.h:275
void store_be(uint8_t out[]) const noexcept
Definition simd_32.h:230
SIMD_4x32(const SIMD_4x32 &other)=default
~SIMD_4x32()=default
void operator^=(uint32_t other) noexcept
Definition simd_32.h:404
void operator^=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:394
void operator+=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:374
SIMD_4x32 operator~() const noexcept
Definition simd_32.h:456
void store_le(uint64_t out[2]) const noexcept
Definition simd_32.h:198
void operator|=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:406
static SIMD_4x32 load_be(std::span< const uint8_t, 16 > in)
Definition simd_32.h:192
SIMD_4x32 shift_elems_left() const noexcept
Definition simd_32.h:510
void store_be(uint32_t out[4]) const noexcept
Definition simd_32.h:196
SIMD_4x32(const uint32_t B[4]) noexcept
Definition simd_32.h:102
SIMD_4x32 sigma0() const noexcept
Definition simd_32.h:261
SIMD_4x32 operator-(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:341
SIMD_4x32 shr() const noexcept
Definition simd_32.h:443
static SIMD_4x32 splat_u8(uint8_t B) noexcept
Definition simd_32.h:145
SIMD_4x32 rotr() const noexcept
Definition simd_32.h:325
SIMD_4x32 shl() const noexcept
Definition simd_32.h:427
SIMD_4x32 shift_elems_right() const noexcept
Definition simd_32.h:531
SIMD_4x32 rotl() const noexcept
Definition simd_32.h:290
void operator&=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:416
SIMD_4x32 operator&(const SIMD_4x32 &other) const noexcept
Definition simd_32.h:368
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
static SIMD_4x32 choose(const SIMD_4x32 &mask, const SIMD_4x32 &a, const SIMD_4x32 &b) noexcept
Definition simd_32.h:600
void operator-=(const SIMD_4x32 &other) noexcept
Definition simd_32.h:384
static SIMD_4x32 majority(const SIMD_4x32 &x, const SIMD_4x32 &y, const SIMD_4x32 &z) noexcept
Definition simd_32.h:610
static SIMD_4x32 splat(uint32_t B) noexcept
Definition simd_32.h:132
int(* final)(unsigned char *, CTX *)
FE_25519 T
Definition ge.cpp:34
constexpr T rotl(T input)
Definition rotate.h:21
constexpr T rotr(T input)
Definition rotate.h:33
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition loadstor.h:100
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:764
SIMD_4x32 shl(SIMD_4x32 input)
Definition simd_32.h:634
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:521
const SIMD_8x32 & b
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:773
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:530