Botan 3.11.0
Crypto and TLS for C++
simd_4x32.h
Go to the documentation of this file.
1/*
2* Lightweight wrappers for SIMD (4x32 bit) operations
3* (C) 2009,2011,2016,2017,2019,2025 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#ifndef BOTAN_SIMD_4X32_H_
9#define BOTAN_SIMD_4X32_H_
10
11#include <botan/compiler.h>
12#include <botan/types.h>
13#include <botan/internal/isa_extn.h>
14#include <botan/internal/target_info.h>
15#include <span>
16
17#if defined(BOTAN_TARGET_ARCH_SUPPORTS_SSSE3)
18 #include <immintrin.h>
19 #define BOTAN_SIMD_USE_SSSE3
20
21#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_ALTIVEC)
22 #include <botan/internal/loadstor.h>
23 #include <altivec.h>
24 #undef vector
25 #undef bool
26 #define BOTAN_SIMD_USE_ALTIVEC
27 #ifdef __VSX__
28 #define BOTAN_SIMD_USE_VSX
29 #endif
30
31#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_NEON)
32 #include <arm_neon.h>
33 #include <bit>
34 #define BOTAN_SIMD_USE_NEON
35
36#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_LSX)
37 #include <lsxintrin.h>
38 #define BOTAN_SIMD_USE_LSX
39
40#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_SIMD128)
41 #include <wasm_simd128.h>
42 #define BOTAN_SIMD_USE_SIMD128
43
44#else
45 #error "No SIMD instruction set enabled"
46#endif
47
48namespace Botan {
49
50// NOLINTBEGIN(portability-simd-intrinsics)
51
52/**
53* 4x32 bit SIMD register
54*
55* This class is not a general purpose SIMD type, and only offers instructions
56* needed for evaluation of specific crypto primitives. For example it does not
57* currently have equality operators of any kind.
58*
59* Implemented for SSE2, VMX (Altivec), ARMv7/Aarch64 NEON, LoongArch LSX and Wasm SIMD128
60*/
61class SIMD_4x32 final {
62 public:
63#if defined(BOTAN_SIMD_USE_SSSE3) || defined(BOTAN_SIMD_USE_LSX)
64 using native_simd_type = __m128i;
65#elif defined(BOTAN_SIMD_USE_ALTIVEC)
66 using native_simd_type = __vector unsigned int;
67#elif defined(BOTAN_SIMD_USE_NEON)
68 using native_simd_type = uint32x4_t;
69#elif defined(BOTAN_SIMD_USE_SIMD128)
70 using native_simd_type = v128_t;
71#endif
72
      // Rule of five: the only member is a trivially copyable vector register,
      // so all copy/move operations and the destructor are the defaults
      SIMD_4x32& operator=(const SIMD_4x32& other) = default;
      SIMD_4x32(const SIMD_4x32& other) = default;

      SIMD_4x32& operator=(SIMD_4x32&& other) = default;
      SIMD_4x32(SIMD_4x32&& other) = default;

      ~SIMD_4x32() = default;
80
      /* NOLINTBEGIN(*-prefer-member-initializer) */

      /**
       * Zero initialize SIMD register with 4 32-bit elements
       */
      BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32() noexcept {
         // Each branch uses the target ISA's idiomatic "zero register" form
#if defined(BOTAN_SIMD_USE_SSSE3)
         m_simd = _mm_setzero_si128();
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_splat_u32(0);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vdupq_n_u32(0);
#elif defined(BOTAN_SIMD_USE_LSX)
         m_simd = __lsx_vldi(0);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         m_simd = wasm_u32x4_const_splat(0);
#endif
      }
99
      /**
       * Load SIMD register with 4 32-bit elements
       *
       * B0 becomes the lowest-addressed element, B3 the highest.
       */
      BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         // _mm_set_epi32 takes its arguments high-to-low, hence the reversed order
         m_simd = _mm_set_epi32(B3, B2, B1, B0);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         __vector unsigned int val = {B0, B1, B2, B3};
         m_simd = val;
#elif defined(BOTAN_SIMD_USE_NEON)
         // Better way to do this?
         const uint32_t B[4] = {B0, B1, B2, B3};
         m_simd = vld1q_u32(B);
#elif defined(BOTAN_SIMD_USE_LSX)
         // Better way to do this?
         const uint32_t B[4] = {B0, B1, B2, B3};
         m_simd = __lsx_vld(B, 0);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         m_simd = wasm_u32x4_make(B0, B1, B2, B3);
#endif
      }
121
122 /* NOLINTEND(*-prefer-member-initializer) */
123
      /**
       * Load SIMD register with one 32-bit element repeated
       */
      static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat(uint32_t B) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_set1_epi32(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vdupq_n_u32(B));
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vreplgr2vr_w(B));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_u32x4_splat(B));
#else
         // AltiVec falls through to the generic 4-word constructor
         return SIMD_4x32(B, B, B, B);
#endif
      }
140
      /**
       * Load SIMD register with one 8-bit element repeated in every byte lane
       */
      static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat_u8(uint8_t B) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_set1_epi8(B));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vreplgr2vr_b(B));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_u8x16_splat(B));
#else
         // AltiVec: build a 32-bit word of four copies, then splat the word
         const uint32_t B4 = make_uint32(B, B, B, B);
         return SIMD_4x32(B4, B4, B4, B4);
#endif
      }
158
      /**
       * Load a SIMD register with little-endian convention
       *
       * @param in points to 16 bytes; no alignment is required
       */
      static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void* in) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // No single unaligned LE vector load here; assemble from four scalar LE loads
         uint32_t R0 = Botan::load_le<uint32_t>(reinterpret_cast<const uint8_t*>(in), 0);
         uint32_t R1 = Botan::load_le<uint32_t>(reinterpret_cast<const uint8_t*>(in), 1);
         uint32_t R2 = Botan::load_le<uint32_t>(reinterpret_cast<const uint8_t*>(in), 2);
         uint32_t R3 = Botan::load_le<uint32_t>(reinterpret_cast<const uint8_t*>(in), 3);
         __vector unsigned int val = {R0, R1, R2, R3};
         return SIMD_4x32(val);
#elif defined(BOTAN_SIMD_USE_NEON)
         // vld1q_u32 loads in native byte order, so swap on big-endian targets
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         if constexpr(std::endian::native == std::endian::big) {
            return l.bswap();
         } else {
            return l;
         }
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vld(in, 0));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_v128_load(in));
#endif
      }
185
      /**
       * Load a SIMD register with big-endian convention
       *
       * @param in points to 16 bytes; no alignment is required
       */
      static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void* in) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3) || defined(BOTAN_SIMD_USE_LSX) || defined(BOTAN_SIMD_USE_SIMD128)
         // These targets are little-endian; LE load followed by a byte swap
         return load_le(in).bswap();

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         uint32_t R0 = Botan::load_be<uint32_t>(reinterpret_cast<const uint8_t*>(in), 0);
         uint32_t R1 = Botan::load_be<uint32_t>(reinterpret_cast<const uint8_t*>(in), 1);
         uint32_t R2 = Botan::load_be<uint32_t>(reinterpret_cast<const uint8_t*>(in), 2);
         uint32_t R3 = Botan::load_be<uint32_t>(reinterpret_cast<const uint8_t*>(in), 3);
         __vector unsigned int val = {R0, R1, R2, R3};
         return SIMD_4x32(val);

#elif defined(BOTAN_SIMD_USE_NEON)
         // vld1q_u32 loads in native byte order, so swap on little-endian targets
         SIMD_4x32 l(vld1q_u32(static_cast<const uint32_t*>(in)));
         if constexpr(std::endian::native == std::endian::little) {
            return l.bswap();
         } else {
            return l;
         }
#endif
      }
210
      /// Load 16 bytes, interpreting them as four little-endian 32-bit words
      static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(std::span<const uint8_t, 16> in) {
         return SIMD_4x32::load_le(in.data());
      }

      /// Load 16 bytes, interpreting them as four big-endian 32-bit words
      static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(std::span<const uint8_t, 16> in) {
         return SIMD_4x32::load_be(in.data());
      }

      /// Store the four words to out[] in little-endian byte order
      void BOTAN_FN_ISA_SIMD_4X32 store_le(uint32_t out[4]) const noexcept {
         this->store_le(reinterpret_cast<uint8_t*>(out));
      }

      /// Store the four words to out[] in big-endian byte order
      void BOTAN_FN_ISA_SIMD_4X32 store_be(uint32_t out[4]) const noexcept {
         this->store_be(reinterpret_cast<uint8_t*>(out));
      }

      /// Store the 16 register bytes to out[] in little-endian byte order
      void BOTAN_FN_ISA_SIMD_4X32 store_le(uint64_t out[2]) const noexcept {
         this->store_le(reinterpret_cast<uint8_t*>(out));
      }
230
      /**
       * Store a SIMD register in little-endian convention
       *
       * @param out points to at least 16 writable bytes; no alignment is required
       */
      void BOTAN_FN_ISA_SIMD_4X32 store_le(uint8_t out[]) const noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)

         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), raw());

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // Extract the four words through a union, then store each as LE
         union {
            __vector unsigned int V;
            uint32_t R[4];
         } vec{};

         // NOLINTNEXTLINE(*-union-access)
         vec.V = raw();
         // NOLINTNEXTLINE(*-union-access)
         Botan::store_le(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)
         // vst1q_u8 stores in native order, so swap first on big-endian targets
         if constexpr(std::endian::native == std::endian::little) {
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
         } else {
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
         }
#elif defined(BOTAN_SIMD_USE_LSX)
         __lsx_vst(raw(), out, 0);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         wasm_v128_store(out, m_simd);
#endif
      }
263
      /**
       * Store a SIMD register in big-endian convention
       *
       * @param out points to at least 16 writable bytes; no alignment is required
       */
      BOTAN_FN_ISA_SIMD_4X32 void store_be(uint8_t out[]) const noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3) || defined(BOTAN_SIMD_USE_LSX) || defined(BOTAN_SIMD_USE_SIMD128)

         // These targets are little-endian; swap then do an LE store
         bswap().store_le(out);

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // Extract the four words through a union, then store each as BE
         union {
            __vector unsigned int V;
            uint32_t R[4];
         } vec{};

         // NOLINTNEXTLINE(*-union-access)
         vec.V = m_simd;
         // NOLINTNEXTLINE(*-union-access)
         Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);

#elif defined(BOTAN_SIMD_USE_NEON)
         // vst1q_u8 stores in native order, so swap first on little-endian targets
         if constexpr(std::endian::native == std::endian::little) {
            vst1q_u8(out, vreinterpretq_u8_u32(bswap().m_simd));
         } else {
            vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
         }
#endif
      }
292
      /// Store the 16 register bytes into out with big-endian convention
      void BOTAN_FN_ISA_SIMD_4X32 store_be(std::span<uint8_t, 16> out) const { this->store_be(out.data()); }

      /// Store the 16 register bytes into out with little-endian convention
      void BOTAN_FN_ISA_SIMD_4X32 store_le(std::span<uint8_t, 16> out) const { this->store_le(out.data()); }
296
      /*
      * This is used for SHA-2/SHACAL2
      *
      * Computes the SHA-256 "big" Sigma0 function on each word:
      * rotr(x,2) ^ rotr(x,13) ^ rotr(x,22)
      */
      SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 sigma0() const noexcept {
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
         // POWER8+ has a dedicated SHA-256 sigma instruction
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0));
#else
         const SIMD_4x32 r1 = this->rotr<2>();
         const SIMD_4x32 r2 = this->rotr<13>();
         const SIMD_4x32 r3 = this->rotr<22>();
         return (r1 ^ r2 ^ r3);
#endif
      }
310
      /*
      * This is used for SHA-2/SHACAL2
      *
      * Computes the SHA-256 "big" Sigma1 function on each word:
      * rotr(x,6) ^ rotr(x,11) ^ rotr(x,25)
      */
      SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 sigma1() const noexcept {
#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
         // POWER8+ has a dedicated SHA-256 sigma instruction
         return SIMD_4x32(__builtin_crypto_vshasigmaw(raw(), 1, 0xF));
#else
         const SIMD_4x32 r1 = this->rotr<6>();
         const SIMD_4x32 r2 = this->rotr<11>();
         const SIMD_4x32 r3 = this->rotr<25>();
         return (r1 ^ r2 ^ r3);
#endif
      }
324
      /**
       * Left rotation by a compile time constant
       */
      template <size_t ROT>
      BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 rotl() const noexcept
         requires(ROT > 0 && ROT < 32)
      {
#if defined(BOTAN_SIMD_USE_SSSE3)
         // Byte-aligned rotations (8/16/24) are done with a single byte shuffle,
         // which is cheaper than the generic shift/shift/xor sequence
         if constexpr(ROT == 8) {
            const auto shuf_rotl_8 = _mm_set_epi64x(0x0e0d0c0f0a09080b, 0x0605040702010003);
            return SIMD_4x32(_mm_shuffle_epi8(raw(), shuf_rotl_8));
         } else if constexpr(ROT == 16) {
            const auto shuf_rotl_16 = _mm_set_epi64x(0x0d0c0f0e09080b0a, 0x0504070601000302);
            return SIMD_4x32(_mm_shuffle_epi8(raw(), shuf_rotl_16));
         } else if constexpr(ROT == 24) {
            const auto shuf_rotl_24 = _mm_set_epi64x(0x0c0f0e0d080b0a09, 0x0407060500030201);
            return SIMD_4x32(_mm_shuffle_epi8(raw(), shuf_rotl_24));
         } else {
            return SIMD_4x32(_mm_xor_si128(_mm_slli_epi32(raw(), static_cast<int>(ROT)),
                                           _mm_srli_epi32(raw(), static_cast<int>(32 - ROT))));
         }

#elif defined(BOTAN_SIMD_USE_ALTIVEC)

         // VMX has a native per-element rotate
         const unsigned int r = static_cast<unsigned int>(ROT);
         __vector unsigned int rot = {r, r, r, r};
         return SIMD_4x32(vec_rl(m_simd, rot));

#elif defined(BOTAN_SIMD_USE_NEON)

   #if defined(BOTAN_TARGET_ARCH_IS_ARM64)

         // On Aarch64 handle the byte-aligned cases with tbl/rev instructions
         if constexpr(ROT == 8) {
            const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14};
            const uint8x16_t mask = vld1q_u8(maskb);
            return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
         } else if constexpr(ROT == 16) {
            return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
         }
   #endif
         return SIMD_4x32(
            vorrq_u32(vshlq_n_u32(m_simd, static_cast<int>(ROT)), vshrq_n_u32(m_simd, static_cast<int>(32 - ROT))));
#elif defined(BOTAN_SIMD_USE_LSX)
         // LSX only has a rotate-right immediate; left by ROT == right by 32 - ROT
         return SIMD_4x32(__lsx_vrotri_w(raw(), 32 - ROT));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_v128_or(wasm_i32x4_shl(m_simd, ROT), wasm_u32x4_shr(m_simd, 32 - ROT)));
#endif
      }
373
374 /**
375 * Right rotation by a compile time constant
376 */
377 template <size_t ROT>
378 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 rotr() const noexcept {
379 return this->rotl<32 - ROT>();
380 }
381
382 /**
383 * Add elements of a SIMD vector
384 */
385 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator+(const SIMD_4x32& other) const noexcept {
386 SIMD_4x32 retval(*this);
387 retval += other;
388 return retval;
389 }
390
391 /**
392 * Subtract elements of a SIMD vector
393 */
394 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator-(const SIMD_4x32& other) const noexcept {
395 SIMD_4x32 retval(*this);
396 retval -= other;
397 return retval;
398 }
399
400 /**
401 * XOR elements of a SIMD vector
402 */
403 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator^(const SIMD_4x32& other) const noexcept {
404 SIMD_4x32 retval(*this);
405 retval ^= other;
406 return retval;
407 }
408
409 /**
410 * Binary OR elements of a SIMD vector
411 */
412 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator|(const SIMD_4x32& other) const noexcept {
413 SIMD_4x32 retval(*this);
414 retval |= other;
415 return retval;
416 }
417
418 /**
419 * Binary AND elements of a SIMD vector
420 */
421 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator&(const SIMD_4x32& other) const noexcept {
422 SIMD_4x32 retval(*this);
423 retval &= other;
424 return retval;
425 }
426
      /// Lane-wise 32-bit addition in place (wrapping on overflow)
      void BOTAN_FN_ISA_SIMD_4X32 operator+=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         m_simd = _mm_add_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_add(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vaddq_u32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_LSX)
         m_simd = __lsx_vadd_w(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         m_simd = wasm_i32x4_add(m_simd, other.m_simd);
#endif
      }
440
      /// Lane-wise 32-bit subtraction in place (wrapping)
      void BOTAN_FN_ISA_SIMD_4X32 operator-=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         m_simd = _mm_sub_epi32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_sub(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vsubq_u32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_LSX)
         m_simd = __lsx_vsub_w(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         m_simd = wasm_i32x4_sub(m_simd, other.m_simd);
#endif
      }
454
      /// Bitwise XOR in place
      void BOTAN_FN_ISA_SIMD_4X32 operator^=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         m_simd = _mm_xor_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_xor(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = veorq_u32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_LSX)
         m_simd = __lsx_vxor_v(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         m_simd = wasm_v128_xor(m_simd, other.m_simd);
#endif
      }
468
469 void BOTAN_FN_ISA_SIMD_4X32 operator^=(uint32_t other) noexcept { *this ^= SIMD_4x32::splat(other); }
470
      /// Bitwise OR in place
      void BOTAN_FN_ISA_SIMD_4X32 operator|=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         m_simd = _mm_or_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_or(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vorrq_u32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_LSX)
         m_simd = __lsx_vor_v(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         m_simd = wasm_v128_or(m_simd, other.m_simd);
#endif
      }
484
      /// Bitwise AND in place
      void BOTAN_FN_ISA_SIMD_4X32 operator&=(const SIMD_4x32& other) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         m_simd = _mm_and_si128(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         m_simd = vec_and(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_NEON)
         m_simd = vandq_u32(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_LSX)
         m_simd = __lsx_vand_v(m_simd, other.m_simd);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         m_simd = wasm_v128_and(m_simd, other.m_simd);
#endif
      }
498
      /// Logical left shift of each 32-bit element by a compile time constant
      template <int SHIFT>
      SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shl() const noexcept
         requires(SHIFT > 0 && SHIFT < 32)
      {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // VMX takes the shift count as a vector
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         const __vector unsigned int shifts = {s, s, s, s};
         return SIMD_4x32(vec_sl(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vslli_w(m_simd, SHIFT));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_i32x4_shl(m_simd, SHIFT));
#endif
      }
518
      /// Logical right shift of each 32-bit element by a compile time constant
      // NOTE(review): unlike shl() there is no requires clause here; presumably
      // some caller relies on an instantiation shl() would reject - confirm
      // before adding the constraint
      template <int SHIFT>
      SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shr() const noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // VMX takes the shift count as a vector
         const unsigned int s = static_cast<unsigned int>(SHIFT);
         const __vector unsigned int shifts = {s, s, s, s};
         return SIMD_4x32(vec_sr(m_simd, shifts));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vsrli_w(m_simd, SHIFT));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_u32x4_shr(m_simd, SHIFT));
#endif
      }
536
      /// Bitwise complement of all 128 bits
      SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator~() const noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         // No vector NOT on SSE; XOR against all-ones instead
         return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // NOR of a value with itself is its complement
         return SIMD_4x32(vec_nor(m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vmvnq_u32(m_simd));
#elif defined(BOTAN_SIMD_USE_LSX)
         // NOR of a value with itself is its complement
         return SIMD_4x32(__lsx_vnor_v(m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_v128_not(m_simd));
#endif
      }
550
      // (~reg) & other
      //
      // Note the argument-order differences below: the ISAs disagree on which
      // operand of their and-not instruction is complemented
      SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 andc(const SIMD_4x32& other) const noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         /*
         AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
         so swap the arguments
         */
         return SIMD_4x32(vec_andc(other.m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_NEON)
         // NEON is also a & ~b
         return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
#elif defined(BOTAN_SIMD_USE_LSX)
         // LSX is ~a & b
         return SIMD_4x32(__lsx_vandn_v(m_simd, other.m_simd));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         // SIMD128 is a & ~b
         return SIMD_4x32(wasm_v128_andnot(other.m_simd, m_simd));
#endif
      }
572
      /**
       * Return copy *this with each word byte swapped
       */
      BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 bswap() const noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         // Byte shuffle reversing each 4-byte group
         const auto idx = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);

         return SIMD_4x32(_mm_shuffle_epi8(raw(), idx));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
   #ifdef BOTAN_SIMD_USE_VSX
         // VSX has a native byte-reverse
         return SIMD_4x32(vec_revb(m_simd));
   #else
         const __vector unsigned char rev[1] = {
            {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
         };

         return SIMD_4x32(vec_perm(m_simd, m_simd, rev[0]));
   #endif

#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
#elif defined(BOTAN_SIMD_USE_LSX)
         // 0b00011011 selects bytes (3,2,1,0) within each 32-bit element
         return SIMD_4x32(__lsx_vshuf4i_b(m_simd, 0b00011011));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_i8x16_shuffle(m_simd, m_simd, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12));
#endif
      }
600
601 template <size_t I>
602 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_left() const noexcept
603 requires(I <= 3)
604 {
605#if defined(BOTAN_SIMD_USE_SSSE3)
606 return SIMD_4x32(_mm_slli_si128(raw(), 4 * I));
607#elif defined(BOTAN_SIMD_USE_NEON)
608 return SIMD_4x32(vextq_u32(vdupq_n_u32(0), raw(), 4 - I));
609#elif defined(BOTAN_SIMD_USE_ALTIVEC)
610 const __vector unsigned int zero = vec_splat_u32(0);
611
612 const __vector unsigned char shuf[3] = {
613 {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
614 {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7},
615 {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3},
616 };
617
618 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
619#elif defined(BOTAN_SIMD_USE_LSX)
620 return SIMD_4x32(__lsx_vbsll_v(raw(), 4 * I));
621#elif defined(BOTAN_SIMD_USE_SIMD128)
622 if constexpr(I == 0) {
623 return SIMD_4x32(m_simd);
624 }
625
626 const auto zero = wasm_u32x4_const_splat(0);
627 if constexpr(I == 1) {
628 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
629 }
630 if constexpr(I == 2) {
631 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7));
632 }
633
634 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 2, 3));
635#endif
636 }
637
638 template <size_t I>
639 SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_right() const noexcept
640 requires(I <= 3)
641 {
642#if defined(BOTAN_SIMD_USE_SSSE3)
643 return SIMD_4x32(_mm_srli_si128(raw(), 4 * I));
644#elif defined(BOTAN_SIMD_USE_NEON)
645 return SIMD_4x32(vextq_u32(raw(), vdupq_n_u32(0), I));
646#elif defined(BOTAN_SIMD_USE_ALTIVEC)
647 const __vector unsigned int zero = vec_splat_u32(0);
648
649 const __vector unsigned char shuf[3] = {
650 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
651 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
652 {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
653 };
654
655 return SIMD_4x32(vec_perm(raw(), zero, shuf[I - 1]));
656#elif defined(BOTAN_SIMD_USE_LSX)
657 return SIMD_4x32(__lsx_vbsrl_v(raw(), 4 * I));
658#elif defined(BOTAN_SIMD_USE_SIMD128)
659 if constexpr(I == 0) {
660 return SIMD_4x32(m_simd);
661 }
662
663 const auto zero = wasm_u32x4_const_splat(0);
664 if constexpr(I == 1) {
665 return SIMD_4x32(
666 wasm_i8x16_shuffle(m_simd, zero, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16));
667 }
668 if constexpr(I == 2) {
669 return SIMD_4x32(
670 wasm_i8x16_shuffle(m_simd, zero, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16));
671 }
672
673 return SIMD_4x32(
674 wasm_i8x16_shuffle(m_simd, zero, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16));
675#endif
676 }
677
      /**
       * 4x4 Transposition on SIMD registers
       *
       * Treats B0..B3 as the rows of a 4x4 matrix of 32-bit words and
       * transposes it in place, using two rounds of interleave operations.
       */
      static void BOTAN_FN_ISA_SIMD_4X32 transpose(SIMD_4x32& B0,
                                                   SIMD_4x32& B1,
                                                   SIMD_4x32& B2,
                                                   SIMD_4x32& B3) noexcept {
#if defined(BOTAN_SIMD_USE_SSSE3)
         // Interleave 32-bit words, then 64-bit halves
         const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
         const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
         const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
         const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);

         B0.m_simd = _mm_unpacklo_epi64(T0, T1);
         B1.m_simd = _mm_unpackhi_epi64(T0, T1);
         B2.m_simd = _mm_unpacklo_epi64(T2, T3);
         B3.m_simd = _mm_unpackhi_epi64(T2, T3);
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // Two rounds of high/low word merges
         const __vector unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
         const __vector unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
         const __vector unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
         const __vector unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);

         B0.m_simd = vec_mergeh(T0, T1);
         B1.m_simd = vec_mergel(T0, T1);
         B2.m_simd = vec_mergeh(T2, T3);
         B3.m_simd = vec_mergel(T2, T3);

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
         // ARMv7 vzip returns both interleaved halves as a pair
         const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
         const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
         const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
         const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);

         B0.m_simd = O0.val[0];
         B1.m_simd = O0.val[1];
         B2.m_simd = O1.val[0];
         B3.m_simd = O1.val[1];

#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
         // Aarch64 has separate low/high zip instructions
         const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
         const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
         const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);

         B0.m_simd = vzip1q_u32(T0, T1);
         B1.m_simd = vzip2q_u32(T0, T1);
         B2.m_simd = vzip1q_u32(T2, T3);
         B3.m_simd = vzip2q_u32(T2, T3);
#elif defined(BOTAN_SIMD_USE_LSX)
         // vilvl/vilvh interleave the low/high words of their two inputs
         const __m128i T0 = __lsx_vilvl_w(B2.raw(), B0.raw());
         const __m128i T1 = __lsx_vilvh_w(B2.raw(), B0.raw());
         const __m128i T2 = __lsx_vilvl_w(B3.raw(), B1.raw());
         const __m128i T3 = __lsx_vilvh_w(B3.raw(), B1.raw());
         B0.m_simd = __lsx_vilvl_w(T2, T0);
         B1.m_simd = __lsx_vilvh_w(T2, T0);
         B2.m_simd = __lsx_vilvl_w(T3, T1);
         B3.m_simd = __lsx_vilvh_w(T3, T1);
#elif defined(BOTAN_SIMD_USE_SIMD128)
         // Word shuffles expressing the same interleave pattern
         const auto T0 = wasm_i32x4_shuffle(B0.m_simd, B2.m_simd, 0, 4, 1, 5);
         const auto T2 = wasm_i32x4_shuffle(B0.m_simd, B2.m_simd, 2, 6, 3, 7);
         const auto T1 = wasm_i32x4_shuffle(B1.m_simd, B3.m_simd, 0, 4, 1, 5);
         const auto T3 = wasm_i32x4_shuffle(B1.m_simd, B3.m_simd, 2, 6, 3, 7);

         B0.m_simd = wasm_i32x4_shuffle(T0, T1, 0, 4, 1, 5);
         B1.m_simd = wasm_i32x4_shuffle(T0, T1, 2, 6, 3, 7);
         B2.m_simd = wasm_i32x4_shuffle(T2, T3, 0, 4, 1, 5);
         B3.m_simd = wasm_i32x4_shuffle(T2, T3, 2, 6, 3, 7);
#endif
      }
748
      /**
       * Bitwise select: for each bit, returns a's bit where mask is set
       * and b's bit where mask is clear
       */
      static inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 choose(const SIMD_4x32& mask,
                                                            const SIMD_4x32& a,
                                                            const SIMD_4x32& b) noexcept {
#if defined(BOTAN_SIMD_USE_ALTIVEC)
         return SIMD_4x32(vec_sel(b.raw(), a.raw(), mask.raw()));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vbslq_u32(mask.raw(), a.raw(), b.raw()));
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vbitsel_v(b.raw(), a.raw(), mask.raw()));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_v128_bitselect(a.raw(), b.raw(), mask.raw()));
#else
         // (mask & a) and (~mask & b) have disjoint bits, so XOR acts as OR here
         return (mask & a) ^ mask.andc(b);
#endif
      }
764
      /**
       * Bitwise majority function (as used in SHA-2): for each bit position,
       * the output bit is the value held by at least two of x, y, z.
       *
       * Where x and y agree (x^y == 0) choose() picks y (== x); where they
       * differ, z breaks the tie.
       */
      static inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 majority(const SIMD_4x32& x,
                                                              const SIMD_4x32& y,
                                                              const SIMD_4x32& z) noexcept {
         return SIMD_4x32::choose(x ^ y, z, y);
      }
770
      /**
       * Byte shuffle
       *
       * Returns a register whose byte i equals tbl[idx[i]].
       *
       * This function assumes that each byte of idx is < 16; it may produce
       * incorrect results if this does not hold.
       */
      static inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 byte_shuffle(const SIMD_4x32& tbl, const SIMD_4x32& idx) {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_shuffle_epi8(tbl.raw(), idx.raw()));
#elif defined(BOTAN_SIMD_USE_NEON)
         const uint8x16_t tbl8 = vreinterpretq_u8_u32(tbl.raw());
         const uint8x16_t idx8 = vreinterpretq_u8_u32(idx.raw());

   #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
         // ARMv7 table lookups operate on 8-byte halves
         const uint8x8x2_t tbl2 = {vget_low_u8(tbl8), vget_high_u8(tbl8)};

         return SIMD_4x32(
            vreinterpretq_u32_u8(vcombine_u8(vtbl2_u8(tbl2, vget_low_u8(idx8)), vtbl2_u8(tbl2, vget_high_u8(idx8)))));
   #else
         return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(tbl8, idx8)));
   #endif

#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const auto r = vec_perm(reinterpret_cast<__vector signed char>(tbl.raw()),
                                 reinterpret_cast<__vector signed char>(tbl.raw()),
                                 reinterpret_cast<__vector unsigned char>(idx.raw()));
         return SIMD_4x32(reinterpret_cast<__vector unsigned int>(r));
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vshuf_b(tbl.raw(), tbl.raw(), idx.raw()));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(wasm_i8x16_swizzle(tbl.raw(), idx.raw()));
#endif
      }
804
      /**
       * Byte shuffle with masking
       *
       * If the index is >= 128 then the output byte is set to zero.
       *
       * Warning: for indices between 16 and 128 this function may have different
       * behaviors depending on the CPU; possibly the output is zero, tbl[idx % 16],
       * or even undefined.
       */
      inline static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 masked_byte_shuffle(const SIMD_4x32& tbl, const SIMD_4x32& idx) {
#if defined(BOTAN_SIMD_USE_ALTIVEC)
         // vec_perm ignores the high index bits, so explicitly zero any output
         // byte whose (signed) index is negative, i.e. >= 128
         const auto zero = vec_splat_s8(0x00);
         const auto mask = vec_cmplt(reinterpret_cast<__vector signed char>(idx.raw()), zero);
         const auto r = vec_perm(reinterpret_cast<__vector signed char>(tbl.raw()),
                                 reinterpret_cast<__vector signed char>(tbl.raw()),
                                 reinterpret_cast<__vector unsigned char>(idx.raw()));
         return SIMD_4x32(reinterpret_cast<__vector unsigned int>(vec_sel(r, zero, mask)));
#elif defined(BOTAN_SIMD_USE_LSX)
         /*
         * The behavior of vshuf.b unfortunately differs among microarchitectures
         * when the index is larger than the available elements. In LA664 CPUs,
         * larger indices result in a zero byte, which is exactly what we want.
         * Unfortunately on LA464 machines, the output is instead undefined.
         *
         * So we must use a slower sequence that handles the larger indices.
         * If we had a way of knowing at compile time that we are on an LA664
         * or later, we could use __lsx_vshuf_b without the comparison or select.
         */
         const auto zero = __lsx_vldi(0);
         const auto r = __lsx_vshuf_b(zero, tbl.raw(), idx.raw());
         const auto mask = __lsx_vslti_bu(idx.raw(), 16);
         return SIMD_4x32(__lsx_vbitsel_v(zero, r, mask));
#else
         // ARM, x86 and Wasm byte shuffles have the behavior we want for out of range idx
         return SIMD_4x32::byte_shuffle(tbl, idx);
#endif
      }
842
      /**
       * Concatenate a:b as a 32 byte value and extract the middle 16 bytes,
       * offset 4 bytes from the start of b (ie drop b's lowest word and take
       * a's lowest word as the new top element)
       */
      static inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr4(const SIMD_4x32& a, const SIMD_4x32& b) {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), 4));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vextq_u32(b.raw(), a.raw(), 1));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // Indices 16..19 select the first word of a
         const __vector unsigned char mask = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
         return SIMD_4x32(vec_perm(b.raw(), a.raw(), mask));
#elif defined(BOTAN_SIMD_USE_LSX)
         // Byte indices 0x04..0x13 of the b:a concatenation
         const auto mask = SIMD_4x32(0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x13121110);
         return SIMD_4x32(__lsx_vshuf_b(a.raw(), b.raw(), mask.raw()));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(
            wasm_i8x16_shuffle(b.raw(), a.raw(), 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
#endif
      }
859
      /**
       * Concatenate a:b as a 32 byte value and extract the middle 16 bytes,
       * offset 8 bytes from the start of b (ie b's upper half followed by
       * a's lower half)
       */
      static inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32& a, const SIMD_4x32& b) {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), 8));
#elif defined(BOTAN_SIMD_USE_NEON)
         return SIMD_4x32(vextq_u32(b.raw(), a.raw(), 2));
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         // Indices 16..23 select the low half of a
         const __vector unsigned char mask = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
         return SIMD_4x32(vec_perm(b.raw(), a.raw(), mask));
#elif defined(BOTAN_SIMD_USE_LSX)
         // 64-bit element shuffle selecting b's high half then a's low half
         return SIMD_4x32(__lsx_vshuf4i_d(a.raw(), b.raw(), 0b0011));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         return SIMD_4x32(
            wasm_i8x16_shuffle(b.raw(), a.raw(), 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23));
#endif
      }
875
      /**
       If the topmost bit of x is set, return a vector of all ones, otherwise a vector of all zeros
       ie: (v >> 127) ? splat(0xFFFFFFFF) : zero;

       Most of the implementations work by doing an arithmetic shift of 31 to smear the top bits
       of each word, followed by a broadcast of the top word.
       */
      inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 top_bit_mask() const {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_shuffle_epi32(_mm_srai_epi32(raw(), 31), 0b11111111));
#elif defined(BOTAN_SIMD_USE_NEON)
   #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
         // ARMv7 lane broadcasts operate on 64-bit halves
         int32x4_t v = vshrq_n_s32(vreinterpretq_s32_u32(raw()), 31);
         int32x2_t hi = vget_high_s32(v);
         return SIMD_4x32(vreinterpretq_u32_s32(vdupq_lane_s32(hi, 1)));
   #else
         return SIMD_4x32(vreinterpretq_u32_s32(vdupq_laneq_s32(vshrq_n_s32(vreinterpretq_s32_u32(raw()), 31), 3)));
   #endif
#elif defined(BOTAN_SIMD_USE_ALTIVEC)
         const __vector unsigned int shift = vec_splats(31U);
         const __vector signed int shifted = vec_sra(reinterpret_cast<__vector signed int>(raw()), shift);
         return SIMD_4x32(reinterpret_cast<__vector unsigned int>(vec_splat(shifted, 3)));
#elif defined(BOTAN_SIMD_USE_LSX)
         return SIMD_4x32(__lsx_vshuf4i_w(__lsx_vsrai_w(raw(), 31), 0xFF));
#elif defined(BOTAN_SIMD_USE_SIMD128)
         // wasm_i32x4_shr is the arithmetic (sign-extending) shift
         return SIMD_4x32(wasm_i32x4_splat(wasm_i32x4_extract_lane(wasm_i32x4_shr(raw(), 31), 3)));
#endif
      }
904
      /**
       * Swap the upper and lower 64-bit halves of the vector
       */
      inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 swap_halves() const {
#if defined(BOTAN_SIMD_USE_SSSE3)
         return SIMD_4x32(_mm_shuffle_epi32(raw(), 0b01001110));
#else
         // alignr8(x, x) rotates the register by 8 bytes, which swaps the halves
         return SIMD_4x32::alignr8(*this, *this);
#endif
      }
915
      /// Access the underlying platform-specific vector register
      native_simd_type BOTAN_FN_ISA_SIMD_4X32 raw() const noexcept { return m_simd; }

      /// Wrap a raw platform vector register
      explicit BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32(native_simd_type x) noexcept : m_simd(x) {}

   private:
      // The platform-specific 128-bit vector register
      native_simd_type m_simd;
922};
923
924// NOLINTEND(portability-simd-intrinsics)
925
// Free function wrapper for SIMD_4x32::rotl
template <size_t R>
inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 rotl(SIMD_4x32 input) {
   return input.rotl<R>();
}

// Free function wrapper for SIMD_4x32::rotr
template <size_t R>
inline SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 rotr(SIMD_4x32 input) {
   return input.rotr<R>();
}
935
// For Serpent: free function wrapper for SIMD_4x32::shl
template <size_t S>
inline SIMD_4x32 shl(SIMD_4x32 input) {
   return input.shl<S>();
}
941
942} // namespace Botan
943
944#endif
void BOTAN_FN_ISA_SIMD_4X32 store_le(uint32_t out[4]) const noexcept
Definition simd_4x32.h:219
SIMD_4x32(SIMD_4x32 &&other)=default
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator+(const SIMD_4x32 &other) const noexcept
Definition simd_4x32.h:385
SIMD_4x32 & operator=(SIMD_4x32 &&other)=default
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 top_bit_mask() const
Definition simd_4x32.h:883
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 majority(const SIMD_4x32 &x, const SIMD_4x32 &y, const SIMD_4x32 &z) noexcept
Definition simd_4x32.h:765
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:189
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shr() const noexcept
Definition simd_4x32.h:520
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
Definition simd_4x32.h:162
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shl() const noexcept
Definition simd_4x32.h:500
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept
Definition simd_4x32.h:103
void BOTAN_FN_ISA_SIMD_4X32 operator+=(const SIMD_4x32 &other) noexcept
Definition simd_4x32.h:427
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 byte_shuffle(const SIMD_4x32 &tbl, const SIMD_4x32 &idx)
Definition simd_4x32.h:777
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr4(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:843
native_simd_type BOTAN_FN_ISA_SIMD_4X32 raw() const noexcept
Definition simd_4x32.h:916
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator~() const noexcept
Definition simd_4x32.h:537
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator&(const SIMD_4x32 &other) const noexcept
Definition simd_4x32.h:421
void BOTAN_FN_ISA_SIMD_4X32 store_le(std::span< uint8_t, 16 > out) const
Definition simd_4x32.h:295
void BOTAN_FN_ISA_SIMD_4X32 operator|=(const SIMD_4x32 &other) noexcept
Definition simd_4x32.h:471
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32(native_simd_type x) noexcept
Definition simd_4x32.h:918
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_right() const noexcept
Definition simd_4x32.h:639
SIMD_4x32(const SIMD_4x32 &other)=default
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator-(const SIMD_4x32 &other) const noexcept
Definition simd_4x32.h:394
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator^(const SIMD_4x32 &other) const noexcept
Definition simd_4x32.h:403
~SIMD_4x32()=default
void BOTAN_FN_ISA_SIMD_4X32 operator&=(const SIMD_4x32 &other) noexcept
Definition simd_4x32.h:485
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:860
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 choose(const SIMD_4x32 &mask, const SIMD_4x32 &a, const SIMD_4x32 &b) noexcept
Definition simd_4x32.h:749
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 sigma1() const noexcept
Definition simd_4x32.h:314
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat(uint32_t B) noexcept
Definition simd_4x32.h:127
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 rotr() const noexcept
Definition simd_4x32.h:378
void BOTAN_FN_ISA_SIMD_4X32 operator-=(const SIMD_4x32 &other) noexcept
Definition simd_4x32.h:441
static void BOTAN_FN_ISA_SIMD_4X32 transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_4x32.h:681
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator|(const SIMD_4x32 &other) const noexcept
Definition simd_4x32.h:412
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 andc(const SIMD_4x32 &other) const noexcept
Definition simd_4x32.h:552
void BOTAN_FN_ISA_SIMD_4X32 operator^=(const SIMD_4x32 &other) noexcept
Definition simd_4x32.h:455
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32() noexcept
Definition simd_4x32.h:86
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 masked_byte_shuffle(const SIMD_4x32 &tbl, const SIMD_4x32 &idx)
Definition simd_4x32.h:814
void BOTAN_FN_ISA_SIMD_4X32 store_le(uint8_t out[]) const noexcept
Definition simd_4x32.h:234
void BOTAN_FN_ISA_SIMD_4X32 store_be(uint32_t out[4]) const noexcept
Definition simd_4x32.h:223
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat_u8(uint8_t B) noexcept
Definition simd_4x32.h:144
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 bswap() const noexcept
Definition simd_4x32.h:576
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_left() const noexcept
Definition simd_4x32.h:602
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 rotl() const noexcept
Definition simd_4x32.h:329
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(std::span< const uint8_t, 16 > in)
Definition simd_4x32.h:211
void BOTAN_FN_ISA_SIMD_4X32 store_be(std::span< uint8_t, 16 > out) const
Definition simd_4x32.h:293
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(std::span< const uint8_t, 16 > in)
Definition simd_4x32.h:215
void BOTAN_FN_ISA_SIMD_4X32 store_le(uint64_t out[2]) const noexcept
Definition simd_4x32.h:227
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 sigma0() const noexcept
Definition simd_4x32.h:300
void BOTAN_FN_ISA_SIMD_4X32 operator^=(uint32_t other) noexcept
Definition simd_4x32.h:469
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 swap_halves() const
Definition simd_4x32.h:908
BOTAN_FN_ISA_SIMD_4X32 void store_be(uint8_t out[]) const noexcept
Definition simd_4x32.h:267
BOTAN_FORCE_INLINE constexpr T rotr(T input)
Definition rotate.h:35
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition loadstor.h:104
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:736
void R2(uint32_t A, uint32_t &B, uint32_t C, uint32_t &D, uint32_t E, uint32_t &F, uint32_t G, uint32_t &H, uint32_t TJ, uint32_t Wi, uint32_t Wj)
Definition sm3_fn.h:43
BOTAN_FORCE_INLINE constexpr T rotl(T input)
Definition rotate.h:23
void R1(uint32_t A, uint32_t &B, uint32_t C, uint32_t &D, uint32_t E, uint32_t &F, uint32_t G, uint32_t &H, uint32_t TJ, uint32_t Wi, uint32_t Wj)
Definition sm3_fn.h:21
SIMD_4x32 shl(SIMD_4x32 input)
Definition simd_4x32.h:938
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:495
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:745
constexpr auto load_be(ParamTs &&... params)
Definition loadstor.h:504