8#ifndef BOTAN_SIMD_4X32_H_
9#define BOTAN_SIMD_4X32_H_
11#include <botan/compiler.h>
12#include <botan/types.h>
13#include <botan/internal/isa_extn.h>
14#include <botan/internal/target_info.h>
17#if defined(BOTAN_TARGET_ARCH_SUPPORTS_SSSE3)
18 #include <immintrin.h>
19 #define BOTAN_SIMD_USE_SSSE3
21#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_ALTIVEC)
22 #include <botan/internal/loadstor.h>
26 #define BOTAN_SIMD_USE_ALTIVEC
28 #define BOTAN_SIMD_USE_VSX
31#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_NEON)
34 #define BOTAN_SIMD_USE_NEON
36#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_LSX)
37 #include <lsxintrin.h>
38 #define BOTAN_SIMD_USE_LSX
40#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_SIMD128)
41 #include <wasm_simd128.h>
42 #define BOTAN_SIMD_USE_SIMD128
45 #error "No SIMD instruction set enabled"
63#if defined(BOTAN_SIMD_USE_SSSE3) || defined(BOTAN_SIMD_USE_LSX)
64 using native_simd_type = __m128i;
65#elif defined(BOTAN_SIMD_USE_ALTIVEC)
66 using native_simd_type = __vector
unsigned int;
67#elif defined(BOTAN_SIMD_USE_NEON)
68 using native_simd_type = uint32x4_t;
69#elif defined(BOTAN_SIMD_USE_SIMD128)
70 using native_simd_type = v128_t;
87#if defined(BOTAN_SIMD_USE_SSSE3)
88 m_simd = _mm_setzero_si128();
89#elif defined(BOTAN_SIMD_USE_ALTIVEC)
90 m_simd = vec_splat_u32(0);
91#elif defined(BOTAN_SIMD_USE_NEON)
92 m_simd = vdupq_n_u32(0);
93#elif defined(BOTAN_SIMD_USE_LSX)
94 m_simd = __lsx_vldi(0);
95#elif defined(BOTAN_SIMD_USE_SIMD128)
96 m_simd = wasm_u32x4_const_splat(0);
// Construct a vector from four 32-bit words.
// Each backend branch below receives the same four words B0..B3; only the
// intrinsic used to assemble them differs.
103 BOTAN_FN_ISA_SIMD_4X32
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
noexcept {
104#if defined(BOTAN_SIMD_USE_SSSE3)
// _mm_set_epi32 takes its arguments highest element first, hence the
// reversed order: B0 ends up in the lowest 32-bit element.
105 m_simd = _mm_set_epi32(B3, B2, B1, B0);
106#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// Built with a vector initializer list.
// NOTE(review): the statement storing val into m_simd is expected to follow
// here - confirm against the full file.
107 __vector
unsigned int val = {B0, B1, B2, B3};
109#elif defined(BOTAN_SIMD_USE_NEON)
// Stage the words in a local array and load all four with one vector load.
111 const uint32_t B[4] = {B0, B1, B2, B3};
112 m_simd = vld1q_u32(B);
113#elif defined(BOTAN_SIMD_USE_LSX)
// Same staging-array approach as the NEON branch; 0 is the load offset.
115 const uint32_t B[4] = {B0, B1, B2, B3};
116 m_simd = __lsx_vld(B, 0);
117#elif defined(BOTAN_SIMD_USE_SIMD128)
118 m_simd = wasm_u32x4_make(B0, B1, B2, B3);
128#if defined(BOTAN_SIMD_USE_SSSE3)
130#elif defined(BOTAN_SIMD_USE_NEON)
132#elif defined(BOTAN_SIMD_USE_LSX)
134#elif defined(BOTAN_SIMD_USE_SIMD128)
145#if defined(BOTAN_SIMD_USE_SSSE3)
147#elif defined(BOTAN_SIMD_USE_NEON)
148 return SIMD_4x32(vreinterpretq_u32_u8(vdupq_n_u8(B)));
149#elif defined(BOTAN_SIMD_USE_LSX)
151#elif defined(BOTAN_SIMD_USE_SIMD128)
163#if defined(BOTAN_SIMD_USE_SSSE3)
164 return SIMD_4x32(_mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in)));
165#elif defined(BOTAN_SIMD_USE_ALTIVEC)
170 __vector
unsigned int val = {R0,
R1,
R2, R3};
172#elif defined(BOTAN_SIMD_USE_NEON)
173 SIMD_4x32 l(vld1q_u32(
static_cast<const uint32_t*
>(in)));
174 if constexpr(std::endian::native == std::endian::big) {
179#elif defined(BOTAN_SIMD_USE_LSX)
181#elif defined(BOTAN_SIMD_USE_SIMD128)
190#if defined(BOTAN_SIMD_USE_SSSE3) || defined(BOTAN_SIMD_USE_LSX) || defined(BOTAN_SIMD_USE_SIMD128)
193#elif defined(BOTAN_SIMD_USE_ALTIVEC)
198 __vector
unsigned int val = {R0,
R1,
R2, R3};
201#elif defined(BOTAN_SIMD_USE_NEON)
202 SIMD_4x32 l(vld1q_u32(
static_cast<const uint32_t*
>(in)));
203 if constexpr(std::endian::native == std::endian::little) {
// Store this vector into an array of four 32-bit words.
// Pure forwarding wrapper: the destination is reinterpreted as a byte
// pointer and handed to the uint8_t overload of store_le.
219 void BOTAN_FN_ISA_SIMD_4X32
store_le(uint32_t out[4])
const noexcept {
220 this->
store_le(
reinterpret_cast<uint8_t*
>(out));
// Store this vector into an array of four 32-bit words.
// Pure forwarding wrapper: the destination is reinterpreted as a byte
// pointer and handed to the uint8_t overload of store_be.
223 void BOTAN_FN_ISA_SIMD_4X32
store_be(uint32_t out[4])
const noexcept {
224 this->
store_be(
reinterpret_cast<uint8_t*
>(out));
// Store this vector into an array of two 64-bit words.
// Pure forwarding wrapper: the destination is reinterpreted as a byte
// pointer and handed to the uint8_t overload of store_le.
227 void BOTAN_FN_ISA_SIMD_4X32
store_le(uint64_t out[2])
const noexcept {
228 this->
store_le(
reinterpret_cast<uint8_t*
>(out));
// Write this vector to the byte buffer `out`, using whichever SIMD backend
// was selected at compile time.
234 void BOTAN_FN_ISA_SIMD_4X32
store_le(uint8_t out[])
const noexcept {
235#if defined(BOTAN_SIMD_USE_SSSE3)
// _mm_storeu_si128 is the unaligned store, so `out` needs no particular
// alignment.
237 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out),
raw());
239#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// NOTE(review): only the declaration of the temporary V is visible in this
// view; the code that fills and stores it is not shown.
242 __vector
unsigned int V;
251#elif defined(BOTAN_SIMD_USE_NEON)
// On a little-endian host the lanes are stored unchanged; the second store
// writes the byte-swapped lanes (bswap()) instead - presumably the else
// branch for big-endian hosts.
252 if constexpr(std::endian::native == std::endian::little) {
253 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
255 vst1q_u8(out, vreinterpretq_u8_u32(
bswap().m_simd));
257#elif defined(BOTAN_SIMD_USE_LSX)
// 0 is the store offset relative to `out`.
258 __lsx_vst(
raw(), out, 0);
259#elif defined(BOTAN_SIMD_USE_SIMD128)
260 wasm_v128_store(out, m_simd);
// Write this vector to the byte buffer `out`, using whichever SIMD backend
// was selected at compile time.
267 BOTAN_FN_ISA_SIMD_4X32
void store_be(uint8_t out[])
const noexcept {
// The SSSE3, LSX and SIMD128 backends share one implementation.
// NOTE(review): its body is not visible in this view.
268#if defined(BOTAN_SIMD_USE_SSSE3) || defined(BOTAN_SIMD_USE_LSX) || defined(BOTAN_SIMD_USE_SIMD128)
272#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// NOTE(review): only the declaration of the temporary V is visible in this
// view; the code that fills and stores it is not shown.
275 __vector
unsigned int V;
284#elif defined(BOTAN_SIMD_USE_NEON)
// Mirror image of the NEON store_le branch: on a little-endian host the
// lanes are byte-swapped (bswap()) before the store; the second store
// writes them unchanged - presumably the else branch for big-endian hosts.
285 if constexpr(std::endian::native == std::endian::little) {
286 vst1q_u8(out, vreinterpretq_u8_u32(
bswap().m_simd));
288 vst1q_u8(out, vreinterpretq_u8_u32(m_simd));
293 void BOTAN_FN_ISA_SIMD_4X32
store_be(std::span<uint8_t, 16> out)
const { this->
store_be(out.data()); }
295 void BOTAN_FN_ISA_SIMD_4X32
store_le(std::span<uint8_t, 16> out)
const { this->
store_le(out.data()); }
301#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
302 return SIMD_4x32(__builtin_crypto_vshasigmaw(
raw(), 1, 0));
307 return (r1 ^ r2 ^ r3);
315#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_crypto_vshasigmaw) && defined(_ARCH_PWR8)
316 return SIMD_4x32(__builtin_crypto_vshasigmaw(
raw(), 1, 0xF));
321 return (r1 ^ r2 ^ r3);
328 template <
size_t ROT>
330 requires(ROT > 0 && ROT < 32)
332#if defined(BOTAN_SIMD_USE_SSSE3)
333 if constexpr(ROT == 8) {
334 const auto shuf_rotl_8 = _mm_set_epi64x(0x0e0d0c0f0a09080b, 0x0605040702010003);
336 }
else if constexpr(ROT == 16) {
337 const auto shuf_rotl_16 = _mm_set_epi64x(0x0d0c0f0e09080b0a, 0x0504070601000302);
338 return SIMD_4x32(_mm_shuffle_epi8(
raw(), shuf_rotl_16));
339 }
else if constexpr(ROT == 24) {
340 const auto shuf_rotl_24 = _mm_set_epi64x(0x0c0f0e0d080b0a09, 0x0407060500030201);
341 return SIMD_4x32(_mm_shuffle_epi8(
raw(), shuf_rotl_24));
343 return SIMD_4x32(_mm_xor_si128(_mm_slli_epi32(
raw(),
static_cast<int>(ROT)),
344 _mm_srli_epi32(
raw(),
static_cast<int>(32 - ROT))));
347#elif defined(BOTAN_SIMD_USE_ALTIVEC)
349 const unsigned int r =
static_cast<unsigned int>(ROT);
350 __vector
unsigned int rot = {r, r, r, r};
353#elif defined(BOTAN_SIMD_USE_NEON)
355 #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
357 if constexpr(ROT == 8) {
358 const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14};
359 const uint8x16_t mask = vld1q_u8(maskb);
360 return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
361 }
else if constexpr(ROT == 16) {
362 return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
366 vorrq_u32(vshlq_n_u32(m_simd,
static_cast<int>(ROT)), vshrq_n_u32(m_simd,
static_cast<int>(32 - ROT))));
367#elif defined(BOTAN_SIMD_USE_LSX)
369#elif defined(BOTAN_SIMD_USE_SIMD128)
370 return SIMD_4x32(wasm_v128_or(wasm_i32x4_shl(m_simd, ROT), wasm_u32x4_shr(m_simd, 32 - ROT)));
377 template <
size_t ROT>
379 return this->
rotl<32 - ROT>();
428#if defined(BOTAN_SIMD_USE_SSSE3)
429 m_simd = _mm_add_epi32(m_simd, other.m_simd);
430#elif defined(BOTAN_SIMD_USE_ALTIVEC)
431 m_simd = vec_add(m_simd, other.m_simd);
432#elif defined(BOTAN_SIMD_USE_NEON)
433 m_simd = vaddq_u32(m_simd, other.m_simd);
434#elif defined(BOTAN_SIMD_USE_LSX)
435 m_simd = __lsx_vadd_w(m_simd, other.m_simd);
436#elif defined(BOTAN_SIMD_USE_SIMD128)
437 m_simd = wasm_i32x4_add(m_simd, other.m_simd);
442#if defined(BOTAN_SIMD_USE_SSSE3)
443 m_simd = _mm_sub_epi32(m_simd, other.m_simd);
444#elif defined(BOTAN_SIMD_USE_ALTIVEC)
445 m_simd = vec_sub(m_simd, other.m_simd);
446#elif defined(BOTAN_SIMD_USE_NEON)
447 m_simd = vsubq_u32(m_simd, other.m_simd);
448#elif defined(BOTAN_SIMD_USE_LSX)
449 m_simd = __lsx_vsub_w(m_simd, other.m_simd);
450#elif defined(BOTAN_SIMD_USE_SIMD128)
451 m_simd = wasm_i32x4_sub(m_simd, other.m_simd);
456#if defined(BOTAN_SIMD_USE_SSSE3)
457 m_simd = _mm_xor_si128(m_simd, other.m_simd);
458#elif defined(BOTAN_SIMD_USE_ALTIVEC)
459 m_simd = vec_xor(m_simd, other.m_simd);
460#elif defined(BOTAN_SIMD_USE_NEON)
461 m_simd = veorq_u32(m_simd, other.m_simd);
462#elif defined(BOTAN_SIMD_USE_LSX)
463 m_simd = __lsx_vxor_v(m_simd, other.m_simd);
464#elif defined(BOTAN_SIMD_USE_SIMD128)
465 m_simd = wasm_v128_xor(m_simd, other.m_simd);
472#if defined(BOTAN_SIMD_USE_SSSE3)
473 m_simd = _mm_or_si128(m_simd, other.m_simd);
474#elif defined(BOTAN_SIMD_USE_ALTIVEC)
475 m_simd = vec_or(m_simd, other.m_simd);
476#elif defined(BOTAN_SIMD_USE_NEON)
477 m_simd = vorrq_u32(m_simd, other.m_simd);
478#elif defined(BOTAN_SIMD_USE_LSX)
479 m_simd = __lsx_vor_v(m_simd, other.m_simd);
480#elif defined(BOTAN_SIMD_USE_SIMD128)
481 m_simd = wasm_v128_or(m_simd, other.m_simd);
486#if defined(BOTAN_SIMD_USE_SSSE3)
487 m_simd = _mm_and_si128(m_simd, other.m_simd);
488#elif defined(BOTAN_SIMD_USE_ALTIVEC)
489 m_simd = vec_and(m_simd, other.m_simd);
490#elif defined(BOTAN_SIMD_USE_NEON)
491 m_simd = vandq_u32(m_simd, other.m_simd);
492#elif defined(BOTAN_SIMD_USE_LSX)
493 m_simd = __lsx_vand_v(m_simd, other.m_simd);
494#elif defined(BOTAN_SIMD_USE_SIMD128)
495 m_simd = wasm_v128_and(m_simd, other.m_simd);
501 requires(SHIFT > 0 && SHIFT < 32)
503#if defined(BOTAN_SIMD_USE_SSSE3)
504 return SIMD_4x32(_mm_slli_epi32(m_simd, SHIFT));
506#elif defined(BOTAN_SIMD_USE_ALTIVEC)
507 const unsigned int s =
static_cast<unsigned int>(SHIFT);
508 const __vector
unsigned int shifts = {s, s, s, s};
509 return SIMD_4x32(vec_sl(m_simd, shifts));
510#elif defined(BOTAN_SIMD_USE_NEON)
511 return SIMD_4x32(vshlq_n_u32(m_simd, SHIFT));
512#elif defined(BOTAN_SIMD_USE_LSX)
513 return SIMD_4x32(__lsx_vslli_w(m_simd, SHIFT));
514#elif defined(BOTAN_SIMD_USE_SIMD128)
515 return SIMD_4x32(wasm_i32x4_shl(m_simd, SHIFT));
521#if defined(BOTAN_SIMD_USE_SSSE3)
522 return SIMD_4x32(_mm_srli_epi32(m_simd, SHIFT));
524#elif defined(BOTAN_SIMD_USE_ALTIVEC)
525 const unsigned int s =
static_cast<unsigned int>(SHIFT);
526 const __vector
unsigned int shifts = {s, s, s, s};
527 return SIMD_4x32(vec_sr(m_simd, shifts));
528#elif defined(BOTAN_SIMD_USE_NEON)
529 return SIMD_4x32(vshrq_n_u32(m_simd, SHIFT));
530#elif defined(BOTAN_SIMD_USE_LSX)
531 return SIMD_4x32(__lsx_vsrli_w(m_simd, SHIFT));
532#elif defined(BOTAN_SIMD_USE_SIMD128)
533 return SIMD_4x32(wasm_u32x4_shr(m_simd, SHIFT));
538#if defined(BOTAN_SIMD_USE_SSSE3)
539 return SIMD_4x32(_mm_xor_si128(m_simd, _mm_set1_epi32(0xFFFFFFFF)));
540#elif defined(BOTAN_SIMD_USE_ALTIVEC)
541 return SIMD_4x32(vec_nor(m_simd, m_simd));
542#elif defined(BOTAN_SIMD_USE_NEON)
544#elif defined(BOTAN_SIMD_USE_LSX)
545 return SIMD_4x32(__lsx_vnor_v(m_simd, m_simd));
546#elif defined(BOTAN_SIMD_USE_SIMD128)
553#if defined(BOTAN_SIMD_USE_SSSE3)
554 return SIMD_4x32(_mm_andnot_si128(m_simd, other.m_simd));
555#elif defined(BOTAN_SIMD_USE_ALTIVEC)
560 return SIMD_4x32(vec_andc(other.m_simd, m_simd));
561#elif defined(BOTAN_SIMD_USE_NEON)
563 return SIMD_4x32(vbicq_u32(other.m_simd, m_simd));
564#elif defined(BOTAN_SIMD_USE_LSX)
566 return SIMD_4x32(__lsx_vandn_v(m_simd, other.m_simd));
567#elif defined(BOTAN_SIMD_USE_SIMD128)
569 return SIMD_4x32(wasm_v128_andnot(other.m_simd, m_simd));
577#if defined(BOTAN_SIMD_USE_SSSE3)
578 const auto idx = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
581#elif defined(BOTAN_SIMD_USE_ALTIVEC)
582 #ifdef BOTAN_SIMD_USE_VSX
585 const __vector
unsigned char rev[1] = {
586 {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
589 return SIMD_4x32(vec_perm(m_simd, m_simd, rev[0]));
592#elif defined(BOTAN_SIMD_USE_NEON)
593 return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
594#elif defined(BOTAN_SIMD_USE_LSX)
595 return SIMD_4x32(__lsx_vshuf4i_b(m_simd, 0b00011011));
596#elif defined(BOTAN_SIMD_USE_SIMD128)
597 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, m_simd, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12));
605#if defined(BOTAN_SIMD_USE_SSSE3)
607#elif defined(BOTAN_SIMD_USE_NEON)
608 return SIMD_4x32(vextq_u32(vdupq_n_u32(0),
raw(), 4 - I));
609#elif defined(BOTAN_SIMD_USE_ALTIVEC)
610 const __vector
unsigned int zero = vec_splat_u32(0);
612 const __vector
unsigned char shuf[3] = {
613 {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
614 {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7},
615 {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3},
619#elif defined(BOTAN_SIMD_USE_LSX)
621#elif defined(BOTAN_SIMD_USE_SIMD128)
622 if constexpr(I == 0) {
626 const auto zero = wasm_u32x4_const_splat(0);
627 if constexpr(I == 1) {
628 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
630 if constexpr(I == 2) {
631 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7));
634 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 2, 3));
642#if defined(BOTAN_SIMD_USE_SSSE3)
644#elif defined(BOTAN_SIMD_USE_NEON)
646#elif defined(BOTAN_SIMD_USE_ALTIVEC)
647 const __vector
unsigned int zero = vec_splat_u32(0);
649 const __vector
unsigned char shuf[3] = {
650 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
651 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
652 {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
656#elif defined(BOTAN_SIMD_USE_LSX)
658#elif defined(BOTAN_SIMD_USE_SIMD128)
659 if constexpr(I == 0) {
663 const auto zero = wasm_u32x4_const_splat(0);
664 if constexpr(I == 1) {
666 wasm_i8x16_shuffle(m_simd, zero, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16));
668 if constexpr(I == 2) {
670 wasm_i8x16_shuffle(m_simd, zero, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16));
674 wasm_i8x16_shuffle(m_simd, zero, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16));
685#if defined(BOTAN_SIMD_USE_SSSE3)
686 const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
687 const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
688 const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
689 const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);
691 B0.m_simd = _mm_unpacklo_epi64(T0, T1);
692 B1.m_simd = _mm_unpackhi_epi64(T0, T1);
693 B2.m_simd = _mm_unpacklo_epi64(T2, T3);
694 B3.m_simd = _mm_unpackhi_epi64(T2, T3);
695#elif defined(BOTAN_SIMD_USE_ALTIVEC)
696 const __vector
unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
697 const __vector
unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
698 const __vector
unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
699 const __vector
unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);
701 B0.m_simd = vec_mergeh(T0, T1);
702 B1.m_simd = vec_mergel(T0, T1);
703 B2.m_simd = vec_mergeh(T2, T3);
704 B3.m_simd = vec_mergel(T2, T3);
706#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
707 const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
708 const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
709 const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
710 const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
712 B0.m_simd = O0.val[0];
713 B1.m_simd = O0.val[1];
714 B2.m_simd = O1.val[0];
715 B3.m_simd = O1.val[1];
717#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
718 const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
719 const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
720 const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
721 const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
723 B0.m_simd = vzip1q_u32(T0, T1);
724 B1.m_simd = vzip2q_u32(T0, T1);
725 B2.m_simd = vzip1q_u32(T2, T3);
726 B3.m_simd = vzip2q_u32(T2, T3);
727#elif defined(BOTAN_SIMD_USE_LSX)
728 const __m128i T0 = __lsx_vilvl_w(B2.raw(), B0.raw());
729 const __m128i T1 = __lsx_vilvh_w(B2.raw(), B0.raw());
730 const __m128i T2 = __lsx_vilvl_w(B3.raw(), B1.raw());
731 const __m128i T3 = __lsx_vilvh_w(B3.raw(), B1.raw());
732 B0.m_simd = __lsx_vilvl_w(T2, T0);
733 B1.m_simd = __lsx_vilvh_w(T2, T0);
734 B2.m_simd = __lsx_vilvl_w(T3, T1);
735 B3.m_simd = __lsx_vilvh_w(T3, T1);
736#elif defined(BOTAN_SIMD_USE_SIMD128)
737 const auto T0 = wasm_i32x4_shuffle(B0.m_simd, B2.m_simd, 0, 4, 1, 5);
738 const auto T2 = wasm_i32x4_shuffle(B0.m_simd, B2.m_simd, 2, 6, 3, 7);
739 const auto T1 = wasm_i32x4_shuffle(B1.m_simd, B3.m_simd, 0, 4, 1, 5);
740 const auto T3 = wasm_i32x4_shuffle(B1.m_simd, B3.m_simd, 2, 6, 3, 7);
742 B0.m_simd = wasm_i32x4_shuffle(T0, T1, 0, 4, 1, 5);
743 B1.m_simd = wasm_i32x4_shuffle(T0, T1, 2, 6, 3, 7);
744 B2.m_simd = wasm_i32x4_shuffle(T2, T3, 0, 4, 1, 5);
745 B3.m_simd = wasm_i32x4_shuffle(T2, T3, 2, 6, 3, 7);
752#if defined(BOTAN_SIMD_USE_ALTIVEC)
753 return SIMD_4x32(vec_sel(b.raw(), a.raw(), mask.raw()));
754#elif defined(BOTAN_SIMD_USE_NEON)
755 return SIMD_4x32(vbslq_u32(mask.raw(), a.raw(), b.raw()));
756#elif defined(BOTAN_SIMD_USE_LSX)
757 return SIMD_4x32(__lsx_vbitsel_v(b.raw(), a.raw(), mask.raw()));
758#elif defined(BOTAN_SIMD_USE_SIMD128)
759 return SIMD_4x32(wasm_v128_bitselect(a.raw(), b.raw(), mask.raw()));
761 return (mask & a) ^ mask.andc(b);
804#if defined(BOTAN_SIMD_USE_SSSE3)
806#elif defined(BOTAN_SIMD_USE_NEON)
807 const uint8x16_t tbl8 = vreinterpretq_u8_u32(tbl.
raw());
808 const uint8x16_t idx8 = vreinterpretq_u8_u32(idx.
raw());
810 #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
811 const uint8x8x2_t tbl2 = {vget_low_u8(tbl8), vget_high_u8(tbl8)};
814 vreinterpretq_u32_u8(vcombine_u8(vtbl2_u8(tbl2, vget_low_u8(idx8)), vtbl2_u8(tbl2, vget_high_u8(idx8)))));
816 return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(tbl8, idx8)));
819#elif defined(BOTAN_SIMD_USE_ALTIVEC)
820 const auto r = vec_perm(
reinterpret_cast<__vector
signed char>(tbl.
raw()),
821 reinterpret_cast<__vector
signed char>(tbl.
raw()),
822 reinterpret_cast<__vector
unsigned char>(idx.
raw()));
823 return SIMD_4x32(
reinterpret_cast<__vector
unsigned int>(r));
824#elif defined(BOTAN_SIMD_USE_LSX)
826#elif defined(BOTAN_SIMD_USE_SIMD128)
841#if defined(BOTAN_SIMD_USE_ALTIVEC)
842 const auto zero = vec_splat_s8(0x00);
843 const auto mask = vec_cmplt(
reinterpret_cast<__vector
signed char>(idx.
raw()), zero);
844 const auto r = vec_perm(
reinterpret_cast<__vector
signed char>(tbl.
raw()),
845 reinterpret_cast<__vector
signed char>(tbl.
raw()),
846 reinterpret_cast<__vector
unsigned char>(idx.
raw()));
847 return SIMD_4x32(
reinterpret_cast<__vector
unsigned int>(vec_sel(r, zero, mask)));
848#elif defined(BOTAN_SIMD_USE_LSX)
859 const auto zero = __lsx_vldi(0);
860 const auto r = __lsx_vshuf_b(zero, tbl.
raw(), idx.
raw());
861 const auto mask = __lsx_vslti_bu(idx.
raw(), 16);
862 return SIMD_4x32(__lsx_vbitsel_v(zero, r, mask));
870#if defined(BOTAN_SIMD_USE_SSSE3)
872#elif defined(BOTAN_SIMD_USE_NEON)
874#elif defined(BOTAN_SIMD_USE_ALTIVEC)
875 const __vector
unsigned char mask = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
877#elif defined(BOTAN_SIMD_USE_LSX)
878 const auto mask =
SIMD_4x32(0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x13121110);
880#elif defined(BOTAN_SIMD_USE_SIMD128)
882 wasm_i8x16_shuffle(b.
raw(), a.
raw(), 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
887#if defined(BOTAN_SIMD_USE_SSSE3)
889#elif defined(BOTAN_SIMD_USE_NEON)
891#elif defined(BOTAN_SIMD_USE_ALTIVEC)
892 const __vector
unsigned char mask = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
894#elif defined(BOTAN_SIMD_USE_LSX)
896#elif defined(BOTAN_SIMD_USE_SIMD128)
898 wasm_i8x16_shuffle(b.
raw(), a.
raw(), 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23));
910#if defined(BOTAN_SIMD_USE_SSSE3)
911 return SIMD_4x32(_mm_shuffle_epi32(_mm_srai_epi32(
raw(), 31), 0b11111111));
912#elif defined(BOTAN_SIMD_USE_NEON)
913 #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
914 int32x4_t v = vshrq_n_s32(vreinterpretq_s32_u32(
raw()), 31);
915 int32x2_t hi = vget_high_s32(v);
916 return SIMD_4x32(vreinterpretq_u32_s32(vdupq_lane_s32(hi, 1)));
918 return SIMD_4x32(vreinterpretq_u32_s32(vdupq_laneq_s32(vshrq_n_s32(vreinterpretq_s32_u32(
raw()), 31), 3)));
920#elif defined(BOTAN_SIMD_USE_ALTIVEC)
921 const __vector
unsigned int shift = vec_splats(31U);
922 const __vector
signed int shifted = vec_sra(
reinterpret_cast<__vector
signed int>(
raw()), shift);
923 return SIMD_4x32(
reinterpret_cast<__vector
unsigned int>(vec_splat(shifted, 3)));
924#elif defined(BOTAN_SIMD_USE_LSX)
925 return SIMD_4x32(__lsx_vshuf4i_w(__lsx_vsrai_w(
raw(), 31), 0xFF));
926#elif defined(BOTAN_SIMD_USE_SIMD128)
927 return SIMD_4x32(wasm_i32x4_splat(wasm_i32x4_extract_lane(wasm_i32x4_shr(
raw(), 31), 3)));
935#if defined(BOTAN_SIMD_USE_SSSE3)
942 native_simd_type BOTAN_FN_ISA_SIMD_4X32
raw() const noexcept {
return m_simd; }
944 explicit BOTAN_FN_ISA_SIMD_4X32
SIMD_4x32(native_simd_type x) noexcept : m_simd(x) {}
947 native_simd_type m_simd;
954 return input.
rotl<R>();
959 return input.
rotr<R>();
965 return input.
shl<S>();
void BOTAN_FN_ISA_SIMD_4X32 store_le(uint32_t out[4]) const noexcept
SIMD_4x32(SIMD_4x32 &&other)=default
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator+(const SIMD_4x32 &other) const noexcept
SIMD_4x32 & operator=(SIMD_4x32 &&other)=default
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 top_bit_mask() const
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 majority(const SIMD_4x32 &x, const SIMD_4x32 &y, const SIMD_4x32 &z) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shr() const noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shl() const noexcept
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3) noexcept
void BOTAN_FN_ISA_SIMD_4X32 operator+=(const SIMD_4x32 &other) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 byte_shuffle(const SIMD_4x32 &tbl, const SIMD_4x32 &idx)
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr4(const SIMD_4x32 &a, const SIMD_4x32 &b)
native_simd_type BOTAN_FN_ISA_SIMD_4X32 raw() const noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator~() const noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 byte_blend(const SIMD_4x32 &mask, const SIMD_4x32 &a, const SIMD_4x32 &b) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator&(const SIMD_4x32 &other) const noexcept
void BOTAN_FN_ISA_SIMD_4X32 store_le(std::span< uint8_t, 16 > out) const
void BOTAN_FN_ISA_SIMD_4X32 operator|=(const SIMD_4x32 &other) noexcept
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32(native_simd_type x) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_right() const noexcept
SIMD_4x32(const SIMD_4x32 &other)=default
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator-(const SIMD_4x32 &other) const noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator^(const SIMD_4x32 &other) const noexcept
void BOTAN_FN_ISA_SIMD_4X32 operator&=(const SIMD_4x32 &other) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 choose(const SIMD_4x32 &mask, const SIMD_4x32 &a, const SIMD_4x32 &b) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 sigma1() const noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat(uint32_t B) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 rotr() const noexcept
void BOTAN_FN_ISA_SIMD_4X32 operator-=(const SIMD_4x32 &other) noexcept
static void BOTAN_FN_ISA_SIMD_4X32 transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 byte_blend(uint32_t mask, const SIMD_4x32 &a, const SIMD_4x32 &b) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 operator|(const SIMD_4x32 &other) const noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 andc(const SIMD_4x32 &other) const noexcept
void BOTAN_FN_ISA_SIMD_4X32 operator^=(const SIMD_4x32 &other) noexcept
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32() noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 masked_byte_shuffle(const SIMD_4x32 &tbl, const SIMD_4x32 &idx)
void BOTAN_FN_ISA_SIMD_4X32 store_le(uint8_t out[]) const noexcept
void BOTAN_FN_ISA_SIMD_4X32 store_be(uint32_t out[4]) const noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat_u8(uint8_t B) noexcept
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 bswap() const noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 shift_elems_left() const noexcept
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 rotl() const noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(std::span< const uint8_t, 16 > in)
void BOTAN_FN_ISA_SIMD_4X32 store_be(std::span< uint8_t, 16 > out) const
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(std::span< const uint8_t, 16 > in)
void BOTAN_FN_ISA_SIMD_4X32 store_le(uint64_t out[2]) const noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 sigma0() const noexcept
void BOTAN_FN_ISA_SIMD_4X32 operator^=(uint32_t other) noexcept
SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 swap_halves() const
BOTAN_FN_ISA_SIMD_4X32 void store_be(uint8_t out[]) const noexcept
BOTAN_FORCE_INLINE constexpr T rotr(T input)
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
constexpr auto store_le(ParamTs &&... params)
void R2(uint32_t A, uint32_t &B, uint32_t C, uint32_t &D, uint32_t E, uint32_t &F, uint32_t G, uint32_t &H, uint32_t TJ, uint32_t Wi, uint32_t Wj)
BOTAN_FORCE_INLINE constexpr T rotl(T input)
void R1(uint32_t A, uint32_t &B, uint32_t C, uint32_t &D, uint32_t E, uint32_t &F, uint32_t G, uint32_t &H, uint32_t TJ, uint32_t Wi, uint32_t Wj)
SIMD_4x32 shl(SIMD_4x32 input)
constexpr auto load_le(ParamTs &&... params)
constexpr auto store_be(ParamTs &&... params)
constexpr auto load_be(ParamTs &&... params)