// Constructor: build a SIMD_4x32 from four 32-bit words B0..B3 (B0 is
// lane 0), with one implementation per compile-time SIMD backend.
// NOTE(review): this chunk is a fragment — the closing #endif/brace and
// some interior lines (e.g. the AltiVec assignment after building `val`)
// are elided, and the leading numerals (103, 104, ...) look like
// line-number residue from extraction, not code.
103 BOTAN_FN_ISA_SIMD_4X32
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
noexcept {
104#if defined(BOTAN_SIMD_USE_SSSE3)
// x86: _mm_set_epi32 takes arguments high-lane-first, so passing
// B3..B0 places B0 in the lowest lane.
105 m_simd = _mm_set_epi32(B3, B2, B1, B0);
106#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// AltiVec: brace-initialize a vector literal; the store into m_simd
// is presumably on a line elided from this chunk (original line 108).
107 __vector
unsigned int val = {B0, B1, B2, B3};
109#elif defined(BOTAN_SIMD_USE_NEON)
// NEON: no direct four-scalar "make" intrinsic — stage the words in a
// stack array and load with vld1q_u32.
111 const uint32_t B[4] = {B0, B1, B2, B3};
112 m_simd = vld1q_u32(B);
113#elif defined(BOTAN_SIMD_USE_LSX)
// LoongArch LSX: same stack-array staging, loaded with __lsx_vld.
115 const uint32_t B[4] = {B0, B1, B2, B3};
116 m_simd = __lsx_vld(B, 0);
117#elif defined(BOTAN_SIMD_USE_SIMD128)
// WASM SIMD128 provides a direct 4-lane constructor intrinsic.
118 m_simd = wasm_u32x4_make(B0, B1, B2, B3);
// Fragment of a compile-time left-rotate of each 32-bit lane by ROT
// bits (the template header and return type sit above this chunk; the
// requires-clause constrains ROT to a genuine rotation amount).
// NOTE(review): several lines are elided between the visible ones
// (original numbering skips e.g. 334->336 and 341->343), including the
// returns that presumably followed the ROT==8 SSSE3 shuffle and the
// AltiVec vec_rl call — do not read the gaps as missing code in the
// real file.
330 requires(ROT > 0 && ROT < 32)
332#if defined(BOTAN_SIMD_USE_SSSE3)
// x86: byte-granular rotations (8/16/24) are done with a single
// pshufb using a precomputed byte-permutation constant; the constants
// move each byte within its own 4-byte lane.
333 if constexpr(ROT == 8) {
334 const auto shuf_rotl_8 = _mm_set_epi64x(0x0e0d0c0f0a09080b, 0x0605040702010003);
// NOTE(review): the return using shuf_rotl_8 (original line 335) is
// elided from this chunk.
336 }
else if constexpr(ROT == 16) {
337 const auto shuf_rotl_16 = _mm_set_epi64x(0x0d0c0f0e09080b0a, 0x0504070601000302);
338 return SIMD_4x32(_mm_shuffle_epi8(
raw(), shuf_rotl_16));
339 }
else if constexpr(ROT == 24) {
340 const auto shuf_rotl_24 = _mm_set_epi64x(0x0c0f0e0d080b0a09, 0x0407060500030201);
341 return SIMD_4x32(_mm_shuffle_epi8(
raw(), shuf_rotl_24));
// Generic case: (x << ROT) ^ (x >> (32-ROT)); XOR is equivalent to OR
// here since the shifted bit ranges do not overlap.
343 return SIMD_4x32(_mm_xor_si128(_mm_slli_epi32(
raw(),
static_cast<int>(ROT)),
344 _mm_srli_epi32(
raw(),
static_cast<int>(32 - ROT))));
347#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// AltiVec: splat the rotate count; the rotate itself (presumably
// vec_rl, original line ~351) is elided from this chunk.
349 const unsigned int r =
static_cast<unsigned int>(ROT);
350 __vector
unsigned int rot = {r, r, r, r};
353#elif defined(BOTAN_SIMD_USE_NEON)
355 #if defined(BOTAN_TARGET_ARCH_IS_ARM64)
// AArch64 fast paths: ROT==8 via a single table lookup (vqtbl1q_u8)
// with a byte permutation that rotates each 4-byte lane left by one
// byte; ROT==16 via vrev32q_u16, which swaps the 16-bit halves of
// every 32-bit lane.
357 if constexpr(ROT == 8) {
358 const uint8_t maskb[16] = {3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14};
359 const uint8x16_t mask = vld1q_u8(maskb);
360 return SIMD_4x32(vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(m_simd), mask)));
361 }
else if constexpr(ROT == 16) {
362 return SIMD_4x32(vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(m_simd))));
// Generic NEON case: shift-left OR shift-right (start of the
// surrounding return expression is elided from this chunk).
366 vorrq_u32(vshlq_n_u32(m_simd,
static_cast<int>(ROT)), vshrq_n_u32(m_simd,
static_cast<int>(32 - ROT))));
// NOTE(review): the LSX branch body (original line 368) is elided.
367#elif defined(BOTAN_SIMD_USE_LSX)
369#elif defined(BOTAN_SIMD_USE_SIMD128)
// WASM: shift/or composition, same shape as the generic x86/NEON path.
370 return SIMD_4x32(wasm_v128_or(wasm_i32x4_shl(m_simd, ROT), wasm_u32x4_shr(m_simd, 32 - ROT)));
// Fragment of a per-lane byte swap (endian reversal of each 32-bit
// lane): every backend's permutation maps byte order 0,1,2,3 ->
// 3,2,1,0 within each 4-byte group. The function signature and the
// SSSE3 return (original lines ~579-580) are elided from this chunk.
577#if defined(BOTAN_SIMD_USE_SSSE3)
// _mm_set_epi8 is high-byte-first; this constant reverses bytes
// within each 32-bit lane when used with pshufb.
578 const auto idx = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
581#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// NOTE(review): the VSX fast path under this #ifdef (presumably
// vec_revb, originals 583-584) is elided; the generic vec_perm
// fallback with an explicit reversal table follows.
582 #ifdef BOTAN_SIMD_USE_VSX
585 const __vector
unsigned char rev[1] = {
586 {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
589 return SIMD_4x32(vec_perm(m_simd, m_simd, rev[0]));
592#elif defined(BOTAN_SIMD_USE_NEON)
// vrev32q_u8 reverses the bytes inside each 32-bit element directly.
593 return SIMD_4x32(vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(m_simd))));
594#elif defined(BOTAN_SIMD_USE_LSX)
// 0b00011011 encodes the byte order 3,2,1,0 for the 4-byte shuffle.
595 return SIMD_4x32(__lsx_vshuf4i_b(m_simd, 0b00011011));
596#elif defined(BOTAN_SIMD_USE_SIMD128)
597 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, m_simd, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12));
// Fragment of a template helper that shifts the four 32-bit lanes
// toward higher indices by I positions, filling vacated low lanes
// with zero (apparent from the shuffle tables below, where index 16
// selects from an all-zero second operand). The template header,
// the SSSE3/LSX branch bodies, and the AltiVec return are elided.
605#if defined(BOTAN_SIMD_USE_SSSE3)
607#elif defined(BOTAN_SIMD_USE_NEON)
// vextq_u32 extracts 4 lanes from the concatenation {zero, raw()}
// starting at lane 4-I, i.e. I zeros followed by the low 4-I lanes.
608 return SIMD_4x32(vextq_u32(vdupq_n_u32(0),
raw(), 4 - I));
609#elif defined(BOTAN_SIMD_USE_ALTIVEC)
610 const __vector
unsigned int zero = vec_splat_u32(0);
// One byte-permutation table per shift amount I=1..3; bytes 16..19
// index into the zero vector.
612 const __vector
unsigned char shuf[3] = {
613 {16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
614 {16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7},
615 {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 0, 1, 2, 3},
619#elif defined(BOTAN_SIMD_USE_LSX)
621#elif defined(BOTAN_SIMD_USE_SIMD128)
// WASM: I==0 is handled first (return elided); otherwise a shuffle
// against a zero vector, index 16 selecting a zero byte.
622 if constexpr(I == 0) {
626 const auto zero = wasm_u32x4_const_splat(0);
627 if constexpr(I == 1) {
628 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
630 if constexpr(I == 2) {
631 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7));
634 return SIMD_4x32(wasm_i8x16_shuffle(m_simd, zero, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 2, 3));
// Fragment of the companion helper shifting the 32-bit lanes toward
// lower indices by I positions, zero-filling the vacated high lanes
// (mirror image of the previous helper: here the shuffle tables keep
// low source bytes and pull zeros — index >= 16 — into the top).
// The template header and the SSSE3/NEON/LSX branch bodies are elided.
642#if defined(BOTAN_SIMD_USE_SSSE3)
644#elif defined(BOTAN_SIMD_USE_NEON)
646#elif defined(BOTAN_SIMD_USE_ALTIVEC)
647 const __vector
unsigned int zero = vec_splat_u32(0);
// Byte-permutation tables for I=1..3; bytes >= 16 read the zero
// vector operand of vec_perm.
649 const __vector
unsigned char shuf[3] = {
650 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
651 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
652 {12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
656#elif defined(BOTAN_SIMD_USE_LSX)
658#elif defined(BOTAN_SIMD_USE_SIMD128)
// WASM: I==0 identity case handled first (return elided); the
// remaining cases shuffle against zero, index 16 producing zero bytes.
659 if constexpr(I == 0) {
663 const auto zero = wasm_u32x4_const_splat(0);
664 if constexpr(I == 1) {
666 wasm_i8x16_shuffle(m_simd, zero, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16));
668 if constexpr(I == 2) {
670 wasm_i8x16_shuffle(m_simd, zero, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16));
674 wasm_i8x16_shuffle(m_simd, zero, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16));
// Fragment of an in-place 4x4 transpose of the 32-bit matrix held in
// B0..B3: each backend interleaves the low/high lane pairs (unpack /
// merge / zip), then interleaves the results again, so that after the
// call lane j of Bi holds what was lane i of Bj. The function
// signature (taking B0..B3 by reference) and the trailing #endif are
// outside this chunk.
685#if defined(BOTAN_SIMD_USE_SSSE3)
// x86: classic unpacklo/hi epi32 then epi64 two-stage interleave.
686 const __m128i T0 = _mm_unpacklo_epi32(B0.m_simd, B1.m_simd);
687 const __m128i T1 = _mm_unpacklo_epi32(B2.m_simd, B3.m_simd);
688 const __m128i T2 = _mm_unpackhi_epi32(B0.m_simd, B1.m_simd);
689 const __m128i T3 = _mm_unpackhi_epi32(B2.m_simd, B3.m_simd);
691 B0.m_simd = _mm_unpacklo_epi64(T0, T1);
692 B1.m_simd = _mm_unpackhi_epi64(T0, T1);
693 B2.m_simd = _mm_unpacklo_epi64(T2, T3);
694 B3.m_simd = _mm_unpackhi_epi64(T2, T3);
695#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// AltiVec: vec_mergeh/vec_mergel applied twice achieves the same
// two-stage interleave (note the B0/B2 then B1/B3 pairing).
696 const __vector
unsigned int T0 = vec_mergeh(B0.m_simd, B2.m_simd);
697 const __vector
unsigned int T1 = vec_mergeh(B1.m_simd, B3.m_simd);
698 const __vector
unsigned int T2 = vec_mergel(B0.m_simd, B2.m_simd);
699 const __vector
unsigned int T3 = vec_mergel(B1.m_simd, B3.m_simd);
701 B0.m_simd = vec_mergeh(T0, T1);
702 B1.m_simd = vec_mergel(T0, T1);
703 B2.m_simd = vec_mergeh(T2, T3);
704 B3.m_simd = vec_mergel(T2, T3);
// ARM32 NEON: vzipq_u32 returns both halves of the interleave at
// once as a uint32x4x2_t, so only two stages of two ops each.
706#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM32)
707 const uint32x4x2_t T0 = vzipq_u32(B0.m_simd, B2.m_simd);
708 const uint32x4x2_t T1 = vzipq_u32(B1.m_simd, B3.m_simd);
709 const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
710 const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
712 B0.m_simd = O0.val[0];
713 B1.m_simd = O0.val[1];
714 B2.m_simd = O1.val[0];
715 B3.m_simd = O1.val[1];
// AArch64: vzip1q/vzip2q give the two interleave halves as separate
// single-result intrinsics.
717#elif defined(BOTAN_SIMD_USE_NEON) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
718 const uint32x4_t T0 = vzip1q_u32(B0.m_simd, B2.m_simd);
719 const uint32x4_t T2 = vzip2q_u32(B0.m_simd, B2.m_simd);
720 const uint32x4_t T1 = vzip1q_u32(B1.m_simd, B3.m_simd);
721 const uint32x4_t T3 = vzip2q_u32(B1.m_simd, B3.m_simd);
723 B0.m_simd = vzip1q_u32(T0, T1);
724 B1.m_simd = vzip2q_u32(T0, T1);
725 B2.m_simd = vzip1q_u32(T2, T3);
726 B3.m_simd = vzip2q_u32(T2, T3);
727#elif defined(BOTAN_SIMD_USE_LSX)
// LSX: vilvl/vilvh interleave low/high 32-bit elements; operand order
// is (high-source, low-source), hence (B2, B0) etc.
728 const __m128i T0 = __lsx_vilvl_w(B2.raw(), B0.raw());
729 const __m128i T1 = __lsx_vilvh_w(B2.raw(), B0.raw());
730 const __m128i T2 = __lsx_vilvl_w(B3.raw(), B1.raw());
731 const __m128i T3 = __lsx_vilvh_w(B3.raw(), B1.raw());
732 B0.m_simd = __lsx_vilvl_w(T2, T0);
733 B1.m_simd = __lsx_vilvh_w(T2, T0);
734 B2.m_simd = __lsx_vilvl_w(T3, T1);
735 B3.m_simd = __lsx_vilvh_w(T3, T1);
736#elif defined(BOTAN_SIMD_USE_SIMD128)
// WASM: express each interleave as an explicit 32-bit lane shuffle
// (lanes 0-3 from the first operand, 4-7 from the second).
737 const auto T0 = wasm_i32x4_shuffle(B0.m_simd, B2.m_simd, 0, 4, 1, 5);
738 const auto T2 = wasm_i32x4_shuffle(B0.m_simd, B2.m_simd, 2, 6, 3, 7);
739 const auto T1 = wasm_i32x4_shuffle(B1.m_simd, B3.m_simd, 0, 4, 1, 5);
740 const auto T3 = wasm_i32x4_shuffle(B1.m_simd, B3.m_simd, 2, 6, 3, 7);
742 B0.m_simd = wasm_i32x4_shuffle(T0, T1, 0, 4, 1, 5);
743 B1.m_simd = wasm_i32x4_shuffle(T0, T1, 2, 6, 3, 7);
744 B2.m_simd = wasm_i32x4_shuffle(T2, T3, 0, 4, 1, 5);
745 B3.m_simd = wasm_i32x4_shuffle(T2, T3, 2, 6, 3, 7);
// Fragment of an alignr-style helper: the visible branches extract the
// 16 bytes starting at byte offset 4 of the 32-byte concatenation of
// two vectors (AltiVec mask bytes 4..19; WASM shuffle indices 4..19
// over b then a). The function signature and the SSSE3/NEON/LSX
// returns are elided from this chunk — which operand supplies the low
// half in the real signature cannot be confirmed from here.
844#if defined(BOTAN_SIMD_USE_SSSE3)
846#elif defined(BOTAN_SIMD_USE_NEON)
848#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// vec_perm mask: bytes 4..15 of the first operand, 16..19 = first
// four bytes of the second.
849 const __vector
unsigned char mask = {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
851#elif defined(BOTAN_SIMD_USE_LSX)
// LSX: same byte selection, expressed as packed 32-bit index words
// (0x04,0x05,... little-endian within each word).
852 const auto mask =
SIMD_4x32(0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x13121110);
854#elif defined(BOTAN_SIMD_USE_SIMD128)
// WASM: shuffle over the pair (b, a); indices 0-15 read b, 16-31
// read a. The start of the return expression is elided.
856 wasm_i8x16_shuffle(b.
raw(), a.
raw(), 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19));
// Fragment of the byte-offset-8 variant of the preceding alignr-style
// helper: the visible branches select the 16 bytes starting at offset
// 8 of the 32-byte concatenation of two vectors (mask/shuffle indices
// 8..23). The function signature and the SSSE3/NEON/LSX branch bodies
// are elided from this chunk.
861#if defined(BOTAN_SIMD_USE_SSSE3)
863#elif defined(BOTAN_SIMD_USE_NEON)
865#elif defined(BOTAN_SIMD_USE_ALTIVEC)
// vec_perm mask: upper half of the first operand followed by the
// lower half of the second.
866 const __vector
unsigned char mask = {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
868#elif defined(BOTAN_SIMD_USE_LSX)
870#elif defined(BOTAN_SIMD_USE_SIMD128)
// WASM: shuffle over (b, a); indices 0-15 read b, 16-31 read a. The
// start of the return expression is elided.
872 wasm_i8x16_shuffle(b.
raw(), a.
raw(), 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23));