7#include <botan/internal/sm3.h>
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/rotate.h>
11#include <botan/internal/simd_4x32.h>
// NOTE(review): excerpt only. The enclosing function header is not visible here;
// presumably this is the body of sm3_permute_state_in, which is called as
// sm3_permute_state_in(S0, S1) elsewhere in this file - confirm.
// Statements between the visible ones may have been elided.

// Immediate 0b10110001 swaps the two 32-bit lanes inside each 64-bit half of S0
// (lane order 0,1,2,3 becomes 1,0,3,2).
19 S0 =
SIMD_4x32(_mm_shuffle_epi32(S0.raw(), 0b10110001));
// Immediate 0b00011011 reverses the order of the four 32-bit lanes of S1.
20 S1 =
SIMD_4x32(_mm_shuffle_epi32(S1.raw(), 0b00011011));
// Blend on 16-bit granularity: mask 0xF0 takes the low 64 bits from S1.rotr<19>()
// and the high 64 bits from S0.rotr<9>().
// NOTE(review): rotr<N> is presumably a per-32-bit-lane rotate right - confirm in SIMD_4x32.
23 S1 =
SIMD_4x32(_mm_blend_epi16(S1.rotr<19>().raw(), S0.rotr<9>().raw(), 0xF0));
// SM3_NI_next: combines its word vectors through the _mm_sm3msg1_epi32 and
// _mm_sm3msg2_epi32 intrinsics and stores the result back into W0.
// NOTE(review): non-contiguous excerpt. The remaining parameters (W1, W2, W3 are
// used below) and the definition of X10 are not visible here.
27BOTAN_FN_ISA_AVX2_SM3
inline void SM3_NI_next(
SIMD_4x32& W0,
// X3 = lane 3 of W0 followed by lanes 0..2 of W1 (W1:W0 shifted right by 12 bytes).
31 auto X3 =
SIMD_4x32(_mm_alignr_epi8(W1.raw(), W0.raw(), 12));
// X7 = lane 3 of W1 followed by lanes 0..2 of W2 (same construction, one vector later).
32 auto X7 =
SIMD_4x32(_mm_alignr_epi8(W2.raw(), W1.raw(), 12));
// NOTE(review): presumably W3 moved by one 32-bit element; exact direction and fill
// are defined by SIMD_4x32::shift_elems_right - confirm there.
34 auto X13 = W3.template shift_elems_right<1>();
// First message-expansion step: feeds X7, X13 and the current W0 to sm3msg1.
36 auto P1_O =
SIMD_4x32(_mm_sm3msg1_epi32(X7.raw(), X13.raw(), W0.raw()));
// Second step: sm3msg2 combines the sm3msg1 result with X3 and X10; the output
// overwrites W0 in place (W0 is taken by non-const reference).
37 W0 =
SIMD_4x32(_mm_sm3msg2_epi32(P1_O.raw(), X3.raw(), X10.raw()));
// NOTE(review): excerpt only. The enclosing header is not visible; presumably this
// is the body of SM3_NI_Rx4<R>, which is invoked as SM3_NI_Rx4<0>, <4>, ... elsewhere
// in this file. R is not defined in the visible lines - confirm it is the template
// argument.

// Low 64 bits of W0 followed by low 64 bits of W1: lanes W0[0], W0[1], W1[0], W1[1].
42 const auto W0145 =
SIMD_4x32(_mm_unpacklo_epi64(W0.raw(), W1.raw()));
// High 64 bits of W0 followed by high 64 bits of W1: lanes W0[2], W0[3], W1[2], W1[3].
43 const auto W2367 =
SIMD_4x32(_mm_unpackhi_epi64(W0.raw(), W1.raw()));
// Two sm3rnds2 invocations with immediates R and R + 2. The second call consumes
// the S0 produced by the first, so the order of these two statements matters.
45 S0 =
SIMD_4x32(_mm_sm3rnds2_epi32(S0.raw(), S1.raw(), W0145.raw(), R));
46 S1 =
SIMD_4x32(_mm_sm3rnds2_epi32(S1.raw(), S0.raw(), W2367.raw(), R + 2));
// SM3::compress_digest_x86: SM3 compression using the SM3_NI_Rx4 / SM3_NI_next helpers.
// NOTE(review): non-contiguous excerpt. The rest of the signature, the state and
// message loads, any per-block loop, and the final state update/store are not
// visible here.
51BOTAN_FN_ISA_AVX2_SM3
void SM3::compress_digest_x86(digest_type& digest,
52 std::span<const uint8_t> input,
// Rearrange the state vectors before the rounds.
56 sm3_permute_state_in(S0, S1);
58 const uint8_t* data = input.data();
// Copies of the state taken before the rounds.
// NOTE(review): presumably combined with the post-round state in the elided tail - confirm.
66 const auto S0_save = S0;
67 const auto S1_save = S1;
// Round schedule: sixteen SM3_NI_Rx4<R> calls with R = 0, 4, ..., 60. The state is
// always passed as (S1, S0). The message vectors rotate through the pairs
// (W0,W1), (W1,W2), (W2,W3), (W3,W0). After each of the first thirteen groups,
// SM3_NI_next is called with the argument list rotated the same way, so its first
// argument (the vector it overwrites) is the older one of the pair just used.
72 SM3_NI_Rx4<0>(S1, S0, W0, W1);
73 SM3_NI_next(W0, W1, W2, W3);
75 SM3_NI_Rx4<4>(S1, S0, W1, W2);
76 SM3_NI_next(W1, W2, W3, W0);
78 SM3_NI_Rx4<8>(S1, S0, W2, W3);
79 SM3_NI_next(W2, W3, W0, W1);
81 SM3_NI_Rx4<12>(S1, S0, W3, W0);
82 SM3_NI_next(W3, W0, W1, W2);
84 SM3_NI_Rx4<16>(S1, S0, W0, W1);
85 SM3_NI_next(W0, W1, W2, W3);
87 SM3_NI_Rx4<20>(S1, S0, W1, W2);
88 SM3_NI_next(W1, W2, W3, W0);
90 SM3_NI_Rx4<24>(S1, S0, W2, W3);
91 SM3_NI_next(W2, W3, W0, W1);
93 SM3_NI_Rx4<28>(S1, S0, W3, W0);
94 SM3_NI_next(W3, W0, W1, W2);
96 SM3_NI_Rx4<32>(S1, S0, W0, W1);
97 SM3_NI_next(W0, W1, W2, W3);
99 SM3_NI_Rx4<36>(S1, S0, W1, W2);
100 SM3_NI_next(W1, W2, W3, W0);
102 SM3_NI_Rx4<40>(S1, S0, W2, W3);
103 SM3_NI_next(W2, W3, W0, W1);
105 SM3_NI_Rx4<44>(S1, S0, W3, W0);
106 SM3_NI_next(W3, W0, W1, W2);
108 SM3_NI_Rx4<48>(S1, S0, W0, W1);
109 SM3_NI_next(W0, W1, W2, W3);
// The last three groups run back to back with no SM3_NI_next in between.
111 SM3_NI_Rx4<52>(S1, S0, W1, W2);
112 SM3_NI_Rx4<56>(S1, S0, W2, W3);
113 SM3_NI_Rx4<60>(S1, S0, W3, W0);
// NOTE(review): the lines below are bare signatures with no bodies, apparently
// gathered from other headers for reference - confirm against the real declarations.

// Builds a SIMD_4x32 from the memory at `in`.
// NOTE(review): the name suggests big-endian word order - confirm.
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
// NOTE(review): presumably the little-endian counterpart of load_be - confirm.
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
// NOTE(review): presumably a SIMD_4x32 wrapper around a byte-wise alignr of a and b;
// the shift amount is not visible in this signature - confirm.
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
// Declaration is truncated here; the initializer is not visible.
static constexpr size_t block_bytes
// As shown, the macro expands to nothing.
// NOTE(review): the real definition may be compiler-specific - confirm.
#define BOTAN_FORCE_INLINE
// Scalar rotr template; the template header and the rotation amount are not visible here.
BOTAN_FORCE_INLINE constexpr T rotr(T input)