Botan 3.11.0
Crypto and TLS for C&&
sm3_x86.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sm3.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/rotate.h>
11#include <botan/internal/simd_4x32.h>
12#include <immintrin.h>
13
14namespace Botan {
15
16namespace {
17
18BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM3 void sm3_permute_state_in(SIMD_4x32& S0, SIMD_4x32& S1) {
19 S0 = SIMD_4x32(_mm_shuffle_epi32(S0.raw(), 0b10110001)); // CDAB
20 S1 = SIMD_4x32(_mm_shuffle_epi32(S1.raw(), 0b00011011)); // EFGH
21
22 const auto T = SIMD_4x32::alignr8(S0, S1); // ABEF
23 S1 = SIMD_4x32(_mm_blend_epi16(S1.rotr<19>().raw(), S0.rotr<9>().raw(), 0xF0)); // CDGH
24 S0 = T;
25}
26
27BOTAN_FN_ISA_AVX2_SM3 inline void SM3_NI_next(SIMD_4x32& W0,
28 const SIMD_4x32& W1,
29 const SIMD_4x32& W2,
30 const SIMD_4x32& W3) {
31 auto X3 = SIMD_4x32(_mm_alignr_epi8(W1.raw(), W0.raw(), 12)); // W[3..6]
32 auto X7 = SIMD_4x32(_mm_alignr_epi8(W2.raw(), W1.raw(), 12)); // W[7..10]
33 auto X10 = SIMD_4x32::alignr8(W3, W2); // W[10..13]
34 auto X13 = W3.template shift_elems_right<1>(); // W[13..15] || 0
35
36 auto P1_O = SIMD_4x32(_mm_sm3msg1_epi32(X7.raw(), X13.raw(), W0.raw()));
37 W0 = SIMD_4x32(_mm_sm3msg2_epi32(P1_O.raw(), X3.raw(), X10.raw()));
38}
39
40template <size_t R>
41BOTAN_FN_ISA_AVX2_SM3 inline void SM3_NI_Rx4(SIMD_4x32& S0, SIMD_4x32& S1, SIMD_4x32 W0, SIMD_4x32 W1) {
42 const auto W0145 = SIMD_4x32(_mm_unpacklo_epi64(W0.raw(), W1.raw()));
43 const auto W2367 = SIMD_4x32(_mm_unpackhi_epi64(W0.raw(), W1.raw()));
44
45 S0 = SIMD_4x32(_mm_sm3rnds2_epi32(S0.raw(), S1.raw(), W0145.raw(), R));
46 S1 = SIMD_4x32(_mm_sm3rnds2_epi32(S1.raw(), S0.raw(), W2367.raw(), R + 2));
47}
48
49} // namespace
50
51BOTAN_FN_ISA_AVX2_SM3 void SM3::compress_digest_x86(digest_type& digest,
52 std::span<const uint8_t> input,
53 size_t blocks) {
54 auto S0 = SIMD_4x32::load_le(&digest[0]); // NOLINT(*-container-data-pointer)
55 auto S1 = SIMD_4x32::load_le(&digest[4]);
56 sm3_permute_state_in(S0, S1);
57
58 const uint8_t* data = input.data();
59
60 while(blocks > 0) {
61 SIMD_4x32 W0 = SIMD_4x32::load_be(&data[0]); // NOLINT(*-container-data-pointer)
62 SIMD_4x32 W1 = SIMD_4x32::load_be(&data[16]);
63 SIMD_4x32 W2 = SIMD_4x32::load_be(&data[32]);
64 SIMD_4x32 W3 = SIMD_4x32::load_be(&data[48]);
65
66 const auto S0_save = S0;
67 const auto S1_save = S1;
68
69 data += block_bytes;
70 blocks -= 1;
71
72 SM3_NI_Rx4<0>(S1, S0, W0, W1);
73 SM3_NI_next(W0, W1, W2, W3);
74
75 SM3_NI_Rx4<4>(S1, S0, W1, W2);
76 SM3_NI_next(W1, W2, W3, W0);
77
78 SM3_NI_Rx4<8>(S1, S0, W2, W3);
79 SM3_NI_next(W2, W3, W0, W1);
80
81 SM3_NI_Rx4<12>(S1, S0, W3, W0);
82 SM3_NI_next(W3, W0, W1, W2);
83
84 SM3_NI_Rx4<16>(S1, S0, W0, W1);
85 SM3_NI_next(W0, W1, W2, W3);
86
87 SM3_NI_Rx4<20>(S1, S0, W1, W2);
88 SM3_NI_next(W1, W2, W3, W0);
89
90 SM3_NI_Rx4<24>(S1, S0, W2, W3);
91 SM3_NI_next(W2, W3, W0, W1);
92
93 SM3_NI_Rx4<28>(S1, S0, W3, W0);
94 SM3_NI_next(W3, W0, W1, W2);
95
96 SM3_NI_Rx4<32>(S1, S0, W0, W1);
97 SM3_NI_next(W0, W1, W2, W3);
98
99 SM3_NI_Rx4<36>(S1, S0, W1, W2);
100 SM3_NI_next(W1, W2, W3, W0);
101
102 SM3_NI_Rx4<40>(S1, S0, W2, W3);
103 SM3_NI_next(W2, W3, W0, W1);
104
105 SM3_NI_Rx4<44>(S1, S0, W3, W0);
106 SM3_NI_next(W3, W0, W1, W2);
107
108 SM3_NI_Rx4<48>(S1, S0, W0, W1);
109 SM3_NI_next(W0, W1, W2, W3);
110
111 SM3_NI_Rx4<52>(S1, S0, W1, W2);
112 SM3_NI_Rx4<56>(S1, S0, W2, W3);
113 SM3_NI_Rx4<60>(S1, S0, W3, W0);
114
115 S0 ^= S0_save;
116 S1 ^= S1_save;
117 }
118
119 // TODO do this with SIMD instead
120 uint32_t T[8] = {0};
121 S0.store_le(&T[0]);
122 S1.store_le(&T[4]);
123
124 digest[0] = T[3];
125 digest[1] = T[2];
126 digest[2] = rotr<23>(T[7]);
127 digest[3] = rotr<23>(T[6]);
128 digest[4] = T[1];
129 digest[5] = T[0];
130 digest[6] = rotr<13>(T[5]);
131 digest[7] = rotr<13>(T[4]);
132}
133
134} // namespace Botan
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:189
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
Definition simd_4x32.h:162
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:860
static constexpr size_t block_bytes
Definition sm3.h:24
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
BOTAN_FORCE_INLINE constexpr T rotr(T input)
Definition rotate.h:35