Botan 3.11.1
Crypto and TLS for C++
sm4_hwaes.cpp
Go to the documentation of this file.
1/*
2* (C) 2026 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sm4.h>
8
9#include <botan/mem_ops.h>
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/simd_4x32.h>
12#include <botan/internal/simd_hwaes.h>
13
14namespace Botan {
15
16namespace {
17
18BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES SIMD_4x32 sm4_sbox(const SIMD_4x32& x) {
19 /*
20 * The SM4 sbox is, like the AES sbox, based on inversion in GF(2^8) plus an
21 * affine transformation.
22 *
23 * See
24 * - <https://eprint.iacr.org/2022/1154> sections 3.1 and 3.3
25 * - <https://github.com/mjosaarinen/sm4ni>
26 * - <https://jukivili.kapsi.fi/web/mastersthesis/thesis_final_sRGB_PDFA2b.pdf>
27 * describes a similar approach for implementing Camellia in section 4.4
28 */
29
30 constexpr uint64_t pre_a = gfni_matrix(R"(
31 0 0 1 1 0 0 1 0
32 0 0 0 1 0 1 0 0
33 1 0 1 1 1 1 1 0
34 1 0 0 1 1 1 0 1
35 0 1 0 1 1 0 0 0
36 0 1 0 0 0 1 0 0
37 0 0 0 0 1 0 1 0
38 1 0 1 1 1 0 1 0)");
39 constexpr uint8_t pre_c = 0b00111110;
40
41 constexpr uint64_t post_a = gfni_matrix(R"(
42 1 1 0 0 1 1 1 1
43 1 1 0 1 0 1 0 1
44 0 0 1 0 1 1 0 0
45 1 0 0 1 0 1 0 1
46 0 0 1 0 1 1 1 0
47 0 1 1 0 0 1 0 1
48 1 0 1 0 1 1 0 1
49 1 0 0 1 0 0 0 1)");
50 constexpr uint8_t post_c = 0b11010011;
51
52 constexpr auto pre = Gf2AffineTransformation(pre_a, pre_c);
53 constexpr auto post = Gf2AffineTransformation::post_sbox(post_a, post_c);
54
55 return post.affine_transform(hw_aes_sbox(pre.affine_transform(x)));
56}
57
58BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES SIMD_4x32 sm4_f(const SIMD_4x32& x) {
59 const auto sx = sm4_sbox(x);
60 // L linear transform
61 return sx ^ sx.rotl<2>() ^ sx.rotl<10>() ^ sx.rotl<18>() ^ sx.rotl<24>();
62}
63
64BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES void sm4_hwaes_encrypt_4(const uint8_t ptext[4 * 16],
65 uint8_t ctext[4 * 16],
66 std::span<const uint32_t> RK) {
67 auto B0 = SIMD_4x32::load_be(ptext + 16 * 0);
68 auto B1 = SIMD_4x32::load_be(ptext + 16 * 1);
69 auto B2 = SIMD_4x32::load_be(ptext + 16 * 2);
70 auto B3 = SIMD_4x32::load_be(ptext + 16 * 3);
71
72 SIMD_4x32::transpose(B0, B1, B2, B3);
73
74 for(size_t j = 0; j != 8; ++j) {
75 const auto K0 = SIMD_4x32::splat(RK[4 * j]);
76 const auto K1 = SIMD_4x32::splat(RK[4 * j + 1]);
77 const auto K2 = SIMD_4x32::splat(RK[4 * j + 2]);
78 const auto K3 = SIMD_4x32::splat(RK[4 * j + 3]);
79 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
80 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
81 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
82 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
83 }
84
85 // SM4 reverses word order
86 SIMD_4x32::transpose(B3, B2, B1, B0);
87
88 B3.store_be(ctext + 16 * 0);
89 B2.store_be(ctext + 16 * 1);
90 B1.store_be(ctext + 16 * 2);
91 B0.store_be(ctext + 16 * 3);
92}
93
94// Same as sm4_hwaes_encrypt_4 except interleaved 2x
95BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES void sm4_hwaes_encrypt_8(const uint8_t ptext[8 * 16],
96 uint8_t ctext[8 * 16],
97 std::span<const uint32_t> RK) {
98 auto B0 = SIMD_4x32::load_be(ptext + 16 * 0);
99 auto B1 = SIMD_4x32::load_be(ptext + 16 * 1);
100 auto B2 = SIMD_4x32::load_be(ptext + 16 * 2);
101 auto B3 = SIMD_4x32::load_be(ptext + 16 * 3);
102 auto B4 = SIMD_4x32::load_be(ptext + 16 * 4);
103 auto B5 = SIMD_4x32::load_be(ptext + 16 * 5);
104 auto B6 = SIMD_4x32::load_be(ptext + 16 * 6);
105 auto B7 = SIMD_4x32::load_be(ptext + 16 * 7);
106
107 SIMD_4x32::transpose(B0, B1, B2, B3);
108 SIMD_4x32::transpose(B4, B5, B6, B7);
109
110 for(size_t j = 0; j != 8; ++j) {
111 const auto K0 = SIMD_4x32::splat(RK[4 * j]);
112 const auto K1 = SIMD_4x32::splat(RK[4 * j + 1]);
113 const auto K2 = SIMD_4x32::splat(RK[4 * j + 2]);
114 const auto K3 = SIMD_4x32::splat(RK[4 * j + 3]);
115
116 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
117 B4 ^= sm4_f(B5 ^ B6 ^ B7 ^ K0);
118
119 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
120 B5 ^= sm4_f(B6 ^ B7 ^ B4 ^ K1);
121
122 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
123 B6 ^= sm4_f(B7 ^ B4 ^ B5 ^ K2);
124
125 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
126 B7 ^= sm4_f(B4 ^ B5 ^ B6 ^ K3);
127 }
128
129 // SM4 reverses word order
130 SIMD_4x32::transpose(B3, B2, B1, B0);
131 SIMD_4x32::transpose(B7, B6, B5, B4);
132
133 B3.store_be(ctext + 16 * 0);
134 B2.store_be(ctext + 16 * 1);
135 B1.store_be(ctext + 16 * 2);
136 B0.store_be(ctext + 16 * 3);
137
138 B7.store_be(ctext + 16 * 4);
139 B6.store_be(ctext + 16 * 5);
140 B5.store_be(ctext + 16 * 6);
141 B4.store_be(ctext + 16 * 7);
142}
143
144BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES void sm4_hwaes_decrypt_4(const uint8_t ctext[4 * 16],
145 uint8_t ptext[4 * 16],
146 std::span<const uint32_t> RK) {
147 auto B0 = SIMD_4x32::load_be(ctext + 16 * 0);
148 auto B1 = SIMD_4x32::load_be(ctext + 16 * 1);
149 auto B2 = SIMD_4x32::load_be(ctext + 16 * 2);
150 auto B3 = SIMD_4x32::load_be(ctext + 16 * 3);
151
152 SIMD_4x32::transpose(B0, B1, B2, B3);
153
154 for(size_t j = 0; j != 8; ++j) {
155 const auto K0 = SIMD_4x32::splat(RK[32 - (4 * j + 1)]);
156 const auto K1 = SIMD_4x32::splat(RK[32 - (4 * j + 2)]);
157 const auto K2 = SIMD_4x32::splat(RK[32 - (4 * j + 3)]);
158 const auto K3 = SIMD_4x32::splat(RK[32 - (4 * j + 4)]);
159 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
160 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
161 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
162 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
163 }
164
165 // SM4 reverses word order
166 SIMD_4x32::transpose(B3, B2, B1, B0);
167
168 B3.store_be(ptext + 16 * 0);
169 B2.store_be(ptext + 16 * 1);
170 B1.store_be(ptext + 16 * 2);
171 B0.store_be(ptext + 16 * 3);
172}
173
174// Same as sm4_hwaes_decrypt_4 except interleaved 2x
175BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES void sm4_hwaes_decrypt_8(const uint8_t ctext[8 * 16],
176 uint8_t ptext[8 * 16],
177 std::span<const uint32_t> RK) {
178 auto B0 = SIMD_4x32::load_be(ctext + 16 * 0);
179 auto B1 = SIMD_4x32::load_be(ctext + 16 * 1);
180 auto B2 = SIMD_4x32::load_be(ctext + 16 * 2);
181 auto B3 = SIMD_4x32::load_be(ctext + 16 * 3);
182 auto B4 = SIMD_4x32::load_be(ctext + 16 * 4);
183 auto B5 = SIMD_4x32::load_be(ctext + 16 * 5);
184 auto B6 = SIMD_4x32::load_be(ctext + 16 * 6);
185 auto B7 = SIMD_4x32::load_be(ctext + 16 * 7);
186
187 SIMD_4x32::transpose(B0, B1, B2, B3);
188 SIMD_4x32::transpose(B4, B5, B6, B7);
189
190 for(size_t j = 0; j != 8; ++j) {
191 const auto K0 = SIMD_4x32::splat(RK[32 - (4 * j + 1)]);
192 const auto K1 = SIMD_4x32::splat(RK[32 - (4 * j + 2)]);
193 const auto K2 = SIMD_4x32::splat(RK[32 - (4 * j + 3)]);
194 const auto K3 = SIMD_4x32::splat(RK[32 - (4 * j + 4)]);
195
196 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
197 B4 ^= sm4_f(B5 ^ B6 ^ B7 ^ K0);
198
199 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
200 B5 ^= sm4_f(B6 ^ B7 ^ B4 ^ K1);
201
202 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
203 B6 ^= sm4_f(B7 ^ B4 ^ B5 ^ K2);
204
205 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
206 B7 ^= sm4_f(B4 ^ B5 ^ B6 ^ K3);
207 }
208
209 // SM4 reverses word order
210 SIMD_4x32::transpose(B3, B2, B1, B0);
211 SIMD_4x32::transpose(B7, B6, B5, B4);
212
213 B3.store_be(ptext + 16 * 0);
214 B2.store_be(ptext + 16 * 1);
215 B1.store_be(ptext + 16 * 2);
216 B0.store_be(ptext + 16 * 3);
217
218 B7.store_be(ptext + 16 * 4);
219 B6.store_be(ptext + 16 * 5);
220 B5.store_be(ptext + 16 * 6);
221 B4.store_be(ptext + 16 * 7);
222}
223
224} // namespace
225
226void BOTAN_FN_ISA_HWAES SM4::sm4_hwaes_encrypt(const uint8_t ptext[], uint8_t ctext[], size_t blocks) const {
227 while(blocks >= 8) {
228 sm4_hwaes_encrypt_8(ptext, ctext, m_RK);
229 ptext += 16 * 8;
230 ctext += 16 * 8;
231 blocks -= 8;
232 }
233
234 while(blocks >= 4) {
235 sm4_hwaes_encrypt_4(ptext, ctext, m_RK);
236 ptext += 16 * 4;
237 ctext += 16 * 4;
238 blocks -= 4;
239 }
240
241 if(blocks > 0) {
242 uint8_t pbuf[4 * 16] = {0};
243 uint8_t cbuf[4 * 16] = {0};
244 copy_mem(pbuf, ptext, blocks * 16);
245 sm4_hwaes_encrypt_4(pbuf, cbuf, m_RK);
246 copy_mem(ctext, cbuf, blocks * 16);
247 }
248}
249
250void BOTAN_FN_ISA_HWAES SM4::sm4_hwaes_decrypt(const uint8_t ctext[], uint8_t ptext[], size_t blocks) const {
251 while(blocks >= 8) {
252 sm4_hwaes_decrypt_8(ctext, ptext, m_RK);
253 ptext += 16 * 8;
254 ctext += 16 * 8;
255 blocks -= 8;
256 }
257
258 while(blocks >= 4) {
259 sm4_hwaes_decrypt_4(ctext, ptext, m_RK);
260 ptext += 16 * 4;
261 ctext += 16 * 4;
262 blocks -= 4;
263 }
264
265 if(blocks > 0) {
266 uint8_t cbuf[4 * 16] = {0};
267 uint8_t pbuf[4 * 16] = {0};
268 copy_mem(cbuf, ctext, blocks * 16);
269 sm4_hwaes_decrypt_4(cbuf, pbuf, m_RK);
270 copy_mem(ptext, pbuf, blocks * 16);
271 }
272}
273
274} // namespace Botan
static consteval Gf2AffineTransformation post_sbox(uint64_t M, uint8_t c)
Definition simd_hwaes.h:139
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:189
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat(uint32_t B) noexcept
Definition simd_4x32.h:127
static void BOTAN_FN_ISA_SIMD_4X32 transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_4x32.h:681
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
constexpr uint32_t K1
Definition sha1_f.h:16
constexpr uint32_t K3
Definition sha1_f.h:18
constexpr uint32_t K2
Definition sha1_f.h:17
constexpr void copy_mem(T *out, const T *in, size_t n)
Definition mem_ops.h:144
consteval uint64_t gfni_matrix(std::string_view s)
Definition gfni_utils.h:17
SIMD_4x32 BOTAN_FN_ISA_HWAES hw_aes_sbox(SIMD_4x32 x)
Definition simd_hwaes.h:19