7#include <botan/internal/sm4.h>
9#include <botan/mem_ops.h>
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/simd_4x32.h>
12#include <botan/internal/simd_hwaes.h>
39 constexpr uint8_t pre_c = 0b00111110;
50 constexpr uint8_t post_c = 0b11010011;
55 return post.affine_transform(
hw_aes_sbox(pre.affine_transform(x)));
59 const auto sx = sm4_sbox(x);
61 return sx ^ sx.rotl<2>() ^ sx.rotl<10>() ^ sx.rotl<18>() ^ sx.rotl<24>();
// SM4 encryption helper for one group of 4 * 16 bytes (sizes taken from the
// array bounds in the signature).
//   ptext - input bytes, 4 * 16 of them
//   ctext - output bytes, 4 * 16 of them
//   RK    - read-only span of 32-bit round key words
// NOTE(review): this view of the function is partial - the statements that
// initialise B0..B3 and K0..K3 and the closing braces are not shown, and the
// numbers fused onto the start of some lines look like line-number residue
// from extraction. Confirm against the complete file before editing.
64BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES
void sm4_hwaes_encrypt_4(
const uint8_t ptext[4 * 16],
65 uint8_t ctext[4 * 16],
66 std::span<const uint32_t> RK) {
// As written, j counts 0..7, so the four updates below run eight times.
74 for(
size_t j = 0; j != 8; ++j) {
// Each state variable is XORed with sm4_f of the other three combined with
// one key value. Order matters: every line consumes the variable that the
// line directly above it has just updated.
79 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
80 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
81 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
82 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
// Results are written big-endian at 16-byte strides in reversed order:
// B3 goes to offset 0 and B0 to offset 48.
88 B3.store_be(ctext + 16 * 0);
89 B2.store_be(ctext + 16 * 1);
90 B1.store_be(ctext + 16 * 2);
91 B0.store_be(ctext + 16 * 3);
// SM4 encryption helper for one group of 8 * 16 bytes (sizes taken from the
// array bounds in the signature).
//   ptext - input bytes, 8 * 16 of them
//   ctext - output bytes, 8 * 16 of them
//   RK    - read-only span of 32-bit round key words
// NOTE(review): this view of the function is partial - the statements that
// initialise B0..B7 and K0..K3 and the closing braces are not shown, and the
// numbers fused onto the start of some lines look like line-number residue
// from extraction. Confirm against the complete file before editing.
95BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES
void sm4_hwaes_encrypt_8(
const uint8_t ptext[8 * 16],
96 uint8_t ctext[8 * 16],
97 std::span<const uint32_t> RK) {
// As written, j counts 0..7, so the eight updates below run eight times.
110 for(
size_t j = 0; j != 8; ++j) {
// Two independent chains are processed side by side: B0..B3 and B4..B7.
// Both chains use the same key value (K0..K3) at each step, and the chains
// never read each other's variables. Within a chain, each update consumes
// the variable updated in that chain's previous step.
116 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
117 B4 ^= sm4_f(B5 ^ B6 ^ B7 ^ K0);
119 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
120 B5 ^= sm4_f(B6 ^ B7 ^ B4 ^ K1);
122 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
123 B6 ^= sm4_f(B7 ^ B4 ^ B5 ^ K2);
125 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
126 B7 ^= sm4_f(B4 ^ B5 ^ B6 ^ K3);
// First chain is written big-endian to offsets 0..48 in reversed order
// (B3 first, B0 last).
133 B3.store_be(ctext + 16 * 0);
134 B2.store_be(ctext + 16 * 1);
135 B1.store_be(ctext + 16 * 2);
136 B0.store_be(ctext + 16 * 3);
// Second chain follows at offsets 64..112, again reversed (B7 first, B4 last).
138 B7.store_be(ctext + 16 * 4);
139 B6.store_be(ctext + 16 * 5);
140 B5.store_be(ctext + 16 * 6);
141 B4.store_be(ctext + 16 * 7);
// SM4 decryption helper for one group of 4 * 16 bytes (sizes taken from the
// array bounds in the signature).
//   ctext - input bytes, 4 * 16 of them
//   ptext - output bytes, 4 * 16 of them
//   RK    - read-only span of 32-bit round key words
// NOTE(review): the visible statements have the same shape as the 4-wide
// encryption helper; whatever distinguishes decryption (presumably how
// K0..K3 are picked from RK) is in lines not shown here - confirm. The
// initialisation of B0..B3, the closing braces, and the meaning of the
// numbers fused onto some lines (likely extraction residue) are likewise
// not visible.
144BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES
void sm4_hwaes_decrypt_4(
const uint8_t ctext[4 * 16],
145 uint8_t ptext[4 * 16],
146 std::span<const uint32_t> RK) {
// As written, j counts 0..7, so the four updates below run eight times.
154 for(
size_t j = 0; j != 8; ++j) {
// Each state variable is XORed with sm4_f of the other three combined with
// one key value. Order matters: every line consumes the variable that the
// line directly above it has just updated.
159 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
160 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
161 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
162 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
// Results are written big-endian at 16-byte strides in reversed order:
// B3 goes to offset 0 and B0 to offset 48.
168 B3.store_be(ptext + 16 * 0);
169 B2.store_be(ptext + 16 * 1);
170 B1.store_be(ptext + 16 * 2);
171 B0.store_be(ptext + 16 * 3);
// SM4 decryption helper for one group of 8 * 16 bytes (sizes taken from the
// array bounds in the signature).
//   ctext - input bytes, 8 * 16 of them
//   ptext - output bytes, 8 * 16 of them
//   RK    - read-only span of 32-bit round key words
// NOTE(review): the visible statements have the same shape as the 8-wide
// encryption helper; whatever distinguishes decryption (presumably how
// K0..K3 are picked from RK) is in lines not shown here - confirm. The
// initialisation of B0..B7, the closing braces, and the meaning of the
// numbers fused onto some lines (likely extraction residue) are likewise
// not visible.
175BOTAN_FORCE_INLINE BOTAN_FN_ISA_HWAES
void sm4_hwaes_decrypt_8(
const uint8_t ctext[8 * 16],
176 uint8_t ptext[8 * 16],
177 std::span<const uint32_t> RK) {
// As written, j counts 0..7, so the eight updates below run eight times.
190 for(
size_t j = 0; j != 8; ++j) {
// Two independent chains are processed side by side: B0..B3 and B4..B7.
// Both chains use the same key value (K0..K3) at each step, and the chains
// never read each other's variables. Within a chain, each update consumes
// the variable updated in that chain's previous step.
196 B0 ^= sm4_f(B1 ^ B2 ^ B3 ^ K0);
197 B4 ^= sm4_f(B5 ^ B6 ^ B7 ^ K0);
199 B1 ^= sm4_f(B2 ^ B3 ^ B0 ^ K1);
200 B5 ^= sm4_f(B6 ^ B7 ^ B4 ^ K1);
202 B2 ^= sm4_f(B3 ^ B0 ^ B1 ^ K2);
203 B6 ^= sm4_f(B7 ^ B4 ^ B5 ^ K2);
205 B3 ^= sm4_f(B0 ^ B1 ^ B2 ^ K3);
206 B7 ^= sm4_f(B4 ^ B5 ^ B6 ^ K3);
// First chain is written big-endian to offsets 0..48 in reversed order
// (B3 first, B0 last).
213 B3.store_be(ptext + 16 * 0);
214 B2.store_be(ptext + 16 * 1);
215 B1.store_be(ptext + 16 * 2);
216 B0.store_be(ptext + 16 * 3);
// Second chain follows at offsets 64..112, again reversed (B7 first, B4 last).
218 B7.store_be(ptext + 16 * 4);
219 B6.store_be(ptext + 16 * 5);
220 B5.store_be(ptext + 16 * 6);
221 B4.store_be(ptext + 16 * 7);
// Member entry point for SM4 encryption on the HWAES path.
//   ptext  - input bytes
//   ctext  - output bytes
//   blocks - block count supplied by the caller
// The visible calls all pass the member m_RK as the key material.
// NOTE(review): only the helper calls and buffer declarations are visible;
// the loops/conditions on `blocks`, pointer advancement, and any copying to
// or from the local buffers are in lines not shown here. The numbers fused
// onto the start of lines look like extraction residue - confirm.
226void BOTAN_FN_ISA_HWAES SM4::sm4_hwaes_encrypt(
const uint8_t ptext[], uint8_t ctext[],
size_t blocks)
const {
// 8-wide helper operating directly on the caller's buffers.
228 sm4_hwaes_encrypt_8(ptext, ctext, m_RK);
// 4-wide helper operating directly on the caller's buffers.
235 sm4_hwaes_encrypt_4(ptext, ctext, m_RK);
// Zero-initialised 4 * 16 byte local buffers; the 4-wide helper is run on
// these rather than on the caller's pointers.
// NOTE(review): presumably used for a trailing group of fewer than four
// blocks so the helper never touches memory past the caller's data - verify.
242 uint8_t pbuf[4 * 16] = {0};
243 uint8_t cbuf[4 * 16] = {0};
245 sm4_hwaes_encrypt_4(pbuf, cbuf, m_RK);
// Member entry point for SM4 decryption on the HWAES path.
//   ctext  - input bytes
//   ptext  - output bytes
//   blocks - block count supplied by the caller
// The visible calls all pass the member m_RK as the key material.
// NOTE(review): only the helper calls and buffer declarations are visible;
// the loops/conditions on `blocks`, pointer advancement, and any copying to
// or from the local buffers are in lines not shown here. The numbers fused
// onto the start of lines look like extraction residue - confirm.
250void BOTAN_FN_ISA_HWAES SM4::sm4_hwaes_decrypt(
const uint8_t ctext[], uint8_t ptext[],
size_t blocks)
const {
// 8-wide helper operating directly on the caller's buffers.
252 sm4_hwaes_decrypt_8(ctext, ptext, m_RK);
// 4-wide helper operating directly on the caller's buffers.
259 sm4_hwaes_decrypt_4(ctext, ptext, m_RK);
// Zero-initialised 4 * 16 byte local buffers; the 4-wide helper is run on
// these rather than on the caller's pointers.
// NOTE(review): presumably used for a trailing group of fewer than four
// blocks so the helper never touches memory past the caller's data - verify.
266 uint8_t cbuf[4 * 16] = {0};
267 uint8_t pbuf[4 * 16] = {0};
269 sm4_hwaes_decrypt_4(cbuf, pbuf, m_RK);
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat(uint32_t B) noexcept
static void BOTAN_FN_ISA_SIMD_4X32 transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
#define BOTAN_FORCE_INLINE
constexpr void copy_mem(T *out, const T *in, size_t n)
consteval uint64_t gfni_matrix(std::string_view s)
SIMD_4x32 BOTAN_FN_ISA_HWAES hw_aes_sbox(SIMD_4x32 x)