Botan 3.9.0
Crypto and TLS for C&&
sm4_x86.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sm4.h>
8
9#include <botan/mem_ops.h>
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/simd_avx2.h>
12
13namespace Botan {
14
15namespace {
16
17BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 SIMD_8x32 sm4_x86_rnds4(const SIMD_8x32& b, const SIMD_8x32& k) {
18 return SIMD_8x32(_mm256_sm4rnds4_epi32(b.raw(), k.raw()));
19}
20
21BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_encrypt_x2(uint8_t out[2 * 16],
22 const uint8_t inp[2 * 16],
23 std::span<const uint32_t> RK) {
24 auto B0 = SIMD_8x32::load_be(inp);
25
26 for(size_t i = 0; i != 8; ++i) {
27 const auto RK_i = SIMD_8x32::load_le128(&RK[4 * i]);
28 B0 = sm4_x86_rnds4(B0, RK_i);
29 }
30
31 B0.reverse().store_le(out);
32}
33
34BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_encrypt_x8(uint8_t out[8 * 16],
35 const uint8_t inp[8 * 16],
36 std::span<const uint32_t> RK) {
37 auto B0 = SIMD_8x32::load_be(inp);
38 auto B1 = SIMD_8x32::load_be(inp + 32);
39 auto B2 = SIMD_8x32::load_be(inp + 64);
40 auto B3 = SIMD_8x32::load_be(inp + 96);
41
42 for(size_t i = 0; i != 8; ++i) {
43 auto RK_i = SIMD_8x32::load_le128(&RK[4 * i]);
44 B0 = sm4_x86_rnds4(B0, RK_i);
45 B1 = sm4_x86_rnds4(B1, RK_i);
46 B2 = sm4_x86_rnds4(B2, RK_i);
47 B3 = sm4_x86_rnds4(B3, RK_i);
48 }
49
50 B0.reverse().store_le(out);
51 B1.reverse().store_le(out + 32);
52 B2.reverse().store_le(out + 64);
53 B3.reverse().store_le(out + 96);
54}
55
56BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_decrypt_x2(uint8_t out[2 * 16],
57 const uint8_t inp[2 * 16],
58 std::span<const uint32_t> RK) {
59 auto B0 = SIMD_8x32::load_be(inp);
60
61 for(size_t i = 0; i != 8; ++i) {
62 auto RK_i = SIMD_8x32::load_le128(&RK[28 - 4 * i]).rev_words();
63 B0 = sm4_x86_rnds4(B0, RK_i);
64 }
65
66 B0.reverse().store_le(out);
67}
68
69BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_decrypt_x8(uint8_t out[8 * 16],
70 const uint8_t inp[8 * 16],
71 std::span<const uint32_t> RK) {
72 auto B0 = SIMD_8x32::load_be(inp);
73 auto B1 = SIMD_8x32::load_be(inp + 32);
74 auto B2 = SIMD_8x32::load_be(inp + 64);
75 auto B3 = SIMD_8x32::load_be(inp + 96);
76
77 for(size_t i = 0; i != 8; ++i) {
78 auto RK_i = SIMD_8x32::load_le128(&RK[28 - 4 * i]).rev_words();
79 B0 = sm4_x86_rnds4(B0, RK_i);
80 B1 = sm4_x86_rnds4(B1, RK_i);
81 B2 = sm4_x86_rnds4(B2, RK_i);
82 B3 = sm4_x86_rnds4(B3, RK_i);
83 }
84
85 B0.reverse().store_le(out);
86 B1.reverse().store_le(out + 32);
87 B2.reverse().store_le(out + 64);
88 B3.reverse().store_le(out + 96);
89}
90
91} // namespace
92
93void BOTAN_FN_ISA_AVX2_SM4 SM4::sm4_x86_encrypt(const uint8_t inp[], uint8_t out[], size_t blocks) const {
94 while(blocks >= 8) {
95 sm4_x86_encrypt_x8(out, inp, m_RK);
96 inp += 8 * 16;
97 out += 8 * 16;
98 blocks -= 8;
99 }
100
101 while(blocks >= 2) {
102 sm4_x86_encrypt_x2(out, inp, m_RK);
103 inp += 2 * 16;
104 out += 2 * 16;
105 blocks -= 2;
106 }
107
108 if(blocks > 0) {
109 uint8_t ibuf[2 * 16] = {0};
110 uint8_t obuf[2 * 16] = {0};
111 copy_mem(ibuf, inp, blocks * 16);
112 sm4_x86_encrypt_x2(obuf, ibuf, m_RK);
113 copy_mem(out, obuf, blocks * 16);
114 }
115}
116
117void BOTAN_FN_ISA_AVX2_SM4 SM4::sm4_x86_decrypt(const uint8_t inp[], uint8_t out[], size_t blocks) const {
118 while(blocks >= 8) {
119 sm4_x86_decrypt_x8(out, inp, m_RK);
120 inp += 8 * 16;
121 out += 8 * 16;
122 blocks -= 8;
123 }
124
125 while(blocks >= 2) {
126 sm4_x86_decrypt_x2(out, inp, m_RK);
127 inp += 2 * 16;
128 out += 2 * 16;
129 blocks -= 2;
130 }
131
132 if(blocks > 0) {
133 uint8_t ibuf[2 * 16] = {0};
134 uint8_t obuf[2 * 16] = {0};
135 copy_mem(ibuf, inp, blocks * 16);
136 sm4_x86_decrypt_x2(obuf, ibuf, m_RK);
137 copy_mem(out, obuf, blocks * 16);
138 }
139}
140
141} // namespace Botan
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le128(const uint8_t *in) noexcept
Definition simd_avx2.h:71
BOTAN_FN_ISA_AVX2 SIMD_8x32 rev_words() const noexcept
Definition simd_avx2.h:261
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_be(const uint8_t *in) noexcept
Definition simd_avx2.h:81
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
constexpr void copy_mem(T *out, const T *in, size_t n)
Definition mem_ops.h:145