Botan 3.10.0
Crypto and TLS for C++
sm4_x86.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sm4.h>
8
9#include <botan/mem_ops.h>
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/simd_avx2.h>
12
13namespace Botan {
14
15namespace {
16
17BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 SIMD_8x32 sm4_x86_rnds4(const SIMD_8x32& b, const SIMD_8x32& k) {
18 // NOLINTNEXTLINE(portability-simd-intrinsics)
19 return SIMD_8x32(_mm256_sm4rnds4_epi32(b.raw(), k.raw()));
20}
21
22BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_encrypt_x2(uint8_t out[2 * 16],
23 const uint8_t inp[2 * 16],
24 std::span<const uint32_t> RK) {
25 auto B0 = SIMD_8x32::load_be(inp);
26
27 for(size_t i = 0; i != 8; ++i) {
28 const auto RK_i = SIMD_8x32::load_le128(&RK[4 * i]);
29 B0 = sm4_x86_rnds4(B0, RK_i);
30 }
31
32 B0.reverse().store_le(out);
33}
34
35BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_encrypt_x8(uint8_t out[8 * 16],
36 const uint8_t inp[8 * 16],
37 std::span<const uint32_t> RK) {
38 auto B0 = SIMD_8x32::load_be(inp);
39 auto B1 = SIMD_8x32::load_be(inp + 32);
40 auto B2 = SIMD_8x32::load_be(inp + 64);
41 auto B3 = SIMD_8x32::load_be(inp + 96);
42
43 for(size_t i = 0; i != 8; ++i) {
44 auto RK_i = SIMD_8x32::load_le128(&RK[4 * i]);
45 B0 = sm4_x86_rnds4(B0, RK_i);
46 B1 = sm4_x86_rnds4(B1, RK_i);
47 B2 = sm4_x86_rnds4(B2, RK_i);
48 B3 = sm4_x86_rnds4(B3, RK_i);
49 }
50
51 B0.reverse().store_le(out);
52 B1.reverse().store_le(out + 32);
53 B2.reverse().store_le(out + 64);
54 B3.reverse().store_le(out + 96);
55}
56
57BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_decrypt_x2(uint8_t out[2 * 16],
58 const uint8_t inp[2 * 16],
59 std::span<const uint32_t> RK) {
60 auto B0 = SIMD_8x32::load_be(inp);
61
62 for(size_t i = 0; i != 8; ++i) {
63 auto RK_i = SIMD_8x32::load_le128(&RK[28 - 4 * i]).rev_words();
64 B0 = sm4_x86_rnds4(B0, RK_i);
65 }
66
67 B0.reverse().store_le(out);
68}
69
70BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_SM4 void sm4_x86_decrypt_x8(uint8_t out[8 * 16],
71 const uint8_t inp[8 * 16],
72 std::span<const uint32_t> RK) {
73 auto B0 = SIMD_8x32::load_be(inp);
74 auto B1 = SIMD_8x32::load_be(inp + 32);
75 auto B2 = SIMD_8x32::load_be(inp + 64);
76 auto B3 = SIMD_8x32::load_be(inp + 96);
77
78 for(size_t i = 0; i != 8; ++i) {
79 auto RK_i = SIMD_8x32::load_le128(&RK[28 - 4 * i]).rev_words();
80 B0 = sm4_x86_rnds4(B0, RK_i);
81 B1 = sm4_x86_rnds4(B1, RK_i);
82 B2 = sm4_x86_rnds4(B2, RK_i);
83 B3 = sm4_x86_rnds4(B3, RK_i);
84 }
85
86 B0.reverse().store_le(out);
87 B1.reverse().store_le(out + 32);
88 B2.reverse().store_le(out + 64);
89 B3.reverse().store_le(out + 96);
90}
91
92} // namespace
93
94void BOTAN_FN_ISA_AVX2_SM4 SM4::sm4_x86_encrypt(const uint8_t inp[], uint8_t out[], size_t blocks) const {
95 while(blocks >= 8) {
96 sm4_x86_encrypt_x8(out, inp, m_RK);
97 inp += 8 * 16;
98 out += 8 * 16;
99 blocks -= 8;
100 }
101
102 while(blocks >= 2) {
103 sm4_x86_encrypt_x2(out, inp, m_RK);
104 inp += 2 * 16;
105 out += 2 * 16;
106 blocks -= 2;
107 }
108
109 if(blocks > 0) {
110 uint8_t ibuf[2 * 16] = {0};
111 uint8_t obuf[2 * 16] = {0};
112 copy_mem(ibuf, inp, blocks * 16);
113 sm4_x86_encrypt_x2(obuf, ibuf, m_RK);
114 copy_mem(out, obuf, blocks * 16);
115 }
116}
117
118void BOTAN_FN_ISA_AVX2_SM4 SM4::sm4_x86_decrypt(const uint8_t inp[], uint8_t out[], size_t blocks) const {
119 while(blocks >= 8) {
120 sm4_x86_decrypt_x8(out, inp, m_RK);
121 inp += 8 * 16;
122 out += 8 * 16;
123 blocks -= 8;
124 }
125
126 while(blocks >= 2) {
127 sm4_x86_decrypt_x2(out, inp, m_RK);
128 inp += 2 * 16;
129 out += 2 * 16;
130 blocks -= 2;
131 }
132
133 if(blocks > 0) {
134 uint8_t ibuf[2 * 16] = {0};
135 uint8_t obuf[2 * 16] = {0};
136 copy_mem(ibuf, inp, blocks * 16);
137 sm4_x86_decrypt_x2(obuf, ibuf, m_RK);
138 copy_mem(out, obuf, blocks * 16);
139 }
140}
141
142} // namespace Botan
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le128(const uint8_t *in) noexcept
Definition simd_avx2.h:71
BOTAN_FN_ISA_AVX2 SIMD_8x32 rev_words() const noexcept
Definition simd_avx2.h:261
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_be(const uint8_t *in) noexcept
Definition simd_avx2.h:81
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
constexpr void copy_mem(T *out, const T *in, size_t n)
Definition mem_ops.h:145