Botan 3.7.1
Crypto and TLS for C++
sm4_armv8.cpp
Go to the documentation of this file.
1/*
2* (C) 2018 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sm4.h>
8
9#include <botan/compiler.h>
10#include <arm_neon.h>
11
12namespace Botan {
13
14namespace {
15
// Byte-index table for vqtbl1q_u8: output word i is input word 3-i, with the
// byte order inside each 32-bit word left unchanged.
alignas(16) constexpr uint8_t qswap_tbl[16] = {
   12, 13, 14, 15,  // word 3 -> word 0
   8, 9, 10, 11,    // word 2 -> word 1
   4, 5, 6, 7,      // word 1 -> word 2
   0, 1, 2, 3,      // word 0 -> word 3
};

// Byte-index table for vqtbl1q_u8: output byte i is input byte 15-i, i.e. the
// whole 16-byte vector is reversed.
alignas(16) constexpr uint8_t bswap_tbl[16] = {
   15, 14, 13, 12,
   11, 10, 9, 8,
   7, 6, 5, 4,
   3, 2, 1, 0,
};
19
20inline uint32x4_t qswap_32(uint32x4_t B) {
21 return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), vld1q_u8(qswap_tbl)));
22}
23
24inline uint32x4_t bswap_32(uint32x4_t B) {
25 return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B)));
26}
27
28/*
29 Swap both the quad-words and bytes within each word
30 equivalent to return bswap_32(qswap_32(B))
31*/
32inline uint32x4_t bqswap_32(uint32x4_t B) {
33 return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), vld1q_u8(bswap_tbl)));
34}
35
/*
 Apply one SM4E step (vsm4eq_u32) with the same round-key vector K to each of
 the four block states B0..B3, updating them in place.

 The four updates do not depend on one another. Callers pass K as a vector of
 four consecutive round-key words loaded from m_RK.

 NOTE(review): BOTAN_FUNC_ISA presumably enables the SM4 instructions for this
 function only - confirm against its definition in compiler.h.
*/
36inline void BOTAN_FUNC_ISA("arch=armv8.2-a+sm4")
37 SM4_E(uint32x4_t& B0, uint32x4_t& B1, uint32x4_t& B2, uint32x4_t& B3, uint32x4_t K) {
38 B0 = vsm4eq_u32(B0, K);
39 B1 = vsm4eq_u32(B1, K);
40 B2 = vsm4eq_u32(B2, K);
41 B3 = vsm4eq_u32(B3, K);
42}
43
44} // namespace
45
46void BOTAN_FUNC_ISA("arch=armv8.2-a+sm4") SM4::sm4_armv8_encrypt(const uint8_t input8[],
47 uint8_t output8[],
48 size_t blocks) const {
49 const uint32x4_t K0 = vld1q_u32(&m_RK[0]);
50 const uint32x4_t K1 = vld1q_u32(&m_RK[4]);
51 const uint32x4_t K2 = vld1q_u32(&m_RK[8]);
52 const uint32x4_t K3 = vld1q_u32(&m_RK[12]);
53 const uint32x4_t K4 = vld1q_u32(&m_RK[16]);
54 const uint32x4_t K5 = vld1q_u32(&m_RK[20]);
55 const uint32x4_t K6 = vld1q_u32(&m_RK[24]);
56 const uint32x4_t K7 = vld1q_u32(&m_RK[28]);
57
58 const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8));
59 uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8));
60
61 while(blocks >= 4) {
62 uint32x4_t B0 = bswap_32(vld1q_u32(input32));
63 uint32x4_t B1 = bswap_32(vld1q_u32(input32 + 4));
64 uint32x4_t B2 = bswap_32(vld1q_u32(input32 + 8));
65 uint32x4_t B3 = bswap_32(vld1q_u32(input32 + 12));
66
67 SM4_E(B0, B1, B2, B3, K0);
68 SM4_E(B0, B1, B2, B3, K1);
69 SM4_E(B0, B1, B2, B3, K2);
70 SM4_E(B0, B1, B2, B3, K3);
71 SM4_E(B0, B1, B2, B3, K4);
72 SM4_E(B0, B1, B2, B3, K5);
73 SM4_E(B0, B1, B2, B3, K6);
74 SM4_E(B0, B1, B2, B3, K7);
75
76 vst1q_u32(output32, bqswap_32(B0));
77 vst1q_u32(output32 + 4, bqswap_32(B1));
78 vst1q_u32(output32 + 8, bqswap_32(B2));
79 vst1q_u32(output32 + 12, bqswap_32(B3));
80
81 input32 += 4 * 4;
82 output32 += 4 * 4;
83 blocks -= 4;
84 }
85
86 for(size_t i = 0; i != blocks; ++i) {
87 uint32x4_t B = bswap_32(vld1q_u32(input32));
88
89 B = vsm4eq_u32(B, K0);
90 B = vsm4eq_u32(B, K1);
91 B = vsm4eq_u32(B, K2);
92 B = vsm4eq_u32(B, K3);
93 B = vsm4eq_u32(B, K4);
94 B = vsm4eq_u32(B, K5);
95 B = vsm4eq_u32(B, K6);
96 B = vsm4eq_u32(B, K7);
97
98 vst1q_u32(output32, bqswap_32(B));
99
100 input32 += 4;
101 output32 += 4;
102 }
103}
104
105void BOTAN_FUNC_ISA("arch=armv8.2-a+sm4") SM4::sm4_armv8_decrypt(const uint8_t input8[],
106 uint8_t output8[],
107 size_t blocks) const {
108 const uint32x4_t K0 = qswap_32(vld1q_u32(&m_RK[0]));
109 const uint32x4_t K1 = qswap_32(vld1q_u32(&m_RK[4]));
110 const uint32x4_t K2 = qswap_32(vld1q_u32(&m_RK[8]));
111 const uint32x4_t K3 = qswap_32(vld1q_u32(&m_RK[12]));
112 const uint32x4_t K4 = qswap_32(vld1q_u32(&m_RK[16]));
113 const uint32x4_t K5 = qswap_32(vld1q_u32(&m_RK[20]));
114 const uint32x4_t K6 = qswap_32(vld1q_u32(&m_RK[24]));
115 const uint32x4_t K7 = qswap_32(vld1q_u32(&m_RK[28]));
116
117 const uint32_t* input32 = reinterpret_cast<const uint32_t*>(reinterpret_cast<const void*>(input8));
118 uint32_t* output32 = reinterpret_cast<uint32_t*>(reinterpret_cast<void*>(output8));
119
120 while(blocks >= 4) {
121 uint32x4_t B0 = bswap_32(vld1q_u32(input32));
122 uint32x4_t B1 = bswap_32(vld1q_u32(input32 + 4));
123 uint32x4_t B2 = bswap_32(vld1q_u32(input32 + 8));
124 uint32x4_t B3 = bswap_32(vld1q_u32(input32 + 12));
125
126 SM4_E(B0, B1, B2, B3, K7);
127 SM4_E(B0, B1, B2, B3, K6);
128 SM4_E(B0, B1, B2, B3, K5);
129 SM4_E(B0, B1, B2, B3, K4);
130 SM4_E(B0, B1, B2, B3, K3);
131 SM4_E(B0, B1, B2, B3, K2);
132 SM4_E(B0, B1, B2, B3, K1);
133 SM4_E(B0, B1, B2, B3, K0);
134
135 vst1q_u32(output32, bqswap_32(B0));
136 vst1q_u32(output32 + 4, bqswap_32(B1));
137 vst1q_u32(output32 + 8, bqswap_32(B2));
138 vst1q_u32(output32 + 12, bqswap_32(B3));
139
140 input32 += 4 * 4;
141 output32 += 4 * 4;
142 blocks -= 4;
143 }
144
145 for(size_t i = 0; i != blocks; ++i) {
146 uint32x4_t B = bswap_32(vld1q_u32(input32));
147
148 B = vsm4eq_u32(B, K7);
149 B = vsm4eq_u32(B, K6);
150 B = vsm4eq_u32(B, K5);
151 B = vsm4eq_u32(B, K4);
152 B = vsm4eq_u32(B, K3);
153 B = vsm4eq_u32(B, K2);
154 B = vsm4eq_u32(B, K1);
155 B = vsm4eq_u32(B, K0);
156
157 vst1q_u32(output32, bqswap_32(B));
158
159 input32 += 4;
160 output32 += 4;
161 }
162}
163
164} // namespace Botan
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:42
uint8x16_t uint8x16_t K2
Definition aes_armv8.cpp:32