Botan  2.12.1
Crypto and TLS for C++11
sm4.cpp
Go to the documentation of this file.
1 /*
2 * SM4
3 * (C) 2017 Ribose Inc
4 * (C) 2018 Jack Lloyd
5 *
6 * Botan is released under the Simplified BSD License (see license.txt)
7 */
8 
9 #include <botan/sm4.h>
10 #include <botan/loadstor.h>
11 #include <botan/rotate.h>
12 #include <botan/cpuid.h>
13 
14 namespace Botan {
15 
16 namespace {
17 
/*
* SM4 byte substitution table (256 entries, one output byte per input byte).
* Laid out 16 entries per row; the trailing comment gives the index range
* covered by each row.
*/
alignas(64) const uint8_t SM4_SBOX[256] = {
   0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05, // 0x00-0x0F
   0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, // 0x10-0x1F
   0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62, // 0x20-0x2F
   0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6, // 0x30-0x3F
   0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, // 0x40-0x4F
   0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35, // 0x50-0x5F
   0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87, // 0x60-0x6F
   0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, // 0x70-0x7F
   0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1, // 0x80-0x8F
   0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3, // 0x90-0x9F
   0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, // 0xA0-0xAF
   0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51, // 0xB0-0xBF
   0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8, // 0xC0-0xCF
   0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, // 0xD0-0xDF
   0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84, // 0xE0-0xEF
   0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48  // 0xF0-0xFF
};
37 
/*
* Precomputed combination of the S-box and the L linear transform:
*
*    SM4_SBOX_T[j] == L(uint32_t(SM4_SBOX[j]) << 24)
*
* where L(t) = t ^ rotl<2>(t) ^ rotl<10>(t) ^ rotl<18>(t) ^ rotl<24>(t),
* i.e. L applied to a word whose top 8 bits hold the substituted byte and
* whose remaining bits are zero. Contributions for the other three byte
* positions are obtained by rotating an entry right by 8, 16 or 24 bits.
*/
alignas(64) const uint32_t SM4_SBOX_T[256] = {
   0x8ED55B5B, 0xD0924242, 0x4DEAA7A7, 0x06FDFBFB, 0xFCCF3333, 0x65E28787,
   0xC93DF4F4, 0x6BB5DEDE, 0x4E165858, 0x6EB4DADA, 0x44145050, 0xCAC10B0B,
   0x8828A0A0, 0x17F8EFEF, 0x9C2CB0B0, 0x11051414, 0x872BACAC, 0xFB669D9D,
   0xF2986A6A, 0xAE77D9D9, 0x822AA8A8, 0x46BCFAFA, 0x14041010, 0xCFC00F0F,
   0x02A8AAAA, 0x54451111, 0x5F134C4C, 0xBE269898, 0x6D482525, 0x9E841A1A,
   0x1E061818, 0xFD9B6666, 0xEC9E7272, 0x4A430909, 0x10514141, 0x24F7D3D3,
   0xD5934646, 0x53ECBFBF, 0xF89A6262, 0x927BE9E9, 0xFF33CCCC, 0x04555151,
   0x270B2C2C, 0x4F420D0D, 0x59EEB7B7, 0xF3CC3F3F, 0x1CAEB2B2, 0xEA638989,
   0x74E79393, 0x7FB1CECE, 0x6C1C7070, 0x0DABA6A6, 0xEDCA2727, 0x28082020,
   0x48EBA3A3, 0xC1975656, 0x80820202, 0xA3DC7F7F, 0xC4965252, 0x12F9EBEB,
   0xA174D5D5, 0xB38D3E3E, 0xC33FFCFC, 0x3EA49A9A, 0x5B461D1D, 0x1B071C1C,
   0x3BA59E9E, 0x0CFFF3F3, 0x3FF0CFCF, 0xBF72CDCD, 0x4B175C5C, 0x52B8EAEA,
   0x8F810E0E, 0x3D586565, 0xCC3CF0F0, 0x7D196464, 0x7EE59B9B, 0x91871616,
   0x734E3D3D, 0x08AAA2A2, 0xC869A1A1, 0xC76AADAD, 0x85830606, 0x7AB0CACA,
   0xB570C5C5, 0xF4659191, 0xB2D96B6B, 0xA7892E2E, 0x18FBE3E3, 0x47E8AFAF,
   0x330F3C3C, 0x674A2D2D, 0xB071C1C1, 0x0E575959, 0xE99F7676, 0xE135D4D4,
   0x661E7878, 0xB4249090, 0x360E3838, 0x265F7979, 0xEF628D8D, 0x38596161,
   0x95D24747, 0x2AA08A8A, 0xB1259494, 0xAA228888, 0x8C7DF1F1, 0xD73BECEC,
   0x05010404, 0xA5218484, 0x9879E1E1, 0x9B851E1E, 0x84D75353, 0x00000000,
   0x5E471919, 0x0B565D5D, 0xE39D7E7E, 0x9FD04F4F, 0xBB279C9C, 0x1A534949,
   0x7C4D3131, 0xEE36D8D8, 0x0A020808, 0x7BE49F9F, 0x20A28282, 0xD4C71313,
   0xE8CB2323, 0xE69C7A7A, 0x42E9ABAB, 0x43BDFEFE, 0xA2882A2A, 0x9AD14B4B,
   0x40410101, 0xDBC41F1F, 0xD838E0E0, 0x61B7D6D6, 0x2FA18E8E, 0x2BF4DFDF,
   0x3AF1CBCB, 0xF6CD3B3B, 0x1DFAE7E7, 0xE5608585, 0x41155454, 0x25A38686,
   0x60E38383, 0x16ACBABA, 0x295C7575, 0x34A69292, 0xF7996E6E, 0xE434D0D0,
   0x721A6868, 0x01545555, 0x19AFB6B6, 0xDF914E4E, 0xFA32C8C8, 0xF030C0C0,
   0x21F6D7D7, 0xBC8E3232, 0x75B3C6C6, 0x6FE08F8F, 0x691D7474, 0x2EF5DBDB,
   0x6AE18B8B, 0x962EB8B8, 0x8A800A0A, 0xFE679999, 0xE2C92B2B, 0xE0618181,
   0xC0C30303, 0x8D29A4A4, 0xAF238C8C, 0x07A9AEAE, 0x390D3434, 0x1F524D4D,
   0x764F3939, 0xD36EBDBD, 0x81D65757, 0xB7D86F6F, 0xEB37DCDC, 0x51441515,
   0xA6DD7B7B, 0x09FEF7F7, 0xB68C3A3A, 0x932FBCBC, 0x0F030C0C, 0x03FCFFFF,
   0xC26BA9A9, 0xBA73C9C9, 0xD96CB5B5, 0xDC6DB1B1, 0x375A6D6D, 0x15504545,
   0xB98F3636, 0x771B6C6C, 0x13ADBEBE, 0xDA904A4A, 0x57B9EEEE, 0xA9DE7777,
   0x4CBEF2F2, 0x837EFDFD, 0x55114444, 0xBDDA6767, 0x2C5D7171, 0x45400505,
   0x631F7C7C, 0x50104040, 0x325B6969, 0xB8DB6363, 0x220A2828, 0xC5C20707,
   0xF531C4C4, 0xA88A2222, 0x31A79696, 0xF9CE3737, 0x977AEDED, 0x49BFF6F6,
   0x992DB4B4, 0xA475D1D1, 0x90D34343, 0x5A124848, 0x58BAE2E2, 0x71E69797,
   0x64B6D2D2, 0x70B2C2C2, 0xAD8B2626, 0xCD68A5A5, 0xCB955E5E, 0x624B2929,
   0x3C0C3030, 0xCE945A5A, 0xAB76DDDD, 0x867FF9F9, 0xF1649595, 0x5DBBE6E6,
   0x35F2C7C7, 0x2D092424, 0xD1C61717, 0xD66FB9B9, 0xDEC51B1B, 0x94861212,
   0x78186060, 0x30F3C3C3, 0x897CF5F5, 0x5CEFB3B3, 0xD23AE8E8, 0xACDF7373,
   0x794C3535, 0xA0208080, 0x9D78E5E5, 0x56EDBBBB, 0x235E7D7D, 0xC63EF8F8,
   0x8BD45F5F, 0xE7C82F2F, 0xDD39E4E4, 0x68492121
};
86 
87 inline uint32_t SM4_T_slow(uint32_t b)
88  {
89  const uint32_t t = make_uint32(SM4_SBOX[get_byte(0,b)],
90  SM4_SBOX[get_byte(1,b)],
91  SM4_SBOX[get_byte(2,b)],
92  SM4_SBOX[get_byte(3,b)]);
93 
94  // L linear transform
95  return t ^ rotl<2>(t) ^ rotl<10>(t) ^ rotl<18>(t) ^ rotl<24>(t);
96  }
97 
98 inline uint32_t SM4_T(uint32_t b)
99  {
100  return SM4_SBOX_T[get_byte(0,b)] ^
101  rotr< 8>(SM4_SBOX_T[get_byte(1,b)]) ^
102  rotr<16>(SM4_SBOX_T[get_byte(2,b)]) ^
103  rotr<24>(SM4_SBOX_T[get_byte(3,b)]);
104  }
105 
106 // Variant of T for key schedule
107 inline uint32_t SM4_Tp(uint32_t b)
108  {
109  const uint32_t t = make_uint32(SM4_SBOX[get_byte(0,b)],
110  SM4_SBOX[get_byte(1,b)],
111  SM4_SBOX[get_byte(2,b)],
112  SM4_SBOX[get_byte(3,b)]);
113 
114  // L' linear transform
115  return t ^ rotl<13>(t) ^ rotl<23>(t);
116  }
117 
/*
* Four consecutive SM4 encryption rounds on one block.
*
*  B  name prefix of the four uint32_t state words (B##0 .. B##3)
*  R  index of the group of four round keys to use: m_RK[4*R] .. m_RK[4*R+3],
*     consumed in ascending order
*  F  round function applied to the XOR of three state words and a round key
*
* R is parenthesized in the expansion so that an expression argument such
* as `i+1` selects the intended round-key group. An array named m_RK must
* be in scope at the point of use.
*/
#define SM4_E_RNDS(B, R, F) do {                             \
   B##0 ^= F(B##1 ^ B##2 ^ B##3 ^ m_RK[4*(R)+0]);            \
   B##1 ^= F(B##0 ^ B##2 ^ B##3 ^ m_RK[4*(R)+1]);            \
   B##2 ^= F(B##0 ^ B##1 ^ B##3 ^ m_RK[4*(R)+2]);            \
   B##3 ^= F(B##0 ^ B##1 ^ B##2 ^ m_RK[4*(R)+3]);            \
   } while(0)
124 
125 #define SM4_D_RNDS(B, R, F) do { \
126  B##0 ^= F(B##1 ^ B##2 ^ B##3 ^ m_RK[4*R+3]); \
127  B##1 ^= F(B##0 ^ B##2 ^ B##3 ^ m_RK[4*R+2]); \
128  B##2 ^= F(B##0 ^ B##1 ^ B##3 ^ m_RK[4*R+1]); \
129  B##3 ^= F(B##0 ^ B##1 ^ B##2 ^ m_RK[4*R+0]); \
130  } while(0)
131 
132 }
133 
134 /*
135 * SM4 Encryption
136 */
137 void SM4::encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
138  {
139  verify_key_set(m_RK.empty() == false);
140 
141 #if defined(BOTAN_HAS_SM4_ARMV8)
142  if(CPUID::has_arm_sm4())
143  return sm4_armv8_encrypt(in, out, blocks);
144 #endif
145 
146  while(blocks >= 2)
147  {
148  uint32_t B0 = load_be<uint32_t>(in, 0);
149  uint32_t B1 = load_be<uint32_t>(in, 1);
150  uint32_t B2 = load_be<uint32_t>(in, 2);
151  uint32_t B3 = load_be<uint32_t>(in, 3);
152 
153  uint32_t C0 = load_be<uint32_t>(in, 4);
154  uint32_t C1 = load_be<uint32_t>(in, 5);
155  uint32_t C2 = load_be<uint32_t>(in, 6);
156  uint32_t C3 = load_be<uint32_t>(in, 7);
157 
158  SM4_E_RNDS(B, 0, SM4_T_slow);
159  SM4_E_RNDS(C, 0, SM4_T_slow);
160  SM4_E_RNDS(B, 1, SM4_T);
161  SM4_E_RNDS(C, 1, SM4_T);
162  SM4_E_RNDS(B, 2, SM4_T);
163  SM4_E_RNDS(C, 2, SM4_T);
164  SM4_E_RNDS(B, 3, SM4_T);
165  SM4_E_RNDS(C, 3, SM4_T);
166  SM4_E_RNDS(B, 4, SM4_T);
167  SM4_E_RNDS(C, 4, SM4_T);
168  SM4_E_RNDS(B, 5, SM4_T);
169  SM4_E_RNDS(C, 5, SM4_T);
170  SM4_E_RNDS(B, 6, SM4_T);
171  SM4_E_RNDS(C, 6, SM4_T);
172  SM4_E_RNDS(B, 7, SM4_T_slow);
173  SM4_E_RNDS(C, 7, SM4_T_slow);
174 
175  store_be(out, B3, B2, B1, B0, C3, C2, C1, C0);
176 
177  in += 2*BLOCK_SIZE;
178  out += 2*BLOCK_SIZE;
179  blocks -= 2;
180  }
181 
182  for(size_t i = 0; i != blocks; ++i)
183  {
184  uint32_t B0 = load_be<uint32_t>(in, 0);
185  uint32_t B1 = load_be<uint32_t>(in, 1);
186  uint32_t B2 = load_be<uint32_t>(in, 2);
187  uint32_t B3 = load_be<uint32_t>(in, 3);
188 
189  SM4_E_RNDS(B, 0, SM4_T_slow);
190  SM4_E_RNDS(B, 1, SM4_T);
191  SM4_E_RNDS(B, 2, SM4_T);
192  SM4_E_RNDS(B, 3, SM4_T);
193  SM4_E_RNDS(B, 4, SM4_T);
194  SM4_E_RNDS(B, 5, SM4_T);
195  SM4_E_RNDS(B, 6, SM4_T);
196  SM4_E_RNDS(B, 7, SM4_T_slow);
197 
198  store_be(out, B3, B2, B1, B0);
199 
200  in += BLOCK_SIZE;
201  out += BLOCK_SIZE;
202  }
203  }
204 
205 /*
206 * SM4 Decryption
207 */
208 void SM4::decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
209  {
210  verify_key_set(m_RK.empty() == false);
211 
212 #if defined(BOTAN_HAS_SM4_ARMV8)
213  if(CPUID::has_arm_sm4())
214  return sm4_armv8_decrypt(in, out, blocks);
215 #endif
216 
217  while(blocks >= 2)
218  {
219  uint32_t B0 = load_be<uint32_t>(in, 0);
220  uint32_t B1 = load_be<uint32_t>(in, 1);
221  uint32_t B2 = load_be<uint32_t>(in, 2);
222  uint32_t B3 = load_be<uint32_t>(in, 3);
223 
224  uint32_t C0 = load_be<uint32_t>(in, 4);
225  uint32_t C1 = load_be<uint32_t>(in, 5);
226  uint32_t C2 = load_be<uint32_t>(in, 6);
227  uint32_t C3 = load_be<uint32_t>(in, 7);
228 
229  SM4_D_RNDS(B, 7, SM4_T_slow);
230  SM4_D_RNDS(C, 7, SM4_T_slow);
231  SM4_D_RNDS(B, 6, SM4_T);
232  SM4_D_RNDS(C, 6, SM4_T);
233  SM4_D_RNDS(B, 5, SM4_T);
234  SM4_D_RNDS(C, 5, SM4_T);
235  SM4_D_RNDS(B, 4, SM4_T);
236  SM4_D_RNDS(C, 4, SM4_T);
237  SM4_D_RNDS(B, 3, SM4_T);
238  SM4_D_RNDS(C, 3, SM4_T);
239  SM4_D_RNDS(B, 2, SM4_T);
240  SM4_D_RNDS(C, 2, SM4_T);
241  SM4_D_RNDS(B, 1, SM4_T);
242  SM4_D_RNDS(C, 1, SM4_T);
243  SM4_D_RNDS(B, 0, SM4_T_slow);
244  SM4_D_RNDS(C, 0, SM4_T_slow);
245 
246  store_be(out, B3, B2, B1, B0, C3, C2, C1, C0);
247 
248  in += 2*BLOCK_SIZE;
249  out += 2*BLOCK_SIZE;
250  blocks -= 2;
251  }
252 
253  for(size_t i = 0; i != blocks; ++i)
254  {
255  uint32_t B0 = load_be<uint32_t>(in, 0);
256  uint32_t B1 = load_be<uint32_t>(in, 1);
257  uint32_t B2 = load_be<uint32_t>(in, 2);
258  uint32_t B3 = load_be<uint32_t>(in, 3);
259 
260  SM4_D_RNDS(B, 7, SM4_T_slow);
261  SM4_D_RNDS(B, 6, SM4_T);
262  SM4_D_RNDS(B, 5, SM4_T);
263  SM4_D_RNDS(B, 4, SM4_T);
264  SM4_D_RNDS(B, 3, SM4_T);
265  SM4_D_RNDS(B, 2, SM4_T);
266  SM4_D_RNDS(B, 1, SM4_T);
267  SM4_D_RNDS(B, 0, SM4_T_slow);
268 
269  store_be(out, B3, B2, B1, B0);
270 
271  in += BLOCK_SIZE;
272  out += BLOCK_SIZE;
273  }
274  }
275 
276 #undef SM4_E_RNDS
277 #undef SM4_D_RNDS
278 
279 /*
280 * SM4 Key Schedule
281 */
282 void SM4::key_schedule(const uint8_t key[], size_t)
283  {
284  // System parameter or family key
285  const uint32_t FK[4] = { 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc };
286 
287  const uint32_t CK[32] = {
288  0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269,
289  0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9,
290  0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249,
291  0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9,
292  0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229,
293  0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299,
294  0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209,
295  0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
296  };
297 
298  secure_vector<uint32_t> K(4);
299  K[0] = load_be<uint32_t>(key, 0) ^ FK[0];
300  K[1] = load_be<uint32_t>(key, 1) ^ FK[1];
301  K[2] = load_be<uint32_t>(key, 2) ^ FK[2];
302  K[3] = load_be<uint32_t>(key, 3) ^ FK[3];
303 
304  m_RK.resize(32);
305  for(size_t i = 0; i != 32; ++i)
306  {
307  K[i % 4] ^= SM4_Tp(K[(i+1)%4] ^ K[(i+2)%4] ^ K[(i+3)%4] ^ CK[i]);
308  m_RK[i] = K[i % 4];
309  }
310  }
311 
313  {
314  zap(m_RK);
315  }
316 
317 size_t SM4::parallelism() const
318  {
319 #if defined(BOTAN_HAS_SM4_ARMV8)
320  if(CPUID::has_arm_sm4())
321  {
322  return 4;
323  }
324 #endif
325 
326  return 1;
327  }
328 
329 std::string SM4::provider() const
330  {
331 #if defined(BOTAN_HAS_SM4_ARMV8)
332  if(CPUID::has_arm_sm4())
333  {
334  return "armv8";
335  }
336 #endif
337 
338  return "base";
339  }
340 
341 }
void verify_key_set(bool cond) const
Definition: sym_algo.h:89
void zap(std::vector< T, Alloc > &vec)
Definition: secmem.h:170
void store_be(uint16_t in, uint8_t out[2])
Definition: loadstor.h:438
uint32_t load_be< uint32_t >(const uint8_t in[], size_t off)
Definition: loadstor.h:179
constexpr uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:41
#define SM4_E_RNDS(B, R, F)
Definition: sm4.cpp:118
size_t parallelism() const override
Definition: sm4.cpp:317
constexpr uint32_t make_uint32(uint8_t i0, uint8_t i1, uint8_t i2, uint8_t i3)
Definition: loadstor.h:67
void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override
Definition: sm4.cpp:137
Definition: alg_id.cpp:13
std::string provider() const override
Definition: sm4.cpp:329
void decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const override
Definition: sm4.cpp:208
void clear() override
Definition: sm4.cpp:312
#define SM4_D_RNDS(B, R, F)
Definition: sm4.cpp:125