Botan 3.11.0
Crypto and TLS for C++
sha1_simd.cpp
Go to the documentation of this file.
1/*
2* SHA-1 using SIMD instructions
3* Based on public domain code by Dean Gaudet
4* (http://arctic.org/~dean/crypto/sha1.html)
5* (C) 2009-2011,2023,2025 Jack Lloyd
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#include <botan/internal/sha1.h>
11
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/sha1_f.h>
14#include <botan/internal/simd_4x32.h>
15
16namespace Botan {
17
18namespace {
19
20/*
21For each multiple of 4, t, we want to calculate this:
22
23W[t+0] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);
24W[t+1] = rol(W[t-2] ^ W[t-7] ^ W[t-13] ^ W[t-15], 1);
25W[t+2] = rol(W[t-1] ^ W[t-6] ^ W[t-12] ^ W[t-14], 1);
26W[t+3] = rol(W[t] ^ W[t-5] ^ W[t-11] ^ W[t-13], 1);
27
28we'll actually calculate this:
29
30W[t+0] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);
31W[t+1] = rol(W[t-2] ^ W[t-7] ^ W[t-13] ^ W[t-15], 1);
32W[t+2] = rol(W[t-1] ^ W[t-6] ^ W[t-12] ^ W[t-14], 1);
33W[t+3] = rol( 0 ^ W[t-5] ^ W[t-11] ^ W[t-13], 1);
34W[t+3] ^= rol(W[t+0], 1);
35
36the parameters are:
37
38W0 = &W[t-16];
39W1 = &W[t-12];
40W2 = &W[t- 8];
41W3 = &W[t- 4];
42
43and on output:
44W0 = W[t]..W[t+3]
45*/
46BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 sha1_simd_next_w(SIMD_4x32& XW0,
47 SIMD_4x32 XW1,
48 SIMD_4x32 XW2,
49 SIMD_4x32 XW3) {
50 SIMD_4x32 T0 = XW0; // W[t-16..t-13]
51 T0 ^= SIMD_4x32::alignr8(XW1, XW0); // W[t-14..t-11]
52 T0 ^= XW2; // W[t-8..t-5]
53 T0 ^= XW3.shift_elems_right<1>(); // W[t-3..t-1] || 0
54
55 /* unrotated W[t]..W[t+2] in T0 ... still need W[t+3] */
56
57 // Extract w[t+0] into T2
58 auto T2 = T0.shift_elems_left<3>();
59
60 // Main rotation
61 T0 = T0.rotl<1>();
62
63 // Rotation of W[t+3] has rot by 2 to account for us working on non-rotated words
64 T2 = T2.rotl<2>();
65
66 // Merge rol(W[t+0], 1) into W[t+3]
67 T0 ^= T2;
68
69 XW0 = T0;
70 return T0;
71}
72
73} // namespace
74
75/*
76* SHA-1 Compression Function using SIMD for message expansion
77*/
78//static
79void BOTAN_FN_ISA_SIMD_4X32 SHA_1::simd_compress_n(digest_type& digest, std::span<const uint8_t> input, size_t blocks) {
80 using namespace SHA1_F;
81
82 const SIMD_4x32 K00_19 = SIMD_4x32::splat(K1);
83 const SIMD_4x32 K20_39 = SIMD_4x32::splat(K2);
84 const SIMD_4x32 K40_59 = SIMD_4x32::splat(K3);
85 const SIMD_4x32 K60_79 = SIMD_4x32::splat(K4);
86
87 uint32_t A = digest[0];
88 uint32_t B = digest[1];
89 uint32_t C = digest[2];
90 uint32_t D = digest[3];
91 uint32_t E = digest[4];
92
93 BufferSlicer in(input);
94
95 for(size_t i = 0; i != blocks; ++i) {
96 uint32_t PT[4];
97
98 const auto block = in.take(block_bytes);
99
100 SIMD_4x32 W0 = SIMD_4x32::load_be(&block[0]); // NOLINT(*-container-data-pointer)
101 SIMD_4x32 W1 = SIMD_4x32::load_be(&block[16]);
102 SIMD_4x32 W2 = SIMD_4x32::load_be(&block[32]);
103 SIMD_4x32 W3 = SIMD_4x32::load_be(&block[48]);
104
105 SIMD_4x32 P0 = W0 + K00_19;
106 SIMD_4x32 P1 = W1 + K00_19;
107 SIMD_4x32 P2 = W2 + K00_19;
108 SIMD_4x32 P3 = W3 + K00_19;
109
110 P0.store_le(PT);
111 F1(A, B, C, D, E, PT[0]);
112 F1(E, A, B, C, D, PT[1]);
113 F1(D, E, A, B, C, PT[2]);
114 F1(C, D, E, A, B, PT[3]);
115 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K00_19;
116
117 P1.store_le(PT);
118 F1(B, C, D, E, A, PT[0]);
119 F1(A, B, C, D, E, PT[1]);
120 F1(E, A, B, C, D, PT[2]);
121 F1(D, E, A, B, C, PT[3]);
122 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
123
124 P2.store_le(PT);
125 F1(C, D, E, A, B, PT[0]);
126 F1(B, C, D, E, A, PT[1]);
127 F1(A, B, C, D, E, PT[2]);
128 F1(E, A, B, C, D, PT[3]);
129 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K20_39;
130
131 P3.store_le(PT);
132 F1(D, E, A, B, C, PT[0]);
133 F1(C, D, E, A, B, PT[1]);
134 F1(B, C, D, E, A, PT[2]);
135 F1(A, B, C, D, E, PT[3]);
136 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K20_39;
137
138 P0.store_le(PT);
139 F1(E, A, B, C, D, PT[0]);
140 F1(D, E, A, B, C, PT[1]);
141 F1(C, D, E, A, B, PT[2]);
142 F1(B, C, D, E, A, PT[3]);
143 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K20_39;
144
145 P1.store_le(PT);
146 F2(A, B, C, D, E, PT[0]);
147 F2(E, A, B, C, D, PT[1]);
148 F2(D, E, A, B, C, PT[2]);
149 F2(C, D, E, A, B, PT[3]);
150 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
151
152 P2.store_le(PT);
153 F2(B, C, D, E, A, PT[0]);
154 F2(A, B, C, D, E, PT[1]);
155 F2(E, A, B, C, D, PT[2]);
156 F2(D, E, A, B, C, PT[3]);
157 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
158
159 P3.store_le(PT);
160 F2(C, D, E, A, B, PT[0]);
161 F2(B, C, D, E, A, PT[1]);
162 F2(A, B, C, D, E, PT[2]);
163 F2(E, A, B, C, D, PT[3]);
164 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K40_59;
165
166 P0.store_le(PT);
167 F2(D, E, A, B, C, PT[0]);
168 F2(C, D, E, A, B, PT[1]);
169 F2(B, C, D, E, A, PT[2]);
170 F2(A, B, C, D, E, PT[3]);
171 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K40_59;
172
173 P1.store_le(PT);
174 F2(E, A, B, C, D, PT[0]);
175 F2(D, E, A, B, C, PT[1]);
176 F2(C, D, E, A, B, PT[2]);
177 F2(B, C, D, E, A, PT[3]);
178 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K40_59;
179
180 P2.store_le(PT);
181 F3(A, B, C, D, E, PT[0]);
182 F3(E, A, B, C, D, PT[1]);
183 F3(D, E, A, B, C, PT[2]);
184 F3(C, D, E, A, B, PT[3]);
185 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
186
187 P3.store_le(PT);
188 F3(B, C, D, E, A, PT[0]);
189 F3(A, B, C, D, E, PT[1]);
190 F3(E, A, B, C, D, PT[2]);
191 F3(D, E, A, B, C, PT[3]);
192 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
193
194 P0.store_le(PT);
195 F3(C, D, E, A, B, PT[0]);
196 F3(B, C, D, E, A, PT[1]);
197 F3(A, B, C, D, E, PT[2]);
198 F3(E, A, B, C, D, PT[3]);
199 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K60_79;
200
201 P1.store_le(PT);
202 F3(D, E, A, B, C, PT[0]);
203 F3(C, D, E, A, B, PT[1]);
204 F3(B, C, D, E, A, PT[2]);
205 F3(A, B, C, D, E, PT[3]);
206 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K60_79;
207
208 P2.store_le(PT);
209 F3(E, A, B, C, D, PT[0]);
210 F3(D, E, A, B, C, PT[1]);
211 F3(C, D, E, A, B, PT[2]);
212 F3(B, C, D, E, A, PT[3]);
213 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K60_79;
214
215 P3.store_le(PT);
216 F4(A, B, C, D, E, PT[0]);
217 F4(E, A, B, C, D, PT[1]);
218 F4(D, E, A, B, C, PT[2]);
219 F4(C, D, E, A, B, PT[3]);
220 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
221
222 P0.store_le(PT);
223 F4(B, C, D, E, A, PT[0]);
224 F4(A, B, C, D, E, PT[1]);
225 F4(E, A, B, C, D, PT[2]);
226 F4(D, E, A, B, C, PT[3]);
227
228 P1.store_le(PT);
229 F4(C, D, E, A, B, PT[0]);
230 F4(B, C, D, E, A, PT[1]);
231 F4(A, B, C, D, E, PT[2]);
232 F4(E, A, B, C, D, PT[3]);
233
234 P2.store_le(PT);
235 F4(D, E, A, B, C, PT[0]);
236 F4(C, D, E, A, B, PT[1]);
237 F4(B, C, D, E, A, PT[2]);
238 F4(A, B, C, D, E, PT[3]);
239
240 P3.store_le(PT);
241 F4(E, A, B, C, D, PT[0]);
242 F4(D, E, A, B, C, PT[1]);
243 F4(C, D, E, A, B, PT[2]);
244 F4(B, C, D, E, A, PT[3]);
245
246 A = (digest[0] += A);
247 B = (digest[1] += B);
248 C = (digest[2] += C);
249 D = (digest[3] += D);
250 E = (digest[4] += E);
251 }
252}
253
254} // namespace Botan
static constexpr size_t block_bytes
Definition sha1.h:24
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:189
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:860
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat(uint32_t B) noexcept
Definition simd_4x32.h:127
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
void F2(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:26
void F4(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:37
void F3(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:31
void F1(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:21
uint32_t P1(uint32_t X)
Definition sm3_fn.h:65
uint32_t P0(uint32_t X)
Definition sm3_fn.h:17