Botan 3.9.0
Crypto and TLS for C++
sha1_simd.cpp
Go to the documentation of this file.
1/*
2* SHA-1 using SIMD instructions
3* Based on public domain code by Dean Gaudet
4* (http://arctic.org/~dean/crypto/sha1.html)
5* (C) 2009-2011,2023,2025 Jack Lloyd
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#include <botan/internal/sha1.h>
11
12#include <botan/internal/bit_ops.h>
13#include <botan/internal/isa_extn.h>
14#include <botan/internal/rotate.h>
15#include <botan/internal/sha1_f.h>
16#include <botan/internal/simd_4x32.h>
17
18namespace Botan {
19
20namespace {
21
22/*
23For each multiple of 4, t, we want to calculate this:
24
25W[t+0] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);
26W[t+1] = rol(W[t-2] ^ W[t-7] ^ W[t-13] ^ W[t-15], 1);
27W[t+2] = rol(W[t-1] ^ W[t-6] ^ W[t-12] ^ W[t-14], 1);
28W[t+3] = rol(W[t] ^ W[t-5] ^ W[t-11] ^ W[t-13], 1);
29
30we'll actually calculate this:
31
32W[t+0] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);
33W[t+1] = rol(W[t-2] ^ W[t-7] ^ W[t-13] ^ W[t-15], 1);
34W[t+2] = rol(W[t-1] ^ W[t-6] ^ W[t-12] ^ W[t-14], 1);
35W[t+3] = rol( 0 ^ W[t-5] ^ W[t-11] ^ W[t-13], 1);
36W[t+3] ^= rol(W[t+0], 1);
37
38the parameters are:
39
40W0 = &W[t-16];
41W1 = &W[t-12];
42W2 = &W[t- 8];
43W3 = &W[t- 4];
44
45and on output:
46W0 = W[t]..W[t+3]
47*/
48BOTAN_FORCE_INLINE SIMD_4x32 sha1_simd_next_w(SIMD_4x32& XW0, SIMD_4x32 XW1, SIMD_4x32 XW2, SIMD_4x32 XW3) {
49 SIMD_4x32 T0 = XW0; // W[t-16..t-13]
50 T0 ^= SIMD_4x32::alignr8(XW1, XW0); // W[t-14..t-11]
51 T0 ^= XW2; // W[t-8..t-5]
52 T0 ^= XW3.shift_elems_right<1>(); // W[t-3..t-1] || 0
53
54 /* unrotated W[t]..W[t+2] in T0 ... still need W[t+3] */
55
56 // Extract w[t+0] into T2
57 auto T2 = T0.shift_elems_left<3>();
58
59 // Main rotation
60 T0 = T0.rotl<1>();
61
62 // Rotation of W[t+3] has rot by 2 to account for us working on non-rotated words
63 T2 = T2.rotl<2>();
64
65 // Merge rol(W[t+0], 1) into W[t+3]
66 T0 ^= T2;
67
68 XW0 = T0;
69 return T0;
70}
71
72} // namespace
73
74/*
75* SHA-1 Compression Function using SIMD for message expansion
*
* Processes `blocks` consecutive message blocks of block_bytes bytes each,
* taken in order from `input`, and folds each one into `digest` (the five
* 32-bit chaining words). The message schedule is computed four words at a
* time with SIMD_4x32 via sha1_simd_next_w(); the 80 rounds themselves are
* scalar (F1..F4) and fully unrolled.
*
* NOTE(review): `input` is assumed to hold at least blocks * block_bytes
* bytes; what BufferSlicer::take does when it does not is not visible here.
76*/
77//static
78void BOTAN_FN_ISA_SIMD_4X32 SHA_1::simd_compress_n(digest_type& digest, std::span<const uint8_t> input, size_t blocks) {
79 using namespace SHA1_F;
80
 // One vector per round constant, so a constant can be added to four
 // schedule words at once (K1..K4 presumably come from SHA1_F)
81 const SIMD_4x32 K00_19 = SIMD_4x32::splat(K1);
82 const SIMD_4x32 K20_39 = SIMD_4x32::splat(K2);
83 const SIMD_4x32 K40_59 = SIMD_4x32::splat(K3);
84 const SIMD_4x32 K60_79 = SIMD_4x32::splat(K4);
85
 // Working copies of the chaining state
86 uint32_t A = digest[0];
87 uint32_t B = digest[1];
88 uint32_t C = digest[2];
89 uint32_t D = digest[3];
90 uint32_t E = digest[4];
91
92 BufferSlicer in(input);
93
94 for(size_t i = 0; i != blocks; ++i) {
 // Scratch array used to hand four (W + K) values to the scalar rounds
95 uint32_t PT[4];
96
97 const auto block = in.take(block_bytes);
98
 // W0..W3 hold the sixteen message words W[0..15], four per vector
99 SIMD_4x32 W0 = SIMD_4x32::load_be(&block[0]); // NOLINT(*-container-data-pointer)
100 SIMD_4x32 W1 = SIMD_4x32::load_be(&block[16]);
101 SIMD_4x32 W2 = SIMD_4x32::load_be(&block[32]);
102 SIMD_4x32 W3 = SIMD_4x32::load_be(&block[48]);
103
 // P0..P3 hold W[i] + K for the next sixteen rounds to be executed
104 SIMD_4x32 P0 = W0 + K00_19;
105 SIMD_4x32 P1 = W1 + K00_19;
106 SIMD_4x32 P2 = W2 + K00_19;
107 SIMD_4x32 P3 = W3 + K00_19;
108
 // Pattern of every group below: spill one P vector to PT, run four
 // rounds on it, then refill that P vector with the schedule words needed
 // sixteen rounds later. sha1_simd_next_w overwrites its first argument
 // with the new words, which is why W0..W3 rotate through the calls. The
 // constant added is the one for the rounds in which the new words will be
 // consumed, not the rounds currently executing.
 //
 // F1..F4 modify only their second and fifth arguments (taken by
 // reference), so instead of shuffling values between A..E after each
 // round, the argument order is rotated by one position per round.
109 P0.store_le(PT);  // rounds 0..3
110 F1(A, B, C, D, E, PT[0]);
111 F1(E, A, B, C, D, PT[1]);
112 F1(D, E, A, B, C, PT[2]);
113 F1(C, D, E, A, B, PT[3]);
114 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K00_19;  // W[16..19]
115
116 P1.store_le(PT);  // rounds 4..7
117 F1(B, C, D, E, A, PT[0]);
118 F1(A, B, C, D, E, PT[1]);
119 F1(E, A, B, C, D, PT[2]);
120 F1(D, E, A, B, C, PT[3]);
121 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;  // W[20..23]
122
123 P2.store_le(PT);  // rounds 8..11
124 F1(C, D, E, A, B, PT[0]);
125 F1(B, C, D, E, A, PT[1]);
126 F1(A, B, C, D, E, PT[2]);
127 F1(E, A, B, C, D, PT[3]);
128 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K20_39;  // W[24..27]
129
130 P3.store_le(PT);  // rounds 12..15
131 F1(D, E, A, B, C, PT[0]);
132 F1(C, D, E, A, B, PT[1]);
133 F1(B, C, D, E, A, PT[2]);
134 F1(A, B, C, D, E, PT[3]);
135 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K20_39;  // W[28..31]
136
137 P0.store_le(PT);  // rounds 16..19
138 F1(E, A, B, C, D, PT[0]);
139 F1(D, E, A, B, C, PT[1]);
140 F1(C, D, E, A, B, PT[2]);
141 F1(B, C, D, E, A, PT[3]);
142 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K20_39;  // W[32..35]
143
144 P1.store_le(PT);  // rounds 20..23
145 F2(A, B, C, D, E, PT[0]);
146 F2(E, A, B, C, D, PT[1]);
147 F2(D, E, A, B, C, PT[2]);
148 F2(C, D, E, A, B, PT[3]);
149 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;  // W[36..39]
150
151 P2.store_le(PT);  // rounds 24..27
152 F2(B, C, D, E, A, PT[0]);
153 F2(A, B, C, D, E, PT[1]);
154 F2(E, A, B, C, D, PT[2]);
155 F2(D, E, A, B, C, PT[3]);
156 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;  // W[40..43]
157
158 P3.store_le(PT);  // rounds 28..31
159 F2(C, D, E, A, B, PT[0]);
160 F2(B, C, D, E, A, PT[1]);
161 F2(A, B, C, D, E, PT[2]);
162 F2(E, A, B, C, D, PT[3]);
163 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K40_59;  // W[44..47]
164
165 P0.store_le(PT);  // rounds 32..35
166 F2(D, E, A, B, C, PT[0]);
167 F2(C, D, E, A, B, PT[1]);
168 F2(B, C, D, E, A, PT[2]);
169 F2(A, B, C, D, E, PT[3]);
170 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K40_59;  // W[48..51]
171
172 P1.store_le(PT);  // rounds 36..39
173 F2(E, A, B, C, D, PT[0]);
174 F2(D, E, A, B, C, PT[1]);
175 F2(C, D, E, A, B, PT[2]);
176 F2(B, C, D, E, A, PT[3]);
177 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K40_59;  // W[52..55]
178
179 P2.store_le(PT);  // rounds 40..43
180 F3(A, B, C, D, E, PT[0]);
181 F3(E, A, B, C, D, PT[1]);
182 F3(D, E, A, B, C, PT[2]);
183 F3(C, D, E, A, B, PT[3]);
184 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;  // W[56..59]
185
186 P3.store_le(PT);  // rounds 44..47
187 F3(B, C, D, E, A, PT[0]);
188 F3(A, B, C, D, E, PT[1]);
189 F3(E, A, B, C, D, PT[2]);
190 F3(D, E, A, B, C, PT[3]);
191 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;  // W[60..63]
192
193 P0.store_le(PT);  // rounds 48..51
194 F3(C, D, E, A, B, PT[0]);
195 F3(B, C, D, E, A, PT[1]);
196 F3(A, B, C, D, E, PT[2]);
197 F3(E, A, B, C, D, PT[3]);
198 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K60_79;  // W[64..67]
199
200 P1.store_le(PT);  // rounds 52..55
201 F3(D, E, A, B, C, PT[0]);
202 F3(C, D, E, A, B, PT[1]);
203 F3(B, C, D, E, A, PT[2]);
204 F3(A, B, C, D, E, PT[3]);
205 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K60_79;  // W[68..71]
206
207 P2.store_le(PT);  // rounds 56..59
208 F3(E, A, B, C, D, PT[0]);
209 F3(D, E, A, B, C, PT[1]);
210 F3(C, D, E, A, B, PT[2]);
211 F3(B, C, D, E, A, PT[3]);
212 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K60_79;  // W[72..75]
213
214 P3.store_le(PT);  // rounds 60..63
215 F4(A, B, C, D, E, PT[0]);
216 F4(E, A, B, C, D, PT[1]);
217 F4(D, E, A, B, C, PT[2]);
218 F4(C, D, E, A, B, PT[3]);
219 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;  // W[76..79]
220
 // The schedule is complete; the last sixteen rounds only consume P0..P3
221 P0.store_le(PT);  // rounds 64..67
222 F4(B, C, D, E, A, PT[0]);
223 F4(A, B, C, D, E, PT[1]);
224 F4(E, A, B, C, D, PT[2]);
225 F4(D, E, A, B, C, PT[3]);
226
227 P1.store_le(PT);  // rounds 68..71
228 F4(C, D, E, A, B, PT[0]);
229 F4(B, C, D, E, A, PT[1]);
230 F4(A, B, C, D, E, PT[2]);
231 F4(E, A, B, C, D, PT[3]);
232
233 P2.store_le(PT);  // rounds 72..75
234 F4(D, E, A, B, C, PT[0]);
235 F4(C, D, E, A, B, PT[1]);
236 F4(B, C, D, E, A, PT[2]);
237 F4(A, B, C, D, E, PT[3]);
238
239 P3.store_le(PT);  // rounds 76..79
240 F4(E, A, B, C, D, PT[0]);
241 F4(D, E, A, B, C, PT[1]);
242 F4(C, D, E, A, B, PT[2]);
243 F4(B, C, D, E, A, PT[3]);
244
 // Add this block's result into the chaining state and reload the working
 // variables from it, ready for the next block
245 A = (digest[0] += A);
246 B = (digest[1] += B);
247 C = (digest[2] += C);
248 D = (digest[3] += D);
249 E = (digest[4] += E);
250 }
251}
252
253} // namespace Botan
static constexpr size_t block_bytes
Definition sha1.h:24
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:174
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:755
static SIMD_4x32 splat(uint32_t B) noexcept
Definition simd_4x32.h:118
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
void F2(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:26
void F4(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:37
void F3(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:31
void F1(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:21