Botan 3.10.0
Crypto and TLS for C++
sha1_simd.cpp
Go to the documentation of this file.
1/*
2* SHA-1 using SIMD instructions
3* Based on public domain code by Dean Gaudet
4* (http://arctic.org/~dean/crypto/sha1.html)
5* (C) 2009-2011,2023,2025 Jack Lloyd
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#include <botan/internal/sha1.h>
11
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/sha1_f.h>
14#include <botan/internal/simd_4x32.h>
15
16namespace Botan {
17
18namespace {
19
20/*
21For each multiple of 4, t, we want to calculate this:
22
23W[t+0] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);
24W[t+1] = rol(W[t-2] ^ W[t-7] ^ W[t-13] ^ W[t-15], 1);
25W[t+2] = rol(W[t-1] ^ W[t-6] ^ W[t-12] ^ W[t-14], 1);
26W[t+3] = rol(W[t] ^ W[t-5] ^ W[t-11] ^ W[t-13], 1);
27
28we'll actually calculate this:
29
30W[t+0] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1);
31W[t+1] = rol(W[t-2] ^ W[t-7] ^ W[t-13] ^ W[t-15], 1);
32W[t+2] = rol(W[t-1] ^ W[t-6] ^ W[t-12] ^ W[t-14], 1);
33W[t+3] = rol( 0 ^ W[t-5] ^ W[t-11] ^ W[t-13], 1);
34W[t+3] ^= rol(W[t+0], 1);
35
36the parameters are:
37
38W0 = &W[t-16];
39W1 = &W[t-12];
40W2 = &W[t- 8];
41W3 = &W[t- 4];
42
43and on output:
44W0 = W[t]..W[t+3]
45*/
46BOTAN_FORCE_INLINE SIMD_4x32 sha1_simd_next_w(SIMD_4x32& XW0, SIMD_4x32 XW1, SIMD_4x32 XW2, SIMD_4x32 XW3) {
47 SIMD_4x32 T0 = XW0; // W[t-16..t-13]
48 T0 ^= SIMD_4x32::alignr8(XW1, XW0); // W[t-14..t-11]
49 T0 ^= XW2; // W[t-8..t-5]
50 T0 ^= XW3.shift_elems_right<1>(); // W[t-3..t-1] || 0
51
52 /* unrotated W[t]..W[t+2] in T0 ... still need W[t+3] */
53
54 // Extract w[t+0] into T2
55 auto T2 = T0.shift_elems_left<3>();
56
57 // Main rotation
58 T0 = T0.rotl<1>();
59
60 // Rotation of W[t+3] has rot by 2 to account for us working on non-rotated words
61 T2 = T2.rotl<2>();
62
63 // Merge rol(W[t+0], 1) into W[t+3]
64 T0 ^= T2;
65
66 XW0 = T0;
67 return T0;
68}
69
70} // namespace
71
72/*
73* SHA-1 Compression Function using SIMD for message expansion
74*/
75//static
76void BOTAN_FN_ISA_SIMD_4X32 SHA_1::simd_compress_n(digest_type& digest, std::span<const uint8_t> input, size_t blocks) {
77 using namespace SHA1_F;
78
79 const SIMD_4x32 K00_19 = SIMD_4x32::splat(K1);
80 const SIMD_4x32 K20_39 = SIMD_4x32::splat(K2);
81 const SIMD_4x32 K40_59 = SIMD_4x32::splat(K3);
82 const SIMD_4x32 K60_79 = SIMD_4x32::splat(K4);
83
84 uint32_t A = digest[0];
85 uint32_t B = digest[1];
86 uint32_t C = digest[2];
87 uint32_t D = digest[3];
88 uint32_t E = digest[4];
89
90 BufferSlicer in(input);
91
92 for(size_t i = 0; i != blocks; ++i) {
93 uint32_t PT[4];
94
95 const auto block = in.take(block_bytes);
96
97 SIMD_4x32 W0 = SIMD_4x32::load_be(&block[0]); // NOLINT(*-container-data-pointer)
98 SIMD_4x32 W1 = SIMD_4x32::load_be(&block[16]);
99 SIMD_4x32 W2 = SIMD_4x32::load_be(&block[32]);
100 SIMD_4x32 W3 = SIMD_4x32::load_be(&block[48]);
101
102 SIMD_4x32 P0 = W0 + K00_19;
103 SIMD_4x32 P1 = W1 + K00_19;
104 SIMD_4x32 P2 = W2 + K00_19;
105 SIMD_4x32 P3 = W3 + K00_19;
106
107 P0.store_le(PT);
108 F1(A, B, C, D, E, PT[0]);
109 F1(E, A, B, C, D, PT[1]);
110 F1(D, E, A, B, C, PT[2]);
111 F1(C, D, E, A, B, PT[3]);
112 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K00_19;
113
114 P1.store_le(PT);
115 F1(B, C, D, E, A, PT[0]);
116 F1(A, B, C, D, E, PT[1]);
117 F1(E, A, B, C, D, PT[2]);
118 F1(D, E, A, B, C, PT[3]);
119 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
120
121 P2.store_le(PT);
122 F1(C, D, E, A, B, PT[0]);
123 F1(B, C, D, E, A, PT[1]);
124 F1(A, B, C, D, E, PT[2]);
125 F1(E, A, B, C, D, PT[3]);
126 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K20_39;
127
128 P3.store_le(PT);
129 F1(D, E, A, B, C, PT[0]);
130 F1(C, D, E, A, B, PT[1]);
131 F1(B, C, D, E, A, PT[2]);
132 F1(A, B, C, D, E, PT[3]);
133 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K20_39;
134
135 P0.store_le(PT);
136 F1(E, A, B, C, D, PT[0]);
137 F1(D, E, A, B, C, PT[1]);
138 F1(C, D, E, A, B, PT[2]);
139 F1(B, C, D, E, A, PT[3]);
140 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K20_39;
141
142 P1.store_le(PT);
143 F2(A, B, C, D, E, PT[0]);
144 F2(E, A, B, C, D, PT[1]);
145 F2(D, E, A, B, C, PT[2]);
146 F2(C, D, E, A, B, PT[3]);
147 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
148
149 P2.store_le(PT);
150 F2(B, C, D, E, A, PT[0]);
151 F2(A, B, C, D, E, PT[1]);
152 F2(E, A, B, C, D, PT[2]);
153 F2(D, E, A, B, C, PT[3]);
154 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
155
156 P3.store_le(PT);
157 F2(C, D, E, A, B, PT[0]);
158 F2(B, C, D, E, A, PT[1]);
159 F2(A, B, C, D, E, PT[2]);
160 F2(E, A, B, C, D, PT[3]);
161 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K40_59;
162
163 P0.store_le(PT);
164 F2(D, E, A, B, C, PT[0]);
165 F2(C, D, E, A, B, PT[1]);
166 F2(B, C, D, E, A, PT[2]);
167 F2(A, B, C, D, E, PT[3]);
168 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K40_59;
169
170 P1.store_le(PT);
171 F2(E, A, B, C, D, PT[0]);
172 F2(D, E, A, B, C, PT[1]);
173 F2(C, D, E, A, B, PT[2]);
174 F2(B, C, D, E, A, PT[3]);
175 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K40_59;
176
177 P2.store_le(PT);
178 F3(A, B, C, D, E, PT[0]);
179 F3(E, A, B, C, D, PT[1]);
180 F3(D, E, A, B, C, PT[2]);
181 F3(C, D, E, A, B, PT[3]);
182 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
183
184 P3.store_le(PT);
185 F3(B, C, D, E, A, PT[0]);
186 F3(A, B, C, D, E, PT[1]);
187 F3(E, A, B, C, D, PT[2]);
188 F3(D, E, A, B, C, PT[3]);
189 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
190
191 P0.store_le(PT);
192 F3(C, D, E, A, B, PT[0]);
193 F3(B, C, D, E, A, PT[1]);
194 F3(A, B, C, D, E, PT[2]);
195 F3(E, A, B, C, D, PT[3]);
196 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K60_79;
197
198 P1.store_le(PT);
199 F3(D, E, A, B, C, PT[0]);
200 F3(C, D, E, A, B, PT[1]);
201 F3(B, C, D, E, A, PT[2]);
202 F3(A, B, C, D, E, PT[3]);
203 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K60_79;
204
205 P2.store_le(PT);
206 F3(E, A, B, C, D, PT[0]);
207 F3(D, E, A, B, C, PT[1]);
208 F3(C, D, E, A, B, PT[2]);
209 F3(B, C, D, E, A, PT[3]);
210 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K60_79;
211
212 P3.store_le(PT);
213 F4(A, B, C, D, E, PT[0]);
214 F4(E, A, B, C, D, PT[1]);
215 F4(D, E, A, B, C, PT[2]);
216 F4(C, D, E, A, B, PT[3]);
217 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
218
219 P0.store_le(PT);
220 F4(B, C, D, E, A, PT[0]);
221 F4(A, B, C, D, E, PT[1]);
222 F4(E, A, B, C, D, PT[2]);
223 F4(D, E, A, B, C, PT[3]);
224
225 P1.store_le(PT);
226 F4(C, D, E, A, B, PT[0]);
227 F4(B, C, D, E, A, PT[1]);
228 F4(A, B, C, D, E, PT[2]);
229 F4(E, A, B, C, D, PT[3]);
230
231 P2.store_le(PT);
232 F4(D, E, A, B, C, PT[0]);
233 F4(C, D, E, A, B, PT[1]);
234 F4(B, C, D, E, A, PT[2]);
235 F4(A, B, C, D, E, PT[3]);
236
237 P3.store_le(PT);
238 F4(E, A, B, C, D, PT[0]);
239 F4(D, E, A, B, C, PT[1]);
240 F4(C, D, E, A, B, PT[2]);
241 F4(B, C, D, E, A, PT[3]);
242
243 A = (digest[0] += A);
244 B = (digest[1] += B);
245 C = (digest[2] += C);
246 D = (digest[3] += D);
247 E = (digest[4] += E);
248 }
249}
250
251} // namespace Botan
static constexpr size_t block_bytes
Definition sha1.h:24
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:174
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:755
static SIMD_4x32 splat(uint32_t B) noexcept
Definition simd_4x32.h:118
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
void F2(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:26
void F4(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:37
void F3(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:31
void F1(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
Definition sha1_f.h:21