10#include <botan/internal/sha1.h>
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/sha1_f.h>
14#include <botan/internal/simd_4x32.h>
53 T0 ^= XW3.shift_elems_right<1>();
58 auto T2 = T0.shift_elems_left<3>();
// SIMD-assisted SHA-1 compression routine: processes `blocks` input blocks
// taken from `input` and accumulates the result into the five words of
// `digest`.
//
// NOTE(review): this view of the function is partial. The declarations of
// W0..W3, PT and the K00_19/K20_39/K40_59/K60_79 constants, the stores that
// fill PT before each group of four F calls, and the closing braces are not
// visible here. The comments below describe only what the visible lines show;
// anything else is marked as an assumption to confirm.
79void BOTAN_FN_ISA_SIMD_4X32 SHA_1::simd_compress_n(digest_type& digest, std::span<const uint8_t> input,
size_t blocks) {
// Brings F1..F4 (and presumably the K* constants -- TODO confirm) into scope.
80 using namespace SHA1_F;
// Working variables A..E start out as copies of the five digest words.
87 uint32_t A = digest[0];
88 uint32_t B = digest[1];
89 uint32_t C = digest[2];
90 uint32_t D = digest[3];
91 uint32_t E = digest[4];
// Slicer over the input bytes; presumably one block is taken from it per
// loop iteration (the take/load calls are not visible here -- TODO confirm).
93 BufferSlicer in(input);
// One iteration per input block.
95 for(
size_t i = 0; i != blocks; ++i) {
// Add the K00_19 constant to each of the four SIMD_4x32 values W0..W3,
// giving P0..P3. W0..W3 are presumably the loaded message words for this
// block (their definition is not visible here).
105 SIMD_4x32
P0 = W0 + K00_19;
106 SIMD_4x32
P1 = W1 + K00_19;
107 SIMD_4x32 P2 = W2 + K00_19;
108 SIMD_4x32 P3 = W3 + K00_19;
// --- F1 phase: 5 groups of 4 calls (20 calls in total). ---
// The A..E argument order rotates by one position on every call, so each of
// the five variables takes each role in turn without being copied around.
// Per the F1..F4 signatures, the 2nd and 5th arguments are taken by
// reference, i.e. those are the two variables a call may modify.
// PT[0..3] presumably holds the four lanes of the current P vector.
111 F1(A, B, C, D, E, PT[0]);
112 F1(E, A, B, C, D, PT[1]);
113 F1(D, E, A, B, C, PT[2]);
114 F1(C, D, E, A, B, PT[3]);
// After each group, the P vector just consumed is refilled from
// sha1_simd_next_w plus a round constant. The W arguments rotate
// (W0,W1,W2,W3) -> (W1,W2,W3,W0) -> ... from one refill to the next.
115 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K00_19;
118 F1(B, C, D, E, A, PT[0]);
119 F1(A, B, C, D, E, PT[1]);
120 F1(E, A, B, C, D, PT[2]);
121 F1(D, E, A, B, C, PT[3]);
// The constant switches to K20_39 here although F1 calls continue below:
// presumably each P vector is consumed four groups after it is computed, so
// the constant must match the later group (PT stores not visible -- confirm).
122 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
125 F1(C, D, E, A, B, PT[0]);
126 F1(B, C, D, E, A, PT[1]);
127 F1(A, B, C, D, E, PT[2]);
128 F1(E, A, B, C, D, PT[3]);
129 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K20_39;
132 F1(D, E, A, B, C, PT[0]);
133 F1(C, D, E, A, B, PT[1]);
134 F1(B, C, D, E, A, PT[2]);
135 F1(A, B, C, D, E, PT[3]);
136 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K20_39;
139 F1(E, A, B, C, D, PT[0]);
140 F1(D, E, A, B, C, PT[1]);
141 F1(C, D, E, A, B, PT[2]);
142 F1(B, C, D, E, A, PT[3]);
143 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K20_39;
// --- F2 phase: 5 groups of 4 calls. ---
146 F2(A, B, C, D, E, PT[0]);
147 F2(E, A, B, C, D, PT[1]);
148 F2(D, E, A, B, C, PT[2]);
149 F2(C, D, E, A, B, PT[3]);
150 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
153 F2(B, C, D, E, A, PT[0]);
154 F2(A, B, C, D, E, PT[1]);
155 F2(E, A, B, C, D, PT[2]);
156 F2(D, E, A, B, C, PT[3]);
// Refills from here on use K40_59 (five refills in total).
157 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
160 F2(C, D, E, A, B, PT[0]);
161 F2(B, C, D, E, A, PT[1]);
162 F2(A, B, C, D, E, PT[2]);
163 F2(E, A, B, C, D, PT[3]);
164 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K40_59;
167 F2(D, E, A, B, C, PT[0]);
168 F2(C, D, E, A, B, PT[1]);
169 F2(B, C, D, E, A, PT[2]);
170 F2(A, B, C, D, E, PT[3]);
171 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K40_59;
174 F2(E, A, B, C, D, PT[0]);
175 F2(D, E, A, B, C, PT[1]);
176 F2(C, D, E, A, B, PT[2]);
177 F2(B, C, D, E, A, PT[3]);
178 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K40_59;
// --- F3 phase: 5 groups of 4 calls. ---
181 F3(A, B, C, D, E, PT[0]);
182 F3(E, A, B, C, D, PT[1]);
183 F3(D, E, A, B, C, PT[2]);
184 F3(C, D, E, A, B, PT[3]);
185 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
188 F3(B, C, D, E, A, PT[0]);
189 F3(A, B, C, D, E, PT[1]);
190 F3(E, A, B, C, D, PT[2]);
191 F3(D, E, A, B, C, PT[3]);
// Refills from here on use K60_79 (five refills in total).
192 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
195 F3(C, D, E, A, B, PT[0]);
196 F3(B, C, D, E, A, PT[1]);
197 F3(A, B, C, D, E, PT[2]);
198 F3(E, A, B, C, D, PT[3]);
199 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K60_79;
202 F3(D, E, A, B, C, PT[0]);
203 F3(C, D, E, A, B, PT[1]);
204 F3(B, C, D, E, A, PT[2]);
205 F3(A, B, C, D, E, PT[3]);
206 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K60_79;
209 F3(E, A, B, C, D, PT[0]);
210 F3(D, E, A, B, C, PT[1]);
211 F3(C, D, E, A, B, PT[2]);
212 F3(B, C, D, E, A, PT[3]);
213 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K60_79;
// --- F4 phase: 5 groups of 4 calls. ---
216 F4(A, B, C, D, E, PT[0]);
217 F4(E, A, B, C, D, PT[1]);
218 F4(D, E, A, B, C, PT[2]);
219 F4(C, D, E, A, B, PT[3]);
// Last refill: no further calls to sha1_simd_next_w appear after this one.
220 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
223 F4(B, C, D, E, A, PT[0]);
224 F4(A, B, C, D, E, PT[1]);
225 F4(E, A, B, C, D, PT[2]);
226 F4(D, E, A, B, C, PT[3]);
229 F4(C, D, E, A, B, PT[0]);
230 F4(B, C, D, E, A, PT[1]);
231 F4(A, B, C, D, E, PT[2]);
232 F4(E, A, B, C, D, PT[3]);
235 F4(D, E, A, B, C, PT[0]);
236 F4(C, D, E, A, B, PT[1]);
237 F4(B, C, D, E, A, PT[2]);
238 F4(A, B, C, D, E, PT[3]);
241 F4(E, A, B, C, D, PT[0]);
242 F4(D, E, A, B, C, PT[1]);
243 F4(C, D, E, A, B, PT[2]);
244 F4(B, C, D, E, A, PT[3]);
// Add the working variables into the digest, and at the same time reload
// A..E with the updated digest words so they are the starting state for the
// next block.
246 A = (digest[0] += A);
247 B = (digest[1] += B);
248 C = (digest[2] += C);
249 D = (digest[3] += D);
250 E = (digest[4] += E);
static constexpr size_t block_bytes
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat(uint32_t B) noexcept
#define BOTAN_FORCE_INLINE
void F2(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F4(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F3(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F1(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)