10#include <botan/internal/sha1.h>
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/sha1_f.h>
14#include <botan/internal/simd_4x32.h>
50 T0 ^= XW3.shift_elems_right<1>();
55 auto T2 = T0.shift_elems_left<3>();
76void BOTAN_FN_ISA_SIMD_4X32 SHA_1::simd_compress_n(digest_type& digest, std::span<const uint8_t> input,
size_t blocks) {
77 using namespace SHA1_F;
84 uint32_t A = digest[0];
85 uint32_t B = digest[1];
86 uint32_t C = digest[2];
87 uint32_t D = digest[3];
88 uint32_t E = digest[4];
90 BufferSlicer in(input);
92 for(
size_t i = 0; i != blocks; ++i) {
102 SIMD_4x32 P0 = W0 + K00_19;
103 SIMD_4x32 P1 = W1 + K00_19;
104 SIMD_4x32 P2 = W2 + K00_19;
105 SIMD_4x32 P3 = W3 + K00_19;
108 F1(A, B, C, D, E, PT[0]);
109 F1(E, A, B, C, D, PT[1]);
110 F1(D, E, A, B, C, PT[2]);
111 F1(C, D, E, A, B, PT[3]);
112 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K00_19;
115 F1(B, C, D, E, A, PT[0]);
116 F1(A, B, C, D, E, PT[1]);
117 F1(E, A, B, C, D, PT[2]);
118 F1(D, E, A, B, C, PT[3]);
119 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
122 F1(C, D, E, A, B, PT[0]);
123 F1(B, C, D, E, A, PT[1]);
124 F1(A, B, C, D, E, PT[2]);
125 F1(E, A, B, C, D, PT[3]);
126 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K20_39;
129 F1(D, E, A, B, C, PT[0]);
130 F1(C, D, E, A, B, PT[1]);
131 F1(B, C, D, E, A, PT[2]);
132 F1(A, B, C, D, E, PT[3]);
133 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K20_39;
136 F1(E, A, B, C, D, PT[0]);
137 F1(D, E, A, B, C, PT[1]);
138 F1(C, D, E, A, B, PT[2]);
139 F1(B, C, D, E, A, PT[3]);
140 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K20_39;
143 F2(A, B, C, D, E, PT[0]);
144 F2(E, A, B, C, D, PT[1]);
145 F2(D, E, A, B, C, PT[2]);
146 F2(C, D, E, A, B, PT[3]);
147 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
150 F2(B, C, D, E, A, PT[0]);
151 F2(A, B, C, D, E, PT[1]);
152 F2(E, A, B, C, D, PT[2]);
153 F2(D, E, A, B, C, PT[3]);
154 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
157 F2(C, D, E, A, B, PT[0]);
158 F2(B, C, D, E, A, PT[1]);
159 F2(A, B, C, D, E, PT[2]);
160 F2(E, A, B, C, D, PT[3]);
161 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K40_59;
164 F2(D, E, A, B, C, PT[0]);
165 F2(C, D, E, A, B, PT[1]);
166 F2(B, C, D, E, A, PT[2]);
167 F2(A, B, C, D, E, PT[3]);
168 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K40_59;
171 F2(E, A, B, C, D, PT[0]);
172 F2(D, E, A, B, C, PT[1]);
173 F2(C, D, E, A, B, PT[2]);
174 F2(B, C, D, E, A, PT[3]);
175 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K40_59;
178 F3(A, B, C, D, E, PT[0]);
179 F3(E, A, B, C, D, PT[1]);
180 F3(D, E, A, B, C, PT[2]);
181 F3(C, D, E, A, B, PT[3]);
182 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
185 F3(B, C, D, E, A, PT[0]);
186 F3(A, B, C, D, E, PT[1]);
187 F3(E, A, B, C, D, PT[2]);
188 F3(D, E, A, B, C, PT[3]);
189 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
192 F3(C, D, E, A, B, PT[0]);
193 F3(B, C, D, E, A, PT[1]);
194 F3(A, B, C, D, E, PT[2]);
195 F3(E, A, B, C, D, PT[3]);
196 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K60_79;
199 F3(D, E, A, B, C, PT[0]);
200 F3(C, D, E, A, B, PT[1]);
201 F3(B, C, D, E, A, PT[2]);
202 F3(A, B, C, D, E, PT[3]);
203 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K60_79;
206 F3(E, A, B, C, D, PT[0]);
207 F3(D, E, A, B, C, PT[1]);
208 F3(C, D, E, A, B, PT[2]);
209 F3(B, C, D, E, A, PT[3]);
210 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K60_79;
213 F4(A, B, C, D, E, PT[0]);
214 F4(E, A, B, C, D, PT[1]);
215 F4(D, E, A, B, C, PT[2]);
216 F4(C, D, E, A, B, PT[3]);
217 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
220 F4(B, C, D, E, A, PT[0]);
221 F4(A, B, C, D, E, PT[1]);
222 F4(E, A, B, C, D, PT[2]);
223 F4(D, E, A, B, C, PT[3]);
226 F4(C, D, E, A, B, PT[0]);
227 F4(B, C, D, E, A, PT[1]);
228 F4(A, B, C, D, E, PT[2]);
229 F4(E, A, B, C, D, PT[3]);
232 F4(D, E, A, B, C, PT[0]);
233 F4(C, D, E, A, B, PT[1]);
234 F4(B, C, D, E, A, PT[2]);
235 F4(A, B, C, D, E, PT[3]);
238 F4(E, A, B, C, D, PT[0]);
239 F4(D, E, A, B, C, PT[1]);
240 F4(C, D, E, A, B, PT[2]);
241 F4(B, C, D, E, A, PT[3]);
243 A = (digest[0] += A);
244 B = (digest[1] += B);
245 C = (digest[2] += C);
246 D = (digest[3] += D);
247 E = (digest[4] += E);
static constexpr size_t block_bytes
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
static SIMD_4x32 splat(uint32_t B) noexcept
#define BOTAN_FORCE_INLINE
void F2(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F4(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F3(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F1(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)