10#include <botan/internal/sha1.h>
12#include <botan/internal/bit_ops.h>
13#include <botan/internal/isa_extn.h>
14#include <botan/internal/rotate.h>
15#include <botan/internal/sha1_f.h>
16#include <botan/internal/simd_4x32.h>
52 T0 ^= XW3.shift_elems_right<1>();
57 auto T2 = T0.shift_elems_left<3>();
78void BOTAN_FN_ISA_SIMD_4X32 SHA_1::simd_compress_n(digest_type& digest, std::span<const uint8_t> input,
size_t blocks) {
79 using namespace SHA1_F;
86 uint32_t A = digest[0];
87 uint32_t B = digest[1];
88 uint32_t C = digest[2];
89 uint32_t D = digest[3];
90 uint32_t E = digest[4];
92 BufferSlicer in(input);
94 for(
size_t i = 0; i != blocks; ++i) {
104 SIMD_4x32 P0 = W0 + K00_19;
105 SIMD_4x32 P1 = W1 + K00_19;
106 SIMD_4x32 P2 = W2 + K00_19;
107 SIMD_4x32 P3 = W3 + K00_19;
110 F1(A, B, C, D, E, PT[0]);
111 F1(E, A, B, C, D, PT[1]);
112 F1(D, E, A, B, C, PT[2]);
113 F1(C, D, E, A, B, PT[3]);
114 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K00_19;
117 F1(B, C, D, E, A, PT[0]);
118 F1(A, B, C, D, E, PT[1]);
119 F1(E, A, B, C, D, PT[2]);
120 F1(D, E, A, B, C, PT[3]);
121 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
124 F1(C, D, E, A, B, PT[0]);
125 F1(B, C, D, E, A, PT[1]);
126 F1(A, B, C, D, E, PT[2]);
127 F1(E, A, B, C, D, PT[3]);
128 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K20_39;
131 F1(D, E, A, B, C, PT[0]);
132 F1(C, D, E, A, B, PT[1]);
133 F1(B, C, D, E, A, PT[2]);
134 F1(A, B, C, D, E, PT[3]);
135 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K20_39;
138 F1(E, A, B, C, D, PT[0]);
139 F1(D, E, A, B, C, PT[1]);
140 F1(C, D, E, A, B, PT[2]);
141 F1(B, C, D, E, A, PT[3]);
142 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K20_39;
145 F2(A, B, C, D, E, PT[0]);
146 F2(E, A, B, C, D, PT[1]);
147 F2(D, E, A, B, C, PT[2]);
148 F2(C, D, E, A, B, PT[3]);
149 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K20_39;
152 F2(B, C, D, E, A, PT[0]);
153 F2(A, B, C, D, E, PT[1]);
154 F2(E, A, B, C, D, PT[2]);
155 F2(D, E, A, B, C, PT[3]);
156 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
159 F2(C, D, E, A, B, PT[0]);
160 F2(B, C, D, E, A, PT[1]);
161 F2(A, B, C, D, E, PT[2]);
162 F2(E, A, B, C, D, PT[3]);
163 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K40_59;
166 F2(D, E, A, B, C, PT[0]);
167 F2(C, D, E, A, B, PT[1]);
168 F2(B, C, D, E, A, PT[2]);
169 F2(A, B, C, D, E, PT[3]);
170 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K40_59;
173 F2(E, A, B, C, D, PT[0]);
174 F2(D, E, A, B, C, PT[1]);
175 F2(C, D, E, A, B, PT[2]);
176 F2(B, C, D, E, A, PT[3]);
177 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K40_59;
180 F3(A, B, C, D, E, PT[0]);
181 F3(E, A, B, C, D, PT[1]);
182 F3(D, E, A, B, C, PT[2]);
183 F3(C, D, E, A, B, PT[3]);
184 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K40_59;
187 F3(B, C, D, E, A, PT[0]);
188 F3(A, B, C, D, E, PT[1]);
189 F3(E, A, B, C, D, PT[2]);
190 F3(D, E, A, B, C, PT[3]);
191 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
194 F3(C, D, E, A, B, PT[0]);
195 F3(B, C, D, E, A, PT[1]);
196 F3(A, B, C, D, E, PT[2]);
197 F3(E, A, B, C, D, PT[3]);
198 P0 = sha1_simd_next_w(W0, W1, W2, W3) + K60_79;
201 F3(D, E, A, B, C, PT[0]);
202 F3(C, D, E, A, B, PT[1]);
203 F3(B, C, D, E, A, PT[2]);
204 F3(A, B, C, D, E, PT[3]);
205 P1 = sha1_simd_next_w(W1, W2, W3, W0) + K60_79;
208 F3(E, A, B, C, D, PT[0]);
209 F3(D, E, A, B, C, PT[1]);
210 F3(C, D, E, A, B, PT[2]);
211 F3(B, C, D, E, A, PT[3]);
212 P2 = sha1_simd_next_w(W2, W3, W0, W1) + K60_79;
215 F4(A, B, C, D, E, PT[0]);
216 F4(E, A, B, C, D, PT[1]);
217 F4(D, E, A, B, C, PT[2]);
218 F4(C, D, E, A, B, PT[3]);
219 P3 = sha1_simd_next_w(W3, W0, W1, W2) + K60_79;
222 F4(B, C, D, E, A, PT[0]);
223 F4(A, B, C, D, E, PT[1]);
224 F4(E, A, B, C, D, PT[2]);
225 F4(D, E, A, B, C, PT[3]);
228 F4(C, D, E, A, B, PT[0]);
229 F4(B, C, D, E, A, PT[1]);
230 F4(A, B, C, D, E, PT[2]);
231 F4(E, A, B, C, D, PT[3]);
234 F4(D, E, A, B, C, PT[0]);
235 F4(C, D, E, A, B, PT[1]);
236 F4(B, C, D, E, A, PT[2]);
237 F4(A, B, C, D, E, PT[3]);
240 F4(E, A, B, C, D, PT[0]);
241 F4(D, E, A, B, C, PT[1]);
242 F4(C, D, E, A, B, PT[2]);
243 F4(B, C, D, E, A, PT[3]);
245 A = (digest[0] += A);
246 B = (digest[1] += B);
247 C = (digest[2] += C);
248 D = (digest[3] += D);
249 E = (digest[4] += E);
static constexpr size_t block_bytes
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
static SIMD_4x32 splat(uint32_t B) noexcept
#define BOTAN_FORCE_INLINE
void F2(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F4(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F3(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)
void F1(uint32_t A, uint32_t &B, uint32_t C, uint32_t D, uint32_t &E, uint32_t M)