10#include <botan/internal/sha1.h>
12#include <botan/internal/bit_ops.h>
13#include <botan/internal/rotate.h>
14#include <botan/internal/simd_32.h>
15#include <botan/internal/stl_util.h>
20namespace SHA1_SSE2_F {
// Round helper called by sse2_compress_n for its first twenty round calls.
// A, C, D and msg are passed by value; B and E are passed by non-const
// reference, so they are the only arguments a call is able to modify.
78inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) {
// Round helper called by sse2_compress_n for its second group of twenty
// round calls. A, C, D and msg are passed by value; B and E by reference.
86inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) {
// Accumulate into E: the XOR of B, C and D, plus the message word, plus
// rotl<5>(A). The addition is on uint32_t, so it wraps modulo 2^32.
87 E += (B ^ C ^ D) + msg +
rotl<5>(A);
// Round helper called by sse2_compress_n for its third group of twenty
// round calls. A, C, D and msg are passed by value; B and E are passed by
// non-const reference, so they are the only arguments a call is able to modify.
94inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) {
// Round helper called by sse2_compress_n for its final twenty round calls.
// A, C, D and msg are passed by value; B and E by reference.
102inline void F4(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg) {
// Accumulate into E: the XOR of B, C and D, plus the message word, plus
// rotl<5>(A) -- the same expression that F2 adds to E.
103 E += (B ^ C ^ D) + msg +
rotl<5>(A);
// Compression routine compiled under BOTAN_FUNC_ISA("sse2").
//
// Parameters:
//   digest - five-word running state; read into A..E up front and updated
//            in place at the end of every loop iteration.
//   input  - byte span holding the message data.
//   blocks - number of loop iterations (one per block) to run.
//
// Per iteration: one block of block_bytes bytes is taken from `in`, 80
// round-helper calls are made (20 each of F1, F2, F3, F4, in that order),
// and the working values are added back into `digest`.
//
// NOTE(review): `in`, `block_bytes`, `PT`, `W0..W3`, `P0..P3`, `prep` and the
// K constants are declared outside the lines commented here; `in` is
// presumably a slicer over `input` -- confirm against their declarations.
115BOTAN_FUNC_ISA(
"sse2") void
SHA_1::sse2_compress_n(digest_type& digest, std::span<const uint8_t> input,
size_t blocks) {
// Bring the F1..F4 round helpers into scope.
116 using namespace SHA1_SSE2_F;
// Working values start from the current digest words.
123 uint32_t A = digest[0], B = digest[1], C = digest[2], D = digest[3], E = digest[4];
// One iteration per block.
127 for(
size_t i = 0; i != blocks; ++i) {
// Take the next block_bytes bytes from `in`.
130 const auto block = in.take(block_bytes);
// ---- F1 phase: 20 calls ----
// The argument list is rotated one position to the right on every call
// (A,B,C,D,E -> E,A,B,C,D -> D,E,A,B,C -> ...), returning to the starting
// order after five calls. Each call consumes one of PT[0..3].
// NOTE(review): PT is presumably refilled from one of P0..P3 before each
// group of four calls -- confirm.
143 F1(A, B, C, D, E, PT[0]);
144 F1(E, A, B, C, D, PT[1]);
145 F1(D, E, A, B, C, PT[2]);
146 F1(C, D, E, A, B, PT[3]);
// After each group of four calls one of P0..P3 is reassigned from prep().
// The W arguments rotate by one position on each successive prep call.
147 P0 = prep(W0, W1, W2, W3, K00_19);
150 F1(B, C, D, E, A, PT[0]);
151 F1(A, B, C, D, E, PT[1]);
152 F1(E, A, B, C, D, PT[2]);
153 F1(D, E, A, B, C, PT[3]);
// The K constant passed to prep changes here, before the round helper
// changes from F1 to F2. NOTE(review): presumably because a value prepared
// now is only consumed several groups later -- confirm.
154 P1 = prep(W1, W2, W3, W0, K20_39);
157 F1(C, D, E, A, B, PT[0]);
158 F1(B, C, D, E, A, PT[1]);
159 F1(A, B, C, D, E, PT[2]);
160 F1(E, A, B, C, D, PT[3]);
161 P2 = prep(W2, W3, W0, W1, K20_39);
164 F1(D, E, A, B, C, PT[0]);
165 F1(C, D, E, A, B, PT[1]);
166 F1(B, C, D, E, A, PT[2]);
167 F1(A, B, C, D, E, PT[3]);
168 P3 = prep(W3, W0, W1, W2, K20_39);
171 F1(E, A, B, C, D, PT[0]);
172 F1(D, E, A, B, C, PT[1]);
173 F1(C, D, E, A, B, PT[2]);
174 F1(B, C, D, E, A, PT[3]);
175 P0 = prep(W0, W1, W2, W3, K20_39);
// ---- F2 phase: 20 calls, same argument rotation ----
178 F2(A, B, C, D, E, PT[0]);
179 F2(E, A, B, C, D, PT[1]);
180 F2(D, E, A, B, C, PT[2]);
181 F2(C, D, E, A, B, PT[3]);
182 P1 = prep(W1, W2, W3, W0, K20_39);
185 F2(B, C, D, E, A, PT[0]);
186 F2(A, B, C, D, E, PT[1]);
187 F2(E, A, B, C, D, PT[2]);
188 F2(D, E, A, B, C, PT[3]);
// prep switches to K40_59 from here on (five prep calls use it).
189 P2 = prep(W2, W3, W0, W1, K40_59);
192 F2(C, D, E, A, B, PT[0]);
193 F2(B, C, D, E, A, PT[1]);
194 F2(A, B, C, D, E, PT[2]);
195 F2(E, A, B, C, D, PT[3]);
196 P3 = prep(W3, W0, W1, W2, K40_59);
199 F2(D, E, A, B, C, PT[0]);
200 F2(C, D, E, A, B, PT[1]);
201 F2(B, C, D, E, A, PT[2]);
202 F2(A, B, C, D, E, PT[3]);
203 P0 = prep(W0, W1, W2, W3, K40_59);
206 F2(E, A, B, C, D, PT[0]);
207 F2(D, E, A, B, C, PT[1]);
208 F2(C, D, E, A, B, PT[2]);
209 F2(B, C, D, E, A, PT[3]);
210 P1 = prep(W1, W2, W3, W0, K40_59);
// ---- F3 phase: 20 calls, same argument rotation ----
213 F3(A, B, C, D, E, PT[0]);
214 F3(E, A, B, C, D, PT[1]);
215 F3(D, E, A, B, C, PT[2]);
216 F3(C, D, E, A, B, PT[3]);
217 P2 = prep(W2, W3, W0, W1, K40_59);
220 F3(B, C, D, E, A, PT[0]);
221 F3(A, B, C, D, E, PT[1]);
222 F3(E, A, B, C, D, PT[2]);
223 F3(D, E, A, B, C, PT[3]);
// prep switches to K60_79 from here on (five prep calls use it).
224 P3 = prep(W3, W0, W1, W2, K60_79);
227 F3(C, D, E, A, B, PT[0]);
228 F3(B, C, D, E, A, PT[1]);
229 F3(A, B, C, D, E, PT[2]);
230 F3(E, A, B, C, D, PT[3]);
231 P0 = prep(W0, W1, W2, W3, K60_79);
234 F3(D, E, A, B, C, PT[0]);
235 F3(C, D, E, A, B, PT[1]);
236 F3(B, C, D, E, A, PT[2]);
237 F3(A, B, C, D, E, PT[3]);
238 P1 = prep(W1, W2, W3, W0, K60_79);
241 F3(E, A, B, C, D, PT[0]);
242 F3(D, E, A, B, C, PT[1]);
243 F3(C, D, E, A, B, PT[2]);
244 F3(B, C, D, E, A, PT[3]);
245 P2 = prep(W2, W3, W0, W1, K60_79);
// ---- F4 phase: 20 calls, same argument rotation ----
248 F4(A, B, C, D, E, PT[0]);
249 F4(E, A, B, C, D, PT[1]);
250 F4(D, E, A, B, C, PT[2]);
251 F4(C, D, E, A, B, PT[3]);
// Last prep call in the iteration; the remaining F4 groups only read PT.
252 P3 = prep(W3, W0, W1, W2, K60_79);
255 F4(B, C, D, E, A, PT[0]);
256 F4(A, B, C, D, E, PT[1]);
257 F4(E, A, B, C, D, PT[2]);
258 F4(D, E, A, B, C, PT[3]);
261 F4(C, D, E, A, B, PT[0]);
262 F4(B, C, D, E, A, PT[1]);
263 F4(A, B, C, D, E, PT[2]);
264 F4(E, A, B, C, D, PT[3]);
267 F4(D, E, A, B, C, PT[0]);
268 F4(C, D, E, A, B, PT[1]);
269 F4(B, C, D, E, A, PT[2]);
270 F4(A, B, C, D, E, PT[3]);
273 F4(E, A, B, C, D, PT[0]);
274 F4(D, E, A, B, C, PT[1]);
275 F4(C, D, E, A, B, PT[2]);
276 F4(B, C, D, E, A, PT[3]);
// Add each working value into its digest word, then reload the working
// value from the updated digest so the next iteration starts from it.
278 A = (digest[0] += A);
279 B = (digest[1] += B);
280 C = (digest[2] += C);
281 D = (digest[3] += D);
282 E = (digest[4] += E);
static SIMD_4x32 load_be(const void *in) noexcept
native_simd_type raw() const noexcept
void store_le(uint32_t out[4]) const noexcept
SIMD_4x32 shift_elems_left() const noexcept
SIMD_4x32 shift_elems_right() const noexcept
SIMD_4x32 rotl() const noexcept
static SIMD_4x32 splat(uint32_t B) noexcept
#define BOTAN_FUNC_ISA(isa)
#define BOTAN_FORCE_INLINE
constexpr T rotl(T input)
constexpr T choose(T mask, T a, T b)
constexpr T majority(T a, T b, T c)