7#include <botan/internal/sha2_32.h>
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/sha2_32_f.h>
11#include <botan/internal/simd_4x32.h>
12#include <botan/internal/simd_avx2.h>
13#include <botan/internal/stack_scrubbing.h>
// NOTE(review): the six statements below are the bodies of small SIMD helper functions
// whose signatures are not visible in this excerpt. Judging by later call sites they are
// presumably alignr4(), shr64<S>() and shuffle_32<S>() for SIMD_4x32 and SIMD_8x32 — confirm.

// 128-bit: concatenate a (high) and b (low), shift right by 4 bytes, keep the low 128 bits.
22 return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), 4));
// 128-bit: logical right shift of each 64-bit lane by S bits (S is presumably a template argument).
27 return SIMD_4x32(_mm_srli_epi64(a.raw(), S));
// 128-bit: permute the four 32-bit words according to the immediate S.
32 return SIMD_4x32(_mm_shuffle_epi32(a.raw(), S));
// 256-bit: same byte alignment, applied independently within each 128-bit lane.
36 return SIMD_8x32(_mm256_alignr_epi8(a.raw(), b.raw(), 4));
// 256-bit: logical right shift of each 64-bit lane by S bits.
41 return SIMD_8x32(_mm256_srli_epi64(a.raw(), S));
// 256-bit: 32-bit word permutation by S, applied independently within each 128-bit lane.
46 return SIMD_8x32(_mm256_shuffle_epi32(a.raw(), S));
// Message-schedule helper, templated on the SIMD vector type. Its signature line is not
// visible in this excerpt, and the jumps in the leading numbers show that some statements
// are omitted. It reads and updates x[0..3]; presumably these hold the sliding 16-word
// schedule window, four 32-bit words per 128-bit lane — TODO confirm against the caller.
49template <
typename SIMD_T>
// Rotate/shift amounts of the SHA-256 small sigma functions (FIPS 180-4):
//   sigma0(w) = ROTR7(w)  ^ ROTR18(w) ^ SHR3(w)
//   sigma1(w) = ROTR17(w) ^ ROTR19(w) ^ SHR10(w)
51 constexpr size_t sigma0_0 = 7;
52 constexpr size_t sigma0_1 = 18;
53 constexpr size_t sigma0_2 = 3;
54 constexpr size_t sigma1_0 = 17;
55 constexpr size_t sigma1_1 = 19;
56 constexpr size_t sigma1_2 = 10;
// Byte-shuffle index tables used below to place the two sigma1 results in the low
// (lo_mask) or high (hi_mask) pair of 32-bit words. 0x03020100 / 0x0b0a0908 index source
// bytes 0-3 and 8-11; the 0x80 entries presumably clear the remaining words (pshufb
// convention) — confirm against SIMD_T::byte_shuffle and the constructor's argument order.
58 const SIMD_T lo_mask = SIMD_T(0x03020100, 0x0b0a0908, 0x80808080, 0x80808080);
59 const SIMD_T hi_mask = SIMD_T(0x80808080, 0x80808080, 0x03020100, 0x0b0a0908);
// t0: window starting one word into x[0] (spilling into x[1]); input to sigma0.
61 auto t0 = alignr4(x[1], x[0]);
// Add the window starting one word into x[2] (spilling into x[3]) to x[0];
// presumably the W[t-7] term of the schedule recurrence.
62 x[0] += alignr4(x[3], x[2]);
// Partial terms of sigma0. A rotate is assembled from a left and a right shift:
// shl by (32 - 18) is the wrapped-around part of ROTR18.
64 auto t1 = t0.template
shl<32 - sigma0_1>();
// Right-shifted part of ROTR7.
65 auto t2 = t0.template shr<sigma0_0>();
// SHR3 term.
66 auto t3 = t0.template shr<sigma0_2>();
// Immediate 0b11111010 yields words (2, 2, 3, 3) of x[3]. With each word duplicated in a
// 64-bit lane, a 64-bit right shift leaves the 32-bit rotate in the low half of the lane.
69 t3 = shuffle_32<0b11111010>(x[3]);
// 7 + (18 - 7) = 18: t2 now holds the right-shifted part of ROTR18.
70 t2 = t2.template shr<sigma0_1 - sigma0_0>();
// (32 - 18) + (18 - 7) = 32 - 7: t1 now holds the wrapped-around part of ROTR7.
72 t1 = t1.template
shl<sigma0_1 - sigma0_0>();
// SHR10 term of sigma1 for the duplicated words.
73 t2 = t3.template shr<sigma1_2>();
// Low 32 bits of each 64-bit lane now equal ROTR17 of the duplicated word.
74 t3 = shr64<sigma1_0>(t3);
// Two more bits: low 32 bits of each 64-bit lane now equal ROTR19.
78 t3 = shr64<sigma1_1 - sigma1_0>(t3);
// Combine the sigma1 terms and add them into the low two words of x[0].
79 x[0] += SIMD_T::byte_shuffle(t2 ^ t3, lo_mask);
// Immediate 0b01010000 yields words (0, 0, 1, 1) of the just-updated x[0]; the upper two
// output words depend on the two words computed above, so sigma1 is evaluated a second time.
81 t3 = shuffle_32<0b01010000>(x[0]);
82 t2 = t3.template shr<sigma1_2>();
83 t3 = shr64<sigma1_0>(t3);
85 t3 = shr64<sigma1_1 - sigma1_0>(t3);
// Add the second pair of sigma1 results into the high two words of x[0].
86 x[0] += SIMD_T::byte_shuffle(t2 ^ t3, hi_mask);
// Snapshot of the newly computed words; how x[] is updated afterwards is not visible here.
88 const auto tmp = x[0];
// Fragment of the SHA-256 compression routine (function name and return type are not
// visible in this excerpt, and the jumps in the leading numbers show omitted statements).
// Parameters visible here: digest (eight 32-bit state words, updated in place), input
// (message bytes) and blocks (presumably the number of 64-byte blocks — TODO confirm).
100 digest_type& digest, std::span<const uint8_t> input,
size_t blocks) {
// SHA-256 round constants (FIPS 180-4, section 4.2.2), in round order.
103 alignas(64)
const uint32_t K[64] = {
104 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
105 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
106 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
107 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
108 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
109 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
110 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
111 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2};
// The same constants with every group of four repeated twice, so that one 256-bit load
// carries identical constants in both 128-bit lanes (presumably one lane per message block).
113 alignas(64)
const uint32_t K2[2 * 64] = {
114 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
115 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
116 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
117 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
118 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
119 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
120 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
121 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
122 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
123 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
124 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
125 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
126 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
127 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
128 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
129 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2};
// W: rolling 16-word buffer consumed by the rounds that run alongside the schedule.
// W2: all 64 round inputs for a second pass, filled by the store_le128 calls below and
// consumed later as W2[0..63].
133 alignas(64) uint32_t W[16];
134 alignas(64) uint32_t W2[64];
// Working variables start from the current chaining value.
136 uint32_t A = digest[0];
137 uint32_t B = digest[1];
138 uint32_t C = digest[2];
139 uint32_t D = digest[3];
140 uint32_t E = digest[4];
141 uint32_t F = digest[5];
142 uint32_t G = digest[6];
143 uint32_t H = digest[7];
145 const uint8_t* data = input.data();
// --- Paired-block path (the enclosing loop header is not visible in this excerpt) ---
// Prime W[0..15] and W2[0..15] from four vectors. The name WK suggests message words with
// the round constant already added — TODO confirm where WK is computed.
150 for(
size_t i = 0; i < 4; i++) {
// store_le128 writes to two destinations; presumably one 128-bit half each — confirm.
153 WK.store_le128(&W[4 * i], &W2[4 * i]);
// Rounds 0..47 in three groups of 16; each group stores the next 16 schedule words:
// W is overwritten in place while W2[r + 16 .. r + 31] keeps the second pass's inputs.
159 for(
size_t r = 0; r != 48; r += 16) {
167 w.store_le128(&W[0], &W2[r + 16]);
176 w.store_le128(&W[4], &W2[r + 20]);
// Instead of moving values, each round rotates the roles of the eight working variables
// by one position in the argument list.
182 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
183 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
185 w.store_le128(&W[8], &W2[r + 24]);
189 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
190 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
191 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
192 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
194 w.store_le128(&W[12], &W2[r + 28]);
// Tail of the last 16 rounds of the first pass (the preceding rounds are not shown here).
207 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
208 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
209 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
210 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
211 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
212 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
// Feed-forward: add the working variables into the digest and reload them from the
// updated digest in the same statement, ready for the next pass.
214 A = (digest[0] += A);
215 B = (digest[1] += B);
216 C = (digest[2] += C);
217 D = (digest[3] += D);
218 E = (digest[4] += E);
219 F = (digest[5] += F);
220 G = (digest[6] += G);
221 H = (digest[7] += H);
// Second pass: all 64 rounds, reading the values saved in W2 during the first pass.
224 SHA2_32_F(A, B, C, D, E, F, G, H, W2[0]);
225 SHA2_32_F(H, A, B, C, D, E, F, G, W2[1]);
226 SHA2_32_F(G, H, A, B, C, D, E, F, W2[2]);
227 SHA2_32_F(F, G, H, A, B, C, D, E, W2[3]);
228 SHA2_32_F(E, F, G, H, A, B, C, D, W2[4]);
229 SHA2_32_F(D, E, F, G, H, A, B, C, W2[5]);
230 SHA2_32_F(C, D, E, F, G, H, A, B, W2[6]);
231 SHA2_32_F(B, C, D, E, F, G, H, A, W2[7]);
232 SHA2_32_F(A, B, C, D, E, F, G, H, W2[8]);
233 SHA2_32_F(H, A, B, C, D, E, F, G, W2[9]);
234 SHA2_32_F(G, H, A, B, C, D, E, F, W2[10]);
235 SHA2_32_F(F, G, H, A, B, C, D, E, W2[11]);
236 SHA2_32_F(E, F, G, H, A, B, C, D, W2[12]);
237 SHA2_32_F(D, E, F, G, H, A, B, C, W2[13]);
238 SHA2_32_F(C, D, E, F, G, H, A, B, W2[14]);
239 SHA2_32_F(B, C, D, E, F, G, H, A, W2[15]);
// Rounds 16..31.
241 SHA2_32_F(A, B, C, D, E, F, G, H, W2[16]);
242 SHA2_32_F(H, A, B, C, D, E, F, G, W2[17]);
243 SHA2_32_F(G, H, A, B, C, D, E, F, W2[18]);
244 SHA2_32_F(F, G, H, A, B, C, D, E, W2[19]);
245 SHA2_32_F(E, F, G, H, A, B, C, D, W2[20]);
246 SHA2_32_F(D, E, F, G, H, A, B, C, W2[21]);
247 SHA2_32_F(C, D, E, F, G, H, A, B, W2[22]);
248 SHA2_32_F(B, C, D, E, F, G, H, A, W2[23]);
249 SHA2_32_F(A, B, C, D, E, F, G, H, W2[24]);
250 SHA2_32_F(H, A, B, C, D, E, F, G, W2[25]);
251 SHA2_32_F(G, H, A, B, C, D, E, F, W2[26]);
252 SHA2_32_F(F, G, H, A, B, C, D, E, W2[27]);
253 SHA2_32_F(E, F, G, H, A, B, C, D, W2[28]);
254 SHA2_32_F(D, E, F, G, H, A, B, C, W2[29]);
255 SHA2_32_F(C, D, E, F, G, H, A, B, W2[30]);
256 SHA2_32_F(B, C, D, E, F, G, H, A, W2[31]);
// Rounds 32..47.
258 SHA2_32_F(A, B, C, D, E, F, G, H, W2[32]);
259 SHA2_32_F(H, A, B, C, D, E, F, G, W2[33]);
260 SHA2_32_F(G, H, A, B, C, D, E, F, W2[34]);
261 SHA2_32_F(F, G, H, A, B, C, D, E, W2[35]);
262 SHA2_32_F(E, F, G, H, A, B, C, D, W2[36]);
263 SHA2_32_F(D, E, F, G, H, A, B, C, W2[37]);
264 SHA2_32_F(C, D, E, F, G, H, A, B, W2[38]);
265 SHA2_32_F(B, C, D, E, F, G, H, A, W2[39]);
266 SHA2_32_F(A, B, C, D, E, F, G, H, W2[40]);
267 SHA2_32_F(H, A, B, C, D, E, F, G, W2[41]);
268 SHA2_32_F(G, H, A, B, C, D, E, F, W2[42]);
269 SHA2_32_F(F, G, H, A, B, C, D, E, W2[43]);
270 SHA2_32_F(E, F, G, H, A, B, C, D, W2[44]);
271 SHA2_32_F(D, E, F, G, H, A, B, C, W2[45]);
272 SHA2_32_F(C, D, E, F, G, H, A, B, W2[46]);
273 SHA2_32_F(B, C, D, E, F, G, H, A, W2[47]);
// Rounds 48..63.
275 SHA2_32_F(A, B, C, D, E, F, G, H, W2[48]);
276 SHA2_32_F(H, A, B, C, D, E, F, G, W2[49]);
277 SHA2_32_F(G, H, A, B, C, D, E, F, W2[50]);
278 SHA2_32_F(F, G, H, A, B, C, D, E, W2[51]);
279 SHA2_32_F(E, F, G, H, A, B, C, D, W2[52]);
280 SHA2_32_F(D, E, F, G, H, A, B, C, W2[53]);
281 SHA2_32_F(C, D, E, F, G, H, A, B, W2[54]);
282 SHA2_32_F(B, C, D, E, F, G, H, A, W2[55]);
283 SHA2_32_F(A, B, C, D, E, F, G, H, W2[56]);
284 SHA2_32_F(H, A, B, C, D, E, F, G, W2[57]);
285 SHA2_32_F(G, H, A, B, C, D, E, F, W2[58]);
286 SHA2_32_F(F, G, H, A, B, C, D, E, W2[59]);
287 SHA2_32_F(E, F, G, H, A, B, C, D, W2[60]);
288 SHA2_32_F(D, E, F, G, H, A, B, C, W2[61]);
289 SHA2_32_F(C, D, E, F, G, H, A, B, W2[62]);
290 SHA2_32_F(B, C, D, E, F, G, H, A, W2[63]);
// Feed-forward for the second pass; the working variables again mirror the digest.
292 A = (digest[0] += A);
293 B = (digest[1] += B);
294 C = (digest[2] += C);
295 D = (digest[3] += D);
296 E = (digest[4] += E);
297 F = (digest[5] += F);
298 G = (digest[6] += G);
299 H = (digest[7] += H);
// --- Single-block path (its enclosing condition/loop is not visible in this excerpt) ---
// Same structure as the first pass above, but only W is written: store_le takes a single
// destination and W2 is not referenced.
305 for(
size_t i = 0; i < 4; i++) {
308 WK.store_le(&W[4 * i]);
// Rounds 0..47 in three groups of 16 (schedule updates between the rounds are not shown).
314 for(
size_t r = 0; r != 48; r += 16) {
337 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
338 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
344 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
345 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
346 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
347 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
// Tail of the last 16 rounds for the single block.
362 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
363 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
364 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
365 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
366 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
367 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
// Feed-forward into the digest, keeping the working variables in sync with it.
369 A = (digest[0] += A);
370 B = (digest[1] += B);
371 C = (digest[2] += C);
372 D = (digest[3] += D);
373 E = (digest[4] += E);
374 F = (digest[5] += F);
375 G = (digest[6] += G);
376 H = (digest[7] += H);
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
static SIMD_4x32 load_le(const void *in) noexcept
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le(const uint8_t *in) noexcept
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_be128(const uint8_t in1[], const uint8_t in2[]) noexcept
#define BOTAN_FORCE_INLINE
BOTAN_FORCE_INLINE void SHA2_32_F(uint32_t A, uint32_t B, uint32_t C, uint32_t &D, uint32_t E, uint32_t F, uint32_t G, uint32_t &H, uint32_t &M1, uint32_t M2, uint32_t M3, uint32_t M4, uint32_t magic)
SIMD_4x32 shl(SIMD_4x32 input)
#define BOTAN_SCRUB_STACK_AFTER_RETURN