7#include <botan/internal/sha2_32.h> 
    9#include <botan/internal/bit_ops.h> 
   10#include <botan/internal/isa_extn.h> 
   11#include <botan/internal/rotate.h> 
   12#include <botan/internal/sha2_32_f.h> 
   13#include <botan/internal/simd_4x32.h> 
   14#include <botan/internal/simd_avx2.h> 
   15#include <botan/internal/stack_scrubbing.h> 
   24   return SIMD_4x32(_mm_alignr_epi8(a.raw(), b.raw(), 4));
 
   29   return SIMD_4x32(_mm_srli_epi64(a.raw(), S));
 
   34   return SIMD_4x32(_mm_shuffle_epi32(a.raw(), S));
 
   38   return SIMD_8x32(_mm256_alignr_epi8(a.raw(), b.raw(), 4));
 
   43   return SIMD_8x32(_mm256_srli_epi64(a.raw(), S));
 
   48   return SIMD_8x32(_mm256_shuffle_epi32(a.raw(), S));
 
   51template <
typename SIMD_T>
 
   53   constexpr size_t sigma0_0 = 7;
 
   54   constexpr size_t sigma0_1 = 18;
 
   55   constexpr size_t sigma0_2 = 3;
 
   56   constexpr size_t sigma1_0 = 17;
 
   57   constexpr size_t sigma1_1 = 19;
 
   58   constexpr size_t sigma1_2 = 10;
 
   60   const SIMD_T lo_mask = SIMD_T(0x03020100, 0x0b0a0908, 0x80808080, 0x80808080);
 
   61   const SIMD_T hi_mask = SIMD_T(0x80808080, 0x80808080, 0x03020100, 0x0b0a0908);
 
   63   auto t0 = alignr4(x[1], x[0]);
 
   64   x[0] += alignr4(x[3], x[2]);
 
   66   auto t1 = t0.template 
shl<32 - sigma0_1>();
 
   67   auto t2 = t0.template shr<sigma0_0>();
 
   68   auto t3 = t0.template shr<sigma0_2>();
 
   71   t3 = shuffle_32<0b11111010>(x[3]);
 
   72   t2 = t2.template shr<sigma0_1 - sigma0_0>();
 
   74   t1 = t1.template 
shl<sigma0_1 - sigma0_0>();
 
   75   t2 = t3.template shr<sigma1_2>();
 
   76   t3 = shr64<sigma1_0>(t3);
 
   80   t3 = shr64<sigma1_1 - sigma1_0>(t3);
 
   81   x[0] += SIMD_T::byte_shuffle(t2 ^ t3, lo_mask);
 
   83   t3 = shuffle_32<0b01010000>(x[0]);
 
   84   t2 = t3.template shr<sigma1_2>();
 
   85   t3 = shr64<sigma1_0>(t3);
 
   87   t3 = shr64<sigma1_1 - sigma1_0>(t3);
 
   88   x[0] += SIMD_T::byte_shuffle(t2 ^ t3, hi_mask);
 
   90   const auto tmp = x[0];
 
  102   digest_type& digest, std::span<const uint8_t> input, 
size_t blocks) {
 
  // K: the 64 SHA-256 round constants (32-bit words, as listed in FIPS 180-4),
  // stored in round order, eight per row, in a 64-byte aligned read-only array.
  105   alignas(64) 
const uint32_t K[64] = {
 
  106      0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
 
  107      0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
 
  108      0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
 
  109      0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
 
  110      0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
 
  111      0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
 
  112      0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
 
  113      0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2};
 
  // K2: the same 64 round constants as K, but each consecutive group of four
  // words is written twice in a row, so every 8-word row is {k0,k1,k2,k3,
  // k0,k1,k2,k3} and the table holds 2 * 64 words in total.
  // NOTE(review): presumably laid out this way so that a single 256-bit load
  // supplies identical constants to both 128-bit halves when two blocks are
  // processed together - confirm against the code that reads K2.
  115   alignas(64) 
const uint32_t K2[2 * 64] = {
 
  116      0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
 
  117      0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
 
  118      0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
 
  119      0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
 
  120      0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
 
  121      0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
 
  122      0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
 
  123      0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
 
  124      0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
 
  125      0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
 
  126      0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
 
  127      0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
 
  128      0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
 
  129      0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
 
  130      0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
 
  131      0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2};
 
  // Scratch word buffers, both 64-byte aligned. W holds 16 words and feeds the
  // SHA2_32_F rounds that index W[...]. W2 holds 64 words: the two-pointer
  // store_le128 calls write into W and W2 together, and W2[0]..W2[63] are
  // later read by a straight run of 64 SHA2_32_F rounds.
  // NOTE(review): W2 appears to carry the precomputed round inputs for a
  // second block handled alongside the first - confirm.
  135   alignas(64) uint32_t W[16];
 
  136   alignas(64) uint32_t W2[64];
 
  // Working variables A..H start as copies of the eight digest words; after
  // each 64-round pass they are added back into digest[0..7] and reloaded.
  138   uint32_t A = digest[0];
 
  139   uint32_t B = digest[1];
 
  140   uint32_t C = digest[2];
 
  141   uint32_t D = digest[3];
 
  142   uint32_t E = digest[4];
 
  143   uint32_t F = digest[5];
 
  144   uint32_t G = digest[6];
 
  145   uint32_t H = digest[7];
 
  // Raw byte pointer to the start of the caller-supplied input span.
  147   const uint8_t* data = input.data();
 
  152      for(
size_t i = 0; i < 4; i++) {
 
  155         WK.store_le128(&W[4 * i], &W2[4 * i]);
 
  161      for(
size_t r = 0; r != 48; r += 16) {
 
  169         w.store_le128(&W[0], &W2[r + 16]);
 
  178         w.store_le128(&W[4], &W2[r + 20]);
 
  184         SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
 
  185         SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
 
  187         w.store_le128(&W[8], &W2[r + 24]);
 
  191         SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
 
  192         SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
 
  193         SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
 
  194         SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
 
  196         w.store_le128(&W[12], &W2[r + 28]);
 
  209      SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
 
  210      SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
 
  211      SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
 
  212      SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
 
  213      SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
 
  214      SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
 
  216      A = (digest[0] += A);
 
  217      B = (digest[1] += B);
 
  218      C = (digest[2] += C);
 
  219      D = (digest[3] += D);
 
  220      E = (digest[4] += E);
 
  221      F = (digest[5] += F);
 
  222      G = (digest[6] += G);
 
  223      H = (digest[7] += H);
 
  226      SHA2_32_F(A, B, C, D, E, F, G, H, W2[0]);
 
  227      SHA2_32_F(H, A, B, C, D, E, F, G, W2[1]);
 
  228      SHA2_32_F(G, H, A, B, C, D, E, F, W2[2]);
 
  229      SHA2_32_F(F, G, H, A, B, C, D, E, W2[3]);
 
  230      SHA2_32_F(E, F, G, H, A, B, C, D, W2[4]);
 
  231      SHA2_32_F(D, E, F, G, H, A, B, C, W2[5]);
 
  232      SHA2_32_F(C, D, E, F, G, H, A, B, W2[6]);
 
  233      SHA2_32_F(B, C, D, E, F, G, H, A, W2[7]);
 
  234      SHA2_32_F(A, B, C, D, E, F, G, H, W2[8]);
 
  235      SHA2_32_F(H, A, B, C, D, E, F, G, W2[9]);
 
  236      SHA2_32_F(G, H, A, B, C, D, E, F, W2[10]);
 
  237      SHA2_32_F(F, G, H, A, B, C, D, E, W2[11]);
 
  238      SHA2_32_F(E, F, G, H, A, B, C, D, W2[12]);
 
  239      SHA2_32_F(D, E, F, G, H, A, B, C, W2[13]);
 
  240      SHA2_32_F(C, D, E, F, G, H, A, B, W2[14]);
 
  241      SHA2_32_F(B, C, D, E, F, G, H, A, W2[15]);
 
  243      SHA2_32_F(A, B, C, D, E, F, G, H, W2[16]);
 
  244      SHA2_32_F(H, A, B, C, D, E, F, G, W2[17]);
 
  245      SHA2_32_F(G, H, A, B, C, D, E, F, W2[18]);
 
  246      SHA2_32_F(F, G, H, A, B, C, D, E, W2[19]);
 
  247      SHA2_32_F(E, F, G, H, A, B, C, D, W2[20]);
 
  248      SHA2_32_F(D, E, F, G, H, A, B, C, W2[21]);
 
  249      SHA2_32_F(C, D, E, F, G, H, A, B, W2[22]);
 
  250      SHA2_32_F(B, C, D, E, F, G, H, A, W2[23]);
 
  251      SHA2_32_F(A, B, C, D, E, F, G, H, W2[24]);
 
  252      SHA2_32_F(H, A, B, C, D, E, F, G, W2[25]);
 
  253      SHA2_32_F(G, H, A, B, C, D, E, F, W2[26]);
 
  254      SHA2_32_F(F, G, H, A, B, C, D, E, W2[27]);
 
  255      SHA2_32_F(E, F, G, H, A, B, C, D, W2[28]);
 
  256      SHA2_32_F(D, E, F, G, H, A, B, C, W2[29]);
 
  257      SHA2_32_F(C, D, E, F, G, H, A, B, W2[30]);
 
  258      SHA2_32_F(B, C, D, E, F, G, H, A, W2[31]);
 
  260      SHA2_32_F(A, B, C, D, E, F, G, H, W2[32]);
 
  261      SHA2_32_F(H, A, B, C, D, E, F, G, W2[33]);
 
  262      SHA2_32_F(G, H, A, B, C, D, E, F, W2[34]);
 
  263      SHA2_32_F(F, G, H, A, B, C, D, E, W2[35]);
 
  264      SHA2_32_F(E, F, G, H, A, B, C, D, W2[36]);
 
  265      SHA2_32_F(D, E, F, G, H, A, B, C, W2[37]);
 
  266      SHA2_32_F(C, D, E, F, G, H, A, B, W2[38]);
 
  267      SHA2_32_F(B, C, D, E, F, G, H, A, W2[39]);
 
  268      SHA2_32_F(A, B, C, D, E, F, G, H, W2[40]);
 
  269      SHA2_32_F(H, A, B, C, D, E, F, G, W2[41]);
 
  270      SHA2_32_F(G, H, A, B, C, D, E, F, W2[42]);
 
  271      SHA2_32_F(F, G, H, A, B, C, D, E, W2[43]);
 
  272      SHA2_32_F(E, F, G, H, A, B, C, D, W2[44]);
 
  273      SHA2_32_F(D, E, F, G, H, A, B, C, W2[45]);
 
  274      SHA2_32_F(C, D, E, F, G, H, A, B, W2[46]);
 
  275      SHA2_32_F(B, C, D, E, F, G, H, A, W2[47]);
 
  277      SHA2_32_F(A, B, C, D, E, F, G, H, W2[48]);
 
  278      SHA2_32_F(H, A, B, C, D, E, F, G, W2[49]);
 
  279      SHA2_32_F(G, H, A, B, C, D, E, F, W2[50]);
 
  280      SHA2_32_F(F, G, H, A, B, C, D, E, W2[51]);
 
  281      SHA2_32_F(E, F, G, H, A, B, C, D, W2[52]);
 
  282      SHA2_32_F(D, E, F, G, H, A, B, C, W2[53]);
 
  283      SHA2_32_F(C, D, E, F, G, H, A, B, W2[54]);
 
  284      SHA2_32_F(B, C, D, E, F, G, H, A, W2[55]);
 
  285      SHA2_32_F(A, B, C, D, E, F, G, H, W2[56]);
 
  286      SHA2_32_F(H, A, B, C, D, E, F, G, W2[57]);
 
  287      SHA2_32_F(G, H, A, B, C, D, E, F, W2[58]);
 
  288      SHA2_32_F(F, G, H, A, B, C, D, E, W2[59]);
 
  289      SHA2_32_F(E, F, G, H, A, B, C, D, W2[60]);
 
  290      SHA2_32_F(D, E, F, G, H, A, B, C, W2[61]);
 
  291      SHA2_32_F(C, D, E, F, G, H, A, B, W2[62]);
 
  292      SHA2_32_F(B, C, D, E, F, G, H, A, W2[63]);
 
  294      A = (digest[0] += A);
 
  295      B = (digest[1] += B);
 
  296      C = (digest[2] += C);
 
  297      D = (digest[3] += D);
 
  298      E = (digest[4] += E);
 
  299      F = (digest[5] += F);
 
  300      G = (digest[6] += G);
 
  301      H = (digest[7] += H);
 
  307      for(
size_t i = 0; i < 4; i++) {
 
  310         WK.store_le(&W[4 * i]);
 
  316      for(
size_t r = 0; r != 48; r += 16) {
 
  339         SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
 
  340         SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
 
  346         SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
 
  347         SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
 
  348         SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
 
  349         SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
 
  364      SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
 
  365      SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
 
  366      SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
 
  367      SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
 
  368      SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
 
  369      SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
 
  371      A = (digest[0] += A);
 
  372      B = (digest[1] += B);
 
  373      C = (digest[2] += C);
 
  374      D = (digest[3] += D);
 
  375      E = (digest[4] += E);
 
  376      F = (digest[5] += F);
 
  377      G = (digest[6] += G);
 
  378      H = (digest[7] += H);
 
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
 
static SIMD_4x32 load_le(const void *in) noexcept
 
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_le(const uint8_t *in) noexcept
 
static BOTAN_FN_ISA_AVX2 SIMD_8x32 load_be128(const uint8_t in1[], const uint8_t in2[]) noexcept
 
#define BOTAN_FORCE_INLINE
 
BOTAN_FORCE_INLINE void SHA2_32_F(uint32_t A, uint32_t B, uint32_t C, uint32_t &D, uint32_t E, uint32_t F, uint32_t G, uint32_t &H, uint32_t &M1, uint32_t M2, uint32_t M3, uint32_t M4, uint32_t magic)
 
SIMD_4x32 shl(SIMD_4x32 input)
 
#define BOTAN_SCRUB_STACK_AFTER_RETURN