7#include <botan/internal/sha2_64.h> 
    9#include <botan/internal/bit_ops.h> 
   10#include <botan/internal/isa_extn.h> 
   11#include <botan/internal/rotate.h> 
   12#include <botan/internal/sha2_64_f.h> 
   13#include <botan/internal/simd_2x64.h> 
   14#include <botan/internal/simd_4x64.h> 
   // Fragment of a function template parameterised on a SIMD vector type.
   // The line carrying the function name and parameter list, and the lines
   // that close the body, are not present in this listing; `x` is indexed
   // from 0 to 7 below.
   20template <
typename SIMD_T>
 
   // t0 combines x[1] with x[0], t1 combines x[5] with x[4].
   // NOTE(review): alignr8 presumably yields the words offset by one 64-bit
   // lane across the two vectors - confirm in the SIMD headers.
   22   auto t0 = SIMD_T::alignr8(x[1], x[0]);
 
   23   auto t1 = SIMD_T::alignr8(x[5], x[4]);
 
   // s0 = rotr(t0, 1) ^ rotr(t0, 8) ^ (t0 >> 7); these amounts are the ones
   // used by the SHA-512 small sigma0 function in FIPS 180-4.
   25   auto s0 = t0.template 
rotr<1>() ^ t0.template 
rotr<8>() ^ t0.template shr<7>();
 
   // s1 = rotr(x[7], 19) ^ rotr(x[7], 61) ^ (x[7] >> 6); these amounts are the
   // ones used by the SHA-512 small sigma1 function in FIPS 180-4.
   26   auto s1 = x[7].template 
rotr<19>() ^ x[7].template 
rotr<61>() ^ x[7].template shr<6>();
 
   // Lane-wise sum of the four terms. Presumably the next message-schedule
   // words; what is done with nx afterwards is on lines not shown.
   28   auto nx = x[0] + s0 + s1 + t1;
 
   // SHA_512::compress_digest_x86_avx2: takes the digest state by reference
   // and the input bytes as a span. The parameter list continues past the
   // trailing comma below; the rest of it and the opening brace are on lines
   // not present in this listing.
   // NOTE(review): BOTAN_FN_ISA_AVX2_BMI2 presumably enables AVX2/BMI2 code
   // generation for this function - confirm in isa_extn.h.
   44BOTAN_FN_ISA_AVX2_BMI2 
void SHA_512::compress_digest_x86_avx2(digest_type& digest,
 
   45                                                              std::span<const uint8_t> input,
 
   // K: 80 64-bit constants, 64-byte aligned. The values are the SHA-512
   // round constants from FIPS 180-4. The closing brace of the initializer is
   // not present in this listing.
   48   alignas(64) 
const uint64_t K[80] = {
 
   49      0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
 
   50      0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
 
   51      0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
 
   52      0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694,
 
   53      0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
 
   54      0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
 
   55      0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
 
   56      0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70,
 
   57      0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
 
   58      0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B,
 
   59      0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30,
 
   60      0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8,
 
   61      0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
 
   62      0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
 
   63      0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC,
 
   64      0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
 
   65      0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
 
   66      0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B,
 
   67      0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
 
   68      0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817,
 
   // K2: 160 64-bit constants, 64-byte aligned. Each row holds one
   // consecutive pair of the K constants written twice
   // (k[2j], k[2j+1], k[2j], k[2j+1]).
   // NOTE(review): presumably laid out so that one 4x64 vector load supplies
   // the same pair of round constants to both halves of the vector - confirm
   // at the use site, which is not present in this listing. The closing brace
   // of the initializer is also not shown.
   73   alignas(64) 
const uint64_t K2[2 * 80] = {
 
   74      0x428A2F98D728AE22, 0x7137449123EF65CD, 0x428A2F98D728AE22, 0x7137449123EF65CD,
 
   75      0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
 
   76      0x3956C25BF348B538, 0x59F111F1B605D019, 0x3956C25BF348B538, 0x59F111F1B605D019,
 
   77      0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
 
   78      0xD807AA98A3030242, 0x12835B0145706FBE, 0xD807AA98A3030242, 0x12835B0145706FBE,
 
   79      0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
 
   80      0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1,
 
   81      0x9BDC06A725C71235, 0xC19BF174CF692694, 0x9BDC06A725C71235, 0xC19BF174CF692694,
 
   82      0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3,
 
   83      0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
 
   84      0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483,
 
   85      0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
 
   86      0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0x983E5152EE66DFAB, 0xA831C66D2DB43210,
 
   87      0xB00327C898FB213F, 0xBF597FC7BEEF0EE4, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
 
   88      0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0xC6E00BF33DA88FC2, 0xD5A79147930AA725,
 
   89      0x06CA6351E003826F, 0x142929670A0E6E70, 0x06CA6351E003826F, 0x142929670A0E6E70,
 
   90      0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x27B70A8546D22FFC, 0x2E1B21385C26C926,
 
   91      0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
 
   92      0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8,
 
   93      0x81C2C92E47EDAEE6, 0x92722C851482353B, 0x81C2C92E47EDAEE6, 0x92722C851482353B,
 
   94      0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xA2BFE8A14CF10364, 0xA81A664BBC423001,
 
   95      0xC24B8B70D0F89791, 0xC76C51A30654BE30, 0xC24B8B70D0F89791, 0xC76C51A30654BE30,
 
   96      0xD192E819D6EF5218, 0xD69906245565A910, 0xD192E819D6EF5218, 0xD69906245565A910,
 
   97      0xF40E35855771202A, 0x106AA07032BBD1B8, 0xF40E35855771202A, 0x106AA07032BBD1B8,
 
   98      0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x19A4C116B8D2D0C8, 0x1E376C085141AB53,
 
   99      0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
 
  100      0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB,
 
  101      0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
 
  102      0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x748F82EE5DEFB2FC, 0x78A5636F43172F60,
 
  103      0x84C87814A1F0AB72, 0x8CC702081A6439EC, 0x84C87814A1F0AB72, 0x8CC702081A6439EC,
 
  104      0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9,
 
  105      0xBEF9A3F7B2C67915, 0xC67178F2E372532B, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
 
  106      0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xCA273ECEEA26619C, 0xD186B8C721C0C207,
 
  107      0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
 
  108      0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x06F067AA72176FBA, 0x0A637DC5A2C898A6,
 
  109      0x113F9804BEF90DAE, 0x1B710B35131C471B, 0x113F9804BEF90DAE, 0x1B710B35131C471B,
 
  110      0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x28DB77F523047D84, 0x32CAAB7B40C72493,
 
  111      0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
 
  112      0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A,
 
  113      0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817,
 
  // Working storage, both 64-byte aligned: W holds 16 words and is
  // zero-initialized; W2 holds 80 words and is left uninitialized here (it is
  // written by the store_le2 calls before being read).
  117   alignas(64) uint64_t W[16] = {0};
 
  118   alignas(64) uint64_t W2[80];
 
  // Working variables A..H start as copies of the eight digest words.
  120   uint64_t A = digest[0];
 
  121   uint64_t B = digest[1];
 
  122   uint64_t C = digest[2];
 
  123   uint64_t D = digest[3];
 
  124   uint64_t E = digest[4];
 
  125   uint64_t F = digest[5];
 
  126   uint64_t G = digest[6];
 
  127   uint64_t H = digest[7];
 
  // Raw pointer to the first input byte.
  129   const uint8_t* data = input.data();
 
  // Pass that fills both W and W2. The enclosing loop/condition header, the
  // loads that produce WK and w, and the SHA2_64_F calls for W[0]..W[9] are on
  // lines not present in this listing.
  // NOTE(review): the two-destination store_le2 suggests two input blocks are
  // scheduled together, the first consumed through W right away and the
  // second kept in W2 for later - confirm against the full source. The name
  // WK suggests message words with round constants already added - confirm.
  134      for(
size_t i = 0; i < 8; i++) {
 
  137         WK.store_le2(&W[2 * i], &W2[2 * i]);
 
  // r takes the values 0, 16, 32, 48. Each visible store writes w to a slot in
  // W (offsets 0, 2, ..., 14) and to W2 at index r + 16 + the same offset.
  144      for(
size_t r = 0; r != 64; r += 16) {
 
  148         w.store_le2(&W[0], &W2[r + 16]);
 
  153         w.store_le2(&W[2], &W2[r + 18]);
 
  158         w.store_le2(&W[4], &W2[r + 20]);
 
  163         w.store_le2(&W[6], &W2[r + 22]);
 
  168         w.store_le2(&W[8], &W2[r + 24]);
 
  // The argument list rotates by one position per call rather than moving
  // values between A..H. W[10] and W[11] are consumed before the store below
  // overwrites them; the same ordering holds for W[12..13] and W[14..15].
  171         SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
 
  172         SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
 
  173         w.store_le2(&W[10], &W2[r + 26]);
 
  176         SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
 
  177         SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
 
  178         w.store_le2(&W[12], &W2[r + 28]);
 
  181         SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
 
  182         SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
 
  183         w.store_le2(&W[14], &W2[r + 30]);
 
  // Calls consuming W[10]..W[15] with no further stores; the matching calls
  // for W[0]..W[9] are not present in this listing.
  197      SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
 
  198      SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
 
  199      SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
 
  200      SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
 
  201      SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
 
  202      SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
 
  // Add each working variable into its digest word, and reload the working
  // variable with the updated digest word for the calls that follow.
  204      A = (digest[0] += A);
 
  205      B = (digest[1] += B);
 
  206      C = (digest[2] += C);
 
  207      D = (digest[3] += D);
 
  208      E = (digest[4] += E);
 
  209      F = (digest[5] += F);
 
  210      G = (digest[6] += G);
 
  211      H = (digest[7] += H);
 
  // 80 unrolled SHA2_64_F calls consuming W2[0]..W2[79] in index order. The
  // argument list rotates by one position per call and repeats every 8 calls.
  // NOTE(review): presumably this runs the rounds for the second of two
  // blocks using the values stored into W2 by the store_le2 calls - confirm.
  214      SHA2_64_F(A, B, C, D, E, F, G, H, W2[0]);
 
  215      SHA2_64_F(H, A, B, C, D, E, F, G, W2[1]);
 
  216      SHA2_64_F(G, H, A, B, C, D, E, F, W2[2]);
 
  217      SHA2_64_F(F, G, H, A, B, C, D, E, W2[3]);
 
  218      SHA2_64_F(E, F, G, H, A, B, C, D, W2[4]);
 
  219      SHA2_64_F(D, E, F, G, H, A, B, C, W2[5]);
 
  220      SHA2_64_F(C, D, E, F, G, H, A, B, W2[6]);
 
  221      SHA2_64_F(B, C, D, E, F, G, H, A, W2[7]);
 
  222      SHA2_64_F(A, B, C, D, E, F, G, H, W2[8]);
 
  223      SHA2_64_F(H, A, B, C, D, E, F, G, W2[9]);
 
  224      SHA2_64_F(G, H, A, B, C, D, E, F, W2[10]);
 
  225      SHA2_64_F(F, G, H, A, B, C, D, E, W2[11]);
 
  226      SHA2_64_F(E, F, G, H, A, B, C, D, W2[12]);
 
  227      SHA2_64_F(D, E, F, G, H, A, B, C, W2[13]);
 
  228      SHA2_64_F(C, D, E, F, G, H, A, B, W2[14]);
 
  229      SHA2_64_F(B, C, D, E, F, G, H, A, W2[15]);
 
  231      SHA2_64_F(A, B, C, D, E, F, G, H, W2[16]);
 
  232      SHA2_64_F(H, A, B, C, D, E, F, G, W2[17]);
 
  233      SHA2_64_F(G, H, A, B, C, D, E, F, W2[18]);
 
  234      SHA2_64_F(F, G, H, A, B, C, D, E, W2[19]);
 
  235      SHA2_64_F(E, F, G, H, A, B, C, D, W2[20]);
 
  236      SHA2_64_F(D, E, F, G, H, A, B, C, W2[21]);
 
  237      SHA2_64_F(C, D, E, F, G, H, A, B, W2[22]);
 
  238      SHA2_64_F(B, C, D, E, F, G, H, A, W2[23]);
 
  239      SHA2_64_F(A, B, C, D, E, F, G, H, W2[24]);
 
  240      SHA2_64_F(H, A, B, C, D, E, F, G, W2[25]);
 
  241      SHA2_64_F(G, H, A, B, C, D, E, F, W2[26]);
 
  242      SHA2_64_F(F, G, H, A, B, C, D, E, W2[27]);
 
  243      SHA2_64_F(E, F, G, H, A, B, C, D, W2[28]);
 
  244      SHA2_64_F(D, E, F, G, H, A, B, C, W2[29]);
 
  245      SHA2_64_F(C, D, E, F, G, H, A, B, W2[30]);
 
  246      SHA2_64_F(B, C, D, E, F, G, H, A, W2[31]);
 
  248      SHA2_64_F(A, B, C, D, E, F, G, H, W2[32]);
 
  249      SHA2_64_F(H, A, B, C, D, E, F, G, W2[33]);
 
  250      SHA2_64_F(G, H, A, B, C, D, E, F, W2[34]);
 
  251      SHA2_64_F(F, G, H, A, B, C, D, E, W2[35]);
 
  252      SHA2_64_F(E, F, G, H, A, B, C, D, W2[36]);
 
  253      SHA2_64_F(D, E, F, G, H, A, B, C, W2[37]);
 
  254      SHA2_64_F(C, D, E, F, G, H, A, B, W2[38]);
 
  255      SHA2_64_F(B, C, D, E, F, G, H, A, W2[39]);
 
  256      SHA2_64_F(A, B, C, D, E, F, G, H, W2[40]);
 
  257      SHA2_64_F(H, A, B, C, D, E, F, G, W2[41]);
 
  258      SHA2_64_F(G, H, A, B, C, D, E, F, W2[42]);
 
  259      SHA2_64_F(F, G, H, A, B, C, D, E, W2[43]);
 
  260      SHA2_64_F(E, F, G, H, A, B, C, D, W2[44]);
 
  261      SHA2_64_F(D, E, F, G, H, A, B, C, W2[45]);
 
  262      SHA2_64_F(C, D, E, F, G, H, A, B, W2[46]);
 
  263      SHA2_64_F(B, C, D, E, F, G, H, A, W2[47]);
 
  265      SHA2_64_F(A, B, C, D, E, F, G, H, W2[48]);
 
  266      SHA2_64_F(H, A, B, C, D, E, F, G, W2[49]);
 
  267      SHA2_64_F(G, H, A, B, C, D, E, F, W2[50]);
 
  268      SHA2_64_F(F, G, H, A, B, C, D, E, W2[51]);
 
  269      SHA2_64_F(E, F, G, H, A, B, C, D, W2[52]);
 
  270      SHA2_64_F(D, E, F, G, H, A, B, C, W2[53]);
 
  271      SHA2_64_F(C, D, E, F, G, H, A, B, W2[54]);
 
  272      SHA2_64_F(B, C, D, E, F, G, H, A, W2[55]);
 
  273      SHA2_64_F(A, B, C, D, E, F, G, H, W2[56]);
 
  274      SHA2_64_F(H, A, B, C, D, E, F, G, W2[57]);
 
  275      SHA2_64_F(G, H, A, B, C, D, E, F, W2[58]);
 
  276      SHA2_64_F(F, G, H, A, B, C, D, E, W2[59]);
 
  277      SHA2_64_F(E, F, G, H, A, B, C, D, W2[60]);
 
  278      SHA2_64_F(D, E, F, G, H, A, B, C, W2[61]);
 
  279      SHA2_64_F(C, D, E, F, G, H, A, B, W2[62]);
 
  280      SHA2_64_F(B, C, D, E, F, G, H, A, W2[63]);
 
  282      SHA2_64_F(A, B, C, D, E, F, G, H, W2[64]);
 
  283      SHA2_64_F(H, A, B, C, D, E, F, G, W2[65]);
 
  284      SHA2_64_F(G, H, A, B, C, D, E, F, W2[66]);
 
  285      SHA2_64_F(F, G, H, A, B, C, D, E, W2[67]);
 
  286      SHA2_64_F(E, F, G, H, A, B, C, D, W2[68]);
 
  287      SHA2_64_F(D, E, F, G, H, A, B, C, W2[69]);
 
  288      SHA2_64_F(C, D, E, F, G, H, A, B, W2[70]);
 
  289      SHA2_64_F(B, C, D, E, F, G, H, A, W2[71]);
 
  290      SHA2_64_F(A, B, C, D, E, F, G, H, W2[72]);
 
  291      SHA2_64_F(H, A, B, C, D, E, F, G, W2[73]);
 
  292      SHA2_64_F(G, H, A, B, C, D, E, F, W2[74]);
 
  293      SHA2_64_F(F, G, H, A, B, C, D, E, W2[75]);
 
  294      SHA2_64_F(E, F, G, H, A, B, C, D, W2[76]);
 
  295      SHA2_64_F(D, E, F, G, H, A, B, C, W2[77]);
 
  296      SHA2_64_F(C, D, E, F, G, H, A, B, W2[78]);
 
  297      SHA2_64_F(B, C, D, E, F, G, H, A, W2[79]);
 
  // Add each working variable into its digest word, and reload the working
  // variable with the updated digest word.
  299      A = (digest[0] += A);
 
  300      B = (digest[1] += B);
 
  301      C = (digest[2] += C);
 
  302      D = (digest[3] += D);
 
  303      E = (digest[4] += E);
 
  304      F = (digest[5] += F);
 
  305      G = (digest[6] += G);
 
  306      H = (digest[7] += H);
 
  // Pass that stores only into W (store_le with a single destination, no W2).
  // The enclosing loop/condition header, the loads that produce WK, and all
  // statements that refill W inside the r loop are on lines not present in
  // this listing.
  // NOTE(review): presumably the one-block-at-a-time path for input left
  // over after the paired pass - confirm against the full source.
  312      for(
size_t i = 0; i < 8; i++) {
 
  315         WK.store_le(&W[2 * i]);
 
  // r takes the values 0, 16, 32, 48. Only the calls consuming W[10]..W[15]
  // are visible; the argument list rotates by one position per call.
  322      for(
size_t r = 0; r != 64; r += 16) {
 
  349         SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
 
  350         SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
 
  354         SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
 
  355         SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
 
  359         SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
 
  360         SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
 
  // Calls consuming W[10]..W[15] after the loop; the matching calls for
  // W[0]..W[9] are not present in this listing.
  375      SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
 
  376      SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
 
  377      SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
 
  378      SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
 
  379      SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
 
  380      SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
 
  // Add each working variable into its digest word, and reload the working
  // variable with the updated digest word.
  382      A = (digest[0] += A);
 
  383      B = (digest[1] += B);
 
  384      C = (digest[2] += C);
 
  385      D = (digest[3] += D);
 
  386      E = (digest[4] += E);
 
  387      F = (digest[5] += F);
 
  388      G = (digest[6] += G);
 
  389      H = (digest[7] += H);
 
// Declaration summaries for helpers used by the code in this listing. None
// has a body or a terminating semicolon.
// NOTE(review): these look like cross-reference residue from the listing
// tool rather than part of this translation unit - confirm, and drop them
// if so.
static SIMD_2x64 load_le(const void *in)
 
static SIMD_2x64 load_be(const void *in)
 
// Takes two source pointers (inl, inh) and returns a single SIMD_4x64.
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be2(const void *inl, const void *inh)
 
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le(const void *in)
 
// Shown here expanding to nothing.
#define BOTAN_FORCE_INLINE
 
// NOTE(review): this signature lists 13 parameters (D, H and M1 taken by
// reference), whereas every SHA2_64_F call in this listing passes 9
// arguments, so those calls presumably resolve to a different overload -
// verify in sha2_64_f.h.
BOTAN_FORCE_INLINE void SHA2_64_F(uint64_t A, uint64_t B, uint64_t C, uint64_t &D, uint64_t E, uint64_t F, uint64_t G, uint64_t &H, uint64_t &M1, uint64_t M2, uint64_t M3, uint64_t M4, uint64_t magic)
 
// The template header that introduces T is not present in this listing.
BOTAN_FORCE_INLINE constexpr T rotr(T input)