Botan 3.9.0
Crypto and TLS for C++
sha2_64_avx2.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sha2_64.h>
8
9#include <botan/internal/bit_ops.h>
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/rotate.h>
12#include <botan/internal/sha2_64_f.h>
13#include <botan/internal/simd_2x64.h>
14#include <botan/internal/simd_4x64.h>
15
16namespace Botan {
17
18namespace {
19
20template <typename SIMD_T>
21BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX2_BMI2 SIMD_T sha512_next_w(SIMD_T x[8]) {
22 auto t0 = SIMD_T::alignr8(x[1], x[0]);
23 auto t1 = SIMD_T::alignr8(x[5], x[4]);
24
25 auto s0 = t0.template rotr<1>() ^ t0.template rotr<8>() ^ t0.template shr<7>();
26 auto s1 = x[7].template rotr<19>() ^ x[7].template rotr<61>() ^ x[7].template shr<6>();
27
28 auto nx = x[0] + s0 + s1 + t1;
29
30 x[0] = x[1];
31 x[1] = x[2];
32 x[2] = x[3];
33 x[3] = x[4];
34 x[4] = x[5];
35 x[5] = x[6];
36 x[6] = x[7];
37 x[7] = nx;
38
39 return x[7];
40}
41
42} // namespace
43
/**
* SHA-512 compression of `blocks` consecutive 128-byte blocks.
*
* @param digest chaining state; digest[0..7] are read into the 64-bit working
*        variables A..H and updated in place after every block
* @param input message bytes; 128 * blocks bytes are read starting at
*        input.data() (input.size() is not checked in this function)
* @param blocks number of 128-byte blocks to process
*
* While at least two blocks remain they are handled as a pair: the message
* schedules of both blocks are expanded together in SIMD_4x64 vectors, the
* first block is compressed while the expansion runs, and the second block is
* then compressed from the values saved in W2. A remaining single block is
* handled with SIMD_2x64 vectors.
*/
44BOTAN_FN_ISA_AVX2_BMI2 void SHA_512::compress_digest_x86_avx2(digest_type& digest,
45 std::span<const uint8_t> input,
46 size_t blocks) {
47 // clang-format off
   // SHA-512 round constants, one per round; added to the message words
   // before the scalar rounds consume them.
   // NOTE(review): K and K2 are function-local const arrays (not static);
   // whether the compiler keeps them in read-only storage or rebuilds them on
   // each call is not visible here - confirm with the generated code.
48 alignas(64) const uint64_t K[80] = {
49 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
50 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
51 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
52 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694,
53 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
54 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
55 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
56 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70,
57 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
58 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B,
59 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30,
60 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8,
61 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
62 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
63 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC,
64 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
65 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
66 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B,
67 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
68 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817,
69 };
70
71 // K2 is each pair of elements in K repeated since we are performing 2 parallel
72 // message expansions
   // Consequently the constants for rounds t and t+1 start at K2[2 * t], and a
   // 4-word load from there yields the same pair for both blocks.
73 alignas(64) const uint64_t K2[2 * 80] = {
74 0x428A2F98D728AE22, 0x7137449123EF65CD, 0x428A2F98D728AE22, 0x7137449123EF65CD,
75 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
76 0x3956C25BF348B538, 0x59F111F1B605D019, 0x3956C25BF348B538, 0x59F111F1B605D019,
77 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
78 0xD807AA98A3030242, 0x12835B0145706FBE, 0xD807AA98A3030242, 0x12835B0145706FBE,
79 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
80 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1,
81 0x9BDC06A725C71235, 0xC19BF174CF692694, 0x9BDC06A725C71235, 0xC19BF174CF692694,
82 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3,
83 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
84 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483,
85 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
86 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0x983E5152EE66DFAB, 0xA831C66D2DB43210,
87 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
88 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0xC6E00BF33DA88FC2, 0xD5A79147930AA725,
89 0x06CA6351E003826F, 0x142929670A0E6E70, 0x06CA6351E003826F, 0x142929670A0E6E70,
90 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x27B70A8546D22FFC, 0x2E1B21385C26C926,
91 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
92 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8,
93 0x81C2C92E47EDAEE6, 0x92722C851482353B, 0x81C2C92E47EDAEE6, 0x92722C851482353B,
94 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xA2BFE8A14CF10364, 0xA81A664BBC423001,
95 0xC24B8B70D0F89791, 0xC76C51A30654BE30, 0xC24B8B70D0F89791, 0xC76C51A30654BE30,
96 0xD192E819D6EF5218, 0xD69906245565A910, 0xD192E819D6EF5218, 0xD69906245565A910,
97 0xF40E35855771202A, 0x106AA07032BBD1B8, 0xF40E35855771202A, 0x106AA07032BBD1B8,
98 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x19A4C116B8D2D0C8, 0x1E376C085141AB53,
99 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
100 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB,
101 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
102 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x748F82EE5DEFB2FC, 0x78A5636F43172F60,
103 0x84C87814A1F0AB72, 0x8CC702081A6439EC, 0x84C87814A1F0AB72, 0x8CC702081A6439EC,
104 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9,
105 0xBEF9A3F7B2C67915, 0xC67178F2E372532B, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
106 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xCA273ECEEA26619C, 0xD186B8C721C0C207,
107 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
108 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x06F067AA72176FBA, 0x0A637DC5A2C898A6,
109 0x113F9804BEF90DAE, 0x1B710B35131C471B, 0x113F9804BEF90DAE, 0x1B710B35131C471B,
110 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x28DB77F523047D84, 0x32CAAB7B40C72493,
111 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
112 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A,
113 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817,
114 };
115 // clang-format on
116
   // W: rolling window of 16 (message word + round constant) sums for the
   //    block currently being compressed.
   // W2: all 80 (message word + round constant) sums saved for the second
   //    block of a pair; it is fully written before it is read below.
117 alignas(64) uint64_t W[16] = {0};
118 alignas(64) uint64_t W2[80];
119
   // Working variables start from the current chaining state
120 uint64_t A = digest[0];
121 uint64_t B = digest[1];
122 uint64_t C = digest[2];
123 uint64_t D = digest[3];
124 uint64_t E = digest[4];
125 uint64_t F = digest[5];
126 uint64_t G = digest[6];
127 uint64_t H = digest[7];
128
129 const uint8_t* data = input.data();
130
   // Two-block path: one SIMD_4x64 message expansion serves both blocks
131 while(blocks >= 2) {
132 SIMD_4x64 WS[8];
133
   // Load 16 bytes from each of the two blocks (offsets 16*i and 128 + 16*i),
   // add the matching round constants, and store the sums for rounds 2i and
   // 2i+1 into W and W2.
134 for(size_t i = 0; i < 8; i++) {
135 WS[i] = SIMD_4x64::load_be2(&data[16 * i], &data[128 + 16 * i]);
136 auto WK = WS[i] + SIMD_4x64::load_le(&K2[4 * i]);
137 WK.store_le2(&W[2 * i], &W2[2 * i]);
138 }
139
140 data += 2 * 128;
141 blocks -= 2;
142
143 // First 64 rounds of SHA-512
   // Each group of four statements: compute the next schedule vector plus its
   // constants, run two scalar rounds on the sums currently held in W, then
   // overwrite those two W slots with the new sums (and record the matching
   // sums in W2). The store must stay after the two rounds that read the
   // same W slots.
144 for(size_t r = 0; r != 64; r += 16) {
145 auto w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 16)]);
146 SHA2_64_F(A, B, C, D, E, F, G, H, W[0]);
147 SHA2_64_F(H, A, B, C, D, E, F, G, W[1]);
148 w.store_le2(&W[0], &W2[r + 16]);
149
150 w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 18)]);
151 SHA2_64_F(G, H, A, B, C, D, E, F, W[2]);
152 SHA2_64_F(F, G, H, A, B, C, D, E, W[3]);
153 w.store_le2(&W[2], &W2[r + 18]);
154
155 w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 20)]);
156 SHA2_64_F(E, F, G, H, A, B, C, D, W[4]);
157 SHA2_64_F(D, E, F, G, H, A, B, C, W[5]);
158 w.store_le2(&W[4], &W2[r + 20]);
159
160 w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 22)]);
161 SHA2_64_F(C, D, E, F, G, H, A, B, W[6]);
162 SHA2_64_F(B, C, D, E, F, G, H, A, W[7]);
163 w.store_le2(&W[6], &W2[r + 22]);
164
165 w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 24)]);
166 SHA2_64_F(A, B, C, D, E, F, G, H, W[8]);
167 SHA2_64_F(H, A, B, C, D, E, F, G, W[9]);
168 w.store_le2(&W[8], &W2[r + 24]);
169
170 w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 26)]);
171 SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
172 SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
173 w.store_le2(&W[10], &W2[r + 26]);
174
175 w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 28)]);
176 SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
177 SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
178 w.store_le2(&W[12], &W2[r + 28]);
179
180 w = sha512_next_w(WS) + SIMD_4x64::load_le(&K2[2 * (r + 30)]);
181 SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
182 SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
183 w.store_le2(&W[14], &W2[r + 30]);
184 }
185
186 // Final 16 rounds of SHA-512
   // W now holds the sums for rounds 64..79; no further expansion is needed
187 SHA2_64_F(A, B, C, D, E, F, G, H, W[0]);
188 SHA2_64_F(H, A, B, C, D, E, F, G, W[1]);
189 SHA2_64_F(G, H, A, B, C, D, E, F, W[2]);
190 SHA2_64_F(F, G, H, A, B, C, D, E, W[3]);
191 SHA2_64_F(E, F, G, H, A, B, C, D, W[4]);
192 SHA2_64_F(D, E, F, G, H, A, B, C, W[5]);
193 SHA2_64_F(C, D, E, F, G, H, A, B, W[6]);
194 SHA2_64_F(B, C, D, E, F, G, H, A, W[7]);
195 SHA2_64_F(A, B, C, D, E, F, G, H, W[8]);
196 SHA2_64_F(H, A, B, C, D, E, F, G, W[9]);
197 SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
198 SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
199 SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
200 SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
201 SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
202 SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
203
   // Fold the working variables into the digest; the sums are also the
   // starting state for the second block of the pair.
204 A = (digest[0] += A);
205 B = (digest[1] += B);
206 C = (digest[2] += C);
207 D = (digest[3] += D);
208 E = (digest[4] += E);
209 F = (digest[5] += F);
210 G = (digest[6] += G);
211 H = (digest[7] += H);
212
213 // Second block of SHA-512 compression, with pre-expanded message
   // All 80 rounds read their (message word + constant) input from W2
214 SHA2_64_F(A, B, C, D, E, F, G, H, W2[0]);
215 SHA2_64_F(H, A, B, C, D, E, F, G, W2[1]);
216 SHA2_64_F(G, H, A, B, C, D, E, F, W2[2]);
217 SHA2_64_F(F, G, H, A, B, C, D, E, W2[3]);
218 SHA2_64_F(E, F, G, H, A, B, C, D, W2[4]);
219 SHA2_64_F(D, E, F, G, H, A, B, C, W2[5]);
220 SHA2_64_F(C, D, E, F, G, H, A, B, W2[6]);
221 SHA2_64_F(B, C, D, E, F, G, H, A, W2[7]);
222 SHA2_64_F(A, B, C, D, E, F, G, H, W2[8]);
223 SHA2_64_F(H, A, B, C, D, E, F, G, W2[9]);
224 SHA2_64_F(G, H, A, B, C, D, E, F, W2[10]);
225 SHA2_64_F(F, G, H, A, B, C, D, E, W2[11]);
226 SHA2_64_F(E, F, G, H, A, B, C, D, W2[12]);
227 SHA2_64_F(D, E, F, G, H, A, B, C, W2[13]);
228 SHA2_64_F(C, D, E, F, G, H, A, B, W2[14]);
229 SHA2_64_F(B, C, D, E, F, G, H, A, W2[15]);
230
231 SHA2_64_F(A, B, C, D, E, F, G, H, W2[16]);
232 SHA2_64_F(H, A, B, C, D, E, F, G, W2[17]);
233 SHA2_64_F(G, H, A, B, C, D, E, F, W2[18]);
234 SHA2_64_F(F, G, H, A, B, C, D, E, W2[19]);
235 SHA2_64_F(E, F, G, H, A, B, C, D, W2[20]);
236 SHA2_64_F(D, E, F, G, H, A, B, C, W2[21]);
237 SHA2_64_F(C, D, E, F, G, H, A, B, W2[22]);
238 SHA2_64_F(B, C, D, E, F, G, H, A, W2[23]);
239 SHA2_64_F(A, B, C, D, E, F, G, H, W2[24]);
240 SHA2_64_F(H, A, B, C, D, E, F, G, W2[25]);
241 SHA2_64_F(G, H, A, B, C, D, E, F, W2[26]);
242 SHA2_64_F(F, G, H, A, B, C, D, E, W2[27]);
243 SHA2_64_F(E, F, G, H, A, B, C, D, W2[28]);
244 SHA2_64_F(D, E, F, G, H, A, B, C, W2[29]);
245 SHA2_64_F(C, D, E, F, G, H, A, B, W2[30]);
246 SHA2_64_F(B, C, D, E, F, G, H, A, W2[31]);
247
248 SHA2_64_F(A, B, C, D, E, F, G, H, W2[32]);
249 SHA2_64_F(H, A, B, C, D, E, F, G, W2[33]);
250 SHA2_64_F(G, H, A, B, C, D, E, F, W2[34]);
251 SHA2_64_F(F, G, H, A, B, C, D, E, W2[35]);
252 SHA2_64_F(E, F, G, H, A, B, C, D, W2[36]);
253 SHA2_64_F(D, E, F, G, H, A, B, C, W2[37]);
254 SHA2_64_F(C, D, E, F, G, H, A, B, W2[38]);
255 SHA2_64_F(B, C, D, E, F, G, H, A, W2[39]);
256 SHA2_64_F(A, B, C, D, E, F, G, H, W2[40]);
257 SHA2_64_F(H, A, B, C, D, E, F, G, W2[41]);
258 SHA2_64_F(G, H, A, B, C, D, E, F, W2[42]);
259 SHA2_64_F(F, G, H, A, B, C, D, E, W2[43]);
260 SHA2_64_F(E, F, G, H, A, B, C, D, W2[44]);
261 SHA2_64_F(D, E, F, G, H, A, B, C, W2[45]);
262 SHA2_64_F(C, D, E, F, G, H, A, B, W2[46]);
263 SHA2_64_F(B, C, D, E, F, G, H, A, W2[47]);
264
265 SHA2_64_F(A, B, C, D, E, F, G, H, W2[48]);
266 SHA2_64_F(H, A, B, C, D, E, F, G, W2[49]);
267 SHA2_64_F(G, H, A, B, C, D, E, F, W2[50]);
268 SHA2_64_F(F, G, H, A, B, C, D, E, W2[51]);
269 SHA2_64_F(E, F, G, H, A, B, C, D, W2[52]);
270 SHA2_64_F(D, E, F, G, H, A, B, C, W2[53]);
271 SHA2_64_F(C, D, E, F, G, H, A, B, W2[54]);
272 SHA2_64_F(B, C, D, E, F, G, H, A, W2[55]);
273 SHA2_64_F(A, B, C, D, E, F, G, H, W2[56]);
274 SHA2_64_F(H, A, B, C, D, E, F, G, W2[57]);
275 SHA2_64_F(G, H, A, B, C, D, E, F, W2[58]);
276 SHA2_64_F(F, G, H, A, B, C, D, E, W2[59]);
277 SHA2_64_F(E, F, G, H, A, B, C, D, W2[60]);
278 SHA2_64_F(D, E, F, G, H, A, B, C, W2[61]);
279 SHA2_64_F(C, D, E, F, G, H, A, B, W2[62]);
280 SHA2_64_F(B, C, D, E, F, G, H, A, W2[63]);
281
282 SHA2_64_F(A, B, C, D, E, F, G, H, W2[64]);
283 SHA2_64_F(H, A, B, C, D, E, F, G, W2[65]);
284 SHA2_64_F(G, H, A, B, C, D, E, F, W2[66]);
285 SHA2_64_F(F, G, H, A, B, C, D, E, W2[67]);
286 SHA2_64_F(E, F, G, H, A, B, C, D, W2[68]);
287 SHA2_64_F(D, E, F, G, H, A, B, C, W2[69]);
288 SHA2_64_F(C, D, E, F, G, H, A, B, W2[70]);
289 SHA2_64_F(B, C, D, E, F, G, H, A, W2[71]);
290 SHA2_64_F(A, B, C, D, E, F, G, H, W2[72]);
291 SHA2_64_F(H, A, B, C, D, E, F, G, W2[73]);
292 SHA2_64_F(G, H, A, B, C, D, E, F, W2[74]);
293 SHA2_64_F(F, G, H, A, B, C, D, E, W2[75]);
294 SHA2_64_F(E, F, G, H, A, B, C, D, W2[76]);
295 SHA2_64_F(D, E, F, G, H, A, B, C, W2[77]);
296 SHA2_64_F(C, D, E, F, G, H, A, B, W2[78]);
297 SHA2_64_F(B, C, D, E, F, G, H, A, W2[79]);
298
   // Fold the second block into the digest and reload the working variables
299 A = (digest[0] += A);
300 B = (digest[1] += B);
301 C = (digest[2] += C);
302 D = (digest[3] += D);
303 E = (digest[4] += E);
304 F = (digest[5] += F);
305 G = (digest[6] += G);
306 H = (digest[7] += H);
307 }
308
   // Single-block path: after the loop above at most one block remains, so
   // this body runs zero or one time. Same structure as the first block of a
   // pair, but with SIMD_2x64 vectors and the plain K table.
309 while(blocks > 0) {
310 SIMD_2x64 WS[8];
311
   // Load the 16 big-endian message words two at a time and pre-add constants
312 for(size_t i = 0; i < 8; i++) {
313 WS[i] = SIMD_2x64::load_be(&data[16 * i]);
314 auto WK = WS[i] + SIMD_2x64::load_le(&K[2 * i]);
315 WK.store_le(&W[2 * i]);
316 }
317
318 data += 128;
319 blocks -= 1;
320
321 // First 64 rounds of SHA-512
   // As above: the store into W[2k], W[2k+1] must follow the two rounds that
   // consume those slots.
322 for(size_t r = 0; r != 64; r += 16) {
323 auto w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 16]);
324 SHA2_64_F(A, B, C, D, E, F, G, H, W[0]);
325 SHA2_64_F(H, A, B, C, D, E, F, G, W[1]);
326 w.store_le(&W[0]);
327
328 w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 18]);
329 SHA2_64_F(G, H, A, B, C, D, E, F, W[2]);
330 SHA2_64_F(F, G, H, A, B, C, D, E, W[3]);
331 w.store_le(&W[2]);
332
333 w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 20]);
334 SHA2_64_F(E, F, G, H, A, B, C, D, W[4]);
335 SHA2_64_F(D, E, F, G, H, A, B, C, W[5]);
336 w.store_le(&W[4]);
337
338 w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 22]);
339 SHA2_64_F(C, D, E, F, G, H, A, B, W[6]);
340 SHA2_64_F(B, C, D, E, F, G, H, A, W[7]);
341 w.store_le(&W[6]);
342
343 w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 24]);
344 SHA2_64_F(A, B, C, D, E, F, G, H, W[8]);
345 SHA2_64_F(H, A, B, C, D, E, F, G, W[9]);
346 w.store_le(&W[8]);
347
348 w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 26]);
349 SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
350 SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
351 w.store_le(&W[10]);
352
353 w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 28]);
354 SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
355 SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
356 w.store_le(&W[12]);
357
358 w = sha512_next_w(WS) + SIMD_2x64::load_le(&K[r + 30]);
359 SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
360 SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
361 w.store_le(&W[14]);
362 }
363
364 // Final 16 rounds of SHA-512
365 SHA2_64_F(A, B, C, D, E, F, G, H, W[0]);
366 SHA2_64_F(H, A, B, C, D, E, F, G, W[1]);
367 SHA2_64_F(G, H, A, B, C, D, E, F, W[2]);
368 SHA2_64_F(F, G, H, A, B, C, D, E, W[3]);
369 SHA2_64_F(E, F, G, H, A, B, C, D, W[4]);
370 SHA2_64_F(D, E, F, G, H, A, B, C, W[5]);
371 SHA2_64_F(C, D, E, F, G, H, A, B, W[6]);
372 SHA2_64_F(B, C, D, E, F, G, H, A, W[7]);
373 SHA2_64_F(A, B, C, D, E, F, G, H, W[8]);
374 SHA2_64_F(H, A, B, C, D, E, F, G, W[9]);
375 SHA2_64_F(G, H, A, B, C, D, E, F, W[10]);
376 SHA2_64_F(F, G, H, A, B, C, D, E, W[11]);
377 SHA2_64_F(E, F, G, H, A, B, C, D, W[12]);
378 SHA2_64_F(D, E, F, G, H, A, B, C, W[13]);
379 SHA2_64_F(C, D, E, F, G, H, A, B, W[14]);
380 SHA2_64_F(B, C, D, E, F, G, H, A, W[15]);
381
   // Fold the working variables into the digest
382 A = (digest[0] += A);
383 B = (digest[1] += B);
384 C = (digest[2] += C);
385 D = (digest[3] += D);
386 E = (digest[4] += E);
387 F = (digest[5] += F);
388 G = (digest[6] += G);
389 H = (digest[7] += H);
390 }
391}
392
393} // namespace Botan
static SIMD_2x64 load_le(const void *in)
Definition simd_2x64.h:38
static SIMD_2x64 load_be(const void *in)
Definition simd_2x64.h:42
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be2(const void *inl, const void *inh)
Definition simd_4x64.h:42
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le(const void *in)
Definition simd_4x64.h:46
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
BOTAN_FORCE_INLINE void SHA2_64_F(uint64_t A, uint64_t B, uint64_t C, uint64_t &D, uint64_t E, uint64_t F, uint64_t G, uint64_t &H, uint64_t &M1, uint64_t M2, uint64_t M3, uint64_t M4, uint64_t magic)
Definition sha2_64_f.h:19
BOTAN_FORCE_INLINE constexpr T rotr(T input)
Definition rotate.h:35