Botan 3.11.0
Crypto and TLS for C&&
sha2_32_simd.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sha2_32.h>
8
9#include <botan/internal/sha2_32_f.h>
10#include <botan/internal/simd_4x32.h>
11#include <botan/internal/stack_scrubbing.h>
12
13namespace Botan {
14
15namespace {
16
17BOTAN_FN_ISA_SIMD_4X32 BOTAN_FORCE_INLINE SIMD_4x32 sha256_simd_next_w(SIMD_4x32 x[4]) {
18 const SIMD_4x32 lo_mask = SIMD_4x32(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000);
19 const SIMD_4x32 hi_mask = SIMD_4x32(0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF);
20
21 const SIMD_4x32 lo_word_shuf = SIMD_4x32(0x03020100, 0x07060504, 0x03020100, 0x07060504);
22 const SIMD_4x32 hi_word_shuf = SIMD_4x32(0x0B0A0908, 0x0F0E0D0C, 0x0B0A0908, 0x0F0E0D0C);
23
24 auto t0 = SIMD_4x32::alignr4(x[1], x[0]);
25 x[0] += SIMD_4x32::alignr4(x[3], x[2]);
26
27 x[0] += t0.rotr<7>() ^ t0.rotr<18>() ^ t0.shr<3>();
28
29 t0 = SIMD_4x32::byte_shuffle(x[3], hi_word_shuf);
30 auto s1 = t0.rotr<17>() ^ t0.rotr<19>() ^ t0.shr<10>();
31 x[0] += s1 & lo_mask;
32
33 t0 = SIMD_4x32::byte_shuffle(x[0], lo_word_shuf);
34 s1 = t0.rotr<17>() ^ t0.rotr<19>() ^ t0.shr<10>();
35 x[0] += s1 & hi_mask;
36
37 const auto tmp = x[0];
38 x[0] = x[1];
39 x[1] = x[2];
40 x[2] = x[3];
41 x[3] = tmp;
42
43 return x[3];
44}
45
46} // namespace
47
48void BOTAN_FN_ISA_SIMD_4X32 BOTAN_SCRUB_STACK_AFTER_RETURN
49SHA_256::compress_digest_x86_simd(digest_type& digest, std::span<const uint8_t> input, size_t blocks) {
50 // clang-format off
51
52 alignas(64) const uint32_t K[64] = {
53 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
54 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
55 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
56 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
57 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
58 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
59 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
60 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2};
61
62 // clang-format on
63
64 alignas(64) uint32_t W[16];
65
66 uint32_t A = digest[0];
67 uint32_t B = digest[1];
68 uint32_t C = digest[2];
69 uint32_t D = digest[3];
70 uint32_t E = digest[4];
71 uint32_t F = digest[5];
72 uint32_t G = digest[6];
73 uint32_t H = digest[7];
74
75 const uint8_t* data = input.data();
76
77 while(blocks > 0) {
78 SIMD_4x32 WS[4];
79
80 for(size_t i = 0; i < 4; i++) {
81 WS[i] = SIMD_4x32::load_be(&data[16 * i]);
82 auto WK = WS[i] + SIMD_4x32::load_le(&K[4 * i]);
83 WK.store_le(&W[4 * i]);
84 }
85
86 data += 64;
87 blocks -= 1;
88
89 for(size_t r = 0; r != 48; r += 16) {
90 auto w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 16]);
91
92 SHA2_32_F(A, B, C, D, E, F, G, H, W[0]);
93 SHA2_32_F(H, A, B, C, D, E, F, G, W[1]);
94 SHA2_32_F(G, H, A, B, C, D, E, F, W[2]);
95 SHA2_32_F(F, G, H, A, B, C, D, E, W[3]);
96
97 w.store_le(&W[0]);
98
99 w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 20]);
100
101 SHA2_32_F(E, F, G, H, A, B, C, D, W[4]);
102 SHA2_32_F(D, E, F, G, H, A, B, C, W[5]);
103 SHA2_32_F(C, D, E, F, G, H, A, B, W[6]);
104 SHA2_32_F(B, C, D, E, F, G, H, A, W[7]);
105
106 w.store_le(&W[4]);
107
108 w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 24]);
109
110 SHA2_32_F(A, B, C, D, E, F, G, H, W[8]);
111 SHA2_32_F(H, A, B, C, D, E, F, G, W[9]);
112 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
113 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
114
115 w.store_le(&W[8]);
116
117 w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 28]);
118
119 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
120 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
121 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
122 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
123
124 w.store_le(&W[12]);
125 }
126
127 SHA2_32_F(A, B, C, D, E, F, G, H, W[0]);
128 SHA2_32_F(H, A, B, C, D, E, F, G, W[1]);
129 SHA2_32_F(G, H, A, B, C, D, E, F, W[2]);
130 SHA2_32_F(F, G, H, A, B, C, D, E, W[3]);
131 SHA2_32_F(E, F, G, H, A, B, C, D, W[4]);
132 SHA2_32_F(D, E, F, G, H, A, B, C, W[5]);
133 SHA2_32_F(C, D, E, F, G, H, A, B, W[6]);
134 SHA2_32_F(B, C, D, E, F, G, H, A, W[7]);
135 SHA2_32_F(A, B, C, D, E, F, G, H, W[8]);
136 SHA2_32_F(H, A, B, C, D, E, F, G, W[9]);
137 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
138 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
139 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
140 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
141 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
142 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
143
144 A = (digest[0] += A);
145 B = (digest[1] += B);
146 C = (digest[2] += C);
147 D = (digest[3] += D);
148 E = (digest[4] += E);
149 F = (digest[5] += F);
150 G = (digest[6] += G);
151 H = (digest[7] += H);
152 }
153}
154
155} // namespace Botan
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:189
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
Definition simd_4x32.h:162
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 byte_shuffle(const SIMD_4x32 &tbl, const SIMD_4x32 &idx)
Definition simd_4x32.h:777
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr4(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:843
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
BOTAN_FORCE_INLINE void SHA2_32_F(uint32_t A, uint32_t B, uint32_t C, uint32_t &D, uint32_t E, uint32_t F, uint32_t G, uint32_t &H, uint32_t &M1, uint32_t M2, uint32_t M3, uint32_t M4, uint32_t magic)
Definition sha2_32_f.h:19
#define BOTAN_SCRUB_STACK_AFTER_RETURN