Botan 3.9.0
Crypto and TLS for C++
sha2_32_simd.cpp
Go to the documentation of this file.
1/*
2* (C) 2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/sha2_32.h>
8
9#include <botan/internal/bit_ops.h>
10#include <botan/internal/rotate.h>
11#include <botan/internal/sha2_32_f.h>
12#include <botan/internal/simd_4x32.h>
13#include <botan/internal/stack_scrubbing.h>
14
15namespace Botan {
16
17namespace {
18
19BOTAN_FN_ISA_SIMD_4X32 BOTAN_FORCE_INLINE SIMD_4x32 sha256_simd_next_w(SIMD_4x32 x[4]) {
20 const SIMD_4x32 lo_mask = SIMD_4x32(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000);
21 const SIMD_4x32 hi_mask = SIMD_4x32(0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF);
22
23 const SIMD_4x32 lo_word_shuf = SIMD_4x32(0x03020100, 0x07060504, 0x03020100, 0x07060504);
24 const SIMD_4x32 hi_word_shuf = SIMD_4x32(0x0B0A0908, 0x0F0E0D0C, 0x0B0A0908, 0x0F0E0D0C);
25
26 auto t0 = SIMD_4x32::alignr4(x[1], x[0]);
27 x[0] += SIMD_4x32::alignr4(x[3], x[2]);
28
29 x[0] += t0.rotr<7>() ^ t0.rotr<18>() ^ t0.shr<3>();
30
31 t0 = SIMD_4x32::byte_shuffle(x[3], hi_word_shuf);
32 auto s1 = t0.rotr<17>() ^ t0.rotr<19>() ^ t0.shr<10>();
33 x[0] += s1 & lo_mask;
34
35 t0 = SIMD_4x32::byte_shuffle(x[0], lo_word_shuf);
36 s1 = t0.rotr<17>() ^ t0.rotr<19>() ^ t0.shr<10>();
37 x[0] += s1 & hi_mask;
38
39 const auto tmp = x[0];
40 x[0] = x[1];
41 x[1] = x[2];
42 x[2] = x[3];
43 x[3] = tmp;
44
45 return x[3];
46}
47
48} // namespace
49
50void BOTAN_FN_ISA_SIMD_4X32 BOTAN_SCRUB_STACK_AFTER_RETURN
51SHA_256::compress_digest_x86_simd(digest_type& digest, std::span<const uint8_t> input, size_t blocks) {
52 // clang-format off
53
54 alignas(64) const uint32_t K[64] = {
55 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
56 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
57 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
58 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
59 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
60 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
61 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
62 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2};
63
64 // clang-format on
65
66 alignas(64) uint32_t W[16];
67
68 uint32_t A = digest[0];
69 uint32_t B = digest[1];
70 uint32_t C = digest[2];
71 uint32_t D = digest[3];
72 uint32_t E = digest[4];
73 uint32_t F = digest[5];
74 uint32_t G = digest[6];
75 uint32_t H = digest[7];
76
77 const uint8_t* data = input.data();
78
79 while(blocks > 0) {
80 SIMD_4x32 WS[4];
81
82 for(size_t i = 0; i < 4; i++) {
83 WS[i] = SIMD_4x32::load_be(&data[16 * i]);
84 auto WK = WS[i] + SIMD_4x32::load_le(&K[4 * i]);
85 WK.store_le(&W[4 * i]);
86 }
87
88 data += 64;
89 blocks -= 1;
90
91 for(size_t r = 0; r != 48; r += 16) {
92 auto w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 16]);
93
94 SHA2_32_F(A, B, C, D, E, F, G, H, W[0]);
95 SHA2_32_F(H, A, B, C, D, E, F, G, W[1]);
96 SHA2_32_F(G, H, A, B, C, D, E, F, W[2]);
97 SHA2_32_F(F, G, H, A, B, C, D, E, W[3]);
98
99 w.store_le(&W[0]);
100
101 w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 20]);
102
103 SHA2_32_F(E, F, G, H, A, B, C, D, W[4]);
104 SHA2_32_F(D, E, F, G, H, A, B, C, W[5]);
105 SHA2_32_F(C, D, E, F, G, H, A, B, W[6]);
106 SHA2_32_F(B, C, D, E, F, G, H, A, W[7]);
107
108 w.store_le(&W[4]);
109
110 w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 24]);
111
112 SHA2_32_F(A, B, C, D, E, F, G, H, W[8]);
113 SHA2_32_F(H, A, B, C, D, E, F, G, W[9]);
114 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
115 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
116
117 w.store_le(&W[8]);
118
119 w = sha256_simd_next_w(WS) + SIMD_4x32::load_le(&K[r + 28]);
120
121 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
122 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
123 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
124 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
125
126 w.store_le(&W[12]);
127 }
128
129 SHA2_32_F(A, B, C, D, E, F, G, H, W[0]);
130 SHA2_32_F(H, A, B, C, D, E, F, G, W[1]);
131 SHA2_32_F(G, H, A, B, C, D, E, F, W[2]);
132 SHA2_32_F(F, G, H, A, B, C, D, E, W[3]);
133 SHA2_32_F(E, F, G, H, A, B, C, D, W[4]);
134 SHA2_32_F(D, E, F, G, H, A, B, C, W[5]);
135 SHA2_32_F(C, D, E, F, G, H, A, B, W[6]);
136 SHA2_32_F(B, C, D, E, F, G, H, A, W[7]);
137 SHA2_32_F(A, B, C, D, E, F, G, H, W[8]);
138 SHA2_32_F(H, A, B, C, D, E, F, G, W[9]);
139 SHA2_32_F(G, H, A, B, C, D, E, F, W[10]);
140 SHA2_32_F(F, G, H, A, B, C, D, E, W[11]);
141 SHA2_32_F(E, F, G, H, A, B, C, D, W[12]);
142 SHA2_32_F(D, E, F, G, H, A, B, C, W[13]);
143 SHA2_32_F(C, D, E, F, G, H, A, B, W[14]);
144 SHA2_32_F(B, C, D, E, F, G, H, A, W[15]);
145
146 A = (digest[0] += A);
147 B = (digest[1] += B);
148 C = (digest[2] += C);
149 D = (digest[3] += D);
150 E = (digest[4] += E);
151 F = (digest[5] += F);
152 G = (digest[6] += G);
153 H = (digest[7] += H);
154 }
155}
156
157} // namespace Botan
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:174
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 byte_shuffle(const SIMD_4x32 &tbl, const SIMD_4x32 &idx)
Definition simd_4x32.h:677
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr4(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:741
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_4x32.h:149
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
BOTAN_FORCE_INLINE void SHA2_32_F(uint32_t A, uint32_t B, uint32_t C, uint32_t &D, uint32_t E, uint32_t F, uint32_t G, uint32_t &H, uint32_t &M1, uint32_t M2, uint32_t M3, uint32_t M4, uint32_t magic)
Definition sha2_32_f.h:19
#define BOTAN_SCRUB_STACK_AFTER_RETURN