Botan 3.11.0
Crypto and TLS for C&&
sha2_32_x86.cpp
Go to the documentation of this file.
1/*
2* Based on public domain code by Sean Gulley
3*
4* Further changes
5*
6* (C) 2017,2020,2025,2026 Jack Lloyd
7*
8* Botan is released under the Simplified BSD License (see license.txt)
9*/
10
11#include <botan/internal/sha2_32.h>
12
13#include <botan/internal/isa_extn.h>
14#include <botan/internal/simd_4x32.h>
15#include <botan/internal/stack_scrubbing.h>
16#include <immintrin.h>
17
18namespace Botan {
19
20namespace {
21
22// NOLINTBEGIN(portability-simd-intrinsics)
23
24BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_rnds4(SIMD_4x32& S0,
25 SIMD_4x32& S1,
26 const SIMD_4x32& msg,
27 const SIMD_4x32& k) {
28 const auto mk = msg + k;
29 S1 = SIMD_4x32(_mm_sha256rnds2_epu32(S1.raw(), S0.raw(), mk.raw()));
30 S0 = SIMD_4x32(_mm_sha256rnds2_epu32(S0.raw(), S1.raw(), mk.shift_elems_right<2>().raw()));
31}
32
33BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_msg_exp(SIMD_4x32& W0, SIMD_4x32& W1, SIMD_4x32& W2, SIMD_4x32& W3) {
34 W2 += SIMD_4x32::alignr4(W1, W0);
35 W0 = SIMD_4x32(_mm_sha256msg1_epu32(W0.raw(), W1.raw()));
36 W2 = SIMD_4x32(_mm_sha256msg2_epu32(W2.raw(), W1.raw()));
37
38 W3 += SIMD_4x32::alignr4(W2, W1);
39 W1 = SIMD_4x32(_mm_sha256msg1_epu32(W1.raw(), W2.raw()));
40 W3 = SIMD_4x32(_mm_sha256msg2_epu32(W3.raw(), W2.raw()));
41}
42
43BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_permute_state(SIMD_4x32& S0, SIMD_4x32& S1) {
44 S0 = SIMD_4x32(_mm_shuffle_epi32(S0.raw(), 0b10110001)); // CDAB
45 S1 = SIMD_4x32(_mm_shuffle_epi32(S1.raw(), 0b00011011)); // EFGH
46
47 const auto T = SIMD_4x32::alignr8(S0, S1); // ABEF
48 S1 = SIMD_4x32(_mm_blend_epi16(S1.raw(), S0.raw(), 0xF0)); // CDGH
49 S0 = T;
50}
51
52// NOLINTEND(portability-simd-intrinsics)
53
54} // namespace
55
57 std::span<const uint8_t> input_span,
58 size_t blocks) {
59 alignas(64) static const uint32_t K[] = {
60 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
61 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
62 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
63 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
64 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
65 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
66 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
67 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
68 };
69
70 const uint8_t* input = input_span.data();
71
72 SIMD_4x32 S0 = SIMD_4x32::load_le(&digest[0]); // NOLINT(*container-data-pointer)
73 SIMD_4x32 S1 = SIMD_4x32::load_le(&digest[4]);
74
75 sha256_permute_state(S0, S1);
76
77 while(blocks > 0) {
78 const auto S0_SAVE = S0;
79 const auto S1_SAVE = S1;
80
81 auto W0 = SIMD_4x32::load_be(input);
82 auto W1 = SIMD_4x32::load_be(input + 16);
83 auto W2 = SIMD_4x32::load_be(input + 32);
84 auto W3 = SIMD_4x32::load_be(input + 48);
85
86 sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[0]));
87 sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4]));
88 sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[8]));
89 sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[12]));
90
91 W0 = SIMD_4x32(_mm_sha256msg1_epu32(W0.raw(), W1.raw()));
92 W1 = SIMD_4x32(_mm_sha256msg1_epu32(W1.raw(), W2.raw()));
93
94 sha256_msg_exp(W2, W3, W0, W1);
95
96 sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[4 * 4]));
97 sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4 * 5]));
98
99 sha256_msg_exp(W0, W1, W2, W3);
100
101 sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[4 * 6]));
102 sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[4 * 7]));
103
104 sha256_msg_exp(W2, W3, W0, W1);
105
106 sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[4 * 8]));
107 sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4 * 9]));
108
109 sha256_msg_exp(W0, W1, W2, W3);
110
111 sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[4 * 10]));
112 sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[4 * 11]));
113
114 sha256_msg_exp(W2, W3, W0, W1);
115
116 sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[4 * 12]));
117 sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4 * 13]));
118
119 sha256_msg_exp(W0, W1, W2, W3);
120
121 sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[4 * 14]));
122 sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[4 * 15]));
123
124 // Add values back to state
125 S0 += S0_SAVE;
126 S1 += S1_SAVE;
127
128 input += 64;
129 blocks--;
130 }
131
132 sha256_permute_state(S1, S0);
133
134 S0.store_le(&digest[0]); // NOLINT(*container-data-pointer)
135 S1.store_le(&digest[4]);
136}
137
138} // namespace Botan
secure_vector< uint32_t > digest_type
Definition sha2_32.h:61
static void compress_digest_x86(digest_type &digest, std::span< const uint8_t > input, size_t blocks)
void BOTAN_FN_ISA_SIMD_4X32 store_le(uint32_t out[4]) const noexcept
Definition simd_4x32.h:219
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:189
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
Definition simd_4x32.h:162
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr4(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:843
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 alignr8(const SIMD_4x32 &a, const SIMD_4x32 &b)
Definition simd_4x32.h:860
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
#define BOTAN_SCRUB_STACK_AFTER_RETURN