Botan 3.9.0
Crypto and TLS for C++
sha2_32_x86.cpp
Go to the documentation of this file.
1/*
2* Based on public domain code by Sean Gulley
3*
4* Further changes
5*
6* (C) 2017,2020,2025 Jack Lloyd
7*
8* Botan is released under the Simplified BSD License (see license.txt)
9*/
10
11#include <botan/internal/sha2_32.h>
12
13#include <botan/internal/isa_extn.h>
14#include <botan/internal/simd_4x32.h>
15#include <botan/internal/stack_scrubbing.h>
16#include <immintrin.h>
17
18namespace Botan {
19
20namespace {
21
22BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_rnds4(SIMD_4x32& S0,
23 SIMD_4x32& S1,
24 const SIMD_4x32& msg,
25 const SIMD_4x32& k) {
26 const auto mk = msg + k;
27 S1 = SIMD_4x32(_mm_sha256rnds2_epu32(S1.raw(), S0.raw(), mk.raw()));
28 S0 = SIMD_4x32(_mm_sha256rnds2_epu32(S0.raw(), S1.raw(), mk.shift_elems_right<2>().raw()));
29}
30
31BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_msg_exp(SIMD_4x32& m0, SIMD_4x32& m1, SIMD_4x32& m2) {
32 m2 += SIMD_4x32(_mm_alignr_epi8(m1.raw(), m0.raw(), 4));
33 m0 = SIMD_4x32(_mm_sha256msg1_epu32(m0.raw(), m1.raw()));
34 m2 = SIMD_4x32(_mm_sha256msg2_epu32(m2.raw(), m1.raw()));
35}
36
37BOTAN_FORCE_INLINE BOTAN_FN_ISA_SHANI void sha256_permute_state(SIMD_4x32& S0, SIMD_4x32& S1) {
38 S0 = SIMD_4x32(_mm_shuffle_epi32(S0.raw(), 0b10110001)); // CDAB
39 S1 = SIMD_4x32(_mm_shuffle_epi32(S1.raw(), 0b00011011)); // EFGH
40
41 __m128i tmp = _mm_alignr_epi8(S0.raw(), S1.raw(), 8); // ABEF
42 S1 = SIMD_4x32(_mm_blend_epi16(S1.raw(), S0.raw(), 0xF0)); // CDGH
43 S0 = SIMD_4x32(tmp);
44}
45
46} // namespace
47
49 std::span<const uint8_t> input_span,
50 size_t blocks) {
51 alignas(64) static const uint32_t K[] = {
52 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
53 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
54 0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
55 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
56 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
57 0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
58 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
59 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
60 };
61
62 const uint8_t* input = input_span.data();
63
64 SIMD_4x32 S0 = SIMD_4x32::load_le(&digest[0]); // NOLINT(*container-data-pointer)
65 SIMD_4x32 S1 = SIMD_4x32::load_le(&digest[4]);
66
67 sha256_permute_state(S0, S1);
68
69 while(blocks > 0) {
70 const auto S0_SAVE = S0;
71 const auto S1_SAVE = S1;
72
73 auto W0 = SIMD_4x32::load_be(input);
74 auto W1 = SIMD_4x32::load_be(input + 16);
75 auto W2 = SIMD_4x32::load_be(input + 32);
76 auto W3 = SIMD_4x32::load_be(input + 48);
77
78 sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[0]));
79 sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4]));
80 sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[8]));
81 sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[12]));
82
83 W0 = SIMD_4x32(_mm_sha256msg1_epu32(W0.raw(), W1.raw()));
84 W1 = SIMD_4x32(_mm_sha256msg1_epu32(W1.raw(), W2.raw()));
85
86 for(size_t r = 4; r != 16; r += 4) {
87 sha256_msg_exp(W2, W3, W0);
88 sha256_rnds4(S0, S1, W0, SIMD_4x32::load_le(&K[4 * (r + 0)]));
89
90 sha256_msg_exp(W3, W0, W1);
91 sha256_rnds4(S0, S1, W1, SIMD_4x32::load_le(&K[4 * (r + 1)]));
92
93 sha256_msg_exp(W0, W1, W2);
94 sha256_rnds4(S0, S1, W2, SIMD_4x32::load_le(&K[4 * (r + 2)]));
95
96 sha256_msg_exp(W1, W2, W3);
97 sha256_rnds4(S0, S1, W3, SIMD_4x32::load_le(&K[4 * (r + 3)]));
98 }
99
100 // Add values back to state
101 S0 += S0_SAVE;
102 S1 += S1_SAVE;
103
104 input += 64;
105 blocks--;
106 }
107
108 sha256_permute_state(S1, S0);
109
110 S0.store_le(&digest[0]); // NOLINT(*container-data-pointer)
111 S1.store_le(&digest[4]);
112}
113
114} // namespace Botan
secure_vector< uint32_t > digest_type
Definition sha2_32.h:61
static void compress_digest_x86(digest_type &digest, std::span< const uint8_t > input, size_t blocks)
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_be(const void *in) noexcept
Definition simd_4x32.h:174
void store_le(uint32_t out[4]) const noexcept
Definition simd_4x32.h:200
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_4x32.h:149
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
#define BOTAN_SCRUB_STACK_AFTER_RETURN