7#include <botan/internal/sm3.h>
9#include <botan/internal/isa_extn.h>
// Precomputed SM3 round constants: 64 entries, one per round, 64-byte aligned.
// Entry j equals T_j rotated left by (j mod 32) bits, where T_j is 0x79CC4519
// for j < 16 and 0x7A879D8A for j >= 16. Because the rotation amount wraps at
// 32, the rows for j = 48..63 repeat the rows for j = 16..31.
17alignas(64)
const uint32_t SM3_TJ[64] = {
// j = 0..15: rotations of 0x79CC4519
18 0x79CC4519, 0xF3988A32, 0xE7311465, 0xCE6228CB,
19 0x9CC45197, 0x3988A32F, 0x7311465E, 0xE6228CBC,
20 0xCC451979, 0x988A32F3, 0x311465E7, 0x6228CBCE,
21 0xC451979C, 0x88A32F39, 0x11465E73, 0x228CBCE6,
// j = 16..63: rotations of 0x7A879D8A
22 0x9D8A7A87, 0x3B14F50F, 0x7629EA1E, 0xEC53D43C,
23 0xD8A7A879, 0xB14F50F3, 0x629EA1E7, 0xC53D43CE,
24 0x8A7A879D, 0x14F50F3B, 0x29EA1E76, 0x53D43CEC,
25 0xA7A879D8, 0x4F50F3B1, 0x9EA1E762, 0x3D43CEC5,
26 0x7A879D8A, 0xF50F3B14, 0xEA1E7629, 0xD43CEC53,
27 0xA879D8A7, 0x50F3B14F, 0xA1E7629E, 0x43CEC53D,
28 0x879D8A7A, 0x0F3B14F5, 0x1E7629EA, 0x3CEC53D4,
29 0x79D8A7A8, 0xF3B14F50, 0xE7629EA1, 0xCEC53D43,
30 0x9D8A7A87, 0x3B14F50F, 0x7629EA1E, 0xEC53D43C,
31 0xD8A7A879, 0xB14F50F3, 0x629EA1E7, 0xC53D43CE,
32 0x8A7A879D, 0x14F50F3B, 0x29EA1E76, 0x53D43CEC,
33 0xA7A879D8, 0x4F50F3B1, 0x9EA1E762, 0x3D43CEC5,
// Returns v with its two 64-bit halves exchanged: lanes {v[2], v[3], v[0], v[1]}.
// NOTE(review): the enclosing function's header and any earlier statements are
// not visible in this excerpt; presumably this is the tail of
// sm3_reverse_words() - confirm against the full file.
41 return vextq_u32(v, v, 2);
// Produces a vector that is zero in lanes 0..2 and holds SM3_TJ[round] in lane 3.
// NOTE(review): presumably the body of sm3_tj(), whose result the round helpers
// pass as the third operand of vsm3ss1q_u32 - confirm against the full file.
// No bounds check is visible here; round is assumed to be below 64 - verify callers.
46 return vsetq_lane_u32(SM3_TJ[round], vdupq_n_u32(0), 3);
50 uint32x4_t& S0, uint32x4_t& S1, uint32x4_t w, uint32x4_t w_prime,
size_t round) {
// Runs four consecutive steps (round .. round + 3), updating S0 and S1 in place.
// Each step derives t with vsm3ss1q_u32 from the current state and that step's
// constant, then updates S0 via vsm3tt1aq_u32 (using w_prime) and S1 via
// vsm3tt2aq_u32 (using w). The trailing immediate 0..3 advances with the step;
// per the Arm intrinsic definitions it selects the lane of w_prime / w consumed.
// NOTE(review): the function header is not visible in this excerpt; presumably
// this is sm3_x4_r1 (the "a" instruction variants, called for rounds 0..15).

// step for round + 0
51 auto t = vsm3ss1q_u32(S0, S1, sm3_tj(round));
52 S0 = vsm3tt1aq_u32(S0, t, w_prime, 0);
53 S1 = vsm3tt2aq_u32(S1, t, w, 0);
// step for round + 1
55 t = vsm3ss1q_u32(S0, S1, sm3_tj(round + 1));
56 S0 = vsm3tt1aq_u32(S0, t, w_prime, 1);
57 S1 = vsm3tt2aq_u32(S1, t, w, 1);
// step for round + 2
59 t = vsm3ss1q_u32(S0, S1, sm3_tj(round + 2));
60 S0 = vsm3tt1aq_u32(S0, t, w_prime, 2);
61 S1 = vsm3tt2aq_u32(S1, t, w, 2);
// step for round + 3
63 t = vsm3ss1q_u32(S0, S1, sm3_tj(round + 3));
64 S0 = vsm3tt1aq_u32(S0, t, w_prime, 3);
65 S1 = vsm3tt2aq_u32(S1, t, w, 3);
69 uint32x4_t& S0, uint32x4_t& S1, uint32x4_t w, uint32x4_t w_prime,
size_t round) {
// Runs four consecutive steps (round .. round + 3), updating S0 and S1 in place.
// Same shape as the four-step helper built on vsm3tt1aq/vsm3tt2aq, but using the
// "b" instruction variants: t comes from vsm3ss1q_u32, then S0 is updated via
// vsm3tt1bq_u32 (using w_prime) and S1 via vsm3tt2bq_u32 (using w). The trailing
// immediate 0..3 advances with the step; per the Arm intrinsic definitions it
// selects the lane of w_prime / w consumed.
// NOTE(review): the function header is not visible in this excerpt; presumably
// this is sm3_x4_r2 (called for rounds 16..63).

// step for round + 0
70 auto t = vsm3ss1q_u32(S0, S1, sm3_tj(round));
71 S0 = vsm3tt1bq_u32(S0, t, w_prime, 0);
72 S1 = vsm3tt2bq_u32(S1, t, w, 0);
// step for round + 1
74 t = vsm3ss1q_u32(S0, S1, sm3_tj(round + 1));
75 S0 = vsm3tt1bq_u32(S0, t, w_prime, 1);
76 S1 = vsm3tt2bq_u32(S1, t, w, 1);
// step for round + 2
78 t = vsm3ss1q_u32(S0, S1, sm3_tj(round + 2));
79 S0 = vsm3tt1bq_u32(S0, t, w_prime, 2);
80 S1 = vsm3tt2bq_u32(S1, t, w, 2);
// step for round + 3
82 t = vsm3ss1q_u32(S0, S1, sm3_tj(round + 3));
83 S0 = vsm3tt1bq_u32(S0, t, w_prime, 3);
84 S1 = vsm3tt2bq_u32(S1, t, w, 3);
90 const uint32x4_t& w3) {
// Overwrites w0 with the output of vsm3partw1q_u32 / vsm3partw2q_u32 applied to
// windows of the four input vectors; w1, w2 and w3 are only read.
// NOTE(review): only the last parameter of the signature is visible in this
// excerpt; presumably this is sm3_msg_expand(w0, w1, w2, w3) with w0..w3 holding
// sixteen consecutive message words, so that w0 becomes the next four - confirm.

// {w1[3], w2[0], w2[1], w2[2]}
91 const uint32x4_t w7_10 = vextq_u32(w1, w2, 3);
// {w0[3], w1[0], w1[1], w1[2]}
92 const uint32x4_t w3_6 = vextq_u32(w0, w1, 3);
// {w2[2], w2[3], w3[0], w3[1]}
93 const uint32x4_t w10_13 = vextq_u32(w2, w3, 2);
// Two-part expansion: the intermediate t feeds the second instruction.
95 uint32x4_t t = vsm3partw1q_u32(w0, w7_10, w3);
96 w0 = vsm3partw2q_u32(t, w10_13, w3_6);
// SM3 compression using the ARMv8 SM3 intrinsics.
//
// digest: eight 32-bit chaining words, read at the start and written back at the end.
// input:  message bytes; 64 bytes (four 16-byte vectors) are consumed per pass.
// blocks: number of blocks to process.
//
// NOTE(review): in this excerpt the loop over `blocks`, the advance of `data`
// and the closing braces are not visible; the comments below describe only the
// visible statements.
101void BOTAN_FN_ISA_SM3 SM3::compress_digest_armv8(digest_type& digest, std::span<const uint8_t> input,
size_t blocks) {
// Load digest words 0..3 and 4..7, reordered by sm3_reverse_words into the lane
// layout the round helpers operate on.
102 uint32x4_t S0 = sm3_reverse_words(vld1q_u32(&digest[0]));
103 uint32x4_t S1 = sm3_reverse_words(vld1q_u32(&digest[4]));
105 const uint8_t* data = input.data();
// Keep the incoming state so it can be XORed into the result after the rounds.
108 const uint32x4_t S0_save = S0;
109 const uint32x4_t S1_save = S1;
// Load 64 message bytes as sixteen 32-bit words; vrev32q_u8 reverses the byte
// order inside each 32-bit word.
111 uint32x4_t W0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data)));
112 uint32x4_t W1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data + 16)));
113 uint32x4_t W2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data + 32)));
114 uint32x4_t W3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(data + 48)));
// Round arguments 0, 4, 8, 12 go through sm3_x4_r1. Each call receives the
// current word vector and its XOR with the following vector; afterwards the
// just-used vector is replaced by sm3_msg_expand, so W0..W3 act as a rotating
// window over the message schedule.
119 sm3_x4_r1(S0, S1, W0, veorq_u32(W0, W1), 0);
120 sm3_msg_expand(W0, W1, W2, W3);
122 sm3_x4_r1(S0, S1, W1, veorq_u32(W1, W2), 4);
123 sm3_msg_expand(W1, W2, W3, W0);
125 sm3_x4_r1(S0, S1, W2, veorq_u32(W2, W3), 8);
126 sm3_msg_expand(W2, W3, W0, W1);
128 sm3_x4_r1(S0, S1, W3, veorq_u32(W3, W0), 12);
129 sm3_msg_expand(W3, W0, W1, W2);
// Round arguments 16..60 go through sm3_x4_r2, with the same rotation pattern.
131 sm3_x4_r2(S0, S1, W0, veorq_u32(W0, W1), 16);
132 sm3_msg_expand(W0, W1, W2, W3);
134 sm3_x4_r2(S0, S1, W1, veorq_u32(W1, W2), 20);
135 sm3_msg_expand(W1, W2, W3, W0);
137 sm3_x4_r2(S0, S1, W2, veorq_u32(W2, W3), 24);
138 sm3_msg_expand(W2, W3, W0, W1);
140 sm3_x4_r2(S0, S1, W3, veorq_u32(W3, W0), 28);
141 sm3_msg_expand(W3, W0, W1, W2);
143 sm3_x4_r2(S0, S1, W0, veorq_u32(W0, W1), 32);
144 sm3_msg_expand(W0, W1, W2, W3);
146 sm3_x4_r2(S0, S1, W1, veorq_u32(W1, W2), 36);
147 sm3_msg_expand(W1, W2, W3, W0);
149 sm3_x4_r2(S0, S1, W2, veorq_u32(W2, W3), 40);
150 sm3_msg_expand(W2, W3, W0, W1);
152 sm3_x4_r2(S0, S1, W3, veorq_u32(W3, W0), 44);
153 sm3_msg_expand(W3, W0, W1, W2);
155 sm3_x4_r2(S0, S1, W0, veorq_u32(W0, W1), 48);
156 sm3_msg_expand(W0, W1, W2, W3);
// The last three calls reuse the vectors already computed; no further message
// expansion is performed after the one above.
158 sm3_x4_r2(S0, S1, W1, veorq_u32(W1, W2), 52);
159 sm3_x4_r2(S0, S1, W2, veorq_u32(W2, W3), 56);
160 sm3_x4_r2(S0, S1, W3, veorq_u32(W3, W0), 60);
// Feed-forward: XOR the state saved before the rounds into the new state.
162 S0 = veorq_u32(S0, S0_save);
163 S1 = veorq_u32(S1, S1_save);
// Undo the lane reordering applied on load and write the state back to digest.
166 vst1q_u32(&digest[0], sm3_reverse_words(S0));
167 vst1q_u32(&digest[4], sm3_reverse_words(S1));
static constexpr size_t block_bytes
#define BOTAN_FORCE_INLINE