Botan  2.8.0
Crypto and TLS for C++11
sha2_32_armv8.cpp
Go to the documentation of this file.
1 /*
2 * SHA-256 using CPU instructions in ARMv8
3 *
4 * Contributed by Jeffrey Walton. Based on public domain code by
5 * Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
6 *
7 * Botan is released under the Simplified BSD License (see license.txt)
8 */
9 
10 #include <botan/sha2_32.h>
11 #include <arm_neon.h>
12 
13 namespace Botan {
14 
/*
* SHA-256 compression function using the ARMv8 SHA-2 crypto instructions.
*
* Declared static in the class (hence the historical "//static" marker).
*
* @param digest  chaining state; the first 8 words are read on entry and
*                overwritten with the updated state on return
* @param input8  message data; 64 bytes are consumed per block
* @param blocks  number of 64-byte blocks to process (0 leaves the state
*                unchanged)
*/
#if defined(BOTAN_HAS_SHA2_32_ARMV8)
BOTAN_FUNC_ISA("+crypto")
void SHA_256::compress_digest_armv8(secure_vector<uint32_t>& digest, const uint8_t input8[], size_t blocks)
   {
   // SHA-256 round constants, consumed four at a time (one vector per 4 rounds)
   static const uint32_t K[] = {
      0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
      0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
      0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
      0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
      0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
      0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
      0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
      0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
      0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
      0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
      0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
      0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
      0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
      0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
      0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
      0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
   };

   uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE;
   uint32x4_t MSG0, MSG1, MSG2, MSG3;
   uint32x4_t TMP0, TMP1, TMP2;

   // Load initial values (words 0-3 and 4-7 of the chaining state)
   STATE0 = vld1q_u32(&digest[0]);
   STATE1 = vld1q_u32(&digest[4]);

   while(blocks > 0)
      {
      // Save current state
      ABEF_SAVE = STATE0;
      CDGH_SAVE = STATE1;

      // Load the block as bytes and byte-swap each 32-bit word. Loading
      // through the uint8_t pointer avoids reinterpreting the input as
      // uint32_t*, so no alignment is assumed for the message buffer.
      MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input8 +  0)));
      MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input8 + 16)));
      MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input8 + 32)));
      MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(input8 + 48)));

      // Each group below performs 4 rounds with the W+K vector prepared by
      // the previous group, while preparing the W+K vector for the next one.
      TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x00]));

      // Rounds 0-3
      MSG0 = vsha256su0q_u32(MSG0, MSG1);
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x04]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
      MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

      // Rounds 4-7
      MSG1 = vsha256su0q_u32(MSG1, MSG2);
      TMP2 = STATE0;
      TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x08]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
      MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

      // Rounds 8-11
      MSG2 = vsha256su0q_u32(MSG2, MSG3);
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x0c]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
      MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

      // Rounds 12-15
      MSG3 = vsha256su0q_u32(MSG3, MSG0);
      TMP2 = STATE0;
      TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x10]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
      MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

      // Rounds 16-19
      MSG0 = vsha256su0q_u32(MSG0, MSG1);
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x14]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
      MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

      // Rounds 20-23
      MSG1 = vsha256su0q_u32(MSG1, MSG2);
      TMP2 = STATE0;
      TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x18]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
      MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

      // Rounds 24-27
      MSG2 = vsha256su0q_u32(MSG2, MSG3);
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x1c]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
      MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

      // Rounds 28-31
      MSG3 = vsha256su0q_u32(MSG3, MSG0);
      TMP2 = STATE0;
      TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x20]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
      MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

      // Rounds 32-35
      MSG0 = vsha256su0q_u32(MSG0, MSG1);
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x24]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
      MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3);

      // Rounds 36-39
      MSG1 = vsha256su0q_u32(MSG1, MSG2);
      TMP2 = STATE0;
      TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x28]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
      MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0);

      // Rounds 40-43
      MSG2 = vsha256su0q_u32(MSG2, MSG3);
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x2c]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);
      MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1);

      // Rounds 44-47
      MSG3 = vsha256su0q_u32(MSG3, MSG0);
      TMP2 = STATE0;
      TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x30]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);
      MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2);

      // Rounds 48-51 (no further message schedule updates from here on)
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x34]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

      // Rounds 52-55
      TMP2 = STATE0;
      TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x38]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);

      // Rounds 56-59
      TMP2 = STATE0;
      TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x3c]));
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0);

      // Rounds 60-63
      TMP2 = STATE0;
      STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1);
      STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1);

      // Add back to state
      STATE0 = vaddq_u32(STATE0, ABEF_SAVE);
      STATE1 = vaddq_u32(STATE1, CDGH_SAVE);

      // Advance to the next 64-byte block
      input8 += 64;
      blocks--;
      }

   // Save state
   vst1q_u32(&digest[0], STATE0);
   vst1q_u32(&digest[4], STATE1);
   }
#endif
203 
204 }
#define BOTAN_FUNC_ISA(isa)
Definition: compiler.h:75
Definition: alg_id.cpp:13
std::vector< T, secure_allocator< T > > secure_vector
Definition: secmem.h:88