Botan 3.3.0
Crypto and TLS for C++
chacha_simd32.cpp
Go to the documentation of this file.
1/*
2* (C) 2018 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/chacha.h>
8
9#include <botan/internal/simd_32.h>
10
11namespace Botan {
12
13//static
14void ChaCha::chacha_simd32_x4(uint8_t output[64 * 4], uint32_t state[16], size_t rounds) {
15 BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
16 const SIMD_4x32 CTR0 = SIMD_4x32(0, 1, 2, 3);
17
18 const uint32_t C = 0xFFFFFFFF - state[12];
19 const SIMD_4x32 CTR1 = SIMD_4x32(0, C < 1, C < 2, C < 3);
20
21 SIMD_4x32 R00 = SIMD_4x32::splat(state[0]);
22 SIMD_4x32 R01 = SIMD_4x32::splat(state[1]);
23 SIMD_4x32 R02 = SIMD_4x32::splat(state[2]);
24 SIMD_4x32 R03 = SIMD_4x32::splat(state[3]);
25 SIMD_4x32 R04 = SIMD_4x32::splat(state[4]);
26 SIMD_4x32 R05 = SIMD_4x32::splat(state[5]);
27 SIMD_4x32 R06 = SIMD_4x32::splat(state[6]);
28 SIMD_4x32 R07 = SIMD_4x32::splat(state[7]);
29 SIMD_4x32 R08 = SIMD_4x32::splat(state[8]);
30 SIMD_4x32 R09 = SIMD_4x32::splat(state[9]);
31 SIMD_4x32 R10 = SIMD_4x32::splat(state[10]);
32 SIMD_4x32 R11 = SIMD_4x32::splat(state[11]);
33 SIMD_4x32 R12 = SIMD_4x32::splat(state[12]) + CTR0;
34 SIMD_4x32 R13 = SIMD_4x32::splat(state[13]) + CTR1;
35 SIMD_4x32 R14 = SIMD_4x32::splat(state[14]);
36 SIMD_4x32 R15 = SIMD_4x32::splat(state[15]);
37
38 for(size_t r = 0; r != rounds / 2; ++r) {
39 R00 += R04;
40 R01 += R05;
41 R02 += R06;
42 R03 += R07;
43
44 R12 ^= R00;
45 R13 ^= R01;
46 R14 ^= R02;
47 R15 ^= R03;
48
49 R12 = R12.rotl<16>();
50 R13 = R13.rotl<16>();
51 R14 = R14.rotl<16>();
52 R15 = R15.rotl<16>();
53
54 R08 += R12;
55 R09 += R13;
56 R10 += R14;
57 R11 += R15;
58
59 R04 ^= R08;
60 R05 ^= R09;
61 R06 ^= R10;
62 R07 ^= R11;
63
64 R04 = R04.rotl<12>();
65 R05 = R05.rotl<12>();
66 R06 = R06.rotl<12>();
67 R07 = R07.rotl<12>();
68
69 R00 += R04;
70 R01 += R05;
71 R02 += R06;
72 R03 += R07;
73
74 R12 ^= R00;
75 R13 ^= R01;
76 R14 ^= R02;
77 R15 ^= R03;
78
79 R12 = R12.rotl<8>();
80 R13 = R13.rotl<8>();
81 R14 = R14.rotl<8>();
82 R15 = R15.rotl<8>();
83
84 R08 += R12;
85 R09 += R13;
86 R10 += R14;
87 R11 += R15;
88
89 R04 ^= R08;
90 R05 ^= R09;
91 R06 ^= R10;
92 R07 ^= R11;
93
94 R04 = R04.rotl<7>();
95 R05 = R05.rotl<7>();
96 R06 = R06.rotl<7>();
97 R07 = R07.rotl<7>();
98
99 R00 += R05;
100 R01 += R06;
101 R02 += R07;
102 R03 += R04;
103
104 R15 ^= R00;
105 R12 ^= R01;
106 R13 ^= R02;
107 R14 ^= R03;
108
109 R15 = R15.rotl<16>();
110 R12 = R12.rotl<16>();
111 R13 = R13.rotl<16>();
112 R14 = R14.rotl<16>();
113
114 R10 += R15;
115 R11 += R12;
116 R08 += R13;
117 R09 += R14;
118
119 R05 ^= R10;
120 R06 ^= R11;
121 R07 ^= R08;
122 R04 ^= R09;
123
124 R05 = R05.rotl<12>();
125 R06 = R06.rotl<12>();
126 R07 = R07.rotl<12>();
127 R04 = R04.rotl<12>();
128
129 R00 += R05;
130 R01 += R06;
131 R02 += R07;
132 R03 += R04;
133
134 R15 ^= R00;
135 R12 ^= R01;
136 R13 ^= R02;
137 R14 ^= R03;
138
139 R15 = R15.rotl<8>();
140 R12 = R12.rotl<8>();
141 R13 = R13.rotl<8>();
142 R14 = R14.rotl<8>();
143
144 R10 += R15;
145 R11 += R12;
146 R08 += R13;
147 R09 += R14;
148
149 R05 ^= R10;
150 R06 ^= R11;
151 R07 ^= R08;
152 R04 ^= R09;
153
154 R05 = R05.rotl<7>();
155 R06 = R06.rotl<7>();
156 R07 = R07.rotl<7>();
157 R04 = R04.rotl<7>();
158 }
159
160 R00 += SIMD_4x32::splat(state[0]);
161 R01 += SIMD_4x32::splat(state[1]);
162 R02 += SIMD_4x32::splat(state[2]);
163 R03 += SIMD_4x32::splat(state[3]);
164 R04 += SIMD_4x32::splat(state[4]);
165 R05 += SIMD_4x32::splat(state[5]);
166 R06 += SIMD_4x32::splat(state[6]);
167 R07 += SIMD_4x32::splat(state[7]);
168 R08 += SIMD_4x32::splat(state[8]);
169 R09 += SIMD_4x32::splat(state[9]);
170 R10 += SIMD_4x32::splat(state[10]);
171 R11 += SIMD_4x32::splat(state[11]);
172 R12 += SIMD_4x32::splat(state[12]) + CTR0;
173 R13 += SIMD_4x32::splat(state[13]) + CTR1;
174 R14 += SIMD_4x32::splat(state[14]);
175 R15 += SIMD_4x32::splat(state[15]);
176
177 SIMD_4x32::transpose(R00, R01, R02, R03);
178 SIMD_4x32::transpose(R04, R05, R06, R07);
179 SIMD_4x32::transpose(R08, R09, R10, R11);
180 SIMD_4x32::transpose(R12, R13, R14, R15);
181
182 R00.store_le(output + 0 * 16);
183 R04.store_le(output + 1 * 16);
184 R08.store_le(output + 2 * 16);
185 R12.store_le(output + 3 * 16);
186 R01.store_le(output + 4 * 16);
187 R05.store_le(output + 5 * 16);
188 R09.store_le(output + 6 * 16);
189 R13.store_le(output + 7 * 16);
190 R02.store_le(output + 8 * 16);
191 R06.store_le(output + 9 * 16);
192 R10.store_le(output + 10 * 16);
193 R14.store_le(output + 11 * 16);
194 R03.store_le(output + 12 * 16);
195 R07.store_le(output + 13 * 16);
196 R11.store_le(output + 14 * 16);
197 R15.store_le(output + 15 * 16);
198
199 state[12] += 4;
200 if(state[12] < 4) {
201 state[13]++;
202 }
203}
204
205} // namespace Botan
#define BOTAN_ASSERT(expr, assertion_made)
Definition assert.h:50
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_32.h:535
static SIMD_4x32 splat(uint32_t B) noexcept
Definition simd_32.h:129