Botan 3.9.0
Crypto and TLS for C++
chacha_simd32.cpp
Go to the documentation of this file.
1/*
2* (C) 2018 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/chacha.h>
8
9#include <botan/assert.h>
10#include <botan/internal/simd_4x32.h>
11
12namespace Botan {
13
14//static
15void ChaCha::chacha_simd32_x4(uint8_t output[64 * 4], uint32_t state[16], size_t rounds) {
16 BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
17 const SIMD_4x32 CTR0 = SIMD_4x32(0, 1, 2, 3);
18
19 const uint32_t C = 0xFFFFFFFF - state[12];
20
21 // NOLINTNEXTLINE(*-implicit-bool-conversion)
22 const SIMD_4x32 CTR1 = SIMD_4x32(0, C < 1, C < 2, C < 3);
23
24 SIMD_4x32 R00 = SIMD_4x32::splat(state[0]);
25 SIMD_4x32 R01 = SIMD_4x32::splat(state[1]);
26 SIMD_4x32 R02 = SIMD_4x32::splat(state[2]);
27 SIMD_4x32 R03 = SIMD_4x32::splat(state[3]);
28 SIMD_4x32 R04 = SIMD_4x32::splat(state[4]);
29 SIMD_4x32 R05 = SIMD_4x32::splat(state[5]);
30 SIMD_4x32 R06 = SIMD_4x32::splat(state[6]);
31 SIMD_4x32 R07 = SIMD_4x32::splat(state[7]);
32 SIMD_4x32 R08 = SIMD_4x32::splat(state[8]);
33 SIMD_4x32 R09 = SIMD_4x32::splat(state[9]);
34 SIMD_4x32 R10 = SIMD_4x32::splat(state[10]);
35 SIMD_4x32 R11 = SIMD_4x32::splat(state[11]);
36 SIMD_4x32 R12 = SIMD_4x32::splat(state[12]) + CTR0;
37 SIMD_4x32 R13 = SIMD_4x32::splat(state[13]) + CTR1;
38 SIMD_4x32 R14 = SIMD_4x32::splat(state[14]);
39 SIMD_4x32 R15 = SIMD_4x32::splat(state[15]);
40
41 for(size_t r = 0; r != rounds / 2; ++r) {
42 R00 += R04;
43 R01 += R05;
44 R02 += R06;
45 R03 += R07;
46
47 R12 ^= R00;
48 R13 ^= R01;
49 R14 ^= R02;
50 R15 ^= R03;
51
52 R12 = R12.rotl<16>();
53 R13 = R13.rotl<16>();
54 R14 = R14.rotl<16>();
55 R15 = R15.rotl<16>();
56
57 R08 += R12;
58 R09 += R13;
59 R10 += R14;
60 R11 += R15;
61
62 R04 ^= R08;
63 R05 ^= R09;
64 R06 ^= R10;
65 R07 ^= R11;
66
67 R04 = R04.rotl<12>();
68 R05 = R05.rotl<12>();
69 R06 = R06.rotl<12>();
70 R07 = R07.rotl<12>();
71
72 R00 += R04;
73 R01 += R05;
74 R02 += R06;
75 R03 += R07;
76
77 R12 ^= R00;
78 R13 ^= R01;
79 R14 ^= R02;
80 R15 ^= R03;
81
82 R12 = R12.rotl<8>();
83 R13 = R13.rotl<8>();
84 R14 = R14.rotl<8>();
85 R15 = R15.rotl<8>();
86
87 R08 += R12;
88 R09 += R13;
89 R10 += R14;
90 R11 += R15;
91
92 R04 ^= R08;
93 R05 ^= R09;
94 R06 ^= R10;
95 R07 ^= R11;
96
97 R04 = R04.rotl<7>();
98 R05 = R05.rotl<7>();
99 R06 = R06.rotl<7>();
100 R07 = R07.rotl<7>();
101
102 R00 += R05;
103 R01 += R06;
104 R02 += R07;
105 R03 += R04;
106
107 R15 ^= R00;
108 R12 ^= R01;
109 R13 ^= R02;
110 R14 ^= R03;
111
112 R15 = R15.rotl<16>();
113 R12 = R12.rotl<16>();
114 R13 = R13.rotl<16>();
115 R14 = R14.rotl<16>();
116
117 R10 += R15;
118 R11 += R12;
119 R08 += R13;
120 R09 += R14;
121
122 R05 ^= R10;
123 R06 ^= R11;
124 R07 ^= R08;
125 R04 ^= R09;
126
127 R05 = R05.rotl<12>();
128 R06 = R06.rotl<12>();
129 R07 = R07.rotl<12>();
130 R04 = R04.rotl<12>();
131
132 R00 += R05;
133 R01 += R06;
134 R02 += R07;
135 R03 += R04;
136
137 R15 ^= R00;
138 R12 ^= R01;
139 R13 ^= R02;
140 R14 ^= R03;
141
142 R15 = R15.rotl<8>();
143 R12 = R12.rotl<8>();
144 R13 = R13.rotl<8>();
145 R14 = R14.rotl<8>();
146
147 R10 += R15;
148 R11 += R12;
149 R08 += R13;
150 R09 += R14;
151
152 R05 ^= R10;
153 R06 ^= R11;
154 R07 ^= R08;
155 R04 ^= R09;
156
157 R05 = R05.rotl<7>();
158 R06 = R06.rotl<7>();
159 R07 = R07.rotl<7>();
160 R04 = R04.rotl<7>();
161 }
162
163 R00 += SIMD_4x32::splat(state[0]);
164 R01 += SIMD_4x32::splat(state[1]);
165 R02 += SIMD_4x32::splat(state[2]);
166 R03 += SIMD_4x32::splat(state[3]);
167 R04 += SIMD_4x32::splat(state[4]);
168 R05 += SIMD_4x32::splat(state[5]);
169 R06 += SIMD_4x32::splat(state[6]);
170 R07 += SIMD_4x32::splat(state[7]);
171 R08 += SIMD_4x32::splat(state[8]);
172 R09 += SIMD_4x32::splat(state[9]);
173 R10 += SIMD_4x32::splat(state[10]);
174 R11 += SIMD_4x32::splat(state[11]);
175 R12 += SIMD_4x32::splat(state[12]) + CTR0;
176 R13 += SIMD_4x32::splat(state[13]) + CTR1;
177 R14 += SIMD_4x32::splat(state[14]);
178 R15 += SIMD_4x32::splat(state[15]);
179
180 SIMD_4x32::transpose(R00, R01, R02, R03);
181 SIMD_4x32::transpose(R04, R05, R06, R07);
182 SIMD_4x32::transpose(R08, R09, R10, R11);
183 SIMD_4x32::transpose(R12, R13, R14, R15);
184
185 R00.store_le(output + 0 * 16);
186 R04.store_le(output + 1 * 16);
187 R08.store_le(output + 2 * 16);
188 R12.store_le(output + 3 * 16);
189 R01.store_le(output + 4 * 16);
190 R05.store_le(output + 5 * 16);
191 R09.store_le(output + 6 * 16);
192 R13.store_le(output + 7 * 16);
193 R02.store_le(output + 8 * 16);
194 R06.store_le(output + 9 * 16);
195 R10.store_le(output + 10 * 16);
196 R14.store_le(output + 11 * 16);
197 R03.store_le(output + 12 * 16);
198 R07.store_le(output + 13 * 16);
199 R11.store_le(output + 14 * 16);
200 R15.store_le(output + 15 * 16);
201
202 state[12] += 4;
203 if(state[12] < 4) {
204 state[13]++;
205 }
206}
207
208} // namespace Botan
#define BOTAN_ASSERT(expr, assertion_made)
Definition assert.h:62
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3) noexcept
Definition simd_4x32.h:600
static SIMD_4x32 splat(uint32_t B) noexcept
Definition simd_4x32.h:118