Botan 3.9.0
Crypto and TLS for C++
chacha_avx512.cpp
Go to the documentation of this file.
1/*
2* (C) 2023 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/chacha.h>
8
9#include <botan/assert.h>
10#include <botan/internal/simd_avx512.h>
11
12namespace Botan {
13
14//static
15void BOTAN_FN_ISA_AVX512 ChaCha::chacha_avx512_x16(uint8_t output[64 * 16], uint32_t state[16], size_t rounds) {
16 BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
17 const SIMD_16x32 CTR0 = SIMD_16x32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18
19 const uint32_t C = 0xFFFFFFFF - state[12];
20
21 // clang-format off
22 const SIMD_16x32 CTR1 = SIMD_16x32(
23 // NOLINTNEXTLINE(*-implicit-bool-conversion)
24 0, C < 1, C < 2, C < 3, C < 4, C < 5, C < 6, C < 7, C < 8, C < 9, C < 10, C < 11, C < 12, C < 13, C < 14, C < 15);
25 // clang-format on
26
27 SIMD_16x32 R00 = SIMD_16x32::splat(state[0]);
28 SIMD_16x32 R01 = SIMD_16x32::splat(state[1]);
29 SIMD_16x32 R02 = SIMD_16x32::splat(state[2]);
30 SIMD_16x32 R03 = SIMD_16x32::splat(state[3]);
31 SIMD_16x32 R04 = SIMD_16x32::splat(state[4]);
32 SIMD_16x32 R05 = SIMD_16x32::splat(state[5]);
33 SIMD_16x32 R06 = SIMD_16x32::splat(state[6]);
34 SIMD_16x32 R07 = SIMD_16x32::splat(state[7]);
35 SIMD_16x32 R08 = SIMD_16x32::splat(state[8]);
36 SIMD_16x32 R09 = SIMD_16x32::splat(state[9]);
37 SIMD_16x32 R10 = SIMD_16x32::splat(state[10]);
38 SIMD_16x32 R11 = SIMD_16x32::splat(state[11]);
39 SIMD_16x32 R12 = SIMD_16x32::splat(state[12]) + CTR0;
40 SIMD_16x32 R13 = SIMD_16x32::splat(state[13]) + CTR1;
41 SIMD_16x32 R14 = SIMD_16x32::splat(state[14]);
42 SIMD_16x32 R15 = SIMD_16x32::splat(state[15]);
43
44 for(size_t r = 0; r != rounds / 2; ++r) {
45 R00 += R04;
46 R01 += R05;
47 R02 += R06;
48 R03 += R07;
49
50 R12 ^= R00;
51 R13 ^= R01;
52 R14 ^= R02;
53 R15 ^= R03;
54
55 R12 = R12.rotl<16>();
56 R13 = R13.rotl<16>();
57 R14 = R14.rotl<16>();
58 R15 = R15.rotl<16>();
59
60 R08 += R12;
61 R09 += R13;
62 R10 += R14;
63 R11 += R15;
64
65 R04 ^= R08;
66 R05 ^= R09;
67 R06 ^= R10;
68 R07 ^= R11;
69
70 R04 = R04.rotl<12>();
71 R05 = R05.rotl<12>();
72 R06 = R06.rotl<12>();
73 R07 = R07.rotl<12>();
74
75 R00 += R04;
76 R01 += R05;
77 R02 += R06;
78 R03 += R07;
79
80 R12 ^= R00;
81 R13 ^= R01;
82 R14 ^= R02;
83 R15 ^= R03;
84
85 R12 = R12.rotl<8>();
86 R13 = R13.rotl<8>();
87 R14 = R14.rotl<8>();
88 R15 = R15.rotl<8>();
89
90 R08 += R12;
91 R09 += R13;
92 R10 += R14;
93 R11 += R15;
94
95 R04 ^= R08;
96 R05 ^= R09;
97 R06 ^= R10;
98 R07 ^= R11;
99
100 R04 = R04.rotl<7>();
101 R05 = R05.rotl<7>();
102 R06 = R06.rotl<7>();
103 R07 = R07.rotl<7>();
104
105 R00 += R05;
106 R01 += R06;
107 R02 += R07;
108 R03 += R04;
109
110 R15 ^= R00;
111 R12 ^= R01;
112 R13 ^= R02;
113 R14 ^= R03;
114
115 R15 = R15.rotl<16>();
116 R12 = R12.rotl<16>();
117 R13 = R13.rotl<16>();
118 R14 = R14.rotl<16>();
119
120 R10 += R15;
121 R11 += R12;
122 R08 += R13;
123 R09 += R14;
124
125 R05 ^= R10;
126 R06 ^= R11;
127 R07 ^= R08;
128 R04 ^= R09;
129
130 R05 = R05.rotl<12>();
131 R06 = R06.rotl<12>();
132 R07 = R07.rotl<12>();
133 R04 = R04.rotl<12>();
134
135 R00 += R05;
136 R01 += R06;
137 R02 += R07;
138 R03 += R04;
139
140 R15 ^= R00;
141 R12 ^= R01;
142 R13 ^= R02;
143 R14 ^= R03;
144
145 R15 = R15.rotl<8>();
146 R12 = R12.rotl<8>();
147 R13 = R13.rotl<8>();
148 R14 = R14.rotl<8>();
149
150 R10 += R15;
151 R11 += R12;
152 R08 += R13;
153 R09 += R14;
154
155 R05 ^= R10;
156 R06 ^= R11;
157 R07 ^= R08;
158 R04 ^= R09;
159
160 R05 = R05.rotl<7>();
161 R06 = R06.rotl<7>();
162 R07 = R07.rotl<7>();
163 R04 = R04.rotl<7>();
164 }
165
166 R00 += SIMD_16x32::splat(state[0]);
167 R01 += SIMD_16x32::splat(state[1]);
168 R02 += SIMD_16x32::splat(state[2]);
169 R03 += SIMD_16x32::splat(state[3]);
170 R04 += SIMD_16x32::splat(state[4]);
171 R05 += SIMD_16x32::splat(state[5]);
172 R06 += SIMD_16x32::splat(state[6]);
173 R07 += SIMD_16x32::splat(state[7]);
174 R08 += SIMD_16x32::splat(state[8]);
175 R09 += SIMD_16x32::splat(state[9]);
176 R10 += SIMD_16x32::splat(state[10]);
177 R11 += SIMD_16x32::splat(state[11]);
178 R12 += SIMD_16x32::splat(state[12]) + CTR0;
179 R13 += SIMD_16x32::splat(state[13]) + CTR1;
180 R14 += SIMD_16x32::splat(state[14]);
181 R15 += SIMD_16x32::splat(state[15]);
182
183 SIMD_16x32::transpose(R00, R01, R02, R03, R04, R05, R06, R07, R08, R09, R10, R11, R12, R13, R14, R15);
184
185 R00.store_le(output);
186 R01.store_le(output + 64 * 1);
187 R02.store_le(output + 64 * 2);
188 R03.store_le(output + 64 * 3);
189 R04.store_le(output + 64 * 4);
190 R05.store_le(output + 64 * 5);
191 R06.store_le(output + 64 * 6);
192 R07.store_le(output + 64 * 7);
193 R08.store_le(output + 64 * 8);
194 R09.store_le(output + 64 * 9);
195 R10.store_le(output + 64 * 10);
196 R11.store_le(output + 64 * 11);
197 R12.store_le(output + 64 * 12);
198 R13.store_le(output + 64 * 13);
199 R14.store_le(output + 64 * 14);
200 R15.store_le(output + 64 * 15);
201
203
204 state[12] += 16;
205 if(state[12] < 16) {
206 state[13]++;
207 }
208}
209} // namespace Botan
#define BOTAN_ASSERT(expr, assertion_made)
Definition assert.h:62
static BOTAN_FN_ISA_AVX512 void transpose(SIMD_16x32 &B0, SIMD_16x32 &B1, SIMD_16x32 &B2, SIMD_16x32 &B3)
static BOTAN_FN_ISA_AVX512 void zero_registers()
static BOTAN_FN_ISA_AVX512 SIMD_16x32 splat(uint32_t B)
Definition simd_avx512.h:60