Botan 3.8.1
Crypto and TLS for C++
chacha_avx512.cpp
Go to the documentation of this file.
1/*
2* (C) 2023 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/chacha.h>
8
9#include <botan/assert.h>
10#include <botan/internal/simd_avx512.h>
11
12namespace Botan {
13
14//static
15void BOTAN_FN_ISA_AVX512 ChaCha::chacha_avx512_x16(uint8_t output[64 * 16], uint32_t state[16], size_t rounds) {
16 BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
17 const SIMD_16x32 CTR0 = SIMD_16x32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18
19 const uint32_t C = 0xFFFFFFFF - state[12];
20 const SIMD_16x32 CTR1 = SIMD_16x32(
21 0, C < 1, C < 2, C < 3, C < 4, C < 5, C < 6, C < 7, C < 8, C < 9, C < 10, C < 11, C < 12, C < 13, C < 14, C < 15);
22
23 SIMD_16x32 R00 = SIMD_16x32::splat(state[0]);
24 SIMD_16x32 R01 = SIMD_16x32::splat(state[1]);
25 SIMD_16x32 R02 = SIMD_16x32::splat(state[2]);
26 SIMD_16x32 R03 = SIMD_16x32::splat(state[3]);
27 SIMD_16x32 R04 = SIMD_16x32::splat(state[4]);
28 SIMD_16x32 R05 = SIMD_16x32::splat(state[5]);
29 SIMD_16x32 R06 = SIMD_16x32::splat(state[6]);
30 SIMD_16x32 R07 = SIMD_16x32::splat(state[7]);
31 SIMD_16x32 R08 = SIMD_16x32::splat(state[8]);
32 SIMD_16x32 R09 = SIMD_16x32::splat(state[9]);
33 SIMD_16x32 R10 = SIMD_16x32::splat(state[10]);
34 SIMD_16x32 R11 = SIMD_16x32::splat(state[11]);
35 SIMD_16x32 R12 = SIMD_16x32::splat(state[12]) + CTR0;
36 SIMD_16x32 R13 = SIMD_16x32::splat(state[13]) + CTR1;
37 SIMD_16x32 R14 = SIMD_16x32::splat(state[14]);
38 SIMD_16x32 R15 = SIMD_16x32::splat(state[15]);
39
40 for(size_t r = 0; r != rounds / 2; ++r) {
41 R00 += R04;
42 R01 += R05;
43 R02 += R06;
44 R03 += R07;
45
46 R12 ^= R00;
47 R13 ^= R01;
48 R14 ^= R02;
49 R15 ^= R03;
50
51 R12 = R12.rotl<16>();
52 R13 = R13.rotl<16>();
53 R14 = R14.rotl<16>();
54 R15 = R15.rotl<16>();
55
56 R08 += R12;
57 R09 += R13;
58 R10 += R14;
59 R11 += R15;
60
61 R04 ^= R08;
62 R05 ^= R09;
63 R06 ^= R10;
64 R07 ^= R11;
65
66 R04 = R04.rotl<12>();
67 R05 = R05.rotl<12>();
68 R06 = R06.rotl<12>();
69 R07 = R07.rotl<12>();
70
71 R00 += R04;
72 R01 += R05;
73 R02 += R06;
74 R03 += R07;
75
76 R12 ^= R00;
77 R13 ^= R01;
78 R14 ^= R02;
79 R15 ^= R03;
80
81 R12 = R12.rotl<8>();
82 R13 = R13.rotl<8>();
83 R14 = R14.rotl<8>();
84 R15 = R15.rotl<8>();
85
86 R08 += R12;
87 R09 += R13;
88 R10 += R14;
89 R11 += R15;
90
91 R04 ^= R08;
92 R05 ^= R09;
93 R06 ^= R10;
94 R07 ^= R11;
95
96 R04 = R04.rotl<7>();
97 R05 = R05.rotl<7>();
98 R06 = R06.rotl<7>();
99 R07 = R07.rotl<7>();
100
101 R00 += R05;
102 R01 += R06;
103 R02 += R07;
104 R03 += R04;
105
106 R15 ^= R00;
107 R12 ^= R01;
108 R13 ^= R02;
109 R14 ^= R03;
110
111 R15 = R15.rotl<16>();
112 R12 = R12.rotl<16>();
113 R13 = R13.rotl<16>();
114 R14 = R14.rotl<16>();
115
116 R10 += R15;
117 R11 += R12;
118 R08 += R13;
119 R09 += R14;
120
121 R05 ^= R10;
122 R06 ^= R11;
123 R07 ^= R08;
124 R04 ^= R09;
125
126 R05 = R05.rotl<12>();
127 R06 = R06.rotl<12>();
128 R07 = R07.rotl<12>();
129 R04 = R04.rotl<12>();
130
131 R00 += R05;
132 R01 += R06;
133 R02 += R07;
134 R03 += R04;
135
136 R15 ^= R00;
137 R12 ^= R01;
138 R13 ^= R02;
139 R14 ^= R03;
140
141 R15 = R15.rotl<8>();
142 R12 = R12.rotl<8>();
143 R13 = R13.rotl<8>();
144 R14 = R14.rotl<8>();
145
146 R10 += R15;
147 R11 += R12;
148 R08 += R13;
149 R09 += R14;
150
151 R05 ^= R10;
152 R06 ^= R11;
153 R07 ^= R08;
154 R04 ^= R09;
155
156 R05 = R05.rotl<7>();
157 R06 = R06.rotl<7>();
158 R07 = R07.rotl<7>();
159 R04 = R04.rotl<7>();
160 }
161
162 R00 += SIMD_16x32::splat(state[0]);
163 R01 += SIMD_16x32::splat(state[1]);
164 R02 += SIMD_16x32::splat(state[2]);
165 R03 += SIMD_16x32::splat(state[3]);
166 R04 += SIMD_16x32::splat(state[4]);
167 R05 += SIMD_16x32::splat(state[5]);
168 R06 += SIMD_16x32::splat(state[6]);
169 R07 += SIMD_16x32::splat(state[7]);
170 R08 += SIMD_16x32::splat(state[8]);
171 R09 += SIMD_16x32::splat(state[9]);
172 R10 += SIMD_16x32::splat(state[10]);
173 R11 += SIMD_16x32::splat(state[11]);
174 R12 += SIMD_16x32::splat(state[12]) + CTR0;
175 R13 += SIMD_16x32::splat(state[13]) + CTR1;
176 R14 += SIMD_16x32::splat(state[14]);
177 R15 += SIMD_16x32::splat(state[15]);
178
179 SIMD_16x32::transpose(R00, R01, R02, R03, R04, R05, R06, R07, R08, R09, R10, R11, R12, R13, R14, R15);
180
181 R00.store_le(output);
182 R01.store_le(output + 64 * 1);
183 R02.store_le(output + 64 * 2);
184 R03.store_le(output + 64 * 3);
185 R04.store_le(output + 64 * 4);
186 R05.store_le(output + 64 * 5);
187 R06.store_le(output + 64 * 6);
188 R07.store_le(output + 64 * 7);
189 R08.store_le(output + 64 * 8);
190 R09.store_le(output + 64 * 9);
191 R10.store_le(output + 64 * 10);
192 R11.store_le(output + 64 * 11);
193 R12.store_le(output + 64 * 12);
194 R13.store_le(output + 64 * 13);
195 R14.store_le(output + 64 * 14);
196 R15.store_le(output + 64 * 15);
197
199
200 state[12] += 16;
201 if(state[12] < 16) {
202 state[13]++;
203 }
204}
205} // namespace Botan
#define BOTAN_ASSERT(expr, assertion_made)
Definition assert.h:52
static BOTAN_FN_ISA_AVX512 void transpose(SIMD_16x32 &B0, SIMD_16x32 &B1, SIMD_16x32 &B2, SIMD_16x32 &B3)
static BOTAN_FN_ISA_AVX512 void zero_registers()
static BOTAN_FN_ISA_AVX512 SIMD_16x32 splat(uint32_t B)
Definition simd_avx512.h:52