7#include <botan/internal/chacha.h>
8#include <botan/internal/simd_avx512.h>
// Computes 16 outputs at once from the 16-word `state` using 16-lane 32-bit
// vectors (SIMD_16x32) and writes them to `output` at 64-byte strides
// (16 stores, matching the declared 64 * 16 byte size of `output`).
//
// output: destination buffer, declared as 64 * 16 bytes.
// state:  16 input words. Words 12 and 13 are the only ones given a per-lane
//         offset below; every other word is used unchanged in all lanes.
// rounds: the main loop runs rounds / 2 iterations, so each iteration
//         presumably covers two rounds -- confirm against the full loop body.
14void ChaCha::chacha_avx512_x16(uint8_t output[64 * 16], uint32_t state[16],
size_t rounds) {
// Offsets 0..15 that are added to state[12] so each lane gets its own value.
16 const SIMD_16x32 CTR0 = SIMD_16x32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
// C is how far state[12] can be incremented before it wraps past 0xFFFFFFFF.
18 const uint32_t C = 0xFFFFFFFF - state[12];
// (C < i) is 1 exactly when state[12] + i overflows 32 bits, so CTR1 is the
// carry into state[13] for offset i. Words 12/13 are thus treated as the
// low/high halves of one 64-bit counter.
19 const SIMD_16x32 CTR1 = SIMD_16x32(
20 0, C < 1, C < 2, C < 3, C < 4, C < 5, C < 6, C < 7, C < 8, C < 9, C < 10, C < 11, C < 12, C < 13, C < 14, C < 15);
// Working registers: Rnn is built from state[nn] via splat (presumably a
// broadcast of the scalar to all 16 lanes -- confirm in SIMD_16x32).
22 SIMD_16x32 R00 = SIMD_16x32::splat(state[0]);
23 SIMD_16x32 R01 = SIMD_16x32::splat(state[1]);
24 SIMD_16x32 R02 = SIMD_16x32::splat(state[2]);
25 SIMD_16x32 R03 = SIMD_16x32::splat(state[3]);
26 SIMD_16x32 R04 = SIMD_16x32::splat(state[4]);
27 SIMD_16x32 R05 = SIMD_16x32::splat(state[5]);
28 SIMD_16x32 R06 = SIMD_16x32::splat(state[6]);
29 SIMD_16x32 R07 = SIMD_16x32::splat(state[7]);
30 SIMD_16x32 R08 = SIMD_16x32::splat(state[8]);
31 SIMD_16x32 R09 = SIMD_16x32::splat(state[9]);
32 SIMD_16x32 R10 = SIMD_16x32::splat(state[10]);
33 SIMD_16x32 R11 = SIMD_16x32::splat(state[11]);
// Words 12 and 13 additionally receive the per-lane offset and its carry.
34 SIMD_16x32 R12 = SIMD_16x32::splat(state[12]) + CTR0;
35 SIMD_16x32 R13 = SIMD_16x32::splat(state[13]) + CTR1;
36 SIMD_16x32 R14 = SIMD_16x32::splat(state[14]);
37 SIMD_16x32 R15 = SIMD_16x32::splat(state[15]);
// Main loop: rounds / 2 iterations.
39 for(
size_t r = 0; r != rounds / 2; ++r) {
// Visible loop statements: rotate R12..R15 left by 16 bits ...
110 R15 = R15.rotl<16>();
111 R12 = R12.rotl<16>();
112 R13 = R13.rotl<16>();
113 R14 = R14.rotl<16>();
// ... and rotate R04..R07 left by 12 bits.
125 R05 = R05.rotl<12>();
126 R06 = R06.rotl<12>();
127 R07 = R07.rotl<12>();
128 R04 = R04.rotl<12>();
// NOTE(review): the loop's closing brace is not visible here; the statements
// below are assumed to run once, after the loop -- confirm.
// Add the starting values back into each register, using the same
// expressions that initialised them (including CTR0/CTR1 for words 12/13).
161 R00 += SIMD_16x32::splat(state[0]);
162 R01 += SIMD_16x32::splat(state[1]);
163 R02 += SIMD_16x32::splat(state[2]);
164 R03 += SIMD_16x32::splat(state[3]);
165 R04 += SIMD_16x32::splat(state[4]);
166 R05 += SIMD_16x32::splat(state[5]);
167 R06 += SIMD_16x32::splat(state[6]);
168 R07 += SIMD_16x32::splat(state[7]);
169 R08 += SIMD_16x32::splat(state[8]);
170 R09 += SIMD_16x32::splat(state[9]);
171 R10 += SIMD_16x32::splat(state[10]);
172 R11 += SIMD_16x32::splat(state[11]);
173 R12 += SIMD_16x32::splat(state[12]) + CTR0;
174 R13 += SIMD_16x32::splat(state[13]) + CTR1;
175 R14 += SIMD_16x32::splat(state[14]);
176 R15 += SIMD_16x32::splat(state[15]);
// Transpose the 16 registers in place. Presumably this turns "one state word
// per register" into "one 64-byte output per register" -- confirm against
// SIMD_16x32::transpose.
178 SIMD_16x32::transpose(R00, R01, R02, R03, R04, R05, R06, R07, R08, R09, R10, R11, R12, R13, R14, R15);
// Store register k at byte offset 64 * k of `output` (store_le: presumably
// little-endian word order, per its name).
180 R00.store_le(output);
181 R01.store_le(output + 64 * 1);
182 R02.store_le(output + 64 * 2);
183 R03.store_le(output + 64 * 3);
184 R04.store_le(output + 64 * 4);
185 R05.store_le(output + 64 * 5);
186 R06.store_le(output + 64 * 6);
187 R07.store_le(output + 64 * 7);
188 R08.store_le(output + 64 * 8);
189 R09.store_le(output + 64 * 9);
190 R10.store_le(output + 64 * 10);
191 R11.store_le(output + 64 * 11);
192 R12.store_le(output + 64 * 12);
193 R13.store_le(output + 64 * 13);
194 R14.store_le(output + 64 * 14);
195 R15.store_le(output + 64 * 15);
// Presumably clears the SIMD registers so no state-derived values remain in
// them after the stores -- confirm against SIMD_16x32::zero_registers.
197 SIMD_16x32::zero_registers();
// Defines BOTAN_ASSERT as a function-like macro with an empty replacement
// list: every BOTAN_ASSERT(expr, assertion_made) after this point expands to
// nothing, so neither argument is evaluated.
// NOTE(review): confirm this no-op definition is intended at this location.
#define BOTAN_ASSERT(expr, assertion_made)