7#include <botan/internal/chacha.h>
9#include <botan/assert.h>
10#include <botan/internal/simd_avx512.h>
// ChaCha::chacha_avx512_x16 (excerpt - many lines of the body are not shown here).
//
// Parameters, as declared:
//   output - byte buffer declared as 64 * 16 bytes; the stores at the end write
//            sixteen 64-byte chunks at offsets 64 * 0 .. 64 * 15.
//   state  - sixteen 32-bit words; only state[12] is read in the visible lines.
//   rounds - the visible loop runs rounds / 2 iterations.
//
// NOTE(review): the declarations/initialisation of R00..R15 and most of the round
// arithmetic are outside this excerpt, so the comments below describe only the
// statements that are visible.
15void BOTAN_FN_ISA_AVX512 ChaCha::chacha_avx512_x16(uint8_t output[64 * 16], uint32_t state[16],
size_t rounds) {
// Per-lane offsets 0..15.
// NOTE(review): presumably added to the block counter word so each lane works on
// a different block - the use of CTR0 is not visible here; confirm.
17 const SIMD_16x32 CTR0 = SIMD_16x32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
// C is the largest offset that can be added to state[12] without wrapping a
// 32-bit word.
19 const uint32_t C = 0xFFFFFFFF - state[12];
// Each entry (C < k) evaluates to 1 exactly when state[12] + k exceeds 0xFFFFFFFF,
// and to 0 otherwise, i.e. it is the carry out of adding offset k to state[12].
// NOTE(review): presumably added to the next state word to propagate that carry -
// the use of CTR1 is not visible here; confirm.
20 const SIMD_16x32 CTR1 = SIMD_16x32(
21 0, C < 1, C < 2, C < 3, C < 4, C < 5, C < 6, C < 7, C < 8, C < 9, C < 10, C < 11, C < 12, C < 13, C < 14, C < 15);
// One iteration per pair of rounds. rounds / 2 is integer division, so an odd
// `rounds` value is rounded down.
40 for(
size_t r = 0; r != rounds / 2; ++r) {
// Rotate R12..R15 left by 16 bits. The additions/XORs that feed these
// rotations are not shown in this excerpt.
111 R15 = R15.rotl<16>();
112 R12 = R12.rotl<16>();
113 R13 = R13.rotl<16>();
114 R14 = R14.rotl<16>();
// Rotate R04..R07 left by 12 bits.
126 R05 = R05.rotl<12>();
127 R06 = R06.rotl<12>();
128 R07 = R07.rotl<12>();
129 R04 = R04.rotl<12>();
// Transpose across all sixteen registers before writing them out.
// NOTE(review): presumably this converts from "register i = word i of every
// block" to "register i = block i" so each store below emits one contiguous
// block - confirm against SIMD_16x32::transpose.
179 SIMD_16x32::transpose(R00, R01, R02, R03, R04, R05, R06, R07, R08, R09, R10, R11, R12, R13, R14, R15);
// Write register Rnn to output + 64 * nn; together the sixteen stores cover
// all 64 * 16 bytes of `output`.
181 R00.store_le(output);
182 R01.store_le(output + 64 * 1);
183 R02.store_le(output + 64 * 2);
184 R03.store_le(output + 64 * 3);
185 R04.store_le(output + 64 * 4);
186 R05.store_le(output + 64 * 5);
187 R06.store_le(output + 64 * 6);
188 R07.store_le(output + 64 * 7);
189 R08.store_le(output + 64 * 8);
190 R09.store_le(output + 64 * 9);
191 R10.store_le(output + 64 * 10);
192 R11.store_le(output + 64 * 11);
193 R12.store_le(output + 64 * 12);
194 R13.store_le(output + 64 * 13);
195 R14.store_le(output + 64 * 14);
196 R15.store_le(output + 64 * 15);
// Two-argument assertion macro. As written on this line the replacement list is
// empty, so any use of the macro expands to nothing.
// NOTE(review): this looks like a stub of the real definition - confirm against
// the assertion header before relying on it.
#define BOTAN_ASSERT(expr, assertion_made)
// Signature only - no body is visible here. Takes four SIMD_16x32 values by
// non-const reference, so it is able to modify them in place.
// NOTE(review): the call in chacha_avx512_x16 passes sixteen registers, which
// this four-parameter signature does not match; presumably another overload
// exists - confirm.
static BOTAN_FN_ISA_AVX512 void transpose(SIMD_16x32 &B0, SIMD_16x32 &B1, SIMD_16x32 &B2, SIMD_16x32 &B3)
// Signature only - takes no arguments and returns nothing.
// NOTE(review): presumably clears the vector registers - confirm.
static BOTAN_FN_ISA_AVX512 void zero_registers()
// Signature only - builds a SIMD_16x32 from a single 32-bit value.
// NOTE(review): presumably every lane is set to B - confirm.
static BOTAN_FN_ISA_AVX512 SIMD_16x32 splat(uint32_t B)