8#include <botan/internal/chacha.h>
10#include <botan/exceptn.h>
11#include <botan/internal/cpuid.h>
12#include <botan/internal/fmt.h>
13#include <botan/internal/loadstor.h>
14#include <botan/internal/rotate.h>
// Quarter-round helper used by hchacha() and ChaCha::chacha(). It receives
// four 32-bit words by mutable reference, so it updates them in place.
// NOTE(review): the body is not visible in this excerpt; presumably the usual
// add/xor/rotate mixing (rotate.h is included) -- confirm against the full file.
20inline void chacha_quarter_round(uint32_t& a, uint32_t&
b, uint32_t& c, uint32_t& d) {
// HChaCha core permutation.
//
// output: receives 8 words (the statements that write it lie beyond the
//         visible part of this excerpt).
// input:  16-word ChaCha state; copied into locals, never modified here.
// rounds: total round count; the loop runs rounds / 2 iterations, so an odd
//         value is truncated by the integer division.
38void hchacha(uint32_t output[8],
const uint32_t input[16],
size_t rounds) {
// Work on local copies so the caller's state stays untouched.
41 uint32_t x00 = input[0], x01 = input[1], x02 = input[2], x03 = input[3], x04 = input[4], x05 = input[5],
42 x06 = input[6], x07 = input[7], x08 = input[8], x09 = input[9], x10 = input[10], x11 = input[11],
43 x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];
// Each iteration is one double round: two passes of four quarter rounds.
45 for(
size_t i = 0; i != rounds / 2; ++i) {
// First pass: the four columns of the state viewed as a 4x4 row-major matrix.
46 chacha_quarter_round(x00, x04, x08, x12);
47 chacha_quarter_round(x01, x05, x09, x13);
48 chacha_quarter_round(x02, x06, x10, x14);
49 chacha_quarter_round(x03, x07, x11, x15);
// Second pass: the four (wrapping) diagonals of the same matrix.
51 chacha_quarter_round(x00, x05, x10, x15);
52 chacha_quarter_round(x01, x06, x11, x12);
53 chacha_quarter_round(x02, x07, x08, x13);
54 chacha_quarter_round(x03, x04, x09, x14);
// Round-count validation: only 8, 12 and 20 are accepted.
// NOTE(review): the enclosing function header is not visible here (looks like
// the ChaCha constructor), and the failure behaviour of BOTAN_ARG_CHECK is
// defined elsewhere -- confirm.
70 BOTAN_ARG_CHECK(m_rounds == 8 || m_rounds == 12 || m_rounds == 20,
"ChaCha only supports 8, 12 or 20 rounds");
// Returns a block-count multiplier; key_schedule() sizes the keystream buffer
// as parallelism() * 64 bytes. The value is chosen by a ladder of compile-time
// feature macros combined with runtime CPUID checks, AVX-512 first, then AVX2.
// NOTE(review): the actual return values and the fallback branch are not
// visible in this excerpt.
73size_t ChaCha::parallelism() {
74#if defined(BOTAN_HAS_CHACHA_AVX512)
75 if(CPUID::has_avx512()) {
80#if defined(BOTAN_HAS_CHACHA_AVX2)
81 if(CPUID::has_avx2()) {
// Second dispatch ladder with the same priority order (AVX-512, then AVX2),
// followed by a SIMD32 compile-time branch.
// NOTE(review): the enclosing function header and the branch bodies are not
// visible; presumably this is ChaCha::provider() -- confirm.
90#if defined(BOTAN_HAS_CHACHA_AVX512)
91 if(CPUID::has_avx512()) {
96#if defined(BOTAN_HAS_CHACHA_AVX2)
97 if(CPUID::has_avx2()) {
102#if defined(BOTAN_HAS_CHACHA_SIMD32)
// Keystream block generator.
//
// output:        destination buffer; the scalar path writes 64 bytes per block
//                at output + 64 * i.
// output_blocks: number of 64-byte blocks to produce.
// state:         16-word ChaCha state; read for every block, and its counter
//                words are updated (see the end of the scalar loop).
// rounds:        total round count; the scalar path runs rounds / 2 double
//                rounds.
//
// Wider implementations are tried first (16, then 8, then 4 blocks per call),
// each guarded by a build-time macro; the scalar loop handles what is left.
// NOTE(review): the pointer/counter bookkeeping inside the SIMD loops is not
// visible in this excerpt.
111void ChaCha::chacha(uint8_t output[],
size_t output_blocks, uint32_t state[16],
size_t rounds) {
// AVX-512 path: taken while at least 16 blocks remain.
114#if defined(BOTAN_HAS_CHACHA_AVX512)
115 if(CPUID::has_avx512()) {
116 while(output_blocks >= 16) {
117 ChaCha::chacha_avx512_x16(output, state, rounds);
// AVX2 path: taken while at least 8 blocks remain.
124#if defined(BOTAN_HAS_CHACHA_AVX2)
125 if(CPUID::has_avx2()) {
126 while(output_blocks >= 8) {
127 ChaCha::chacha_avx2_x8(output, state, rounds);
// Generic 32-bit SIMD path: taken while at least 4 blocks remain.
134#if defined(BOTAN_HAS_CHACHA_SIMD32)
136 while(output_blocks >= 4) {
137 ChaCha::chacha_simd32_x4(output, state, rounds);
// Scalar fallback: one block per iteration.
145 for(
size_t i = 0; i != output_blocks; ++i) {
// Local working copy of the state for this block.
146 uint32_t x00 = state[0], x01 = state[1], x02 = state[2], x03 = state[3], x04 = state[4], x05 = state[5],
147 x06 = state[6], x07 = state[7], x08 = state[8], x09 = state[9], x10 = state[10], x11 = state[11],
148 x12 = state[12], x13 = state[13], x14 = state[14], x15 = state[15];
// rounds / 2 double rounds: a column pass followed by a diagonal pass.
150 for(
size_t r = 0; r != rounds / 2; ++r) {
151 chacha_quarter_round(x00, x04, x08, x12);
152 chacha_quarter_round(x01, x05, x09, x13);
153 chacha_quarter_round(x02, x06, x10, x14);
154 chacha_quarter_round(x03, x07, x11, x15);
156 chacha_quarter_round(x00, x05, x10, x15);
157 chacha_quarter_round(x01, x06, x11, x12);
158 chacha_quarter_round(x02, x07, x08, x13);
159 chacha_quarter_round(x03, x04, x09, x14);
// NOTE(review): the lines between the round loop and these stores are missing
// from this excerpt; presumably they add the original state words back into
// x00..x15 -- confirm.
// Serialise the 16 words little-endian into this block's 64-byte slot.
179 store_le(x00, output + 64 * i + 4 * 0);
180 store_le(x01, output + 64 * i + 4 * 1);
181 store_le(x02, output + 64 * i + 4 * 2);
182 store_le(x03, output + 64 * i + 4 * 3);
183 store_le(x04, output + 64 * i + 4 * 4);
184 store_le(x05, output + 64 * i + 4 * 5);
185 store_le(x06, output + 64 * i + 4 * 6);
186 store_le(x07, output + 64 * i + 4 * 7);
187 store_le(x08, output + 64 * i + 4 * 8);
188 store_le(x09, output + 64 * i + 4 * 9);
189 store_le(x10, output + 64 * i + 4 * 10);
190 store_le(x11, output + 64 * i + 4 * 11);
191 store_le(x12, output + 64 * i + 4 * 12);
192 store_le(x13, output + 64 * i + 4 * 13);
193 store_le(x14, output + 64 * i + 4 * 14);
194 store_le(x15, output + 64 * i + 4 * 15);
// Adds 1 to state[13] exactly when state[12] is zero.
// NOTE(review): presumably state[12] was incremented on a line missing from
// this excerpt, making this the carry of a 64-bit block counter held in
// words 12/13 -- confirm.
197 state[13] += (state[12] == 0);
// XORs `length` bytes of `in` with buffered keystream and writes the result
// to `out`.
//
// While the request covers at least the unread remainder of m_buffer, that
// remainder is consumed and the whole buffer is regenerated with chacha().
// The final partial chunk is XORed afterwards and m_position advanced past it.
// NOTE(review): the statements that advance in/out, shrink length and reset
// m_position inside the loop are not visible in this excerpt.
204void ChaCha::cipher_bytes(
const uint8_t in[], uint8_t out[],
size_t length) {
207 while(length >= m_buffer.size() - m_position) {
// Keystream bytes still unread in the current buffer.
208 const size_t available = m_buffer.size() - m_position;
210 xor_buf(out, in, &m_buffer[m_position], available);
// Refill: m_buffer.size() / 64 is the number of 64-byte blocks to generate.
211 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
// Tail: fewer bytes requested than remain buffered.
219 xor_buf(out, in, &m_buffer[m_position], length);
221 m_position += length;
// Copies `length` bytes of raw keystream into `out`.
//
// Same buffering scheme as cipher_bytes(), with copy_mem in place of xor_buf:
// drain the unread remainder of m_buffer, regenerate it with chacha(), repeat;
// then copy the final partial chunk and advance m_position.
// NOTE(review): the statements that advance out, shrink length and reset
// m_position inside the loop are not visible in this excerpt.
224void ChaCha::generate_keystream(uint8_t out[],
size_t length) {
227 while(length >= m_buffer.size() - m_position) {
// Keystream bytes still unread in the current buffer.
228 const size_t available = m_buffer.size() - m_position;
232 copy_mem(out, &m_buffer[m_position], available);
// Refill: m_buffer.size() / 64 is the number of 64-byte blocks to generate.
233 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
// Tail: fewer bytes requested than remain buffered.
240 copy_mem(out, &m_buffer[m_position], length);
242 m_position += length;
// Fills the constant and key words of m_state from m_key.
//
// State layout as written here: words 0-3 hold a constant, words 4-7 the
// first four key words, words 8-11 the remaining key words (or the first four
// again when the key has only four words).
245void ChaCha::initialize_state() {
// Little-endian ASCII of "expand 16-byte k".
246 static const uint32_t TAU[] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
// Little-endian ASCII of "expand 32-byte k".
248 static const uint32_t SIGMA[] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
250 m_state[4] = m_key[0];
251 m_state[5] = m_key[1];
252 m_state[6] = m_key[2];
253 m_state[7] = m_key[3];
// Four key words (key_schedule stores key.size() / 4 words, i.e. a 16-byte
// key): the same four words are reused for state words 8-11.
// NOTE(review): the assignments to m_state[0..3] for this branch are missing
// from this excerpt; presumably they use TAU -- confirm.
255 if(m_key.size() == 4) {
261 m_state[8] = m_key[0];
262 m_state[9] = m_key[1];
263 m_state[10] = m_key[2];
264 m_state[11] = m_key[3];
// Otherwise (the `else` line itself is not visible): SIGMA constant and key
// words 4-7 for state words 8-11.
266 m_state[0] = SIGMA[0];
267 m_state[1] = SIGMA[1];
268 m_state[2] = SIGMA[2];
269 m_state[3] = SIGMA[3];
271 m_state[8] = m_key[4];
272 m_state[9] = m_key[5];
273 m_state[10] = m_key[6];
274 m_state[11] = m_key[7];
// True once m_state is non-empty.
// NOTE(review): enclosing header not visible; presumably
// ChaCha::has_keying_material(), with m_state populated during key setup -- confirm.
286 return !m_state.empty();
// Key setup: sizes the key-word and keystream buffers for the supplied key.
// NOTE(review): the lines that load the key bytes into m_key and build the
// state are not visible in this excerpt.
296void ChaCha::key_schedule(std::span<const uint8_t> key) {
// One 32-bit word per four key bytes.
297 m_key.resize(key.size() / 4);
// The keystream buffer holds parallelism() blocks of 64 bytes each.
302 const size_t chacha_block = 64;
303 m_buffer.resize(parallelism() * chacha_block);
// Creates a new ChaCha object constructed with this object's round count.
// NOTE(review): enclosing header not visible; presumably ChaCha::new_object() -- confirm.
317 return std::make_unique<ChaCha>(m_rounds);
// Accepted IV lengths in bytes: 0, 8, 12 or 24.
// NOTE(review): enclosing header not visible; presumably
// ChaCha::valid_iv_length() -- confirm.
321 return (iv_len == 0 || iv_len == 8 || iv_len == 12 || iv_len == 24);
// Installs an IV of `length` bytes and regenerates the keystream buffer.
//
// Dispatches on the IV length: branches for 8, 12 and 24 bytes are visible.
// NOTE(review): the first branch's condition and all branch bodies except the
// hchacha() call are missing from this excerpt; the first branch presumably
// handles length == 0 -- confirm.
324void ChaCha::set_iv_bytes(
const uint8_t iv[],
size_t length) {
337 }
else if(length == 8) {
340 }
else if(length == 12) {
344 }
else if(length == 24) {
// 24-byte IV: run hchacha() over the current state, writing into `hc`.
// NOTE(review): how `hc` is then used is not visible; presumably it becomes
// the subkey of the XChaCha construction -- confirm.
351 hchacha(hc.data(), m_state.data(), m_rounds);
// Fill the whole keystream buffer (m_buffer.size() / 64 blocks) for the new IV.
367 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
// Builds the algorithm name by substituting the round count into "ChaCha({})".
// NOTE(review): enclosing header not visible; presumably ChaCha::name() -- confirm.
379 return fmt(
"ChaCha({})", m_rounds);
// Fragment of a seek to byte `offset` in the keystream.
// NOTE(review): the enclosing header and the lines that write the counter into
// the state are not visible; presumably ChaCha::seek() -- confirm.
// Index of the 64-byte block containing `offset`.
386 const uint64_t counter = offset / 64;
// Regenerate the keystream buffer, then position the read cursor at the
// byte within the first block.
395 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
396 m_position = offset % 64;
#define BOTAN_ARG_CHECK(expr, msg)
#define BOTAN_ASSERT(expr, assertion_made)
static bool has_simd_32()
std::string name() const override
size_t buffer_size() const override
std::unique_ptr< StreamCipher > new_object() const override
Key_Length_Specification key_spec() const override
bool valid_iv_length(size_t iv_len) const override
size_t default_iv_length() const override
std::string provider() const override
bool has_keying_material() const override
void seek(uint64_t offset) override
void set_iv(const uint8_t iv[], size_t iv_len)
void assert_key_material_set() const
void zap(std::vector< T, Alloc > &vec)
constexpr T rotl(T input)
std::string fmt(std::string_view format, const T &... args)
constexpr auto store_le(ParamTs &&... params)
constexpr auto load_le(ParamTs &&... params)
constexpr void xor_buf(ranges::contiguous_output_range< uint8_t > auto &&out, ranges::contiguous_range< uint8_t > auto &&in)
std::vector< T, secure_allocator< T > > secure_vector
constexpr void copy_mem(T *out, const T *in, size_t n)