8#include <botan/internal/chacha.h>
10#include <botan/exceptn.h>
11#include <botan/internal/fmt.h>
12#include <botan/internal/loadstor.h>
13#include <botan/internal/rotate.h>
15#if defined(BOTAN_HAS_CPUID)
16 #include <botan/internal/cpuid.h>
// Quarter-round helper: receives four 32-bit words by non-const reference, so it
// can update all four in place. Callers in this file pass four locals of the
// 16-word working state per call.
// NOTE(review): the body is not visible here; presumably the usual ChaCha
// add/xor/rotate sequence — confirm against the full file.
23inline void chacha_quarter_round(uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d) {
/*
* Runs the ChaCha round function over a copy of a 16-word state.
*
* @param output array of 8 words declared as the result destination
*        (NOTE(review): the statements that write output are not visible
*        here — confirm which state words are emitted)
* @param input the 16-word state; only read, never modified
* @param rounds total number of rounds; the loop below executes rounds / 2
*        iterations (integer division, so an odd value is rounded down)
*/
41void hchacha(uint32_t output[8],
const uint32_t input[16],
size_t rounds) {
// Copy the state into locals so the rounds never touch the caller's array.
44 uint32_t x00 = input[0];
45 uint32_t x01 = input[1];
46 uint32_t x02 = input[2];
47 uint32_t x03 = input[3];
48 uint32_t x04 = input[4];
49 uint32_t x05 = input[5];
50 uint32_t x06 = input[6];
51 uint32_t x07 = input[7];
52 uint32_t x08 = input[8];
53 uint32_t x09 = input[9];
54 uint32_t x10 = input[10];
55 uint32_t x11 = input[11];
56 uint32_t x12 = input[12];
57 uint32_t x13 = input[13];
58 uint32_t x14 = input[14];
59 uint32_t x15 = input[15];
// Each iteration applies eight quarter-rounds, i.e. one double round.
61 for(
size_t i = 0; i != rounds / 2; ++i) {
// First four: words k, k+4, k+8, k+12 — the columns when the 16 words are
// viewed as a row-major 4x4 matrix.
62 chacha_quarter_round(x00, x04, x08, x12);
63 chacha_quarter_round(x01, x05, x09, x13);
64 chacha_quarter_round(x02, x06, x10, x14);
65 chacha_quarter_round(x03, x07, x11, x15);
// Last four: the (wrapping) diagonals of that same 4x4 view.
67 chacha_quarter_round(x00, x05, x10, x15);
68 chacha_quarter_round(x01, x06, x11, x12);
69 chacha_quarter_round(x02, x07, x08, x13);
70 chacha_quarter_round(x03, x04, x09, x14);
86 BOTAN_ARG_CHECK(m_rounds == 8 || m_rounds == 12 || m_rounds == 20,
"ChaCha only supports 8, 12 or 20 rounds");
// Returns a block count. key_schedule() multiplies this value by the 64-byte
// block size to size m_buffer. The result is selected under compile-time guards
// for the AVX-512 and AVX2 implementations.
// NOTE(review): the returned values and any runtime CPU-feature tests are not
// visible here — confirm against the full file.
89size_t ChaCha::parallelism() {
90#if defined(BOTAN_HAS_CHACHA_AVX512)
96#if defined(BOTAN_HAS_CHACHA_AVX2)
106#if defined(BOTAN_HAS_CHACHA_AVX512)
112#if defined(BOTAN_HAS_CHACHA_AVX2)
118#if defined(BOTAN_HAS_CHACHA_SIMD32)
/*
* Produces keystream blocks from the 16-word state.
*
* @param output destination buffer; the scalar path writes 64 bytes per block
*        at offset 64 * i
* @param output_blocks number of 64-byte blocks requested
* @param state the 16-word cipher state; taken as non-const
*        (NOTE(review): the update of the block counter inside state is not
*        visible here — confirm)
* @param rounds total number of rounds; the scalar loop runs rounds / 2
*        double rounds
*/
127void ChaCha::chacha(uint8_t output[],
size_t output_blocks, uint32_t state[16],
size_t rounds) {
// Widest implementation first: when compiled in, blocks are produced 16 at a
// time while at least 16 remain.
// NOTE(review): the advance of output / output_blocks inside these loops, and
// any runtime CPU check guarding them, is not visible here.
130#if defined(BOTAN_HAS_CHACHA_AVX512)
132 while(output_blocks >= 16) {
133 ChaCha::chacha_avx512_x16(output, state, rounds);
// Next: 8 blocks per call while at least 8 remain.
140#if defined(BOTAN_HAS_CHACHA_AVX2)
142 while(output_blocks >= 8) {
143 ChaCha::chacha_avx2_x8(output, state, rounds);
// Next: 4 blocks per call while at least 4 remain.
150#if defined(BOTAN_HAS_CHACHA_SIMD32)
152 while(output_blocks >= 4) {
153 ChaCha::chacha_simd32_x4(output, state, rounds);
// Scalar path: one block per iteration for whatever block count is left.
161 for(
size_t i = 0; i != output_blocks; ++i) {
// Work on a local copy of the state words.
162 uint32_t x00 = state[0];
163 uint32_t x01 = state[1];
164 uint32_t x02 = state[2];
165 uint32_t x03 = state[3];
166 uint32_t x04 = state[4];
167 uint32_t x05 = state[5];
168 uint32_t x06 = state[6];
169 uint32_t x07 = state[7];
170 uint32_t x08 = state[8];
171 uint32_t x09 = state[9];
172 uint32_t x10 = state[10];
173 uint32_t x11 = state[11];
174 uint32_t x12 = state[12];
175 uint32_t x13 = state[13];
176 uint32_t x14 = state[14];
177 uint32_t x15 = state[15];
// rounds / 2 double rounds, each made of eight quarter-rounds.
179 for(
size_t r = 0; r != rounds / 2; ++r) {
// Words k, k+4, k+8, k+12: columns of the row-major 4x4 view of the state.
180 chacha_quarter_round(x00, x04, x08, x12);
181 chacha_quarter_round(x01, x05, x09, x13);
182 chacha_quarter_round(x02, x06, x10, x14);
183 chacha_quarter_round(x03, x07, x11, x15);
// Diagonals of the same 4x4 view.
185 chacha_quarter_round(x00, x05, x10, x15);
186 chacha_quarter_round(x01, x06, x11, x12);
187 chacha_quarter_round(x02, x07, x08, x13);
188 chacha_quarter_round(x03, x04, x09, x14);
// NOTE(review): whatever happens between the round loop and the stores below
// (presumably adding the original state words back in) is not visible here.
// Serialize the 16 words with store_le at 4-byte strides into block i.
208 store_le(x00, output + 64 * i + 4 * 0);
209 store_le(x01, output + 64 * i + 4 * 1);
210 store_le(x02, output + 64 * i + 4 * 2);
211 store_le(x03, output + 64 * i + 4 * 3);
212 store_le(x04, output + 64 * i + 4 * 4);
213 store_le(x05, output + 64 * i + 4 * 5);
214 store_le(x06, output + 64 * i + 4 * 6);
215 store_le(x07, output + 64 * i + 4 * 7);
216 store_le(x08, output + 64 * i + 4 * 8);
217 store_le(x09, output + 64 * i + 4 * 9);
218 store_le(x10, output + 64 * i + 4 * 10);
219 store_le(x11, output + 64 * i + 4 * 11);
220 store_le(x12, output + 64 * i + 4 * 12);
221 store_le(x13, output + 64 * i + 4 * 13);
222 store_le(x14, output + 64 * i + 4 * 14);
223 store_le(x15, output + 64 * i + 4 * 15);
/*
* Combines `length` bytes of `in` with buffered keystream via xor_buf, writing
* to `out`, and regenerates m_buffer each time it is used up.
*
* @param in input bytes
* @param out destination for the combined bytes
* @param length number of bytes to process
*/
235void ChaCha::cipher_bytes(
const uint8_t in[], uint8_t out[],
size_t length) {
// Loop while the request covers at least everything left in the buffer.
238 while(length >= m_buffer.size() - m_position) {
// Unused keystream bytes remaining from m_position to the end of m_buffer.
239 const size_t available = m_buffer.size() - m_position;
// Consume the remaining keystream ...
241 xor_buf(out, in, &m_buffer[m_position], available);
// ... then refill the whole buffer (m_buffer.size() / 64 blocks).
242 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
// NOTE(review): the adjustments of length / in / out / m_position that follow
// the refill are not visible here.
// Tail: fewer bytes than the buffer still holds.
250 xor_buf(out, in, &m_buffer[m_position], length);
// Remember how far into the buffer the keystream has been consumed.
252 m_position += length;
/*
* Copies `length` bytes of raw keystream from m_buffer into `out`, regenerating
* the buffer each time it is used up. Follows the same buffer-handling pattern
* as cipher_bytes(), with copy_mem in place of xor_buf.
*
* @param out destination for the keystream bytes
* @param length number of keystream bytes requested
*/
255void ChaCha::generate_keystream(uint8_t out[],
size_t length) {
// Loop while the request covers at least everything left in the buffer.
258 while(length >= m_buffer.size() - m_position) {
// Unused keystream bytes remaining from m_position to the end of m_buffer.
259 const size_t available = m_buffer.size() - m_position;
// Hand out the remaining keystream ...
263 copy_mem(out, &m_buffer[m_position], available);
// ... then refill the whole buffer (m_buffer.size() / 64 blocks).
264 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
// NOTE(review): the adjustments of length / out / m_position that follow the
// refill are not visible here.
// Tail: fewer bytes than the buffer still holds.
271 copy_mem(out, &m_buffer[m_position], length);
// Remember how far into the buffer the keystream has been consumed.
273 m_position += length;
// Fills the constant and key words of m_state (indices 0-11) from m_key.
// NOTE(review): the branch boundaries and the TAU assignments are not visible
// here; the placement notes below are inferred from which m_key indices each
// group reads — confirm against the full file.
276void ChaCha::initialize_state() {
// Stored little-endian, these four words spell the ASCII "expand 16-byte k".
277 static const uint32_t TAU[] = {0x61707865, 0x3120646e, 0x79622d36, 0x6b206574};
// Stored little-endian, these four words spell the ASCII "expand 32-byte k".
279 static const uint32_t SIGMA[] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
// The first four key words always occupy state words 4-7.
281 m_state[4] = m_key[0];
282 m_state[5] = m_key[1];
283 m_state[6] = m_key[2];
284 m_state[7] = m_key[3];
// A key of exactly four words is handled separately.
286 if(m_key.size() == 4) {
// The same four key words are repeated in state words 8-11.
292 m_state[8] = m_key[0];
293 m_state[9] = m_key[1];
294 m_state[10] = m_key[2];
295 m_state[11] = m_key[3];
// SIGMA goes into state words 0-3 (presumably the longer-key branch, since
// the assignments that follow read m_key[4..7]).
297 m_state[0] = SIGMA[0];
298 m_state[1] = SIGMA[1];
299 m_state[2] = SIGMA[2];
300 m_state[3] = SIGMA[3];
// Key words 4-7 occupy state words 8-11.
302 m_state[8] = m_key[4];
303 m_state[9] = m_key[5];
304 m_state[10] = m_key[6];
305 m_state[11] = m_key[7];
317 return !m_state.empty();
/*
* Sizes the key storage and the keystream buffer for a new key.
*
* @param key raw key bytes; m_key is resized to key.size() / 4 elements
*        (NOTE(review): the loading of key bytes into m_key and any further
*        state setup are not visible here)
*/
327void ChaCha::key_schedule(std::span<const uint8_t> key) {
// One m_key element per four key bytes.
328 m_key.resize(key.size() / 4);
// Bytes per ChaCha block; the buffer holds parallelism() blocks.
333 const size_t chacha_block = 64;
334 m_buffer.resize(parallelism() * chacha_block);
348 return std::make_unique<ChaCha>(m_rounds);
352 return (iv_len == 0 || iv_len == 8 || iv_len == 12 || iv_len == 24);
/*
* Installs an IV, dispatching on its length, and generates keystream into
* m_buffer.
*
* @param iv IV bytes
* @param length IV length in bytes; branches for 8, 12 and 24 are visible
*        (NOTE(review): the first branch's condition, all branch bodies and any
*        validation of `length` are not visible here — confirm)
*/
355void ChaCha::set_iv_bytes(
const uint8_t iv[],
size_t length) {
368 }
else if(length == 8) {
371 }
else if(length == 12) {
375 }
else if(length == 24) {
// 24-byte IV: run hchacha over the current state with the configured round
// count, collecting its result in hc.
382 hchacha(hc.data(), m_state.data(), m_rounds);
// Fill the whole keystream buffer (m_buffer.size() / 64 blocks) from m_state.
398 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
410 return fmt(
"ChaCha({})", m_rounds);
417 const uint64_t counter = offset / 64;
426 chacha(m_buffer.data(), m_buffer.size() / 64, m_state.data(), m_rounds);
427 m_position = offset % 64;
#define BOTAN_ARG_CHECK(expr, msg)
#define BOTAN_ASSERT(expr, assertion_made)
static std::optional< std::string > check(CPUID::Feature feat)
static bool has(CPUID::Feature feat)
std::string name() const override
size_t buffer_size() const override
std::unique_ptr< StreamCipher > new_object() const override
Key_Length_Specification key_spec() const override
bool valid_iv_length(size_t iv_len) const override
size_t default_iv_length() const override
std::string provider() const override
bool has_keying_material() const override
void seek(uint64_t offset) override
void set_iv(const uint8_t iv[], size_t iv_len)
void assert_key_material_set() const
constexpr void copy_mem(T *out, const T *in, size_t n)
void zap(std::vector< T, Alloc > &vec)
std::string fmt(std::string_view format, const T &... args)
constexpr auto store_le(ParamTs &&... params)
BOTAN_FORCE_INLINE constexpr T rotl(T input)
constexpr auto load_le(ParamTs &&... params)
constexpr void xor_buf(ranges::contiguous_output_range< uint8_t > auto &&out, ranges::contiguous_range< uint8_t > auto &&in)
std::vector< T, secure_allocator< T > > secure_vector