11#include <botan/internal/poly1305.h>
13#include <botan/internal/buffer_slicer.h>
14#include <botan/internal/ct_utils.h>
15#include <botan/internal/donna128.h>
16#include <botan/internal/loadstor.h>
18#if defined(BOTAN_HAS_POLY1305_AVX2) || defined(BOTAN_HAS_POLY1305_AVX512)
19 #include <botan/internal/cpuid.h>
28constexpr size_t PAD_BASE = 0;
29constexpr size_t H_BASE = 2;
30constexpr size_t R_BASE = 5;
43 constexpr uint64_t M44 = 0xFFFFFFFFFFF;
44 constexpr uint64_t M42 = 0x3FFFFFFFFFF;
46#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
50 const uint64_t s1 = b1 * 20;
51 const uint64_t s2 = b2 * 20;
53 const uint128_t d0 = uint128_t(a0) * b0 + uint128_t(a1) * s2 + uint128_t(a2) * s1;
56 const uint128_t d1 = uint128_t(a0) * b1 + uint128_t(a1) * b0 + uint128_t(a2) * s2 + c0;
59 const uint128_t d2 = uint128_t(a0) * b2 + uint128_t(a1) * b1 + uint128_t(a2) * b0 + c1;
62 h0 = (d0 & M44) + c2 * 5;
63 h1 = (d1 & M44) + (h0 >> 44);
70 const size_t current_powers = (X.size() - 5) / 3;
72 if(current_powers >= target_powers) {
77 const uint64_t r0 = X[R_BASE + 0];
78 const uint64_t r1 = X[R_BASE + 1];
79 const uint64_t r2 = X[R_BASE + 2];
81 X.resize(5 + target_powers * 3);
84 for(
size_t i = current_powers + 1; i <= target_powers; ++i) {
85 const size_t offset = R_BASE + (i - 1) * 3;
87 X[offset + 0], X[offset + 1], X[offset + 2], X[offset - 3], X[offset - 2], X[offset - 1], r0, r1, r2);
94 X.reserve(2 + 3 + 2 * 3);
110 const uint64_t r0 = (t0) & 0xffc0fffffff;
111 const uint64_t r1 = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
112 const uint64_t r2 = ((t1 >> 24)) & 0x00ffffffc0f;
119 poly1305_extend_powers(X, 2);
133 constexpr uint64_t M44 = 0xFFFFFFFFFFF;
134 constexpr uint64_t M42 = 0x3FFFFFFFFFF;
136#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
144 h1 += ((t0 >> 44) | (t1 << 20)) & M44;
145 h2 += ((t1 >> 24) & M42) | hibit;
147 const uint128_t d0 = uint128_t(h0) * r0 + uint128_t(h1) * s2 + uint128_t(h2) * s1;
150 const uint128_t d1 = uint128_t(h0) * r1 + uint128_t(h1) * r0 + uint128_t(h2) * s2 + c0;
153 const uint128_t d2 = uint128_t(h0) * r2 + uint128_t(h1) * r1 + uint128_t(h2) * r0 + c1;
156 h0 = (d0 & M44) + c2 * 5;
157 h1 = (d1 & M44) + (h0 >> 44);
179 constexpr uint64_t M44 = 0xFFFFFFFFFFF;
180 constexpr uint64_t M42 = 0x3FFFFFFFFFF;
182#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
196 h1 += ((m0_t0 >> 44) | (m0_t1 << 20)) & M44;
197 h2 += ((m0_t1 >> 24) & M42) | hibit;
200 const uint64_t b0 = (m1_t0 & M44);
201 const uint64_t b1 = ((m1_t0 >> 44) | (m1_t1 << 20)) & M44;
202 const uint64_t b2 = ((m1_t1 >> 24) & M42) | hibit;
205 const uint128_t d0 = uint128_t(h0) * rr0 + uint128_t(h1) * ss2 + uint128_t(h2) * ss1 + uint128_t(b0) * r0 +
206 uint128_t(b1) * s2 + uint128_t(b2) * s1;
209 const uint128_t d1 = uint128_t(h0) * rr1 + uint128_t(h1) * rr0 + uint128_t(h2) * ss2 + uint128_t(b0) * r1 +
210 uint128_t(b1) * r0 + uint128_t(b2) * s2 + c0;
213 const uint128_t d2 = uint128_t(h0) * rr2 + uint128_t(h1) * rr1 + uint128_t(h2) * rr0 + uint128_t(b0) * r2 +
214 uint128_t(b1) * r1 + uint128_t(b2) * r0 + c1;
217 h0 = (d0 & M44) + c2 * 5;
218 h1 = (d1 & M44) + (h0 >> 44);
224 const uint64_t hibit = is_final ? 0 : (
static_cast<uint64_t
>(1) << 40);
227 const uint64_t r0 = X[R_BASE + 0];
228 const uint64_t r1 = X[R_BASE + 1];
229 const uint64_t r2 = X[R_BASE + 2];
230 const uint64_t s1 = r1 * 20;
231 const uint64_t s2 = r2 * 20;
234 const uint64_t rr0 = X[R_BASE + 3];
235 const uint64_t rr1 = X[R_BASE + 4];
236 const uint64_t rr2 = X[R_BASE + 5];
239 const uint64_t ss1 = rr1 * 20;
240 const uint64_t ss2 = rr2 * 20;
243 uint64_t h0 = X[H_BASE + 0];
244 uint64_t h1 = X[H_BASE + 1];
245 uint64_t h2 = X[H_BASE + 2];
248 poly1305_block_pair(h0, h1, h2, r0, r1, r2, s1, s2, rr0, rr1, rr2, ss1, ss2, m, hibit);
255 poly1305_block_single(h0, h1, h2, r0, r1, r2, s1, s2, m, hibit);
265 constexpr uint64_t M44 = 0xFFFFFFFFFFF;
266 constexpr uint64_t M42 = 0x3FFFFFFFFFF;
269 uint64_t h0 = X[H_BASE + 0];
270 uint64_t h1 = X[H_BASE + 1];
271 uint64_t h2 = X[H_BASE + 2];
273 uint64_t c = (h1 >> 44);
293 uint64_t g0 = h0 + 5;
296 uint64_t g1 = h1 + c;
299 const uint64_t g2 = h2 + c - (
static_cast<uint64_t
>(1) << 42);
303 h0 = c_mask.select(g0, h0);
304 h1 = c_mask.select(g1, h1);
305 h2 = c_mask.select(g2, h2);
308 const uint64_t t0 = X[PAD_BASE + 0];
309 const uint64_t t1 = X[PAD_BASE + 1];
314 h1 += (((t0 >> 44) | (t1 << 20)) & M44) + c;
317 h2 += (((t1 >> 24)) & M42) + c;
321 h0 = ((h0) | (h1 << 44));
322 h1 = ((h1 >> 20) | (h2 << 24));
339 return m_poly.size() >= 11;
342void Poly1305::key_schedule(std::span<const uint8_t> key) {
345 poly1305_init(m_poly, key.data());
349#if defined(BOTAN_HAS_POLY1305_AVX512)
355#if defined(BOTAN_HAS_POLY1305_AVX2)
364void Poly1305::add_data(std::span<const uint8_t> input) {
371 poly1305_blocks(m_poly, one_block->data(), 1);
376 if(full_blocks > 0) {
377 const uint8_t* data_ptr = aligned_data.data();
378 size_t blocks_remaining = full_blocks;
380#if defined(BOTAN_HAS_POLY1305_AVX512)
383 poly1305_extend_powers(m_poly, 8);
384 const size_t processed = poly1305_avx512_blocks(m_poly, data_ptr, blocks_remaining);
385 data_ptr += processed * 16;
386 blocks_remaining -= processed;
390#if defined(BOTAN_HAS_POLY1305_AVX2)
393 poly1305_extend_powers(m_poly, 4);
394 const size_t processed = poly1305_avx2_blocks(m_poly, data_ptr, blocks_remaining);
395 data_ptr += processed * 16;
396 blocks_remaining -= processed;
400 if(blocks_remaining > 0) {
401 poly1305_blocks(m_poly, data_ptr, blocks_remaining);
408void Poly1305::final_result(std::span<uint8_t> out) {
411 if(!m_buffer.in_alignment()) {
412 const uint8_t final_byte = 0x01;
413 m_buffer.append({&final_byte, 1});
414 m_buffer.fill_up_with_zeros();
415 poly1305_blocks(m_poly, m_buffer.consume().data(), 1,
true);
418 poly1305_finish(m_poly, out.data());
std::tuple< std::span< const uint8_t >, size_t > aligned_data_to_process(BufferSlicer &slicer) const
std::optional< std::span< const T > > handle_unaligned_data(BufferSlicer &slicer)
bool in_alignment() const
static std::optional< std::string > check(CPUID::Feature feat)
static bool has(CPUID::Feature feat)
static constexpr Mask< T > expand(T v)
std::string provider() const override
bool has_keying_material() const override
void assert_key_material_set() const
#define BOTAN_FORCE_INLINE
constexpr uint64_t carry_shift(const donna128 &a, size_t shift)
void zap(std::vector< T, Alloc > &vec)
constexpr auto store_le(ParamTs &&... params)
constexpr auto load_le(ParamTs &&... params)
std::vector< T, secure_allocator< T > > secure_vector
constexpr void clear_mem(T *ptr, size_t n)