7#include <botan/argon2.h>
9#include <botan/exceptn.h>
10#include <botan/hash.h>
11#include <botan/mem_ops.h>
12#include <botan/internal/fmt.h>
13#include <botan/internal/loadstor.h>
14#include <botan/internal/rotate.h>
17#if defined(BOTAN_HAS_THREAD_UTILS)
18 #include <botan/internal/thread_pool.h>
21#if defined(BOTAN_HAS_ARGON2_AVX2) || defined(BOTAN_HAS_ARGON2_SSSE3)
22 #include <botan/internal/cpuid.h>
// Number of slices per pass: the per-lane length is divided by SYNC_POINTS to get
// the segment length, and the slice loops below run from 0 to SYNC_POINTS - 1.
29const size_t SYNC_POINTS = 4;
// Absorbs the Argon2 parameters and inputs into `blake2b`, presumably to produce
// the 64-byte initial hash H0.
// Every scalar is cast to uint32_t and passed to update_le; each variable-length
// input visible here (salt, key, ad) is preceded by its 32-bit length.
// NOTE(review): this listing is truncated. The rest of the parameter list, the
// update with the password bytes and the step that writes H0 are not visible.
31void argon2_H0(uint8_t H0[64],
32 HashFunction& blake2b,
// Fixed-size header fields, in hashing order: p, output_len, M, t, v, y.
// Presumably lanes, tag length, memory, passes, version and variant - TODO confirm
// against the full signature.
48 blake2b.update_le(
static_cast<uint32_t
>(p));
49 blake2b.update_le(
static_cast<uint32_t
>(output_len));
50 blake2b.update_le(
static_cast<uint32_t
>(M));
51 blake2b.update_le(
static_cast<uint32_t
>(t));
52 blake2b.update_le(
static_cast<uint32_t
>(v));
53 blake2b.update_le(
static_cast<uint32_t
>(y));
// Length prefix of the password; the update with the password bytes themselves
// is not visible in this listing.
55 blake2b.update_le(
static_cast<uint32_t
>(password_len));
// Salt, key and associated data: 32-bit length first, then the raw bytes.
58 blake2b.update_le(
static_cast<uint32_t
>(salt_len));
59 blake2b.update(salt, salt_len);
61 blake2b.update_le(
static_cast<uint32_t
>(key_len));
62 blake2b.update(key, key_len);
64 blake2b.update_le(
static_cast<uint32_t
>(ad_len));
65 blake2b.update(ad, ad_len);
// Writes the final `output_len` bytes of output, derived from the block memory B.
// B is indexed in units of 128 uint64_t words per block.
// NOTE(review): this listing is truncated. The inner-loop body, the construction
// of the hash objects and the body of the long-output loop are not visible.
70void extract_key(uint8_t output[],
size_t output_len,
const secure_vector<uint64_t>& B,
size_t memory,
size_t threads) {
// Despite its name, `lanes` is memory / threads: a per-lane length in blocks,
// not a lane count.
71 const size_t lanes = memory / threads;
// 128-word buffer, zero-initialised; it is hashed word by word further down.
73 uint64_t sum[128] = {0};
75 for(
size_t lane = 0; lane != threads; ++lane) {
// [start, end) covers the 128 words of the last block of this lane.
76 const size_t start = 128 * (lane * lanes + lanes - 1);
77 const size_t end = 128 * (lane * lanes + lanes);
// Loop body not visible; presumably it folds B[j] into `sum` - TODO confirm.
79 for(
size_t j = start; j != end; ++j) {
// Outputs of at most 64 bytes: hash the 32-bit output length followed by all
// 128 words of `sum`, and finalise directly into `output`.
84 if(output_len <= 64) {
86 blake2b->update_le(
static_cast<uint32_t
>(output_len));
87 for(
size_t i = 0; i != 128; ++i) {
88 blake2b->update_le(sum[i]);
90 blake2b->final(output);
// Longer outputs: the same absorb step, but the digest goes to the temporary T.
95 blake2b->update_le(
static_cast<uint32_t
>(output_len));
96 for(
size_t i = 0; i != 128; ++i) {
97 blake2b->update_le(sum[i]);
99 blake2b->final(&
T[0]);
// Produces the output beyond the first digest; most of the loop body is missing
// from this listing.
101 while(output_len > 64) {
106 if(output_len > 64) {
108 blake2b->final(&
T[0]);
// Tail handling: a remainder of exactly 64 bytes is finalised with `blake2b`;
// the blake2b_f calls below presumably form the other branch - TODO confirm.
112 if(output_len == 64) {
114 blake2b->final(output);
117 blake2b_f->update(
T);
118 blake2b_f->final(output);
// NOTE(review): the signature of this function is not visible in this listing.
// Judging by the call site it is presumably init_blocks(B, blake2b, H0, memory,
// threads) - TODO confirm.
// For every lane i, the visible code writes the first two blocks (j = 0, 1) of
// that lane in B.
127 for(
size_t i = 0; i != threads; ++i) {
// Index of the lane's first block; each block is 128 words of B.
128 const size_t B_off = i * (memory / threads);
132 for(
size_t j = 0; j != 2; ++j) {
// Hash input: the 32-bit value 1024, the 64 bytes of H0, then the block index j
// and the lane index i. 1024 equals 128 words x 8 bytes, presumably the block
// size in bytes.
135 blake2b.update_le(
static_cast<uint32_t
>(1024));
136 blake2b.update(H0, 64);
137 blake2b.update_le(
static_cast<uint32_t
>(j));
138 blake2b.update_le(
static_cast<uint32_t
>(i));
// 30 rounds: presumably load_le stores 32 / 8 = 4 words decoded from T at word
// offset 4 * k of block (B_off + j); then 64 bytes of T are fed back into the
// hash. The statement that refreshes T each round is not visible here.
141 for(
size_t k = 0; k != 30; ++k) {
142 load_le(&B[128 * (B_off + j) + 4 * k],
T, 32 / 8);
143 blake2b.update(
T, 64);
// Last chunk at word offset 4 * 30, with 64 / 8 = 8 as the count argument.
147 load_le(&B[128 * (B_off + j) + 4 * 30],
T, 64 / 8);
// NOTE(review): fragment only. The signature is not visible; this is presumably
// the body of the mixing function called elsewhere as blamka_G(A, B, C, D). The
// statements between these four lines are missing from the listing.
// Each visible step computes X += Y + 2 * lo32(X) * lo32(Y). The uint32_t casts
// truncate each operand (assuming A..D are 64-bit - TODO confirm), and the
// uint64_t(2) factor makes the whole product a 64-bit multiplication.
153 A += B + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(A)) *
static_cast<uint32_t
>(B);
156 C += D + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(C)) *
static_cast<uint32_t
>(D);
// Second half of the function: the same two steps repeated.
159 A += B + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(A)) *
static_cast<uint32_t
>(B);
162 C += D + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(C)) *
static_cast<uint32_t
>(D);
// NOTE(review): the signature is not visible in this listing; presumably this is
// blamka(uint64_t N[128], uint64_t T[128]) - TODO confirm. Statements before the
// first loop and the body of the last loop are also missing.
// When AVX2 or SSSE3 support is compiled in and the CPU reports the feature, the
// work is handed to the matching SIMD routine and this function returns early.
169#if defined(BOTAN_HAS_ARGON2_AVX2)
170 if(CPUID::has_avx2()) {
171 return Argon2::blamka_avx2(N,
T);
175#if defined(BOTAN_HAS_ARGON2_SSSE3)
176 if(CPUID::has_ssse3()) {
177 return Argon2::blamka_ssse3(N,
T);
// Pass 1: T is processed in groups of 16 consecutive words, viewed as a 4x4
// matrix with rows at i+0, i+4, i+8 and i+12. The first four calls mix the
// columns, the next four mix the diagonals.
183 for(
size_t i = 0; i != 128; i += 16) {
184 blamka_G(
T[i + 0],
T[i + 4],
T[i + 8],
T[i + 12]);
185 blamka_G(
T[i + 1],
T[i + 5],
T[i + 9],
T[i + 13]);
186 blamka_G(
T[i + 2],
T[i + 6],
T[i + 10],
T[i + 14]);
187 blamka_G(
T[i + 3],
T[i + 7],
T[i + 11],
T[i + 15]);
189 blamka_G(
T[i + 0],
T[i + 5],
T[i + 10],
T[i + 15]);
190 blamka_G(
T[i + 1],
T[i + 6],
T[i + 11],
T[i + 12]);
191 blamka_G(
T[i + 2],
T[i + 7],
T[i + 8],
T[i + 13]);
192 blamka_G(
T[i + 3],
T[i + 4],
T[i + 9],
T[i + 14]);
// Pass 2: i = 0, 2, ..., 14. The same column-then-diagonal pattern, but the
// 4x4 matrix is now strided: rows start at offsets 0, 32, 64 and 96, and the
// four columns within a row are the words at i, i+1, i+16 and i+17.
195 for(
size_t i = 0; i != 128 / 8; i += 2) {
196 blamka_G(
T[i + 0],
T[i + 32],
T[i + 64],
T[i + 96]);
197 blamka_G(
T[i + 1],
T[i + 33],
T[i + 65],
T[i + 97]);
198 blamka_G(
T[i + 16],
T[i + 48],
T[i + 80],
T[i + 112]);
199 blamka_G(
T[i + 17],
T[i + 49],
T[i + 81],
T[i + 113]);
201 blamka_G(
T[i + 0],
T[i + 33],
T[i + 80],
T[i + 113]);
202 blamka_G(
T[i + 1],
T[i + 48],
T[i + 81],
T[i + 96]);
203 blamka_G(
T[i + 16],
T[i + 49],
T[i + 64],
T[i + 97]);
204 blamka_G(
T[i + 17],
T[i + 32],
T[i + 65],
T[i + 112]);
// Final loop over all 128 words; its body is not visible in this listing.
207 for(
size_t i = 0; i != 128; ++i) {
// Presumably fills the 128-entry address array that the caller later indexes
// with `index % 128` - TODO confirm.
// NOTE(review): only the start of the signature and one loop header survive in
// this listing. The call sites pass (T, addresses, n, lane, slice, memory, time,
// mode, address_counter); check the parameter list against the full source.
214void gen_2i_addresses(uint64_t
T[128],
// Two iterations (r = 0, 1); the loop body is not visible.
233 for(
size_t r = 0; r != 2; ++r) {
// NOTE(review): the function name and return type are cut off in this listing.
// The call site names it index_alpha, and the visible return casts to uint32_t.
// Several branch bodies below are also missing.
// Maps the 64-bit value `random` to the index of a block in B: the high half
// picks a lane, and the low half feeds the position inside that lane.
239 uint64_t random,
size_t lanes,
size_t segments,
size_t threads,
size_t n,
size_t slice,
size_t lane,
size_t index) {
// High 32 bits of `random`, reduced modulo the number of lanes (`threads`).
240 size_t ref_lane =
static_cast<uint32_t
>(random >> 32) % threads;
// Special case for the first slice of the first pass; body not visible.
242 if(n == 0 && slice == 0) {
// m and s enter the result as (s + m - (p + 1)) % lanes. s is the word-free
// block offset of the slice following `slice`, wrapping modulo 4.
246 size_t m = 3 * segments;
247 size_t s = ((slice + 1) % 4) * segments;
// Adjustments to m; the branch bodies and the condition guarding the
// reassignment of m below are not visible in this listing.
249 if(lane == ref_lane) {
254 m = slice * segments;
256 if(slice == 0 || lane == ref_lane) {
261 if(index == 0 || lane == ref_lane) {
// Low 32 bits of `random`, widened to 64 bits. Any further transformation of p
// before the return is not visible here.
265 uint64_t p =
static_cast<uint32_t
>(random);
// Block index inside ref_lane; `lanes` is used here as the per-lane length.
269 return static_cast<uint32_t
>(ref_lane * lanes + (s + m - (p + 1)) % lanes);
// NOTE(review): the signature is not visible in this listing. It is invoked as
// process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t).
// Fills the blocks of one segment (slice `slice` of lane `lane`) during pass n.
// Special case for the first slice of the first pass; body not visible.
284 if(n == 0 && slice == 0) {
// When set, the pseudo-random values come from the generated `addresses` array
// instead of block contents: always for mode 1, and for mode 2 only during the
// first half of the slices of the first pass.
288 const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2);
290 uint64_t addresses[128];
291 size_t address_counter = 1;
// Initial address generation; the condition guarding this call is not visible.
294 gen_2i_addresses(
T, addresses, n, lane, slice, memory, time, mode, address_counter);
297 while(index < segments) {
// Absolute index of the block being written.
298 const size_t offset = lane * lanes + slice * segments + index;
// Previous block; the index == 0 && slice == 0 case is adjusted in a branch
// whose body is not visible.
300 size_t prev = offset - 1;
301 if(index == 0 && slice == 0) {
// The address array holds 128 entries, so it is regenerated every 128 indices
// with an incremented counter.
305 if(use_2i && index > 0 && index % 128 == 0) {
306 address_counter += 1;
307 gen_2i_addresses(
T, addresses, n, lane, slice, memory, time, mode, address_counter);
// Source of the pseudo-random value: a precomputed address, or the first word
// of the previous block (read through the bounds-checked at()).
310 const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev);
311 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
// N = previous block XOR reference block.
314 for(
size_t i = 0; i != 128; ++i) {
315 N[i] = B[128 * prev + i] ^ B[128 * new_offset + i];
// XOR N into the destination block. Whatever happens to N between the two
// loops is not visible in this listing.
320 for(
size_t i = 0; i != 128; ++i) {
321 B[128 * offset + i] ^= N[i];
// NOTE(review): the signature is not visible in this listing. It is invoked as
// process_blocks(B, m_t, memory, m_p, m_family); the visible body uses the
// names t, memory, threads and mode.
// Runs process_block for every (pass, slice, lane) combination.
// `lanes` is the per-lane length in blocks; `segments` is the length of one slice.
329 const size_t lanes = memory / threads;
330 const size_t segments = lanes / SYNC_POINTS;
// Threaded variant: for each pass and slice, one task per lane is submitted to
// the thread pool and the futures are collected. The loop over the futures
// presumably waits for them before the next slice starts - its body is not
// visible here.
332#if defined(BOTAN_HAS_THREAD_UTILS)
336 for(
size_t n = 0; n != t; ++n) {
337 for(
size_t slice = 0; slice != SYNC_POINTS; ++slice) {
338 std::vector<std::future<void>> fut_results;
339 fut_results.reserve(threads);
341 for(
size_t lane = 0; lane != threads; ++lane) {
342 fut_results.push_back(thread_pool.run(
343 process_block, std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
346 for(
auto& fut : fut_results) {
// Sequential variant: the same triple loop calling process_block directly. The
// preprocessor or branch structure that selects between the two variants is not
// visible in this listing.
356 for(
size_t n = 0; n != t; ++n) {
357 for(
size_t slice = 0; slice != SYNC_POINTS; ++slice) {
358 for(
size_t lane = 0; lane != threads; ++lane) {
359 process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
// Const member function that validates the input lengths and then runs the three
// visible stages: init_blocks, process_blocks, extract_key.
// NOTE(review): this listing is truncated. Part of the parameter list, the
// computation of H0 and the allocation of B are not visible.
367void Argon2::argon2(uint8_t output[],
369 const char* password,
371 const uint8_t salt[],
376 size_t ad_len)
const {
// The output must be at least 4 bytes, and every length must fit in a uint32_t.
// BOTAN_ARG_CHECK presumably throws on violation - TODO confirm.
377 BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(),
378 "Invalid Argon2 output length");
379 BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(),
"Invalid Argon2 password length");
380 BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(),
"Invalid Argon2 salt length");
381 BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(),
"Invalid Argon2 key length");
382 BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(),
"Invalid Argon2 ad length");
// 64-byte buffer, zero-initialised; the call that fills it is not visible here.
386 uint8_t H0[64] = {0};
// Rounds m_M down to a multiple of SYNC_POINTS * m_p using integer division.
403 const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p);
// Seed the blocks from H0, run the passes over B, then derive the output from B.
407 init_blocks(B, *blake2, H0, memory, m_p);
408 process_blocks(B, m_t, memory, m_p, m_family);
411 extract_key(output, output_len, B, memory, m_p);
#define BOTAN_ASSERT_NOMSG(expr)
#define BOTAN_ARG_CHECK(expr, msg)
static void blamka(uint64_t N[128], uint64_t T[128])
static std::unique_ptr< HashFunction > create_or_throw(std::string_view algo_spec, std::string_view provider="")
static Thread_Pool & global_instance()
#define BOTAN_FORCE_INLINE
std::string fmt(std::string_view format, const T &... args)
constexpr T rotr(T input)
constexpr auto load_le(ParamTs &&... params)
std::vector< T, secure_allocator< T > > secure_vector
constexpr void copy_mem(T *out, const T *in, size_t n)
constexpr void clear_mem(T *ptr, size_t n)
const uint8_t * cast_char_ptr_to_uint8(const char *s)