7 #include <botan/argon2.h> 8 #include <botan/loadstor.h> 9 #include <botan/hash.h> 10 #include <botan/mem_ops.h> 11 #include <botan/rotate.h> 12 #include <botan/exceptn.h> 18 static const size_t SYNC_POINTS = 4;
// Builds the H0 digest: absorbs every Argon2 parameter into blake2b and
// returns the finalized hash. The caller in this file passes its mode as y
// and its thread count as p.
// NOTE(review): this listing skips several original lines (braces, the
// output_len parameter, the declaration of v, and the updates that feed the
// password and salt bytes) - confirm against the full file.
20 secure_vector<uint8_t> argon2_H0(HashFunction& blake2b,
22 const char* password,
size_t password_len,
23 const uint8_t salt[],
size_t salt_len,
24 const uint8_t key[],
size_t key_len,
25 const uint8_t ad[],
size_t ad_len,
26 size_t y,
size_t p,
size_t M,
size_t t)
// Fixed-size parameters: each is narrowed to 32 bits before being absorbed
// with update_le (presumably little-endian encoding - confirm).
30 blake2b.update_le(static_cast<uint32_t>(p));
31 blake2b.update_le(static_cast<uint32_t>(output_len));
32 blake2b.update_le(static_cast<uint32_t>(M));
33 blake2b.update_le(static_cast<uint32_t>(t));
34 blake2b.update_le(static_cast<uint32_t>(v));
35 blake2b.update_le(static_cast<uint32_t>(y));
// Variable-length inputs: a 32-bit length is absorbed for each. Only the key
// and ad byte updates are visible here; the password and salt byte updates
// are among the skipped lines.
37 blake2b.update_le(static_cast<uint32_t>(password_len));
40 blake2b.update_le(static_cast<uint32_t>(
salt_len));
43 blake2b.update_le(static_cast<uint32_t>(key_len));
44 blake2b.update(key, key_len);
46 blake2b.update_le(static_cast<uint32_t>(ad_len));
47 blake2b.update(ad, ad_len);
// Finalize the hash and hand the digest back to the caller.
49 return blake2b.final();
// Hashing helper used by init_blocks, which calls it as
// Htick(T, output, output_len, blake2b, H0, p0, p1).
// NOTE(review): the output/output_len/p0/p1 parameters, the H0 update and
// the loop body are not present in this listing - confirm against the full
// file before relying on these notes.
52 void Htick(secure_vector<uint8_t>&
T,
55 HashFunction& blake2b,
56 const secure_vector<uint8_t>& H0,
// The requested length and the two counters are each absorbed as 32-bit
// values.
61 blake2b.update_le(static_cast<uint32_t>(output_len));
63 blake2b.update_le(static_cast<uint32_t>(p0));
64 blake2b.update_le(static_cast<uint32_t>(p1));
// Keeps iterating while more than 64 bytes of output remain (loop body not
// visible here).
68 while(output_len > 64)
// Produces the final output bytes from the block memory B.
// NOTE(review): loop bodies, the branch structure around the hashing calls
// and the creation of blake2b / blake2b_f are missing from this listing;
// the comments below cover only the visible statements.
82 void extract_key(uint8_t output[],
size_t output_len,
83 const secure_vector<uint64_t>& B,
84 size_t memory,
size_t threads)
// Naming caveat: "lanes" here is the number of blocks per lane
// (memory / threads), while "threads" is the lane count iterated below.
86 const size_t lanes = memory / threads;
// One 128-word accumulator block.
88 secure_vector<uint64_t> sum(128);
90 for(
size_t lane = 0; lane != threads; ++lane)
// [start, end) spans the 128 words of the last block in this lane.
92 size_t start = 128*(lane * lanes + lanes - 1);
93 size_t end = 128*(lane * lanes + lanes);
95 for(
size_t j = start; j != end; ++j)
// 1024 bytes = 128 words x 8 bytes; presumably the byte form of sum (the
// conversion is not visible here).
101 secure_vector<uint8_t> sum8(1024);
// This path hashes the length and sum8 straight into output.
107 blake2b->update_le(static_cast<uint32_t>(output_len));
108 blake2b->update(sum8.data(), sum8.size());
109 blake2b->final(output);
// This path hashes into the 64-byte buffer T and then loops while more than
// 64 bytes of output remain.
113 secure_vector<uint8_t>
T(64);
116 blake2b->update_le(static_cast<uint32_t>(output_len));
117 blake2b->update(sum8.data(), sum8.size());
118 blake2b->final(&
T[0]);
120 while(output_len > 64)
129 blake2b->final(&
T[0]);
136 blake2b->final(output);
// A separate hash object, blake2b_f, produces the last chunk from T.
141 blake2b_f->update(
T);
142 blake2b_f->final(output);
// Seeds the start of every lane in B from H0 via Htick.
// NOTE(review): the memory/threads parameters and the bodies of the
// 128-iteration loops are missing from this listing.
147 void init_blocks(secure_vector<uint64_t>& B,
148 HashFunction& blake2b,
149 const secure_vector<uint8_t>& H0,
// H receives 1024 bytes per Htick call; T is scratch sized to one blake2b
// digest.
155 secure_vector<uint8_t> H(1024);
156 secure_vector<uint8_t>
T(blake2b.output_length());
158 for(
size_t i = 0; i != threads; ++i)
// Index of the first block belonging to lane i.
160 const size_t B_off = i * (memory / threads);
// First Htick call for this lane: counter 0, lane index i.
164 Htick(
T, &H[0], H.size(), blake2b, H0, 0, i);
// Presumably copies the 128 words of H into B (body not visible).
166 for(
size_t j = 0; j != 128; ++j)
// Second Htick call for this lane: counter 1, lane index i.
171 Htick(
T, &H[0], H.size(), blake2b, H0, 1, i);
173 for(
size_t j = 0; j != 128; ++j)
// Mixing step on four 64-bit words, updated in place.
// Each visible statement has the form X += Y + 2 * lo32(X) * lo32(Y): the
// uint32_t casts keep only the low 32 bits of each operand, and the
// uint64_t cast on the constant 2 makes the product 64-bit arithmetic.
// NOTE(review): the embedded line numbers jump (182, 185, 188, 191), so
// statements between these four are missing from this listing - possibly
// rotations, since <botan/rotate.h> is included; confirm against the full
// file.
180 inline void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
182 A += B + (
static_cast<uint64_t
>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
185 C += D + (
static_cast<uint64_t
>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
188 A += B + (
static_cast<uint64_t
>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
191 C += D + (
static_cast<uint64_t
>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
// Applies blamka_G eight times to sixteen words, updated in place.
// Viewing V0..VF as a 4x4 matrix in row-major order, the first four calls
// mix the columns and the last four mix the diagonals.
195 inline void blamka(uint64_t& V0, uint64_t& V1, uint64_t& V2, uint64_t& V3,
196 uint64_t& V4, uint64_t& V5, uint64_t& V6, uint64_t& V7,
197 uint64_t& V8, uint64_t& V9, uint64_t& VA, uint64_t& VB,
198 uint64_t& VC, uint64_t& VD, uint64_t& VE, uint64_t& VF)
// Columns.
200 blamka_G(V0, V4, V8, VC);
201 blamka_G(V1, V5, V9, VD);
202 blamka_G(V2, V6, VA, VE);
203 blamka_G(V3, V7, VB, VF);
// Diagonals.
205 blamka_G(V0, V5, VA, VF);
206 blamka_G(V1, V6, VB, VC);
207 blamka_G(V2, V7, V8, VD);
208 blamka_G(V3, V4, V9, VE);
// Mixes blocks prev and new_offset of B and XORs the result into block
// offset; T is a 128-word scratch buffer. Callers in this file invoke it as
// process_block_xor(T, B, offset, prev, new_offset).
// NOTE(review): the offset/prev/new_offset parameters are missing from this
// listing.
211 void process_block_xor(secure_vector<uint64_t>&
T,
212 secure_vector<uint64_t>& B,
// T = B[prev] ^ B[new_offset], word by word.
217 for(
size_t i = 0; i != 128; ++i)
218 T[i] = B[128*prev+i] ^ B[128*new_offset+i];
// First pass: blamka over each run of 16 consecutive words (8 runs).
220 for(
size_t i = 0; i != 128; i += 16)
222 blamka(
T[i+ 0],
T[i+ 1],
T[i+ 2],
T[i+ 3],
223 T[i+ 4],
T[i+ 5],
T[i+ 6],
T[i+ 7],
224 T[i+ 8],
T[i+ 9],
T[i+10],
T[i+11],
225 T[i+12],
T[i+13],
T[i+14],
T[i+15]);
// Second pass: i takes 0, 2, ..., 14 (128 / 8 == 16, step 2); each call
// gathers the word pair (i, i+1) from every 16-word run, i.e. strided
// access across the runs processed above.
228 for(
size_t i = 0; i != 128 / 8; i += 2)
230 blamka(
T[ i],
T[ i+1],
T[ 16+i],
T[ 16+i+1],
231 T[ 32+i],
T[ 32+i+1],
T[ 48+i],
T[ 48+i+1],
232 T[ 64+i],
T[ 64+i+1],
T[ 80+i],
T[ 80+i+1],
233 T[ 96+i],
T[ 96+i+1],
T[112+i],
T[112+i+1]);
// Feed-forward: the permuted T is XORed with both inputs again and then
// XORed into (not assigned over) the destination block.
236 for(
size_t i = 0; i != 128; ++i)
237 B[128*offset + i] ^=
T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
// Produces a block of values into B using T as scratch;
// process_block_argon2i passes its "addresses" vector as B and reads reference
// indices from it.
// NOTE(review): the code that seeds T from the parameters, the statements
// around the r loop and the body of the final loop are missing from this
// listing.
240 void gen_2i_addresses(secure_vector<uint64_t>&
T, secure_vector<uint64_t>& B,
241 size_t n,
size_t lane,
size_t slice,
size_t memory,
242 size_t time,
size_t mode,
size_t cnt)
// The two-pass blamka permutation below is applied twice (r = 0, 1).
256 for(
size_t r = 0; r != 2; ++r)
// First pass: blamka over each run of 16 consecutive words.
260 for(
size_t i = 0; i != 128; i += 16)
262 blamka(
T[i+ 0],
T[i+ 1],
T[i+ 2],
T[i+ 3],
263 T[i+ 4],
T[i+ 5],
T[i+ 6],
T[i+ 7],
264 T[i+ 8],
T[i+ 9],
T[i+10],
T[i+11],
265 T[i+12],
T[i+13],
T[i+14],
T[i+15]);
// Second pass: word pairs (i, i+1) gathered from every 16-word run, for
// i = 0, 2, ..., 14.
267 for(
size_t i = 0; i != 128 / 8; i += 2)
269 blamka(
T[ i],
T[ i+1],
T[ 16+i],
T[ 16+i+1],
270 T[ 32+i],
T[ 32+i+1],
T[ 48+i],
T[ 48+i+1],
271 T[ 64+i],
T[ 64+i+1],
T[ 80+i],
T[ 80+i+1],
272 T[ 96+i],
T[ 96+i+1],
T[112+i],
T[112+i+1]);
// Final per-word loop over the 128 words (body not visible).
275 for(
size_t i = 0; i != 128; ++i)
// Maps a 64-bit pseudo-random value to the index of a reference block.
// Callers in this file invoke it as
// index_alpha(random, lanes, segments, threads, n, slice, lane, index).
// NOTE(review): most of the parameter list and the bodies of the three
// conditionals are missing from this listing, so how m and s are adjusted
// cannot be confirmed here.
280 uint32_t index_alpha(uint64_t random,
// The high 32 bits of random select the reference lane.
289 size_t ref_lane =
static_cast<uint32_t
>(random >> 32) % threads;
// Special case for the first slice of the first pass (body not visible).
291 if(n == 0 && slice == 0)
// Starting values: m is three segments, s is the start of the slice after
// the current one, wrapping modulo 4.
294 size_t m = 3*segments;
295 size_t s = ((slice+1) % 4)*segments;
303 if(slice == 0 || lane == ref_lane)
307 if(index == 0 || lane == ref_lane)
// The low 32 bits of random, widened to 64 bits.
310 uint64_t p =
static_cast<uint32_t
>(random);
// Block index = start of ref_lane plus an in-lane position taken modulo
// lanes (the callers pass blocks-per-lane as "lanes").
314 return static_cast<uint32_t
>(ref_lane*lanes + (s + m - (p+1)) % lanes);
// Fills one segment (given lane and slice) of B, choosing each reference
// block from data already stored in B.
// NOTE(review): the declaration of index, the bodies of the two
// conditionals and the loop increment are missing from this listing.
317 void process_block_argon2d(secure_vector<uint64_t>&
T,
318 secure_vector<uint64_t>& B,
319 size_t n,
size_t slice,
size_t lane,
320 size_t lanes,
size_t segments,
size_t threads)
// Special case for the first slice of the first pass (body not visible).
323 if(n == 0 && slice == 0)
326 while(index < segments)
// Absolute block index; "lanes" is the blocks-per-lane count passed in by
// process_blocks.
328 const size_t offset = lane*lanes + slice*segments + index;
// Default predecessor is the preceding block; the lane-start case is
// handled by the conditional below (body not visible).
330 size_t prev = offset - 1;
331 if(index == 0 && slice == 0)
// The first 64-bit word of the previous block supplies the random value;
// at() is a bounds-checked access.
334 const uint64_t random = B.at(128*prev);
335 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
337 process_block_xor(
T, B, offset, prev, new_offset);
// Fills one segment (given lane and slice) of B, choosing each reference
// block from values produced by gen_2i_addresses rather than from B itself.
// NOTE(review): the declaration of index, the bodies of the first and third
// conditionals and the loop increment are missing from this listing.
343 void process_block_argon2i(secure_vector<uint64_t>&
T,
344 secure_vector<uint64_t>& B,
345 size_t n,
size_t slice,
size_t lane,
346 size_t lanes,
size_t segments,
size_t threads, uint8_t mode,
347 size_t memory,
size_t time)
// Special case for the first slice of the first pass (body not visible).
350 if(n == 0 && slice == 0)
// One block of 128 values, generated up front with the counter at 1.
353 secure_vector<uint64_t> addresses(128);
354 size_t address_counter = 1;
356 gen_2i_addresses(
T, addresses, n, lane, slice, memory, time, mode, address_counter);
358 while(index < segments)
// Absolute block index; "lanes" is the blocks-per-lane count passed in by
// process_blocks.
360 const size_t offset = lane*lanes + slice*segments + index;
362 size_t prev = offset - 1;
363 if(index == 0 && slice == 0)
// Every 128 indices the current values are used up: bump the counter and
// generate the next batch.
366 if(index > 0 && index % 128 == 0)
368 address_counter += 1;
369 gen_2i_addresses(
T, addresses, n, lane, slice, memory, time, mode, address_counter);
// Take the value for this index from the current batch.
372 const uint64_t random = addresses[index % 128];
373 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
375 process_block_xor(
T, B, offset, prev, new_offset);
// Runs t passes over the block memory B, visiting every slice and, within
// each slice, every lane. argon2() calls it as
// process_blocks(B, t, memory, threads, mode).
// NOTE(review): the remaining parameters and the line between the two
// process_block_* calls (presumably an else) are missing from this listing.
381 void process_blocks(secure_vector<uint64_t>& B,
// "lanes" is the number of blocks per lane; each lane is split into
// SYNC_POINTS segments.
387 const size_t lanes = memory / threads;
388 const size_t segments = lanes / SYNC_POINTS;
// 128-word scratch buffer shared by every segment call.
390 secure_vector<uint64_t>
T(128);
391 for(
size_t n = 0; n != t; ++n)
393 for(
size_t slice = 0; slice != SYNC_POINTS; ++slice)
396 for(
size_t lane = 0; lane != threads; ++lane)
// Mode 1 always takes the argon2i path; mode 2 takes it only during the
// first pass and the first half of the slices.
398 if(mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2))
399 process_block_argon2i(
T, B, n, slice, lane, lanes, segments, threads, mode, memory, t);
// Presumably the alternative branch for all other cases - confirm.
401 process_block_argon2d(
T, B, n, slice, lane, lanes, segments, threads);
// Entry point: validates the parameters, derives H0, fills and processes
// the block memory, then writes output_len bytes to output.
// NOTE(review): the creation of the blake2 hash object, the salt/key/ad
// arguments of the argon2_H0 call and the allocation of B are missing from
// this listing.
410 void argon2(uint8_t output[],
size_t output_len,
411 const char* password,
size_t password_len,
412 const uint8_t salt[],
size_t salt_len,
413 const uint8_t key[],
size_t key_len,
414 const uint8_t ad[],
size_t ad_len,
415 uint8_t mode,
size_t threads,
size_t M,
size_t t)
// Only modes 0, 1 and 2 are accepted.
417 BOTAN_ARG_CHECK(mode == 0 || mode == 1 || mode == 2,
"Unknown Argon2 mode parameter");
// threads must be in [1, 128]; M must be at least 8 per thread and at most
// 8192*1024.
419 BOTAN_ARG_CHECK(threads >= 1 && threads <= 128,
"Invalid Argon2 threads parameter");
420 BOTAN_ARG_CHECK(M >= 8*threads && M <= 8192*1024,
"Invalid Argon2 M parameter");
425 const auto H0 = argon2_H0(*blake2, output_len,
426 password, password_len,
430 mode, threads, M, t);
// Round M down to a multiple of SYNC_POINTS*threads so that every lane
// splits into SYNC_POINTS equal segments.
432 const size_t memory = (M / (SYNC_POINTS*threads)) * (SYNC_POINTS*threads);
// Seed the lanes, run the t passes, then derive the output.
436 init_blocks(B, *blake2, H0, memory, threads);
437 process_blocks(B, t, memory, threads, mode);
440 extract_key(output, output_len, B, memory, threads);
static std::unique_ptr< HashFunction > create_or_throw(const std::string &algo_spec, const std::string &provider="")
void copy_out_le(uint8_t out[], size_t out_bytes, const T in[])
void clear_mem(T *ptr, size_t n)
const uint8_t * cast_char_ptr_to_uint8(const char *s)
#define BOTAN_ASSERT_NOMSG(expr)
std::string to_string(const BER_Object &obj)
uint64_t load_le< uint64_t >(const uint8_t in[], size_t off)
void copy_mem(T *out, const T *in, size_t n)
#define BOTAN_ARG_CHECK(expr, msg)
void argon2(uint8_t output[], size_t output_len, const char *password, size_t password_len, const uint8_t salt[], size_t salt_len, const uint8_t key[], size_t key_len, const uint8_t ad[], size_t ad_len, uint8_t mode, size_t threads, size_t M, size_t t)
std::vector< T, secure_allocator< T > > secure_vector