Botan  2.12.1
Crypto and TLS for C++11
argon2.cpp
Go to the documentation of this file.
1 /**
2 * (C) 2018,2019 Jack Lloyd
3 *
4 * Botan is released under the Simplified BSD License (see license.txt)
5 */
6 
7 #include <botan/argon2.h>
8 #include <botan/loadstor.h>
9 #include <botan/hash.h>
10 #include <botan/mem_ops.h>
11 #include <botan/rotate.h>
12 #include <botan/exceptn.h>
13 
14 namespace Botan {
15 
16 namespace {
17 
18 static const size_t SYNC_POINTS = 4;
19 
20 secure_vector<uint8_t> argon2_H0(HashFunction& blake2b,
21  size_t output_len,
22  const char* password, size_t password_len,
23  const uint8_t salt[], size_t salt_len,
24  const uint8_t key[], size_t key_len,
25  const uint8_t ad[], size_t ad_len,
26  size_t y, size_t p, size_t M, size_t t)
27  {
28  const uint8_t v = 19; // Argon2 version code
29 
30  blake2b.update_le(static_cast<uint32_t>(p));
31  blake2b.update_le(static_cast<uint32_t>(output_len));
32  blake2b.update_le(static_cast<uint32_t>(M));
33  blake2b.update_le(static_cast<uint32_t>(t));
34  blake2b.update_le(static_cast<uint32_t>(v));
35  blake2b.update_le(static_cast<uint32_t>(y));
36 
37  blake2b.update_le(static_cast<uint32_t>(password_len));
38  blake2b.update(cast_char_ptr_to_uint8(password), password_len);
39 
40  blake2b.update_le(static_cast<uint32_t>(salt_len));
41  blake2b.update(salt, salt_len);
42 
43  blake2b.update_le(static_cast<uint32_t>(key_len));
44  blake2b.update(key, key_len);
45 
46  blake2b.update_le(static_cast<uint32_t>(ad_len));
47  blake2b.update(ad, ad_len);
48 
49  return blake2b.final();
50  }
51 
52 void Htick(secure_vector<uint8_t>& T,
53  uint8_t output[],
54  size_t output_len,
55  HashFunction& blake2b,
56  const secure_vector<uint8_t>& H0,
57  size_t p0, size_t p1)
58  {
59  BOTAN_ASSERT_NOMSG(output_len % 64 == 0);
60 
61  blake2b.update_le(static_cast<uint32_t>(output_len));
62  blake2b.update(H0);
63  blake2b.update_le(static_cast<uint32_t>(p0));
64  blake2b.update_le(static_cast<uint32_t>(p1));
65 
66  blake2b.final(&T[0]);
67 
68  while(output_len > 64)
69  {
70  copy_mem(output, &T[0], 32);
71  output_len -= 32;
72  output += 32;
73 
74  blake2b.update(T);
75  blake2b.final(&T[0]);
76  }
77 
78  if(output_len > 0)
79  copy_mem(output, &T[0], output_len);
80  }
81 
82 void extract_key(uint8_t output[], size_t output_len,
83  const secure_vector<uint64_t>& B,
84  size_t memory, size_t threads)
85  {
86  const size_t lanes = memory / threads;
87 
88  secure_vector<uint64_t> sum(128);
89 
90  for(size_t lane = 0; lane != threads; ++lane)
91  {
92  size_t start = 128*(lane * lanes + lanes - 1);
93  size_t end = 128*(lane * lanes + lanes);
94 
95  for(size_t j = start; j != end; ++j)
96  {
97  sum[j % 128] ^= B[j];
98  }
99  }
100 
101  secure_vector<uint8_t> sum8(1024);
102  copy_out_le(sum8.data(), 1024, sum.data());
103 
104  if(output_len <= 64)
105  {
106  std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
107  blake2b->update_le(static_cast<uint32_t>(output_len));
108  blake2b->update(sum8.data(), sum8.size());
109  blake2b->final(output);
110  }
111  else
112  {
113  secure_vector<uint8_t> T(64);
114 
115  std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
116  blake2b->update_le(static_cast<uint32_t>(output_len));
117  blake2b->update(sum8.data(), sum8.size());
118  blake2b->final(&T[0]);
119 
120  while(output_len > 64)
121  {
122  copy_mem(output, &T[0], 32);
123  output_len -= 32;
124  output += 32;
125 
126  if(output_len > 64)
127  {
128  blake2b->update(T);
129  blake2b->final(&T[0]);
130  }
131  }
132 
133  if(output_len == 64)
134  {
135  blake2b->update(T);
136  blake2b->final(output);
137  }
138  else
139  {
140  std::unique_ptr<HashFunction> blake2b_f = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
141  blake2b_f->update(T);
142  blake2b_f->final(output);
143  }
144  }
145  }
146 
147 void init_blocks(secure_vector<uint64_t>& B,
148  HashFunction& blake2b,
149  const secure_vector<uint8_t>& H0,
150  size_t memory,
151  size_t threads)
152  {
153  BOTAN_ASSERT_NOMSG(B.size() >= threads*256);
154 
155  secure_vector<uint8_t> H(1024);
156  secure_vector<uint8_t> T(blake2b.output_length());
157 
158  for(size_t i = 0; i != threads; ++i)
159  {
160  const size_t B_off = i * (memory / threads);
161 
162  BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2));
163 
164  Htick(T, &H[0], H.size(), blake2b, H0, 0, i);
165 
166  for(size_t j = 0; j != 128; ++j)
167  {
168  B[128*B_off+j] = load_le<uint64_t>(H.data(), j);
169  }
170 
171  Htick(T, &H[0], H.size(), blake2b, H0, 1, i);
172 
173  for(size_t j = 0; j != 128; ++j)
174  {
175  B[128*(B_off+1)+j] = load_le<uint64_t>(H.data(), j);
176  }
177  }
178  }
179 
180 inline void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
181  {
182  A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
183  D = rotr<32>(A ^ D);
184 
185  C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
186  B = rotr<24>(B ^ C);
187 
188  A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
189  D = rotr<16>(A ^ D);
190 
191  C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
192  B = rotr<63>(B ^ C);
193  }
194 
195 inline void blamka(uint64_t& V0, uint64_t& V1, uint64_t& V2, uint64_t& V3,
196  uint64_t& V4, uint64_t& V5, uint64_t& V6, uint64_t& V7,
197  uint64_t& V8, uint64_t& V9, uint64_t& VA, uint64_t& VB,
198  uint64_t& VC, uint64_t& VD, uint64_t& VE, uint64_t& VF)
199  {
200  blamka_G(V0, V4, V8, VC);
201  blamka_G(V1, V5, V9, VD);
202  blamka_G(V2, V6, VA, VE);
203  blamka_G(V3, V7, VB, VF);
204 
205  blamka_G(V0, V5, VA, VF);
206  blamka_G(V1, V6, VB, VC);
207  blamka_G(V2, V7, V8, VD);
208  blamka_G(V3, V4, V9, VE);
209  }
210 
211 void process_block_xor(secure_vector<uint64_t>& T,
212  secure_vector<uint64_t>& B,
213  size_t offset,
214  size_t prev,
215  size_t new_offset)
216  {
217  for(size_t i = 0; i != 128; ++i)
218  T[i] = B[128*prev+i] ^ B[128*new_offset+i];
219 
220  for(size_t i = 0; i != 128; i += 16)
221  {
222  blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
223  T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
224  T[i+ 8], T[i+ 9], T[i+10], T[i+11],
225  T[i+12], T[i+13], T[i+14], T[i+15]);
226  }
227 
228  for(size_t i = 0; i != 128 / 8; i += 2)
229  {
230  blamka(T[ i], T[ i+1], T[ 16+i], T[ 16+i+1],
231  T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
232  T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
233  T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
234  }
235 
236  for(size_t i = 0; i != 128; ++i)
237  B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
238  }
239 
240 void gen_2i_addresses(secure_vector<uint64_t>& T, secure_vector<uint64_t>& B,
241  size_t n, size_t lane, size_t slice, size_t memory,
242  size_t time, size_t mode, size_t cnt)
243  {
244  BOTAN_ASSERT_NOMSG(B.size() == 128);
245  BOTAN_ASSERT_NOMSG(T.size() == 128);
246 
247  clear_mem(B.data(), B.size());
248  B[0] = n;
249  B[1] = lane;
250  B[2] = slice;
251  B[3] = memory;
252  B[4] = time;
253  B[5] = mode;
254  B[6] = cnt;
255 
256  for(size_t r = 0; r != 2; ++r)
257  {
258  copy_mem(T.data(), B.data(), B.size());
259 
260  for(size_t i = 0; i != 128; i += 16)
261  {
262  blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
263  T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
264  T[i+ 8], T[i+ 9], T[i+10], T[i+11],
265  T[i+12], T[i+13], T[i+14], T[i+15]);
266  }
267  for(size_t i = 0; i != 128 / 8; i += 2)
268  {
269  blamka(T[ i], T[ i+1], T[ 16+i], T[ 16+i+1],
270  T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
271  T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
272  T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
273  }
274 
275  for(size_t i = 0; i != 128; ++i)
276  B[i] ^= T[i];
277  }
278  }
279 
280 uint32_t index_alpha(uint64_t random,
281  size_t lanes,
282  size_t segments,
283  size_t threads,
284  size_t n,
285  size_t slice,
286  size_t lane,
287  size_t index)
288  {
289  size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
290 
291  if(n == 0 && slice == 0)
292  ref_lane = lane;
293 
294  size_t m = 3*segments;
295  size_t s = ((slice+1) % 4)*segments;
296 
297  if(lane == ref_lane)
298  m += index;
299 
300  if(n == 0) {
301  m = slice*segments;
302  s = 0;
303  if(slice == 0 || lane == ref_lane)
304  m += index;
305  }
306 
307  if(index == 0 || lane == ref_lane)
308  m -= 1;
309 
310  uint64_t p = static_cast<uint32_t>(random);
311  p = (p * p) >> 32;
312  p = (p * m) >> 32;
313 
314  return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes);
315  }
316 
317 void process_block_argon2d(secure_vector<uint64_t>& T,
318  secure_vector<uint64_t>& B,
319  size_t n, size_t slice, size_t lane,
320  size_t lanes, size_t segments, size_t threads)
321  {
322  size_t index = 0;
323  if(n == 0 && slice == 0)
324  index = 2;
325 
326  while(index < segments)
327  {
328  const size_t offset = lane*lanes + slice*segments + index;
329 
330  size_t prev = offset - 1;
331  if(index == 0 && slice == 0)
332  prev += lanes;
333 
334  const uint64_t random = B.at(128*prev);
335  const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
336 
337  process_block_xor(T, B, offset, prev, new_offset);
338 
339  index += 1;
340  }
341  }
342 
343 void process_block_argon2i(secure_vector<uint64_t>& T,
344  secure_vector<uint64_t>& B,
345  size_t n, size_t slice, size_t lane,
346  size_t lanes, size_t segments, size_t threads, uint8_t mode,
347  size_t memory, size_t time)
348  {
349  size_t index = 0;
350  if(n == 0 && slice == 0)
351  index = 2;
352 
353  secure_vector<uint64_t> addresses(128);
354  size_t address_counter = 1;
355 
356  gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
357 
358  while(index < segments)
359  {
360  const size_t offset = lane*lanes + slice*segments + index;
361 
362  size_t prev = offset - 1;
363  if(index == 0 && slice == 0)
364  prev += lanes;
365 
366  if(index > 0 && index % 128 == 0)
367  {
368  address_counter += 1;
369  gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
370  }
371 
372  const uint64_t random = addresses[index % 128];
373  const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
374 
375  process_block_xor(T, B, offset, prev, new_offset);
376 
377  index += 1;
378  }
379  }
380 
381 void process_blocks(secure_vector<uint64_t>& B,
382  size_t t,
383  size_t memory,
384  size_t threads,
385  uint8_t mode)
386  {
387  const size_t lanes = memory / threads;
388  const size_t segments = lanes / SYNC_POINTS;
389 
390  secure_vector<uint64_t> T(128);
391  for(size_t n = 0; n != t; ++n)
392  {
393  for(size_t slice = 0; slice != SYNC_POINTS; ++slice)
394  {
395  // TODO can run this in Thread_Pool
396  for(size_t lane = 0; lane != threads; ++lane)
397  {
398  if(mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2))
399  process_block_argon2i(T, B, n, slice, lane, lanes, segments, threads, mode, memory, t);
400  else
401  process_block_argon2d(T, B, n, slice, lane, lanes, segments, threads);
402  }
403  }
404  }
405 
406  }
407 
408 }
409 
410 void argon2(uint8_t output[], size_t output_len,
411  const char* password, size_t password_len,
412  const uint8_t salt[], size_t salt_len,
413  const uint8_t key[], size_t key_len,
414  const uint8_t ad[], size_t ad_len,
415  uint8_t mode, size_t threads, size_t M, size_t t)
416  {
417  BOTAN_ARG_CHECK(mode == 0 || mode == 1 || mode == 2, "Unknown Argon2 mode parameter");
418  BOTAN_ARG_CHECK(output_len >= 4, "Invalid Argon2 output length");
419  BOTAN_ARG_CHECK(threads >= 1 && threads <= 128, "Invalid Argon2 threads parameter");
420  BOTAN_ARG_CHECK(M >= 8*threads && M <= 8192*1024, "Invalid Argon2 M parameter");
421  BOTAN_ARG_CHECK(t >= 1, "Invalid Argon2 t parameter");
422 
423  std::unique_ptr<HashFunction> blake2 = HashFunction::create_or_throw("BLAKE2b");
424 
425  const auto H0 = argon2_H0(*blake2, output_len,
426  password, password_len,
427  salt, salt_len,
428  key, key_len,
429  ad, ad_len,
430  mode, threads, M, t);
431 
432  const size_t memory = (M / (SYNC_POINTS*threads)) * (SYNC_POINTS*threads);
433 
434  secure_vector<uint64_t> B(memory * 1024/8);
435 
436  init_blocks(B, *blake2, H0, memory, threads);
437  process_blocks(B, t, memory, threads, mode);
438 
439  clear_mem(output, output_len);
440  extract_key(output, output_len, B, memory, threads);
441  }
442 
443 }
static std::unique_ptr< HashFunction > create_or_throw(const std::string &algo_spec, const std::string &provider="")
Definition: hash.cpp:344
void copy_out_le(uint8_t out[], size_t out_bytes, const T in[])
Definition: loadstor.h:679
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:111
const uint8_t * cast_char_ptr_to_uint8(const char *s)
Definition: mem_ops.h:164
#define BOTAN_ASSERT_NOMSG(expr)
Definition: assert.h:68
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:213
uint64_t load_le< uint64_t >(const uint8_t in[], size_t off)
Definition: loadstor.h:237
size_t salt_len
Definition: x509_obj.cpp:25
void copy_mem(T *out, const T *in, size_t n)
Definition: mem_ops.h:122
Definition: alg_id.cpp:13
#define BOTAN_ARG_CHECK(expr, msg)
Definition: assert.h:37
void argon2(uint8_t output[], size_t output_len, const char *password, size_t password_len, const uint8_t salt[], size_t salt_len, const uint8_t key[], size_t key_len, const uint8_t ad[], size_t ad_len, uint8_t mode, size_t threads, size_t M, size_t t)
Definition: argon2.cpp:410
fe T
Definition: ge.cpp:37
std::vector< T, secure_allocator< T > > secure_vector
Definition: secmem.h:65