Botan  2.11.0
Crypto and TLS for C++11
argon2.cpp
Go to the documentation of this file.
1 /**
2 * (C) 2018,2019 Jack Lloyd
3 *
4 * Botan is released under the Simplified BSD License (see license.txt)
5 */
6 
7 #include <botan/argon2.h>
8 #include <botan/hash.h>
9 #include <botan/mem_ops.h>
10 #include <botan/rotate.h>
11 #include <botan/exceptn.h>
12 
13 namespace Botan {
14 
15 namespace {
16 
17 static const size_t SYNC_POINTS = 4;
18 
19 secure_vector<uint8_t> argon2_H0(HashFunction& blake2b,
20  size_t output_len,
21  const char* password, size_t password_len,
22  const uint8_t salt[], size_t salt_len,
23  const uint8_t key[], size_t key_len,
24  const uint8_t ad[], size_t ad_len,
25  size_t y, size_t p, size_t M, size_t t)
26  {
27  const uint8_t v = 19; // Argon2 version code
28 
29  blake2b.update_le<uint32_t>(static_cast<uint32_t>(p));
30  blake2b.update_le<uint32_t>(static_cast<uint32_t>(output_len));
31  blake2b.update_le<uint32_t>(static_cast<uint32_t>(M));
32  blake2b.update_le<uint32_t>(static_cast<uint32_t>(t));
33  blake2b.update_le<uint32_t>(static_cast<uint32_t>(v));
34  blake2b.update_le<uint32_t>(static_cast<uint32_t>(y));
35 
36  blake2b.update_le<uint32_t>(static_cast<uint32_t>(password_len));
37  blake2b.update(cast_char_ptr_to_uint8(password), password_len);
38 
39  blake2b.update_le<uint32_t>(static_cast<uint32_t>(salt_len));
40  blake2b.update(salt, salt_len);
41 
42  blake2b.update_le<uint32_t>(static_cast<uint32_t>(key_len));
43  blake2b.update(key, key_len);
44 
45  blake2b.update_le<uint32_t>(static_cast<uint32_t>(ad_len));
46  blake2b.update(ad, ad_len);
47 
48  return blake2b.final();
49  }
50 
51 void Htick(secure_vector<uint8_t>& T,
52  uint8_t output[],
53  size_t output_len,
54  HashFunction& blake2b,
55  const secure_vector<uint8_t>& H0,
56  size_t p0, size_t p1)
57  {
58  BOTAN_ASSERT_NOMSG(output_len % 64 == 0);
59 
60  blake2b.update_le<uint32_t>(static_cast<uint32_t>(output_len));
61  blake2b.update(H0);
62  blake2b.update_le<uint32_t>(static_cast<uint32_t>(p0));
63  blake2b.update_le<uint32_t>(static_cast<uint32_t>(p1));
64 
65  blake2b.final(&T[0]);
66 
67  while(output_len > 64)
68  {
69  copy_mem(output, &T[0], 32);
70  output_len -= 32;
71  output += 32;
72 
73  blake2b.update(T);
74  blake2b.final(&T[0]);
75  }
76 
77  if(output_len > 0)
78  copy_mem(output, &T[0], output_len);
79  }
80 
81 void extract_key(uint8_t output[], size_t output_len,
82  const secure_vector<uint64_t>& B,
83  size_t memory, size_t threads)
84  {
85  const size_t lanes = memory / threads;
86 
87  secure_vector<uint64_t> sum(128);
88 
89  for(size_t lane = 0; lane != threads; ++lane)
90  {
91  size_t start = 128*(lane * lanes + lanes - 1);
92  size_t end = 128*(lane * lanes + lanes);
93 
94  for(size_t j = start; j != end; ++j)
95  {
96  sum[j % 128] ^= B[j];
97  }
98  }
99 
100  secure_vector<uint8_t> sum8(1024);
101  copy_out_le(sum8.data(), 1024, sum.data());
102 
103  if(output_len <= 64)
104  {
105  std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
106 
107  blake2b->update_le(static_cast<uint32_t>(output_len));
108  blake2b->update(sum8.data(), sum8.size());
109 
110  blake2b->final(output);
111  }
112  else
113  {
114  throw Not_Implemented("todo");
115  }
116  }
117 
118 void init_blocks(secure_vector<uint64_t>& B,
119  HashFunction& blake2b,
120  const secure_vector<uint8_t>& H0,
121  size_t memory,
122  size_t threads)
123  {
124  BOTAN_ASSERT_NOMSG(B.size() >= threads*256);
125 
126  secure_vector<uint8_t> H(1024);
127  secure_vector<uint8_t> T(blake2b.output_length());
128 
129  for(size_t i = 0; i != threads; ++i)
130  {
131  const size_t B_off = i * (memory / threads);
132 
133  BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2));
134 
135  Htick(T, &H[0], H.size(), blake2b, H0, 0, i);
136 
137  for(size_t j = 0; j != 128; ++j)
138  {
139  B[128*B_off+j] = load_le<uint64_t>(H.data(), j);
140  }
141 
142  Htick(T, &H[0], H.size(), blake2b, H0, 1, i);
143 
144  for(size_t j = 0; j != 128; ++j)
145  {
146  B[128*(B_off+1)+j] = load_le<uint64_t>(H.data(), j);
147  }
148  }
149  }
150 
151 inline void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
152  {
153  A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
154  D = rotr<32>(A ^ D);
155 
156  C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
157  B = rotr<24>(B ^ C);
158 
159  A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
160  D = rotr<16>(A ^ D);
161 
162  C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
163  B = rotr<63>(B ^ C);
164  }
165 
166 inline void blamka(uint64_t& V0, uint64_t& V1, uint64_t& V2, uint64_t& V3,
167  uint64_t& V4, uint64_t& V5, uint64_t& V6, uint64_t& V7,
168  uint64_t& V8, uint64_t& V9, uint64_t& VA, uint64_t& VB,
169  uint64_t& VC, uint64_t& VD, uint64_t& VE, uint64_t& VF)
170  {
171  blamka_G(V0, V4, V8, VC);
172  blamka_G(V1, V5, V9, VD);
173  blamka_G(V2, V6, VA, VE);
174  blamka_G(V3, V7, VB, VF);
175 
176  blamka_G(V0, V5, VA, VF);
177  blamka_G(V1, V6, VB, VC);
178  blamka_G(V2, V7, V8, VD);
179  blamka_G(V3, V4, V9, VE);
180  }
181 
182 void process_block_xor(secure_vector<uint64_t>& T,
183  secure_vector<uint64_t>& B,
184  size_t offset,
185  size_t prev,
186  size_t new_offset)
187  {
188  for(size_t i = 0; i != 128; ++i)
189  T[i] = B[128*prev+i] ^ B[128*new_offset+i];
190 
191  for(size_t i = 0; i != 128; i += 16)
192  {
193  blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
194  T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
195  T[i+ 8], T[i+ 9], T[i+10], T[i+11],
196  T[i+12], T[i+13], T[i+14], T[i+15]);
197  }
198 
199  for(size_t i = 0; i != 128 / 8; i += 2)
200  {
201  blamka(T[ i], T[ i+1], T[ 16+i], T[ 16+i+1],
202  T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
203  T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
204  T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
205  }
206 
207  for(size_t i = 0; i != 128; ++i)
208  B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
209  }
210 
211 void gen_2i_addresses(secure_vector<uint64_t>& T, secure_vector<uint64_t>& B,
212  size_t n, size_t lane, size_t slice, size_t memory,
213  size_t time, size_t mode, size_t cnt)
214  {
215  BOTAN_ASSERT_NOMSG(B.size() == 128);
216  BOTAN_ASSERT_NOMSG(T.size() == 128);
217 
218  clear_mem(B.data(), B.size());
219  B[0] = n;
220  B[1] = lane;
221  B[2] = slice;
222  B[3] = memory;
223  B[4] = time;
224  B[5] = mode;
225  B[6] = cnt;
226 
227  for(size_t r = 0; r != 2; ++r)
228  {
229  copy_mem(T.data(), B.data(), B.size());
230 
231  for(size_t i = 0; i != 128; i += 16)
232  {
233  blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
234  T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
235  T[i+ 8], T[i+ 9], T[i+10], T[i+11],
236  T[i+12], T[i+13], T[i+14], T[i+15]);
237  }
238  for(size_t i = 0; i != 128 / 8; i += 2)
239  {
240  blamka(T[ i], T[ i+1], T[ 16+i], T[ 16+i+1],
241  T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
242  T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
243  T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
244  }
245 
246  for(size_t i = 0; i != 128; ++i)
247  B[i] ^= T[i];
248  }
249  }
250 
251 uint32_t index_alpha(uint64_t random,
252  size_t lanes,
253  size_t segments,
254  size_t threads,
255  size_t n,
256  size_t slice,
257  size_t lane,
258  size_t index)
259  {
260  size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
261 
262  if(n == 0 && slice == 0)
263  ref_lane = lane;
264 
265  size_t m = 3*segments;
266  size_t s = ((slice+1) % 4)*segments;
267 
268  if(lane == ref_lane)
269  m += index;
270 
271  if(n == 0) {
272  m = slice*segments;
273  s = 0;
274  if(slice == 0 || lane == ref_lane)
275  m += index;
276  }
277 
278  if(index == 0 || lane == ref_lane)
279  m -= 1;
280 
281  uint64_t p = static_cast<uint32_t>(random);
282  p = (p * p) >> 32;
283  p = (p * m) >> 32;
284 
285  return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes);
286  }
287 
288 void process_block_argon2d(secure_vector<uint64_t>& T,
289  secure_vector<uint64_t>& B,
290  size_t n, size_t slice, size_t lane,
291  size_t lanes, size_t segments, size_t threads)
292  {
293  size_t index = 0;
294  if(n == 0 && slice == 0)
295  index = 2;
296 
297  while(index < segments)
298  {
299  const size_t offset = lane*lanes + slice*segments + index;
300 
301  size_t prev = offset - 1;
302  if(index == 0 && slice == 0)
303  prev += lanes;
304 
305  const uint64_t random = B.at(128*prev);
306  const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
307 
308  process_block_xor(T, B, offset, prev, new_offset);
309 
310  index += 1;
311  }
312  }
313 
314 void process_block_argon2i(secure_vector<uint64_t>& T,
315  secure_vector<uint64_t>& B,
316  size_t n, size_t slice, size_t lane,
317  size_t lanes, size_t segments, size_t threads, uint8_t mode,
318  size_t memory, size_t time)
319  {
320  size_t index = 0;
321  if(n == 0 && slice == 0)
322  index = 2;
323 
324  secure_vector<uint64_t> addresses(128);
325  size_t address_counter = 1;
326 
327  gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
328 
329  while(index < segments)
330  {
331  const size_t offset = lane*lanes + slice*segments + index;
332 
333  size_t prev = offset - 1;
334  if(index == 0 && slice == 0)
335  prev += lanes;
336 
337  if(index > 0 && index % 128 == 0)
338  {
339  address_counter += 1;
340  gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
341  }
342 
343  const uint64_t random = addresses[index % 128];
344  const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
345 
346  process_block_xor(T, B, offset, prev, new_offset);
347 
348  index += 1;
349  }
350  }
351 
352 void process_blocks(secure_vector<uint64_t>& B,
353  size_t t,
354  size_t memory,
355  size_t threads,
356  uint8_t mode)
357  {
358  const size_t lanes = memory / threads;
359  const size_t segments = lanes / SYNC_POINTS;
360 
361  secure_vector<uint64_t> T(128);
362  for(size_t n = 0; n != t; ++n)
363  {
364  for(size_t slice = 0; slice != SYNC_POINTS; ++slice)
365  {
366  // TODO can run this in Thread_Pool
367  for(size_t lane = 0; lane != threads; ++lane)
368  {
369  if(mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2))
370  process_block_argon2i(T, B, n, slice, lane, lanes, segments, threads, mode, memory, t);
371  else
372  process_block_argon2d(T, B, n, slice, lane, lanes, segments, threads);
373  }
374  }
375  }
376 
377  }
378 
379 }
380 
381 void argon2(uint8_t output[], size_t output_len,
382  const char* password, size_t password_len,
383  const uint8_t salt[], size_t salt_len,
384  const uint8_t key[], size_t key_len,
385  const uint8_t ad[], size_t ad_len,
386  uint8_t mode, size_t threads, size_t M, size_t t)
387  {
388  BOTAN_ARG_CHECK(mode == 0 || mode == 1 || mode == 2, "Unknown Argon2 mode parameter");
389  BOTAN_ARG_CHECK(output_len >= 4, "Invalid Argon2 output length");
390  BOTAN_ARG_CHECK(threads >= 1 && threads <= 128, "Invalid Argon2 threads parameter");
391  BOTAN_ARG_CHECK(M >= 8*threads && M <= 8192*1024, "Invalid Argon2 M parameter");
392  BOTAN_ARG_CHECK(t >= 1, "Invalid Argon2 t parameter");
393 
394  std::unique_ptr<HashFunction> blake2 = HashFunction::create_or_throw("BLAKE2b");
395 
396  const auto H0 = argon2_H0(*blake2, output_len,
397  password, password_len,
398  salt, salt_len,
399  key, key_len,
400  ad, ad_len,
401  mode, threads, M, t);
402 
403  const size_t memory = (M / (SYNC_POINTS*threads)) * (SYNC_POINTS*threads);
404 
405  secure_vector<uint64_t> B(memory * 1024/8);
406 
407  init_blocks(B, *blake2, H0, memory, threads);
408  process_blocks(B, t, memory, threads, mode);
409 
410  clear_mem(output, output_len);
411  extract_key(output, output_len, B, memory, threads);
412  }
413 
414 }
static std::unique_ptr< HashFunction > create_or_throw(const std::string &algo_spec, const std::string &provider="")
Definition: hash.cpp:359
void copy_out_le(uint8_t out[], size_t out_bytes, const T in[])
Definition: loadstor.h:677
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:111
const uint8_t * cast_char_ptr_to_uint8(const char *s)
Definition: mem_ops.h:159
#define BOTAN_ASSERT_NOMSG(expr)
Definition: assert.h:68
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:213
uint64_t load_le< uint64_t >(const uint8_t in[], size_t off)
Definition: loadstor.h:235
size_t salt_len
Definition: x509_obj.cpp:26
void copy_mem(T *out, const T *in, size_t n)
Definition: mem_ops.h:122
Definition: alg_id.cpp:13
#define BOTAN_ARG_CHECK(expr, msg)
Definition: assert.h:37
void argon2(uint8_t output[], size_t output_len, const char *password, size_t password_len, const uint8_t salt[], size_t salt_len, const uint8_t key[], size_t key_len, const uint8_t ad[], size_t ad_len, uint8_t mode, size_t threads, size_t M, size_t t)
Definition: argon2.cpp:381
fe T
Definition: ge.cpp:37
std::vector< T, secure_allocator< T > > secure_vector
Definition: secmem.h:65