Botan 2.19.1
Crypto and TLS for C&
argon2.cpp
Go to the documentation of this file.
1/**
2* (C) 2018,2019 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8#include <botan/loadstor.h>
9#include <botan/hash.h>
10#include <botan/mem_ops.h>
11#include <botan/rotate.h>
12#include <botan/exceptn.h>
13
14namespace Botan {
15
16namespace {
17
18static const size_t SYNC_POINTS = 4;
19
20secure_vector<uint8_t> argon2_H0(HashFunction& blake2b,
21 size_t output_len,
22 const char* password, size_t password_len,
23 const uint8_t salt[], size_t salt_len,
24 const uint8_t key[], size_t key_len,
25 const uint8_t ad[], size_t ad_len,
26 size_t y, size_t p, size_t M, size_t t)
27 {
28 const uint8_t v = 19; // Argon2 version code
29
30 blake2b.update_le(static_cast<uint32_t>(p));
31 blake2b.update_le(static_cast<uint32_t>(output_len));
32 blake2b.update_le(static_cast<uint32_t>(M));
33 blake2b.update_le(static_cast<uint32_t>(t));
34 blake2b.update_le(static_cast<uint32_t>(v));
35 blake2b.update_le(static_cast<uint32_t>(y));
36
37 blake2b.update_le(static_cast<uint32_t>(password_len));
38 blake2b.update(cast_char_ptr_to_uint8(password), password_len);
39
40 blake2b.update_le(static_cast<uint32_t>(salt_len));
41 blake2b.update(salt, salt_len);
42
43 blake2b.update_le(static_cast<uint32_t>(key_len));
44 blake2b.update(key, key_len);
45
46 blake2b.update_le(static_cast<uint32_t>(ad_len));
47 blake2b.update(ad, ad_len);
48
49 return blake2b.final();
50 }
51
52void Htick(secure_vector<uint8_t>& T,
53 uint8_t output[],
54 size_t output_len,
55 HashFunction& blake2b,
56 const secure_vector<uint8_t>& H0,
57 size_t p0, size_t p1)
58 {
59 BOTAN_ASSERT_NOMSG(output_len % 64 == 0);
60
61 blake2b.update_le(static_cast<uint32_t>(output_len));
62 blake2b.update(H0);
63 blake2b.update_le(static_cast<uint32_t>(p0));
64 blake2b.update_le(static_cast<uint32_t>(p1));
65
66 blake2b.final(&T[0]);
67
68 while(output_len > 64)
69 {
70 copy_mem(output, &T[0], 32);
71 output_len -= 32;
72 output += 32;
73
74 blake2b.update(T);
75 blake2b.final(&T[0]);
76 }
77
78 if(output_len > 0)
79 copy_mem(output, &T[0], output_len);
80 }
81
82void extract_key(uint8_t output[], size_t output_len,
83 const secure_vector<uint64_t>& B,
84 size_t memory, size_t threads)
85 {
86 const size_t lanes = memory / threads;
87
88 secure_vector<uint64_t> sum(128);
89
90 for(size_t lane = 0; lane != threads; ++lane)
91 {
92 size_t start = 128*(lane * lanes + lanes - 1);
93 size_t end = 128*(lane * lanes + lanes);
94
95 for(size_t j = start; j != end; ++j)
96 {
97 sum[j % 128] ^= B[j];
98 }
99 }
100
101 secure_vector<uint8_t> sum8(1024);
102 copy_out_le(sum8.data(), 1024, sum.data());
103
104 if(output_len <= 64)
105 {
106 std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
107 blake2b->update_le(static_cast<uint32_t>(output_len));
108 blake2b->update(sum8.data(), sum8.size());
109 blake2b->final(output);
110 }
111 else
112 {
113 secure_vector<uint8_t> T(64);
114
115 std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
116 blake2b->update_le(static_cast<uint32_t>(output_len));
117 blake2b->update(sum8.data(), sum8.size());
118 blake2b->final(&T[0]);
119
120 while(output_len > 64)
121 {
122 copy_mem(output, &T[0], 32);
123 output_len -= 32;
124 output += 32;
125
126 if(output_len > 64)
127 {
128 blake2b->update(T);
129 blake2b->final(&T[0]);
130 }
131 }
132
133 if(output_len == 64)
134 {
135 blake2b->update(T);
136 blake2b->final(output);
137 }
138 else
139 {
140 std::unique_ptr<HashFunction> blake2b_f = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
141 blake2b_f->update(T);
142 blake2b_f->final(output);
143 }
144 }
145 }
146
147void init_blocks(secure_vector<uint64_t>& B,
148 HashFunction& blake2b,
149 const secure_vector<uint8_t>& H0,
150 size_t memory,
151 size_t threads)
152 {
153 BOTAN_ASSERT_NOMSG(B.size() >= threads*256);
154
155 secure_vector<uint8_t> H(1024);
156 secure_vector<uint8_t> T(blake2b.output_length());
157
158 for(size_t i = 0; i != threads; ++i)
159 {
160 const size_t B_off = i * (memory / threads);
161
162 BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2));
163
164 Htick(T, &H[0], H.size(), blake2b, H0, 0, i);
165
166 for(size_t j = 0; j != 128; ++j)
167 {
168 B[128*B_off+j] = load_le<uint64_t>(H.data(), j);
169 }
170
171 Htick(T, &H[0], H.size(), blake2b, H0, 1, i);
172
173 for(size_t j = 0; j != 128; ++j)
174 {
175 B[128*(B_off+1)+j] = load_le<uint64_t>(H.data(), j);
176 }
177 }
178 }
179
180inline void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
181 {
182 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
183 D = rotr<32>(A ^ D);
184
185 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
186 B = rotr<24>(B ^ C);
187
188 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
189 D = rotr<16>(A ^ D);
190
191 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
192 B = rotr<63>(B ^ C);
193 }
194
195inline void blamka(uint64_t& V0, uint64_t& V1, uint64_t& V2, uint64_t& V3,
196 uint64_t& V4, uint64_t& V5, uint64_t& V6, uint64_t& V7,
197 uint64_t& V8, uint64_t& V9, uint64_t& VA, uint64_t& VB,
198 uint64_t& VC, uint64_t& VD, uint64_t& VE, uint64_t& VF)
199 {
200 blamka_G(V0, V4, V8, VC);
201 blamka_G(V1, V5, V9, VD);
202 blamka_G(V2, V6, VA, VE);
203 blamka_G(V3, V7, VB, VF);
204
205 blamka_G(V0, V5, VA, VF);
206 blamka_G(V1, V6, VB, VC);
207 blamka_G(V2, V7, V8, VD);
208 blamka_G(V3, V4, V9, VE);
209 }
210
211void process_block_xor(secure_vector<uint64_t>& T,
212 secure_vector<uint64_t>& B,
213 size_t offset,
214 size_t prev,
215 size_t new_offset)
216 {
217 for(size_t i = 0; i != 128; ++i)
218 T[i] = B[128*prev+i] ^ B[128*new_offset+i];
219
220 for(size_t i = 0; i != 128; i += 16)
221 {
222 blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
223 T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
224 T[i+ 8], T[i+ 9], T[i+10], T[i+11],
225 T[i+12], T[i+13], T[i+14], T[i+15]);
226 }
227
228 for(size_t i = 0; i != 128 / 8; i += 2)
229 {
230 blamka(T[ i], T[ i+1], T[ 16+i], T[ 16+i+1],
231 T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
232 T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
233 T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
234 }
235
236 for(size_t i = 0; i != 128; ++i)
237 B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
238 }
239
240void gen_2i_addresses(secure_vector<uint64_t>& T, secure_vector<uint64_t>& B,
241 size_t n, size_t lane, size_t slice, size_t memory,
242 size_t time, size_t mode, size_t cnt)
243 {
244 BOTAN_ASSERT_NOMSG(B.size() == 128);
245 BOTAN_ASSERT_NOMSG(T.size() == 128);
246
247 clear_mem(B.data(), B.size());
248 B[0] = n;
249 B[1] = lane;
250 B[2] = slice;
251 B[3] = memory;
252 B[4] = time;
253 B[5] = mode;
254 B[6] = cnt;
255
256 for(size_t r = 0; r != 2; ++r)
257 {
258 copy_mem(T.data(), B.data(), B.size());
259
260 for(size_t i = 0; i != 128; i += 16)
261 {
262 blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
263 T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
264 T[i+ 8], T[i+ 9], T[i+10], T[i+11],
265 T[i+12], T[i+13], T[i+14], T[i+15]);
266 }
267 for(size_t i = 0; i != 128 / 8; i += 2)
268 {
269 blamka(T[ i], T[ i+1], T[ 16+i], T[ 16+i+1],
270 T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
271 T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
272 T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
273 }
274
275 for(size_t i = 0; i != 128; ++i)
276 B[i] ^= T[i];
277 }
278 }
279
280uint32_t index_alpha(uint64_t random,
281 size_t lanes,
282 size_t segments,
283 size_t threads,
284 size_t n,
285 size_t slice,
286 size_t lane,
287 size_t index)
288 {
289 size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
290
291 if(n == 0 && slice == 0)
292 ref_lane = lane;
293
294 size_t m = 3*segments;
295 size_t s = ((slice+1) % 4)*segments;
296
297 if(lane == ref_lane)
298 m += index;
299
300 if(n == 0) {
301 m = slice*segments;
302 s = 0;
303 if(slice == 0 || lane == ref_lane)
304 m += index;
305 }
306
307 if(index == 0 || lane == ref_lane)
308 m -= 1;
309
310 uint64_t p = static_cast<uint32_t>(random);
311 p = (p * p) >> 32;
312 p = (p * m) >> 32;
313
314 return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes);
315 }
316
317void process_block_argon2d(secure_vector<uint64_t>& T,
318 secure_vector<uint64_t>& B,
319 size_t n, size_t slice, size_t lane,
320 size_t lanes, size_t segments, size_t threads)
321 {
322 size_t index = 0;
323 if(n == 0 && slice == 0)
324 index = 2;
325
326 while(index < segments)
327 {
328 const size_t offset = lane*lanes + slice*segments + index;
329
330 size_t prev = offset - 1;
331 if(index == 0 && slice == 0)
332 prev += lanes;
333
334 const uint64_t random = B.at(128*prev);
335 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
336
337 process_block_xor(T, B, offset, prev, new_offset);
338
339 index += 1;
340 }
341 }
342
343void process_block_argon2i(secure_vector<uint64_t>& T,
344 secure_vector<uint64_t>& B,
345 size_t n, size_t slice, size_t lane,
346 size_t lanes, size_t segments, size_t threads, uint8_t mode,
347 size_t memory, size_t time)
348 {
349 size_t index = 0;
350 if(n == 0 && slice == 0)
351 index = 2;
352
353 secure_vector<uint64_t> addresses(128);
354 size_t address_counter = 1;
355
356 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
357
358 while(index < segments)
359 {
360 const size_t offset = lane*lanes + slice*segments + index;
361
362 size_t prev = offset - 1;
363 if(index == 0 && slice == 0)
364 prev += lanes;
365
366 if(index > 0 && index % 128 == 0)
367 {
368 address_counter += 1;
369 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
370 }
371
372 const uint64_t random = addresses[index % 128];
373 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
374
375 process_block_xor(T, B, offset, prev, new_offset);
376
377 index += 1;
378 }
379 }
380
381void process_blocks(secure_vector<uint64_t>& B,
382 size_t t,
383 size_t memory,
384 size_t threads,
385 uint8_t mode)
386 {
387 const size_t lanes = memory / threads;
388 const size_t segments = lanes / SYNC_POINTS;
389
390 secure_vector<uint64_t> T(128);
391 for(size_t n = 0; n != t; ++n)
392 {
393 for(size_t slice = 0; slice != SYNC_POINTS; ++slice)
394 {
395 // TODO can run this in Thread_Pool
396 for(size_t lane = 0; lane != threads; ++lane)
397 {
398 if(mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2))
399 process_block_argon2i(T, B, n, slice, lane, lanes, segments, threads, mode, memory, t);
400 else
401 process_block_argon2d(T, B, n, slice, lane, lanes, segments, threads);
402 }
403 }
404 }
405
406 }
407
408}
409
410void argon2(uint8_t output[], size_t output_len,
411 const char* password, size_t password_len,
412 const uint8_t salt[], size_t salt_len,
413 const uint8_t key[], size_t key_len,
414 const uint8_t ad[], size_t ad_len,
415 uint8_t mode, size_t threads, size_t M, size_t t)
416 {
417 BOTAN_ARG_CHECK(mode == 0 || mode == 1 || mode == 2, "Unknown Argon2 mode parameter");
418 BOTAN_ARG_CHECK(output_len >= 4, "Invalid Argon2 output length");
419 BOTAN_ARG_CHECK(threads >= 1 && threads <= 128, "Invalid Argon2 threads parameter");
420 BOTAN_ARG_CHECK(M >= 8*threads && M <= 8192*1024, "Invalid Argon2 M parameter");
421 BOTAN_ARG_CHECK(t >= 1, "Invalid Argon2 t parameter");
422
423 std::unique_ptr<HashFunction> blake2 = HashFunction::create_or_throw("BLAKE2b");
424
425 const auto H0 = argon2_H0(*blake2, output_len,
426 password, password_len,
427 salt, salt_len,
428 key, key_len,
429 ad, ad_len,
430 mode, threads, M, t);
431
432 const size_t memory = (M / (SYNC_POINTS*threads)) * (SYNC_POINTS*threads);
433
434 secure_vector<uint64_t> B(memory * 1024/8);
435
436 init_blocks(B, *blake2, H0, memory, threads);
437 process_blocks(B, t, memory, threads, mode);
438
439 clear_mem(output, output_len);
440 extract_key(output, output_len, B, memory, threads);
441 }
442
443}
#define BOTAN_ASSERT_NOMSG(expr)
Definition: assert.h:68
#define BOTAN_ARG_CHECK(expr, msg)
Definition: assert.h:37
static std::unique_ptr< HashFunction > create_or_throw(const std::string &algo_spec, const std::string &provider="")
Definition: hash.cpp:344
fe T
Definition: ge.cpp:37
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:213
Definition: alg_id.cpp:13
void copy_out_le(uint8_t out[], size_t out_bytes, const T in[])
Definition: loadstor.h:679
void argon2(uint8_t output[], size_t output_len, const char *password, size_t password_len, const uint8_t salt[], size_t salt_len, const uint8_t key[], size_t key_len, const uint8_t ad[], size_t ad_len, uint8_t mode, size_t threads, size_t M, size_t t)
Definition: argon2.cpp:410
void copy_mem(T *out, const T *in, size_t n)
Definition: mem_ops.h:133
std::vector< T, secure_allocator< T > > secure_vector
Definition: secmem.h:65
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:115
const uint8_t * cast_char_ptr_to_uint8(const char *s)
Definition: mem_ops.h:190
uint64_t load_le< uint64_t >(const uint8_t in[], size_t off)
Definition: loadstor.h:237
size_t salt_len
Definition: x509_obj.cpp:25