Botan 3.0.0-alpha0
Crypto and TLS for C&
argon2.cpp
Go to the documentation of this file.
1/**
2* (C) 2018,2019,2022 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8#include <botan/internal/loadstor.h>
9#include <botan/hash.h>
10#include <botan/mem_ops.h>
11#include <botan/internal/rotate.h>
12#include <botan/exceptn.h>
13
14#if defined(BOTAN_HAS_THREAD_UTILS)
15 #include <botan/internal/thread_pool.h>
16#endif
17
18#if defined(BOTAN_HAS_ARGON2_SSSE3)
19 #include <botan/internal/argon2_ssse3.h>
20 #include <botan/internal/cpuid.h>
21#endif
22
23namespace Botan {
24
25namespace {
26
27const size_t SYNC_POINTS = 4;
28
29void argon2_H0(uint8_t H0[64],
30 HashFunction& blake2b,
31 size_t output_len,
32 const char* password, size_t password_len,
33 const uint8_t salt[], size_t salt_len,
34 const uint8_t key[], size_t key_len,
35 const uint8_t ad[], size_t ad_len,
36 size_t y, size_t p, size_t M, size_t t)
37 {
38 const uint8_t v = 19; // Argon2 version code
39
40 blake2b.update_le(static_cast<uint32_t>(p));
41 blake2b.update_le(static_cast<uint32_t>(output_len));
42 blake2b.update_le(static_cast<uint32_t>(M));
43 blake2b.update_le(static_cast<uint32_t>(t));
44 blake2b.update_le(static_cast<uint32_t>(v));
45 blake2b.update_le(static_cast<uint32_t>(y));
46
47 blake2b.update_le(static_cast<uint32_t>(password_len));
48 blake2b.update(cast_char_ptr_to_uint8(password), password_len);
49
50 blake2b.update_le(static_cast<uint32_t>(salt_len));
51 blake2b.update(salt, salt_len);
52
53 blake2b.update_le(static_cast<uint32_t>(key_len));
54 blake2b.update(key, key_len);
55
56 blake2b.update_le(static_cast<uint32_t>(ad_len));
57 blake2b.update(ad, ad_len);
58
59 blake2b.final(H0);
60 }
61
62void extract_key(uint8_t output[], size_t output_len,
63 const secure_vector<uint64_t>& B,
64 size_t memory, size_t threads)
65 {
66 const size_t lanes = memory / threads;
67
68 uint64_t sum[128] = { 0 };
69
70 for(size_t lane = 0; lane != threads; ++lane)
71 {
72 const size_t start = 128*(lane * lanes + lanes - 1);
73 const size_t end = 128*(lane * lanes + lanes);
74
75 for(size_t j = start; j != end; ++j)
76 {
77 sum[j % 128] ^= B[j];
78 }
79 }
80
81 if(output_len <= 64)
82 {
83 std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
84 blake2b->update_le(static_cast<uint32_t>(output_len));
85 for(size_t i = 0; i != 128; ++i)
86 blake2b->update_le(sum[i]);
87 blake2b->final(output);
88 }
89 else
90 {
91 secure_vector<uint8_t> T(64);
92
93 std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
94 blake2b->update_le(static_cast<uint32_t>(output_len));
95 for(size_t i = 0; i != 128; ++i)
96 blake2b->update_le(sum[i]);
97 blake2b->final(&T[0]);
98
99 while(output_len > 64)
100 {
101 copy_mem(output, &T[0], 32);
102 output_len -= 32;
103 output += 32;
104
105 if(output_len > 64)
106 {
107 blake2b->update(T);
108 blake2b->final(&T[0]);
109 }
110 }
111
112 if(output_len == 64)
113 {
114 blake2b->update(T);
115 blake2b->final(output);
116 }
117 else
118 {
119 std::unique_ptr<HashFunction> blake2b_f = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
120 blake2b_f->update(T);
121 blake2b_f->final(output);
122 }
123 }
124 }
125
126void init_blocks(secure_vector<uint64_t>& B,
127 HashFunction& blake2b,
128 const uint8_t H0[64],
129 size_t memory,
130 size_t threads)
131 {
132 BOTAN_ASSERT_NOMSG(B.size() >= threads*256);
133
134 for(size_t i = 0; i != threads; ++i)
135 {
136 const size_t B_off = i * (memory / threads);
137
138 BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2));
139
140 for(size_t j = 0; j != 2; ++j)
141 {
142 uint8_t T[64] = { 0 };
143
144 blake2b.update_le(static_cast<uint32_t>(1024));
145 blake2b.update(H0, 64);
146 blake2b.update_le(static_cast<uint32_t>(j));
147 blake2b.update_le(static_cast<uint32_t>(i));
148 blake2b.final(T);
149
150 for(size_t k = 0; k != 30; ++k)
151 {
152 load_le(&B[128*(B_off+j)+4*k], T, 32 / 8);
153 blake2b.update(T, 64);
154 blake2b.final(T);
155 }
156
157 load_le(&B[128*(B_off+j)+4*30], T, 64 / 8);
158 }
159 }
160 }
161
162BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
163 {
164 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
165 D = rotr<32>(A ^ D);
166
167 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
168 B = rotr<24>(B ^ C);
169
170 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
171 D = rotr<16>(A ^ D);
172
173 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
174 B = rotr<63>(B ^ C);
175 }
176
177void blamka(uint64_t T[128])
178 {
179#if defined(BOTAN_HAS_ARGON2_SSSE3)
180 if(CPUID::has_ssse3())
181 return blamka_ssse3(T);
182#endif
183
184 for(size_t i = 0; i != 128; i += 16)
185 {
186 blamka_G(T[i+ 0], T[i+ 4], T[i+ 8], T[i+ 12]);
187 blamka_G(T[i+ 1], T[i+ 5], T[i+ 9], T[i+ 13]);
188 blamka_G(T[i+ 2], T[i+ 6], T[i+ 10], T[i+ 14]);
189 blamka_G(T[i+ 3], T[i+ 7], T[i+ 11], T[i+ 15]);
190
191 blamka_G(T[i+ 0], T[i+ 5], T[i+ 10], T[i+ 15]);
192 blamka_G(T[i+ 1], T[i+ 6], T[i+ 11], T[i+ 12]);
193 blamka_G(T[i+ 2], T[i+ 7], T[i+ 8], T[i+ 13]);
194 blamka_G(T[i+ 3], T[i+ 4], T[i+ 9], T[i+ 14]);
195 }
196
197 for(size_t i = 0; i != 128 / 8; i += 2)
198 {
199 blamka_G(T[i+ 0], T[i+ 32], T[i+ 64], T[i+ 96]);
200 blamka_G(T[i+ 1], T[i+ 33], T[i+ 65], T[i+ 97]);
201 blamka_G(T[i+ 16], T[i+ 48], T[i+ 80], T[i+112]);
202 blamka_G(T[i+ 17], T[i+ 49], T[i+ 81], T[i+113]);
203
204 blamka_G(T[i+ 0], T[i+ 33], T[i+ 80], T[i+113]);
205 blamka_G(T[i+ 1], T[i+ 48], T[i+ 81], T[i+ 96]);
206 blamka_G(T[i+ 16], T[i+ 49], T[i+ 64], T[i+ 97]);
207 blamka_G(T[i+ 17], T[i+ 32], T[i+ 65], T[i+112]);
208 }
209 }
210
211void gen_2i_addresses(uint64_t T[128], uint64_t B[128],
212 size_t n, size_t lane, size_t slice, size_t memory,
213 size_t time, size_t mode, size_t cnt)
214 {
215 clear_mem(B, 128);
216
217 B[0] = n;
218 B[1] = lane;
219 B[2] = slice;
220 B[3] = memory;
221 B[4] = time;
222 B[5] = mode;
223 B[6] = cnt;
224
225 for(size_t r = 0; r != 2; ++r)
226 {
227 copy_mem(T, B, 128);
228
229 blamka(T);
230
231 for(size_t i = 0; i != 128; ++i)
232 B[i] ^= T[i];
233 }
234 }
235
236uint32_t index_alpha(uint64_t random,
237 size_t lanes,
238 size_t segments,
239 size_t threads,
240 size_t n,
241 size_t slice,
242 size_t lane,
243 size_t index)
244 {
245 size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
246
247 if(n == 0 && slice == 0)
248 ref_lane = lane;
249
250 size_t m = 3*segments;
251 size_t s = ((slice+1) % 4)*segments;
252
253 if(lane == ref_lane)
254 m += index;
255
256 if(n == 0)
257 {
258 m = slice*segments;
259 s = 0;
260 if(slice == 0 || lane == ref_lane)
261 m += index;
262 }
263
264 if(index == 0 || lane == ref_lane)
265 m -= 1;
266
267 uint64_t p = static_cast<uint32_t>(random);
268 p = (p * p) >> 32;
269 p = (p * m) >> 32;
270
271 return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes);
272 }
273
274void process_block(secure_vector<uint64_t>& B,
275 size_t n, size_t slice, size_t lane,
276 size_t lanes, size_t segments, size_t threads, uint8_t mode,
277 size_t memory, size_t time)
278 {
279 uint64_t T[128];
280 size_t index = 0;
281 if(n == 0 && slice == 0)
282 index = 2;
283
284 const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2);
285
286 uint64_t addresses[128];
287 size_t address_counter = 1;
288
289 if(use_2i)
290 {
291 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
292 }
293
294 while(index < segments)
295 {
296 const size_t offset = lane*lanes + slice*segments + index;
297
298 size_t prev = offset - 1;
299 if(index == 0 && slice == 0)
300 prev += lanes;
301
302 if(use_2i && index > 0 && index % 128 == 0)
303 {
304 address_counter += 1;
305 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
306 }
307
308 const uint64_t random = use_2i ? addresses[index % 128] : B.at(128*prev);
309 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
310
311 for(size_t i = 0; i != 128; ++i)
312 T[i] = B[128*prev+i] ^ B[128*new_offset+i];
313
314 blamka(T);
315
316 for(size_t i = 0; i != 128; ++i)
317 B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
318
319 index += 1;
320 }
321 }
322
323void process_blocks(secure_vector<uint64_t>& B,
324 size_t t,
325 size_t memory,
326 size_t threads,
327 uint8_t mode)
328 {
329 const size_t lanes = memory / threads;
330 const size_t segments = lanes / SYNC_POINTS;
331
332#if defined(BOTAN_HAS_THREAD_UTILS)
333 auto& thread_pool = Thread_Pool::global_instance();
334#endif
335
336 for(size_t n = 0; n != t; ++n)
337 {
338 for(size_t slice = 0; slice != SYNC_POINTS; ++slice)
339 {
340#if defined(BOTAN_HAS_THREAD_UTILS)
341 if(threads > 1)
342 {
343 std::vector<std::future<void>> fut_results;
344 fut_results.reserve(threads);
345
346 for(size_t lane = 0; lane != threads; ++lane)
347 {
348 fut_results.push_back(thread_pool.run(
349 process_block,
350 std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
351 }
352
353 for(auto& fut : fut_results)
354 fut.get();
355
356 continue;
357 }
358#endif
359
360 for(size_t lane = 0; lane != threads; ++lane)
361 {
362 process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
363 }
364 }
365 }
366 }
367
368}
369
370void Argon2::argon2(uint8_t output[], size_t output_len,
371 const char* password, size_t password_len,
372 const uint8_t salt[], size_t salt_len,
373 const uint8_t key[], size_t key_len,
374 const uint8_t ad[], size_t ad_len) const
375 {
376 BOTAN_ARG_CHECK(output_len >= 4, "Invalid Argon2 output length");
377
378 auto blake2 = HashFunction::create_or_throw("BLAKE2b");
379
380 uint8_t H0[64] = { 0 };
381 argon2_H0(H0, *blake2, output_len,
382 password, password_len,
383 salt, salt_len,
384 key, key_len,
385 ad, ad_len,
386 m_family, m_p, m_M, m_t);
387
388 const size_t memory = (m_M / (SYNC_POINTS*m_p)) * (SYNC_POINTS*m_p);
389
390 secure_vector<uint64_t> B(memory * 1024/8);
391
392 init_blocks(B, *blake2, H0, memory, m_p);
393 process_blocks(B, m_t, memory, m_p, m_family);
394
395 clear_mem(output, output_len);
396 extract_key(output, output_len, B, memory, m_p);
397 }
398
399}
#define BOTAN_ASSERT_NOMSG(expr)
Definition: assert.h:67
#define BOTAN_ARG_CHECK(expr, msg)
Definition: assert.h:36
static std::unique_ptr< HashFunction > create_or_throw(const std::string &algo_spec, const std::string &provider="")
Definition: hash.cpp:312
static Thread_Pool & global_instance()
Definition: thread_pool.cpp:38
#define BOTAN_FORCE_INLINE
Definition: compiler.h:128
fe T
Definition: ge.cpp:36
Polynomial v
Definition: kyber.cpp:822
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:209
Definition: alg_id.cpp:13
void blamka_ssse3(uint64_t T[128])
constexpr void copy_mem(T *out, const T *in, size_t n)
Definition: mem_ops.h:126
constexpr T load_le(const uint8_t in[], size_t off)
Definition: loadstor.h:134
constexpr void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:115
const uint8_t * cast_char_ptr_to_uint8(const char *s)
Definition: mem_ops.h:183
size_t salt_len
Definition: x509_obj.cpp:25