Botan 3.6.1
Crypto and TLS for C&
argon2.cpp
Go to the documentation of this file.
1/**
2* (C) 2018,2019,2022 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8
9#include <botan/exceptn.h>
10#include <botan/hash.h>
11#include <botan/mem_ops.h>
12#include <botan/internal/fmt.h>
13#include <botan/internal/loadstor.h>
14#include <botan/internal/rotate.h>
15#include <limits>
16
17#if defined(BOTAN_HAS_THREAD_UTILS)
18 #include <botan/internal/thread_pool.h>
19#endif
20
21#if defined(BOTAN_HAS_ARGON2_AVX2) || defined(BOTAN_HAS_ARGON2_SSSE3)
22 #include <botan/internal/cpuid.h>
23#endif
24
25namespace Botan {
26
27namespace {
28
29const size_t SYNC_POINTS = 4;
30
31void argon2_H0(uint8_t H0[64],
32 HashFunction& blake2b,
33 size_t output_len,
34 const char* password,
35 size_t password_len,
36 const uint8_t salt[],
37 size_t salt_len,
38 const uint8_t key[],
39 size_t key_len,
40 const uint8_t ad[],
41 size_t ad_len,
42 size_t y,
43 size_t p,
44 size_t M,
45 size_t t) {
46 const uint8_t v = 19; // Argon2 version code
47
48 blake2b.update_le(static_cast<uint32_t>(p));
49 blake2b.update_le(static_cast<uint32_t>(output_len));
50 blake2b.update_le(static_cast<uint32_t>(M));
51 blake2b.update_le(static_cast<uint32_t>(t));
52 blake2b.update_le(static_cast<uint32_t>(v));
53 blake2b.update_le(static_cast<uint32_t>(y));
54
55 blake2b.update_le(static_cast<uint32_t>(password_len));
56 blake2b.update(cast_char_ptr_to_uint8(password), password_len);
57
58 blake2b.update_le(static_cast<uint32_t>(salt_len));
59 blake2b.update(salt, salt_len);
60
61 blake2b.update_le(static_cast<uint32_t>(key_len));
62 blake2b.update(key, key_len);
63
64 blake2b.update_le(static_cast<uint32_t>(ad_len));
65 blake2b.update(ad, ad_len);
66
67 blake2b.final(H0);
68}
69
70void extract_key(uint8_t output[], size_t output_len, const secure_vector<uint64_t>& B, size_t memory, size_t threads) {
71 const size_t lanes = memory / threads;
72
73 uint64_t sum[128] = {0};
74
75 for(size_t lane = 0; lane != threads; ++lane) {
76 const size_t start = 128 * (lane * lanes + lanes - 1);
77 const size_t end = 128 * (lane * lanes + lanes);
78
79 for(size_t j = start; j != end; ++j) {
80 sum[j % 128] ^= B[j];
81 }
82 }
83
84 if(output_len <= 64) {
85 auto blake2b = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
86 blake2b->update_le(static_cast<uint32_t>(output_len));
87 for(size_t i = 0; i != 128; ++i) {
88 blake2b->update_le(sum[i]);
89 }
90 blake2b->final(output);
91 } else {
93
94 auto blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
95 blake2b->update_le(static_cast<uint32_t>(output_len));
96 for(size_t i = 0; i != 128; ++i) {
97 blake2b->update_le(sum[i]);
98 }
99 blake2b->final(&T[0]);
100
101 while(output_len > 64) {
102 copy_mem(output, &T[0], 32);
103 output_len -= 32;
104 output += 32;
105
106 if(output_len > 64) {
107 blake2b->update(T);
108 blake2b->final(&T[0]);
109 }
110 }
111
112 if(output_len == 64) {
113 blake2b->update(T);
114 blake2b->final(output);
115 } else {
116 auto blake2b_f = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
117 blake2b_f->update(T);
118 blake2b_f->final(output);
119 }
120 }
121}
122
123void init_blocks(
124 secure_vector<uint64_t>& B, HashFunction& blake2b, const uint8_t H0[64], size_t memory, size_t threads) {
125 BOTAN_ASSERT_NOMSG(B.size() >= threads * 256);
126
127 for(size_t i = 0; i != threads; ++i) {
128 const size_t B_off = i * (memory / threads);
129
130 BOTAN_ASSERT_NOMSG(B.size() >= 128 * (B_off + 2));
131
132 for(size_t j = 0; j != 2; ++j) {
133 uint8_t T[64] = {0};
134
135 blake2b.update_le(static_cast<uint32_t>(1024));
136 blake2b.update(H0, 64);
137 blake2b.update_le(static_cast<uint32_t>(j));
138 blake2b.update_le(static_cast<uint32_t>(i));
139 blake2b.final(T);
140
141 for(size_t k = 0; k != 30; ++k) {
142 load_le(&B[128 * (B_off + j) + 4 * k], T, 32 / 8);
143 blake2b.update(T, 64);
144 blake2b.final(T);
145 }
146
147 load_le(&B[128 * (B_off + j) + 4 * 30], T, 64 / 8);
148 }
149 }
150}
151
152BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) {
153 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
154 D = rotr<32>(A ^ D);
155
156 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
157 B = rotr<24>(B ^ C);
158
159 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
160 D = rotr<16>(A ^ D);
161
162 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
163 B = rotr<63>(B ^ C);
164}
165
166} // namespace
167
168void Argon2::blamka(uint64_t N[128], uint64_t T[128]) {
169#if defined(BOTAN_HAS_ARGON2_AVX2)
170 if(CPUID::has_avx2()) {
171 return Argon2::blamka_avx2(N, T);
172 }
173#endif
174
175#if defined(BOTAN_HAS_ARGON2_SSSE3)
176 if(CPUID::has_ssse3()) {
177 return Argon2::blamka_ssse3(N, T);
178 }
179#endif
180
181 copy_mem(T, N, 128);
182
183 for(size_t i = 0; i != 128; i += 16) {
184 blamka_G(T[i + 0], T[i + 4], T[i + 8], T[i + 12]);
185 blamka_G(T[i + 1], T[i + 5], T[i + 9], T[i + 13]);
186 blamka_G(T[i + 2], T[i + 6], T[i + 10], T[i + 14]);
187 blamka_G(T[i + 3], T[i + 7], T[i + 11], T[i + 15]);
188
189 blamka_G(T[i + 0], T[i + 5], T[i + 10], T[i + 15]);
190 blamka_G(T[i + 1], T[i + 6], T[i + 11], T[i + 12]);
191 blamka_G(T[i + 2], T[i + 7], T[i + 8], T[i + 13]);
192 blamka_G(T[i + 3], T[i + 4], T[i + 9], T[i + 14]);
193 }
194
195 for(size_t i = 0; i != 128 / 8; i += 2) {
196 blamka_G(T[i + 0], T[i + 32], T[i + 64], T[i + 96]);
197 blamka_G(T[i + 1], T[i + 33], T[i + 65], T[i + 97]);
198 blamka_G(T[i + 16], T[i + 48], T[i + 80], T[i + 112]);
199 blamka_G(T[i + 17], T[i + 49], T[i + 81], T[i + 113]);
200
201 blamka_G(T[i + 0], T[i + 33], T[i + 80], T[i + 113]);
202 blamka_G(T[i + 1], T[i + 48], T[i + 81], T[i + 96]);
203 blamka_G(T[i + 16], T[i + 49], T[i + 64], T[i + 97]);
204 blamka_G(T[i + 17], T[i + 32], T[i + 65], T[i + 112]);
205 }
206
207 for(size_t i = 0; i != 128; ++i) {
208 N[i] ^= T[i];
209 }
210}
211
212namespace {
213
214void gen_2i_addresses(uint64_t T[128],
215 uint64_t B[128],
216 size_t n,
217 size_t lane,
218 size_t slice,
219 size_t memory,
220 size_t time,
221 size_t mode,
222 size_t cnt) {
223 clear_mem(B, 128);
224
225 B[0] = n;
226 B[1] = lane;
227 B[2] = slice;
228 B[3] = memory;
229 B[4] = time;
230 B[5] = mode;
231 B[6] = cnt;
232
233 for(size_t r = 0; r != 2; ++r) {
234 Argon2::blamka(B, T);
235 }
236}
237
238uint32_t index_alpha(
239 uint64_t random, size_t lanes, size_t segments, size_t threads, size_t n, size_t slice, size_t lane, size_t index) {
240 size_t ref_lane = static_cast<uint32_t>(random >> 32) % threads;
241
242 if(n == 0 && slice == 0) {
243 ref_lane = lane;
244 }
245
246 size_t m = 3 * segments;
247 size_t s = ((slice + 1) % 4) * segments;
248
249 if(lane == ref_lane) {
250 m += index;
251 }
252
253 if(n == 0) {
254 m = slice * segments;
255 s = 0;
256 if(slice == 0 || lane == ref_lane) {
257 m += index;
258 }
259 }
260
261 if(index == 0 || lane == ref_lane) {
262 m -= 1;
263 }
264
265 uint64_t p = static_cast<uint32_t>(random);
266 p = (p * p) >> 32;
267 p = (p * m) >> 32;
268
269 return static_cast<uint32_t>(ref_lane * lanes + (s + m - (p + 1)) % lanes);
270}
271
272void process_block(secure_vector<uint64_t>& B,
273 size_t n,
274 size_t slice,
275 size_t lane,
276 size_t lanes,
277 size_t segments,
278 size_t threads,
279 uint8_t mode,
280 size_t memory,
281 size_t time) {
282 uint64_t T[128];
283 size_t index = 0;
284 if(n == 0 && slice == 0) {
285 index = 2;
286 }
287
288 const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2);
289
290 uint64_t addresses[128];
291 size_t address_counter = 1;
292
293 if(use_2i) {
294 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
295 }
296
297 while(index < segments) {
298 const size_t offset = lane * lanes + slice * segments + index;
299
300 size_t prev = offset - 1;
301 if(index == 0 && slice == 0) {
302 prev += lanes;
303 }
304
305 if(use_2i && index > 0 && index % 128 == 0) {
306 address_counter += 1;
307 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
308 }
309
310 const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev);
311 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
312
313 uint64_t N[128];
314 for(size_t i = 0; i != 128; ++i) {
315 N[i] = B[128 * prev + i] ^ B[128 * new_offset + i];
316 }
317
318 Argon2::blamka(N, T);
319
320 for(size_t i = 0; i != 128; ++i) {
321 B[128 * offset + i] ^= N[i];
322 }
323
324 index += 1;
325 }
326}
327
328void process_blocks(secure_vector<uint64_t>& B, size_t t, size_t memory, size_t threads, uint8_t mode) {
329 const size_t lanes = memory / threads;
330 const size_t segments = lanes / SYNC_POINTS;
331
332#if defined(BOTAN_HAS_THREAD_UTILS)
333 if(threads > 1) {
334 auto& thread_pool = Thread_Pool::global_instance();
335
336 for(size_t n = 0; n != t; ++n) {
337 for(size_t slice = 0; slice != SYNC_POINTS; ++slice) {
338 std::vector<std::future<void>> fut_results;
339 fut_results.reserve(threads);
340
341 for(size_t lane = 0; lane != threads; ++lane) {
342 fut_results.push_back(thread_pool.run(
343 process_block, std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
344 }
345
346 for(auto& fut : fut_results) {
347 fut.get();
348 }
349 }
350 }
351
352 return;
353 }
354#endif
355
356 for(size_t n = 0; n != t; ++n) {
357 for(size_t slice = 0; slice != SYNC_POINTS; ++slice) {
358 for(size_t lane = 0; lane != threads; ++lane) {
359 process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
360 }
361 }
362 }
363}
364
365} // namespace
366
367void Argon2::argon2(uint8_t output[],
368 size_t output_len,
369 const char* password,
370 size_t password_len,
371 const uint8_t salt[],
372 size_t salt_len,
373 const uint8_t key[],
374 size_t key_len,
375 const uint8_t ad[],
376 size_t ad_len) const {
377 BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(),
378 "Invalid Argon2 output length");
379 BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 password length");
380 BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 salt length");
381 BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 key length");
382 BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 ad length");
383
384 auto blake2 = HashFunction::create_or_throw("BLAKE2b");
385
386 uint8_t H0[64] = {0};
387 argon2_H0(H0,
388 *blake2,
389 output_len,
390 password,
391 password_len,
392 salt,
393 salt_len,
394 key,
395 key_len,
396 ad,
397 ad_len,
398 m_family,
399 m_p,
400 m_M,
401 m_t);
402
403 const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p);
404
405 secure_vector<uint64_t> B(memory * 1024 / 8);
406
407 init_blocks(B, *blake2, H0, memory, m_p);
408 process_blocks(B, m_t, memory, m_p, m_family);
409
410 clear_mem(output, output_len);
411 extract_key(output, output_len, B, memory, m_p);
412}
413
414} // namespace Botan
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:59
#define BOTAN_ARG_CHECK(expr, msg)
Definition assert.h:29
static void blamka(uint64_t N[128], uint64_t T[128])
Definition argon2.cpp:168
static std::unique_ptr< HashFunction > create_or_throw(std::string_view algo_spec, std::string_view provider="")
Definition hash.cpp:298
static Thread_Pool & global_instance()
#define BOTAN_FORCE_INLINE
Definition compiler.h:165
FE_25519 T
Definition ge.cpp:34
std::string fmt(std::string_view format, const T &... args)
Definition fmt.h:53
constexpr T rotr(T input)
Definition rotate.h:33
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:521
std::vector< T, secure_allocator< T > > secure_vector
Definition secmem.h:61
constexpr void copy_mem(T *out, const T *in, size_t n)
Definition mem_ops.h:146
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:120
const uint8_t * cast_char_ptr_to_uint8(const char *s)
Definition mem_ops.h:273