Botan 3.12.0
Crypto and TLS for C&
argon2.cpp
Go to the documentation of this file.
1/**
2* (C) 2018,2019,2022 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8
9#include <botan/hash.h>
10#include <botan/mem_ops.h>
11#include <botan/internal/bit_ops.h>
12#include <botan/internal/fmt.h>
13#include <botan/internal/loadstor.h>
14#include <botan/internal/mem_utils.h>
15#include <botan/internal/rotate.h>
16#include <limits>
17
18#if defined(BOTAN_HAS_THREAD_UTILS)
19 #include <botan/internal/thread_pool.h>
20#endif
21
22#if defined(BOTAN_HAS_CPUID)
23 #include <botan/internal/cpuid.h>
24#endif
25
26namespace Botan {
27
28namespace {
29
30const size_t SYNC_POINTS = 4;
31
32void argon2_H0(uint8_t H0[64],
33 HashFunction& blake2b,
34 size_t output_len,
35 const char* password,
36 size_t password_len,
37 const uint8_t salt[],
38 size_t salt_len,
39 const uint8_t key[],
40 size_t key_len,
41 const uint8_t ad[],
42 size_t ad_len,
43 size_t y,
44 size_t p,
45 size_t M,
46 size_t t) {
47 const uint8_t v = 19; // Argon2 version code
48
49 blake2b.update_le(static_cast<uint32_t>(p));
50 blake2b.update_le(static_cast<uint32_t>(output_len));
51 blake2b.update_le(static_cast<uint32_t>(M));
52 blake2b.update_le(static_cast<uint32_t>(t));
53 blake2b.update_le(static_cast<uint32_t>(v));
54 blake2b.update_le(static_cast<uint32_t>(y));
55
56 blake2b.update_le(static_cast<uint32_t>(password_len));
57 blake2b.update(as_span_of_bytes(password, password_len));
58
59 blake2b.update_le(static_cast<uint32_t>(salt_len));
60 blake2b.update(salt, salt_len);
61
62 blake2b.update_le(static_cast<uint32_t>(key_len));
63 blake2b.update(key, key_len);
64
65 blake2b.update_le(static_cast<uint32_t>(ad_len));
66 blake2b.update(ad, ad_len);
67
68 blake2b.final(H0);
69}
70
71void extract_key(uint8_t output[], size_t output_len, const secure_vector<uint64_t>& B, size_t memory, size_t threads) {
72 const size_t lanes = memory / threads;
73
74 uint64_t sum[128] = {0};
75
76 for(size_t lane = 0; lane != threads; ++lane) {
77 const size_t start = 128 * (lane * lanes + lanes - 1);
78 const size_t end = 128 * (lane * lanes + lanes);
79
80 for(size_t j = start; j != end; ++j) {
81 sum[j % 128] ^= B[j];
82 }
83 }
84
85 if(output_len <= 64) {
86 auto blake2b = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
87 blake2b->update_le(static_cast<uint32_t>(output_len));
88 for(size_t i = 0; i != 128; ++i) { // NOLINT(modernize-loop-convert)
89 blake2b->update_le(sum[i]);
90 }
91 blake2b->final(output);
92 } else {
94
95 auto blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
96 blake2b->update_le(static_cast<uint32_t>(output_len));
97 for(size_t i = 0; i != 128; ++i) { // NOLINT(modernize-loop-convert)
98 blake2b->update_le(sum[i]);
99 }
100 blake2b->final(std::span{T});
101
102 while(output_len > 64) {
103 copy_mem(output, T.data(), 32);
104 output_len -= 32;
105 output += 32;
106
107 if(output_len > 64) {
108 blake2b->update(T);
109 blake2b->final(std::span{T});
110 }
111 }
112
113 if(output_len == 64) {
114 blake2b->update(T);
115 blake2b->final(output);
116 } else {
117 auto blake2b_f = HashFunction::create_or_throw(fmt("BLAKE2b({})", output_len * 8));
118 blake2b_f->update(T);
119 blake2b_f->final(output);
120 }
121 }
122}
123
124void init_blocks(
125 secure_vector<uint64_t>& B, HashFunction& blake2b, const uint8_t H0[64], size_t memory, size_t threads) {
126 BOTAN_ASSERT_NOMSG(B.size() >= threads * 256);
127
128 for(size_t i = 0; i != threads; ++i) {
129 const size_t B_off = i * (memory / threads);
130
131 BOTAN_ASSERT_NOMSG(B.size() >= 128 * (B_off + 2));
132
133 for(size_t j = 0; j != 2; ++j) {
134 uint8_t T[64] = {0};
135
136 blake2b.update_le(static_cast<uint32_t>(1024));
137 blake2b.update(H0, 64);
138 blake2b.update_le(static_cast<uint32_t>(j));
139 blake2b.update_le(static_cast<uint32_t>(i));
140 blake2b.final(T);
141
142 for(size_t k = 0; k != 30; ++k) {
143 load_le(&B[128 * (B_off + j) + 4 * k], T, 32 / 8);
144 blake2b.update(T, 64);
145 blake2b.final(T);
146 }
147
148 load_le(&B[128 * (B_off + j) + 4 * 30], T, 64 / 8);
149 }
150 }
151}
152
153BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D) {
154 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
155 D = rotr<32>(A ^ D);
156
157 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
158 B = rotr<24>(B ^ C);
159
160 A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
161 D = rotr<16>(A ^ D);
162
163 C += D + (static_cast<uint64_t>(2) * static_cast<uint32_t>(C)) * static_cast<uint32_t>(D);
164 B = rotr<63>(B ^ C);
165}
166
167} // namespace
168
169void Argon2::blamka(uint64_t N[128], uint64_t T[128]) {
170#if defined(BOTAN_HAS_ARGON2_AVX512)
172 return Argon2::blamka_avx512(N, T);
173 }
174#endif
175
176#if defined(BOTAN_HAS_ARGON2_AVX2)
178 return Argon2::blamka_avx2(N, T);
179 }
180#endif
181
182#if defined(BOTAN_HAS_ARGON2_SIMD64)
184 return Argon2::blamka_simd64(N, T);
185 }
186#endif
187
188 copy_mem(T, N, 128);
189
190 for(size_t i = 0; i != 128; i += 16) {
191 blamka_G(T[i + 0], T[i + 4], T[i + 8], T[i + 12]);
192 blamka_G(T[i + 1], T[i + 5], T[i + 9], T[i + 13]);
193 blamka_G(T[i + 2], T[i + 6], T[i + 10], T[i + 14]);
194 blamka_G(T[i + 3], T[i + 7], T[i + 11], T[i + 15]);
195
196 blamka_G(T[i + 0], T[i + 5], T[i + 10], T[i + 15]);
197 blamka_G(T[i + 1], T[i + 6], T[i + 11], T[i + 12]);
198 blamka_G(T[i + 2], T[i + 7], T[i + 8], T[i + 13]);
199 blamka_G(T[i + 3], T[i + 4], T[i + 9], T[i + 14]);
200 }
201
202 for(size_t i = 0; i != 128 / 8; i += 2) {
203 blamka_G(T[i + 0], T[i + 32], T[i + 64], T[i + 96]);
204 blamka_G(T[i + 1], T[i + 33], T[i + 65], T[i + 97]);
205 blamka_G(T[i + 16], T[i + 48], T[i + 80], T[i + 112]);
206 blamka_G(T[i + 17], T[i + 49], T[i + 81], T[i + 113]);
207
208 blamka_G(T[i + 0], T[i + 33], T[i + 80], T[i + 113]);
209 blamka_G(T[i + 1], T[i + 48], T[i + 81], T[i + 96]);
210 blamka_G(T[i + 16], T[i + 49], T[i + 64], T[i + 97]);
211 blamka_G(T[i + 17], T[i + 32], T[i + 65], T[i + 112]);
212 }
213
214 for(size_t i = 0; i != 128; ++i) {
215 N[i] ^= T[i];
216 }
217}
218
219namespace {
220
221void gen_2i_addresses(uint64_t T[128],
222 uint64_t B[128],
223 size_t n,
224 size_t lane,
225 size_t slice,
226 size_t memory,
227 size_t time,
228 size_t mode,
229 size_t cnt) {
230 clear_mem(B, 128);
231
232 B[0] = n;
233 B[1] = lane;
234 B[2] = slice;
235 B[3] = memory;
236 B[4] = time;
237 B[5] = mode;
238 B[6] = cnt;
239
240 for(size_t r = 0; r != 2; ++r) {
241 Argon2::blamka(B, T);
242 }
243}
244
245// Reduce random modulo Argon2 thread count (normally a power of 2)
246inline size_t mod_threads(uint32_t random, size_t threads) {
247 if(is_power_of_2(threads)) {
248 return random & static_cast<uint32_t>(threads - 1);
249 } else {
250 return random % threads;
251 }
252}
253
254// Reduce alpha modulo the lane length; always a multiple of 4 and commonly a power of 2
255inline size_t mod_lanes(uint64_t alpha, size_t lanes) {
256 if(is_power_of_2(lanes)) {
257 return static_cast<size_t>(alpha & static_cast<uint64_t>(lanes - 1));
258 } else {
259 return alpha % lanes;
260 }
261}
262
263uint32_t index_alpha(
264 uint64_t random, size_t lanes, size_t segments, size_t threads, size_t n, size_t slice, size_t lane, size_t index) {
265 size_t ref_lane = mod_threads(static_cast<uint32_t>(random >> 32), threads);
266
267 if(n == 0 && slice == 0) {
268 ref_lane = lane;
269 }
270
271 size_t m = 3 * segments;
272 size_t s = ((slice + 1) % 4) * segments;
273
274 if(lane == ref_lane) {
275 m += index;
276 }
277
278 if(n == 0) {
279 m = slice * segments;
280 s = 0;
281 if(slice == 0 || lane == ref_lane) {
282 m += index;
283 }
284 }
285
286 if(index == 0 || lane == ref_lane) {
287 m -= 1;
288 }
289
290 uint64_t p = static_cast<uint32_t>(random);
291 p = (p * p) >> 32;
292 p = (p * m) >> 32;
293
294 return static_cast<uint32_t>(ref_lane * lanes + mod_lanes(s + m - (p + 1), lanes));
295}
296
297void process_block(secure_vector<uint64_t>& B,
298 size_t n,
299 size_t slice,
300 size_t lane,
301 size_t lanes,
302 size_t segments,
303 size_t threads,
304 uint8_t mode,
305 size_t memory,
306 size_t time) {
307 uint64_t T[128];
308 size_t index = 0;
309 if(n == 0 && slice == 0) {
310 index = 2;
311 }
312
313 const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS / 2);
314
315 uint64_t addresses[128];
316 size_t address_counter = 1;
317
318 if(use_2i) {
319 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
320 }
321
322 while(index < segments) {
323 const size_t offset = lane * lanes + slice * segments + index;
324
325 size_t prev = offset - 1;
326 if(index == 0 && slice == 0) {
327 prev += lanes;
328 }
329
330 if(use_2i && index > 0 && index % 128 == 0) {
331 address_counter += 1;
332 gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
333 }
334
335 const uint64_t random = use_2i ? addresses[index % 128] : B.at(128 * prev);
336 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
337
338 uint64_t N[128];
339 for(size_t i = 0; i != 128; ++i) {
340 N[i] = B[128 * prev + i] ^ B[128 * new_offset + i];
341 }
342
343 Argon2::blamka(N, T);
344
345 for(size_t i = 0; i != 128; ++i) {
346 B[128 * offset + i] ^= N[i];
347 }
348
349 index += 1;
350 }
351}
352
353void process_blocks(secure_vector<uint64_t>& B, size_t t, size_t memory, size_t threads, uint8_t mode) {
354 const size_t lanes = memory / threads;
355 const size_t segments = lanes / SYNC_POINTS;
356
357#if defined(BOTAN_HAS_THREAD_UTILS)
358 if(threads > 1) {
359 auto& thread_pool = Thread_Pool::global_instance();
360
361 for(size_t n = 0; n != t; ++n) {
362 for(size_t slice = 0; slice != SYNC_POINTS; ++slice) {
363 std::vector<std::future<void>> fut_results;
364 fut_results.reserve(threads);
365
366 for(size_t lane = 0; lane != threads; ++lane) {
367 fut_results.push_back(thread_pool.run(
368 process_block, std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
369 }
370
371 for(auto& fut : fut_results) {
372 fut.get();
373 }
374 }
375 }
376
377 return;
378 }
379#endif
380
381 for(size_t n = 0; n != t; ++n) {
382 for(size_t slice = 0; slice != SYNC_POINTS; ++slice) {
383 for(size_t lane = 0; lane != threads; ++lane) {
384 process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
385 }
386 }
387 }
388}
389
390} // namespace
391
392void Argon2::argon2(uint8_t output[],
393 size_t output_len,
394 const char* password,
395 size_t password_len,
396 const uint8_t salt[],
397 size_t salt_len,
398 const uint8_t key[],
399 size_t key_len,
400 const uint8_t ad[],
401 size_t ad_len) const {
402 BOTAN_ARG_CHECK(output_len >= 4 && output_len <= std::numeric_limits<uint32_t>::max(),
403 "Invalid Argon2 output length");
404 BOTAN_ARG_CHECK(password_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 password length");
405 BOTAN_ARG_CHECK(salt_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 salt length");
406 BOTAN_ARG_CHECK(key_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 key length");
407 BOTAN_ARG_CHECK(ad_len <= std::numeric_limits<uint32_t>::max(), "Invalid Argon2 ad length");
408
409 auto blake2 = HashFunction::create_or_throw("BLAKE2b");
410
411 uint8_t H0[64] = {0};
412 argon2_H0(H0,
413 *blake2,
414 output_len,
415 password,
416 password_len,
417 salt,
418 salt_len,
419 key,
420 key_len,
421 ad,
422 ad_len,
423 m_family,
424 m_p,
425 m_M,
426 m_t);
427
428 const size_t memory = (m_M / (SYNC_POINTS * m_p)) * (SYNC_POINTS * m_p);
429
430 secure_vector<uint64_t> B(memory * 1024 / 8);
431
432 init_blocks(B, *blake2, H0, memory, m_p);
433 process_blocks(B, m_t, memory, m_p, m_family);
434
435 clear_mem(output, output_len);
436 extract_key(output, output_len, B, memory, m_p);
437}
438
439} // namespace Botan
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:75
#define BOTAN_ARG_CHECK(expr, msg)
Definition assert.h:33
static void blamka(uint64_t N[128], uint64_t T[128])
Definition argon2.cpp:169
static bool has(CPUID::Feature feat)
Definition cpuid.h:94
static std::unique_ptr< HashFunction > create_or_throw(std::string_view algo_spec, std::string_view provider="")
Definition hash.cpp:308
static Thread_Pool & global_instance()
#define BOTAN_FORCE_INLINE
Definition compiler.h:87
BOTAN_FORCE_INLINE constexpr bool is_power_of_2(T arg)
Definition bit_ops.h:62
std::span< const uint8_t > as_span_of_bytes(const char *s, size_t len)
Definition mem_utils.h:59
std::string fmt(std::string_view format, const T &... args)
Definition fmt.h:53
constexpr void copy_mem(T *out, const T *in, size_t n)
Definition mem_ops.h:144
BOTAN_FORCE_INLINE constexpr T rotr(T input)
Definition rotate.h:35
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:495
std::vector< T, secure_allocator< T > > secure_vector
Definition secmem.h:68
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:118