Botan 3.11.0
Crypto and TLS for C&
mp_core.h
Go to the documentation of this file.
1/*
2* MPI Algorithms
3* (C) 1999-2010,2018,2024 Jack Lloyd
4* 2006 Luca Piccarreta
5* 2016 Matthias Gierlings
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#ifndef BOTAN_MP_CORE_OPS_H_
11#define BOTAN_MP_CORE_OPS_H_
12
13#include <botan/assert.h>
14#include <botan/types.h>
15#include <botan/internal/ct_utils.h>
16#include <botan/internal/mem_utils.h>
17#include <botan/internal/mp_asmi.h>
18#include <array>
19#include <span>
20
21namespace Botan {
22
23/*
24* If cond == 0, does nothing.
25* If cond > 0, swaps x[0:size] with y[0:size]
26* Runs in constant time
27*/
28template <WordType W>
29inline constexpr void bigint_cnd_swap(W cnd, W x[], W y[], size_t size) {
30 const auto mask = CT::Mask<W>::expand(cnd);
31
32 for(size_t i = 0; i != size; ++i) {
33 const W a = x[i];
34 const W b = y[i];
35 x[i] = mask.select(b, a);
36 y[i] = mask.select(a, b);
37 }
38}
39
40/*
41* If cond > 0 adds x[0:size] and y[0:size] and returns carry
42* Runs in constant time
43*/
44template <WordType W>
45inline constexpr W bigint_cnd_add(W cnd, W x[], const W y[], size_t size) {
46 const auto mask = CT::Mask<W>::expand(cnd).value();
47
48 W carry = 0;
49
50 for(size_t i = 0; i != size; ++i) {
51 x[i] = word_add(x[i], y[i] & mask, &carry);
52 }
53
54 return (mask & carry);
55}
56
57/*
58* If cond > 0 subtracts y[0:size] from x[0:size] and returns borrow
59* Runs in constant time
60*/
61template <WordType W>
62inline constexpr auto bigint_cnd_sub(W cnd, W x[], const W y[], size_t size) -> W {
63 const auto mask = CT::Mask<W>::expand(cnd).value();
64
65 W carry = 0;
66
67 for(size_t i = 0; i != size; ++i) {
68 x[i] = word_sub(x[i], y[i] & mask, &carry);
69 }
70
71 return (mask & carry);
72}
73
74/*
75* 2s complement absolute value
76* If cond > 0 sets x to ~x + 1
77* Runs in constant time
78*/
79template <WordType W>
80inline constexpr void bigint_cnd_abs(W cnd, W x[], size_t size) {
81 const auto mask = CT::Mask<W>::expand(cnd);
82
83 W carry = mask.if_set_return(1);
84 for(size_t i = 0; i != size; ++i) {
85 const W z = word_add(~x[i], static_cast<W>(0), &carry);
86 x[i] = mask.select(z, x[i]);
87 }
88}
89
90/**
91* Two operand addition with carry out
92*/
93template <WordType W>
94inline constexpr auto bigint_add2(W x[], size_t x_size, const W y[], size_t y_size) -> W {
95 W carry = 0;
96
97 BOTAN_ASSERT(x_size >= y_size, "Expected sizes");
98
99 const size_t blocks = y_size - (y_size % 8);
100
101 for(size_t i = 0; i != blocks; i += 8) {
102 carry = word8_add2(x + i, y + i, carry);
103 }
104
105 for(size_t i = blocks; i != y_size; ++i) {
106 x[i] = word_add(x[i], y[i], &carry);
107 }
108
109 for(size_t i = y_size; i != x_size; ++i) {
110 x[i] = word_add(x[i], static_cast<W>(0), &carry);
111 }
112
113 return carry;
114}
115
116/**
117* Three operand addition with carry out
118*/
119template <WordType W>
120inline constexpr auto bigint_add3(W z[], const W x[], size_t x_size, const W y[], size_t y_size) -> W {
121 if(x_size < y_size) {
122 return bigint_add3(z, y, y_size, x, x_size);
123 }
124
125 W carry = 0;
126
127 const size_t blocks = y_size - (y_size % 8);
128
129 for(size_t i = 0; i != blocks; i += 8) {
130 carry = word8_add3(z + i, x + i, y + i, carry);
131 }
132
133 for(size_t i = blocks; i != y_size; ++i) {
134 z[i] = word_add(x[i], y[i], &carry);
135 }
136
137 for(size_t i = y_size; i != x_size; ++i) {
138 z[i] = word_add(x[i], static_cast<W>(0), &carry);
139 }
140
141 return carry;
142}
143
144/**
145* Two operand subtraction
146*/
147template <WordType W>
148inline constexpr auto bigint_sub2(W x[], size_t x_size, const W y[], size_t y_size) -> W {
149 W borrow = 0;
150
151 BOTAN_ASSERT(x_size >= y_size, "Expected sizes");
152
153 const size_t blocks = y_size - (y_size % 8);
154
155 for(size_t i = 0; i != blocks; i += 8) {
156 borrow = word8_sub2(x + i, y + i, borrow);
157 }
158
159 for(size_t i = blocks; i != y_size; ++i) {
160 x[i] = word_sub(x[i], y[i], &borrow);
161 }
162
163 for(size_t i = y_size; i != x_size; ++i) {
164 x[i] = word_sub(x[i], static_cast<W>(0), &borrow);
165 }
166
167 return borrow;
168}
169
170/**
171* Two operand subtraction, x = y - x; assumes y >= x
172*/
173template <WordType W>
174inline constexpr void bigint_sub2_rev(W x[], const W y[], size_t y_size) {
175 W borrow = 0;
176
177 for(size_t i = 0; i != y_size; ++i) {
178 x[i] = word_sub(y[i], x[i], &borrow);
179 }
180
181 BOTAN_ASSERT(borrow == 0, "y must be greater than x");
182}
183
184/**
185* Three operand subtraction
186*
187* Expects that x_size >= y_size
188*
189* Writes to z[0:x_size] and returns borrow
190*/
191template <WordType W>
192inline constexpr auto bigint_sub3(W z[], const W x[], size_t x_size, const W y[], size_t y_size) -> W {
193 W borrow = 0;
194
195 BOTAN_ASSERT(x_size >= y_size, "Expected sizes");
196
197 const size_t blocks = y_size - (y_size % 8);
198
199 for(size_t i = 0; i != blocks; i += 8) {
200 borrow = word8_sub3(z + i, x + i, y + i, borrow);
201 }
202
203 for(size_t i = blocks; i != y_size; ++i) {
204 z[i] = word_sub(x[i], y[i], &borrow);
205 }
206
207 for(size_t i = y_size; i != x_size; ++i) {
208 z[i] = word_sub(x[i], static_cast<W>(0), &borrow);
209 }
210
211 return borrow;
212}
213
214/**
215* Conditional subtraction for Montgomery reduction
216*
217* This function assumes that (x0 || x) is less than 2*p
218*
219* Computes z[0:N] = (x0 || x[0:N]) - p[0:N]
220*
221* If z would be positive, returns z[0:N]
222* Otherwise returns original input x
223*/
224template <WordType W>
225inline constexpr void bigint_monty_maybe_sub(size_t N, W z[], W x0, const W x[], const W p[]) {
226 W borrow = 0;
227
228 const size_t blocks = N - (N % 8);
229
230 for(size_t i = 0; i != blocks; i += 8) {
231 borrow = word8_sub3(z + i, x + i, p + i, borrow);
232 }
233
234 for(size_t i = blocks; i != N; ++i) {
235 z[i] = word_sub(x[i], p[i], &borrow);
236 }
237
238 borrow = (x0 - borrow) > x0;
239
240 CT::conditional_assign_mem(borrow, z, x, N);
241}
242
243/**
244* Conditional subtraction for Montgomery reduction
245*
246* This function assumes that (x0 || x) is less than 2*p
247*
248* Computes z[0:N] = (x0 || x[0:N]) - p[0:N]
249*
250* If z would be positive, returns z[0:N]
251* Otherwise returns original input x
252*/
253template <size_t N, WordType W>
254inline constexpr void bigint_monty_maybe_sub(W z[N], W x0, const W x[N], const W y[N]) {
255 W borrow = 0;
256
257 for(size_t i = 0; i != N; ++i) {
258 z[i] = word_sub(x[i], y[i], &borrow);
259 }
260
261 borrow = (x0 - borrow) > x0;
262
263 CT::conditional_assign_mem(borrow, z, x, N);
264}
265
266/**
267* Return abs(x-y), ie if x >= y, then compute z = x - y
268* Otherwise compute z = y - x
269* No borrow is possible since the result is always >= 0
270*
271* Returns a Mask: |1| if x >= y or |0| if x < y
272* @param z output array of at least N words
273* @param x input array of N words
274* @param y input array of N words
275* @param N length of x and y
276* @param ws array of at least 2*N words
277*/
278template <WordType W>
279inline constexpr auto bigint_sub_abs(W z[], const W x[], const W y[], size_t N, W ws[]) -> CT::Mask<W> {
280 // Subtract in both direction then conditional copy out the result
281
282 W* ws0 = ws;
283 W* ws1 = ws + N;
284
285 W borrow0 = 0;
286 W borrow1 = 0;
287
288 const size_t blocks = N - (N % 8);
289
290 for(size_t i = 0; i != blocks; i += 8) {
291 borrow0 = word8_sub3(ws0 + i, x + i, y + i, borrow0);
292 borrow1 = word8_sub3(ws1 + i, y + i, x + i, borrow1);
293 }
294
295 for(size_t i = blocks; i != N; ++i) {
296 ws0[i] = word_sub(x[i], y[i], &borrow0);
297 ws1[i] = word_sub(y[i], x[i], &borrow1);
298 }
299
300 return CT::conditional_copy_mem(borrow0, z, ws1, ws0, N);
301}
302
303/*
304* Shift Operations
305*/
306template <WordType W>
307inline constexpr void bigint_shl1(W x[], size_t x_size, size_t x_words, size_t shift) {
308 const size_t word_shift = shift / WordInfo<W>::bits;
309 const size_t bit_shift = shift % WordInfo<W>::bits;
310
311 unchecked_copy_memory(x + word_shift, x, x_words);
312 zeroize_buffer(x, word_shift);
313
314 const auto carry_mask = CT::Mask<W>::expand(bit_shift);
315 const W carry_shift = carry_mask.if_set_return(WordInfo<W>::bits - bit_shift);
316
317 W carry = 0;
318 for(size_t i = word_shift; i != x_size; ++i) {
319 const W w = x[i];
320 x[i] = (w << bit_shift) | carry;
321 carry = carry_mask.if_set_return(w >> carry_shift);
322 }
323}
324
325template <WordType W>
326inline constexpr void bigint_shr1(W x[], size_t x_size, size_t shift) {
327 const size_t word_shift = shift / WordInfo<W>::bits;
328 const size_t bit_shift = shift % WordInfo<W>::bits;
329
330 const size_t top = x_size >= word_shift ? (x_size - word_shift) : 0;
331
332 if(top > 0) {
333 unchecked_copy_memory(x, x + word_shift, top);
334 }
335 zeroize_buffer(x + top, std::min(word_shift, x_size));
336
337 const auto carry_mask = CT::Mask<W>::expand(bit_shift);
338 const W carry_shift = carry_mask.if_set_return(WordInfo<W>::bits - bit_shift);
339
340 W carry = 0;
341
342 for(size_t i = 0; i != top; ++i) {
343 const W w = x[top - i - 1];
344 x[top - i - 1] = (w >> bit_shift) | carry;
345 carry = carry_mask.if_set_return(w << carry_shift);
346 }
347}
348
349template <WordType W>
350inline constexpr void bigint_shl2(W y[], const W x[], size_t x_size, size_t shift) {
351 const size_t word_shift = shift / WordInfo<W>::bits;
352 const size_t bit_shift = shift % WordInfo<W>::bits;
353
354 unchecked_copy_memory(y + word_shift, x, x_size);
355
356 const auto carry_mask = CT::Mask<W>::expand(bit_shift);
357 const W carry_shift = carry_mask.if_set_return(WordInfo<W>::bits - bit_shift);
358
359 W carry = 0;
360 for(size_t i = word_shift; i != x_size + word_shift + 1; ++i) {
361 const W w = y[i];
362 y[i] = (w << bit_shift) | carry;
363 carry = carry_mask.if_set_return(w >> carry_shift);
364 }
365}
366
367template <WordType W>
368inline constexpr void bigint_shr2(W y[], const W x[], size_t x_size, size_t shift) {
369 const size_t word_shift = shift / WordInfo<W>::bits;
370 const size_t bit_shift = shift % WordInfo<W>::bits;
371 const size_t new_size = x_size < word_shift ? 0 : (x_size - word_shift);
372
373 if(new_size > 0) {
374 unchecked_copy_memory(y, x + word_shift, new_size);
375 }
376
377 const auto carry_mask = CT::Mask<W>::expand(bit_shift);
378 const W carry_shift = carry_mask.if_set_return(WordInfo<W>::bits - bit_shift);
379
380 W carry = 0;
381 for(size_t i = new_size; i > 0; --i) {
382 W w = y[i - 1];
383 y[i - 1] = (w >> bit_shift) | carry;
384 carry = carry_mask.if_set_return(w << carry_shift);
385 }
386}
387
388/*
389* Linear Multiply - returns the carry
390*/
391template <WordType W>
392[[nodiscard]] inline constexpr auto bigint_linmul2(W x[], size_t x_size, W y) -> W {
393 W carry = 0;
394
395 for(size_t i = 0; i != x_size; ++i) {
396 x[i] = word_madd2(x[i], y, &carry);
397 }
398
399 return carry;
400}
401
402template <WordType W>
403inline constexpr void bigint_linmul3(W z[], const W x[], size_t x_size, W y) {
404 const size_t blocks = x_size - (x_size % 8);
405
406 W carry = 0;
407
408 for(size_t i = 0; i != blocks; i += 8) {
409 carry = word8_linmul3(z + i, x + i, y, carry);
410 }
411
412 for(size_t i = blocks; i != x_size; ++i) {
413 z[i] = word_madd2(x[i], y, &carry);
414 }
415
416 z[x_size] = carry;
417}
418
419/**
420* Compare x and y
421* Return -1 if x < y
422* Return 0 if x == y
423* Return 1 if x > y
424*/
425template <WordType W>
426inline constexpr int32_t bigint_cmp(const W x[], size_t x_size, const W y[], size_t y_size) {
427 static_assert(sizeof(W) >= sizeof(uint32_t), "Size assumption");
428
429 const W LT = static_cast<W>(-1);
430 const W EQ = 0;
431 const W GT = 1;
432
433 const size_t common_elems = std::min(x_size, y_size);
434
435 W result = EQ; // until found otherwise
436
437 for(size_t i = 0; i != common_elems; i++) {
438 const auto is_eq = CT::Mask<W>::is_equal(x[i], y[i]);
439 const auto is_lt = CT::Mask<W>::is_lt(x[i], y[i]);
440
441 result = is_eq.select(result, is_lt.select(LT, GT));
442 }
443
444 if(x_size < y_size) {
445 W mask = 0;
446 for(size_t i = x_size; i != y_size; i++) {
447 mask |= y[i];
448 }
449
450 // If any bits were set in high part of y, then x < y
451 result = CT::Mask<W>::is_zero(mask).select(result, LT);
452 } else if(y_size < x_size) {
453 W mask = 0;
454 for(size_t i = y_size; i != x_size; i++) {
455 mask |= x[i];
456 }
457
458 // If any bits were set in high part of x, then x > y
459 result = CT::Mask<W>::is_zero(mask).select(result, GT);
460 }
461
462 CT::unpoison(result);
463 BOTAN_DEBUG_ASSERT(result == LT || result == GT || result == EQ);
464 return static_cast<int32_t>(result);
465}
466
467/**
468* Compare x and y
469* Returns a Mask: |1| if x[0:x_size] < y[0:y_size] or |0| otherwise
470* If lt_or_equal is true, returns |1| also for x == y
471*/
472template <WordType W>
473inline constexpr auto bigint_ct_is_lt(const W x[], size_t x_size, const W y[], size_t y_size, bool lt_or_equal = false)
474 -> CT::Mask<W> {
475 const size_t common_elems = std::min(x_size, y_size);
476
477 auto is_lt = CT::Mask<W>::expand(lt_or_equal);
478
479 for(size_t i = 0; i != common_elems; i++) {
480 const auto eq = CT::Mask<W>::is_equal(x[i], y[i]);
481 const auto lt = CT::Mask<W>::is_lt(x[i], y[i]);
482 is_lt = eq.select_mask(is_lt, lt);
483 }
484
485 if(x_size < y_size) {
486 W mask = 0;
487 for(size_t i = x_size; i != y_size; i++) {
488 mask |= y[i];
489 }
490 // If any bits were set in high part of y, then is_lt should be forced true
491 is_lt |= CT::Mask<W>::expand(mask);
492 } else if(y_size < x_size) {
493 W mask = 0;
494 for(size_t i = y_size; i != x_size; i++) {
495 mask |= x[i];
496 }
497
498 // If any bits were set in high part of x, then is_lt should be false
499 is_lt &= CT::Mask<W>::is_zero(mask);
500 }
501
502 return is_lt;
503}
504
505template <WordType W>
506inline constexpr auto bigint_ct_is_eq(const W x[], size_t x_size, const W y[], size_t y_size) -> CT::Mask<W> {
507 const size_t common_elems = std::min(x_size, y_size);
508
509 W diff = 0;
510
511 for(size_t i = 0; i != common_elems; i++) {
512 diff |= (x[i] ^ y[i]);
513 }
514
515 // If any bits were set in high part of x/y, then they are not equal
516 if(x_size < y_size) {
517 for(size_t i = x_size; i != y_size; i++) {
518 diff |= y[i];
519 }
520 } else if(y_size < x_size) {
521 for(size_t i = y_size; i != x_size; i++) {
522 diff |= x[i];
523 }
524 }
525
526 return CT::Mask<W>::is_zero(diff);
527}
528
529template <WordType W, W div>
530consteval std::pair<W, size_t> div_magic()
531 requires(div == 10)
532{
533 if constexpr(div == 10 && std::same_as<W, uint32_t>) {
534 constexpr W magic = 0xCCCCCCCD;
535 constexpr size_t shift = 35;
536 return std::make_pair(magic, shift);
537 } else if constexpr(div == 10 && std::same_as<W, uint64_t>) {
538 constexpr W magic = 0xCCCCCCCCCCCCCCCD;
539 constexpr size_t shift = 67;
540 return std::make_pair(magic, shift);
541 }
542}
543
544template <WordType W>
545inline constexpr W divide_10(W x) {
546 auto [magic, shift] = div_magic<W, 10>();
547 const auto p = typename WordInfo<W>::dword(magic) * x;
548 return static_cast<W>(p >> shift);
549}
550
551/**
552* Setup for variable-time word level division/modulo operations
553*
554* Currently this just uses the compiler's support for a 2/1 word division,
555* but likely could be improved by precomputed values based on the divisor,
556* for example using the approaches outlined in Hacker's Delight chapter 10.
557*/
558template <WordType W>
559class divide_precomp final {
560 public:
561 explicit constexpr divide_precomp(W divisor) : m_divisor(divisor) {
562 BOTAN_ARG_CHECK(m_divisor != 0, "Division by zero");
563 }
564
565 // Return floor((n1 || n0) / d)
566 //
567 // This assumes n1 < d so that the quotient fits in a word
568 inline constexpr W vartime_div_2to1(W n1, W n0) const {
569 BOTAN_ASSERT_NOMSG(n1 < m_divisor);
570
571 if(m_divisor == WordInfo<W>::max) {
572 return vartime_div_2to1_max_d(n1, n0);
573 }
574
575 if(m_divisor == WordInfo<W>::top_bit) {
576 // Simply a shift by N-1 bits
577 return (n1 << 1) | (n0 >> (WordInfo<W>::bits - 1));
578 }
579
580 if(!std::is_constant_evaluated()) {
581#if defined(BOTAN_MP_USE_X86_64_ASM)
582 if constexpr(std::same_as<W, uint64_t>) {
583 W quotient = 0;
584 W remainder = 0;
585 // NOLINTNEXTLINE(*-no-assembler)
586 asm("divq %[v]" : "=a"(quotient), "=d"(remainder) : [v] "r"(m_divisor), "a"(n0), "d"(n1));
587 return quotient;
588 }
589#endif
590
591#if !defined(BOTAN_BUILD_COMPILER_IS_CLANGCL)
592
593 /* clang-cl has a bug where on encountering a 128/64 division it emits
594 * a call to __udivti3() but then fails to link the relevant builtin into
595 * the binary, causing a link failure. Work around this by simply omitting
596 * such code for clang-cl
597 *
598 * See https://github.com/llvm/llvm-project/issues/25679
599 */
600 if constexpr(WordInfo<W>::dword_is_native) {
601 typename WordInfo<W>::dword n = n1;
602 n <<= WordInfo<W>::bits;
603 n |= n0;
604 return static_cast<W>(n / m_divisor);
605 }
606#endif
607 }
608
609 W high = n1;
610 W quotient = 0;
611
612 for(size_t i = 0; i != WordInfo<W>::bits; ++i) {
613 const W high_top_bit = high >> (WordInfo<W>::bits - 1);
614
615 high <<= 1;
616 high |= (n0 >> (WordInfo<W>::bits - 1 - i)) & 1;
617 quotient <<= 1;
618
619 if(high_top_bit || high >= m_divisor) {
620 high -= m_divisor;
621 quotient |= 1;
622 }
623 }
624
625 return quotient;
626 }
627
628 // Return floor((n1 || n0) % d)
629 //
630 // This assumes n1 < d so that the quotient fits in a word
631 inline constexpr W vartime_mod_2to1(W n1, W n0) const {
632 BOTAN_ASSERT_NOMSG(n1 < m_divisor);
633 W q = this->vartime_div_2to1(n1, n0);
634 W carry = 0;
635 q = word_madd2(q, m_divisor, &carry);
636 return (n0 - q);
637 }
638
639 private:
640 /*
641 * When the divisor is the maximum integer value, then a two word
642 * division becomes simple.
643 */
644 static inline constexpr W vartime_div_2to1_max_d(W n1, W n0) {
645 /*
646 Use k to refer to WordInfo<W>::bits
647
648 We are dividing n = (n1 * 2^k) + n0 by 2^k - 1
649
650 Recall that 2^k = 1 (mod 2^k - 1)
651
652 Rewrite n = n1*2^k + n0 as n1*(2^k - 1) + n1 + n0
653
654 The result of dividing n by (2^k - 1) will be equal to
655 (n1*(2^k-1) + n1 + n0) / (2^k-1) =
656 n1 + ((n1 + n0) / (2^k-1)
657
658 Use c to refer to ((n1 + n0) / (2^k-1))
659
660 If (n1 + n0) < (2^k - 1) then c is 0
661 If (n1 + n0) >= (2^k - 1) then c is 1
662
663 Since n1 < 2^k - 1 [*] and n0 <= 2^k - 1 it is impossible for (n1 + n0) / (2^k -1)
664 to be greater than 1.
665
666 [*] We require n1 be strictly less than the divisor to ensure that the
667 output fits in a single word; this is checked at the start of vartime_div_2to1.
668 */
669
670 const W s = n0 + n1;
671 // did n0 + n1 overflow? or does (n0 + n1) == 2^k - 1? if either, c == 1
672 if(s < n0 || s == WordInfo<W>::max) {
673 n1 += 1;
674 }
675
676 return n1;
677 }
678
679 W m_divisor;
680};
681
682/*
683* Compute an integer x such that (a*x) == -1 (mod 2^n)
684*
685* Throws an exception if input is even, since in that case no inverse
686* exists. If input is odd, then input and 2^n are relatively prime and
687* the inverse exists.
688*/
689template <WordType W>
690inline constexpr auto monty_inverse(W a) -> W {
691 BOTAN_ARG_CHECK(a % 2 == 1, "Cannot compute Montgomery inverse of an even integer");
692
693 // Newton's Method, following https://lemire.me/blog/2017/09/18/computing-the-inverse-of-odd-integers/
694
695 constexpr size_t iter = WordInfo<W>::bits == 64 ? 4 : 3;
696
697 // Initial guess provides 5 bits of accuracy
698 W r = (3 * a) ^ 2;
699
700 // Each iteration doubles the accuracy
701 for(size_t i = 0; i != iter; ++i) {
702 r = r * (2 - r * a);
703 }
704
705 // Now invert in addition space
706 r = (WordInfo<W>::max - r) + 1;
707
708 return r;
709}
710
711template <size_t S, WordType W, size_t N>
712inline constexpr W shift_left(std::array<W, N>& x) {
713 static_assert(N >= 1, "Invalid input size");
714 static_assert(S < WordInfo<W>::bits, "Shift too large");
715
716 const W carry = x[N - 1] >> (WordInfo<W>::bits - S);
717
718 for(size_t i = N - 1; i != 0; --i) {
719 x[i] = (x[i] << S) | (x[i - 1] >> (WordInfo<W>::bits - S));
720 }
721 x[0] <<= S;
722
723 return carry;
724}
725
726template <size_t S, WordType W, size_t N>
727inline constexpr W shift_right(std::array<W, N>& x) {
728 static_assert(N >= 1, "Invalid input size");
729 static_assert(S < WordInfo<W>::bits, "Shift too large");
730
731 const W carry = x[0] << (WordInfo<W>::bits - S);
732
733 for(size_t i = 0; i != N - 1; ++i) {
734 x[i] = (x[i] >> S) | (x[i + 1] << (WordInfo<W>::bits - S));
735 }
736 x[N - 1] >>= S;
737
738 return carry;
739}
740
741// Should be consteval but this triggers a bug in Clang 14
742template <WordType W, size_t N>
743constexpr auto hex_to_words(const char (&s)[N]) {
744 // Char count includes null terminator which we ignore
745 const constexpr size_t C = N - 1;
746
747 // Number of nibbles that a word can hold
748 const constexpr size_t NPW = (WordInfo<W>::bits / 4);
749
750 // Round up to the next number of words that will fit the input
751 const constexpr size_t S = (C + NPW - 1) / NPW;
752
753 auto hex2int = [](char c) -> int8_t {
754 if(c >= '0' && c <= '9') {
755 return static_cast<int8_t>(c - '0');
756 } else if(c >= 'a' && c <= 'f') {
757 return static_cast<int8_t>(c - 'a' + 10);
758 } else if(c >= 'A' && c <= 'F') {
759 return static_cast<int8_t>(c - 'A' + 10);
760 } else {
761 return -1;
762 }
763 };
764
765 std::array<W, S> r = {0};
766
767 for(size_t i = 0; i != C; ++i) {
768 const int8_t c = hex2int(s[i]);
769 if(c >= 0) {
770 shift_left<4>(r);
771 r[0] += c;
772 }
773 }
774
775 return r;
776}
777
778/*
779* Comba Multiplication / Squaring
780*/
781BOTAN_FUZZER_API void bigint_comba_mul4(word z[8], const word x[4], const word y[4]);
782BOTAN_FUZZER_API void bigint_comba_mul6(word z[12], const word x[6], const word y[6]);
783BOTAN_FUZZER_API void bigint_comba_mul7(word z[14], const word x[7], const word y[7]);
784BOTAN_FUZZER_API void bigint_comba_mul8(word z[16], const word x[8], const word y[8]);
785BOTAN_FUZZER_API void bigint_comba_mul9(word z[18], const word x[9], const word y[9]);
786BOTAN_FUZZER_API void bigint_comba_mul16(word z[32], const word x[16], const word y[16]);
787BOTAN_FUZZER_API void bigint_comba_mul24(word z[48], const word x[24], const word y[24]);
788
789BOTAN_FUZZER_API void bigint_comba_sqr4(word out[8], const word in[4]);
790BOTAN_FUZZER_API void bigint_comba_sqr6(word out[12], const word in[6]);
791BOTAN_FUZZER_API void bigint_comba_sqr7(word out[14], const word x[7]);
792BOTAN_FUZZER_API void bigint_comba_sqr8(word out[16], const word in[8]);
793BOTAN_FUZZER_API void bigint_comba_sqr9(word out[18], const word in[9]);
794BOTAN_FUZZER_API void bigint_comba_sqr16(word out[32], const word in[16]);
795BOTAN_FUZZER_API void bigint_comba_sqr24(word out[48], const word in[24]);
796
797/*
798* Comba Fixed Length Multiplication
799*/
800template <size_t N, WordType W>
801constexpr inline void comba_mul(W z[2 * N], const W x[N], const W y[N]) {
802 if(!std::is_constant_evaluated()) {
803 if constexpr(std::same_as<W, word> && N == 4) {
804 return bigint_comba_mul4(z, x, y);
805 }
806 if constexpr(std::same_as<W, word> && N == 6) {
807 return bigint_comba_mul6(z, x, y);
808 }
809 if constexpr(std::same_as<W, word> && N == 7) {
810 return bigint_comba_mul7(z, x, y);
811 }
812 if constexpr(std::same_as<W, word> && N == 8) {
813 return bigint_comba_mul8(z, x, y);
814 }
815 if constexpr(std::same_as<W, word> && N == 9) {
816 return bigint_comba_mul9(z, x, y);
817 }
818 if constexpr(std::same_as<W, word> && N == 16) {
819 return bigint_comba_mul16(z, x, y);
820 }
821 }
822
823 word3<W> accum;
824
825 for(size_t i = 0; i != 2 * N; ++i) {
826 const size_t start = i + 1 < N ? 0 : i + 1 - N;
827 const size_t end = std::min(N, i + 1);
828
829 for(size_t j = start; j != end; ++j) {
830 accum.mul(x[j], y[i - j]);
831 }
832 z[i] = accum.extract();
833 }
834}
835
836template <size_t N, WordType W>
837constexpr inline void comba_sqr(W z[2 * N], const W x[N]) {
838 if(!std::is_constant_evaluated()) {
839 if constexpr(std::same_as<W, word> && N == 4) {
840 return bigint_comba_sqr4(z, x);
841 }
842 if constexpr(std::same_as<W, word> && N == 6) {
843 return bigint_comba_sqr6(z, x);
844 }
845 if constexpr(std::same_as<W, word> && N == 7) {
846 return bigint_comba_sqr7(z, x);
847 }
848 if constexpr(std::same_as<W, word> && N == 8) {
849 return bigint_comba_sqr8(z, x);
850 }
851 if constexpr(std::same_as<W, word> && N == 9) {
852 return bigint_comba_sqr9(z, x);
853 }
854 if constexpr(std::same_as<W, word> && N == 16) {
855 return bigint_comba_sqr16(z, x);
856 }
857 }
858
859 word3<W> accum;
860
861 for(size_t i = 0; i != 2 * N; ++i) {
862 const size_t start = i + 1 < N ? 0 : i + 1 - N;
863 const size_t end = std::min(N, i + 1);
864
865 for(size_t j = start; j != end; ++j) {
866 accum.mul(x[j], x[i - j]);
867 }
868 z[i] = accum.extract();
869 }
870}
871
872/*
873* Montgomery reduction
874*
875* Sets r to the Montgomery reduction of z using parameters p / p_dash
876*
877* The workspace should be of size equal to the prime
878*/
879BOTAN_FUZZER_API void bigint_monty_redc_4(word r[4], const word z[8], const word p[4], word p_dash, word ws[4]);
880BOTAN_FUZZER_API void bigint_monty_redc_6(word r[6], const word z[12], const word p[6], word p_dash, word ws[6]);
881BOTAN_FUZZER_API void bigint_monty_redc_8(word r[8], const word z[16], const word p[8], word p_dash, word ws[8]);
882BOTAN_FUZZER_API void bigint_monty_redc_12(word r[12], const word z[24], const word p[12], word p_dash, word ws[12]);
883BOTAN_FUZZER_API void bigint_monty_redc_16(word r[16], const word z[32], const word p[16], word p_dash, word ws[16]);
884BOTAN_FUZZER_API void bigint_monty_redc_24(word r[24], const word z[48], const word p[24], word p_dash, word ws[24]);
885BOTAN_FUZZER_API void bigint_monty_redc_32(word r[32], const word z[64], const word p[32], word p_dash, word ws[32]);
886
889 word r[], const word z[], size_t z_size, const word p[], size_t p_size, word p_dash, word ws[]);
890
891/**
892* Montgomery Reduction
893* @param r result of exactly p_size words
894* @param z integer to reduce, of size exactly 2*p_size.
895* @param p modulus
896* @param p_size size of p
897* @param p_dash Montgomery value
898* @param ws array of at least p_size words
899* @param ws_size size of ws in words
900*
901* It is allowed to set &r[0] == &z[0] however in this case note that only the
902* first p_size words of r will be written to and the high p_size words of r/z
903* will still hold the original inputs, these must be cleared after use.
904* See bigint_monty_redc_inplace
905*/
907 word r[], const word z[], const word p[], size_t p_size, word p_dash, word ws[], size_t ws_size) {
908 const size_t z_size = 2 * p_size;
909
910 BOTAN_ARG_CHECK(ws_size >= p_size, "Montgomery reduction workspace too small");
911
912 if(p_size == 4) {
913 bigint_monty_redc_4(r, z, p, p_dash, ws);
914 } else if(p_size == 6) {
915 bigint_monty_redc_6(r, z, p, p_dash, ws);
916 } else if(p_size == 8) {
917 bigint_monty_redc_8(r, z, p, p_dash, ws);
918 } else if(p_size == 12) {
919 bigint_monty_redc_12(r, z, p, p_dash, ws);
920 } else if(p_size == 16) {
921 bigint_monty_redc_16(r, z, p, p_dash, ws);
922 } else if(p_size == 24) {
923 bigint_monty_redc_24(r, z, p, p_dash, ws);
924 } else if(p_size == 32) {
925 bigint_monty_redc_32(r, z, p, p_dash, ws);
926 } else {
927 bigint_monty_redc_generic(r, z, z_size, p, p_size, p_dash, ws);
928 }
929}
930
931inline void bigint_monty_redc_inplace(word z[], const word p[], size_t p_size, word p_dash, word ws[], size_t ws_size) {
932 bigint_monty_redc(z, z, p, p_size, p_dash, ws, ws_size);
933 zeroize_buffer(z + p_size, p_size);
934}
935
936/**
937* Basecase O(N^2) multiplication
938*/
940void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size);
941
942/**
943* Basecase O(N^2) squaring
944*/
946void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size);
947
948/*
949* High Level Multiplication/Squaring Interfaces
950*/
951void bigint_mul(word z[],
952 size_t z_size,
953 const word x[],
954 size_t x_size,
955 size_t x_sw,
956 const word y[],
957 size_t y_size,
958 size_t y_sw,
959 word workspace[],
960 size_t ws_size);
961
962void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size);
963
964/**
965* Reduce z modulo p = 2**B - C where C is small
966*
967* z is assumed to be at most (p-1)**2
968*
969* For details on the algorithm see
970* - Handbook of Applied Cryptography, Algorithm 14.47
971* - Guide to Elliptic Curve Cryptography, Algorithm 2.54 and Note 2.55
972*
973*/
974template <WordType W, size_t N, W C>
975constexpr std::array<W, N> redc_crandall(std::span<const W, 2 * N> z) {
976 static_assert(N >= 2);
977
978 std::array<W, N> hi = {};
979
980 // hi = hi * c + lo
981
982 W carry = 0;
983 for(size_t i = 0; i != N; ++i) {
984 hi[i] = word_madd3(z[i + N], C, z[i], &carry);
985 }
986
987 // hi += carry * C
988 word carry_c[2] = {0};
989 carry_c[0] = word_madd2(carry, C, &carry_c[1]);
990
991 carry = bigint_add2(hi.data(), N, carry_c, 2);
992
993 constexpr W P0 = WordInfo<W>::max - (C - 1);
994
995 std::array<W, N> r = {};
996
997 W borrow = 0;
998
999 /*
1000 * For undetermined reasons, on GCC (only) removing this asm block causes
1001 * massive (up to 20%) performance regressions in secp256k1.
1002 *
1003 * The generated code without the asm seems quite reasonable, and timing
1004 * repeated calls to redc_crandall with the cycle counter show that GCC
1005 * computes it in about the same number of cycles with or without the asm.
1006 *
1007 * So the cause of the regression is unclear. But it is reproducible across
1008 * machines and GCC versions.
1009 */
1010#if defined(BOTAN_MP_USE_X86_64_ASM) && defined(__GNUC__) && !defined(__clang__)
1011 if constexpr(N == 4 && std::same_as<W, uint64_t>) {
1012 if(!std::is_constant_evaluated()) {
1013 asm volatile(R"(
1014 movq 0(%[x]), %[borrow]
1015 subq %[p0], %[borrow]
1016 movq %[borrow], 0(%[r])
1017 movq 16(%[x]), %[borrow]
1018 sbbq $-1, %[borrow]
1019 movq %[borrow], 8(%[r])
1020 movq 16(%[x]), %[borrow]
1021 sbbq $-1, %[borrow]
1022 movq %[borrow], 16(%[r])
1023 movq 24(%[x]), %[borrow]
1024 sbbq $-1, %[borrow]
1025 movq %[borrow], 24(%[r])
1026 sbbq %[borrow],%[borrow]
1027 negq %[borrow]
1028 )"
1029 : [borrow] "=r"(borrow)
1030 : [x] "r"(hi.data()), [p0] "r"(P0), [r] "r"(r.data()), "0"(borrow)
1031 : "cc", "memory");
1032 }
1033
1034 borrow = (carry - borrow) > carry;
1035 CT::conditional_assign_mem(borrow, r.data(), hi.data(), N);
1036 return r;
1037 }
1038#endif
1039
1040 r[0] = word_sub(hi[0], P0, &borrow);
1041 for(size_t i = 1; i != N; ++i) {
1042 r[i] = word_sub(hi[i], WordInfo<W>::max, &borrow);
1043 }
1044
1045 borrow = (carry - borrow) > carry;
1046
1047 CT::conditional_assign_mem(borrow, r.data(), hi.data(), N);
1048
1049 return r;
1050}
1051
1052// Extract a WindowBits sized window out of s, depending on offset.
1053template <size_t WindowBits, typename W, size_t N>
1054constexpr size_t read_window_bits(std::span<const W, N> words, size_t offset) {
1055 static_assert(WindowBits >= 1 && WindowBits <= 7);
1056
1057 constexpr uint8_t WindowMask = static_cast<uint8_t>(1 << WindowBits) - 1;
1058
1059 constexpr size_t W_bits = sizeof(W) * 8;
1060 const auto bit_shift = offset % W_bits;
1061 const auto word_offset = words.size() - 1 - (offset / W_bits);
1062
1063 const bool single_byte_window = bit_shift <= (W_bits - WindowBits) || word_offset == 0;
1064
1065 const auto w0 = words[word_offset];
1066
1067 if(single_byte_window) {
1068 return (w0 >> bit_shift) & WindowMask;
1069 } else {
1070 // Otherwise we must join two words and extract the result
1071 const auto w1 = words[word_offset - 1];
1072 const auto combined = ((w0 >> bit_shift) | (w1 << (W_bits - bit_shift)));
1073 return combined & WindowMask;
1074 }
1075}
1076
1077} // namespace Botan
1078
1079#endif
#define BOTAN_FUZZER_API
Definition api.h:65
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:75
#define BOTAN_DEBUG_ASSERT(expr)
Definition assert.h:129
#define BOTAN_ARG_CHECK(expr, msg)
Definition assert.h:33
#define BOTAN_ASSERT(expr, assertion_made)
Definition assert.h:62
static constexpr Mask< T > expand(T v)
Definition ct_utils.h:392
static constexpr Mask< T > is_equal(T x, T y)
Definition ct_utils.h:442
static constexpr Mask< T > is_lt(T x, T y)
Definition ct_utils.h:450
static constexpr Mask< T > is_zero(T x)
Definition ct_utils.h:437
constexpr W vartime_mod_2to1(W n1, W n0) const
Definition mp_core.h:631
constexpr W vartime_div_2to1(W n1, W n0) const
Definition mp_core.h:568
constexpr divide_precomp(W divisor)
Definition mp_core.h:561
constexpr W extract()
Definition mp_asmi.h:610
constexpr void mul(W x, W y)
Definition mp_asmi.h:495
constexpr Mask< T > conditional_copy_mem(Mask< T > mask, T *dest, const T *if_set, const T *if_unset, size_t elems)
Definition ct_utils.h:732
constexpr void unpoison(const T *p, size_t n)
Definition ct_utils.h:67
constexpr Mask< T > conditional_assign_mem(T cnd, T *dest, const T *src, size_t elems)
Definition ct_utils.h:749
constexpr void bigint_cnd_abs(W cnd, W x[], size_t size)
Definition mp_core.h:80
constexpr auto bigint_add2(W x[], size_t x_size, const W y[], size_t y_size) -> W
Definition mp_core.h:94
constexpr void bigint_linmul3(W z[], const W x[], size_t x_size, W y)
Definition mp_core.h:403
constexpr auto bigint_cnd_sub(W cnd, W x[], const W y[], size_t size) -> W
Definition mp_core.h:62
constexpr auto bigint_add3(W z[], const W x[], size_t x_size, const W y[], size_t y_size) -> W
Definition mp_core.h:120
constexpr void bigint_cnd_swap(W cnd, W x[], W y[], size_t size)
Definition mp_core.h:29
constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:371
constexpr W shift_left(std::array< W, N > &x)
Definition mp_core.h:712
constexpr auto word_sub(W x, W y, W *carry) -> W
Definition mp_asmi.h:320
constexpr auto word_add(W x, W y, W *carry) -> W
Definition mp_asmi.h:231
constexpr void comba_sqr(W z[2 *N], const W x[N])
Definition mp_core.h:837
constexpr uint64_t carry_shift(const donna128 &a, size_t shift)
Definition donna128.h:129
constexpr void bigint_shr2(W y[], const W x[], size_t x_size, size_t shift)
Definition mp_core.h:368
BOTAN_FUZZER_API void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size)
Definition mp_karat.cpp:46
constexpr size_t read_window_bits(std::span< const W, N > words, size_t offset)
Definition mp_core.h:1054
void bigint_comba_sqr4(word z[8], const word x[4])
Definition mp_comba.cpp:16
void bigint_comba_sqr6(word z[12], const word x[6])
Definition mp_comba.cpp:74
constexpr void bigint_shr1(W x[], size_t x_size, size_t shift)
Definition mp_core.h:326
constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:294
BOTAN_FUZZER_API void bigint_monty_redc_6(word r[6], const word z[12], const word p[6], word p_dash, word ws[6])
constexpr void comba_mul(W z[2 *N], const W x[N], const W y[N])
Definition mp_core.h:801
constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:345
void bigint_comba_sqr7(word z[14], const word x[7])
Definition mp_comba.cpp:171
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
Definition mp_comba.cpp:42
constexpr auto word_madd2(W a, W b, W *c) -> W
Definition mp_asmi.h:90
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size)
Definition mp_karat.cpp:327
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
Definition mp_comba.cpp:794
BOTAN_FUZZER_API void bigint_monty_redc_24(word r[24], const word z[48], const word p[24], word p_dash, word ws[24])
constexpr auto bigint_sub3(W z[], const W x[], size_t x_size, const W y[], size_t y_size) -> W
Definition mp_core.h:192
constexpr auto monty_inverse(W a) -> W
Definition mp_core.h:690
BOTAN_FUZZER_API void bigint_monty_redc_generic(word r[], const word z[], size_t z_size, const word p[], size_t p_size, word p_dash, word ws[])
Definition mp_monty.cpp:90
void bigint_mul(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, const word y[], size_t y_size, size_t y_sw, word workspace[], size_t ws_size)
Definition mp_karat.cpp:283
void zeroize_buffer(T buf[], size_t n)
Definition mem_utils.h:37
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
Definition mp_comba.cpp:115
constexpr void bigint_shl1(W x[], size_t x_size, size_t x_words, size_t shift)
Definition mp_core.h:307
uint32_t P0(uint32_t X)
Definition sm3_fn.h:17
constexpr auto bigint_ct_is_eq(const W x[], size_t x_size, const W y[], size_t y_size) -> CT::Mask< W >
Definition mp_core.h:506
constexpr int32_t bigint_cmp(const W x[], size_t x_size, const W y[], size_t y_size)
Definition mp_core.h:426
consteval std::pair< W, size_t > div_magic()
Definition mp_core.h:530
BOTAN_FUZZER_API void bigint_monty_redc_4(word r[4], const word z[8], const word p[4], word p_dash, word ws[4])
void bigint_monty_redc_inplace(word z[], const word p[], size_t p_size, word p_dash, word ws[], size_t ws_size)
Definition mp_core.h:931
void bigint_monty_redc(word r[], const word z[], const word p[], size_t p_size, word p_dash, word ws[], size_t ws_size)
Definition mp_core.h:906
void bigint_comba_mul7(word z[14], const word x[7], const word y[7])
Definition mp_comba.cpp:221
constexpr W divide_10(W x)
Definition mp_core.h:545
constexpr W bigint_cnd_add(W cnd, W x[], const W y[], size_t size)
Definition mp_core.h:45
void unchecked_copy_memory(T *out, const T *in, size_t n)
Definition mem_utils.h:44
constexpr void bigint_monty_maybe_sub(size_t N, W z[], W x0, const W x[], const W p[])
Definition mp_core.h:225
constexpr void bigint_shl2(W y[], const W x[], size_t x_size, size_t shift)
Definition mp_core.h:350
void bigint_comba_mul9(word z[18], const word x[9], const word y[9])
Definition mp_comba.cpp:511
BOTAN_FUZZER_API void bigint_monty_redc_12(word r[12], const word z[24], const word p[12], word p_dash, word ws[12])
void carry(int64_t &h0, int64_t &h1)
BOTAN_FUZZER_API void bigint_monty_redc_16(word r[16], const word z[32], const word p[16], word p_dash, word ws[16])
void bigint_comba_mul24(word z[48], const word x[24], const word y[24])
constexpr auto bigint_sub_abs(W z[], const W x[], const W y[], size_t N, W ws[]) -> CT::Mask< W >
Definition mp_core.h:279
constexpr auto bigint_ct_is_lt(const W x[], size_t x_size, const W y[], size_t y_size, bool lt_or_equal=false) -> CT::Mask< W >
Definition mp_core.h:473
constexpr std::array< W, N > redc_crandall(std::span< const W, 2 *N > z)
Definition mp_core.h:975
constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:268
constexpr auto bigint_sub2(W x[], size_t x_size, const W y[], size_t y_size) -> W
Definition mp_core.h:148
void bigint_comba_sqr8(word z[16], const word x[8])
Definition mp_comba.cpp:292
constexpr auto hex_to_words(const char(&s)[N])
Definition mp_core.h:743
void bigint_comba_sqr16(word z[32], const word x[16])
Definition mp_comba.cpp:618
constexpr void bigint_sub2_rev(W x[], const W y[], size_t y_size)
Definition mp_core.h:174
void bigint_comba_sqr9(word z[18], const word x[9])
Definition mp_comba.cpp:440
constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W
Definition mp_asmi.h:397
BOTAN_FUZZER_API void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size)
Definition mp_karat.cpp:20
std::conditional_t< HasNative64BitRegisters, std::uint64_t, uint32_t > word
Definition types.h:119
void bigint_comba_sqr24(word z[48], const word x[24])
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
Definition mp_comba.cpp:352
BOTAN_FUZZER_API void bigint_monty_redc_8(word r[8], const word z[16], const word p[8], word p_dash, word ws[8])
constexpr W shift_right(std::array< W, N > &x)
Definition mp_core.h:727
BOTAN_FUZZER_API void bigint_monty_redc_32(word r[32], const word z[64], const word p[32], word p_dash, word ws[32])
constexpr auto word_madd3(W a, W b, W c, W *d) -> W
Definition mp_asmi.h:133
constexpr auto bigint_linmul2(W x[], size_t x_size, W y) -> W
Definition mp_core.h:392