Botan 3.9.0
Crypto and TLS for C&
mp_asmi.h
Go to the documentation of this file.
1/*
2* Lowest Level MPI Algorithms
3* (C) 1999-2010,2025 Jack Lloyd
4* 2006 Luca Piccarreta
5*
6* Botan is released under the Simplified BSD License (see license.txt)
7*/
8
9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
11
12#include <botan/compiler.h>
13#include <botan/types.h>
14#include <botan/internal/target_info.h>
15#include <concepts>
16
17#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
18 #include <botan/internal/donna128.h>
19#endif
20
21namespace Botan {
22
23// NOLINTBEGIN(*-macro-usage)
24
25#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
26 #define BOTAN_MP_USE_X86_64_ASM
27#endif
28
29/*
30* Expressing an add with carry is sadly quite difficult in standard C/C++.
31*
32* Compilers will recognize various idioms and generate a reasonable carry
33* chain. Unfortunately which idioms the compiler will understand vary, so we
34* have to decide what to do based on the compiler. This is fragile; what will
35* work varies not just based on compiler but also version, target architecture,
36* and optimization flags.
37*/
38#if defined(__clang__)
39static constexpr bool use_dword_for_word_add = false;
40#else
41static constexpr bool use_dword_for_word_add = true;
42#endif
43
44/*
45* Concept for allowed multiprecision word types
46*/
47template <typename T>
48concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>);
49
50template <WordType W>
51struct WordInfo {};
52
53template <>
54struct WordInfo<uint32_t> {
55 public:
56 static const constexpr size_t bytes = 4;
57 static const constexpr size_t bits = 32;
58 static const constexpr uint32_t max = 0xFFFFFFFF;
59 static const constexpr uint32_t top_bit = 0x80000000;
60
61 typedef uint64_t dword;
62 static const constexpr bool dword_is_native = true;
63};
64
65template <>
66struct WordInfo<uint64_t> {
67 public:
68 static const constexpr size_t bytes = 8;
69 static const constexpr size_t bits = 64;
70 static const constexpr uint64_t max = 0xFFFFFFFFFFFFFFFF;
71 static const constexpr uint64_t top_bit = 0x8000000000000000;
72
73#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
74 typedef uint128_t dword;
75 static const constexpr bool dword_is_native = true;
76#else
77 typedef donna128 dword;
78 static const constexpr bool dword_is_native = false;
79#endif
80};
81
82/*
83* Word Multiply/Add
84*/
85template <WordType W>
86inline constexpr auto word_madd2(W a, W b, W* c) -> W {
87#if defined(BOTAN_MP_USE_X86_64_ASM)
88 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
89 asm(R"(
90 mulq %[b]
91 addq %[c],%[a]
92 adcq $0,%[carry]
93 )"
94 : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*c)
95 : "0"(a), "1"(b), [c] "g"(*c)
96 : "cc");
97
98 return a;
99 }
100#endif
101
102 typedef typename WordInfo<W>::dword dword;
103 const dword s = dword(a) * b + *c;
104 *c = static_cast<W>(s >> WordInfo<W>::bits);
105 return static_cast<W>(s);
106}
107
108/*
109* Word Multiply/Add
110*/
111template <WordType W>
112inline constexpr auto word_madd3(W a, W b, W c, W* d) -> W {
113#if defined(BOTAN_MP_USE_X86_64_ASM)
114 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
115 asm(R"(
116 mulq %[b]
117
118 addq %[c],%[a]
119 adcq $0,%[carry]
120
121 addq %[d],%[a]
122 adcq $0,%[carry]
123 )"
124 : [a] "=a"(a), [b] "=rm"(b), [carry] "=&d"(*d)
125 : "0"(a), "1"(b), [c] "g"(c), [d] "g"(*d)
126 : "cc");
127
128 return a;
129 }
130#endif
131
132 typedef typename WordInfo<W>::dword dword;
133 const dword s = dword(a) * b + c + *d;
134 *d = static_cast<W>(s >> WordInfo<W>::bits);
135 return static_cast<W>(s);
136}
137
138#if defined(BOTAN_MP_USE_X86_64_ASM)
139
140 #define ASM(x) x "\n\t"
141
142 #define DO_8_TIMES(MACRO, ARG) \
143 MACRO(ARG, 0) \
144 MACRO(ARG, 1) \
145 MACRO(ARG, 2) \
146 MACRO(ARG, 3) \
147 MACRO(ARG, 4) \
148 MACRO(ARG, 5) \
149 MACRO(ARG, 6) \
150 MACRO(ARG, 7)
151
152 #define ADDSUB2_OP(OPERATION, INDEX) \
153 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
154 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")
155
156 #define ADDSUB3_OP(OPERATION, INDEX) \
157 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
158 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
159 ASM("movq %[carry], 8*" #INDEX "(%[z])")
160
161 #define LINMUL_OP(WRITE_TO, INDEX) \
162 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
163 ASM("mulq %[y]") \
164 ASM("addq %[carry],%%rax") \
165 ASM("adcq $0,%%rdx") \
166 ASM("movq %%rdx,%[carry]") \
167 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
168
169 #define MULADD_OP(IGNORED, INDEX) \
170 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
171 ASM("mulq %[y]") \
172 ASM("addq %[carry],%%rax") \
173 ASM("adcq $0,%%rdx") \
174 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
175 ASM("adcq $0,%%rdx") \
176 ASM("movq %%rdx,%[carry]") \
177 ASM("movq %%rax, 8*" #INDEX " (%[z])")
178
179 #define ADD_OR_SUBTRACT(CORE_CODE) \
180 ASM("rorq %[carry]") \
181 CORE_CODE \
182 ASM("sbbq %[carry],%[carry]") \
183 ASM("negq %[carry]")
184
185#endif
186
187/*
188* Word Addition
189*/
190template <WordType W>
191inline constexpr auto word_add(W x, W y, W* carry) -> W {
192#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_addc)
193 if(!std::is_constant_evaluated()) {
194 if constexpr(std::same_as<W, unsigned int>) {
195 return __builtin_addc(x, y, *carry & 1, carry);
196 } else if constexpr(std::same_as<W, unsigned long>) {
197 return __builtin_addcl(x, y, *carry & 1, carry);
198 } else if constexpr(std::same_as<W, unsigned long long>) {
199 return __builtin_addcll(x, y, *carry & 1, carry);
200 }
201 }
202#endif
203
204 if constexpr(WordInfo<W>::dword_is_native && use_dword_for_word_add) {
205 /*
206 TODO(Botan4) this is largely a performance hack for GCCs that don't
207 support __builtin_addc, if we increase the minimum supported version of
208 GCC to GCC 14 then we can remove this and not worry about it
209 */
210 const W cb = *carry & 1;
211 const auto s = typename WordInfo<W>::dword(x) + y + cb;
212 *carry = static_cast<W>(s >> WordInfo<W>::bits);
213 return static_cast<W>(s);
214 } else {
215 const W cb = *carry & 1;
216 W z = x + y;
217 W c1 = (z < x);
218 z += cb;
219 *carry = c1 | (z < cb);
220 return z;
221 }
222}
223
224/*
225* Eight Word Block Addition, Two Argument
226*/
227template <WordType W>
228inline constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W {
229#if defined(BOTAN_MP_USE_X86_64_ASM)
230 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
231 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
232 : [carry] "=r"(carry)
233 : [x] "r"(x), [y] "r"(y), "0"(carry)
234 : "cc", "memory");
235 return carry;
236 }
237#endif
238
239 x[0] = word_add(x[0], y[0], &carry);
240 x[1] = word_add(x[1], y[1], &carry);
241 x[2] = word_add(x[2], y[2], &carry);
242 x[3] = word_add(x[3], y[3], &carry);
243 x[4] = word_add(x[4], y[4], &carry);
244 x[5] = word_add(x[5], y[5], &carry);
245 x[6] = word_add(x[6], y[6], &carry);
246 x[7] = word_add(x[7], y[7], &carry);
247 return carry;
248}
249
250/*
251* Eight Word Block Addition, Three Argument
252*/
253template <WordType W>
254inline constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W {
255#if defined(BOTAN_MP_USE_X86_64_ASM)
256 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
257 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
258 : [carry] "=r"(carry)
259 : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry)
260 : "cc", "memory");
261 return carry;
262 }
263#endif
264
265 z[0] = word_add(x[0], y[0], &carry);
266 z[1] = word_add(x[1], y[1], &carry);
267 z[2] = word_add(x[2], y[2], &carry);
268 z[3] = word_add(x[3], y[3], &carry);
269 z[4] = word_add(x[4], y[4], &carry);
270 z[5] = word_add(x[5], y[5], &carry);
271 z[6] = word_add(x[6], y[6], &carry);
272 z[7] = word_add(x[7], y[7], &carry);
273 return carry;
274}
275
276/*
277* Word Subtraction
278*/
279template <WordType W>
280inline constexpr auto word_sub(W x, W y, W* carry) -> W {
281#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_subc)
282 if(!std::is_constant_evaluated()) {
283 if constexpr(std::same_as<W, unsigned int>) {
284 return __builtin_subc(x, y, *carry & 1, carry);
285 } else if constexpr(std::same_as<W, unsigned long>) {
286 return __builtin_subcl(x, y, *carry & 1, carry);
287 } else if constexpr(std::same_as<W, unsigned long long>) {
288 return __builtin_subcll(x, y, *carry & 1, carry);
289 }
290 }
291#endif
292
293 const W cb = *carry & 1;
294 W t0 = x - y;
295 W c1 = (t0 > x);
296 W z = t0 - cb;
297 *carry = c1 | (z > t0);
298 return z;
299}
300
301/*
302* Eight Word Block Subtraction, Two Argument
303*/
304template <WordType W>
305inline constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W {
306#if defined(BOTAN_MP_USE_X86_64_ASM)
307 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
308 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
309 : [carry] "=r"(carry)
310 : [x] "r"(x), [y] "r"(y), "0"(carry)
311 : "cc", "memory");
312 return carry;
313 }
314#endif
315
316 x[0] = word_sub(x[0], y[0], &carry);
317 x[1] = word_sub(x[1], y[1], &carry);
318 x[2] = word_sub(x[2], y[2], &carry);
319 x[3] = word_sub(x[3], y[3], &carry);
320 x[4] = word_sub(x[4], y[4], &carry);
321 x[5] = word_sub(x[5], y[5], &carry);
322 x[6] = word_sub(x[6], y[6], &carry);
323 x[7] = word_sub(x[7], y[7], &carry);
324 return carry;
325}
326
327/*
328* Eight Word Block Subtraction, Three Argument
329*/
330template <WordType W>
331inline constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W {
332#if defined(BOTAN_MP_USE_X86_64_ASM)
333 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
334 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
335 : [carry] "=r"(carry)
336 : [x] "r"(x), [y] "r"(y), [z] "r"(z), "0"(carry)
337 : "cc", "memory");
338 return carry;
339 }
340#endif
341
342 z[0] = word_sub(x[0], y[0], &carry);
343 z[1] = word_sub(x[1], y[1], &carry);
344 z[2] = word_sub(x[2], y[2], &carry);
345 z[3] = word_sub(x[3], y[3], &carry);
346 z[4] = word_sub(x[4], y[4], &carry);
347 z[5] = word_sub(x[5], y[5], &carry);
348 z[6] = word_sub(x[6], y[6], &carry);
349 z[7] = word_sub(x[7], y[7], &carry);
350 return carry;
351}
352
353/*
354* Eight Word Block Linear Multiplication
355*/
356template <WordType W>
357inline constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W {
358#if defined(BOTAN_MP_USE_X86_64_ASM)
359 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
360 asm(DO_8_TIMES(LINMUL_OP, "z")
361 : [carry] "=r"(carry)
362 : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry)
363 : "cc", "%rax", "%rdx");
364 return carry;
365 }
366#endif
367
368 z[0] = word_madd2(x[0], y, &carry);
369 z[1] = word_madd2(x[1], y, &carry);
370 z[2] = word_madd2(x[2], y, &carry);
371 z[3] = word_madd2(x[3], y, &carry);
372 z[4] = word_madd2(x[4], y, &carry);
373 z[5] = word_madd2(x[5], y, &carry);
374 z[6] = word_madd2(x[6], y, &carry);
375 z[7] = word_madd2(x[7], y, &carry);
376 return carry;
377}
378
379/*
380* Eight Word Block Multiply/Add
381*/
382template <WordType W>
383inline constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W {
384#if defined(BOTAN_MP_USE_X86_64_ASM)
385 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
386 asm(DO_8_TIMES(MULADD_OP, "")
387 : [carry] "=r"(carry)
388 : [z] "r"(z), [x] "r"(x), [y] "rm"(y), "0"(carry)
389 : "cc", "%rax", "%rdx");
390 return carry;
391 }
392#endif
393
394 z[0] = word_madd3(x[0], y, z[0], &carry);
395 z[1] = word_madd3(x[1], y, z[1], &carry);
396 z[2] = word_madd3(x[2], y, z[2], &carry);
397 z[3] = word_madd3(x[3], y, z[3], &carry);
398 z[4] = word_madd3(x[4], y, z[4], &carry);
399 z[5] = word_madd3(x[5], y, z[5], &carry);
400 z[6] = word_madd3(x[6], y, z[6], &carry);
401 z[7] = word_madd3(x[7], y, z[7], &carry);
402 return carry;
403}
404
405/**
406* Helper for 3-word accumulators
407*
408* A number of algorithms especially Comba multiplication and
409* Montgomery reduction can take advantage of wide accumulators, which
410* consume inputs via addition with outputs extracted from the low
411* bits.
412*/
413template <WordType W>
414class word3 final {
415#if defined(__BITINT_MAXWIDTH__) && (__BITINT_MAXWIDTH__ >= 3 * 64)
416
417 public:
418 constexpr word3() : m_w(0) {}
419
420 inline constexpr void mul(W x, W y) { m_w += static_cast<W3>(x) * y; }
421
422 inline constexpr void mul_x2(W x, W y) { m_w += static_cast<W3>(x) * y * 2; }
423
424 inline constexpr void add(W x) { m_w += x; }
425
426 inline constexpr W extract() {
427 W r = static_cast<W>(m_w);
428 m_w >>= WordInfo<W>::bits;
429 return r;
430 }
431
432 inline constexpr W monty_step(W p0, W p_dash) {
433 const W w0 = static_cast<W>(m_w);
434 const W r = w0 * p_dash;
435 mul(r, p0);
436 m_w >>= WordInfo<W>::bits;
437 return r;
438 }
439
440 inline constexpr W monty_step_pdash1() {
441 const W r = static_cast<W>(m_w);
442 m_w >>= WordInfo<W>::bits;
443 m_w += static_cast<W3>(r);
444 return r;
445 }
446
447 private:
448 __extension__ typedef unsigned _BitInt(WordInfo<W>::bits * 3) W3;
449 W3 m_w;
450#else
451
452 public:
453 constexpr word3() : m_w0(0), m_w1(0), m_w2(0) {}
454
455 inline constexpr void mul(W x, W y) {
456 #if defined(BOTAN_MP_USE_X86_64_ASM)
457 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
458 W z0 = 0, z1 = 0;
459
460 asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc");
461
462 asm(R"(
463 addq %[z0],%[w0]
464 adcq %[z1],%[w1]
465 adcq $0,%[w2]
466 )"
467 : [w0] "=r"(m_w0), [w1] "=r"(m_w1), [w2] "=r"(m_w2)
468 : [z0] "r"(z0), [z1] "r"(z1), "0"(m_w0), "1"(m_w1), "2"(m_w2)
469 : "cc");
470 return;
471 }
472 #endif
473
474 typedef typename WordInfo<W>::dword dword;
475 const dword s = dword(x) * y + m_w0;
476 W carry = static_cast<W>(s >> WordInfo<W>::bits);
477 m_w0 = static_cast<W>(s);
478 m_w1 += carry;
479 m_w2 += (m_w1 < carry);
480 }
481
482 inline constexpr void mul_x2(W x, W y) {
483 #if defined(BOTAN_MP_USE_X86_64_ASM)
484 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
485 W z0 = 0, z1 = 0;
486
487 asm("mulq %[y]" : "=a"(z0), "=d"(z1) : "a"(x), [y] "rm"(y) : "cc");
488
489 asm(R"(
490 addq %[z0],%[w0]
491 adcq %[z1],%[w1]
492 adcq $0,%[w2]
493
494 addq %[z0],%[w0]
495 adcq %[z1],%[w1]
496 adcq $0,%[w2]
497 )"
498 : [w0] "=r"(m_w0), [w1] "=r"(m_w1), [w2] "=r"(m_w2)
499 : [z0] "r"(z0), [z1] "r"(z1), "0"(m_w0), "1"(m_w1), "2"(m_w2)
500 : "cc");
501 return;
502 }
503 #endif
504
505 W carry = 0;
506 x = word_madd2(x, y, &carry);
507 y = carry;
508
509 carry = 0;
510 m_w0 = word_add(m_w0, x, &carry);
511 m_w1 = word_add(m_w1, y, &carry);
512 m_w2 += carry;
513
514 carry = 0;
515 m_w0 = word_add(m_w0, x, &carry);
516 m_w1 = word_add(m_w1, y, &carry);
517 m_w2 += carry;
518 }
519
520 inline constexpr void add(W x) {
521 constexpr W z = 0;
522
523 W carry = 0;
524 m_w0 = word_add(m_w0, x, &carry);
525 m_w1 = word_add(m_w1, z, &carry);
526 m_w2 += carry;
527 }
528
529 inline constexpr W extract() {
530 W r = m_w0;
531 m_w0 = m_w1;
532 m_w1 = m_w2;
533 m_w2 = 0;
534 return r;
535 }
536
537 inline constexpr W monty_step(W p0, W p_dash) {
538 W r = m_w0 * p_dash;
539 mul(r, p0);
540 m_w0 = m_w1;
541 m_w1 = m_w2;
542 m_w2 = 0;
543 return r;
544 }
545
546 inline constexpr W monty_step_pdash1() {
547 // If p_dash == 1 then p[0] = -1 and everything simplifies
548 const W r = m_w0;
549 m_w0 += m_w1;
550 m_w1 = m_w2 + (m_w0 < m_w1);
551 m_w2 = 0;
552 return r;
553 }
554
555 private:
556 W m_w0;
557 W m_w1;
558 W m_w2;
559#endif
560};
561
562#if defined(ASM)
563 #undef ASM
564 #undef DO_8_TIMES
565 #undef ADD_OR_SUBTRACT
566 #undef ADDSUB2_OP
567 #undef ADDSUB3_OP
568 #undef LINMUL_OP
569 #undef MULADD_OP
570#endif
571
572// NOLINTEND(*-macro-usage)
573
574} // namespace Botan
575
576#endif
constexpr void add(W x)
Definition mp_asmi.h:520
constexpr W monty_step(W p0, W p_dash)
Definition mp_asmi.h:537
constexpr word3()
Definition mp_asmi.h:453
constexpr W extract()
Definition mp_asmi.h:529
constexpr void mul(W x, W y)
Definition mp_asmi.h:455
constexpr W monty_step_pdash1()
Definition mp_asmi.h:546
constexpr void mul_x2(W x, W y)
Definition mp_asmi.h:482
constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:331
constexpr auto word_sub(W x, W y, W *carry) -> W
Definition mp_asmi.h:280
constexpr auto word_add(W x, W y, W *carry) -> W
Definition mp_asmi.h:191
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
Definition mp_asmi.h:383
constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:254
constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:305
constexpr auto word_madd2(W a, W b, W *c) -> W
Definition mp_asmi.h:86
void carry(int64_t &h0, int64_t &h1)
constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W
Definition mp_asmi.h:228
constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W
Definition mp_asmi.h:357
constexpr auto word_madd3(W a, W b, W c, W *d) -> W
Definition mp_asmi.h:112
static const constexpr bool dword_is_native
Definition mp_asmi.h:62
static const constexpr uint32_t top_bit
Definition mp_asmi.h:59
static const constexpr size_t bytes
Definition mp_asmi.h:56
static const constexpr size_t bits
Definition mp_asmi.h:57
static const constexpr uint32_t max
Definition mp_asmi.h:58
static const constexpr size_t bytes
Definition mp_asmi.h:68
static const constexpr bool dword_is_native
Definition mp_asmi.h:78
static const constexpr size_t bits
Definition mp_asmi.h:69
static const constexpr uint64_t max
Definition mp_asmi.h:70
static const constexpr uint64_t top_bit
Definition mp_asmi.h:71