9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
12#include <botan/types.h>
14#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
15 #include <botan/internal/donna128.h>
20#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
21 #define BOTAN_MP_USE_X86_64_ASM
28concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>);
// Size and limit constants for a 32-bit word:
//   bytes/bits - storage size of the word
//   max        - value with all 32 bits set
//   top_bit    - value with only the most significant bit set
36 static const constexpr size_t bytes = 4;
37 static const constexpr size_t bits = 32;
38 static const constexpr uint32_t max = 0xFFFFFFFF;
39 static const constexpr uint32_t top_bit = 0x80000000;
42 static const constexpr bool dword_is_native =
true;
// Size and limit constants for a 64-bit word:
//   bytes/bits - storage size of the word
//   max        - value with all 64 bits set
//   top_bit    - value with only the most significant bit set
48 static const constexpr size_t bytes = 8;
49 static const constexpr size_t bits = 64;
50 static const constexpr uint64_t max = 0xFFFFFFFFFFFFFFFF;
51 static const constexpr uint64_t top_bit = 0x8000000000000000;
53#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
54 typedef uint128_t
dword;
55 static const constexpr bool dword_is_native =
true;
58 static const constexpr bool dword_is_native =
false;
67#if defined(BOTAN_MP_USE_X86_64_ASM)
68 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
74 : [a] "=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*c)
75 :
"0"(a),
"1"(
b), [c]
"g"(*c)
83 const dword s = dword(a) *
b + *c;
85 return static_cast<W
>(s);
93#if defined(BOTAN_MP_USE_X86_64_ASM)
94 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
104 : [a] "=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*d)
105 :
"0"(a),
"1"(
b), [c]
"g"(c), [d]
"g"(*d)
113 const dword s = dword(a) *
b + c + *d;
115 return static_cast<W
>(s);
118#if defined(BOTAN_MP_USE_X86_64_ASM)
// Emit one assembly instruction string followed by "\n\t", so that adjacent
// ASM(...) uses concatenate into a single multi-line asm template.
120 #define ASM(x) x "\n\t"
122 #define DO_4_TIMES(MACRO, ARG) \
128 #define DO_8_TIMES(MACRO, ARG) \
// One step of a two-operand add/subtract over 8-byte words:
// load the INDEX-th word of y into the operand named `carry` (used here as a
// temporary), then apply OPERATION with it as the source and the INDEX-th
// word of x in memory as the destination.
138 #define ADDSUB2_OP(OPERATION, INDEX) \
139 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
140 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")
// One step of a three-operand add/subtract over 8-byte words:
// load the INDEX-th word of x into the operand named `carry` (used here as a
// temporary), apply OPERATION with the INDEX-th word of y as the source, and
// store the result to the INDEX-th word of z.
142 #define ADDSUB3_OP(OPERATION, INDEX) \
143 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
144 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
145 ASM("movq %[carry], 8*" #INDEX "(%[z])")
147 #define LINMUL_OP(WRITE_TO, INDEX) \
148 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
150 ASM("addq %[carry],%%rax") \
151 ASM("adcq $0,%%rdx") \
152 ASM("movq %%rdx,%[carry]") \
153 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
155 #define MULADD_OP(IGNORED, INDEX) \
156 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
158 ASM("addq %[carry],%%rax") \
159 ASM("adcq $0,%%rdx") \
160 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
161 ASM("adcq $0,%%rdx") \
162 ASM("movq %%rdx,%[carry]") \
163 ASM("movq %%rax, 8*" #INDEX " (%[z])")
165 #define ADD_OR_SUBTRACT(CORE_CODE) \
166 ASM("rorq %[carry]") \
168 ASM("sbbq %[carry],%[carry]") \
178 if(!std::is_constant_evaluated()) {
179#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_addc)
180 if constexpr(std::same_as<W, unsigned int>) {
181 return __builtin_addc(x, y, *
carry & 1,
carry);
182 }
else if constexpr(std::same_as<W, unsigned long>) {
183 return __builtin_addcl(x, y, *
carry & 1,
carry);
184 }
else if constexpr(std::same_as<W, unsigned long long>) {
185 return __builtin_addcll(x, y, *
carry & 1,
carry);
187#elif defined(BOTAN_MP_USE_X86_64_ASM)
188 if(std::same_as<W, uint64_t>) {
189 asm(ADD_OR_SUBTRACT(ASM(
"adcq %[y],%[x]"))
191 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
198 const W cb = *
carry & 1;
202 *
carry = c1 | (z < cb);
211#if defined(BOTAN_MP_USE_X86_64_ASM)
212 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
213 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcq"))
215 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
237#if defined(BOTAN_MP_USE_X86_64_ASM)
238 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
239 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcq"))
241 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
260#if defined(BOTAN_MP_USE_X86_64_ASM)
261 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
262 asm volatile(ADD_OR_SUBTRACT(DO_4_TIMES(ADDSUB3_OP,
"adcq"))
264 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
282 if(!std::is_constant_evaluated()) {
283#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_subc)
284 if constexpr(std::same_as<W, unsigned int>) {
285 return __builtin_subc(x, y, *
carry & 1,
carry);
286 }
else if constexpr(std::same_as<W, unsigned long>) {
287 return __builtin_subcl(x, y, *
carry & 1,
carry);
288 }
else if constexpr(std::same_as<W, unsigned long long>) {
289 return __builtin_subcll(x, y, *
carry & 1,
carry);
291#elif defined(BOTAN_MP_USE_X86_64_ASM)
292 if(std::same_as<W, uint64_t>) {
293 asm(ADD_OR_SUBTRACT(ASM(
"sbbq %[y],%[x]"))
295 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
302 const W cb = *
carry & 1;
306 *
carry = c1 | (z > t0);
315#if defined(BOTAN_MP_USE_X86_64_ASM)
316 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
317 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbq"))
319 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
341#if defined(BOTAN_MP_USE_X86_64_ASM)
342 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
343 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
345 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
367#if defined(BOTAN_MP_USE_X86_64_ASM)
368 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
369 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
371 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
390#if defined(BOTAN_MP_USE_X86_64_ASM)
391 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
392 asm volatile(ADD_OR_SUBTRACT(DO_4_TIMES(ADDSUB3_OP,
"sbbq"))
394 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
412#if defined(BOTAN_MP_USE_X86_64_ASM)
413 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
414 asm(DO_8_TIMES(LINMUL_OP,
"x")
416 : [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
417 :
"cc",
"%rax",
"%rdx");
438#if defined(BOTAN_MP_USE_X86_64_ASM)
439 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
440 asm(DO_8_TIMES(LINMUL_OP,
"z")
442 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
443 :
"cc",
"%rax",
"%rdx");
464#if defined(BOTAN_MP_USE_X86_64_ASM)
465 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
466 asm(DO_8_TIMES(MULADD_OP,
"")
468 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
469 :
"cc",
"%rax",
"%rdx");
491#if defined(BOTAN_MP_USE_X86_64_ASM)
492 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
495 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
502 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
503 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
512 *w2 += (*w1 <
carry);
520inline constexpr void word3_add(W* w2, W* w1, W* w0, W x) {
521#if defined(BOTAN_MP_USE_X86_64_ASM)
522 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
528 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
529 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
548#if defined(BOTAN_MP_USE_X86_64_ASM)
549 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
552 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
563 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
564 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
576 W top = (y >> top_bit_shift);
578 y |= (x >> top_bit_shift);
597#if defined(__BITINT_MAXWIDTH__) && (__BITINT_MAXWIDTH__ >= 3 * 64)
// Start with the accumulator m_w set to zero.
600 constexpr word3() { m_w = 0; }
// Add the product x * y to the accumulator m_w. x is cast to W3 before the
// multiplication, so the product is computed in the W3 type.
602 inline constexpr void mul(W x, W y) { m_w +=
static_cast<W3
>(x) * y; }
// Add twice the product x * y to the accumulator m_w. x is cast to W3 first,
// then multiplied by y and by 2, all in the W3 type.
604 inline constexpr void mul_x2(W x, W y) { m_w +=
static_cast<W3
>(x) * y * 2; }
// Add the single word x to the accumulator m_w.
606 inline constexpr void add(W x) { m_w += x; }
609 W r =
static_cast<W
>(m_w);
614 inline constexpr W
monty_step(W p0, W p_dash) {
615 const W w0 =
static_cast<W
>(m_w);
616 const W r = w0 * p_dash;
623 const W r =
static_cast<W
>(m_w);
625 m_w +=
static_cast<W3
>(r);
668 m_w1 = m_w2 + (m_w0 < m_w1);
682 #undef ADD_OR_SUBTRACT
constexpr W monty_step(W p0, W p_dash)
constexpr void mul(W x, W y)
constexpr W monty_step_pdash1()
constexpr void mul_x2(W x, W y)
constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word_sub(W x, W y, W *carry) -> W
constexpr auto word_add(W x, W y, W *carry) -> W
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word4_sub3(W z[4], const W x[4], const W y[4], W carry) -> W
constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W
constexpr auto word_madd2(W a, W b, W *c) -> W
constexpr auto word8_sub2_rev(W x[8], const W y[8], W carry) -> W
constexpr auto word4_add3(W z[4], const W x[4], const W y[4], W carry) -> W
constexpr void word3_add(W *w2, W *w1, W *w0, W x)
constexpr void word3_muladd_2(W *w2, W *w1, W *w0, W x, W y)
constexpr void word3_muladd(W *w2, W *w1, W *w0, W x, W y)
void carry(int64_t &h0, int64_t &h1)
constexpr auto word8_linmul2(W x[8], W y, W carry) -> W
constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W
constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word_madd3(W a, W b, W c, W *d) -> W