9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
12#include <botan/compiler.h>
13#include <botan/types.h>
15#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
16 #include <botan/internal/donna128.h>
21#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
22 #define BOTAN_MP_USE_X86_64_ASM
29concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>);
37 static const constexpr size_t bytes = 4;
38 static const constexpr size_t bits = 32;
39 static const constexpr uint32_t max = 0xFFFFFFFF;
40 static const constexpr uint32_t top_bit = 0x80000000;
43 static const constexpr bool dword_is_native =
true;
49 static const constexpr size_t bytes = 8;
50 static const constexpr size_t bits = 64;
51 static const constexpr uint64_t max = 0xFFFFFFFFFFFFFFFF;
52 static const constexpr uint64_t top_bit = 0x8000000000000000;
54#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
55 typedef uint128_t
dword;
56 static const constexpr bool dword_is_native =
true;
59 static const constexpr bool dword_is_native =
false;
68#if defined(BOTAN_MP_USE_X86_64_ASM)
69 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
75 : [a] "=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*c)
76 :
"0"(a),
"1"(
b), [c]
"g"(*c)
84 const dword s = dword(a) *
b + *c;
86 return static_cast<W
>(s);
94#if defined(BOTAN_MP_USE_X86_64_ASM)
95 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
105 : [a] "=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*d)
106 :
"0"(a),
"1"(
b), [c]
"g"(c), [d]
"g"(*d)
114 const dword s = dword(a) *
b + c + *d;
116 return static_cast<W
>(s);
119#if defined(BOTAN_MP_USE_X86_64_ASM)
121 #define ASM(x) x "\n\t"
123 #define DO_4_TIMES(MACRO, ARG) \
129 #define DO_8_TIMES(MACRO, ARG) \
139 #define ADDSUB2_OP(OPERATION, INDEX) \
140 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
141 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")
143 #define ADDSUB3_OP(OPERATION, INDEX) \
144 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
145 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
146 ASM("movq %[carry], 8*" #INDEX "(%[z])")
148 #define LINMUL_OP(WRITE_TO, INDEX) \
149 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
151 ASM("addq %[carry],%%rax") \
152 ASM("adcq $0,%%rdx") \
153 ASM("movq %%rdx,%[carry]") \
154 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
156 #define MULADD_OP(IGNORED, INDEX) \
157 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
159 ASM("addq %[carry],%%rax") \
160 ASM("adcq $0,%%rdx") \
161 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
162 ASM("adcq $0,%%rdx") \
163 ASM("movq %%rdx,%[carry]") \
164 ASM("movq %%rax, 8*" #INDEX " (%[z])")
166 #define ADD_OR_SUBTRACT(CORE_CODE) \
167 ASM("rorq %[carry]") \
169 ASM("sbbq %[carry],%[carry]") \
179 if(!std::is_constant_evaluated()) {
180#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_addc)
181 if constexpr(std::same_as<W, unsigned int>) {
182 return __builtin_addc(x, y, *
carry & 1,
carry);
183 }
else if constexpr(std::same_as<W, unsigned long>) {
184 return __builtin_addcl(x, y, *
carry & 1,
carry);
185 }
else if constexpr(std::same_as<W, unsigned long long>) {
186 return __builtin_addcll(x, y, *
carry & 1,
carry);
188#elif defined(BOTAN_MP_USE_X86_64_ASM)
189 if(std::same_as<W, uint64_t>) {
190 asm(ADD_OR_SUBTRACT(ASM(
"adcq %[y],%[x]"))
192 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
199 const W cb = *
carry & 1;
203 *
carry = c1 | (z < cb);
212#if defined(BOTAN_MP_USE_X86_64_ASM)
213 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
214 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcq"))
216 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
238#if defined(BOTAN_MP_USE_X86_64_ASM)
239 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
240 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcq"))
242 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
261#if defined(BOTAN_MP_USE_X86_64_ASM)
262 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
263 asm volatile(ADD_OR_SUBTRACT(DO_4_TIMES(ADDSUB3_OP,
"adcq"))
265 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
283 if(!std::is_constant_evaluated()) {
284#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_subc)
285 if constexpr(std::same_as<W, unsigned int>) {
286 return __builtin_subc(x, y, *
carry & 1,
carry);
287 }
else if constexpr(std::same_as<W, unsigned long>) {
288 return __builtin_subcl(x, y, *
carry & 1,
carry);
289 }
else if constexpr(std::same_as<W, unsigned long long>) {
290 return __builtin_subcll(x, y, *
carry & 1,
carry);
292#elif defined(BOTAN_MP_USE_X86_64_ASM)
293 if(std::same_as<W, uint64_t>) {
294 asm(ADD_OR_SUBTRACT(ASM(
"sbbq %[y],%[x]"))
296 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
303 const W cb = *
carry & 1;
307 *
carry = c1 | (z > t0);
316#if defined(BOTAN_MP_USE_X86_64_ASM)
317 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
318 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbq"))
320 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
342#if defined(BOTAN_MP_USE_X86_64_ASM)
343 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
344 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
346 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
368#if defined(BOTAN_MP_USE_X86_64_ASM)
369 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
370 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
372 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
391#if defined(BOTAN_MP_USE_X86_64_ASM)
392 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
393 asm volatile(ADD_OR_SUBTRACT(DO_4_TIMES(ADDSUB3_OP,
"sbbq"))
395 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
413#if defined(BOTAN_MP_USE_X86_64_ASM)
414 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
415 asm(DO_8_TIMES(LINMUL_OP,
"x")
417 : [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
418 :
"cc",
"%rax",
"%rdx");
439#if defined(BOTAN_MP_USE_X86_64_ASM)
440 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
441 asm(DO_8_TIMES(LINMUL_OP,
"z")
443 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
444 :
"cc",
"%rax",
"%rdx");
465#if defined(BOTAN_MP_USE_X86_64_ASM)
466 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
467 asm(DO_8_TIMES(MULADD_OP,
"")
469 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
470 :
"cc",
"%rax",
"%rdx");
492#if defined(BOTAN_MP_USE_X86_64_ASM)
493 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
496 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
503 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
504 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
513 *w2 += (*w1 <
carry);
521inline constexpr void word3_add(W* w2, W* w1, W* w0, W x) {
522#if defined(BOTAN_MP_USE_X86_64_ASM)
523 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
529 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
530 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
549#if defined(BOTAN_MP_USE_X86_64_ASM)
550 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
553 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
564 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
565 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
577 W top = (y >> top_bit_shift);
579 y |= (x >> top_bit_shift);
598#if defined(__BITINT_MAXWIDTH__) && (__BITINT_MAXWIDTH__ >= 3 * 64)
601 constexpr word3() { m_w = 0; }
603 inline constexpr void mul(W x, W y) { m_w +=
static_cast<W3
>(x) * y; }
605 inline constexpr void mul_x2(W x, W y) { m_w +=
static_cast<W3
>(x) * y * 2; }
607 inline constexpr void add(W x) { m_w += x; }
610 W r =
static_cast<W
>(m_w);
615 inline constexpr W
monty_step(W p0, W p_dash) {
616 const W w0 =
static_cast<W
>(m_w);
617 const W r = w0 * p_dash;
624 const W r =
static_cast<W
>(m_w);
626 m_w +=
static_cast<W3
>(r);
669 m_w1 = m_w2 + (m_w0 < m_w1);
683 #undef ADD_OR_SUBTRACT
constexpr W monty_step(W p0, W p_dash)
constexpr void mul(W x, W y)
constexpr W monty_step_pdash1()
constexpr void mul_x2(W x, W y)
int(* final)(unsigned char *, CTX *)
constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word_sub(W x, W y, W *carry) -> W
constexpr auto word_add(W x, W y, W *carry) -> W
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word4_sub3(W z[4], const W x[4], const W y[4], W carry) -> W
constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W
constexpr auto word_madd2(W a, W b, W *c) -> W
constexpr auto word8_sub2_rev(W x[8], const W y[8], W carry) -> W
constexpr auto word4_add3(W z[4], const W x[4], const W y[4], W carry) -> W
constexpr void word3_add(W *w2, W *w1, W *w0, W x)
constexpr void word3_muladd_2(W *w2, W *w1, W *w0, W x, W y)
constexpr void word3_muladd(W *w2, W *w1, W *w0, W x, W y)
void carry(int64_t &h0, int64_t &h1)
constexpr auto word8_linmul2(W x[8], W y, W carry) -> W
constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W
constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word_madd3(W a, W b, W c, W *d) -> W