9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
12#include <botan/compiler.h>
13#include <botan/types.h>
14#include <botan/internal/target_info.h>
17#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
18 #include <botan/internal/donna128.h>
25#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
26 #define BOTAN_MP_USE_X86_64_ASM
29#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_ARM64)
30 #define BOTAN_MP_USE_AARCH64_ASM
43static constexpr bool use_dword_for_word_add =
false;
45static constexpr bool use_dword_for_word_add =
true;
52concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>);
60 static const constexpr size_t bytes = 4;
61 static const constexpr size_t bits = 32;
62 static const constexpr uint32_t
max = 0xFFFFFFFF;
63 static const constexpr uint32_t
top_bit = 0x80000000;
72 static const constexpr size_t bytes = 8;
73 static const constexpr size_t bits = 64;
74 static const constexpr uint64_t
max = 0xFFFFFFFFFFFFFFFF;
75 static const constexpr uint64_t
top_bit = 0x8000000000000000;
77#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
78 typedef uint128_t
dword;
91#if defined(BOTAN_MP_USE_X86_64_ASM)
92 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
98 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*c)
99 :
"0"(a),
"1"(b), [c]
"g"(*c)
104#elif defined(BOTAN_MP_USE_AARCH64_ASM)
105 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
109 mul %[lo], %[a], %[b]
110 umulh %[hi], %[a], %[b]
111 adds %[lo], %[lo], %[c]
112 adc %[hi], %[hi], xzr
114 : [lo] "=&r"(lo), [hi]
"=&r"(hi)
115 : [a]
"r"(a), [b]
"r"(b), [c]
"r"(*c)
124 const dword s = dword(a) * b + *c;
126 return static_cast<W
>(s);
134#if defined(BOTAN_MP_USE_X86_64_ASM)
135 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
145 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*d)
146 :
"0"(a),
"1"(b), [c]
"g"(c), [d]
"g"(*d)
151#elif defined(BOTAN_MP_USE_AARCH64_ASM)
152 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
156 mul %[lo], %[a], %[b]
157 umulh %[hi], %[a], %[b]
158 adds %[lo], %[lo], %[c]
159 adc %[hi], %[hi], xzr
160 adds %[lo], %[lo], %[d]
161 adc %[hi], %[hi], xzr
163 : [lo] "=&r"(lo), [hi]
"=&r"(hi)
164 : [a]
"r"(a), [b]
"r"(b), [c]
"r"(c), [d]
"r"(*d)
173 const dword s = dword(a) * b + c + *d;
175 return static_cast<W
>(s);
178#if defined(BOTAN_MP_USE_X86_64_ASM)
180 #define ASM(x) x "\n\t"
182 #define DO_8_TIMES(MACRO, ARG) \
192 #define ADDSUB2_OP(OPERATION, INDEX) \
193 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
194 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")
196 #define ADDSUB3_OP(OPERATION, INDEX) \
197 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
198 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
199 ASM("movq %[carry], 8*" #INDEX "(%[z])")
201 #define LINMUL_OP(WRITE_TO, INDEX) \
202 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
204 ASM("addq %[carry],%%rax") \
205 ASM("adcq $0,%%rdx") \
206 ASM("movq %%rdx,%[carry]") \
207 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
209 #define MULADD_OP(IGNORED, INDEX) \
210 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
212 ASM("addq %[carry],%%rax") \
213 ASM("adcq $0,%%rdx") \
214 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
215 ASM("adcq $0,%%rdx") \
216 ASM("movq %%rdx,%[carry]") \
217 ASM("movq %%rax, 8*" #INDEX " (%[z])")
219 #define ADD_OR_SUBTRACT(CORE_CODE) \
220 ASM("rorq %[carry]") \
222 ASM("sbbq %[carry],%[carry]") \
232#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_addc)
233 if(!std::is_constant_evaluated()) {
234 if constexpr(std::same_as<W, unsigned int>) {
235 return __builtin_addc(x, y, *
carry & 1,
carry);
236 }
else if constexpr(std::same_as<W, unsigned long>) {
237 return __builtin_addcl(x, y, *
carry & 1,
carry);
238 }
else if constexpr(std::same_as<W, unsigned long long>) {
239 return __builtin_addcll(x, y, *
carry & 1,
carry);
250 const W cb = *
carry & 1;
253 return static_cast<W
>(s);
255 const W cb = *
carry & 1;
259 *
carry = c1 | (z < cb);
269#if defined(BOTAN_MP_USE_X86_64_ASM)
270 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
271 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcq"))
273 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
295#if defined(BOTAN_MP_USE_X86_64_ASM)
296 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
297 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcq"))
299 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
321#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_subc)
322 if(!std::is_constant_evaluated()) {
323 if constexpr(std::same_as<W, unsigned int>) {
324 return __builtin_subc(x, y, *
carry & 1,
carry);
325 }
else if constexpr(std::same_as<W, unsigned long>) {
326 return __builtin_subcl(x, y, *
carry & 1,
carry);
327 }
else if constexpr(std::same_as<W, unsigned long long>) {
328 return __builtin_subcll(x, y, *
carry & 1,
carry);
333 const W cb = *
carry & 1;
337 *
carry = c1 | (z > t0);
346#if defined(BOTAN_MP_USE_X86_64_ASM)
347 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
348 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbq"))
350 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
372#if defined(BOTAN_MP_USE_X86_64_ASM)
373 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
374 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
376 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
398#if defined(BOTAN_MP_USE_X86_64_ASM)
399 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
400 asm(DO_8_TIMES(LINMUL_OP,
"z")
402 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
403 :
"cc",
"%rax",
"%rdx");
424#if defined(BOTAN_MP_USE_X86_64_ASM)
425 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
426 asm(DO_8_TIMES(MULADD_OP,
"")
428 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
429 :
"cc",
"%rax",
"%rdx");
455#if defined(__BITINT_MAXWIDTH__) && (__BITINT_MAXWIDTH__ >= 3 * 64)
458 constexpr word3() : m_w(0) {}
460 inline constexpr void mul(W x, W y) { m_w +=
static_cast<W3
>(x) * y; }
462 inline constexpr void mul_x2(W x, W y) { m_w +=
static_cast<W3
>(x) * y * 2; }
464 inline constexpr void add(W x) { m_w += x; }
467 W r =
static_cast<W
>(m_w);
472 inline constexpr W
monty_step(W p0, W p_dash) {
473 const W w0 =
static_cast<W
>(m_w);
474 const W r = w0 * p_dash;
481 const W r =
static_cast<W
>(m_w);
483 m_w +=
static_cast<W3
>(r);
493 constexpr word3() : m_w0(0), m_w1(0), m_w2(0) {}
495 inline constexpr void mul(W x, W y) {
496 #if defined(BOTAN_MP_USE_X86_64_ASM)
497 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
501 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
508 : [w0] "=r"(m_w0), [w1]
"=r"(m_w1), [w2]
"=r"(m_w2)
509 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(m_w0),
"1"(m_w1),
"2"(m_w2)
513 #elif defined(BOTAN_MP_USE_AARCH64_ASM)
514 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
518 mul %[t0], %[x], %[y]
519 umulh %[t1], %[x], %[y]
520 adds %[w0], %[w0], %[t0]
521 adcs %[w1], %[w1], %[t1]
522 adc %[w2], %[w2], xzr
524 : [w0] "+r"(m_w0), [w1]
"+r"(m_w1), [w2]
"+r"(m_w2), [t0]
"=&r"(t0), [t1]
"=&r"(t1)
525 : [x]
"r"(x), [y]
"r"(y)
532 const auto z = dword(x) * y;
533 const auto z0 =
static_cast<W
>(z);
543 #if defined(BOTAN_MP_USE_X86_64_ASM)
544 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
548 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
559 : [w0] "=r"(m_w0), [w1]
"=r"(m_w1), [w2]
"=r"(m_w2)
560 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(m_w0),
"1"(m_w1),
"2"(m_w2)
564 #elif defined(BOTAN_MP_USE_AARCH64_ASM)
565 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
569 mul %[t0], %[x], %[y]
570 umulh %[t1], %[x], %[y]
571 adds %[w0], %[w0], %[t0]
572 adcs %[w1], %[w1], %[t1]
573 adc %[w2], %[w2], xzr
574 adds %[w0], %[w0], %[t0]
575 adcs %[w1], %[w1], %[t1]
576 adc %[w2], %[w2], xzr
578 : [w0] "+r"(m_w0), [w1]
"+r"(m_w1), [w2]
"+r"(m_w2), [t0]
"=&r"(t0), [t1]
"=&r"(t1)
579 : [x]
"r"(x), [y]
"r"(y)
586 const auto z = dword(x) * y;
587 const auto z0 =
static_cast<W
>(z);
601 inline constexpr void add(W x) {
631 m_w1 = m_w2 + (m_w0 < m_w1);
646 #undef ADD_OR_SUBTRACT
constexpr W monty_step(W p0, W p_dash)
constexpr void mul(W x, W y)
constexpr W monty_step_pdash1()
constexpr void mul_x2(W x, W y)
constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word_sub(W x, W y, W *carry) -> W
constexpr auto word_add(W x, W y, W *carry) -> W
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W
constexpr auto word_madd2(W a, W b, W *c) -> W
void carry(int64_t &h0, int64_t &h1)
constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W
constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word_madd3(W a, W b, W c, W *d) -> W
static const constexpr bool dword_is_native
static const constexpr uint32_t top_bit
static const constexpr size_t bytes
static const constexpr size_t bits
static const constexpr uint32_t max
static const constexpr size_t bytes
static const constexpr bool dword_is_native
static const constexpr size_t bits
static const constexpr uint64_t max
static const constexpr uint64_t top_bit