9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
12#include <botan/compiler.h>
13#include <botan/types.h>
14#include <botan/internal/target_info.h>
17#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
18 #include <botan/internal/donna128.h>
25#if defined(BOTAN_USE_GCC_INLINE_ASM) && defined(BOTAN_TARGET_ARCH_IS_X86_64)
26 #define BOTAN_MP_USE_X86_64_ASM
39static constexpr bool use_dword_for_word_add =
false;
41static constexpr bool use_dword_for_word_add =
true;
// Satisfied only when T is exactly uint32_t or exactly uint64_t.
48concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>);
// Constants describing a 32-bit word: its size in bytes and in bits, the
// all-ones value (max) and a mask with only the highest bit set (top_bit).
56 static const constexpr size_t bytes = 4;
57 static const constexpr size_t bits = 32;
58 static const constexpr uint32_t
max = 0xFFFFFFFF;
59 static const constexpr uint32_t
top_bit = 0x80000000;
// Constants describing a 64-bit word: its size in bytes and in bits, the
// all-ones value (max) and a mask with only the highest bit set (top_bit).
68 static const constexpr size_t bytes = 8;
69 static const constexpr size_t bits = 64;
70 static const constexpr uint64_t
max = 0xFFFFFFFFFFFFFFFF;
71 static const constexpr uint64_t
top_bit = 0x8000000000000000;
// When the target has a native 128-bit integer, dword is an alias for
// uint128_t.
73#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
74 typedef uint128_t
dword;
// x86-64 asm path: taken only when W is uint64_t and the call is not being
// constant-evaluated. The asm statement's outputs are a ("=a"), b ("=rm") and
// *c (early-clobber "=&d"); a and b are also tied in as inputs through the
// matching constraints "0" and "1", and *c is passed as a general operand.
87#if defined(BOTAN_MP_USE_X86_64_ASM)
88 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
94 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*c)
95 :
"0"(a),
"1"(b), [c]
"g"(*c)
// Portable path: a * b + *c is evaluated in the double-width type dword, and
// that value converted to W is returned.
103 const dword s = dword(a) * b + *c;
105 return static_cast<W
>(s);
// x86-64 asm path: taken only when W is uint64_t and the call is not being
// constant-evaluated. Outputs are a ("=a"), b ("=rm") and *d (early-clobber
// "=&d"); a and b are tied in as inputs through "0" and "1", while c and *d
// are passed as general operands.
113#if defined(BOTAN_MP_USE_X86_64_ASM)
114 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
124 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*d)
125 :
"0"(a),
"1"(b), [c]
"g"(c), [d]
"g"(*d)
// Portable path: a * b + c + *d is evaluated in the double-width type dword,
// and that value converted to W is returned.
133 const dword s = dword(a) * b + c + *d;
135 return static_cast<W
>(s);
138#if defined(BOTAN_MP_USE_X86_64_ASM)
// Appends "\n\t" to a string-literal fragment of assembly so that adjacent
// fragments concatenate into separate, tab-indented instruction lines.
140 #define ASM(x) x "\n\t"
142 #define DO_8_TIMES(MACRO, ARG) \
// One word of an in-place add/subtract: loads the 64-bit word at byte offset
// 8*INDEX from %[y] into the %[carry] register, then applies OPERATION with
// that register as the source and the word at 8*INDEX(%[x]) as the
// destination. %[carry] therefore doubles as a scratch register here.
152 #define ADDSUB2_OP(OPERATION, INDEX) \
153 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
154 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")
// One word of a three-operand add/subtract: loads the 64-bit word at byte
// offset 8*INDEX from %[x] into the %[carry] register, applies OPERATION with
// the word at 8*INDEX(%[y]) as the source, and stores the result to
// 8*INDEX(%[z]). %[carry] is used as a scratch register here.
156 #define ADDSUB3_OP(OPERATION, INDEX) \
157 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
158 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
159 ASM("movq %[carry], 8*" #INDEX "(%[z])")
// Per-word step operating on rdx:rax. The instructions below load the word at
// 8*INDEX(%[x]) into rax, add %[carry] into rax and propagate the resulting
// carry flag into rdx, save rdx as the new %[carry], and store rax to word
// INDEX of the operand named by WRITE_TO.
161 #define LINMUL_OP(WRITE_TO, INDEX) \
162 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
164 ASM("addq %[carry],%%rax") \
165 ASM("adcq $0,%%rdx") \
166 ASM("movq %%rdx,%[carry]") \
167 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
// Per-word step operating on rdx:rax that also accumulates into z. The
// instructions below load the word at 8*INDEX(%[x]) into rax, add %[carry]
// and then the word at 8*INDEX(%[z]) into rax (each time propagating the
// carry flag into rdx), save rdx as the new %[carry], and store rax back to
// 8*INDEX(%[z]). The first macro parameter is not referenced by these lines.
169 #define MULADD_OP(IGNORED, INDEX) \
170 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
172 ASM("addq %[carry],%%rax") \
173 ASM("adcq $0,%%rdx") \
174 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
175 ASM("adcq $0,%%rdx") \
176 ASM("movq %%rdx,%[carry]") \
177 ASM("movq %%rax, 8*" #INDEX " (%[z])")
179 #define ADD_OR_SUBTRACT(CORE_CODE) \
180 ASM("rorq %[carry]") \
182 ASM("sbbq %[carry],%[carry]") \
// Builtin path: when the compiler offers __builtin_addc and the call is not
// being constant-evaluated, the builtin whose operand type is exactly W is
// chosen (unsigned int / unsigned long / unsigned long long). Each call
// passes bit 0 of *carry as the carry-in and `carry` itself as the location
// that receives the carry-out.
192#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_addc)
193 if(!std::is_constant_evaluated()) {
194 if constexpr(std::same_as<W, unsigned int>) {
195 return __builtin_addc(x, y, *
carry & 1,
carry);
196 }
else if constexpr(std::same_as<W, unsigned long>) {
197 return __builtin_addcl(x, y, *
carry & 1,
carry);
198 }
else if constexpr(std::same_as<W, unsigned long long>) {
199 return __builtin_addcll(x, y, *
carry & 1,
carry);
// Fallback code: the incoming carry is again reduced to its lowest bit (cb).
210 const W cb = *
carry & 1;
213 return static_cast<W
>(s);
215 const W cb = *
carry & 1;
// The outgoing carry is the OR of c1 and the comparison (z < cb).
219 *
carry = c1 | (z < cb);
// x86-64 asm path (W is uint64_t and the call is not constant-evaluated):
// the asm template is DO_8_TIMES(ADDSUB2_OP, "adcq") wrapped in
// ADD_OR_SUBTRACT. x and y are passed in registers, and the incoming carry is
// tied to output operand 0 through the matching constraint "0".
229#if defined(BOTAN_MP_USE_X86_64_ASM)
230 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
231 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcq"))
233 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
// x86-64 asm path (W is uint64_t and the call is not constant-evaluated):
// the asm template is DO_8_TIMES(ADDSUB3_OP, "adcq") wrapped in
// ADD_OR_SUBTRACT. x, y and z are passed in registers, and the incoming carry
// is tied to output operand 0 through the matching constraint "0".
255#if defined(BOTAN_MP_USE_X86_64_ASM)
256 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
257 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcq"))
259 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
// Builtin path: when the compiler offers __builtin_subc and the call is not
// being constant-evaluated, the builtin whose operand type is exactly W is
// chosen (unsigned int / unsigned long / unsigned long long). Each call
// passes bit 0 of *carry as the borrow-in and `carry` itself as the location
// that receives the borrow-out.
281#if BOTAN_COMPILER_HAS_BUILTIN(__builtin_subc)
282 if(!std::is_constant_evaluated()) {
283 if constexpr(std::same_as<W, unsigned int>) {
284 return __builtin_subc(x, y, *
carry & 1,
carry);
285 }
else if constexpr(std::same_as<W, unsigned long>) {
286 return __builtin_subcl(x, y, *
carry & 1,
carry);
287 }
else if constexpr(std::same_as<W, unsigned long long>) {
288 return __builtin_subcll(x, y, *
carry & 1,
carry);
// Fallback code: the incoming carry is reduced to its lowest bit (cb).
293 const W cb = *
carry & 1;
// The outgoing carry is the OR of c1 and the comparison (z > t0).
297 *
carry = c1 | (z > t0);
// x86-64 asm path (W is uint64_t and the call is not constant-evaluated):
// the asm template is DO_8_TIMES(ADDSUB2_OP, "sbbq") wrapped in
// ADD_OR_SUBTRACT. x and y are passed in registers, and the incoming carry is
// tied to output operand 0 through the matching constraint "0".
// NOTE(review): this statement is plain `asm`, whereas the other
// ADD_OR_SUBTRACT statements in this file are `asm volatile`. Since the
// template stores through %[x], confirm the missing `volatile` is intended.
306#if defined(BOTAN_MP_USE_X86_64_ASM)
307 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
308 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbq"))
310 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
// x86-64 asm path (W is uint64_t and the call is not constant-evaluated):
// the asm template is DO_8_TIMES(ADDSUB3_OP, "sbbq") wrapped in
// ADD_OR_SUBTRACT. x, y and z are passed in registers, and the incoming carry
// is tied to output operand 0 through the matching constraint "0".
332#if defined(BOTAN_MP_USE_X86_64_ASM)
333 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
334 asm volatile(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
336 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
// x86-64 asm path (W is uint64_t and the call is not constant-evaluated):
// the asm template is DO_8_TIMES(LINMUL_OP, "z"). z and x are passed in
// registers, y as register-or-memory, and the incoming carry is tied to
// output operand 0. The clobber list names the flags, rax and rdx.
// NOTE(review): this is plain `asm` (not volatile) and the clobber list has
// no "memory" entry, yet LINMUL_OP stores through %[z], which is only an
// input register here. Confirm the output operands cover those stores.
358#if defined(BOTAN_MP_USE_X86_64_ASM)
359 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
360 asm(DO_8_TIMES(LINMUL_OP,
"z")
362 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
363 :
"cc",
"%rax",
"%rdx");
// x86-64 asm path (W is uint64_t and the call is not constant-evaluated):
// the asm template is DO_8_TIMES(MULADD_OP, ""). z and x are passed in
// registers, y as register-or-memory, and the incoming carry is tied to
// output operand 0. The clobber list names the flags, rax and rdx.
// NOTE(review): this is plain `asm` (not volatile) and the clobber list has
// no "memory" entry, yet MULADD_OP both reads and stores through %[z], which
// is only an input register here. Confirm the output operands cover this.
384#if defined(BOTAN_MP_USE_X86_64_ASM)
385 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
386 asm(DO_8_TIMES(MULADD_OP,
"")
388 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
389 :
"cc",
"%rax",
"%rdx");
// Variant compiled only when __BITINT_MAXWIDTH__ is defined and at least
// 3 * 64. The accumulator lives in the single member m_w.
415#if defined(__BITINT_MAXWIDTH__) && (__BITINT_MAXWIDTH__ >= 3 * 64)
// Starts with the accumulator at zero.
418 constexpr word3() : m_w(0) {}
// Adds x * y to the accumulator; x is widened to W3 before multiplying.
420 inline constexpr void mul(W x, W y) { m_w +=
static_cast<W3
>(x) * y; }
// Adds 2 * x * y to the accumulator; x is widened to W3 before multiplying.
422 inline constexpr void mul_x2(W x, W y) { m_w +=
static_cast<W3
>(x) * y * 2; }
// Adds the single word x to the accumulator.
424 inline constexpr void add(W x) { m_w += x; }
427 W r =
static_cast<W
>(m_w);
// Takes the accumulator converted to W (w0) and multiplies it by p_dash in
// W arithmetic to obtain r.
432 inline constexpr W
monty_step(W p0, W p_dash) {
433 const W w0 =
static_cast<W
>(m_w);
434 const W r = w0 * p_dash;
441 const W r =
static_cast<W
>(m_w);
443 m_w +=
static_cast<W3
>(r);
// Three-word accumulator variant: starts with m_w0, m_w1 and m_w2 all zero.
453 constexpr word3() : m_w0(0), m_w1(0), m_w2(0) {}
// Accumulates x * y into (m_w0, m_w1, m_w2).
455 inline constexpr void mul(W x, W y) {
456 #if defined(BOTAN_MP_USE_X86_64_ASM)
457 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
// First asm statement: mulq with x preloaded in the "a" register; the
// product is returned in z0 ("=a") and z1 ("=d").
460 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
// Second asm statement: z0 and z1 are register inputs; m_w0, m_w1 and m_w2
// are register outputs that are also tied in as inputs via "0", "1", "2".
467 : [w0] "=r"(m_w0), [w1]
"=r"(m_w1), [w2]
"=r"(m_w2)
468 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(m_w0),
"1"(m_w1),
"2"(m_w2)
// Portable path: x * y + m_w0 is evaluated in dword; m_w0 receives that value
// converted to W, and m_w2 is incremented by the comparison (m_w1 < carry).
475 const dword s = dword(x) * y + m_w0;
477 m_w0 =
static_cast<W
>(s);
479 m_w2 += (m_w1 <
carry);
// x86-64 asm path (W is uint64_t and the call is not constant-evaluated).
// First asm statement: mulq with x preloaded in the "a" register; the product
// is returned in z0 ("=a") and z1 ("=d").
483 #if defined(BOTAN_MP_USE_X86_64_ASM)
484 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
487 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
// Second asm statement: z0 and z1 are register inputs; m_w0, m_w1 and m_w2
// are register outputs that are also tied in as inputs via "0", "1", "2".
498 : [w0] "=r"(m_w0), [w1]
"=r"(m_w1), [w2]
"=r"(m_w2)
499 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(m_w0),
"1"(m_w1),
"2"(m_w2)
520 inline constexpr void add(W x) {
550 m_w1 = m_w2 + (m_w0 < m_w1);
565 #undef ADD_OR_SUBTRACT
constexpr W monty_step(W p0, W p_dash)
constexpr void mul(W x, W y)
constexpr W monty_step_pdash1()
constexpr void mul_x2(W x, W y)
constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word_sub(W x, W y, W *carry) -> W
constexpr auto word_add(W x, W y, W *carry) -> W
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W
constexpr auto word_madd2(W a, W b, W *c) -> W
void carry(int64_t &h0, int64_t &h1)
constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W
constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word_madd3(W a, W b, W c, W *d) -> W
static const constexpr bool dword_is_native
static const constexpr uint32_t top_bit
static const constexpr size_t bytes
static const constexpr size_t bits
static const constexpr uint32_t max
static const constexpr size_t bytes
static const constexpr bool dword_is_native
static const constexpr size_t bits
static const constexpr uint64_t max
static const constexpr uint64_t top_bit