9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
12#include <botan/types.h>
14#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
15 #include <botan/internal/donna128.h>
20#if defined(BOTAN_USE_GCC_INLINE_ASM)
21 #if defined(BOTAN_TARGET_ARCH_IS_X86_FAMILY)
22 #define BOTAN_MP_USE_X86_32_ASM
25 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
26 #define BOTAN_MP_USE_X86_64_ASM
// Concept satisfied only when T is exactly uint32_t or exactly uint64_t.
// T is introduced by a template header that is not part of this excerpt.
34concept WordType = (std::same_as<T, uint32_t> || std::same_as<T, uint64_t>);
// Compile-time constants for the 4-byte word: 32 bits wide, with max equal
// to the all-ones 32-bit value.
// NOTE(review): the enclosing declaration is not visible in this excerpt —
// presumably the uint32_t specialization of a word-traits class; confirm.
42 static const constexpr size_t bytes = 4;
43 static const constexpr size_t bits = 32;
44 static const constexpr uint32_t max = 0xFFFFFFFF;
// The double-width type for this word size is always flagged as native.
47 static const constexpr bool dword_is_native =
true;
// Compile-time constants for the 8-byte word: 64 bits wide, with max equal
// to the all-ones 64-bit value.
// NOTE(review): the enclosing declaration is not visible in this excerpt —
// presumably the uint64_t specialization of a word-traits class; confirm.
53 static const constexpr size_t bytes = 8;
54 static const constexpr size_t bits = 64;
55 static const constexpr uint64_t max = 0xFFFFFFFFFFFFFFFF;
// When the target advertises a native 128-bit integer, uint128_t is used as
// the double-width type and is flagged as native.
57#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
58 typedef uint128_t
dword;
59 static const constexpr bool dword_is_native =
true;
// NOTE(review): presumably the alternative preprocessor branch (its
// directive and dword typedef are not visible here); the double-width type
// is flagged as non-native in that case.
62 static const constexpr bool dword_is_native =
false;
// Multiply-add over a, b and the in/out word *c.
// NOTE(review): presumably the body of word_madd2(W a, W b, W* c); the
// signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated (inline asm cannot run at compile time).
71#if defined(BOTAN_MP_USE_X86_32_ASM)
72 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// Operands: a is output in the "a" register, b in a register or memory, and
// *c in the "d" register marked early-clobber. The inputs tie a and b to
// those outputs and pass the incoming *c as a general operand.
// The instruction text itself is on lines not visible here.
78 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*c)
79 :
"0"(a),
"1"(b), [c]
"g"(*c)
// x86-64 path: identical operand layout, selected when W is uint64_t.
86#if defined(BOTAN_MP_USE_X86_64_ASM)
87 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
93 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*c)
94 :
"0"(a),
"1"(b), [c]
"g"(*c)
// Portable path: evaluate a * b + *c in the wider dword type so the product
// is not truncated to W before the addition.
102 const dword s = dword(a) * b + *c;
// The return value is s narrowed back to W.
// NOTE(review): the store of the upper part into *c is presumably on the
// line between these two (not visible here).
104 return static_cast<W
>(s);
// Multiply-add over a, b, an extra addend c, and the in/out word *d.
// NOTE(review): presumably the body of word_madd3(W a, W b, W c, W* d); the
// signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
112#if defined(BOTAN_MP_USE_X86_32_ASM)
113 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// Operands: a is output in the "a" register, b in a register or memory, and
// *d in the "d" register marked early-clobber. The inputs tie a and b to
// those outputs and pass c and the incoming *d as general operands.
// The instruction text itself is on lines not visible here.
123 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*d)
124 :
"0"(a),
"1"(b), [c]
"g"(c), [d]
"g"(*d)
// x86-64 path: identical operand layout, selected when W is uint64_t.
131#if defined(BOTAN_MP_USE_X86_64_ASM)
132 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
142 : [a] "=a"(a), [b]
"=rm"(b), [
carry]
"=&d"(*d)
143 :
"0"(a),
"1"(b), [c]
"g"(c), [d]
"g"(*d)
// Portable path: evaluate a * b + c + *d in the wider dword type so the
// product is not truncated to W before the additions.
151 const dword s = dword(a) * b + c + *d;
// The return value is s narrowed back to W.
// NOTE(review): the store of the upper part into *d is presumably on the
// line between these two (not visible here).
153 return static_cast<W
>(s);
// Instruction-sequence building blocks for the x86-32 backend. Each ASM(...)
// piece is one instruction; word INDEX is addressed at byte offset 4*INDEX.
156#if defined(BOTAN_MP_USE_X86_32_ASM)
// ADDSUB2_OP: load word INDEX of y into the [carry] register (used here as
// scratch), then apply OPERATION with that register as the source and word
// INDEX of x in memory as the destination, i.e. an in-place update of x.
158 #define ADDSUB2_OP(OPERATION, INDEX) \
159 ASM("movl 4*" #INDEX "(%[y]), %[carry]") \
160 ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")
// ADDSUB3_OP: load word INDEX of x into the [carry] register, apply
// OPERATION with word INDEX of y as the source, and store the result to
// word INDEX of z.
162 #define ADDSUB3_OP(OPERATION, INDEX) \
163 ASM("movl 4*" #INDEX "(%[x]), %[carry]") \
164 ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \
165 ASM("movl %[carry], 4*" #INDEX "(%[z])")
// LINMUL_OP: load word INDEX of x into eax, add the running [carry] into
// eax, propagate the resulting carry flag into edx, make edx the new
// [carry], and store eax to word INDEX of the WRITE_TO operand.
// NOTE(review): the instruction between the load and the add (source line
// 169) is missing from this excerpt — presumably the multiply by y.
167 #define LINMUL_OP(WRITE_TO, INDEX) \
168 ASM("movl 4*" #INDEX "(%[x]),%%eax") \
170 ASM("addl %[carry],%%eax") \
171 ASM("adcl $0,%%edx") \
172 ASM("movl %%edx,%[carry]") \
173 ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
// MULADD_OP: as LINMUL_OP, but additionally adds the existing word INDEX of
// z into eax (again propagating the carry flag into edx) and always stores
// back to z. The first macro parameter is not used by the visible lines.
// NOTE(review): source line 177 is missing from this excerpt — presumably
// the multiply by y.
175 #define MULADD_OP(IGNORED, INDEX) \
176 ASM("movl 4*" #INDEX "(%[x]),%%eax") \
178 ASM("addl %[carry],%%eax") \
179 ASM("adcl $0,%%edx") \
180 ASM("addl 4*" #INDEX "(%[z]),%%eax") \
181 ASM("adcl $0,%%edx") \
182 ASM("movl %%edx,%[carry]") \
183 ASM("movl %%eax, 4*" #INDEX " (%[z])")
// ADD_OR_SUBTRACT: wrapper around a carry-chain sequence. It starts by
// rotating [carry] right by one bit (on x86 this leaves the rotated-out low
// bit in the carry flag) and later executes "sbbl carry,carry", which
// yields 0 or all-ones depending on the carry flag.
// NOTE(review): the line that expands CORE_CODE and the final line(s) of
// the macro are missing from this excerpt; the trailing backslash shows the
// definition continues.
185 #define ADD_OR_SUBTRACT(CORE_CODE) \
186 ASM("rorl %[carry]") \
188 ASM("sbbl %[carry],%[carry]") \
191#elif defined(BOTAN_MP_USE_X86_64_ASM)
// Instruction-sequence building blocks for the x86-64 backend. Each ASM(...)
// piece is one instruction; word INDEX is addressed at byte offset 8*INDEX.
//
// ADDSUB2_OP: load word INDEX of y into the [carry] register (used here as
// scratch), then apply OPERATION with that register as the source and word
// INDEX of x in memory as the destination, i.e. an in-place update of x.
193 #define ADDSUB2_OP(OPERATION, INDEX) \
194 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
195 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")
// ADDSUB3_OP: load word INDEX of x into the [carry] register, apply
// OPERATION with word INDEX of y as the source, and store the result to
// word INDEX of z.
197 #define ADDSUB3_OP(OPERATION, INDEX) \
198 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
199 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
200 ASM("movq %[carry], 8*" #INDEX "(%[z])")
// LINMUL_OP: load word INDEX of x into rax, add the running [carry] into
// rax, propagate the resulting carry flag into rdx, make rdx the new
// [carry], and store rax to word INDEX of the WRITE_TO operand.
// NOTE(review): the instruction between the load and the add (source line
// 204) is missing from this excerpt — presumably the multiply by y.
202 #define LINMUL_OP(WRITE_TO, INDEX) \
203 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
205 ASM("addq %[carry],%%rax") \
206 ASM("adcq $0,%%rdx") \
207 ASM("movq %%rdx,%[carry]") \
208 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
// MULADD_OP: as LINMUL_OP, but additionally adds the existing word INDEX of
// z into rax (again propagating the carry flag into rdx) and always stores
// back to z. The first macro parameter is not used by the visible lines.
// NOTE(review): source line 212 is missing from this excerpt — presumably
// the multiply by y.
210 #define MULADD_OP(IGNORED, INDEX) \
211 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
213 ASM("addq %[carry],%%rax") \
214 ASM("adcq $0,%%rdx") \
215 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
216 ASM("adcq $0,%%rdx") \
217 ASM("movq %%rdx,%[carry]") \
218 ASM("movq %%rax, 8*" #INDEX " (%[z])")
// ADD_OR_SUBTRACT: wrapper around a carry-chain sequence. It starts by
// rotating [carry] right by one bit (on x86 this leaves the rotated-out low
// bit in the carry flag) and later executes "sbbq carry,carry", which
// yields 0 or all-ones depending on the carry flag.
// NOTE(review): the line that expands CORE_CODE and the final line(s) of
// the macro are missing from this excerpt; the trailing backslash shows the
// definition continues.
220 #define ADD_OR_SUBTRACT(CORE_CODE) \
221 ASM("rorq %[carry]") \
223 ASM("sbbq %[carry],%[carry]") \
228#if defined(ADD_OR_SUBTRACT)
// The helpers below exist only when one of the asm backends defined
// ADD_OR_SUBTRACT.
//
// ASM(x): appends a newline and a tab to the string literal x so that
// adjacent pieces concatenate into one multi-line assembler template.
230 #define ASM(x) x "\n\t"
// DO_8_TIMES(MACRO, ARG): the expansion is on lines missing from this
// excerpt. NOTE(review): presumably invokes MACRO(ARG, i) for eight
// consecutive word indices — confirm against the full file.
232 #define DO_8_TIMES(MACRO, ARG) \
249#if defined(BOTAN_MP_USE_X86_32_ASM)
// Single-word add with carry.
// NOTE(review): presumably the body of word_add(W x, W y, W* carry); the
// signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
250 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// A single "adcl y, x" wrapped in ADD_OR_SUBTRACT. x and *carry are tied to
// output operands 0 and 1 (declared on a line not visible here); y may
// live in a register or in memory.
251 asm(ADD_OR_SUBTRACT(ASM(
"adcl %[y],%[x]"))
253 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
// x86-64 path: same structure with adcq, selected when W is uint64_t.
259#if defined(BOTAN_MP_USE_X86_64_ASM)
260 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
261 asm(ADD_OR_SUBTRACT(ASM(
"adcq %[y],%[x]"))
263 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
// In-place multi-word add with carry built from ADDSUB2_OP.
// NOTE(review): presumably the body of word8_add2(W x[8], const W y[8],
// W carry); the signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
281#if defined(BOTAN_MP_USE_X86_32_ASM)
282 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// ADDSUB2_OP with "adcl" updates words of x in memory from words of y.
// x and y are passed as pointer registers; carry is tied to output operand
// 0, which is declared on a line not visible here.
283 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcl"))
285 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
// x86-64 path: same structure with adcq, selected when W is uint64_t.
291#if defined(BOTAN_MP_USE_X86_64_ASM)
292 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
293 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcq"))
295 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
// Three-operand multi-word add with carry built from ADDSUB3_OP.
// NOTE(review): presumably the body of word8_add3(W z[8], const W x[8],
// const W y[8], W carry); the signature line is not part of this excerpt —
// confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
317#if defined(BOTAN_MP_USE_X86_32_ASM)
318 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// ADDSUB3_OP with "adcl" reads a word of x, adds the matching word of y,
// and stores the result to z. x, y and z are passed as pointer registers;
// carry is tied to output operand 0, declared on a line not visible here.
319 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcl"))
321 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
// x86-64 path: same structure with adcq, selected when W is uint64_t.
327#if defined(BOTAN_MP_USE_X86_64_ASM)
328 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
329 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcq"))
331 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
// Single-word subtract with borrow.
// NOTE(review): presumably the body of word_sub(W x, W y, W* carry); the
// signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
353#if defined(BOTAN_MP_USE_X86_32_ASM)
354 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// A single "sbbl y, x" wrapped in ADD_OR_SUBTRACT. x and *carry are tied to
// output operands 0 and 1 (declared on a line not visible here); y may
// live in a register or in memory.
355 asm(ADD_OR_SUBTRACT(ASM(
"sbbl %[y],%[x]"))
357 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
// x86-64 path: same structure with sbbq, selected when W is uint64_t.
363#if defined(BOTAN_MP_USE_X86_64_ASM)
364 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
365 asm(ADD_OR_SUBTRACT(ASM(
"sbbq %[y],%[x]"))
367 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
// Portable path (tail only): the outgoing borrow is c1 OR'ed with (z > t0).
// c1, z and t0 are computed on lines not visible in this excerpt.
376 *
carry = c1 | (z > t0);
// In-place multi-word subtract with borrow built from ADDSUB2_OP.
// NOTE(review): presumably the body of word8_sub2(W x[8], const W y[8],
// W carry); the signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
385#if defined(BOTAN_MP_USE_X86_32_ASM)
386 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// ADDSUB2_OP with "sbbl" subtracts words of y from words of x in memory.
// x and y are passed as pointer registers; carry is tied to output operand
// 0, which is declared on a line not visible here.
387 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbl"))
389 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
// x86-64 path: same structure with sbbq, selected when W is uint64_t.
395#if defined(BOTAN_MP_USE_X86_64_ASM)
396 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
397 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbq"))
399 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
// Multi-word reverse subtract with borrow built from ADDSUB3_OP.
// NOTE(review): presumably the body of word8_sub2_rev(W x[8],
// const W y[8], W carry); the signature line is not part of this excerpt —
// confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
421#if defined(BOTAN_MP_USE_X86_32_ASM)
422 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// The operand bindings are crossed: the asm name [x] is bound to the C++
// variable y, while [y] and [z] are both bound to x. ADDSUB3_OP loads from
// [x], subtracts [y] and stores to [z], so each step reads a word of y,
// subtracts the matching word of x, and writes the result back into x.
// Do not "fix" the names without also changing the macro used.
423 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbl"))
425 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
// x86-64 path: same crossed bindings with sbbq, selected when W is uint64_t.
431#if defined(BOTAN_MP_USE_X86_64_ASM)
432 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
433 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
435 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
// Three-operand multi-word subtract with borrow built from ADDSUB3_OP.
// NOTE(review): presumably the body of word8_sub3(W z[8], const W x[8],
// const W y[8], W carry); the signature line is not part of this excerpt —
// confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
457#if defined(BOTAN_MP_USE_X86_32_ASM)
458 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// ADDSUB3_OP with "sbbl" reads a word of x, subtracts the matching word of
// y, and stores the result to z. x, y and z are passed as pointer
// registers; carry is tied to output operand 0, declared on a line not
// visible here.
459 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbl"))
461 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
// x86-64 path: same structure with sbbq, selected when W is uint64_t.
467#if defined(BOTAN_MP_USE_X86_64_ASM)
468 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
469 asm(ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
471 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
// In-place multi-word multiply by a single word, built from LINMUL_OP.
// NOTE(review): presumably the body of word8_linmul2(W x[8], W y,
// W carry); the signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
493#if defined(BOTAN_MP_USE_X86_32_ASM)
494 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// LINMUL_OP is instantiated with WRITE_TO = "x", so each result word is
// stored back into x. x is a pointer register, y may be a register or
// memory, and carry is tied to output operand 0 (declared on a line not
// visible here). eax and edx are listed as clobbered because LINMUL_OP
// uses them by name.
495 asm(DO_8_TIMES(LINMUL_OP,
"x")
497 : [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
498 :
"cc",
"%eax",
"%edx");
// x86-64 path: same structure with rax/rdx, selected when W is uint64_t.
503#if defined(BOTAN_MP_USE_X86_64_ASM)
504 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
505 asm(DO_8_TIMES(LINMUL_OP,
"x")
507 : [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
508 :
"cc",
"%rax",
"%rdx");
// Multi-word multiply by a single word with a separate destination, built
// from LINMUL_OP.
// NOTE(review): presumably the body of word8_linmul3(W z[8], const W x[8],
// W y, W carry); the signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
529#if defined(BOTAN_MP_USE_X86_32_ASM)
530 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// LINMUL_OP is instantiated with WRITE_TO = "z", so words are read from x
// and each result word is stored into z. z and x are pointer registers, y
// may be a register or memory, and carry is tied to output operand 0
// (declared on a line not visible here). eax and edx are listed as
// clobbered because LINMUL_OP uses them by name.
531 asm(DO_8_TIMES(LINMUL_OP,
"z")
533 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
534 :
"cc",
"%eax",
"%edx");
// x86-64 path: same structure with rax/rdx, selected when W is uint64_t.
539#if defined(BOTAN_MP_USE_X86_64_ASM)
540 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
541 asm(DO_8_TIMES(LINMUL_OP,
"z")
543 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
544 :
"cc",
"%rax",
"%rdx");
// Multi-word multiply-accumulate into z, built from MULADD_OP.
// NOTE(review): presumably the body of word8_madd3(W z[8], const W x[8],
// W y, W carry); the signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
565#if defined(BOTAN_MP_USE_X86_32_ASM)
566 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// MULADD_OP does not use its first argument in its visible lines, hence the
// empty string. Per the macro, each step adds the existing word of z into
// the result before storing back to z. z and x are pointer registers, y may
// be a register or memory, and carry is tied to output operand 0 (declared
// on a line not visible here). eax and edx are listed as clobbered because
// MULADD_OP uses them by name.
567 asm(DO_8_TIMES(MULADD_OP,
"")
569 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
570 :
"cc",
"%eax",
"%edx");
// x86-64 path: same structure with rax/rdx, selected when W is uint64_t.
575#if defined(BOTAN_MP_USE_X86_64_ASM)
576 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
577 asm(DO_8_TIMES(MULADD_OP,
"")
579 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
580 :
"cc",
"%rax",
"%rdx");
// Multiply x by y and fold the product into the three words *w0, *w1, *w2.
// NOTE(review): presumably the body of word3_muladd(W* w2, W* w1, W* w0,
// W x, W y); the signature line is not part of this excerpt — confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
602#if defined(BOTAN_MP_USE_X86_32_ASM)
603 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// First asm statement: "mull y" with x preloaded in the "a" register. The
// outputs come from the "a" and "d" registers, which on x86 hold the low
// and high halves of the product, so z0 is the low word and z1 the high.
606 asm(
"mull %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
// Second asm statement (its instruction text is on lines not visible
// here): *w0, *w1 and *w2 are read-write through tied operands 0-2, and
// z0 and z1 are register inputs.
613 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
614 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
// x86-64 path: same two-statement structure with mulq, selected when W is
// uint64_t.
620#if defined(BOTAN_MP_USE_X86_64_ASM)
621 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
624 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
631 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
632 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
// Portable path (tail only): *w2 is incremented when *w1 < carry.
// NOTE(review): presumably this detects unsigned wraparound of a preceding
// "*w1 += carry" that is not visible in this excerpt — confirm.
641 *w2 += (*w1 <
carry);
// word3_add: operates on the three words *w2, *w1, *w0 and the single word
// x; returns nothing, so results are delivered through the pointers.
// NOTE(review): presumably adds x into the 3-word value and ripples the
// carry upward — the instruction text and the portable path are missing
// from this excerpt, so confirm against the full file.
649inline constexpr void word3_add(W* w2, W* w1, W* w0, W x) {
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
650#if defined(BOTAN_MP_USE_X86_32_ASM)
651 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// *w0, *w1 and *w2 are read-write through tied operands 0-2; x is a
// register input.
657 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
658 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
// x86-64 path: identical operand layout, selected when W is uint64_t.
664#if defined(BOTAN_MP_USE_X86_64_ASM)
665 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
671 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
672 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
// Multiply x by y and fold the result into the three words *w0, *w1, *w2.
// NOTE(review): presumably the body of word3_muladd_2(W* w2, W* w1, W* w0,
// W x, W y), which accumulates twice the product — the signature and the
// doubling steps are missing from this excerpt, so confirm.
//
// x86-32 path: taken only when W is uint32_t and the call is not being
// constant-evaluated.
691#if defined(BOTAN_MP_USE_X86_32_ASM)
692 if(std::same_as<W, uint32_t> && !std::is_constant_evaluated()) {
// First asm statement: "mull y" with x preloaded in the "a" register. The
// outputs come from the "a" and "d" registers, which on x86 hold the low
// and high halves of the product, so z0 is the low word and z1 the high.
695 asm(
"mull %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
// Second asm statement (its instruction text is on lines not visible
// here): *w0, *w1 and *w2 are read-write through tied operands 0-2, and
// z0 and z1 are register inputs.
706 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
707 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
// x86-64 path: same two-statement structure with mulq, selected when W is
// uint64_t.
713#if defined(BOTAN_MP_USE_X86_64_ASM)
714 if(std::same_as<W, uint64_t> && !std::is_constant_evaluated()) {
717 asm(
"mulq %[y]" :
"=a"(z0),
"=d"(z1) :
"a"(x), [y]
"rm"(y) :
"cc");
728 : [w0] "=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
729 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
// Portable path (fragments): top captures y shifted right by
// top_bit_shift, whose definition is on a line not visible here.
741 W top = (y >> top_bit_shift);
// The bits of x above top_bit_shift are OR'ed into y.
// NOTE(review): presumably the two visible lines belong to a one-bit left
// shift of the (y:x) pair, with the shift statements on the missing lines
// — confirm.
743 y |= (x >> top_bit_shift);
755 #undef ADD_OR_SUBTRACT
constexpr auto word8_sub3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word_sub(W x, W y, W *carry) -> W
constexpr auto word_add(W x, W y, W *carry) -> W
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word8_add3(W z[8], const W x[8], const W y[8], W carry) -> W
constexpr auto word8_sub2(W x[8], const W y[8], W carry) -> W
constexpr auto word_madd2(W a, W b, W *c) -> W
constexpr auto word8_sub2_rev(W x[8], const W y[8], W carry) -> W
constexpr void word3_add(W *w2, W *w1, W *w0, W x)
constexpr void word3_muladd_2(W *w2, W *w1, W *w0, W x, W y)
constexpr void word3_muladd(W *w2, W *w1, W *w0, W x, W y)
void carry(int64_t &h0, int64_t &h1)
constexpr auto word8_linmul2(W x[8], W y, W carry) -> W
constexpr auto word8_add2(W x[8], const W y[8], W carry) -> W
constexpr auto word8_linmul3(W z[8], const W x[8], W y, W carry) -> W
constexpr auto word_madd3(W a, W b, W c, W *d) -> W