9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
12#include <botan/types.h>
13#include <botan/internal/mul128.h>
/*
* Select the double-width integer type `dword`: uint64_t when words are
* 32 bits, uint128_t when words are 64 bits and
* BOTAN_TARGET_HAS_NATIVE_UINT128 is defined. BOTAN_HAS_MP_DWORD is defined
* alongside each typedef so later code can test whether `dword` exists.
* The #error reports that BOTAN_MP_WORD_BITS must be 32 or 64.
*/
17#if (BOTAN_MP_WORD_BITS == 32)
18 typedef uint64_t dword;
19 #define BOTAN_HAS_MP_DWORD
21#elif (BOTAN_MP_WORD_BITS == 64)
22 #if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
23 typedef uint128_t dword;
24 #define BOTAN_HAS_MP_DWORD
30 #error BOTAN_MP_WORD_BITS must be 32 or 64
/*
* When BOTAN_USE_GCC_INLINE_ASM is defined, choose the inline-asm backend
* whose target architecture matches the word size:
*   x86-32 with 32-bit words -> BOTAN_MP_USE_X86_32_ASM
*   x86-64 with 64-bit words -> BOTAN_MP_USE_X86_64_ASM
* The rest of the header tests these two macros to pick an implementation.
*/
33#if defined(BOTAN_USE_GCC_INLINE_ASM)
35 #if defined(BOTAN_TARGET_ARCH_IS_X86_32) && (BOTAN_MP_WORD_BITS == 32)
36 #define BOTAN_MP_USE_X86_32_ASM
37 #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && (BOTAN_MP_WORD_BITS == 64)
38 #define BOTAN_MP_USE_X86_64_ASM
48#if defined(BOTAN_MP_USE_X86_32_ASM)
54 : [a]"=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*c)
55 :
"0"(a),
"1"(
b), [c]
"g"(*c) :
"cc");
59#elif defined(BOTAN_MP_USE_X86_64_ASM)
65 : [a]"=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*c)
66 :
"0"(a),
"1"(
b), [c]
"g"(*c) :
"cc");
70#elif defined(BOTAN_HAS_MP_DWORD)
71 const dword s =
static_cast<dword
>(a) *
b + *c;
73 return static_cast<word
>(s);
94#if defined(BOTAN_MP_USE_X86_32_ASM)
104 : [a]"=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*d)
105 :
"0"(a),
"1"(
b), [c]
"g"(c), [d]
"g"(*d) :
"cc");
109#elif defined(BOTAN_MP_USE_X86_64_ASM)
119 : [a]"=a"(a), [
b]
"=rm"(
b), [
carry]
"=&d"(*d)
120 :
"0"(a),
"1"(
b), [c]
"g"(c), [d]
"g"(*d) :
"cc");
124#elif defined(BOTAN_HAS_MP_DWORD)
125 const dword s =
static_cast<dword
>(a) *
b + c + *d;
127 return static_cast<word
>(s);
146#if defined(BOTAN_MP_USE_X86_32_ASM)
/*
* x86-32 instruction fragments (AT&T operand order: source, destination).
* Each fragment is wrapped in ASM(); word INDEX of an array operand is
* addressed as 4*INDEX bytes from its base register.
*
* ADDSUB2_OP(OPERATION, INDEX): load y[INDEX] into %[carry], overwriting
*    it, then apply OPERATION with that value to x[INDEX] in memory.
* ADDSUB3_OP(OPERATION, INDEX): load x[INDEX] into %[carry], apply
*    OPERATION with y[INDEX] as the source, and store the result to z[INDEX].
* LINMUL_OP(WRITE_TO, INDEX): load x[INDEX] into eax, add %[carry] to eax
*    and fold the resulting carry flag into edx (adcl $0), keep edx as the
*    next %[carry], and store eax to word INDEX of the operand named by
*    WRITE_TO.
* MULADD_OP(IGNORED, INDEX): same sequence as LINMUL_OP, with an extra add
*    of z[INDEX] into eax (its carry flag also folded into edx); the result
*    is always stored to z[INDEX] and the first argument is unused.
* ADD_OR_SUBTRACT(CORE_CODE): rotate %[carry] right so that its low bit
*    lands in the carry flag; the later sbb of %[carry] with itself turns
*    the carry flag back into a register value.
*/
148#define ADDSUB2_OP(OPERATION, INDEX) \
149 ASM("movl 4*" #INDEX "(%[y]), %[carry]") \
150 ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \
152#define ADDSUB3_OP(OPERATION, INDEX) \
153 ASM("movl 4*" #INDEX "(%[x]), %[carry]") \
154 ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \
155 ASM("movl %[carry], 4*" #INDEX "(%[z])") \
157#define LINMUL_OP(WRITE_TO, INDEX) \
158 ASM("movl 4*" #INDEX "(%[x]),%%eax") \
160 ASM("addl %[carry],%%eax") \
161 ASM("adcl $0,%%edx") \
162 ASM("movl %%edx,%[carry]") \
163 ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
165#define MULADD_OP(IGNORED, INDEX) \
166 ASM("movl 4*" #INDEX "(%[x]),%%eax") \
168 ASM("addl %[carry],%%eax") \
169 ASM("adcl $0,%%edx") \
170 ASM("addl 4*" #INDEX "(%[z]),%%eax") \
171 ASM("adcl $0,%%edx") \
172 ASM("movl %%edx,%[carry]") \
173 ASM("movl %%eax, 4*" #INDEX " (%[z])")
175#define ADD_OR_SUBTRACT(CORE_CODE) \
176 ASM("rorl %[carry]") \
178 ASM("sbbl %[carry],%[carry]") \
181#elif defined(BOTAN_MP_USE_X86_64_ASM)
/*
* x86-64 counterparts of the instruction fragments (AT&T operand order:
* source, destination). Each fragment is wrapped in ASM(); word INDEX of an
* array operand is addressed as 8*INDEX bytes from its base register.
*
* ADDSUB2_OP(OPERATION, INDEX): load y[INDEX] into %[carry], overwriting
*    it, then apply OPERATION with that value to x[INDEX] in memory.
* ADDSUB3_OP(OPERATION, INDEX): load x[INDEX] into %[carry], apply
*    OPERATION with y[INDEX] as the source, and store the result to z[INDEX].
* LINMUL_OP(WRITE_TO, INDEX): load x[INDEX] into rax, add %[carry] to rax
*    and fold the resulting carry flag into rdx (adcq $0), keep rdx as the
*    next %[carry], and store rax to word INDEX of the operand named by
*    WRITE_TO.
* MULADD_OP(IGNORED, INDEX): same sequence as LINMUL_OP, with an extra add
*    of z[INDEX] into rax (its carry flag also folded into rdx); the result
*    is always stored to z[INDEX] and the first argument is unused.
* ADD_OR_SUBTRACT(CORE_CODE): rotate %[carry] right so that its low bit
*    lands in the carry flag; the later sbb of %[carry] with itself turns
*    the carry flag back into a register value.
*/
183#define ADDSUB2_OP(OPERATION, INDEX) \
184 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
185 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \
187#define ADDSUB3_OP(OPERATION, INDEX) \
188 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
189 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
190 ASM("movq %[carry], 8*" #INDEX "(%[z])") \
192#define LINMUL_OP(WRITE_TO, INDEX) \
193 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
195 ASM("addq %[carry],%%rax") \
196 ASM("adcq $0,%%rdx") \
197 ASM("movq %%rdx,%[carry]") \
198 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
200#define MULADD_OP(IGNORED, INDEX) \
201 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
203 ASM("addq %[carry],%%rax") \
204 ASM("adcq $0,%%rdx") \
205 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
206 ASM("adcq $0,%%rdx") \
207 ASM("movq %%rdx,%[carry]") \
208 ASM("movq %%rax, 8*" #INDEX " (%[z])")
210#define ADD_OR_SUBTRACT(CORE_CODE) \
211 ASM("rorq %[carry]") \
213 ASM("sbbq %[carry],%[carry]") \
218#if defined(ADD_OR_SUBTRACT)
/*
* ASM(x) appends "\n\t" to the instruction string x. Because adjacent string
* literals concatenate, a sequence of ASM(...) pieces forms a single asm
* template with one instruction per line. Only defined when one of the
* asm backends above has defined ADD_OR_SUBTRACT.
*/
220#define ASM(x) x "\n\t"
222#define DO_8_TIMES(MACRO, ARG) \
239#if defined(BOTAN_MP_USE_X86_32_ASM)
241 ADD_OR_SUBTRACT(ASM(
"adcl %[y],%[x]"))
243 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
247#elif defined(BOTAN_MP_USE_X86_64_ASM)
250 ADD_OR_SUBTRACT(ASM(
"adcq %[y],%[x]"))
252 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
270#if defined(BOTAN_MP_USE_X86_32_ASM)
272 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcl"))
274 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
277#elif defined(BOTAN_MP_USE_X86_64_ASM)
280 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcq"))
282 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
303 const word y[8], word
carry)
305#if defined(BOTAN_MP_USE_X86_32_ASM)
307 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcl"))
309 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
312#elif defined(BOTAN_MP_USE_X86_64_ASM)
314 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcq"))
316 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
338#if defined(BOTAN_MP_USE_X86_32_ASM)
340 ADD_OR_SUBTRACT(ASM(
"sbbl %[y],%[x]"))
342 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
346#elif defined(BOTAN_MP_USE_X86_64_ASM)
348 ADD_OR_SUBTRACT(ASM(
"sbbq %[y],%[x]"))
350 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
357 word z = t0 - *
carry;
358 *
carry = c1 | (z > t0);
368#if defined(BOTAN_MP_USE_X86_32_ASM)
370 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbl"))
372 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
375#elif defined(BOTAN_MP_USE_X86_64_ASM)
377 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbq"))
379 : [x]
"r"(x), [y]
"r"(y),
"0"(
carry)
401#if defined(BOTAN_MP_USE_X86_32_ASM)
403 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbl"))
405 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
408#elif defined(BOTAN_MP_USE_X86_64_ASM)
410 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
412 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
433 const word y[8], word
carry)
435#if defined(BOTAN_MP_USE_X86_32_ASM)
437 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbl"))
439 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
442#elif defined(BOTAN_MP_USE_X86_64_ASM)
444 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
446 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
468#if defined(BOTAN_MP_USE_X86_32_ASM)
470 DO_8_TIMES(LINMUL_OP,
"x")
472 : [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
473 :
"cc",
"%eax",
"%edx");
475#elif defined(BOTAN_MP_USE_X86_64_ASM)
477 DO_8_TIMES(LINMUL_OP,
"x")
479 : [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
480 :
"cc",
"%rax",
"%rdx");
501#if defined(BOTAN_MP_USE_X86_32_ASM)
503 DO_8_TIMES(LINMUL_OP,
"z")
505 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
506 :
"cc",
"%eax",
"%edx");
508#elif defined(BOTAN_MP_USE_X86_64_ASM)
510 DO_8_TIMES(LINMUL_OP,
"z")
512 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
513 :
"cc",
"%rax",
"%rdx");
534#if defined(BOTAN_MP_USE_X86_32_ASM)
536 DO_8_TIMES(MULADD_OP,
"")
538 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
539 :
"cc",
"%eax",
"%edx");
541#elif defined(BOTAN_MP_USE_X86_64_ASM)
543 DO_8_TIMES(MULADD_OP,
"")
545 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
546 :
"cc",
"%rax",
"%rdx");
568#if defined(BOTAN_MP_USE_X86_32_ASM)
581 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
582 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
585#elif defined(BOTAN_MP_USE_X86_64_ASM)
598 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
599 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
606 *w2 += (*w1 <
carry);
614inline void word3_add(word* w2, word* w1, word* w0, word x)
616#if defined(BOTAN_MP_USE_X86_32_ASM)
622 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
623 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
626#elif defined(BOTAN_MP_USE_X86_64_ASM)
632 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
633 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
640 word c2 = (*w1 < c1);
651#if defined(BOTAN_MP_USE_X86_32_ASM)
668 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
669 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
672#elif defined(BOTAN_MP_USE_X86_64_ASM)
689 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
690 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
713 #undef ADD_OR_SUBTRACT
#define BOTAN_MP_WORD_BITS
word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
word word8_madd3(word z[8], const word x[8], word y, word carry)
word word_sub(word x, word y, word *carry)
word word8_sub2(word x[8], const word y[8], word carry)
word word8_add2(word x[8], const word y[8], word carry)
word word8_linmul3(word z[8], const word x[8], word y, word carry)
void carry(int64_t &h0, int64_t &h1)
word word8_linmul2(word x[8], word y, word carry)
word word_madd3(word a, word b, word c, word *d)
void word3_muladd(word *w2, word *w1, word *w0, word x, word y)
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
word word_madd2(word a, word b, word *c)
word word_add(word x, word y, word *carry)
void word3_muladd_2(word *w2, word *w1, word *w0, word x, word y)
void word3_add(word *w2, word *w1, word *w0, word x)
word word8_sub2_rev(word x[8], const word y[8], word carry)
void mul64x64_128(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)