Botan 2.19.1
Crypto and TLS for C++11
mp_asmi.h
Go to the documentation of this file.
1/*
2* Lowest Level MPI Algorithms
3* (C) 1999-2010 Jack Lloyd
4* 2006 Luca Piccarreta
5*
6* Botan is released under the Simplified BSD License (see license.txt)
7*/
8
9#ifndef BOTAN_MP_ASM_INTERNAL_H_
10#define BOTAN_MP_ASM_INTERNAL_H_
11
12#include <botan/internal/mp_madd.h>
13
14namespace Botan {
15
/*
* Instruction templates for the x86 inline assembly code paths.
*
* Every instruction is wrapped in ASM(), which is defined further down
* (only when one of the assembly back ends is selected) and appends "\n\t",
* so adjacent string literals concatenate into a single asm template.
*
* Operands are referenced by name. In the ADDSUB macros [x], [y] and [z]
* are registers used as array base addresses; in the multiply macros [y]
* is the scalar multiplier. [carry] is the carry word; the ADDSUB macros
* also use it as scratch, since inside ADD_OR_SUBTRACT the running carry
* is kept in the CPU carry flag.
*/
#if defined(BOTAN_MP_USE_X86_32_ASM)

/* x[INDEX] = x[INDEX] OP y[INDEX]; OPERATION is passed as "adcl" or "sbbl" */
#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")

/* z[INDEX] = x[INDEX] OP y[INDEX]; OPERATION is passed as "adcl" or "sbbl" */
#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")   \
        ASM("movl %[carry], 4*" #INDEX "(%[z])")

/*
* WRITE_TO[INDEX] = low word of x[INDEX] * y + carry, carry = high word.
* WRITE_TO names the destination operand ("x" or "z"). Uses eax and edx.
*/
#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")

/*
* z[INDEX] = low word of x[INDEX] * y + z[INDEX] + carry, carry = high word.
* The first parameter is unused; it only exists so that the macro has the
* two-argument shape DO_8_TIMES expects. Uses eax and edx.
*/
#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("addl 4*" #INDEX "(%[z]),%%eax")             \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX " (%[z])")

/*
* Run CORE_CODE with the carry word moved into the CPU carry flag:
* the rotate shifts bit 0 of [carry] into CF, and afterwards sbb/neg
* turn the resulting CF back into a 0/1 word in [carry].
*/
#define ADD_OR_SUBTRACT(CORE_CODE)                       \
        ASM("rorl %[carry]")                             \
        CORE_CODE                                        \
        ASM("sbbl %[carry],%[carry]")                    \
        ASM("negl %[carry]")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

/* x[INDEX] = x[INDEX] OP y[INDEX]; OPERATION is passed as "adcq" or "sbbq" */
#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")

/* z[INDEX] = x[INDEX] OP y[INDEX]; OPERATION is passed as "adcq" or "sbbq" */
#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")   \
        ASM("movq %[carry], 8*" #INDEX "(%[z])")

/*
* WRITE_TO[INDEX] = low word of x[INDEX] * y + carry, carry = high word.
* WRITE_TO names the destination operand ("x" or "z"). Uses rax and rdx.
*/
#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")

/*
* z[INDEX] = low word of x[INDEX] * y + z[INDEX] + carry, carry = high word.
* The first parameter is unused; it only exists so that the macro has the
* two-argument shape DO_8_TIMES expects. Uses rax and rdx.
*/
#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("addq 8*" #INDEX "(%[z]),%%rax")             \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX " (%[z])")

/*
* Run CORE_CODE with the carry word moved into the CPU carry flag:
* the rotate shifts bit 0 of [carry] into CF, and afterwards sbb/neg
* turn the resulting CF back into a 0/1 word in [carry].
*/
#define ADD_OR_SUBTRACT(CORE_CODE)                       \
        ASM("rorq %[carry]")                             \
        CORE_CODE                                        \
        ASM("sbbq %[carry],%[carry]")                    \
        ASM("negq %[carry]")

#endif

#if defined(ADD_OR_SUBTRACT)

/* One instruction of an asm template, terminated by newline + tab */
#define ASM(x) x "\n\t"

/* Expand MACRO(ARG, i) once for each word index i = 0..7 */
#define DO_8_TIMES(MACRO, ARG) \
        MACRO(ARG, 0)          \
        MACRO(ARG, 1)          \
        MACRO(ARG, 2)          \
        MACRO(ARG, 3)          \
        MACRO(ARG, 4)          \
        MACRO(ARG, 5)          \
        MACRO(ARG, 6)          \
        MACRO(ARG, 7)

#endif
103
104/*
105* Word Addition
106*/
107inline word word_add(word x, word y, word* carry)
108 {
109#if defined(BOTAN_MP_USE_X86_32_ASM)
110 asm(
111 ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
112 : [x]"=r"(x), [carry]"=r"(*carry)
113 : "0"(x), [y]"rm"(y), "1"(*carry)
114 : "cc");
115 return x;
116
117#elif defined(BOTAN_MP_USE_X86_64_ASM)
118
119 asm(
120 ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
121 : [x]"=r"(x), [carry]"=r"(*carry)
122 : "0"(x), [y]"rm"(y), "1"(*carry)
123 : "cc");
124 return x;
125
126#else
127 word z = x + y;
128 word c1 = (z < x);
129 z += *carry;
130 *carry = c1 | (z < *carry);
131 return z;
132#endif
133 }
134
135/*
136* Eight Word Block Addition, Two Argument
137*/
138inline word word8_add2(word x[8], const word y[8], word carry)
139 {
140#if defined(BOTAN_MP_USE_X86_32_ASM)
141 asm(
142 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
143 : [carry]"=r"(carry)
144 : [x]"r"(x), [y]"r"(y), "0"(carry)
145 : "cc", "memory");
146 return carry;
147
148#elif defined(BOTAN_MP_USE_X86_64_ASM)
149
150 asm(
151 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
152 : [carry]"=r"(carry)
153 : [x]"r"(x), [y]"r"(y), "0"(carry)
154 : "cc", "memory");
155 return carry;
156
157#else
158 x[0] = word_add(x[0], y[0], &carry);
159 x[1] = word_add(x[1], y[1], &carry);
160 x[2] = word_add(x[2], y[2], &carry);
161 x[3] = word_add(x[3], y[3], &carry);
162 x[4] = word_add(x[4], y[4], &carry);
163 x[5] = word_add(x[5], y[5], &carry);
164 x[6] = word_add(x[6], y[6], &carry);
165 x[7] = word_add(x[7], y[7], &carry);
166 return carry;
167#endif
168 }
169
170/*
171* Eight Word Block Addition, Three Argument
172*/
173inline word word8_add3(word z[8], const word x[8],
174 const word y[8], word carry)
175 {
176#if defined(BOTAN_MP_USE_X86_32_ASM)
177 asm(
178 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
179 : [carry]"=r"(carry)
180 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
181 : "cc", "memory");
182 return carry;
183
184#elif defined(BOTAN_MP_USE_X86_64_ASM)
185
186 asm(
187 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
188 : [carry]"=r"(carry)
189 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
190 : "cc", "memory");
191 return carry;
192
193#else
194 z[0] = word_add(x[0], y[0], &carry);
195 z[1] = word_add(x[1], y[1], &carry);
196 z[2] = word_add(x[2], y[2], &carry);
197 z[3] = word_add(x[3], y[3], &carry);
198 z[4] = word_add(x[4], y[4], &carry);
199 z[5] = word_add(x[5], y[5], &carry);
200 z[6] = word_add(x[6], y[6], &carry);
201 z[7] = word_add(x[7], y[7], &carry);
202 return carry;
203#endif
204 }
205
206/*
207* Word Subtraction
208*/
209inline word word_sub(word x, word y, word* carry)
210 {
211#if defined(BOTAN_MP_USE_X86_32_ASM)
212 asm(
213 ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
214 : [x]"=r"(x), [carry]"=r"(*carry)
215 : "0"(x), [y]"rm"(y), "1"(*carry)
216 : "cc");
217 return x;
218
219#elif defined(BOTAN_MP_USE_X86_64_ASM)
220
221 asm(
222 ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
223 : [x]"=r"(x), [carry]"=r"(*carry)
224 : "0"(x), [y]"rm"(y), "1"(*carry)
225 : "cc");
226 return x;
227
228#else
229 word t0 = x - y;
230 word c1 = (t0 > x);
231 word z = t0 - *carry;
232 *carry = c1 | (z > t0);
233 return z;
234#endif
235 }
236
237/*
238* Eight Word Block Subtraction, Two Argument
239*/
240inline word word8_sub2(word x[8], const word y[8], word carry)
241 {
242#if defined(BOTAN_MP_USE_X86_32_ASM)
243 asm(
244 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
245 : [carry]"=r"(carry)
246 : [x]"r"(x), [y]"r"(y), "0"(carry)
247 : "cc", "memory");
248 return carry;
249
250#elif defined(BOTAN_MP_USE_X86_64_ASM)
251
252 asm(
253 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
254 : [carry]"=r"(carry)
255 : [x]"r"(x), [y]"r"(y), "0"(carry)
256 : "cc", "memory");
257 return carry;
258
259#else
260 x[0] = word_sub(x[0], y[0], &carry);
261 x[1] = word_sub(x[1], y[1], &carry);
262 x[2] = word_sub(x[2], y[2], &carry);
263 x[3] = word_sub(x[3], y[3], &carry);
264 x[4] = word_sub(x[4], y[4], &carry);
265 x[5] = word_sub(x[5], y[5], &carry);
266 x[6] = word_sub(x[6], y[6], &carry);
267 x[7] = word_sub(x[7], y[7], &carry);
268 return carry;
269#endif
270 }
271
272/*
273* Eight Word Block Subtraction, Two Argument
274*/
275inline word word8_sub2_rev(word x[8], const word y[8], word carry)
276 {
277#if defined(BOTAN_MP_USE_X86_32_ASM)
278 asm(
279 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
280 : [carry]"=r"(carry)
281 : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
282 : "cc", "memory");
283 return carry;
284
285#elif defined(BOTAN_MP_USE_X86_64_ASM)
286
287 asm(
288 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
289 : [carry]"=r"(carry)
290 : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
291 : "cc", "memory");
292 return carry;
293
294#else
295 x[0] = word_sub(y[0], x[0], &carry);
296 x[1] = word_sub(y[1], x[1], &carry);
297 x[2] = word_sub(y[2], x[2], &carry);
298 x[3] = word_sub(y[3], x[3], &carry);
299 x[4] = word_sub(y[4], x[4], &carry);
300 x[5] = word_sub(y[5], x[5], &carry);
301 x[6] = word_sub(y[6], x[6], &carry);
302 x[7] = word_sub(y[7], x[7], &carry);
303 return carry;
304#endif
305 }
306
307/*
308* Eight Word Block Subtraction, Three Argument
309*/
310inline word word8_sub3(word z[8], const word x[8],
311 const word y[8], word carry)
312 {
313#if defined(BOTAN_MP_USE_X86_32_ASM)
314 asm(
315 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
316 : [carry]"=r"(carry)
317 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
318 : "cc", "memory");
319 return carry;
320
321#elif defined(BOTAN_MP_USE_X86_64_ASM)
322
323 asm(
324 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
325 : [carry]"=r"(carry)
326 : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
327 : "cc", "memory");
328 return carry;
329
330#else
331 z[0] = word_sub(x[0], y[0], &carry);
332 z[1] = word_sub(x[1], y[1], &carry);
333 z[2] = word_sub(x[2], y[2], &carry);
334 z[3] = word_sub(x[3], y[3], &carry);
335 z[4] = word_sub(x[4], y[4], &carry);
336 z[5] = word_sub(x[5], y[5], &carry);
337 z[6] = word_sub(x[6], y[6], &carry);
338 z[7] = word_sub(x[7], y[7], &carry);
339 return carry;
340#endif
341 }
342
343/*
344* Eight Word Block Linear Multiplication
345*/
346inline word word8_linmul2(word x[8], word y, word carry)
347 {
348#if defined(BOTAN_MP_USE_X86_32_ASM)
349 asm(
350 DO_8_TIMES(LINMUL_OP, "x")
351 : [carry]"=r"(carry)
352 : [x]"r"(x), [y]"rm"(y), "0"(carry)
353 : "cc", "%eax", "%edx");
354 return carry;
355
356#elif defined(BOTAN_MP_USE_X86_64_ASM)
357
358 asm(
359 DO_8_TIMES(LINMUL_OP, "x")
360 : [carry]"=r"(carry)
361 : [x]"r"(x), [y]"rm"(y), "0"(carry)
362 : "cc", "%rax", "%rdx");
363 return carry;
364
365#else
366 x[0] = word_madd2(x[0], y, &carry);
367 x[1] = word_madd2(x[1], y, &carry);
368 x[2] = word_madd2(x[2], y, &carry);
369 x[3] = word_madd2(x[3], y, &carry);
370 x[4] = word_madd2(x[4], y, &carry);
371 x[5] = word_madd2(x[5], y, &carry);
372 x[6] = word_madd2(x[6], y, &carry);
373 x[7] = word_madd2(x[7], y, &carry);
374 return carry;
375#endif
376 }
377
378/*
379* Eight Word Block Linear Multiplication
380*/
381inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
382 {
383#if defined(BOTAN_MP_USE_X86_32_ASM)
384 asm(
385 DO_8_TIMES(LINMUL_OP, "z")
386 : [carry]"=r"(carry)
387 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
388 : "cc", "%eax", "%edx");
389 return carry;
390
391#elif defined(BOTAN_MP_USE_X86_64_ASM)
392 asm(
393 DO_8_TIMES(LINMUL_OP, "z")
394 : [carry]"=r"(carry)
395 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
396 : "cc", "%rax", "%rdx");
397 return carry;
398
399#else
400 z[0] = word_madd2(x[0], y, &carry);
401 z[1] = word_madd2(x[1], y, &carry);
402 z[2] = word_madd2(x[2], y, &carry);
403 z[3] = word_madd2(x[3], y, &carry);
404 z[4] = word_madd2(x[4], y, &carry);
405 z[5] = word_madd2(x[5], y, &carry);
406 z[6] = word_madd2(x[6], y, &carry);
407 z[7] = word_madd2(x[7], y, &carry);
408 return carry;
409#endif
410 }
411
412/*
413* Eight Word Block Multiply/Add
414*/
415inline word word8_madd3(word z[8], const word x[8], word y, word carry)
416 {
417#if defined(BOTAN_MP_USE_X86_32_ASM)
418 asm(
419 DO_8_TIMES(MULADD_OP, "")
420 : [carry]"=r"(carry)
421 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
422 : "cc", "%eax", "%edx");
423 return carry;
424
425#elif defined(BOTAN_MP_USE_X86_64_ASM)
426
427 asm(
428 DO_8_TIMES(MULADD_OP, "")
429 : [carry]"=r"(carry)
430 : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
431 : "cc", "%rax", "%rdx");
432 return carry;
433
434#else
435 z[0] = word_madd3(x[0], y, z[0], &carry);
436 z[1] = word_madd3(x[1], y, z[1], &carry);
437 z[2] = word_madd3(x[2], y, z[2], &carry);
438 z[3] = word_madd3(x[3], y, z[3], &carry);
439 z[4] = word_madd3(x[4], y, z[4], &carry);
440 z[5] = word_madd3(x[5], y, z[5], &carry);
441 z[6] = word_madd3(x[6], y, z[6], &carry);
442 z[7] = word_madd3(x[7], y, z[7], &carry);
443 return carry;
444#endif
445 }
446
447/*
448* Multiply-Add Accumulator
449* (w2,w1,w0) += x * y
450*/
451inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
452 {
453#if defined(BOTAN_MP_USE_X86_32_ASM)
454 word z0 = 0, z1 = 0;
455
456 asm("mull %[y]"
457 : "=a"(z0),"=d"(z1)
458 : "a"(x), [y]"rm"(y)
459 : "cc");
460
461 asm(R"(
462 addl %[z0],%[w0]
463 adcl %[z1],%[w1]
464 adcl $0,%[w2]
465 )"
466 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
467 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
468 : "cc");
469
470#elif defined(BOTAN_MP_USE_X86_64_ASM)
471
472 word z0 = 0, z1 = 0;
473
474 asm("mulq %[y]"
475 : "=a"(z0),"=d"(z1)
476 : "a"(x), [y]"rm"(y)
477 : "cc");
478
479 asm(R"(
480 addq %[z0],%[w0]
481 adcq %[z1],%[w1]
482 adcq $0,%[w2]
483 )"
484 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
485 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
486 : "cc");
487
488#else
489 word carry = *w0;
490 *w0 = word_madd2(x, y, &carry);
491 *w1 += carry;
492 *w2 += (*w1 < carry);
493#endif
494 }
495
496/*
497* 3-word addition
498* (w2,w1,w0) += x
499*/
500inline void word3_add(word* w2, word* w1, word* w0, word x)
501 {
502#if defined(BOTAN_MP_USE_X86_32_ASM)
503 asm(R"(
504 addl %[x],%[w0]
505 adcl $0,%[w1]
506 adcl $0,%[w2]
507 )"
508 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
509 : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
510 : "cc");
511
512#elif defined(BOTAN_MP_USE_X86_64_ASM)
513
514 asm(R"(
515 addq %[x],%[w0]
516 adcq $0,%[w1]
517 adcq $0,%[w2]
518 )"
519 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
520 : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
521 : "cc");
522
523#else
524 *w0 += x;
525 word c1 = (*w0 < x);
526 *w1 += c1;
527 word c2 = (*w1 < c1);
528 *w2 += c2;
529#endif
530 }
531
532/*
533* Multiply-Add Accumulator
534* (w2,w1,w0) += 2 * x * y
535*/
536inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
537 {
538#if defined(BOTAN_MP_USE_X86_32_ASM)
539
540 word z0 = 0, z1 = 0;
541
542 asm("mull %[y]"
543 : "=a"(z0),"=d"(z1)
544 : "a"(x), [y]"rm"(y)
545 : "cc");
546
547 asm(R"(
548 addl %[z0],%[w0]
549 adcl %[z1],%[w1]
550 adcl $0,%[w2]
551
552 addl %[z0],%[w0]
553 adcl %[z1],%[w1]
554 adcl $0,%[w2]
555 )"
556 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
557 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
558 : "cc");
559
560#elif defined(BOTAN_MP_USE_X86_64_ASM)
561
562 word z0 = 0, z1 = 0;
563
564 asm("mulq %[y]"
565 : "=a"(z0),"=d"(z1)
566 : "a"(x), [y]"rm"(y)
567 : "cc");
568
569 asm(R"(
570 addq %[z0],%[w0]
571 adcq %[z1],%[w1]
572 adcq $0,%[w2]
573
574 addq %[z0],%[w0]
575 adcq %[z1],%[w1]
576 adcq $0,%[w2]
577 )"
578 : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
579 : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
580 : "cc");
581
582#else
583 word carry = 0;
584 x = word_madd2(x, y, &carry);
585 y = carry;
586
587 word top = (y >> (BOTAN_MP_WORD_BITS-1));
588 y <<= 1;
589 y |= (x >> (BOTAN_MP_WORD_BITS-1));
590 x <<= 1;
591
592 carry = 0;
593 *w0 = word_add(*w0, x, &carry);
594 *w1 = word_add(*w1, y, &carry);
595 *w2 = word_add(*w2, top, &carry);
596#endif
597 }
598
/*
* The assembly helper macros are private to this header: ASM is only
* defined when an assembly back end was selected, in which case all of
* the helpers are removed again here.
*/
#ifdef ASM
   #undef MULADD_OP
   #undef LINMUL_OP
   #undef ADDSUB3_OP
   #undef ADDSUB2_OP
   #undef ADD_OR_SUBTRACT
   #undef DO_8_TIMES
   #undef ASM
#endif
608
609}
610
611#endif
Definition: alg_id.cpp:13
word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:310
word word8_madd3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:415
word word_sub(word x, word y, word *carry)
Definition: mp_asmi.h:209
word word8_sub2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:240
word word8_add2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:138
word word8_linmul3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:381
void carry(int64_t &h0, int64_t &h1)
word word8_linmul2(word x[8], word y, word carry)
Definition: mp_asmi.h:346
word word_madd3(word a, word b, word c, word *d)
Definition: mp_madd.h:92
void word3_muladd(word *w2, word *w1, word *w0, word x, word y)
Definition: mp_asmi.h:451
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:173
word word_madd2(word a, word b, word *c)
Definition: mp_madd.h:46
word word_add(word x, word y, word *carry)
Definition: mp_asmi.h:107
void word3_muladd_2(word *w2, word *w1, word *w0, word x, word y)
Definition: mp_asmi.h:536
void word3_add(word *w2, word *w1, word *w0, word x)
Definition: mp_asmi.h:500
word word8_sub2_rev(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:275