Botan  2.7.0
Crypto and TLS for C++11
mp_asmi.h
Go to the documentation of this file.
1 /*
2 * Lowest Level MPI Algorithms
3 * (C) 1999-2010 Jack Lloyd
4 * 2006 Luca Piccarreta
5 *
6 * Botan is released under the Simplified BSD License (see license.txt)
7 */
8 
9 #ifndef BOTAN_MP_ASM_INTERNAL_H_
10 #define BOTAN_MP_ASM_INTERNAL_H_
11 
12 #include <botan/internal/mp_madd.h>
13 
14 namespace Botan {
15 
#if defined(BOTAN_MP_USE_X86_32_ASM)

/*
* Instruction templates for the GCC-style inline assembly (32-bit x86,
* 4-byte words). Each template is a run of ASM("...") fragments; the
* named operands %[x], %[y], %[z] and %[carry] are bound by whichever
* asm statement instantiates the template.
*/

/* x[INDEX] = x[INDEX] OPERATION y[INDEX]; %[carry] doubles as scratch */
#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])")

/* z[INDEX] = x[INDEX] OPERATION y[INDEX]; %[carry] doubles as scratch */
#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movl 4*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]")   \
        ASM("movl %[carry], 4*" #INDEX "(%[z])")

/*
* edx:eax = x[INDEX] * y + carry; the low word is stored to
* WRITE_TO[INDEX] and the high word becomes the new carry
*/
#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")

/*
* edx:eax = x[INDEX] * y + carry + z[INDEX]; the low word is stored to
* z[INDEX] and the high word becomes the new carry. The first macro
* argument is unused; it exists so the macro has the same two-argument
* shape as the other templates.
*/
#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movl 4*" #INDEX "(%[x]),%%eax")             \
        ASM("mull %[y]")                                 \
        ASM("addl %[carry],%%eax")                       \
        ASM("adcl $0,%%edx")                             \
        ASM("addl 4*" #INDEX "(%[z]),%%eax")             \
        ASM("adcl $0,%%edx")                             \
        ASM("movl %%edx,%[carry]")                       \
        ASM("movl %%eax, 4*" #INDEX "(%[z])")

/*
* Wrap an adc/sbb chain: rotate bit 0 of %[carry] into CF, run
* CORE_CODE, then convert the resulting CF back into 0 or 1
* (sbb gives 0 or all-ones, neg turns all-ones into 1).
*/
#define ADD_OR_SUBTRACT(CORE_CODE)                       \
        ASM("rorl %[carry]")                             \
        CORE_CODE                                        \
        ASM("sbbl %[carry],%[carry]")                    \
        ASM("negl %[carry]")

#elif defined(BOTAN_MP_USE_X86_64_ASM)

/*
* Same templates as the 32-bit variant, using 8-byte words and the
* 64-bit forms of the instructions and registers.
*/

/* x[INDEX] = x[INDEX] OPERATION y[INDEX]; %[carry] doubles as scratch */
#define ADDSUB2_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[y]), %[carry]")         \
        ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])")

/* z[INDEX] = x[INDEX] OPERATION y[INDEX]; %[carry] doubles as scratch */
#define ADDSUB3_OP(OPERATION, INDEX)                     \
        ASM("movq 8*" #INDEX "(%[x]), %[carry]")         \
        ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]")   \
        ASM("movq %[carry], 8*" #INDEX "(%[z])")

/*
* rdx:rax = x[INDEX] * y + carry; the low word is stored to
* WRITE_TO[INDEX] and the high word becomes the new carry
*/
#define LINMUL_OP(WRITE_TO, INDEX)                       \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")

/*
* rdx:rax = x[INDEX] * y + carry + z[INDEX]; the low word is stored to
* z[INDEX] and the high word becomes the new carry. The first macro
* argument is unused.
*/
#define MULADD_OP(IGNORED, INDEX)                        \
        ASM("movq 8*" #INDEX "(%[x]),%%rax")             \
        ASM("mulq %[y]")                                 \
        ASM("addq %[carry],%%rax")                       \
        ASM("adcq $0,%%rdx")                             \
        ASM("addq 8*" #INDEX "(%[z]),%%rax")             \
        ASM("adcq $0,%%rdx")                             \
        ASM("movq %%rdx,%[carry]")                       \
        ASM("movq %%rax, 8*" #INDEX "(%[z])")

/*
* Wrap an adc/sbb chain: rotate bit 0 of %[carry] into CF, run
* CORE_CODE, then convert the resulting CF back into 0 or 1.
*/
#define ADD_OR_SUBTRACT(CORE_CODE)                       \
        ASM("rorq %[carry]")                             \
        CORE_CODE                                        \
        ASM("sbbq %[carry],%[carry]")                    \
        ASM("negq %[carry]")

#endif
87 
#if defined(ADD_OR_SUBTRACT)

/* Terminate one assembler instruction so that fragments can be concatenated */
#define ASM(x) x "\n\t"

/* Expand MACRO(ARG, i) for i = 0..7, i.e. once per word of an 8-word block */
#define DO_8_TIMES(MACRO, ARG)                                  \
        MACRO(ARG, 0) MACRO(ARG, 1) MACRO(ARG, 2) MACRO(ARG, 3) \
        MACRO(ARG, 4) MACRO(ARG, 5) MACRO(ARG, 6) MACRO(ARG, 7)

#endif
103 
104 /*
105 * Word Addition
106 */
107 inline word word_add(word x, word y, word* carry)
108  {
109 #if defined(BOTAN_MP_USE_X86_32_ASM)
110  asm(
111  ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
112  : [x]"=r"(x), [carry]"=r"(*carry)
113  : "0"(x), [y]"rm"(y), "1"(*carry)
114  : "cc");
115  return x;
116 
117 #elif defined(BOTAN_MP_USE_X86_64_ASM)
118 
119  asm(
120  ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
121  : [x]"=r"(x), [carry]"=r"(*carry)
122  : "0"(x), [y]"rm"(y), "1"(*carry)
123  : "cc");
124  return x;
125 
126 #else
127  word z = x + y;
128  word c1 = (z < x);
129  z += *carry;
130  *carry = c1 | (z < *carry);
131  return z;
132 #endif
133  }
134 
135 /*
136 * Eight Word Block Addition, Two Argument
137 */
138 inline word word8_add2(word x[8], const word y[8], word carry)
139  {
140 #if defined(BOTAN_MP_USE_X86_32_ASM)
141  asm(
142  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
143  : [carry]"=r"(carry)
144  : [x]"r"(x), [y]"r"(y), "0"(carry)
145  : "cc", "memory");
146  return carry;
147 
148 #elif defined(BOTAN_MP_USE_X86_64_ASM)
149 
150  asm(
151  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
152  : [carry]"=r"(carry)
153  : [x]"r"(x), [y]"r"(y), "0"(carry)
154  : "cc", "memory");
155  return carry;
156 
157 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
158 
159  __asm {
160  mov edx,[x]
161  mov esi,[y]
162  xor eax,eax
163  sub eax,[carry] //force CF=1 iff *carry==1
164  mov eax,[esi]
165  adc [edx],eax
166  mov eax,[esi+4]
167  adc [edx+4],eax
168  mov eax,[esi+8]
169  adc [edx+8],eax
170  mov eax,[esi+12]
171  adc [edx+12],eax
172  mov eax,[esi+16]
173  adc [edx+16],eax
174  mov eax,[esi+20]
175  adc [edx+20],eax
176  mov eax,[esi+24]
177  adc [edx+24],eax
178  mov eax,[esi+28]
179  adc [edx+28],eax
180  sbb eax,eax
181  neg eax
182  }
183 
184 #else
185  x[0] = word_add(x[0], y[0], &carry);
186  x[1] = word_add(x[1], y[1], &carry);
187  x[2] = word_add(x[2], y[2], &carry);
188  x[3] = word_add(x[3], y[3], &carry);
189  x[4] = word_add(x[4], y[4], &carry);
190  x[5] = word_add(x[5], y[5], &carry);
191  x[6] = word_add(x[6], y[6], &carry);
192  x[7] = word_add(x[7], y[7], &carry);
193  return carry;
194 #endif
195  }
196 
197 /*
198 * Eight Word Block Addition, Three Argument
199 */
200 inline word word8_add3(word z[8], const word x[8],
201  const word y[8], word carry)
202  {
203 #if defined(BOTAN_MP_USE_X86_32_ASM)
204  asm(
205  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
206  : [carry]"=r"(carry)
207  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
208  : "cc", "memory");
209  return carry;
210 
211 #elif defined(BOTAN_MP_USE_X86_64_ASM)
212 
213  asm(
214  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
215  : [carry]"=r"(carry)
216  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
217  : "cc", "memory");
218  return carry;
219 
220 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
221 
222  __asm {
223  mov edi,[x]
224  mov esi,[y]
225  mov ebx,[z]
226  xor eax,eax
227  sub eax,[carry] //force CF=1 iff *carry==1
228  mov eax,[edi]
229  adc eax,[esi]
230  mov [ebx],eax
231 
232  mov eax,[edi+4]
233  adc eax,[esi+4]
234  mov [ebx+4],eax
235 
236  mov eax,[edi+8]
237  adc eax,[esi+8]
238  mov [ebx+8],eax
239 
240  mov eax,[edi+12]
241  adc eax,[esi+12]
242  mov [ebx+12],eax
243 
244  mov eax,[edi+16]
245  adc eax,[esi+16]
246  mov [ebx+16],eax
247 
248  mov eax,[edi+20]
249  adc eax,[esi+20]
250  mov [ebx+20],eax
251 
252  mov eax,[edi+24]
253  adc eax,[esi+24]
254  mov [ebx+24],eax
255 
256  mov eax,[edi+28]
257  adc eax,[esi+28]
258  mov [ebx+28],eax
259 
260  sbb eax,eax
261  neg eax
262  }
263 
264 #else
265  z[0] = word_add(x[0], y[0], &carry);
266  z[1] = word_add(x[1], y[1], &carry);
267  z[2] = word_add(x[2], y[2], &carry);
268  z[3] = word_add(x[3], y[3], &carry);
269  z[4] = word_add(x[4], y[4], &carry);
270  z[5] = word_add(x[5], y[5], &carry);
271  z[6] = word_add(x[6], y[6], &carry);
272  z[7] = word_add(x[7], y[7], &carry);
273  return carry;
274 #endif
275  }
276 
277 /*
278 * Word Subtraction
279 */
280 inline word word_sub(word x, word y, word* carry)
281  {
282 #if defined(BOTAN_MP_USE_X86_32_ASM)
283  asm(
284  ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
285  : [x]"=r"(x), [carry]"=r"(*carry)
286  : "0"(x), [y]"rm"(y), "1"(*carry)
287  : "cc");
288  return x;
289 
290 #elif defined(BOTAN_MP_USE_X86_64_ASM)
291 
292  asm(
293  ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
294  : [x]"=r"(x), [carry]"=r"(*carry)
295  : "0"(x), [y]"rm"(y), "1"(*carry)
296  : "cc");
297  return x;
298 
299 #else
300  word t0 = x - y;
301  word c1 = (t0 > x);
302  word z = t0 - *carry;
303  *carry = c1 | (z > t0);
304  return z;
305 #endif
306  }
307 
308 /*
309 * Eight Word Block Subtraction, Two Argument
310 */
311 inline word word8_sub2(word x[8], const word y[8], word carry)
312  {
313 #if defined(BOTAN_MP_USE_X86_32_ASM)
314  asm(
315  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
316  : [carry]"=r"(carry)
317  : [x]"r"(x), [y]"r"(y), "0"(carry)
318  : "cc", "memory");
319  return carry;
320 
321 #elif defined(BOTAN_MP_USE_X86_64_ASM)
322 
323  asm(
324  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
325  : [carry]"=r"(carry)
326  : [x]"r"(x), [y]"r"(y), "0"(carry)
327  : "cc", "memory");
328  return carry;
329 
330 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
331 
332  __asm {
333  mov edi,[x]
334  mov esi,[y]
335  xor eax,eax
336  sub eax,[carry] //force CF=1 iff *carry==1
337  mov eax,[edi]
338  sbb eax,[esi]
339  mov [edi],eax
340  mov eax,[edi+4]
341  sbb eax,[esi+4]
342  mov [edi+4],eax
343  mov eax,[edi+8]
344  sbb eax,[esi+8]
345  mov [edi+8],eax
346  mov eax,[edi+12]
347  sbb eax,[esi+12]
348  mov [edi+12],eax
349  mov eax,[edi+16]
350  sbb eax,[esi+16]
351  mov [edi+16],eax
352  mov eax,[edi+20]
353  sbb eax,[esi+20]
354  mov [edi+20],eax
355  mov eax,[edi+24]
356  sbb eax,[esi+24]
357  mov [edi+24],eax
358  mov eax,[edi+28]
359  sbb eax,[esi+28]
360  mov [edi+28],eax
361  sbb eax,eax
362  neg eax
363  }
364 
365 #else
366  x[0] = word_sub(x[0], y[0], &carry);
367  x[1] = word_sub(x[1], y[1], &carry);
368  x[2] = word_sub(x[2], y[2], &carry);
369  x[3] = word_sub(x[3], y[3], &carry);
370  x[4] = word_sub(x[4], y[4], &carry);
371  x[5] = word_sub(x[5], y[5], &carry);
372  x[6] = word_sub(x[6], y[6], &carry);
373  x[7] = word_sub(x[7], y[7], &carry);
374  return carry;
375 #endif
376  }
377 
378 /*
379 * Eight Word Block Subtraction, Two Argument
380 */
381 inline word word8_sub2_rev(word x[8], const word y[8], word carry)
382  {
383 #if defined(BOTAN_MP_USE_X86_32_ASM)
384  asm(
385  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
386  : [carry]"=r"(carry)
387  : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
388  : "cc", "memory");
389  return carry;
390 
391 #elif defined(BOTAN_MP_USE_X86_64_ASM)
392 
393  asm(
394  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
395  : [carry]"=r"(carry)
396  : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
397  : "cc", "memory");
398  return carry;
399 
400 #else
401  x[0] = word_sub(y[0], x[0], &carry);
402  x[1] = word_sub(y[1], x[1], &carry);
403  x[2] = word_sub(y[2], x[2], &carry);
404  x[3] = word_sub(y[3], x[3], &carry);
405  x[4] = word_sub(y[4], x[4], &carry);
406  x[5] = word_sub(y[5], x[5], &carry);
407  x[6] = word_sub(y[6], x[6], &carry);
408  x[7] = word_sub(y[7], x[7], &carry);
409  return carry;
410 #endif
411  }
412 
413 /*
414 * Eight Word Block Subtraction, Three Argument
415 */
416 inline word word8_sub3(word z[8], const word x[8],
417  const word y[8], word carry)
418  {
419 #if defined(BOTAN_MP_USE_X86_32_ASM)
420  asm(
421  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
422  : [carry]"=r"(carry)
423  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
424  : "cc", "memory");
425  return carry;
426 
427 #elif defined(BOTAN_MP_USE_X86_64_ASM)
428 
429  asm(
430  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
431  : [carry]"=r"(carry)
432  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
433  : "cc", "memory");
434  return carry;
435 
436 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
437 
438  __asm {
439  mov edi,[x]
440  mov esi,[y]
441  xor eax,eax
442  sub eax,[carry] //force CF=1 iff *carry==1
443  mov ebx,[z]
444  mov eax,[edi]
445  sbb eax,[esi]
446  mov [ebx],eax
447  mov eax,[edi+4]
448  sbb eax,[esi+4]
449  mov [ebx+4],eax
450  mov eax,[edi+8]
451  sbb eax,[esi+8]
452  mov [ebx+8],eax
453  mov eax,[edi+12]
454  sbb eax,[esi+12]
455  mov [ebx+12],eax
456  mov eax,[edi+16]
457  sbb eax,[esi+16]
458  mov [ebx+16],eax
459  mov eax,[edi+20]
460  sbb eax,[esi+20]
461  mov [ebx+20],eax
462  mov eax,[edi+24]
463  sbb eax,[esi+24]
464  mov [ebx+24],eax
465  mov eax,[edi+28]
466  sbb eax,[esi+28]
467  mov [ebx+28],eax
468  sbb eax,eax
469  neg eax
470  }
471 
472 #else
473  z[0] = word_sub(x[0], y[0], &carry);
474  z[1] = word_sub(x[1], y[1], &carry);
475  z[2] = word_sub(x[2], y[2], &carry);
476  z[3] = word_sub(x[3], y[3], &carry);
477  z[4] = word_sub(x[4], y[4], &carry);
478  z[5] = word_sub(x[5], y[5], &carry);
479  z[6] = word_sub(x[6], y[6], &carry);
480  z[7] = word_sub(x[7], y[7], &carry);
481  return carry;
482 #endif
483  }
484 
485 /*
486 * Eight Word Block Linear Multiplication
487 */
488 inline word word8_linmul2(word x[8], word y, word carry)
489  {
490 #if defined(BOTAN_MP_USE_X86_32_ASM)
491  asm(
492  DO_8_TIMES(LINMUL_OP, "x")
493  : [carry]"=r"(carry)
494  : [x]"r"(x), [y]"rm"(y), "0"(carry)
495  : "cc", "%eax", "%edx");
496  return carry;
497 
498 #elif defined(BOTAN_MP_USE_X86_64_ASM)
499 
500  asm(
501  DO_8_TIMES(LINMUL_OP, "x")
502  : [carry]"=r"(carry)
503  : [x]"r"(x), [y]"rm"(y), "0"(carry)
504  : "cc", "%rax", "%rdx");
505  return carry;
506 
507 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
508 
509  __asm {
510  mov esi,[x]
511  mov eax,[esi] //load a
512  mul [y] //edx(hi):eax(lo)=a*b
513  add eax,[carry] //sum lo carry
514  adc edx,0 //sum hi carry
515  mov ecx,edx //store carry
516  mov [esi],eax //load a
517 
518  mov eax,[esi+4] //load a
519  mul [y] //edx(hi):eax(lo)=a*b
520  add eax,ecx //sum lo carry
521  adc edx,0 //sum hi carry
522  mov ecx,edx //store carry
523  mov [esi+4],eax //load a
524 
525  mov eax,[esi+8] //load a
526  mul [y] //edx(hi):eax(lo)=a*b
527  add eax,ecx //sum lo carry
528  adc edx,0 //sum hi carry
529  mov ecx,edx //store carry
530  mov [esi+8],eax //load a
531 
532  mov eax,[esi+12] //load a
533  mul [y] //edx(hi):eax(lo)=a*b
534  add eax,ecx //sum lo carry
535  adc edx,0 //sum hi carry
536  mov ecx,edx //store carry
537  mov [esi+12],eax //load a
538 
539  mov eax,[esi+16] //load a
540  mul [y] //edx(hi):eax(lo)=a*b
541  add eax,ecx //sum lo carry
542  adc edx,0 //sum hi carry
543  mov ecx,edx //store carry
544  mov [esi+16],eax //load a
545 
546  mov eax,[esi+20] //load a
547  mul [y] //edx(hi):eax(lo)=a*b
548  add eax,ecx //sum lo carry
549  adc edx,0 //sum hi carry
550  mov ecx,edx //store carry
551  mov [esi+20],eax //load a
552 
553  mov eax,[esi+24] //load a
554  mul [y] //edx(hi):eax(lo)=a*b
555  add eax,ecx //sum lo carry
556  adc edx,0 //sum hi carry
557  mov ecx,edx //store carry
558  mov [esi+24],eax //load a
559 
560  mov eax,[esi+28] //load a
561  mul [y] //edx(hi):eax(lo)=a*b
562  add eax,ecx //sum lo carry
563  adc edx,0 //sum hi carry
564  mov [esi+28],eax //load a
565 
566  mov eax,edx //store carry
567  }
568 
569 #else
570  x[0] = word_madd2(x[0], y, &carry);
571  x[1] = word_madd2(x[1], y, &carry);
572  x[2] = word_madd2(x[2], y, &carry);
573  x[3] = word_madd2(x[3], y, &carry);
574  x[4] = word_madd2(x[4], y, &carry);
575  x[5] = word_madd2(x[5], y, &carry);
576  x[6] = word_madd2(x[6], y, &carry);
577  x[7] = word_madd2(x[7], y, &carry);
578  return carry;
579 #endif
580  }
581 
582 /*
583 * Eight Word Block Linear Multiplication
584 */
585 inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
586  {
587 #if defined(BOTAN_MP_USE_X86_32_ASM)
588  asm(
589  DO_8_TIMES(LINMUL_OP, "z")
590  : [carry]"=r"(carry)
591  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
592  : "cc", "%eax", "%edx");
593  return carry;
594 
595 #elif defined(BOTAN_MP_USE_X86_64_ASM)
596  asm(
597  DO_8_TIMES(LINMUL_OP, "z")
598  : [carry]"=r"(carry)
599  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
600  : "cc", "%rax", "%rdx");
601  return carry;
602 
603 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
604 
605  __asm {
606  mov edi,[z]
607  mov esi,[x]
608  mov eax,[esi] //load a
609  mul [y] //edx(hi):eax(lo)=a*b
610  add eax,[carry] //sum lo carry
611  adc edx,0 //sum hi carry
612  mov ecx,edx //store carry
613  mov [edi],eax //load a
614 
615  mov eax,[esi+4] //load a
616  mul [y] //edx(hi):eax(lo)=a*b
617  add eax,ecx //sum lo carry
618  adc edx,0 //sum hi carry
619  mov ecx,edx //store carry
620  mov [edi+4],eax //load a
621 
622  mov eax,[esi+8] //load a
623  mul [y] //edx(hi):eax(lo)=a*b
624  add eax,ecx //sum lo carry
625  adc edx,0 //sum hi carry
626  mov ecx,edx //store carry
627  mov [edi+8],eax //load a
628 
629  mov eax,[esi+12] //load a
630  mul [y] //edx(hi):eax(lo)=a*b
631  add eax,ecx //sum lo carry
632  adc edx,0 //sum hi carry
633  mov ecx,edx //store carry
634  mov [edi+12],eax //load a
635 
636  mov eax,[esi+16] //load a
637  mul [y] //edx(hi):eax(lo)=a*b
638  add eax,ecx //sum lo carry
639  adc edx,0 //sum hi carry
640  mov ecx,edx //store carry
641  mov [edi+16],eax //load a
642 
643  mov eax,[esi+20] //load a
644  mul [y] //edx(hi):eax(lo)=a*b
645  add eax,ecx //sum lo carry
646  adc edx,0 //sum hi carry
647  mov ecx,edx //store carry
648  mov [edi+20],eax //load a
649 
650  mov eax,[esi+24] //load a
651  mul [y] //edx(hi):eax(lo)=a*b
652  add eax,ecx //sum lo carry
653  adc edx,0 //sum hi carry
654  mov ecx,edx //store carry
655  mov [edi+24],eax //load a
656 
657  mov eax,[esi+28] //load a
658  mul [y] //edx(hi):eax(lo)=a*b
659  add eax,ecx //sum lo carry
660  adc edx,0 //sum hi carry
661  mov [edi+28],eax //load a
662  mov eax,edx //store carry
663  }
664 
665 #else
666  z[0] = word_madd2(x[0], y, &carry);
667  z[1] = word_madd2(x[1], y, &carry);
668  z[2] = word_madd2(x[2], y, &carry);
669  z[3] = word_madd2(x[3], y, &carry);
670  z[4] = word_madd2(x[4], y, &carry);
671  z[5] = word_madd2(x[5], y, &carry);
672  z[6] = word_madd2(x[6], y, &carry);
673  z[7] = word_madd2(x[7], y, &carry);
674  return carry;
675 #endif
676  }
677 
678 /*
679 * Eight Word Block Multiply/Add
680 */
681 inline word word8_madd3(word z[8], const word x[8], word y, word carry)
682  {
683 #if defined(BOTAN_MP_USE_X86_32_ASM)
684  asm(
685  DO_8_TIMES(MULADD_OP, "")
686  : [carry]"=r"(carry)
687  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
688  : "cc", "%eax", "%edx");
689  return carry;
690 
691 #elif defined(BOTAN_MP_USE_X86_64_ASM)
692 
693  asm(
694  DO_8_TIMES(MULADD_OP, "")
695  : [carry]"=r"(carry)
696  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
697  : "cc", "%rax", "%rdx");
698  return carry;
699 
700 #else
701  z[0] = word_madd3(x[0], y, z[0], &carry);
702  z[1] = word_madd3(x[1], y, z[1], &carry);
703  z[2] = word_madd3(x[2], y, z[2], &carry);
704  z[3] = word_madd3(x[3], y, z[3], &carry);
705  z[4] = word_madd3(x[4], y, z[4], &carry);
706  z[5] = word_madd3(x[5], y, z[5], &carry);
707  z[6] = word_madd3(x[6], y, z[6], &carry);
708  z[7] = word_madd3(x[7], y, z[7], &carry);
709  return carry;
710 #endif
711  }
712 
713 /*
714 * Multiply-Add Accumulator
715 * (w2,w1,w0) += x * y
716 */
717 inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
718  {
719 #if defined(BOTAN_MP_USE_X86_32_ASM)
720  word z0 = 0, z1 = 0;
721 
722  asm ("mull %[y]"
723  : "=a"(z0),"=d"(z1)
724  : "a"(x), [y]"rm"(y)
725  : "cc");
726 
727  asm(ASM("addl %[z0],%[w0]")
728  ASM("adcl %[z1],%[w1]")
729  ASM("adcl $0,%[w2]")
730 
731  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
732  : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
733  : "cc");
734 
735 #elif defined(BOTAN_MP_USE_X86_64_ASM)
736 
737  word z0 = 0, z1 = 0;
738 
739  asm ("mulq %[y]"
740  : "=a"(z0),"=d"(z1)
741  : "a"(x), [y]"rm"(y)
742  : "cc");
743 
744  asm(ASM("addq %[z0],%[w0]")
745  ASM("adcq %[z1],%[w1]")
746  ASM("adcq $0,%[w2]")
747 
748  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
749  : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
750  : "cc");
751 
752 #else
753  word carry = *w0;
754  *w0 = word_madd2(x, y, &carry);
755  *w1 += carry;
756  *w2 += (*w1 < carry);
757 #endif
758  }
759 
760 /*
761 * 3-word addition
762 * (w2,w1,w0) += x
763 */
764 inline void word3_add(word* w2, word* w1, word* w0, word x)
765  {
766 #if defined(BOTAN_MP_USE_X86_32_ASM)
767  asm(
768  ASM("addl %[x],%[w0]")
769  ASM("adcl $0,%[w1]")
770  ASM("adcl $0,%[w2]")
771 
772  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
773  : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
774  : "cc");
775 
776 #elif defined(BOTAN_MP_USE_X86_64_ASM)
777 
778  asm(
779  ASM("addq %[x],%[w0]")
780  ASM("adcq $0,%[w1]")
781  ASM("adcq $0,%[w2]")
782 
783  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
784  : [x]"r"(x), "0"(*w0), "1"(*w1), "2"(*w2)
785  : "cc");
786 
787 #else
788  *w0 += x;
789  word c1 = (*w0 < x);
790  *w1 += c1;
791  word c2 = (*w1 < c1);
792  *w2 += c2;
793 #endif
794  }
795 
796 /*
797 * Multiply-Add Accumulator
798 * (w2,w1,w0) += 2 * x * y
799 */
800 inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
801  {
802 #if defined(BOTAN_MP_USE_X86_32_ASM)
803 
804  word z0 = 0, z1 = 0;
805 
806  asm ("mull %[y]"
807  : "=a"(z0),"=d"(z1)
808  : "a"(x), [y]"rm"(y)
809  : "cc");
810 
811  asm(
812  ASM("addl %[z0],%[w0]")
813  ASM("adcl %[z1],%[w1]")
814  ASM("adcl $0,%[w2]")
815 
816  ASM("addl %[z0],%[w0]")
817  ASM("adcl %[z1],%[w1]")
818  ASM("adcl $0,%[w2]")
819 
820  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
821  : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
822  : "cc");
823 
824 #elif defined(BOTAN_MP_USE_X86_64_ASM)
825 
826  word z0 = 0, z1 = 0;
827 
828  asm ("mulq %[y]"
829  : "=a"(z0),"=d"(z1)
830  : "a"(x), [y]"rm"(y)
831  : "cc");
832 
833  asm(
834  ASM("addq %[z0],%[w0]")
835  ASM("adcq %[z1],%[w1]")
836  ASM("adcq $0,%[w2]")
837 
838  ASM("addq %[z0],%[w0]")
839  ASM("adcq %[z1],%[w1]")
840  ASM("adcq $0,%[w2]")
841 
842  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2)
843  : [z0]"r"(z0), [z1]"r"(z1), "0"(*w0), "1"(*w1), "2"(*w2)
844  : "cc");
845 
846 #else
847  word carry = 0;
848  x = word_madd2(x, y, &carry);
849  y = carry;
850 
851  word top = (y >> (BOTAN_MP_WORD_BITS-1));
852  y <<= 1;
853  y |= (x >> (BOTAN_MP_WORD_BITS-1));
854  x <<= 1;
855 
856  carry = 0;
857  *w0 = word_add(*w0, x, &carry);
858  *w1 = word_add(*w1, y, &carry);
859  *w2 = word_add(*w2, top, &carry);
860 #endif
861  }
862 
#if defined(ASM)
   /* Drop the assembly helper macros so they do not leak out of this header */
   #undef MULADD_OP
   #undef LINMUL_OP
   #undef ADDSUB3_OP
   #undef ADDSUB2_OP
   #undef ADD_OR_SUBTRACT
   #undef DO_8_TIMES
   #undef ASM
#endif
872 
873 }
874 
875 #endif
word word8_sub2_rev(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:381
void word3_muladd(word *w2, word *w1, word *w0, word x, word y)
Definition: mp_asmi.h:717
word word8_add2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:138
word word8_linmul3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:585
void carry(int64_t &h0, int64_t &h1)
word word_madd3(word a, word b, word c, word *d)
Definition: mp_madd.h:105
word word8_sub2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:311
word word_madd2(word a, word b, word *c)
Definition: mp_madd.h:59
word word8_madd3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:681
void word3_add(word *w2, word *w1, word *w0, word x)
Definition: mp_asmi.h:764
word word8_linmul2(word x[8], word y, word carry)
Definition: mp_asmi.h:488
Definition: alg_id.cpp:13
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:200
void word3_muladd_2(word *w2, word *w1, word *w0, word x, word y)
Definition: mp_asmi.h:800
word word_sub(word x, word y, word *carry)
Definition: mp_asmi.h:280
word word_add(word x, word y, word *carry)
Definition: mp_asmi.h:107
word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:416