Botan  2.4.0
Crypto and TLS for C++11
mp_asmi.h
Go to the documentation of this file.
1 /*
2 * Lowest Level MPI Algorithms
3 * (C) 1999-2010 Jack Lloyd
4 * 2006 Luca Piccarreta
5 *
6 * Botan is released under the Simplified BSD License (see license.txt)
7 */
8 
9 #ifndef BOTAN_MP_ASM_INTERNAL_H_
10 #define BOTAN_MP_ASM_INTERNAL_H_
11 
12 #include <botan/internal/mp_madd.h>
13 
14 namespace Botan {
15 
16 #if defined(BOTAN_MP_USE_X86_32_ASM)
17 
/*
* x86-32 step of a two-operand add/subtract chain.
* Loads word INDEX of y into the %[carry] register (used as scratch here),
* then applies OPERATION (callers pass "adcl" or "sbbl") from that register
* onto word INDEX of x in memory.
* The last line ends in a backslash, so the macro continues onto the
* following blank line; that line must stay empty.
*/
18 #define ADDSUB2_OP(OPERATION, INDEX) \
19  ASM("movl 4*" #INDEX "(%[y]), %[carry]") \
20  ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \
21 
/*
* x86-32 step of a three-operand add/subtract chain.
* Loads word INDEX of x into the %[carry] register (used as scratch here),
* applies OPERATION (callers pass "adcl" or "sbbl") with word INDEX of y,
* and stores the result to word INDEX of z.
* The last line ends in a backslash, so the macro continues onto the
* following blank line; that line must stay empty.
*/
22 #define ADDSUB3_OP(OPERATION, INDEX) \
23  ASM("movl 4*" #INDEX "(%[x]), %[carry]") \
24  ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \
25  ASM("movl %[carry], 4*" #INDEX "(%[z])") \
26 
/*
* x86-32 step of a linear multiply: edx:eax = x[INDEX] * y + carry.
* The low word (eax) is stored to word INDEX of the operand named by
* WRITE_TO (callers pass "x" or "z"); the high word (edx) becomes the
* new %[carry]. Overwrites eax and edx.
*/
27 #define LINMUL_OP(WRITE_TO, INDEX) \
28  ASM("movl 4*" #INDEX "(%[x]),%%eax") \
29  ASM("mull %[y]") \
30  ASM("addl %[carry],%%eax") \
31  ASM("adcl $0,%%edx") \
32  ASM("movl %%edx,%[carry]") \
33  ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
34 
/*
* x86-32 step of a multiply-accumulate:
* edx:eax = x[INDEX] * y + carry + z[INDEX].
* The low word (eax) is stored back to z[INDEX]; the high word (edx)
* becomes the new %[carry]. Overwrites eax and edx.
* IGNORED is unused; it only gives the macro the (ARG, INDEX) shape that
* DO_8_TIMES invokes.
*/
35 #define MULADD_OP(IGNORED, INDEX) \
36  ASM("movl 4*" #INDEX "(%[x]),%%eax") \
37  ASM("mull %[y]") \
38  ASM("addl %[carry],%%eax") \
39  ASM("adcl $0,%%edx") \
40  ASM("addl 4*" #INDEX "(%[z]),%%eax") \
41  ASM("adcl $0,%%edx") \
42  ASM("movl %%edx,%[carry]") \
43  ASM("movl %%eax, 4*" #INDEX " (%[z])")
44 
/*
* x86-32 wrapper that feeds a carry/borrow word through the CPU carry flag.
* The rotate-right by one moves bit 0 of %[carry] into CF, CORE_CODE then
* runs its adc/sbb instructions, and the final sbb/neg pair converts the
* resulting CF back into 0 or 1 in %[carry].
*/
45 #define ADD_OR_SUBTRACT(CORE_CODE) \
46  ASM("rorl %[carry]") \
47  CORE_CODE \
48  ASM("sbbl %[carry],%[carry]") \
49  ASM("negl %[carry]")
50 
51 #elif defined(BOTAN_MP_USE_X86_64_ASM)
52 
/*
* x86-64 step of a two-operand add/subtract chain.
* Loads word INDEX of y into the %[carry] register (used as scratch here),
* then applies OPERATION (callers pass "adcq" or "sbbq") from that register
* onto word INDEX of x in memory.
* The last line ends in a backslash, so the macro continues onto the
* following blank line; that line must stay empty.
*/
53 #define ADDSUB2_OP(OPERATION, INDEX) \
54  ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
55  ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \
56 
/*
* x86-64 step of a three-operand add/subtract chain.
* Loads word INDEX of x into the %[carry] register (used as scratch here),
* applies OPERATION (callers pass "adcq" or "sbbq") with word INDEX of y,
* and stores the result to word INDEX of z.
* The last line ends in a backslash, so the macro continues onto the
* following blank line; that line must stay empty.
*/
57 #define ADDSUB3_OP(OPERATION, INDEX) \
58  ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
59  ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
60  ASM("movq %[carry], 8*" #INDEX "(%[z])") \
61 
/*
* x86-64 step of a linear multiply: rdx:rax = x[INDEX] * y + carry.
* The low word (rax) is stored to word INDEX of the operand named by
* WRITE_TO (callers pass "x" or "z"); the high word (rdx) becomes the
* new %[carry]. Overwrites rax and rdx.
*/
62 #define LINMUL_OP(WRITE_TO, INDEX) \
63  ASM("movq 8*" #INDEX "(%[x]),%%rax") \
64  ASM("mulq %[y]") \
65  ASM("addq %[carry],%%rax") \
66  ASM("adcq $0,%%rdx") \
67  ASM("movq %%rdx,%[carry]") \
68  ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
69 
/*
* x86-64 step of a multiply-accumulate:
* rdx:rax = x[INDEX] * y + carry + z[INDEX].
* The low word (rax) is stored back to z[INDEX]; the high word (rdx)
* becomes the new %[carry]. Overwrites rax and rdx.
* IGNORED is unused; it only gives the macro the (ARG, INDEX) shape that
* DO_8_TIMES invokes.
*/
70 #define MULADD_OP(IGNORED, INDEX) \
71  ASM("movq 8*" #INDEX "(%[x]),%%rax") \
72  ASM("mulq %[y]") \
73  ASM("addq %[carry],%%rax") \
74  ASM("adcq $0,%%rdx") \
75  ASM("addq 8*" #INDEX "(%[z]),%%rax") \
76  ASM("adcq $0,%%rdx") \
77  ASM("movq %%rdx,%[carry]") \
78  ASM("movq %%rax, 8*" #INDEX " (%[z])")
79 
/*
* x86-64 wrapper that feeds a carry/borrow word through the CPU carry flag.
* The rotate-right by one moves bit 0 of %[carry] into CF, CORE_CODE then
* runs its adc/sbb instructions, and the final sbb/neg pair converts the
* resulting CF back into 0 or 1 in %[carry].
*/
80 #define ADD_OR_SUBTRACT(CORE_CODE) \
81  ASM("rorq %[carry]") \
82  CORE_CODE \
83  ASM("sbbq %[carry],%[carry]") \
84  ASM("negq %[carry]")
85 
86 #endif
87 
88 #if defined(ADD_OR_SUBTRACT)
89 
/*
* Terminates one instruction string with newline + tab, so that the
* adjacent string literals produced by the *_OP macros concatenate into a
* single asm template with one instruction per line. It is defined after
* the macros that mention it, which works because macro bodies are only
* expanded at their point of use.
*/
90 #define ASM(x) x "\n\t"
91 
/*
* Expands MACRO(ARG, i) for i = 0..7, i.e. one unrolled step per word of an
* eight-word block. ARG is passed through unchanged as the first argument.
*/
92 #define DO_8_TIMES(MACRO, ARG) \
93  MACRO(ARG, 0) \
94  MACRO(ARG, 1) \
95  MACRO(ARG, 2) \
96  MACRO(ARG, 3) \
97  MACRO(ARG, 4) \
98  MACRO(ARG, 5) \
99  MACRO(ARG, 6) \
100  MACRO(ARG, 7)
101 
102 #endif
103 
104 /*
105 * Word Addition
*
* Returns the low word of x + y + *carry and writes the carry out of that
* sum (0 or 1) back to *carry.
*
* The incoming *carry is expected to be 0 or 1: the inline-asm paths use
* only its lowest bit (moved into the CPU carry flag), while the portable
* path adds the whole value.
106 */
107 inline word word_add(word x, word y, word* carry)
108  {
109 #if defined(BOTAN_MP_USE_X86_32_ASM)
110  asm(
111  ADD_OR_SUBTRACT(ASM("adcl %[y],%[x]"))
112  : [x]"=r"(x), [carry]"=r"(*carry)
113  : "0"(x), [y]"rm"(y), "1"(*carry)
114  : "cc");
115  return x;
116 
117 #elif defined(BOTAN_MP_USE_X86_64_ASM)
118 
119  asm(
120  ADD_OR_SUBTRACT(ASM("adcq %[y],%[x]"))
121  : [x]"=r"(x), [carry]"=r"(*carry)
122  : "0"(x), [y]"rm"(y), "1"(*carry)
123  : "cc");
124  return x;
125 
126 #else
127  word z = x + y;
128  word c1 = (z < x); // sum wrapped around, so x + y carried out
129  z += *carry;
130  *carry = c1 | (z < *carry); // second term: adding the carry-in wrapped
131  return z;
132 #endif
133  }
134 
135 /*
136 * Eight Word Block Addition, Two Argument
*
* In-place addition of two eight-word blocks: x[i] += y[i] for i = 0..7,
* with the carry propagated from word to word starting from the incoming
* carry. Returns the carry out of the last word.
137 */
138 inline word word8_add2(word x[8], const word y[8], word carry)
139  {
140 #if defined(BOTAN_MP_USE_X86_32_ASM)
141  asm(
142  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcl"))
143  : [carry]"=r"(carry)
144  : [x]"r"(x), [y]"r"(y), "0"(carry)
145  : "cc", "memory");
146  return carry;
147 
148 #elif defined(BOTAN_MP_USE_X86_64_ASM)
149 
150  asm(
151  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "adcq"))
152  : [carry]"=r"(carry)
153  : [x]"r"(x), [y]"r"(y), "0"(carry)
154  : "cc", "memory");
155  return carry;
156 
157 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
158 
// This branch has no return statement; the carry is left in eax.
// NOTE(review): presumably relies on MSVC taking the return value from
// eax when a function ends in an __asm block -- confirm.
159  __asm {
160  mov edx,[x]
161  mov esi,[y]
162  xor eax,eax
163  sub eax,[carry] //0 - carry: sets CF iff carry != 0
164  mov eax,[esi]
165  adc [edx],eax
166  mov eax,[esi+4]
167  adc [edx+4],eax
168  mov eax,[esi+8]
169  adc [edx+8],eax
170  mov eax,[esi+12]
171  adc [edx+12],eax
172  mov eax,[esi+16]
173  adc [edx+16],eax
174  mov eax,[esi+20]
175  adc [edx+20],eax
176  mov eax,[esi+24]
177  adc [edx+24],eax
178  mov eax,[esi+28]
179  adc [edx+28],eax
180  sbb eax,eax //eax = 0 or -1 from the final CF
181  neg eax //eax = 0 or 1
182  }
183 
184 #else
185  x[0] = word_add(x[0], y[0], &carry);
186  x[1] = word_add(x[1], y[1], &carry);
187  x[2] = word_add(x[2], y[2], &carry);
188  x[3] = word_add(x[3], y[3], &carry);
189  x[4] = word_add(x[4], y[4], &carry);
190  x[5] = word_add(x[5], y[5], &carry);
191  x[6] = word_add(x[6], y[6], &carry);
192  x[7] = word_add(x[7], y[7], &carry);
193  return carry;
194 #endif
195  }
196 
197 /*
198 * Eight Word Block Addition, Three Argument
*
* Adds two eight-word blocks into a third: z[i] = x[i] + y[i] for
* i = 0..7, with the carry propagated from word to word starting from the
* incoming carry. Returns the carry out of the last word.
199 */
200 inline word word8_add3(word z[8], const word x[8],
201  const word y[8], word carry)
202  {
203 #if defined(BOTAN_MP_USE_X86_32_ASM)
204  asm(
205  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcl"))
206  : [carry]"=r"(carry)
207  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
208  : "cc", "memory");
209  return carry;
210 
211 #elif defined(BOTAN_MP_USE_X86_64_ASM)
212 
213  asm(
214  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "adcq"))
215  : [carry]"=r"(carry)
216  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
217  : "cc", "memory");
218  return carry;
219 
220 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
221 
// This branch has no return statement; the carry is left in eax.
// NOTE(review): presumably relies on MSVC taking the return value from
// eax when a function ends in an __asm block -- confirm.
222  __asm {
223  mov edi,[x]
224  mov esi,[y]
225  mov ebx,[z]
226  xor eax,eax
227  sub eax,[carry] //0 - carry: sets CF iff carry != 0
228  mov eax,[edi]
229  adc eax,[esi]
230  mov [ebx],eax
231 
232  mov eax,[edi+4]
233  adc eax,[esi+4]
234  mov [ebx+4],eax
235 
236  mov eax,[edi+8]
237  adc eax,[esi+8]
238  mov [ebx+8],eax
239 
240  mov eax,[edi+12]
241  adc eax,[esi+12]
242  mov [ebx+12],eax
243 
244  mov eax,[edi+16]
245  adc eax,[esi+16]
246  mov [ebx+16],eax
247 
248  mov eax,[edi+20]
249  adc eax,[esi+20]
250  mov [ebx+20],eax
251 
252  mov eax,[edi+24]
253  adc eax,[esi+24]
254  mov [ebx+24],eax
255 
256  mov eax,[edi+28]
257  adc eax,[esi+28]
258  mov [ebx+28],eax
259 
260  sbb eax,eax //eax = 0 or -1 from the final CF
261  neg eax //eax = 0 or 1
262  }
263 
264 #else
265  z[0] = word_add(x[0], y[0], &carry);
266  z[1] = word_add(x[1], y[1], &carry);
267  z[2] = word_add(x[2], y[2], &carry);
268  z[3] = word_add(x[3], y[3], &carry);
269  z[4] = word_add(x[4], y[4], &carry);
270  z[5] = word_add(x[5], y[5], &carry);
271  z[6] = word_add(x[6], y[6], &carry);
272  z[7] = word_add(x[7], y[7], &carry);
273  return carry;
274 #endif
275  }
276 
277 /*
278 * Word Subtraction
*
* Returns the low word of x - y - *carry and writes the borrow out of that
* subtraction (0 or 1) back to *carry.
*
* The incoming *carry is expected to be 0 or 1: the inline-asm paths use
* only its lowest bit (moved into the CPU carry flag), while the portable
* path subtracts the whole value.
279 */
280 inline word word_sub(word x, word y, word* carry)
281  {
282 #if defined(BOTAN_MP_USE_X86_32_ASM)
283  asm(
284  ADD_OR_SUBTRACT(ASM("sbbl %[y],%[x]"))
285  : [x]"=r"(x), [carry]"=r"(*carry)
286  : "0"(x), [y]"rm"(y), "1"(*carry)
287  : "cc");
288  return x;
289 
290 #elif defined(BOTAN_MP_USE_X86_64_ASM)
291 
292  asm(
293  ADD_OR_SUBTRACT(ASM("sbbq %[y],%[x]"))
294  : [x]"=r"(x), [carry]"=r"(*carry)
295  : "0"(x), [y]"rm"(y), "1"(*carry)
296  : "cc");
297  return x;
298 
299 #else
300  word t0 = x - y;
301  word c1 = (t0 > x); // difference wrapped around, so x - y borrowed
302  word z = t0 - *carry;
303  *carry = c1 | (z > t0); // second term: subtracting the borrow-in wrapped
304  return z;
305 #endif
306  }
307 
308 /*
309 * Eight Word Block Subtraction, Two Argument
*
* In-place subtraction of two eight-word blocks: x[i] -= y[i] for
* i = 0..7, with the borrow propagated from word to word starting from the
* incoming carry. Returns the borrow out of the last word.
310 */
311 inline word word8_sub2(word x[8], const word y[8], word carry)
312  {
313 #if defined(BOTAN_MP_USE_X86_32_ASM)
314  asm(
315  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbl"))
316  : [carry]"=r"(carry)
317  : [x]"r"(x), [y]"r"(y), "0"(carry)
318  : "cc", "memory");
319  return carry;
320 
321 #elif defined(BOTAN_MP_USE_X86_64_ASM)
322 
323  asm(
324  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP, "sbbq"))
325  : [carry]"=r"(carry)
326  : [x]"r"(x), [y]"r"(y), "0"(carry)
327  : "cc", "memory");
328  return carry;
329 
330 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
331 
// This branch has no return statement; the borrow is left in eax.
// NOTE(review): presumably relies on MSVC taking the return value from
// eax when a function ends in an __asm block -- confirm.
332  __asm {
333  mov edi,[x]
334  mov esi,[y]
335  xor eax,eax
336  sub eax,[carry] //0 - carry: sets CF iff carry != 0
337  mov eax,[edi]
338  sbb eax,[esi]
339  mov [edi],eax
340  mov eax,[edi+4]
341  sbb eax,[esi+4]
342  mov [edi+4],eax
343  mov eax,[edi+8]
344  sbb eax,[esi+8]
345  mov [edi+8],eax
346  mov eax,[edi+12]
347  sbb eax,[esi+12]
348  mov [edi+12],eax
349  mov eax,[edi+16]
350  sbb eax,[esi+16]
351  mov [edi+16],eax
352  mov eax,[edi+20]
353  sbb eax,[esi+20]
354  mov [edi+20],eax
355  mov eax,[edi+24]
356  sbb eax,[esi+24]
357  mov [edi+24],eax
358  mov eax,[edi+28]
359  sbb eax,[esi+28]
360  mov [edi+28],eax
361  sbb eax,eax //eax = 0 or -1 from the final CF
362  neg eax //eax = 0 or 1
363  }
364 
365 #else
366  x[0] = word_sub(x[0], y[0], &carry);
367  x[1] = word_sub(x[1], y[1], &carry);
368  x[2] = word_sub(x[2], y[2], &carry);
369  x[3] = word_sub(x[3], y[3], &carry);
370  x[4] = word_sub(x[4], y[4], &carry);
371  x[5] = word_sub(x[5], y[5], &carry);
372  x[6] = word_sub(x[6], y[6], &carry);
373  x[7] = word_sub(x[7], y[7], &carry);
374  return carry;
375 #endif
376  }
377 
378 /*
379 * Eight Word Block Subtraction, Two Argument (reversed operands)
*
* In-place reversed subtraction: x[i] = y[i] - x[i] for i = 0..7, with the
* borrow propagated from word to word starting from the incoming carry.
* Returns the borrow out of the last word.
*
* The inline-asm paths reuse the three-operand step macro by binding the
* asm operand [x] to y and both [y] and [z] to x.
380 */
381 inline word word8_sub2_rev(word x[8], const word y[8], word carry)
382  {
383 #if defined(BOTAN_MP_USE_X86_32_ASM)
384  asm(
385  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
386  : [carry]"=r"(carry)
387  : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
388  : "cc", "memory");
389  return carry;
390 
391 #elif defined(BOTAN_MP_USE_X86_64_ASM)
392 
393  asm(
394  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
395  : [carry]"=r"(carry)
396  : [x]"r"(y), [y]"r"(x), [z]"r"(x), "0"(carry)
397  : "cc", "memory");
398  return carry;
399 
400 #else
401  x[0] = word_sub(y[0], x[0], &carry);
402  x[1] = word_sub(y[1], x[1], &carry);
403  x[2] = word_sub(y[2], x[2], &carry);
404  x[3] = word_sub(y[3], x[3], &carry);
405  x[4] = word_sub(y[4], x[4], &carry);
406  x[5] = word_sub(y[5], x[5], &carry);
407  x[6] = word_sub(y[6], x[6], &carry);
408  x[7] = word_sub(y[7], x[7], &carry);
409  return carry;
410 #endif
411  }
412 
413 /*
414 * Eight Word Block Subtraction, Three Argument
*
* Subtracts two eight-word blocks into a third: z[i] = x[i] - y[i] for
* i = 0..7, with the borrow propagated from word to word starting from the
* incoming carry. Returns the borrow out of the last word.
415 */
416 inline word word8_sub3(word z[8], const word x[8],
417  const word y[8], word carry)
418  {
419 #if defined(BOTAN_MP_USE_X86_32_ASM)
420  asm(
421  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbl"))
422  : [carry]"=r"(carry)
423  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
424  : "cc", "memory");
425  return carry;
426 
427 #elif defined(BOTAN_MP_USE_X86_64_ASM)
428 
429  asm(
430  ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP, "sbbq"))
431  : [carry]"=r"(carry)
432  : [x]"r"(x), [y]"r"(y), [z]"r"(z), "0"(carry)
433  : "cc", "memory");
434  return carry;
435 
436 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
437 
// This branch has no return statement; the borrow is left in eax.
// NOTE(review): presumably relies on MSVC taking the return value from
// eax when a function ends in an __asm block -- confirm.
438  __asm {
439  mov edi,[x]
440  mov esi,[y]
441  xor eax,eax
442  sub eax,[carry] //0 - carry: sets CF iff carry != 0
443  mov ebx,[z]
444  mov eax,[edi]
445  sbb eax,[esi]
446  mov [ebx],eax
447  mov eax,[edi+4]
448  sbb eax,[esi+4]
449  mov [ebx+4],eax
450  mov eax,[edi+8]
451  sbb eax,[esi+8]
452  mov [ebx+8],eax
453  mov eax,[edi+12]
454  sbb eax,[esi+12]
455  mov [ebx+12],eax
456  mov eax,[edi+16]
457  sbb eax,[esi+16]
458  mov [ebx+16],eax
459  mov eax,[edi+20]
460  sbb eax,[esi+20]
461  mov [ebx+20],eax
462  mov eax,[edi+24]
463  sbb eax,[esi+24]
464  mov [ebx+24],eax
465  mov eax,[edi+28]
466  sbb eax,[esi+28]
467  mov [ebx+28],eax
468  sbb eax,eax //eax = 0 or -1 from the final CF
469  neg eax //eax = 0 or 1
470  }
471 
472 #else
473  z[0] = word_sub(x[0], y[0], &carry);
474  z[1] = word_sub(x[1], y[1], &carry);
475  z[2] = word_sub(x[2], y[2], &carry);
476  z[3] = word_sub(x[3], y[3], &carry);
477  z[4] = word_sub(x[4], y[4], &carry);
478  z[5] = word_sub(x[5], y[5], &carry);
479  z[6] = word_sub(x[6], y[6], &carry);
480  z[7] = word_sub(x[7], y[7], &carry);
481  return carry;
482 #endif
483  }
484 
485 /*
486 * Eight Word Block Linear Multiplication
487 */
488 inline word word8_linmul2(word x[8], word y, word carry)
489  {
490 #if defined(BOTAN_MP_USE_X86_32_ASM)
491  asm(
492  DO_8_TIMES(LINMUL_OP, "x")
493  : [carry]"=r"(carry)
494  : [x]"r"(x), [y]"rm"(y), "0"(carry)
495  : "cc", "%eax", "%edx");
496  return carry;
497 
498 #elif defined(BOTAN_MP_USE_X86_64_ASM)
499 
500  asm(
501  DO_8_TIMES(LINMUL_OP, "x")
502  : [carry]"=r"(carry)
503  : [x]"r"(x), [y]"rm"(y), "0"(carry)
504  : "cc", "%rax", "%rdx");
505  return carry;
506 
507 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
508 
509  __asm {
510  mov esi,[x]
511  mov eax,[esi] //load a
512  mul [y] //edx(hi):eax(lo)=a*b
513  add eax,[carry] //sum lo carry
514  adc edx,0 //sum hi carry
515  mov ecx,edx //store carry
516  mov [esi],eax //load a
517 
518  mov eax,[esi+4] //load a
519  mul [y] //edx(hi):eax(lo)=a*b
520  add eax,ecx //sum lo carry
521  adc edx,0 //sum hi carry
522  mov ecx,edx //store carry
523  mov [esi+4],eax //load a
524 
525  mov eax,[esi+8] //load a
526  mul [y] //edx(hi):eax(lo)=a*b
527  add eax,ecx //sum lo carry
528  adc edx,0 //sum hi carry
529  mov ecx,edx //store carry
530  mov [esi+8],eax //load a
531 
532  mov eax,[esi+12] //load a
533  mul [y] //edx(hi):eax(lo)=a*b
534  add eax,ecx //sum lo carry
535  adc edx,0 //sum hi carry
536  mov ecx,edx //store carry
537  mov [esi+12],eax //load a
538 
539  mov eax,[esi+16] //load a
540  mul [y] //edx(hi):eax(lo)=a*b
541  add eax,ecx //sum lo carry
542  adc edx,0 //sum hi carry
543  mov ecx,edx //store carry
544  mov [esi+16],eax //load a
545 
546  mov eax,[esi+20] //load a
547  mul [y] //edx(hi):eax(lo)=a*b
548  add eax,ecx //sum lo carry
549  adc edx,0 //sum hi carry
550  mov ecx,edx //store carry
551  mov [esi+20],eax //load a
552 
553  mov eax,[esi+24] //load a
554  mul [y] //edx(hi):eax(lo)=a*b
555  add eax,ecx //sum lo carry
556  adc edx,0 //sum hi carry
557  mov ecx,edx //store carry
558  mov [esi+24],eax //load a
559 
560  mov eax,[esi+28] //load a
561  mul [y] //edx(hi):eax(lo)=a*b
562  add eax,ecx //sum lo carry
563  adc edx,0 //sum hi carry
564  mov [esi+28],eax //load a
565 
566  mov eax,edx //store carry
567  }
568 
569 #else
570  x[0] = word_madd2(x[0], y, &carry);
571  x[1] = word_madd2(x[1], y, &carry);
572  x[2] = word_madd2(x[2], y, &carry);
573  x[3] = word_madd2(x[3], y, &carry);
574  x[4] = word_madd2(x[4], y, &carry);
575  x[5] = word_madd2(x[5], y, &carry);
576  x[6] = word_madd2(x[6], y, &carry);
577  x[7] = word_madd2(x[7], y, &carry);
578  return carry;
579 #endif
580  }
581 
582 /*
583 * Eight Word Block Linear Multiplication
584 */
585 inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
586  {
587 #if defined(BOTAN_MP_USE_X86_32_ASM)
588  asm(
589  DO_8_TIMES(LINMUL_OP, "z")
590  : [carry]"=r"(carry)
591  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
592  : "cc", "%eax", "%edx");
593  return carry;
594 
595 #elif defined(BOTAN_MP_USE_X86_64_ASM)
596  asm(
597  DO_8_TIMES(LINMUL_OP, "z")
598  : [carry]"=r"(carry)
599  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
600  : "cc", "%rax", "%rdx");
601  return carry;
602 
603 #elif defined(BOTAN_MP_USE_X86_32_MSVC_ASM)
604 
605  __asm {
606  mov edi,[z]
607  mov esi,[x]
608  mov eax,[esi] //load a
609  mul [y] //edx(hi):eax(lo)=a*b
610  add eax,[carry] //sum lo carry
611  adc edx,0 //sum hi carry
612  mov ecx,edx //store carry
613  mov [edi],eax //load a
614 
615  mov eax,[esi+4] //load a
616  mul [y] //edx(hi):eax(lo)=a*b
617  add eax,ecx //sum lo carry
618  adc edx,0 //sum hi carry
619  mov ecx,edx //store carry
620  mov [edi+4],eax //load a
621 
622  mov eax,[esi+8] //load a
623  mul [y] //edx(hi):eax(lo)=a*b
624  add eax,ecx //sum lo carry
625  adc edx,0 //sum hi carry
626  mov ecx,edx //store carry
627  mov [edi+8],eax //load a
628 
629  mov eax,[esi+12] //load a
630  mul [y] //edx(hi):eax(lo)=a*b
631  add eax,ecx //sum lo carry
632  adc edx,0 //sum hi carry
633  mov ecx,edx //store carry
634  mov [edi+12],eax //load a
635 
636  mov eax,[esi+16] //load a
637  mul [y] //edx(hi):eax(lo)=a*b
638  add eax,ecx //sum lo carry
639  adc edx,0 //sum hi carry
640  mov ecx,edx //store carry
641  mov [edi+16],eax //load a
642 
643  mov eax,[esi+20] //load a
644  mul [y] //edx(hi):eax(lo)=a*b
645  add eax,ecx //sum lo carry
646  adc edx,0 //sum hi carry
647  mov ecx,edx //store carry
648  mov [edi+20],eax //load a
649 
650  mov eax,[esi+24] //load a
651  mul [y] //edx(hi):eax(lo)=a*b
652  add eax,ecx //sum lo carry
653  adc edx,0 //sum hi carry
654  mov ecx,edx //store carry
655  mov [edi+24],eax //load a
656 
657  mov eax,[esi+28] //load a
658  mul [y] //edx(hi):eax(lo)=a*b
659  add eax,ecx //sum lo carry
660  adc edx,0 //sum hi carry
661  mov [edi+28],eax //load a
662  mov eax,edx //store carry
663  }
664 
665 #else
666  z[0] = word_madd2(x[0], y, &carry);
667  z[1] = word_madd2(x[1], y, &carry);
668  z[2] = word_madd2(x[2], y, &carry);
669  z[3] = word_madd2(x[3], y, &carry);
670  z[4] = word_madd2(x[4], y, &carry);
671  z[5] = word_madd2(x[5], y, &carry);
672  z[6] = word_madd2(x[6], y, &carry);
673  z[7] = word_madd2(x[7], y, &carry);
674  return carry;
675 #endif
676  }
677 
678 /*
679 * Eight Word Block Multiply/Add
*
* Multiply-accumulate of an eight-word block by a single word: for
* i = 0..7 the double-word value x[i] * y + z[i] + carry is formed, its low
* word is written back to z[i] and its high word becomes the carry for the
* next word. Returns the final carry.
*
* The GCC-style asm paths read x[] and read/write z[] through pointer
* operands, so they list "memory" in the clobbers; without it the compiler
* is allowed to keep values of those arrays cached in registers across the
* statement.
*
* The portable path delegates each step to word_madd3 (mp_madd.h);
* NOTE(review): presumably it has the same contract as the asm step --
* confirm against mp_madd.h.
680 */
681 inline word word8_madd3(word z[8], const word x[8], word y, word carry)
682  {
683 #if defined(BOTAN_MP_USE_X86_32_ASM)
684  asm(
685  DO_8_TIMES(MULADD_OP, "")
686  : [carry]"=r"(carry)
687  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
688  : "cc", "memory", "%eax", "%edx");
689  return carry;
690 
691 #elif defined(BOTAN_MP_USE_X86_64_ASM)
692 
693  asm(
694  DO_8_TIMES(MULADD_OP, "")
695  : [carry]"=r"(carry)
696  : [z]"r"(z), [x]"r"(x), [y]"rm"(y), "0"(carry)
697  : "cc", "memory", "%rax", "%rdx");
698  return carry;
699 
700 #else
701  z[0] = word_madd3(x[0], y, z[0], &carry);
702  z[1] = word_madd3(x[1], y, z[1], &carry);
703  z[2] = word_madd3(x[2], y, z[2], &carry);
704  z[3] = word_madd3(x[3], y, z[3], &carry);
705  z[4] = word_madd3(x[4], y, z[4], &carry);
706  z[5] = word_madd3(x[5], y, z[5], &carry);
707  z[6] = word_madd3(x[6], y, z[6], &carry);
708  z[7] = word_madd3(x[7], y, z[7], &carry);
709  return carry;
710 #endif
711  }
712 
713 /*
714 * Multiply-Add Accumulator
715 * (w2,w1,w0) += x * y
*
* Adds the double-word product x * y into the three-word accumulator
* (*w2,*w1,*w0), where *w0 is the least significant word.
*
* In the inline-asm paths the MUL instruction overwrites the registers
* holding x and y (eax/edx, or rax/rdx) with the low and high product
* words. Both are therefore declared as read-write operands ("+a", "+d");
* as input-only operands the compiler would be entitled to assume the
* registers still hold x and y after the statement, and could also place
* one of the accumulator outputs in the same register.
*
* The portable path uses word_madd2 (mp_madd.h); NOTE(review): presumably
* it returns the low word of x * y + carry and leaves the high word in
* carry -- confirm against mp_madd.h.
716 */
717 inline void word3_muladd(word* w2, word* w1, word* w0, word x, word y)
718  {
719 #if defined(BOTAN_MP_USE_X86_32_ASM)
720  asm(
721  ASM("mull %[y]")
722 
723  ASM("addl %[x],%[w0]")
724  ASM("adcl %[y],%[w1]")
725  ASM("adcl $0,%[w2]")
726 
727  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"+a"(x), [y]"+d"(y)
728  : "0"(*w0), "1"(*w1), "2"(*w2)
729  : "cc");
730 
731 #elif defined(BOTAN_MP_USE_X86_64_ASM)
732 
733  asm(
734  ASM("mulq %[y]")
735 
736  ASM("addq %[x],%[w0]")
737  ASM("adcq %[y],%[w1]")
738  ASM("adcq $0,%[w2]")
739 
740  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"+a"(x), [y]"+d"(y)
741  : "0"(*w0), "1"(*w1), "2"(*w2)
742  : "cc");
743 
744 #else
745  word carry = *w0;
746  *w0 = word_madd2(x, y, &carry);
747  *w1 += carry;
748  *w2 += (*w1 < carry) ? 1 : 0; // *w1 wrapped around: carry into *w2
749 #endif
750  }
751 
752 /*
753 * Multiply-Add Accumulator
754 * (w2,w1,w0) += 2 * x * y
*
* Adds twice the double-word product x * y into the three-word accumulator
* (*w2,*w1,*w0), where *w0 is the least significant word.
*
* The inline-asm paths multiply once and add the product into the
* accumulator twice. MUL overwrites the registers holding x and y (eax/edx,
* or rax/rdx) with the low and high product words, so both are declared as
* read-write operands ("+a", "+d"); as input-only operands the compiler
* would be entitled to assume the registers still hold x and y after the
* statement, and could also place one of the accumulator outputs in the
* same register.
*
* The portable path doubles the product by shifting it left one bit across
* three words and then adds it with word_add. It uses word_madd2
* (mp_madd.h); NOTE(review): presumably that returns the low word of
* x * y + carry and leaves the high word in carry -- confirm.
755 */
756 inline void word3_muladd_2(word* w2, word* w1, word* w0, word x, word y)
757  {
758 #if defined(BOTAN_MP_USE_X86_32_ASM)
759  asm(
760  ASM("mull %[y]")
761 
762  ASM("addl %[x],%[w0]")
763  ASM("adcl %[y],%[w1]")
764  ASM("adcl $0,%[w2]")
765 
766  ASM("addl %[x],%[w0]")
767  ASM("adcl %[y],%[w1]")
768  ASM("adcl $0,%[w2]")
769 
770  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"+a"(x), [y]"+d"(y)
771  : "0"(*w0), "1"(*w1), "2"(*w2)
772  : "cc");
773 
774 #elif defined(BOTAN_MP_USE_X86_64_ASM)
775 
776  asm(
777  ASM("mulq %[y]")
778 
779  ASM("addq %[x],%[w0]")
780  ASM("adcq %[y],%[w1]")
781  ASM("adcq $0,%[w2]")
782 
783  ASM("addq %[x],%[w0]")
784  ASM("adcq %[y],%[w1]")
785  ASM("adcq $0,%[w2]")
786 
787  : [w0]"=r"(*w0), [w1]"=r"(*w1), [w2]"=r"(*w2), [x]"+a"(x), [y]"+d"(y)
788  : "0"(*w0), "1"(*w1), "2"(*w2)
789  : "cc");
790 
791 #else
792  word carry = 0;
793  x = word_madd2(x, y, &carry); // x now holds the low product word
794  y = carry; // y now holds the high product word
795 
796  word top = (y >> (BOTAN_MP_WORD_BITS-1)); // bit shifted out of the high word
797  y <<= 1;
798  y |= (x >> (BOTAN_MP_WORD_BITS-1)); // bit shifted out of the low word
799  x <<= 1;
800 
801  carry = 0;
802  *w0 = word_add(*w0, x, &carry);
803  *w1 = word_add(*w1, y, &carry);
804  *w2 = word_add(*w2, top, &carry);
805 #endif
806  }
807 
// Remove the helper macros defined by this header (only present when one of
// the GCC-style asm variants was selected) so they are not visible to code
// that includes it.
808 #if defined(ASM)
809  #undef ASM
810  #undef DO_8_TIMES
811  #undef ADD_OR_SUBTRACT
812  #undef ADDSUB2_OP
813  #undef ADDSUB3_OP
814  #undef LINMUL_OP
815  #undef MULADD_OP
816 #endif
817 
818 }
819 
820 #endif
word word8_sub2_rev(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:381
void word3_muladd(word *w2, word *w1, word *w0, word x, word y)
Definition: mp_asmi.h:717
word word8_add2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:138
word word8_linmul3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:585
word word_madd3(word a, word b, word c, word *d)
Definition: mp_madd.h:105
word word8_sub2(word x[8], const word y[8], word carry)
Definition: mp_asmi.h:311
word word_madd2(word a, word b, word *c)
Definition: mp_madd.h:59
word word8_madd3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:681
word word8_linmul2(word x[8], word y, word carry)
Definition: mp_asmi.h:488
Definition: alg_id.cpp:13
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:200
void word3_muladd_2(word *w2, word *w1, word *w0, word x, word y)
Definition: mp_asmi.h:756
word word_sub(word x, word y, word *carry)
Definition: mp_asmi.h:280
word word_add(word x, word y, word *carry)
Definition: mp_asmi.h:107
word word8_sub3(word z[8], const word x[8], const word y[8], word carry)
Definition: mp_asmi.h:416