Botan  2.6.0
Crypto and TLS for C++11
mp_karat.cpp
Go to the documentation of this file.
1 /*
2 * Multiplication and Squaring
3 * (C) 1999-2010 Jack Lloyd
4 * 2016 Matthias Gierlings
5 *
6 * Botan is released under the Simplified BSD License (see license.txt)
7 */
8 
9 #include <botan/internal/mp_core.h>
10 #include <botan/internal/mp_asmi.h>
11 #include <botan/mem_ops.h>
12 #include <botan/exceptn.h>
13 
14 namespace Botan {
15 
16 namespace {
17 
18 const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
19 const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
20 
21 /*
22 * Simple O(N^2) Multiplication
23 */
24 void basecase_mul(word z[], size_t z_size,
25  const word x[], size_t x_size,
26  const word y[], size_t y_size)
27  {
28  if(z_size < x_size + y_size)
29  throw Invalid_Argument("basecase_mul z_size too small");
30 
31  const size_t x_size_8 = x_size - (x_size % 8);
32 
33  clear_mem(z, z_size);
34 
35  for(size_t i = 0; i != y_size; ++i)
36  {
37  const word y_i = y[i];
38 
39  word carry = 0;
40 
41  for(size_t j = 0; j != x_size_8; j += 8)
42  carry = word8_madd3(z + i + j, x + j, y_i, carry);
43 
44  for(size_t j = x_size_8; j != x_size; ++j)
45  z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
46 
47  z[x_size+i] = carry;
48  }
49  }
50 
51 void basecase_sqr(word z[], size_t z_size,
52  const word x[], size_t x_size)
53  {
54  if(z_size < 2*x_size)
55  throw Invalid_Argument("basecase_sqr z_size too small");
56 
57  const size_t x_size_8 = x_size - (x_size % 8);
58 
59  clear_mem(z, z_size);
60 
61  for(size_t i = 0; i != x_size; ++i)
62  {
63  const word x_i = x[i];
64 
65  word carry = 0;
66 
67  for(size_t j = 0; j != x_size_8; j += 8)
68  carry = word8_madd3(z + i + j, x + j, x_i, carry);
69 
70  for(size_t j = x_size_8; j != x_size; ++j)
71  z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
72 
73  z[x_size+i] = carry;
74  }
75  }
76 
77 /*
78 * Karatsuba Multiplication Operation
79 */
80 void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
81  word workspace[])
82  {
83  if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
84  {
85  if(N == 6)
86  return bigint_comba_mul6(z, x, y);
87  else if(N == 8)
88  return bigint_comba_mul8(z, x, y);
89  else if(N == 16)
90  return bigint_comba_mul16(z, x, y);
91  else
92  return basecase_mul(z, 2*N, x, N, y, N);
93  }
94 
95  const size_t N2 = N / 2;
96 
97  const word* x0 = x;
98  const word* x1 = x + N2;
99  const word* y0 = y;
100  const word* y1 = y + N2;
101  word* z0 = z;
102  word* z1 = z + N;
103 
104  const int32_t cmp0 = bigint_cmp(x0, N2, x1, N2);
105  const int32_t cmp1 = bigint_cmp(y1, N2, y0, N2);
106 
107  clear_mem(workspace, 2*N);
108 
109  /*
110  * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
111  * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
112  * anything, clear_mem above already set the correct result.
113  *
114  * However we ignore the result of the comparisons and always perform the
115  * subtractions and recursively multiply to avoid the timing channel.
116  */
117 
118  //if(cmp0 && cmp1)
119  {
120  if(cmp0 > 0)
121  bigint_sub3(z0, x0, N2, x1, N2);
122  else
123  bigint_sub3(z0, x1, N2, x0, N2);
124 
125  if(cmp1 > 0)
126  bigint_sub3(z1, y1, N2, y0, N2);
127  else
128  bigint_sub3(z1, y0, N2, y1, N2);
129 
130  karatsuba_mul(workspace, z0, z1, N2, workspace+N);
131  }
132 
133  karatsuba_mul(z0, x0, y0, N2, workspace+N);
134  karatsuba_mul(z1, x1, y1, N2, workspace+N);
135 
136  const word ws_carry = bigint_add3_nc(workspace + N, z0, N, z1, N);
137  word z_carry = bigint_add2_nc(z + N2, N, workspace + N, N);
138 
139  z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
140  bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
141 
142  if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
143  bigint_add2(z + N2, 2*N-N2, workspace, N);
144  else
145  bigint_sub2(z + N2, 2*N-N2, workspace, N);
146  }
147 
148 /*
149 * Karatsuba Squaring Operation
150 */
151 void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
152  {
153  if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
154  {
155  if(N == 6)
156  return bigint_comba_sqr6(z, x);
157  else if(N == 8)
158  return bigint_comba_sqr8(z, x);
159  else if(N == 16)
160  return bigint_comba_sqr16(z, x);
161  else
162  return basecase_sqr(z, 2*N, x, N);
163  }
164 
165  const size_t N2 = N / 2;
166 
167  const word* x0 = x;
168  const word* x1 = x + N2;
169  word* z0 = z;
170  word* z1 = z + N;
171 
172  const int32_t cmp = bigint_cmp(x0, N2, x1, N2);
173 
174  clear_mem(workspace, 2*N);
175 
176  // See comment in karatsuba_mul
177 
178  //if(cmp)
179  {
180  if(cmp > 0)
181  bigint_sub3(z0, x0, N2, x1, N2);
182  else
183  bigint_sub3(z0, x1, N2, x0, N2);
184 
185  karatsuba_sqr(workspace, z0, N2, workspace+N);
186  }
187 
188  karatsuba_sqr(z0, x0, N2, workspace+N);
189  karatsuba_sqr(z1, x1, N2, workspace+N);
190 
191  const word ws_carry = bigint_add3_nc(workspace + N, z0, N, z1, N);
192  word z_carry = bigint_add2_nc(z + N2, N, workspace + N, N);
193 
194  z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
195  bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
196 
197  /*
198  * This is only actually required if cmp is != 0, however
199  * if cmp==0 then workspace[0:N] == 0 and avoiding the jump
200  * hides a timing channel.
201  */
202  bigint_sub2(z + N2, 2*N-N2, workspace, N);
203  }
204 
205 /*
206 * Pick a good size for the Karatsuba multiply
207 */
208 size_t karatsuba_size(size_t z_size,
209  size_t x_size, size_t x_sw,
210  size_t y_size, size_t y_sw)
211  {
212  if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
213  return 0;
214 
215  if(((x_size == x_sw) && (x_size % 2)) ||
216  ((y_size == y_sw) && (y_size % 2)))
217  return 0;
218 
219  const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
220  const size_t end = (x_size < y_size) ? x_size : y_size;
221 
222  if(start == end)
223  {
224  if(start % 2)
225  return 0;
226  return start;
227  }
228 
229  for(size_t j = start; j <= end; ++j)
230  {
231  if(j % 2)
232  continue;
233 
234  if(2*j > z_size)
235  return 0;
236 
237  if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
238  {
239  if(j % 4 == 2 &&
240  (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
241  return j+2;
242  return j;
243  }
244  }
245 
246  return 0;
247  }
248 
249 /*
250 * Pick a good size for the Karatsuba squaring
251 */
252 size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
253  {
254  if(x_sw == x_size)
255  {
256  if(x_sw % 2)
257  return 0;
258  return x_sw;
259  }
260 
261  for(size_t j = x_sw; j <= x_size; ++j)
262  {
263  if(j % 2)
264  continue;
265 
266  if(2*j > z_size)
267  return 0;
268 
269  if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
270  return j+2;
271  return j;
272  }
273 
274  return 0;
275  }
276 
277 template<size_t SZ>
278 inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
279  size_t y_sw, size_t y_size,
280  size_t z_size)
281  {
282  return (x_sw <= SZ && x_size >= SZ &&
283  y_sw <= SZ && y_size >= SZ &&
284  z_size >= 2*SZ);
285  }
286 
287 template<size_t SZ>
288 inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
289  size_t z_size)
290  {
291  return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
292  }
293 
294 }
295 
296 void bigint_mul(word z[], size_t z_size,
297  const word x[], size_t x_size, size_t x_sw,
298  const word y[], size_t y_size, size_t y_sw,
299  word workspace[], size_t ws_size)
300  {
301  clear_mem(z, z_size);
302 
303  if(x_sw == 1)
304  {
305  bigint_linmul3(z, y, y_sw, x[0]);
306  }
307  else if(y_sw == 1)
308  {
309  bigint_linmul3(z, x, x_sw, y[0]);
310  }
311  else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
312  {
313  bigint_comba_mul4(z, x, y);
314  }
315  else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
316  {
317  bigint_comba_mul6(z, x, y);
318  }
319  else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
320  {
321  bigint_comba_mul8(z, x, y);
322  }
323  else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
324  {
325  bigint_comba_mul9(z, x, y);
326  }
327  else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
328  {
329  bigint_comba_mul16(z, x, y);
330  }
331  else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
332  y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
333  !workspace)
334  {
335  basecase_mul(z, z_size, x, x_sw, y, y_sw);
336  }
337  else
338  {
339  const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
340 
341  if(N && z_size >= 2*N && ws_size >= 2*N)
342  karatsuba_mul(z, x, y, N, workspace);
343  else
344  basecase_mul(z, z_size, x, x_sw, y, y_sw);
345  }
346  }
347 
348 /*
349 * Squaring Algorithm Dispatcher
350 */
351 void bigint_sqr(word z[], size_t z_size,
352  const word x[], size_t x_size, size_t x_sw,
353  word workspace[], size_t ws_size)
354  {
355  clear_mem(z, z_size);
356 
357  BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
358 
359  if(x_sw == 1)
360  {
361  bigint_linmul3(z, x, x_sw, x[0]);
362  }
363  else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
364  {
365  bigint_comba_sqr4(z, x);
366  }
367  else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
368  {
369  bigint_comba_sqr6(z, x);
370  }
371  else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
372  {
373  bigint_comba_sqr8(z, x);
374  }
375  else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
376  {
377  bigint_comba_sqr9(z, x);
378  }
379  else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
380  {
381  bigint_comba_sqr16(z, x);
382  }
383  else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
384  {
385  basecase_sqr(z, z_size, x, x_sw);
386  }
387  else
388  {
389  const size_t N = karatsuba_size(z_size, x_size, x_sw);
390 
391  if(N && z_size >= 2*N && ws_size >= 2*N)
392  karatsuba_sqr(z, x, N, workspace);
393  else
394  basecase_sqr(z, z_size, x, x_sw);
395  }
396  }
397 
398 }
void carry(int64_t &h0, int64_t &h1)
int32_t bigint_cmp(const word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.cpp:398
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:97
word bigint_sub2(word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.cpp:177
word bigint_add3_nc(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.cpp:133
word word_madd3(word a, word b, word c, word *d)
Definition: mp_madd.h:105
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
Definition: mp_comba.cpp:50
word bigint_sub3(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.cpp:218
void bigint_comba_mul9(word z[18], const word x[9], const word y[9])
Definition: mp_comba.cpp:474
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size)
Definition: mp_karat.cpp:351
#define BOTAN_ASSERT(expr, assertion_made)
Definition: assert.h:30
void bigint_linmul3(word z[], const word x[], size_t x_size, word y)
Definition: mp_core.cpp:260
void bigint_comba_sqr16(word z[32], const word x[16])
Definition: mp_comba.cpp:598
word word8_madd3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:681
word bigint_add2_nc(word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.cpp:110
Definition: alg_id.cpp:13
void bigint_comba_sqr9(word z[18], const word x[9])
Definition: mp_comba.cpp:386
void bigint_mul(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, const word y[], size_t y_size, size_t y_sw, word workspace[], size_t ws_size)
Definition: mp_karat.cpp:296
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
Definition: mp_comba.cpp:283
void bigint_comba_sqr8(word z[16], const word x[8])
Definition: mp_comba.cpp:208
void bigint_add2(word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.cpp:158
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
Definition: mp_comba.cpp:805
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
Definition: mp_comba.cpp:141
void bigint_comba_sqr4(word z[8], const word x[4])
Definition: mp_comba.cpp:17
void bigint_comba_sqr6(word z[12], const word x[6])
Definition: mp_comba.cpp:89