Botan 3.0.0-alpha0
Crypto and TLS for C&
mp_karat.cpp
Go to the documentation of this file.
1/*
2* Multiplication and Squaring
3* (C) 1999-2010,2018 Jack Lloyd
4* 2016 Matthias Gierlings
5*
6* Botan is released under the Simplified BSD License (see license.txt)
7*/
8
9#include <botan/internal/mp_core.h>
10#include <botan/internal/ct_utils.h>
11#include <botan/mem_ops.h>
12#include <botan/exceptn.h>
13
14namespace Botan {
15
16namespace {
17
18const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
19const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
20
21/*
22* Simple O(N^2) Multiplication
23*/
24void basecase_mul(word z[], size_t z_size,
25 const word x[], size_t x_size,
26 const word y[], size_t y_size)
27 {
28 if(z_size < x_size + y_size)
29 throw Invalid_Argument("basecase_mul z_size too small");
30
31 const size_t x_size_8 = x_size - (x_size % 8);
32
33 clear_mem(z, z_size);
34
35 for(size_t i = 0; i != y_size; ++i)
36 {
37 const word y_i = y[i];
38
39 word carry = 0;
40
41 for(size_t j = 0; j != x_size_8; j += 8)
42 carry = word8_madd3(z + i + j, x + j, y_i, carry);
43
44 for(size_t j = x_size_8; j != x_size; ++j)
45 z[i+j] = word_madd3(x[j], y_i, z[i+j], &carry);
46
47 z[x_size+i] = carry;
48 }
49 }
50
51void basecase_sqr(word z[], size_t z_size,
52 const word x[], size_t x_size)
53 {
54 if(z_size < 2*x_size)
55 throw Invalid_Argument("basecase_sqr z_size too small");
56
57 const size_t x_size_8 = x_size - (x_size % 8);
58
59 clear_mem(z, z_size);
60
61 for(size_t i = 0; i != x_size; ++i)
62 {
63 const word x_i = x[i];
64
65 word carry = 0;
66
67 for(size_t j = 0; j != x_size_8; j += 8)
68 carry = word8_madd3(z + i + j, x + j, x_i, carry);
69
70 for(size_t j = x_size_8; j != x_size; ++j)
71 z[i+j] = word_madd3(x[j], x_i, z[i+j], &carry);
72
73 z[x_size+i] = carry;
74 }
75 }
76
77/*
78* Karatsuba Multiplication Operation
79*/
80void karatsuba_mul(word z[], const word x[], const word y[], size_t N,
81 word workspace[])
82 {
83 if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
84 {
85 switch(N)
86 {
87 case 6:
88 return bigint_comba_mul6(z, x, y);
89 case 8:
90 return bigint_comba_mul8(z, x, y);
91 case 9:
92 return bigint_comba_mul9(z, x, y);
93 case 16:
94 return bigint_comba_mul16(z, x, y);
95 case 24:
96 return bigint_comba_mul24(z, x, y);
97 default:
98 return basecase_mul(z, 2*N, x, N, y, N);
99 }
100 }
101
102 const size_t N2 = N / 2;
103
104 const word* x0 = x;
105 const word* x1 = x + N2;
106 const word* y0 = y;
107 const word* y1 = y + N2;
108 word* z0 = z;
109 word* z1 = z + N;
110
111 word* ws0 = workspace;
112 word* ws1 = workspace + N;
113
114 clear_mem(workspace, 2*N);
115
116 /*
117 * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
118 * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
119 * anything, clear_mem above already set the correct result.
120 *
121 * However we ignore the result of the comparisons and always perform the
122 * subtractions and recursively multiply to avoid the timing channel.
123 */
124
125 // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
126 const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
127 const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
128 const auto neg_mask = ~(cmp0 ^ cmp1);
129
130 karatsuba_mul(ws0, z0, z1, N2, ws1);
131
132 // Compute X_lo * Y_lo
133 karatsuba_mul(z0, x0, y0, N2, ws1);
134
135 // Compute X_hi * Y_hi
136 karatsuba_mul(z1, x1, y1, N2, ws1);
137
138 const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
139 word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
140
141 z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
142 bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
143
144 clear_mem(workspace + N, N2);
145
146 bigint_cnd_add_or_sub(neg_mask, z + N2, workspace, 2*N-N2);
147 }
148
149/*
150* Karatsuba Squaring Operation
151*/
152void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[])
153 {
154 if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
155 {
156 switch(N)
157 {
158 case 6:
159 return bigint_comba_sqr6(z, x);
160 case 8:
161 return bigint_comba_sqr8(z, x);
162 case 9:
163 return bigint_comba_sqr9(z, x);
164 case 16:
165 return bigint_comba_sqr16(z, x);
166 case 24:
167 return bigint_comba_sqr24(z, x);
168 default:
169 return basecase_sqr(z, 2*N, x, N);
170 }
171 }
172
173 const size_t N2 = N / 2;
174
175 const word* x0 = x;
176 const word* x1 = x + N2;
177 word* z0 = z;
178 word* z1 = z + N;
179
180 word* ws0 = workspace;
181 word* ws1 = workspace + N;
182
183 clear_mem(workspace, 2*N);
184
185 // See comment in karatsuba_mul
186 bigint_sub_abs(z0, x0, x1, N2, workspace);
187 karatsuba_sqr(ws0, z0, N2, ws1);
188
189 karatsuba_sqr(z0, x0, N2, ws1);
190 karatsuba_sqr(z1, x1, N2, ws1);
191
192 const word ws_carry = bigint_add3_nc(ws1, z0, N, z1, N);
193 word z_carry = bigint_add2_nc(z + N2, N, ws1, N);
194
195 z_carry += bigint_add2_nc(z + N + N2, N2, &ws_carry, 1);
196 bigint_add2_nc(z + N + N2, N2, &z_carry, 1);
197
198 /*
199 * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
200 * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
201 * timing channel.
202 */
203 bigint_sub2(z + N2, 2*N-N2, ws0, N);
204 }
205
206/*
207* Pick a good size for the Karatsuba multiply
208*/
209size_t karatsuba_size(size_t z_size,
210 size_t x_size, size_t x_sw,
211 size_t y_size, size_t y_sw)
212 {
213 if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
214 return 0;
215
216 if(((x_size == x_sw) && (x_size % 2)) ||
217 ((y_size == y_sw) && (y_size % 2)))
218 return 0;
219
220 const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
221 const size_t end = (x_size < y_size) ? x_size : y_size;
222
223 if(start == end)
224 {
225 if(start % 2)
226 return 0;
227 return start;
228 }
229
230 for(size_t j = start; j <= end; ++j)
231 {
232 if(j % 2)
233 continue;
234
235 if(2*j > z_size)
236 return 0;
237
238 if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
239 {
240 if(j % 4 == 2 &&
241 (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
242 return j+2;
243 return j;
244 }
245 }
246
247 return 0;
248 }
249
250/*
251* Pick a good size for the Karatsuba squaring
252*/
253size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw)
254 {
255 if(x_sw == x_size)
256 {
257 if(x_sw % 2)
258 return 0;
259 return x_sw;
260 }
261
262 for(size_t j = x_sw; j <= x_size; ++j)
263 {
264 if(j % 2)
265 continue;
266
267 if(2*j > z_size)
268 return 0;
269
270 if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
271 return j+2;
272 return j;
273 }
274
275 return 0;
276 }
277
278template<size_t SZ>
279inline bool sized_for_comba_mul(size_t x_sw, size_t x_size,
280 size_t y_sw, size_t y_size,
281 size_t z_size)
282 {
283 return (x_sw <= SZ && x_size >= SZ &&
284 y_sw <= SZ && y_size >= SZ &&
285 z_size >= 2*SZ);
286 }
287
288template<size_t SZ>
289inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size,
290 size_t z_size)
291 {
292 return (x_sw <= SZ && x_size >= SZ && z_size >= 2*SZ);
293 }
294
295}
296
297void bigint_mul(word z[], size_t z_size,
298 const word x[], size_t x_size, size_t x_sw,
299 const word y[], size_t y_size, size_t y_sw,
300 word workspace[], size_t ws_size)
301 {
302 clear_mem(z, z_size);
303
304 if(x_sw == 1)
305 {
306 bigint_linmul3(z, y, y_sw, x[0]);
307 }
308 else if(y_sw == 1)
309 {
310 bigint_linmul3(z, x, x_sw, y[0]);
311 }
312 else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size))
313 {
314 bigint_comba_mul4(z, x, y);
315 }
316 else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size))
317 {
318 bigint_comba_mul6(z, x, y);
319 }
320 else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size))
321 {
322 bigint_comba_mul8(z, x, y);
323 }
324 else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size))
325 {
326 bigint_comba_mul9(z, x, y);
327 }
328 else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size))
329 {
330 bigint_comba_mul16(z, x, y);
331 }
332 else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size))
333 {
334 bigint_comba_mul24(z, x, y);
335 }
336 else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
337 y_sw < KARATSUBA_MULTIPLY_THRESHOLD ||
338 !workspace)
339 {
340 basecase_mul(z, z_size, x, x_sw, y, y_sw);
341 }
342 else
343 {
344 const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
345
346 if(N && z_size >= 2*N && ws_size >= 2*N)
347 karatsuba_mul(z, x, y, N, workspace);
348 else
349 basecase_mul(z, z_size, x, x_sw, y, y_sw);
350 }
351 }
352
353/*
354* Squaring Algorithm Dispatcher
355*/
356void bigint_sqr(word z[], size_t z_size,
357 const word x[], size_t x_size, size_t x_sw,
358 word workspace[], size_t ws_size)
359 {
360 clear_mem(z, z_size);
361
362 BOTAN_ASSERT(z_size/2 >= x_sw, "Output size is sufficient");
363
364 if(x_sw == 1)
365 {
366 bigint_linmul3(z, x, x_sw, x[0]);
367 }
368 else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size))
369 {
370 bigint_comba_sqr4(z, x);
371 }
372 else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size))
373 {
374 bigint_comba_sqr6(z, x);
375 }
376 else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size))
377 {
378 bigint_comba_sqr8(z, x);
379 }
380 else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size))
381 {
382 bigint_comba_sqr9(z, x);
383 }
384 else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size))
385 {
386 bigint_comba_sqr16(z, x);
387 }
388 else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size))
389 {
390 bigint_comba_sqr24(z, x);
391 }
392 else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
393 {
394 basecase_sqr(z, z_size, x, x_sw);
395 }
396 else
397 {
398 const size_t N = karatsuba_size(z_size, x_size, x_sw);
399
400 if(N && z_size >= 2*N && ws_size >= 2*N)
401 karatsuba_sqr(z, x, N, workspace);
402 else
403 basecase_sqr(z, z_size, x, x_sw);
404 }
405 }
406
407}
#define BOTAN_ASSERT(expr, assertion_made)
Definition: assert.h:54
Definition: alg_id.cpp:13
word word8_madd3(word z[8], const word x[8], word y, word carry)
Definition: mp_asmi.h:532
void bigint_linmul3(word z[], const word x[], size_t x_size, word y)
Definition: mp_core.h:504
word bigint_sub2(word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.h:300
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
Definition: mp_comba.cpp:49
void bigint_comba_sqr6(word z[12], const word x[6])
Definition: mp_comba.cpp:88
void bigint_comba_sqr16(word z[32], const word x[16])
Definition: mp_comba.cpp:597
void bigint_cnd_add_or_sub(CT::Mask< word > mask, word x[], const word y[], size_t size)
Definition: mp_core.h:139
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size)
Definition: mp_karat.cpp:356
CT::Mask< word > bigint_sub_abs(word z[], const word x[], const word y[], size_t N, word ws[])
Definition: mp_core.h:377
void bigint_mul(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, const word y[], size_t y_size, size_t y_sw, word workspace[], size_t ws_size)
Definition: mp_karat.cpp:297
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
Definition: mp_comba.cpp:282
void bigint_comba_sqr4(word z[8], const word x[4])
Definition: mp_comba.cpp:16
void carry(int64_t &h0, int64_t &h1)
word word_madd3(word a, word b, word c, word *d)
Definition: mp_asmi.h:92
void bigint_comba_sqr9(word z[18], const word x[9])
Definition: mp_comba.cpp:385
void bigint_comba_mul24(word z[48], const word x[24], const word y[24])
Definition: mp_comba.cpp:1534
void bigint_comba_sqr24(word z[48], const word x[24])
Definition: mp_comba.cpp:1131
void bigint_comba_mul9(word z[18], const word x[9], const word y[9])
Definition: mp_comba.cpp:473
word bigint_add2_nc(word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.h:227
word bigint_add3_nc(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
Definition: mp_core.h:250
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
Definition: mp_comba.cpp:804
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
Definition: mp_comba.cpp:140
constexpr void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:115
void bigint_comba_sqr8(word z[16], const word x[8])
Definition: mp_comba.cpp:207