Botan 3.9.0
Crypto and TLS for C&
mp_karat.cpp
Go to the documentation of this file.
1/*
2* Multiplication and Squaring
3* (C) 1999-2010,2018 Jack Lloyd
4* 2016 Matthias Gierlings
5*
6* Botan is released under the Simplified BSD License (see license.txt)
7*/
8
9#include <botan/internal/mp_core.h>
10
11#include <botan/exceptn.h>
12#include <botan/mem_ops.h>
13#include <botan/internal/ct_utils.h>
14
15namespace Botan {
16
17/*
18* Simple O(N^2) Multiplication
19*/
20void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size) {
21 if(z_size < x_size + y_size) {
22 throw Invalid_Argument("basecase_mul z_size too small");
23 }
24
25 const size_t x_size_8 = x_size - (x_size % 8);
26
27 clear_mem(z, z_size);
28
29 for(size_t i = 0; i != y_size; ++i) {
30 const word y_i = y[i];
31
32 word carry = 0;
33
34 for(size_t j = 0; j != x_size_8; j += 8) {
35 carry = word8_madd3(z + i + j, x + j, y_i, carry);
36 }
37
38 for(size_t j = x_size_8; j != x_size; ++j) {
39 z[i + j] = word_madd3(x[j], y_i, z[i + j], &carry);
40 }
41
42 z[x_size + i] = carry;
43 }
44}
45
46void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size) {
47 if(z_size < 2 * x_size) {
48 throw Invalid_Argument("basecase_sqr z_size too small");
49 }
50
51 const size_t x_size_8 = x_size - (x_size % 8);
52
53 clear_mem(z, z_size);
54
55 for(size_t i = 0; i != x_size; ++i) {
56 const word x_i = x[i];
57
58 word carry = 0;
59
60 for(size_t j = 0; j != x_size_8; j += 8) {
61 carry = word8_madd3(z + i + j, x + j, x_i, carry);
62 }
63
64 for(size_t j = x_size_8; j != x_size; ++j) {
65 z[i + j] = word_madd3(x[j], x_i, z[i + j], &carry);
66 }
67
68 z[x_size + i] = carry;
69 }
70}
71
72namespace {
73
74const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
75const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
76
77/*
78* Karatsuba Multiplication Operation
79*/
80void karatsuba_mul(word z[], const word x[], const word y[], size_t N, word workspace[]) {
81 if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2 != 0) {
82 switch(N) {
83 case 6:
84 return bigint_comba_mul6(z, x, y);
85 case 8:
86 return bigint_comba_mul8(z, x, y);
87 case 9:
88 return bigint_comba_mul9(z, x, y);
89 case 16:
90 return bigint_comba_mul16(z, x, y);
91 case 24:
92 return bigint_comba_mul24(z, x, y);
93 default:
94 return basecase_mul(z, 2 * N, x, N, y, N);
95 }
96 }
97
98 const size_t N2 = N / 2;
99
100 const word* x0 = x;
101 const word* x1 = x + N2;
102 const word* y0 = y;
103 const word* y1 = y + N2;
104 word* z0 = z;
105 word* z1 = z + N;
106
107 word* ws0 = workspace;
108 word* ws1 = workspace + N;
109
110 clear_mem(workspace, 2 * N);
111
112 /*
113 * If either of cmp0 or cmp1 is zero then z0 or z1 resp is zero here,
114 * resulting in a no-op - z0*z1 will be equal to zero so we don't need to do
115 * anything, clear_mem above already set the correct result.
116 *
117 * However we ignore the result of the comparisons and always perform the
118 * subtractions and recursively multiply to avoid the timing channel.
119 */
120
121 // First compute (X_lo - X_hi)*(Y_hi - Y_lo)
122 const auto cmp0 = bigint_sub_abs(z0, x0, x1, N2, workspace);
123 const auto cmp1 = bigint_sub_abs(z1, y1, y0, N2, workspace);
124 const auto neg_mask = ~(cmp0 ^ cmp1);
125
126 karatsuba_mul(ws0, z0, z1, N2, ws1);
127
128 // Compute X_lo * Y_lo
129 karatsuba_mul(z0, x0, y0, N2, ws1);
130
131 // Compute X_hi * Y_hi
132 karatsuba_mul(z1, x1, y1, N2, ws1);
133
134 const word ws_carry = bigint_add3(ws1, z0, N, z1, N);
135 word z_carry = bigint_add2(z + N2, N, ws1, N);
136
137 z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1);
138 bigint_add2(z + N + N2, N2, &z_carry, 1);
139
140 clear_mem(workspace + N, N2);
141
142 bigint_cnd_add(neg_mask.value(), z + N2, workspace, 2 * N - N2);
143 bigint_cnd_sub((~neg_mask).value(), z + N2, workspace, 2 * N - N2);
144}
145
146/*
147* Karatsuba Squaring Operation
148*/
149void karatsuba_sqr(word z[], const word x[], size_t N, word workspace[]) {
150 if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2 != 0) {
151 switch(N) {
152 case 6:
153 return bigint_comba_sqr6(z, x);
154 case 8:
155 return bigint_comba_sqr8(z, x);
156 case 9:
157 return bigint_comba_sqr9(z, x);
158 case 16:
159 return bigint_comba_sqr16(z, x);
160 case 24:
161 return bigint_comba_sqr24(z, x);
162 default:
163 return basecase_sqr(z, 2 * N, x, N);
164 }
165 }
166
167 const size_t N2 = N / 2;
168
169 const word* x0 = x;
170 const word* x1 = x + N2;
171 word* z0 = z;
172 word* z1 = z + N;
173
174 word* ws0 = workspace;
175 word* ws1 = workspace + N;
176
177 clear_mem(workspace, 2 * N);
178
179 // See comment in karatsuba_mul
180 bigint_sub_abs(z0, x0, x1, N2, workspace);
181 karatsuba_sqr(ws0, z0, N2, ws1);
182
183 karatsuba_sqr(z0, x0, N2, ws1);
184 karatsuba_sqr(z1, x1, N2, ws1);
185
186 const word ws_carry = bigint_add3(ws1, z0, N, z1, N);
187 word z_carry = bigint_add2(z + N2, N, ws1, N);
188
189 z_carry += bigint_add2(z + N + N2, N2, &ws_carry, 1);
190 bigint_add2(z + N + N2, N2, &z_carry, 1);
191
192 /*
193 * This is only actually required if cmp (result of bigint_sub_abs) is != 0,
194 * however if cmp==0 then ws0[0:N] == 0 and avoiding the jump hides a
195 * timing channel.
196 */
197 bigint_sub2(z + N2, 2 * N - N2, ws0, N);
198}
199
200/*
201* Pick a good size for the Karatsuba multiply
202*/
203size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw, size_t y_size, size_t y_sw) {
204 if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) {
205 return 0;
206 }
207
208 if(((x_size == x_sw) && (x_size % 2 != 0)) || ((y_size == y_sw) && (y_size % 2 != 0))) {
209 return 0;
210 }
211
212 const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
213 const size_t end = (x_size < y_size) ? x_size : y_size;
214
215 if(start == end) {
216 if(start % 2 != 0) {
217 return 0;
218 }
219 return start;
220 }
221
222 for(size_t j = start; j <= end; ++j) {
223 if(j % 2 != 0) {
224 continue;
225 }
226
227 if(2 * j > z_size) {
228 return 0;
229 }
230
231 if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) {
232 if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) {
233 return j + 2;
234 }
235 return j;
236 }
237 }
238
239 return 0;
240}
241
242/*
243* Pick a good size for the Karatsuba squaring
244*/
245size_t karatsuba_size(size_t z_size, size_t x_size, size_t x_sw) {
246 if(x_sw == x_size) {
247 if(x_sw % 2 != 0) {
248 return 0;
249 }
250 return x_sw;
251 }
252
253 for(size_t j = x_sw; j <= x_size; ++j) {
254 if(j % 2 != 0) {
255 continue;
256 }
257
258 if(2 * j > z_size) {
259 return 0;
260 }
261
262 if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) {
263 return j + 2;
264 }
265 return j;
266 }
267
268 return 0;
269}
270
271template <size_t SZ>
272inline bool sized_for_comba_mul(size_t x_sw, size_t x_size, size_t y_sw, size_t y_size, size_t z_size) {
273 return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
274}
275
276template <size_t SZ>
277inline bool sized_for_comba_sqr(size_t x_sw, size_t x_size, size_t z_size) {
278 return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
279}
280
281} // namespace
282
284 size_t z_size,
285 const word x[],
286 size_t x_size,
287 size_t x_sw,
288 const word y[],
289 size_t y_size,
290 size_t y_sw,
291 word workspace[],
292 size_t ws_size) {
293 clear_mem(z, z_size);
294
295 if(x_sw == 1) {
296 bigint_linmul3(z, y, y_sw, x[0]);
297 } else if(y_sw == 1) {
298 bigint_linmul3(z, x, x_sw, y[0]);
299 } else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) {
300 bigint_comba_mul4(z, x, y);
301 } else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) {
302 bigint_comba_mul6(z, x, y);
303 } else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) {
304 bigint_comba_mul8(z, x, y);
305 } else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) {
306 bigint_comba_mul9(z, x, y);
307 } else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) {
308 bigint_comba_mul16(z, x, y);
309 } else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) {
310 bigint_comba_mul24(z, x, y);
311 } else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || workspace == nullptr) {
312 basecase_mul(z, z_size, x, x_sw, y, y_sw);
313 } else {
314 const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
315
316 if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) {
317 karatsuba_mul(z, x, y, N, workspace);
318 } else {
319 basecase_mul(z, z_size, x, x_sw, y, y_sw);
320 }
321 }
322}
323
324/*
325* Squaring Algorithm Dispatcher
326*/
327void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size) {
328 clear_mem(z, z_size);
329
330 BOTAN_ASSERT(z_size / 2 >= x_sw, "Output size is sufficient");
331
332 if(x_sw == 1) {
333 bigint_linmul3(z, x, x_sw, x[0]);
334 } else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) {
335 bigint_comba_sqr4(z, x);
336 } else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) {
337 bigint_comba_sqr6(z, x);
338 } else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) {
339 bigint_comba_sqr8(z, x);
340 } else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) {
341 bigint_comba_sqr9(z, x);
342 } else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) {
343 bigint_comba_sqr16(z, x);
344 } else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) {
345 bigint_comba_sqr24(z, x);
346 } else if(x_size < KARATSUBA_SQUARE_THRESHOLD || workspace == nullptr) {
347 basecase_sqr(z, z_size, x, x_sw);
348 } else {
349 const size_t N = karatsuba_size(z_size, x_size, x_sw);
350
351 if(N > 0 && z_size >= 2 * N && ws_size >= 2 * N) {
352 karatsuba_sqr(z, x, N, workspace);
353 } else {
354 basecase_sqr(z, z_size, x, x_sw);
355 }
356 }
357}
358
359} // namespace Botan
#define BOTAN_ASSERT(expr, assertion_made)
Definition assert.h:62
constexpr auto bigint_add2(W x[], size_t x_size, const W y[], size_t y_size) -> W
Definition mp_core.h:96
constexpr void bigint_linmul3(W z[], const W x[], size_t x_size, W y)
Definition mp_core.h:405
constexpr auto bigint_cnd_sub(W cnd, W x[], const W y[], size_t size) -> W
Definition mp_core.h:64
constexpr auto bigint_add3(W z[], const W x[], size_t x_size, const W y[], size_t y_size) -> W
Definition mp_core.h:122
BOTAN_FUZZER_API void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size)
Definition mp_karat.cpp:46
void bigint_comba_sqr4(word z[8], const word x[4])
Definition mp_comba.cpp:16
void bigint_comba_sqr6(word z[12], const word x[6])
Definition mp_comba.cpp:74
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
Definition mp_asmi.h:383
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
Definition mp_comba.cpp:42
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size)
Definition mp_karat.cpp:327
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
Definition mp_comba.cpp:794
void bigint_mul(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, const word y[], size_t y_size, size_t y_sw, word workspace[], size_t ws_size)
Definition mp_karat.cpp:283
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
Definition mp_comba.cpp:115
constexpr W bigint_cnd_add(W cnd, W x[], const W y[], size_t size)
Definition mp_core.h:47
void bigint_comba_mul9(word z[18], const word x[9], const word y[9])
Definition mp_comba.cpp:511
void carry(int64_t &h0, int64_t &h1)
void bigint_comba_mul24(word z[48], const word x[24], const word y[24])
constexpr auto bigint_sub_abs(W z[], const W x[], const W y[], size_t N, W ws[]) -> CT::Mask< W >
Definition mp_core.h:281
constexpr auto bigint_sub2(W x[], size_t x_size, const W y[], size_t y_size) -> W
Definition mp_core.h:150
void bigint_comba_sqr8(word z[16], const word x[8])
Definition mp_comba.cpp:292
void bigint_comba_sqr16(word z[32], const word x[16])
Definition mp_comba.cpp:618
void bigint_comba_sqr9(word z[18], const word x[9])
Definition mp_comba.cpp:440
BOTAN_FUZZER_API void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size)
Definition mp_karat.cpp:20
std::conditional_t< HasNative64BitRegisters, std::uint64_t, uint32_t > word
Definition types.h:119
void bigint_comba_sqr24(word z[48], const word x[24])
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
Definition mp_comba.cpp:352
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:119
constexpr auto word_madd3(W a, W b, W c, W *d) -> W
Definition mp_asmi.h:112