9#include <botan/internal/mp_core.h>
11#include <botan/exceptn.h>
12#include <botan/mem_ops.h>
13#include <botan/internal/ct_utils.h>
// Multiplies the x_size-word operand x by the y_size-word operand y into z.
// NOTE(review): several body lines are missing from this excerpt (including
// the inner multiply-accumulate statements and the definition of `carry`);
// the comments below describe only the statements that are visible.
20void basecase_mul(word z[],
size_t z_size,
const word x[],
size_t x_size,
const word y[],
size_t y_size) {
// Guard for an output buffer smaller than x_size + y_size words; the action
// taken inside this branch is not shown here.
21 if(z_size < x_size + y_size) {
// x_size rounded down to a multiple of 8: the part of x covered by the
// 8-words-per-step loop below.
25 const size_t x_size_8 = x_size - (x_size % 8);
// One pass over x for every word of y.
29 for(
size_t i = 0; i != y_size; ++i) {
30 const word y_i = y[i];
// Bulk loop: advances through x eight words at a time.
34 for(
size_t j = 0; j != x_size_8; j += 8) {
// Tail loop: the remaining x_size % 8 words, one word per iteration.
38 for(
size_t j = x_size_8; j != x_size; ++j) {
// After pass i, the value of `carry` is written to z[x_size + i].
42 z[x_size + i] =
carry;
// Squaring counterpart of basecase_mul: x (x_size words) is combined with
// itself and the result goes to z.
// NOTE(review): the inner accumulate statements and the definition of `carry`
// are missing from this excerpt; comments cover only the visible statements.
46void basecase_sqr(word z[],
size_t z_size,
const word x[],
size_t x_size) {
// Guard for an output buffer smaller than 2 * x_size words; the body of this
// branch is not shown here.
47 if(z_size < 2 * x_size) {
// x_size rounded down to a multiple of 8 for the 8-words-per-step loop.
51 const size_t x_size_8 = x_size - (x_size % 8);
// One pass over x for every word of x.
55 for(
size_t i = 0; i != x_size; ++i) {
56 const word x_i = x[i];
// Bulk loop: advances through x eight words at a time.
60 for(
size_t j = 0; j != x_size_8; j += 8) {
// Tail loop: the remaining x_size % 8 words, one word per iteration.
64 for(
size_t j = x_size_8; j != x_size; ++j) {
// After pass i, the value of `carry` is written to z[x_size + i].
68 z[x_size + i] =
carry;
// Size cut-offs (in words) used by the Karatsuba code in this file:
// karatsuba_mul / karatsuba_sqr take their non-recursive branch when N is
// below the matching threshold, and the top-level dispatch compares operand
// sizes against these values before computing a Karatsuba size.
74const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
75const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
// Recursive multiply of two N-word operands x and y into z, using `workspace`
// as scratch space.
// NOTE(review): the definitions of x0, y0, z0, z1, cmp0 and cmp1, and all of
// the combine steps, are missing from this excerpt.
80void karatsuba_mul(word z[],
const word x[],
const word y[],
size_t N, word workspace[]) {
// Non-recursive branch: taken when N is below the threshold or N is odd
// (its body is not shown here).
81 if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2) {
// Operands are split at the half-way point N2.
98 const size_t N2 = N / 2;
// x1 / y1 point at the words of x / y starting at index N2.
101 const word* x1 = x + N2;
103 const word* y1 = y + N2;
// Scratch layout: ws0 starts at workspace[0], ws1 starts at workspace[N].
107 word* ws0 = workspace;
108 word* ws1 = workspace + N;
// Bitwise complement of (cmp0 XOR cmp1).
// NOTE(review): cmp0/cmp1 are presumably the masks returned by
// bigint_sub_abs - confirm against the full source.
124 const auto neg_mask = ~(cmp0 ^ cmp1);
// Three recursive half-size (N2) multiplications, each given ws1 as scratch:
// z0 * z1 -> ws0, x0 * y0 -> z0, x1 * y1 -> z1.
126 karatsuba_mul(ws0, z0, z1, N2, ws1);
129 karatsuba_mul(z0, x0, y0, N2, ws1);
132 karatsuba_mul(z1, x1, y1, N2, ws1);
// Recursive squaring of the N-word operand x into z, using `workspace` as
// scratch space.
// NOTE(review): the definitions of x0, z0 and z1 and all of the combine steps
// are missing from this excerpt.
148void karatsuba_sqr(word z[],
const word x[],
size_t N, word workspace[]) {
// Non-recursive branch: taken when N is below the threshold or N is odd
// (its body is not shown here).
149 if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2) {
// The operand is split at the half-way point N2.
166 const size_t N2 = N / 2;
// x1 points at the words of x starting at index N2.
169 const word* x1 = x + N2;
// Scratch layout: ws0 starts at workspace[0], ws1 starts at workspace[N].
173 word* ws0 = workspace;
174 word* ws1 = workspace + N;
// Three recursive half-size (N2) squarings, each given ws1 as scratch:
// z0^2 -> ws0, x0^2 -> z0, x1^2 -> z1.
180 karatsuba_sqr(ws0, z0, N2, ws1);
182 karatsuba_sqr(z0, x0, N2, ws1);
183 karatsuba_sqr(z1, x1, N2, ws1);
// Chooses an operand length for a Karatsuba multiplication from the buffer
// sizes (x_size, y_size, z_size) and the x_sw / y_sw counts (presumably the
// number of significant words in each operand - confirm).
// The return statements are missing from this excerpt; the multiply dispatch
// in this file treats a result of 0 as "no usable size".
202size_t karatsuba_size(
size_t z_size,
size_t x_size,
size_t x_sw,
size_t y_size,
size_t y_sw) {
// Either count exceeds one of the two buffer sizes (branch body not shown).
203 if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size) {
// An operand completely fills its buffer and that buffer has odd length
// (branch body not shown).
207 if(((x_size == x_sw) && (x_size % 2)) || ((y_size == y_sw) && (y_size % 2))) {
// Candidate range: from the larger of the two counts up to the smaller of
// the two buffer sizes, inclusive.
211 const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
212 const size_t end = (x_size < y_size) ? x_size : y_size;
221 for(
size_t j = start; j <= end; ++j) {
// j covers both counts and fits inside both buffers.
230 if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size) {
// j is 2 mod 4 (so j + 2 is a multiple of 4), j + 2 still fits in both
// input buffers, and z can hold 2 * (j + 2) words.
231 if(j % 4 == 2 && (j + 2) <= x_size && (j + 2) <= y_size && 2 * (j + 2) <= z_size) {
// Single-operand overload: chooses an operand length for a Karatsuba squaring
// from z_size, x_size and x_sw (presumably the number of significant words of
// x - confirm).
// The return statements are missing from this excerpt; the squaring dispatch
// in this file treats a result of 0 as "no usable size".
244size_t karatsuba_size(
size_t z_size,
size_t x_size,
size_t x_sw) {
// Candidates run from x_sw up to x_size, inclusive.
252 for(
size_t j = x_sw; j <= x_size; ++j) {
// j is 2 mod 4 (so j + 2 is a multiple of 4), j + 2 still fits in the
// input buffer, and z can hold 2 * (j + 2) words.
261 if(j % 4 == 2 && (j + 2) <= x_size && 2 * (j + 2) <= z_size) {
// True when a fixed-size SZ-word multiply can be applied: both x_sw and y_sw
// are at most SZ, both input buffers hold at least SZ words, and z holds at
// least 2 * SZ words.
// SZ is not declared in the visible lines; call sites pass it as a template
// argument (e.g. sized_for_comba_mul<4>(...)).
271inline bool sized_for_comba_mul(
size_t x_sw,
size_t x_size,
size_t y_sw,
size_t y_size,
size_t z_size) {
272 return (x_sw <= SZ && x_size >= SZ && y_sw <= SZ && y_size >= SZ && z_size >= 2 * SZ);
// True when a fixed-size SZ-word squaring can be applied: x_sw is at most SZ,
// the input buffer holds at least SZ words, and z holds at least 2 * SZ words.
// SZ is not declared in the visible lines; call sites pass it as a template
// argument (e.g. sized_for_comba_sqr<4>(...)).
276inline bool sized_for_comba_sqr(
size_t x_sw,
size_t x_size,
size_t z_size) {
277 return (x_sw <= SZ && x_size >= SZ && z_size >= 2 * SZ);
296 }
// NOTE(review): the start of this function (presumably bigint_mul) and the
// bodies of all branches are missing from this excerpt; only the dispatch
// conditions are visible.
// Case: y_sw == 1.
else if(y_sw == 1) {
298 }
// Fixed-size cases, tested in increasing order of SZ: 4, 6, 8, 9, 16, 24.
else if(sized_for_comba_mul<4>(x_sw, x_size, y_sw, y_size, z_size)) {
300 }
else if(sized_for_comba_mul<6>(x_sw, x_size, y_sw, y_size, z_size)) {
302 }
else if(sized_for_comba_mul<8>(x_sw, x_size, y_sw, y_size, z_size)) {
304 }
else if(sized_for_comba_mul<9>(x_sw, x_size, y_sw, y_size, z_size)) {
306 }
else if(sized_for_comba_mul<16>(x_sw, x_size, y_sw, y_size, z_size)) {
308 }
else if(sized_for_comba_mul<24>(x_sw, x_size, y_sw, y_size, z_size)) {
310 }
// Case: either x_sw or y_sw is below the Karatsuba threshold, or no workspace
// pointer was supplied.
else if(x_sw < KARATSUBA_MULTIPLY_THRESHOLD || y_sw < KARATSUBA_MULTIPLY_THRESHOLD || !workspace) {
// Otherwise ask karatsuba_size for an operand length N.
313 const size_t N = karatsuba_size(z_size, x_size, x_sw, y_size, y_sw);
// karatsuba_mul is called only when N is non-zero and both z and the
// workspace hold at least 2 * N words.
315 if(N && z_size >= 2 * N && ws_size >= 2 * N) {
316 karatsuba_mul(z, x, y, N, workspace);
// Squaring entry point: selects a squaring routine for x based on x_sw,
// x_size, z_size and the availability of a workspace.
// NOTE(review): the first branch of the dispatch chain and the bodies of all
// branches are missing from this excerpt; only the conditions are visible.
326void bigint_sqr(word z[],
size_t z_size,
const word x[],
size_t x_size,
size_t x_sw, word workspace[],
size_t ws_size) {
// Asserts that half of the output size is at least x_sw.
329 BOTAN_ASSERT(z_size / 2 >= x_sw,
"Output size is sufficient");
333 }
// Fixed-size cases, tested in increasing order of SZ: 4, 6, 8, 9, 16, 24.
else if(sized_for_comba_sqr<4>(x_sw, x_size, z_size)) {
335 }
else if(sized_for_comba_sqr<6>(x_sw, x_size, z_size)) {
337 }
else if(sized_for_comba_sqr<8>(x_sw, x_size, z_size)) {
339 }
else if(sized_for_comba_sqr<9>(x_sw, x_size, z_size)) {
341 }
else if(sized_for_comba_sqr<16>(x_sw, x_size, z_size)) {
343 }
else if(sized_for_comba_sqr<24>(x_sw, x_size, z_size)) {
345 }
// Case: x_size is below the Karatsuba threshold, or no workspace pointer was
// supplied. Note this compares x_size, not x_sw.
else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace) {
// Otherwise ask karatsuba_size for an operand length N.
348 const size_t N = karatsuba_size(z_size, x_size, x_sw);
// karatsuba_sqr is called only when N is non-zero and both z and the
// workspace hold at least 2 * N words.
350 if(N && z_size >= 2 * N && ws_size >= 2 * N) {
351 karatsuba_sqr(z, x, N, workspace);
#define BOTAN_ASSERT(expr, assertion_made)
constexpr void bigint_linmul3(W z[], const W x[], size_t x_size, W y)
BOTAN_FUZZER_API void basecase_sqr(word z[], size_t z_size, const word x[], size_t x_size)
void bigint_comba_sqr4(word z[8], const word x[4])
void bigint_comba_sqr6(word z[12], const word x[6])
constexpr auto word8_madd3(W z[8], const W x[8], W y, W carry) -> W
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
void bigint_sqr(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, word workspace[], size_t ws_size)
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
void bigint_mul(word z[], size_t z_size, const word x[], size_t x_size, size_t x_sw, const word y[], size_t y_size, size_t y_sw, word workspace[], size_t ws_size)
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
void bigint_comba_mul9(word z[18], const word x[9], const word y[9])
void carry(int64_t &h0, int64_t &h1)
void bigint_comba_mul24(word z[48], const word x[24], const word y[24])
constexpr auto bigint_sub_abs(W z[], const W x[], const W y[], size_t N, W ws[]) -> CT::Mask< W >
constexpr auto bigint_sub2(W x[], size_t x_size, const W y[], size_t y_size) -> W
void bigint_comba_sqr8(word z[16], const word x[8])
void bigint_comba_sqr16(word z[32], const word x[16])
constexpr void bigint_cnd_add_or_sub(CT::Mask< W > mask, W x[], const W y[], size_t size)
void bigint_comba_sqr9(word z[18], const word x[9])
constexpr auto bigint_add2_nc(W x[], size_t x_size, const W y[], size_t y_size) -> W
BOTAN_FUZZER_API void basecase_mul(word z[], size_t z_size, const word x[], size_t x_size, const word y[], size_t y_size)
void bigint_comba_sqr24(word z[48], const word x[24])
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
constexpr void clear_mem(T *ptr, size_t n)
constexpr auto word_madd3(W a, W b, W c, W *d) -> W
constexpr auto bigint_add3_nc(W z[], const W x[], size_t x_size, const W y[], size_t y_size) -> W