35#include <botan/x25519.h>
37#include <botan/internal/ct_utils.h>
38#include <botan/internal/donna128.h>
39#include <botan/internal/loadstor.h>
45#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
// Limb-wise field addition over the 5-limb radix-2^51 representation —
// presumably out[i] += in[i] (64-bit limbs have headroom, so no carry
// propagation is needed here). NOTE(review): the function body is missing
// from this extracted chunk; only the signature survived. Restore from
// the upstream source before building.
50inline void fsum(uint64_t out[5],
const uint64_t in[5]) {
/*
* Compute out = in - out, limb-wise, over GF(2^255-19).
*
* Operates on the radix-2^51 representation (5 limbs). To keep every limb
* non-negative despite the subtraction, a multiple of the prime
* p = 2^255-19 is added first: limb-wise, (2^54-152, 2^54-8, 2^54-8,
* 2^54-8, 2^54-8) equals 8*p, so the result is congruent to in - out
* mod p while each limb stays well within 64 bits.
*/
inline void fdifference_backwards(uint64_t out[5], const uint64_t in[5]) {
   // 8*p in radix 2^51: low limb is 8*(2^51-19) = 2^54-152, the four
   // upper limbs are 8*(2^51-1) = 2^54-8
   const uint64_t two54m152 = (static_cast<uint64_t>(1) << 54) - 152;
   const uint64_t two54m8 = (static_cast<uint64_t>(1) << 54) - 8;

   out[0] = in[0] + two54m152 - out[0];
   out[1] = in[1] + two54m8 - out[1];
   out[2] = in[2] + two54m8 - out[2];
   out[3] = in[3] + two54m8 - out[3];
   out[4] = in[4] + two54m8 - out[4];
}
// Add/subtract butterfly used by the Montgomery ladder: presumably leaves
// y = x + y and x = y_old - x_old. NOTE(review): the declaration of `tmp`
// (a saved copy of one operand) and the fsum() call are missing from this
// extracted chunk — `tmp` is used below but never declared here. Restore
// from the upstream source.
76inline void fadd_sub(uint64_t x[5], uint64_t y[5]) {
   // x = tmp - x, computed without underflow (bias trick inside)
81   fdifference_backwards(x, tmp);
// Mask selecting the low 51 bits of a limb (2^51 - 1), used to truncate a
// 128-bit intermediate to the radix-2^51 limb width (see fscalar_product).
// NOTE(review): the name suggests 63 bits but the value is 51 bits; kept
// as-is because other code in this file references MASK_63 by name.
const uint64_t MASK_63 = 0x7ffffffffffff;
// out = in * scalar over the 5-limb radix-2^51 representation, using
// 128-bit intermediates; carries propagate upward via carry_shift(a, 51).
87inline void fscalar_product(uint64_t out[5],
const uint64_t in[5],
const uint64_t scalar) {
88   uint128_t a = uint128_t(in[0]) * scalar;
// NOTE(review): the stores of out[0]..out[3] and the products/carries for
// limbs 1..3 are missing from this extracted chunk; only the first
// product and the final limb survived.
   // Final limb: product plus the carry from limb 3, masked to 51 bits.
   // No carry wraps back to out[0] here — presumably the caller bounds
   // `scalar` so the top limb cannot overflow 51+epsilon bits; confirm.
100   a = uint128_t(in[4]) * scalar +
carry_shift(a, 51);
101   out[4] = a & MASK_63;
// Field multiplication out = in * in2 mod 2^255-19: schoolbook products
// of 5 radix-2^51 limbs accumulated in 128-bit temporaries t0..t4.
114inline void fmul(uint64_t out[5],
const uint64_t in[5],
const uint64_t in2[5]) {
115   const auto s0 = uint128_t(in2[0]);
116   const auto s1 = uint128_t(in2[1]);
117   const auto s2 = uint128_t(in2[2]);
118   const auto s3 = uint128_t(in2[3]);
119   const auto s4 = uint128_t(in2[4]);
// NOTE(review): the loads of r0..r4 from `in` are missing from this
// extracted chunk — r0..r4 are used below but never declared here.
   // Low partial products: t[k] accumulates r[i]*s[j] for i+j == k
127   uint128_t t0 = r0 * s0;
128   uint128_t t1 = r0 * s1 + r1 * s0;
129   uint128_t t2 = r0 * s2 + r2 * s0 + r1 * s1;
130   uint128_t t3 = r0 * s3 + r3 * s0 + r1 * s2 + r2 * s1;
131   uint128_t t4 = r0 * s4 + r4 * s0 + r3 * s1 + r1 * s3 + r2 * s2;
   // High partial products (i+j >= 5) wrap into the low limbs because
   // 2^255 = 19 (mod p); presumably r1..r4 were scaled by 19 in the
   // missing lines (standard donna technique) — TODO confirm upstream.
138   t0 += r4 * s1 + r1 * s4 + r2 * s3 + r3 * s2;
139   t1 += r4 * s2 + r2 * s4 + r3 * s3;
140   t2 += r4 * s3 + r3 * s4;
// NOTE(review): the carry-propagation/reduction chain and the stores into
// `out` are also missing from this extracted chunk.
// Repeated field squaring — presumably out = in^(2^count) mod 2^255-19
// (square `in` once, then keep squaring the result in place), used by the
// inversion addition chain. NOTE(review): the loads of r0..r4 and the
// per-iteration reduction/stores are missing from this extracted chunk.
169inline void fsquare(uint64_t out[5],
const uint64_t in[5],
size_t count = 1) {
176   for(
size_t i = 0; i != count; ++i) {
      // Doubled/19-scaled limbs exploit the symmetry of squaring
      // (each cross term r_i*r_j appears twice) and the mod-p factor 19.
177      const uint64_t d0 = r0 * 2;
178      const uint64_t d1 = r1 * 2;
179      const uint64_t d2 = r2 * 2 * 19;
180      const uint64_t d419 = r4 * 19;
181      const uint64_t d4 = d419 * 2;
183      uint128_t t0 = uint128_t(r0) * r0 + uint128_t(d4) * r1 + uint128_t(d2) * (r3);
184      uint128_t t1 = uint128_t(d0) * r1 + uint128_t(d4) * r2 + uint128_t(r3) * (r3 * 19);
185      uint128_t t2 = uint128_t(d0) * r2 + uint128_t(r1) * r1 + uint128_t(d4) * (r3);
186      uint128_t t3 = uint128_t(d0) * r3 + uint128_t(d1) * r2 + uint128_t(r4) * (d419);
187      uint128_t t4 = uint128_t(d0) * r4 + uint128_t(d1) * r3 + uint128_t(r2) * (r2);
// Presumably unpacks a 32-byte little-endian field-element encoding into
// the 5-limb radix-2^51 form (load_le + shifts/masks). NOTE(review): the
// function body is missing from this extracted chunk; only the signature
// survived. Restore from the upstream source.
217inline void fexpand(uint64_t* out,
const uint8_t* in) {
// Canonicalize a field element and serialize it: fully reduce the 5 limbs
// to the unique representative mod 2^255-19, then presumably store 32
// little-endian bytes to `out`. NOTE(review): most limb-to-limb carry
// lines and the final byte stores are missing from this extracted chunk.
228inline void fcontract(uint8_t* out,
const uint64_t input[5]) {
229   auto t0 = uint128_t(input[0]);
230   auto t1 = uint128_t(input[1]);
231   auto t2 = uint128_t(input[2]);
232   auto t3 = uint128_t(input[3]);
233   auto t4 = uint128_t(input[4]);
   // Two full passes of carry propagation (interior carries missing here)
235   for(
size_t i = 0; i != 2; ++i) {
      // Wrap the top-limb overflow around: 2^255 = 19 (mod p)
244      t0 += (t4 >> 51U) * 19;
261   t0 += (t4 >> 51U) * 19;
   // Add p limb-wise (0x8000000000000 = 2^51; low limb gets 2^51-19, the
   // rest 2^51-1) so the subsequent (missing) borrow propagation yields
   // the canonical value in [0, p).
266   t0 += 0x8000000000000 - 19;
267   t1 += 0x8000000000000 - 1;
268   t2 += 0x8000000000000 - 1;
269   t3 += 0x8000000000000 - 1;
270   t4 += 0x8000000000000 - 1;
// One Montgomery-ladder step on Curve25519: given Q and Q' in projective
// (x : z) form plus x(Q - Q'), produce 2Q and Q + Q' (differential
// addition + doubling). NOTE(review): the in_q_x/in_q_z parameters
// (original lines 304-305) and the declarations of xxprime, zzprime, xx,
// zz, zzz — along with several of the squarings that define them — are
// missing from this extracted chunk; the calls below reference them.
300void fmonty(uint64_t result_two_q_x[5],
301            uint64_t result_two_q_z[5],
302            uint64_t result_q_plus_q_dash_x[5],
303            uint64_t result_q_plus_q_dash_z[5],
306            uint64_t in_q_dash_x[5],
307            uint64_t in_q_dash_z[5],
308            const uint64_t q_minus_q_dash[5]) {
314   uint64_t zzzprime[5];
   // Butterflies: compute (x+z, x-z) for Q and Q' in place
316   fadd_sub(in_q_z, in_q_x);
317   fadd_sub(in_q_dash_z, in_q_dash_x);
   // Cross products for the differential addition
319   fmul(xxprime, in_q_dash_x, in_q_z);
320   fmul(zzprime, in_q_dash_z, in_q_x);
322   fadd_sub(zzprime, xxprime);
   // (Q + Q') coordinates; z-side is scaled by x(Q - Q')
324   fsquare(result_q_plus_q_dash_x, xxprime);
325   fsquare(zzzprime, zzprime);
326   fmul(result_q_plus_q_dash_z, zzzprime, q_minus_q_dash);
   // Doubling: x(2Q) = xx * zz
330   fmul(result_two_q_x, xx, zz);
332   fdifference_backwards(zz, xx);
   // 121665 = (486662 - 2) / 4, the Curve25519 doubling constant
333   fscalar_product(zzz, zz, 121665);
336   fmul(result_two_q_z, zz, zzz);
// Constant-time conditional swap: when `swap` is the all-ones mask,
// exchange a<->b and c<->d limb-wise; when zero, leave all unchanged.
// Branch-free via masked XOR so the scalar bits never influence control
// flow or memory access patterns. NOTE(review): the lines that XOR x0/x1
// back into a/b and c/d are missing from this extracted chunk.
346inline void swap_conditional(uint64_t a[5], uint64_t b[5], uint64_t c[5], uint64_t d[5],
CT::Mask<uint64_t> swap) {
347   for(
size_t i = 0; i < 5; ++i) {
      // x0/x1 are (a^b) and (c^d) when swapping, else zero
348      const uint64_t x0 = swap.if_set_return(a[i] ^ b[i]);
352      const uint64_t x1 = swap.if_set_return(c[i] ^ d[i]);
// Montgomery-ladder scalar multiplication: computes x(n*Q) in projective
// (resultx : resultz) form. The 32-byte scalar is consumed one byte per
// outer iteration, with 8 ladder steps (fmonty) per byte and
// constant-time swaps between steps. NOTE(review): the working-state
// declarations (a..h), the per-bit mask extraction (bit0..bit7,
// presumably CT::Mask<uint64_t>::expand_bit on si), and the final copy of
// the ladder state into resultx/resultz are missing from this extracted
// chunk.
364void cmult(uint64_t resultx[5], uint64_t resultz[5],
const uint8_t n[32],
const uint64_t q[5]) {
376   for(
size_t i = 0; i < 32; ++i) {
      // Scalar bytes are consumed most-significant-byte first
377      const uint64_t si = n[31 - i];
      // Each swap uses the XOR of consecutive bit masks, so the ladder
      // state is only exchanged when adjacent scalar bits differ —
      // keeping the per-step work independent of the scalar value.
387      swap_conditional(c, a, d, b, bit0);
388      fmonty(g, h, e, f, c, d, a, b, q);
390      swap_conditional(g, e, h, f, bit0 ^ bit1);
391      fmonty(c, d, a, b, g, h, e, f, q);
393      swap_conditional(c, a, d, b, bit1 ^ bit2);
394      fmonty(g, h, e, f, c, d, a, b, q);
396      swap_conditional(g, e, h, f, bit2 ^ bit3);
397      fmonty(c, d, a, b, g, h, e, f, q);
399      swap_conditional(c, a, d, b, bit3 ^ bit4);
400      fmonty(g, h, e, f, c, d, a, b, q);
402      swap_conditional(g, e, h, f, bit4 ^ bit5);
403      fmonty(c, d, a, b, g, h, e, f, q);
405      swap_conditional(c, a, d, b, bit5 ^ bit6);
406      fmonty(g, h, e, f, c, d, a, b, q);
408      swap_conditional(g, e, h, f, bit6 ^ bit7);
409      fmonty(c, d, a, b, g, h, e, f, q);
      // Undo the final conditional swap so state ends in canonical order
411      swap_conditional(c, a, d, b, bit7);
// Presumably computes the multiplicative inverse out = z^-1 in
// GF(2^255-19) via a fixed square-and-multiply addition chain (Fermat:
// z^(p-2)), which is constant-time since the exponent is fixed.
// NOTE(review): the function body is missing from this extracted chunk;
// only the signature survived. Restore from the upstream source.
421void crecip(uint64_t out[5],
const uint64_t z[5]) {
// X25519 entry point: mypublic = X25519(secret, basepoint), 32 bytes
// each. NOTE(review): most of the body is missing from this extracted
// chunk — presumably the scalar clamping/copy, the declarations of bp/x/z,
// the cmult ladder call, and the crecip + fmul projective-to-affine
// conversion sit between the surviving lines. Restore from upstream.
453void curve25519_donna(uint8_t mypublic[32],
const uint8_t secret[32],
const uint8_t basepoint[32]) {
   // Decode the base point into 5-limb radix-2^51 form
468   fexpand(bp, basepoint);
   // Serialize the canonical affine x-coordinate
472   fcontract(mypublic, z);
static constexpr Mask< T > expand_bit(T v, size_t bit)
constexpr void unpoison(const T *p, size_t n)
constexpr void poison(const T *p, size_t n)
constexpr void copy_mem(T *out, const T *in, size_t n)
constexpr uint64_t carry_shift(const donna128 &a, size_t shift)
constexpr auto store_le(ParamTs &&... params)
constexpr auto load_le(ParamTs &&... params)
void curve25519_donna(uint8_t mypublic[32], const uint8_t secret[32], const uint8_t basepoint[32])
constexpr uint64_t combine_lower(const donna128 &a, size_t s1, const donna128 &b, size_t s2)