35#include <botan/x25519.h>
37#include <botan/internal/ct_utils.h>
38#include <botan/internal/donna128.h>
39#include <botan/internal/loadstor.h>
40#include <botan/internal/mul128.h>
// When the build does not define BOTAN_TARGET_HAS_NATIVE_UINT128, the donna128
// type is given the name uint128_t; the arithmetic in this file is written
// against that name.
46#if !defined(BOTAN_TARGET_HAS_NATIVE_UINT128)
47typedef donna128 uint128_t;
51inline void fsum(uint64_t out[5],
const uint64_t in[5]) {
// Overwrites `out` with (in - out), limb by limb. The operand order is the
// reverse of the argument order, hence "backwards".
//
// A per-limb bias is added before subtracting:
//   limb 0     : 2^54 - 152 == 8 * (2^51 - 19)
//   limbs 1..4 : 2^54 - 8   == 8 * (2^51 - 1)
// NOTE(review): presumably these are the limbs of 8*p for p = 2^255 - 19, so
// the bias keeps the unsigned limbs from wrapping without changing the value
// mod p -- confirm against the limb bounds the callers guarantee.
65inline void fdifference_backwards(uint64_t out[5],
const uint64_t in[5]) {
67 const uint64_t two54m152 = (
static_cast<uint64_t
>(1) << 54) - 152;
68 const uint64_t two54m8 = (
static_cast<uint64_t
>(1) << 54) - 8;
// Each limb: new out[i] = in[i] + bias - old out[i].
70 out[0] = in[0] + two54m152 - out[0];
71 out[1] = in[1] + two54m8 - out[1];
72 out[2] = in[2] + two54m8 - out[2];
73 out[3] = in[3] + two54m8 - out[3];
74 out[4] = in[4] + two54m8 - out[4];
// Takes two mutable five-limb arrays. Only one statement of the body is
// visible in this excerpt.
// NOTE(review): the name suggests x and y end up holding a sum and a
// difference -- confirm against the full body.
77inline void fadd_sub(uint64_t x[5], uint64_t y[5]) {
// Overwrites x with (tmp - x), biased per limb by fdifference_backwards.
// The definition of `tmp` is not visible here.
82 fdifference_backwards(x, tmp);
// Limb mask. Despite the "63" in the name, the value is 2^51 - 1: the low
// 51 bits set, i.e. a hex 7 followed by twelve f digits.
85const uint64_t MASK_63 = 0x7ffffffffffff;
// Multiplies the five-limb value `in` by the 64-bit `scalar`, writing limbs to
// `out`. Each limb product is formed in a uint128_t accumulator `a`.
// The handling of limbs 1..3 is not visible in this excerpt.
88inline void fscalar_product(uint64_t out[5],
const uint64_t in[5],
const uint64_t scalar) {
89 uint128_t a = uint128_t(in[0]) * scalar;
// Top limb: the product plus carry_shift(a, 51) taken from the previous
// accumulator value.
101 a = uint128_t(in[4]) * scalar +
carry_shift(a, 51);
// Keep the low 51 bits (MASK_63 is 2^51 - 1).
102 out[4] = a & MASK_63;
// Multiplies two five-limb values `in` and `in2` into `out`, using uint128_t
// accumulators. The declarations of r0..r4 and the final carry/store steps
// are not visible in this excerpt.
115inline void fmul(uint64_t out[5],
const uint64_t in[5],
const uint64_t in2[5]) {
// Limbs of in2 widened to 128 bits, so the products below are 128-bit.
116 const uint128_t s0 = in2[0];
117 const uint128_t s1 = in2[1];
118 const uint128_t s2 = in2[2];
119 const uint128_t s3 = in2[3];
120 const uint128_t s4 = in2[4];
// Column sums: t_k collects every r_i * s_j with i + j == k (k = 0..4).
128 uint128_t t0 = r0 * s0;
129 uint128_t t1 = r0 * s1 + r1 * s0;
130 uint128_t t2 = r0 * s2 + r2 * s0 + r1 * s1;
131 uint128_t t3 = r0 * s3 + r3 * s0 + r1 * s2 + r2 * s1;
132 uint128_t t4 = r0 * s4 + r4 * s0 + r3 * s1 + r1 * s3 + r2 * s2;
// Products with i + j == 5, 6 and 7 are added to t0, t1 and t2 respectively.
// NOTE(review): presumably r1..r4 are multiplied by 19 in the lines between
// the two groups (not shown), which is what makes this wrap-around valid
// mod 2^255 - 19 -- confirm.
139 t0 += r4 * s1 + r1 * s4 + r2 * s3 + r3 * s2;
140 t1 += r4 * s2 + r2 * s4 + r3 * s3;
141 t2 += r4 * s3 + r3 * s4;
// Squaring of a five-limb value. `count` defaults to 1 and is the number of
// loop iterations.
// NOTE(review): presumably each iteration squares the previous result, giving
// in^(2^count) -- the load of r0..r4 and the carry/store steps are not
// visible here, so confirm.
170inline void fsquare(uint64_t out[5],
const uint64_t in[5],
size_t count = 1) {
177 for(
size_t i = 0; i != count; ++i) {
// Pre-scaled limbs: d0 = 2*r0, d1 = 2*r1, d2 = 38*r2, d419 = 19*r4,
// d4 = 38*r4. The doubling accounts for each cross term occurring twice.
// NOTE(review): the factor 19 presumably folds terms above limb 4 back
// in (2^255 == 19 mod p) -- confirm.
178 const uint64_t d0 = r0 * 2;
179 const uint64_t d1 = r1 * 2;
180 const uint64_t d2 = r2 * 2 * 19;
181 const uint64_t d419 = r4 * 19;
182 const uint64_t d4 = d419 * 2;
// Column sums. Operands are widened to uint128_t before multiplying so the
// products are 128-bit.
184 uint128_t t0 = uint128_t(r0) * r0 + uint128_t(d4) * r1 + uint128_t(d2) * (r3);
185 uint128_t t1 = uint128_t(d0) * r1 + uint128_t(d4) * r2 + uint128_t(r3) * (r3 * 19);
186 uint128_t t2 = uint128_t(d0) * r2 + uint128_t(r1) * r1 + uint128_t(d4) * (r3);
187 uint128_t t3 = uint128_t(d0) * r3 + uint128_t(d1) * r2 + uint128_t(r4) * (d419);
188 uint128_t t4 = uint128_t(d0) * r4 + uint128_t(d1) * r3 + uint128_t(r2) * (r2);
218inline void fexpand(uint64_t* out,
const uint8_t* in) {
// Converts a five-limb value `input` into bytes written through `out`.
// Only part of the reduction is visible in this excerpt; the carry
// propagation between limbs and the final byte store are not shown.
229inline void fcontract(uint8_t* out,
const uint64_t input[5]) {
// Work on 128-bit copies of the limbs.
230 uint128_t t0 = input[0];
231 uint128_t t1 = input[1];
232 uint128_t t2 = input[2];
233 uint128_t t3 = input[3];
234 uint128_t t4 = input[4];
// Loop with exactly two iterations.
236 for(
size_t i = 0; i != 2; ++i) {
// Whatever sits above bit 51 of the top limb is added to limb 0, times 19.
245 t0 += (t4 >> 51) * 19;
// The same fold of the top limb into limb 0 is applied once more here.
262 t0 += (t4 >> 51) * 19;
// 0x8000000000000 is 2^51, so this adds (2^51 - 19) to limb 0 and (2^51 - 1)
// to limbs 1..4.
// NOTE(review): presumably these are the limbs of p = 2^255 - 19, added as
// part of producing the canonical fully reduced value -- confirm against
// the lines not shown.
267 t0 += 0x8000000000000 - 19;
268 t1 += 0x8000000000000 - 1;
269 t2 += 0x8000000000000 - 1;
270 t3 += 0x8000000000000 - 1;
271 t4 += 0x8000000000000 - 1;
// Field-arithmetic step producing two (x, z) outputs: result_two_q_* and
// result_q_plus_q_dash_*.
// NOTE(review): by the parameter names this is one Montgomery-ladder step
// computing 2Q and Q + Q' from Q, Q' and Q - Q' -- confirm.
// The in_q_x / in_q_z parameters and several locals (xx, zz, zzz, xxprime,
// zzprime) are declared on lines not visible in this excerpt.
// The in_q_* and in_q_dash_* arrays are passed to fadd_sub, which takes
// mutable arrays, so they should be treated as clobbered by this call.
301void fmonty(uint64_t result_two_q_x[5],
302 uint64_t result_two_q_z[5],
303 uint64_t result_q_plus_q_dash_x[5],
304 uint64_t result_q_plus_q_dash_z[5],
307 uint64_t in_q_dash_x[5],
308 uint64_t in_q_dash_z[5],
309 const uint64_t q_minus_q_dash[5]) {
315 uint64_t zzzprime[5];
// fadd_sub is applied in place to each input (z, x) pair.
317 fadd_sub(in_q_z, in_q_x);
318 fadd_sub(in_q_dash_z, in_q_dash_x);
// Cross products between the two transformed points.
320 fmul(xxprime, in_q_dash_x, in_q_z);
321 fmul(zzprime, in_q_dash_z, in_q_x);
323 fadd_sub(zzprime, xxprime);
// Q + Q' outputs: x is xxprime squared; z is zzprime squared times
// q_minus_q_dash.
325 fsquare(result_q_plus_q_dash_x, xxprime);
326 fsquare(zzzprime, zzprime);
327 fmul(result_q_plus_q_dash_z, zzzprime, q_minus_q_dash);
// 2Q outputs: x = xx * zz.
331 fmul(result_two_q_x, xx, zz);
// zz is overwritten with (xx - zz), then scaled by 121665 into zzz.
// NOTE(review): 121665 == (486662 - 2) / 4, presumably the (A - 2) / 4
// constant for Curve25519 -- confirm.
333 fdifference_backwards(zz, xx);
334 fscalar_product(zzz, zz, 121665);
337 fmul(result_two_q_z, zz, zzz);
// Operates on two pairs of five-limb arrays, (a, b) and (c, d), under control
// of `iswap`, using masks rather than a branch on `iswap`.
// NOTE(review): the lines that apply x0 / x1 are not visible here; presumably
// they are XORed into both arrays of each pair, which swaps the pair when
// iswap == 1 and leaves it unchanged when iswap == 0 -- confirm.
347inline void swap_conditional(uint64_t a[5], uint64_t b[5], uint64_t c[5], uint64_t d[5], uint64_t iswap) {
// Unsigned wrap-around: iswap == 1 gives an all-ones mask, iswap == 0 gives 0.
348 const uint64_t swap = 0 - iswap;
350 for(
size_t i = 0; i < 5; ++i) {
// x0 / x1 are the XOR difference of the pair when the mask is all ones,
// and zero when the mask is zero.
351 const uint64_t x0 = swap & (a[i] ^ b[i]);
352 const uint64_t x1 = swap & (c[i] ^ d[i]);
// Walks the 32-byte scalar `n` and drives fmonty once per bit, writing the
// outcome to resultx / resultz. The setup of the working arrays (a..h) and
// the final copy to the results are not visible in this excerpt.
366void cmult(uint64_t resultx[5], uint64_t resultz[5],
const uint8_t n[32],
const uint64_t q[5]) {
// One iteration per scalar byte, starting at n[31] and ending at n[0].
378 for(
size_t i = 0; i < 32; ++i) {
// Bits of the current byte, most significant first: bit0 is the byte's
// bit 7, bit7 is the byte's bit 0.
379 const uint64_t bit0 = (n[31 - i] >> 7) & 1;
380 const uint64_t bit1 = (n[31 - i] >> 6) & 1;
381 const uint64_t bit2 = (n[31 - i] >> 5) & 1;
382 const uint64_t bit3 = (n[31 - i] >> 4) & 1;
383 const uint64_t bit4 = (n[31 - i] >> 3) & 1;
384 const uint64_t bit5 = (n[31 - i] >> 2) & 1;
385 const uint64_t bit6 = (n[31 - i] >> 1) & 1;
386 const uint64_t bit7 = (n[31 - i] >> 0) & 1;
// Eight fmonty steps per byte, alternating between the (c, d, a, b) and
// (g, h, e, f) array sets as input and output. Between steps the swap
// control is the XOR of adjacent bits, which is 1 only when they differ.
// NOTE(review): presumably this merges undoing the previous bit's swap
// with applying the next bit's swap into one call -- confirm.
388 swap_conditional(c, a, d, b, bit0);
389 fmonty(g, h, e, f, c, d, a, b, q);
391 swap_conditional(g, e, h, f, bit0 ^ bit1);
392 fmonty(c, d, a, b, g, h, e, f, q);
394 swap_conditional(c, a, d, b, bit1 ^ bit2);
395 fmonty(g, h, e, f, c, d, a, b, q);
397 swap_conditional(g, e, h, f, bit2 ^ bit3);
398 fmonty(c, d, a, b, g, h, e, f, q);
400 swap_conditional(c, a, d, b, bit3 ^ bit4);
401 fmonty(g, h, e, f, c, d, a, b, q);
403 swap_conditional(g, e, h, f, bit4 ^ bit5);
404 fmonty(c, d, a, b, g, h, e, f, q);
406 swap_conditional(c, a, d, b, bit5 ^ bit6);
407 fmonty(g, h, e, f, c, d, a, b, q);
409 swap_conditional(g, e, h, f, bit6 ^ bit7);
410 fmonty(c, d, a, b, g, h, e, f, q);
// Final swap for this byte, controlled by its lowest bit alone.
412 swap_conditional(c, a, d, b, bit7);
422void crecip(uint64_t out[5],
const uint64_t z[5]) {
// Entry point: takes a 32-byte `secret` and a 32-byte `basepoint` and writes
// 32 bytes to `mypublic`. Only the first and last visible steps are shown in
// this excerpt; the scalar handling and the use of x / zmone are not visible.
454void curve25519_donna(uint8_t mypublic[32],
const uint8_t secret[32],
const uint8_t basepoint[32]) {
// Working values, five 64-bit limbs each.
458 uint64_t bp[5], x[5], z[5], zmone[5];
// Unpack the base point bytes into limb form.
466 fexpand(bp, basepoint);
// Pack the limb value z into the output bytes.
470 fcontract(mypublic, z);
constexpr void unpoison(const T *p, size_t n)
constexpr void poison(const T *p, size_t n)
constexpr uint64_t carry_shift(const donna128 &a, size_t shift)
constexpr auto store_le(ParamTs &&... params)
constexpr auto load_le(ParamTs &&... params)
void curve25519_donna(uint8_t mypublic[32], const uint8_t secret[32], const uint8_t basepoint[32])
constexpr void copy_mem(T *out, const T *in, size_t n)
constexpr uint64_t combine_lower(const donna128 &a, size_t s1, const donna128 &b, size_t s2)