Botan 3.11.0
Crypto and TLS for C&
curve448_gf.cpp
Go to the documentation of this file.
1/*
2* X448 Gf Modulo 2^448 - 2^224 - 1
3* (C) 2024 Jack Lloyd
4* 2024 Fabian Albert - Rohde & Schwarz Cybersecurity
5*
6* Botan is released under the Simplified BSD License (see license.txt)
7*/
8#include <botan/internal/curve448_gf.h>
9
10#include <botan/internal/ct_utils.h>
11#include <botan/internal/loadstor.h>
12#include <botan/internal/mp_asmi.h>
13#include <botan/internal/mp_core.h>
14
15namespace Botan {
16
17namespace {
18
19/**
20 * @brief Reduce the result of a addition modulo 2^448 - 2^224 - 1.
21 *
22 * Algorithm 1 of paper "Reduction Modulo 2^448 - 2^224 - 1", from line 27.
23 *
24 * @param h_3 Output
25 * @param h_1 Input
26 */
27void reduce_after_add(std::span<uint64_t, WORDS_448> h_3, std::span<const uint64_t, 8> h_1) {
28 std::array<uint64_t, 8> h_2; /* NOLINT(*-member-init) */
29 uint64_t carry = 0;
30
31 constexpr uint64_t zero = 0;
32
33 // Line 27+ (of the paper's algorithm 1)
34 h_2[0] = word_add(h_1[0], h_1[7], &carry);
35 h_2[1] = word_add(h_1[1], zero, &carry);
36 h_2[2] = word_add(h_1[2], zero, &carry);
37
38 // Line 30
39 h_2[3] = word_add(h_1[3], h_1[7] << 32, &carry);
40
41 // Line 31+
42 h_2[4] = word_add(h_1[4], zero, &carry);
43 h_2[5] = word_add(h_1[5], zero, &carry);
44 h_2[6] = word_add(h_1[6], zero, &carry);
45
46 h_2[7] = carry;
47
48 carry = 0;
49 h_3[0] = word_add(h_2[0], h_2[7], &carry);
50 h_3[1] = word_add(h_2[1], zero, &carry);
51 h_3[2] = word_add(h_2[2], zero, &carry);
52 // Line 37
53 h_3[3] = h_2[3] + (h_2[7] << 32) + carry;
54
55 // Line 38
56 h_3[4] = h_2[4];
57 h_3[5] = h_2[5];
58 h_3[6] = h_2[6];
59}
60
61/**
62 * @brief Reduce the result of a addition modulo 2^448 - 2^224 - 1.
63 *
64 * Algorithm 1 of paper "Reduction Modulo 2^448 - 2^224 - 1".
65 */
66void reduce_after_mul(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, 14> in) {
67 std::array<uint64_t, 8> r; // NOLINT(*-member-init)
68 std::array<uint64_t, 8> s; // NOLINT(*-member-init)
69 std::array<uint64_t, 8> t_0; // NOLINT(*-member-init)
70 std::array<uint64_t, 8> h_1; // NOLINT(*-member-init)
71
72 uint64_t carry = 0;
73
74 // Line 4 (of the paper's algorithm 1)
75 r[0] = word_add(in[0], in[7], &carry);
76
77 // Line 5-7
78 r[1] = word_add(in[1], in[1 + 7], &carry);
79 r[2] = word_add(in[2], in[2 + 7], &carry);
80 r[3] = word_add(in[3], in[3 + 7], &carry);
81 r[4] = word_add(in[4], in[4 + 7], &carry);
82 r[5] = word_add(in[5], in[5 + 7], &carry);
83 r[6] = word_add(in[6], in[6 + 7], &carry);
84 r[7] = carry;
85 s[0] = r[0];
86 s[1] = r[1];
87 s[2] = r[2];
88 // Line 10
89 carry = 0;
90 s[3] = word_add(r[3], in[10] & 0xFFFFFFFF00000000, &carry);
91 // Line 11-13
92 s[4] = word_add(r[4], in[4 + 7], &carry);
93 s[5] = word_add(r[5], in[5 + 7], &carry);
94 s[6] = word_add(r[6], in[6 + 7], &carry);
95 s[7] = r[7] + carry;
96
97 // Line 15-17
98 t_0[0] = (in[0 + 11] << 32) | (in[0 + 10] >> 32);
99 t_0[1] = (in[1 + 11] << 32) | (in[1 + 10] >> 32);
100 t_0[2] = (in[2 + 11] << 32) | (in[2 + 10] >> 32);
101 // Line 18
102 t_0[3] = (in[7] << 32) | (in[13] >> 32);
103 // Line 19-21
104 t_0[4] = (in[4 + 4] << 32) | (in[4 + 3] >> 32);
105 t_0[5] = (in[5 + 4] << 32) | (in[5 + 3] >> 32);
106 t_0[6] = (in[6 + 4] << 32) | (in[6 + 3] >> 32);
107 carry = 0;
108 // Line 23-25
109 h_1[0] = word_add(s[0], t_0[0], &carry);
110 h_1[1] = word_add(s[1], t_0[1], &carry);
111 h_1[2] = word_add(s[2], t_0[2], &carry);
112 h_1[3] = word_add(s[3], t_0[3], &carry);
113 h_1[4] = word_add(s[4], t_0[4], &carry);
114 h_1[5] = word_add(s[5], t_0[5], &carry);
115 h_1[6] = word_add(s[6], t_0[6], &carry);
116 h_1[7] = s[7] + carry;
117
118 reduce_after_add(out, h_1);
119}
120
121// Multiply by the Curve448 constant a24 = (a-2)/4 = 39081.
122// Uses a 7-word × 1-word multiply (7 muls vs 49 for full comba_mul<7>),
123// and the result fits in 8 words so only needs reduce_after_add.
124void gf_mul_a24(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a) {
125 constexpr uint64_t A24 = 39081;
126 std::array<uint64_t, 8> ws; // NOLINT(*-member-init)
127 uint64_t carry = 0;
128 ws[0] = word_madd2(a[0], A24, &carry);
129 ws[1] = word_madd2(a[1], A24, &carry);
130 ws[2] = word_madd2(a[2], A24, &carry);
131 ws[3] = word_madd2(a[3], A24, &carry);
132 ws[4] = word_madd2(a[4], A24, &carry);
133 ws[5] = word_madd2(a[5], A24, &carry);
134 ws[6] = word_madd2(a[6], A24, &carry);
135 ws[7] = carry;
136 reduce_after_add(out, ws);
137}
138
139void gf_mul(std::span<uint64_t, WORDS_448> out,
140 std::span<const uint64_t, WORDS_448> a,
141 std::span<const uint64_t, WORDS_448> b) {
142 std::array<uint64_t, 14> ws; // NOLINT(*-member-init)
143 comba_mul<7>(ws.data(), a.data(), b.data());
144 reduce_after_mul(out, ws);
145}
146
147void gf_square(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a) {
148 std::array<uint64_t, 14> ws; // NOLINT(*-member-init)
149 comba_sqr<7>(ws.data(), a.data());
150 reduce_after_mul(out, ws);
151}
152
153void gf_add(std::span<uint64_t, WORDS_448> out,
154 std::span<const uint64_t, WORDS_448> a,
155 std::span<const uint64_t, WORDS_448> b) {
156 std::array<uint64_t, WORDS_448 + 1> ws; // NOLINT(*-member-init)
157
158 uint64_t carry = 0;
159 ws[0] = word_add(a[0], b[0], &carry);
160 ws[1] = word_add(a[1], b[1], &carry);
161 ws[2] = word_add(a[2], b[2], &carry);
162 ws[3] = word_add(a[3], b[3], &carry);
163 ws[4] = word_add(a[4], b[4], &carry);
164 ws[5] = word_add(a[5], b[5], &carry);
165 ws[6] = word_add(a[6], b[6], &carry);
166 ws[7] = carry;
167
168 reduce_after_add(out, ws);
169}
170
171/**
172 * @brief Subtract two elements in GF(P). out = a - b
173 *
174 * Algorithm 2 of paper: "Reduction Modulo 2^448 - 2^224 - 1"
175 */
176void gf_sub(std::span<uint64_t, WORDS_448> out,
177 std::span<const uint64_t, WORDS_448> a,
178 std::span<const uint64_t, WORDS_448> b) {
179 std::array<uint64_t, WORDS_448> h_0; // NOLINT(*-member-init)
180 std::array<uint64_t, WORDS_448> h_1; // NOLINT(*-member-init)
181
182 uint64_t borrow = 0;
183 h_0[0] = word_sub(a[0], b[0], &borrow);
184 h_0[1] = word_sub(a[1], b[1], &borrow);
185 h_0[2] = word_sub(a[2], b[2], &borrow);
186 h_0[3] = word_sub(a[3], b[3], &borrow);
187 h_0[4] = word_sub(a[4], b[4], &borrow);
188 h_0[5] = word_sub(a[5], b[5], &borrow);
189 h_0[6] = word_sub(a[6], b[6], &borrow);
190 uint64_t delta = borrow;
191 uint64_t delta_p = delta << 32;
192 borrow = 0;
193
194 constexpr uint64_t zero = 0;
195
196 h_1[0] = word_sub(h_0[0], delta, &borrow);
197 h_1[1] = word_sub(h_0[1], zero, &borrow);
198 h_1[2] = word_sub(h_0[2], zero, &borrow);
199 h_1[3] = word_sub(h_0[3], delta_p, &borrow);
200 h_1[4] = word_sub(h_0[4], zero, &borrow);
201 h_1[5] = word_sub(h_0[5], zero, &borrow);
202 h_1[6] = word_sub(h_0[6], zero, &borrow);
203
204 delta = borrow;
205 delta_p = delta << 32;
206 borrow = 0;
207
208 out[0] = word_sub(h_1[0], delta, &borrow);
209 out[1] = word_sub(h_1[1], zero, &borrow);
210 out[2] = word_sub(h_1[2], zero, &borrow);
211 out[3] = word_sub(h_1[3], delta_p, &borrow);
212 out[4] = h_1[4];
213 out[5] = h_1[5];
214 out[6] = h_1[6];
215}
216
217/// Square a field element n times
218void gf_sqr_n(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a, size_t n) {
219 gf_square(out, a);
220 for(size_t i = 1; i < n; ++i) {
221 gf_square(out, out);
222 }
223}
224
225/**
226 * @brief Compute x^(2^222 - 1) using an addition chain.
227 *
228 * This is the shared prefix of the addition chains for both
229 * inversion (x^(p-2)) and square root (x^((p-3)/4)).
230 *
231 * Addition chain from addchain tool (cost 446):
232 * _11 = 1 + _10
233 * _111 = 1 + _110
234 * _111111 = _111 + _111 << 3
235 * x12 = _111111 << 6 + _111111
236 * x24 = x12 << 12 + x12
237 * x30 = _111111 + x24 << 6
238 * x48 = x24 << 6 << 18 + x24
239 * x96 = x48 << 48 + x48
240 * x192 = x96 << 96 + x96
241 * x222 = x192 << 30 + x30
242 */
243void gf_pow_2_222m1(std::span<uint64_t, WORDS_448> x222,
244 std::span<uint64_t, WORDS_448> x223,
245 std::span<const uint64_t, WORDS_448> a) {
246 std::array<uint64_t, WORDS_448> t; // NOLINT(*-member-init)
247
248 // _10 = a^2
249 std::array<uint64_t, WORDS_448> a2; // NOLINT(*-member-init)
250 gf_square(a2, a);
251
252 // _11 = a^3
253 std::array<uint64_t, WORDS_448> a3; // NOLINT(*-member-init)
254 gf_mul(a3, a, a2);
255
256 // _111 = a^7
257 std::array<uint64_t, WORDS_448> a7; // NOLINT(*-member-init)
258 gf_square(t, a3);
259 gf_mul(a7, a, t);
260
261 // _111111 = a^63
262 std::array<uint64_t, WORDS_448> a63; // NOLINT(*-member-init)
263 gf_sqr_n(t, a7, 3);
264 gf_mul(a63, a7, t);
265
266 // x12 = a^(2^12 - 1)
267 std::array<uint64_t, WORDS_448> x12; // NOLINT(*-member-init)
268 gf_sqr_n(t, a63, 6);
269 gf_mul(x12, a63, t);
270
271 // x24 = a^(2^24 - 1)
272 std::array<uint64_t, WORDS_448> x24; // NOLINT(*-member-init)
273 gf_sqr_n(t, x12, 12);
274 gf_mul(x24, x12, t);
275
276 // i34 = x24 << 6 = a^((2^24 - 1) * 2^6)
277 std::array<uint64_t, WORDS_448> i34; // NOLINT(*-member-init)
278 gf_sqr_n(i34, x24, 6);
279
280 // x30 = a^(2^30 - 1)
281 std::array<uint64_t, WORDS_448> x30; // NOLINT(*-member-init)
282 gf_mul(x30, a63, i34);
283
284 // x48 = a^(2^48 - 1)
285 std::array<uint64_t, WORDS_448> x48; // NOLINT(*-member-init)
286 gf_sqr_n(t, i34, 18);
287 gf_mul(x48, x24, t);
288
289 // x96 = a^(2^96 - 1)
290 std::array<uint64_t, WORDS_448> x96; // NOLINT(*-member-init)
291 gf_sqr_n(t, x48, 48);
292 gf_mul(x96, x48, t);
293
294 // x192 = a^(2^192 - 1)
295 std::array<uint64_t, WORDS_448> x192; // NOLINT(*-member-init)
296 gf_sqr_n(t, x96, 96);
297 gf_mul(x192, x96, t);
298
299 // x222 = a^(2^222 - 1)
300 gf_sqr_n(t, x192, 30);
301 gf_mul(x222, x30, t);
302
303 // x223 = a^(2^223 - 1)
304 gf_square(t, x222);
305 gf_mul(x223, a, t);
306}
307
308/**
309 * @brief Inversion in GF(P) using Fermat's little theorem:
310 * x^-1 = x^(P-2) mod P
311 *
312 * Uses an optimized addition chain (cost 460) found by addchain.
313 * P-2 = 2^448 - 2^224 - 3
314 * return = (x223 << 223 + x222) << 2 + 1
315 */
316void gf_inv(std::span<uint64_t, WORDS_448> out, std::span<const uint64_t, WORDS_448> a) {
317 std::array<uint64_t, WORDS_448> x222; // NOLINT(*-member-init)
318 std::array<uint64_t, WORDS_448> x223; // NOLINT(*-member-init)
319 gf_pow_2_222m1(x222, x223, a);
320
321 // (x223 << 223 + x222) << 2 + 1
322 std::array<uint64_t, WORDS_448> t; // NOLINT(*-member-init)
323 gf_sqr_n(t, x223, 223);
324 gf_mul(t, t, x222);
325 gf_sqr_n(t, t, 2);
326 gf_mul(out, t, a);
327}
328
329/**
330 * @brief Convert a number to its canonical representation.
331 *
332 * I.e. if the number is greater than P, subtract P. The number cannot be >= 2P
333 * since 2*P > 2^(7*64).
334 */
335std::array<uint64_t, WORDS_448> to_canonical(std::span<const uint64_t, WORDS_448> in) {
336 const std::array<uint64_t, WORDS_448> p = {0xffffffffffffffff,
337 0xffffffffffffffff,
338 0xffffffffffffffff,
339 0xfffffffeffffffff,
340 0xffffffffffffffff,
341 0xffffffffffffffff,
342 0xffffffffffffffff};
343
344 std::array<uint64_t, WORDS_448> in_minus_p; // NOLINT(*-member-init)
345 uint64_t borrow = 0;
346 for(size_t i = 0; i < WORDS_448; ++i) {
347 in_minus_p[i] = word_sub(in[i], p[i], &borrow);
348 }
349 std::array<uint64_t, WORDS_448> out; // NOLINT(*-member-init)
350 CT::Mask<uint64_t>::expand(borrow).select_n(out.data(), in.data(), in_minus_p.data(), WORDS_448);
351 return out;
352}
353
354} // namespace
355
356Gf448Elem::Gf448Elem(std::span<const uint8_t, BYTES_448> x) /* NOLINT(*-member-init) */ {
357 load_le(m_x, x);
358}
359
360Gf448Elem::Gf448Elem(uint64_t least_sig_word) /* NOLINT(*-member-init) */ {
361 clear_mem(m_x);
362 m_x[0] = least_sig_word;
363}
364
365void Gf448Elem::to_bytes(std::span<uint8_t, BYTES_448> out) const {
366 store_le(out, to_canonical(m_x));
367}
368
369std::array<uint8_t, BYTES_448> Gf448Elem::to_bytes() const {
370 std::array<uint8_t, BYTES_448> bytes{};
371 to_bytes(bytes);
372 return bytes;
373}
374
376 for(size_t i = 0; i < WORDS_448; ++i) {
377 mask.conditional_swap(m_x[i], other.m_x[i]);
378 }
379}
380
382 mask.select_n(m_x.data(), other.m_x.data(), m_x.data(), WORDS_448);
383}
384
386 Gf448Elem res(0);
387 gf_add(res.m_x, m_x, other.m_x);
388 return res;
389}
390
392 Gf448Elem res(0);
393 gf_sub(res.m_x, m_x, other.m_x);
394 return res;
395}
396
398 Gf448Elem res(0);
399 gf_sub(res.m_x, res.m_x, m_x);
400 return res;
401}
402
404 Gf448Elem res(0);
405 gf_mul(res.m_x, m_x, other.m_x);
406 return res;
407}
408
410 Gf448Elem res(0);
411 gf_inv(res.m_x, other.m_x);
412 gf_mul(res.m_x, m_x, res.m_x);
413 return res;
414}
415
416bool Gf448Elem::operator==(const Gf448Elem& other) const {
417 const auto canonical_form_this = to_canonical(m_x);
418 const auto canonical_form_other = to_canonical(other.m_x);
419 return CT::is_equal(canonical_form_this.data(), canonical_form_other.data(), WORDS_448).as_bool();
420}
421
422bool Gf448Elem::is_zero() const {
423 const auto canonical_form = to_canonical(m_x);
424
425 return CT::all_zeros(canonical_form.data(), WORDS_448).as_bool();
426}
427
428bool Gf448Elem::is_odd() const {
429 const auto canonical_form = to_canonical(m_x);
430 return (canonical_form[0] & 1) == 1;
431}
432
433bool Gf448Elem::bytes_are_canonical_representation(std::span<const uint8_t, BYTES_448> x) {
434 const auto x_words = load_le<std::array<uint64_t, WORDS_448>>(x);
435 const auto x_words_canonical = to_canonical(x_words);
436 return CT::is_equal(x_words.data(), x_words_canonical.data(), WORDS_448).as_bool();
437}
438
440 Gf448Elem res(0);
441 gf_mul_a24(res.words(), a.words());
442 return res;
443}
444
446 Gf448Elem res(0);
447 gf_square(res.words(), elem.words());
448 return res;
449}
450
452 // Compute elem^((P-3)/4) using an optimized addition chain (cost 457).
453 // (P-3)/4 = 2^446 - 2^222 - 1
454 // return = x223 << 223 + x222
455 std::array<uint64_t, WORDS_448> x222; // NOLINT(*-member-init)
456 std::array<uint64_t, WORDS_448> x223; // NOLINT(*-member-init)
457 gf_pow_2_222m1(x222, x223, elem.words());
458
459 Gf448Elem res(0);
460 gf_sqr_n(res.words(), x223, 223);
461 gf_mul(res.words(), res.words(), x222);
462 return res;
463}
464
465} // namespace Botan
void conditional_swap(U &x, U &y) const
Definition ct_utils.h:585
constexpr void select_n(T output[], const T x[], const T y[], size_t len) const
Definition ct_utils.h:565
static constexpr Mask< T > expand(T v)
Definition ct_utils.h:392
void ct_cond_swap(CT::Mask< uint64_t > mask, Gf448Elem &other)
Swap this and other if mask is set. Constant time.
Gf448Elem operator*(const Gf448Elem &other) const
Gf448Elem operator-() const
static bool bytes_are_canonical_representation(std::span< const uint8_t, BYTES_448 > x)
Given 56 bytes, checks that the (little endian) number from this bytes is a valid GF element,...
std::array< uint8_t, BYTES_448 > to_bytes() const
Return the canonical representation of the GF element as 56 bytes in little-endian order.
Gf448Elem operator/(const Gf448Elem &other) const
bool is_odd() const
Return true iff this element is odd. Constant time.
Gf448Elem operator+(const Gf448Elem &other) const
bool operator==(const Gf448Elem &other) const
std::span< uint64_t, WORDS_448 > words()
Accessor to the internal words of the GF element.
Gf448Elem(std::span< const uint8_t, BYTES_448 > x)
Construct a GF element from a 448-bit integer gives as 56 bytes x in little-endian order.
void ct_cond_assign(CT::Mask< uint64_t > mask, const Gf448Elem &other)
Set this to other if mask is true. Constant time.
bool is_zero() const
Return true iff this element is zero. Constant time.
constexpr CT::Mask< T > is_equal(const T x[], const T y[], size_t len)
Definition ct_utils.h:798
constexpr CT::Mask< T > all_zeros(const T elem[], size_t len)
Definition ct_utils.h:785
Gf448Elem mul_a24(const Gf448Elem &a)
Multiply a field element by the Curve448 constant a24 = 39081.
Gf448Elem root(const Gf448Elem &elem)
Compute the root of elem in the field.
constexpr auto word_sub(W x, W y, W *carry) -> W
Definition mp_asmi.h:320
constexpr auto word_add(W x, W y, W *carry) -> W
Definition mp_asmi.h:231
constexpr void comba_sqr(W z[2 *N], const W x[N])
Definition mp_core.h:837
constexpr void comba_mul(W z[2 *N], const W x[N], const W y[N])
Definition mp_core.h:801
BigInt square(const BigInt &x)
Definition numthry.cpp:184
constexpr auto word_madd2(W a, W b, W *c) -> W
Definition mp_asmi.h:90
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:736
void carry(int64_t &h0, int64_t &h1)
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:495
constexpr void clear_mem(T *ptr, size_t n)
Definition mem_ops.h:118
constexpr size_t WORDS_448
Definition curve448_gf.h:23