Botan 3.9.0
Crypto and TLS for C&
pcurves_mul.h
Go to the documentation of this file.
1/*
2* (C) 2024,2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_PCURVES_MUL_H_
8#define BOTAN_PCURVES_MUL_H_
9
10#include <botan/types.h>
11#include <botan/internal/ct_utils.h>
12#include <botan/internal/pcurves_algos.h>
13#include <vector>
14
15namespace Botan {
16
18
19/*
20* Multiplication algorithm window size parameters
21*/
22
23static constexpr size_t BasePointWindowBits = 5;
24static constexpr size_t VarPointWindowBits = 4;
25static constexpr size_t Mul2PrecompWindowBits = 3;
26static constexpr size_t Mul2WindowBits = 2;
27
28/**
29* A precomputed table of affine points with constant time lookup
30*
31* If R is zero then the entire table is scanned for each lookup.
32*
33* If R is not zero, then the table must be a multiple of R points long.
34* Each lookup will be examine a range of length R, as in
35* pts[0..R], pts[R..2*R], ...
36*/
37template <typename C, size_t R = 0>
38class AffinePointTable final {
39 public:
40 using AffinePoint = typename C::AffinePoint;
41 using ProjectivePoint = typename C::ProjectivePoint;
42 using WordType = typename C::WordType;
43
44 static constexpr bool WholeRangeSearch = (R == 0);
45
46 explicit AffinePointTable(std::span<const ProjectivePoint> pts) {
47 BOTAN_ASSERT_NOMSG(pts.size() > 1);
48
49 if constexpr(R > 0) {
50 BOTAN_ASSERT_NOMSG(pts.size() % R == 0);
51 }
52
53 // TODO scatter/gather with SIMD lookup
54 m_table = to_affine_batch<C>(pts);
55 }
56
57 /**
58 * If idx is zero then return the identity element. Otherwise return pts[idx - 1]
59 */
60 inline AffinePoint ct_select(size_t idx) const
61 requires(WholeRangeSearch)
62 {
63 BOTAN_DEBUG_ASSERT(idx < m_table.size() + 1);
64
65 auto result = AffinePoint::identity(m_table[0]);
66
67 // Intentionally wrapping; set to maximum size_t if idx == 0
68 const size_t idx1 = static_cast<size_t>(idx - 1);
69 for(size_t i = 0; i != m_table.size(); ++i) {
70 const auto found = CT::Mask<size_t>::is_equal(idx1, i).as_choice();
71 result.conditional_assign(found, m_table[i]);
72 }
73
74 return result;
75 }
76
77 /**
78 * If idx is zero then return the identity element. Otherwise return pts[idx - 1]
79 * out of the table subrange pts[iter*R..(iter+1)*R]
80 */
81 inline AffinePoint ct_select(size_t idx, size_t iter) const
82 requires(!WholeRangeSearch)
83 {
84 BOTAN_DEBUG_ASSERT(idx < R + 1);
85 BOTAN_DEBUG_ASSERT(R * (iter + 1) <= m_table.size());
86
87 auto result = AffinePoint::identity(m_table[R * iter]);
88
89 // Intentionally wrapping; set to maximum size_t if idx == 0
90 const size_t idx1 = static_cast<size_t>(idx - 1);
91 for(size_t i = 0; i != R; ++i) {
92 const auto found = CT::Mask<size_t>::is_equal(idx1, i).as_choice();
93 result.conditional_assign(found, m_table[R * iter + i]);
94 }
95
96 return result;
97 }
98
99 private:
100 std::vector<AffinePoint> m_table;
101};
102
103/*
104* Base point precomputation table
105*
106* This algorithm works by precomputing a set of points such that
107* the online phase of the point multiplication can be effected by
108* a sequence of point additions.
109*
110* The tables, even for W = 1, are large and costly to precompute, so
111* this is only used for the base point.
112*
113* The online phase of the algorithm uess `ceil(SB/W)` additions,
114* and no point doublings. The table is of size
115* `ceil(SB + W - 1)/W * ((1 << W) - 1)`
116* where SB is the bit length of the (blinded) scalar.
117*
118* Each window of the scalar is associated with a window in the table.
119* The table windows are unique to that offset within the scalar.
120*
121* The simplest version to understand is when W = 1. There the table
122* consists of [P, 2*P, 4*P, ..., 2^N*P] where N is the bit length of
123* the group order. The online phase consists of conditionally adding
124* table[i] depending on if bit i of the scalar is set or not.
125*
126* When W = 2, the scalar is examined 2 bits at a time, and the table
127* for a window index `I` is [(2^I)*P, (2^(I+1))*P, (2^I+2^(I+1))*P].
128*
129* This extends similarly for larger W
130*
131* At a certain point, the side channel silent table lookup becomes the
132* dominating cost
133*
134* For all W, each window in the table has an implicit element of
135* the identity element which is used if the scalar bits were all zero.
136* This is omitted to save space; AffinePoint::ct_select is designed
137* to assist in this by returning the identity element if its index
138* argument is zero, or otherwise it returns table[idx - 1]
139*/
140template <typename C, size_t WindowBits>
141std::vector<typename C::AffinePoint> basemul_setup(const typename C::AffinePoint& p, size_t max_scalar_bits) {
142 static_assert(WindowBits >= 1 && WindowBits <= 8);
143
144 // 2^W elements, less the identity element
145 constexpr size_t WindowElements = (1 << WindowBits) - 1;
146
147 const size_t Windows = (max_scalar_bits + WindowBits - 1) / WindowBits;
148
149 const size_t TableSize = Windows * WindowElements;
150
151 std::vector<typename C::ProjectivePoint> table;
152 table.reserve(TableSize);
153
154 auto accum = C::ProjectivePoint::from_affine(p);
155
156 for(size_t i = 0; i != TableSize; i += WindowElements) {
157 table.push_back(accum);
158
159 for(size_t j = 1; j != WindowElements; ++j) {
160 // Conditional ok: loop iteration count is public
161 if(j % 2 == 1) {
162 table.emplace_back(table[i + j / 2].dbl());
163 } else {
164 table.emplace_back(table[i + j - 1] + table[i]);
165 }
166 }
167
168 accum = table[i + (WindowElements / 2)].dbl();
169 }
170
171 return to_affine_batch<C>(table);
172}
173
174template <typename C, size_t WindowBits, typename BlindedScalar>
175typename C::ProjectivePoint basemul_exec(std::span<const typename C::AffinePoint> table,
176 const BlindedScalar& scalar,
178 // 2^W elements, less the identity element
179 static constexpr size_t WindowElements = (1 << WindowBits) - 1;
180
181 // TODO: C++23 - use std::mdspan to access table?
182
183 auto accum = [&]() {
184 const size_t w_0 = scalar.get_window(0);
185 const auto tbl_0 = table.first(WindowElements);
186 auto pt = C::ProjectivePoint::from_affine(C::AffinePoint::ct_select(tbl_0, w_0));
187 CT::poison(pt);
188 pt.randomize_rep(rng);
189 return pt;
190 }();
191
192 const size_t windows = (scalar.bits() + WindowBits - 1) / WindowBits;
193
194 for(size_t i = 1; i != windows; ++i) {
195 const size_t w_i = scalar.get_window(WindowBits * i);
196 const auto tbl_i = table.subspan(WindowElements * i, WindowElements);
197
198 /*
199 None of these additions can be doublings, because in each iteration, the
200 discrete logarithms of the points we're selecting out of the table are
201 larger than the largest possible dlog of accum.
202 */
203 accum += C::AffinePoint::ct_select(tbl_i, w_i);
204
205 // Conditional ok: loop iteration count is public
206 if(i <= 3) {
207 accum.randomize_rep(rng);
208 }
209 }
210
211 CT::unpoison(accum);
212 return accum;
213}
214
215/*
216* Variable point table mul setup and online phase
217*/
218template <typename C, size_t TableSize>
219AffinePointTable<C> varpoint_setup(const typename C::AffinePoint& p) {
220 static_assert(TableSize > 2);
221
222 std::vector<typename C::ProjectivePoint> table;
223 table.reserve(TableSize);
224 table.push_back(C::ProjectivePoint::from_affine(p));
225
226 for(size_t i = 1; i != TableSize; ++i) {
227 // Conditional ok: loop iteration count is public
228 if(i % 2 == 1) {
229 table.push_back(table[i / 2].dbl());
230 } else {
231 table.push_back(table[i - 1] + p);
232 }
233 }
234
235 return AffinePointTable<C>(table);
236}
237
238template <typename C, size_t WindowBits, typename BlindedScalar>
239typename C::ProjectivePoint varpoint_exec(const AffinePointTable<C>& table,
240 const BlindedScalar& scalar,
242 const size_t windows = (scalar.bits() + WindowBits - 1) / WindowBits;
243
244 auto accum = [&]() {
245 const size_t w_0 = scalar.get_window((windows - 1) * WindowBits);
246 auto pt = C::ProjectivePoint::from_affine(table.ct_select(w_0));
247 CT::poison(pt);
248 pt.randomize_rep(rng);
249 return pt;
250 }();
251
252 for(size_t i = 1; i != windows; ++i) {
253 accum = accum.dbl_n(WindowBits);
254 auto w_i = scalar.get_window((windows - i - 1) * WindowBits);
255
256 /*
257 This point addition cannot be a doubling (except once)
258
259 Consider the sequence of points that are operated on, and specifically
260 their discrete logarithms. We start out at the point at infinity
261 (dlog 0) and then add the initial window which is precisely P*w_0
262
263 We then perform WindowBits doublings, so accum's dlog at the point
264 of the addition in the first iteration of the loop (when i == 1) is
265 at least 2^W * w_0.
266
267 Since we know w_0 > 0, then in every iteration of the loop, accums
268 dlog will always be greater than the dlog of the table element we
269 just looked up (something between 0 and 2^W-1), and thus the
270 addition into accum cannot be a doubling.
271
272 However due to blinding this argument fails, since we perform
273 multiplications using a scalar that is larger than the group
274 order. In this case it's possible that the dlog of accum becomes
275 `order + x` (or, effectively, `x`) and `x` is smaller than 2^W.
276 In this case, a doubling may occur. Future iterations of the loop
277 cannot be doublings by the same argument above. Since the blinding
278 factor is always less than the group order (substantially so),
279 it is not possible for the dlog of accum to overflow a second time.
280 */
281
282 accum += table.ct_select(w_i);
283
284 // Conditional ok: loop iteration count is public
285 if(i <= 3) {
286 accum.randomize_rep(rng);
287 }
288 }
289
290 CT::unpoison(accum);
291 return accum;
292}
293
294/*
295* Effect 2-ary multiplication ie x*G + y*H
296*
297* This is done using a windowed variant of what is usually called
298* Shamir's trick.
299*
300* The W = 1 case is simple; we precompute an extra point GH = G + H,
301* and then examine 1 bit in each of x and y. If one or the other bits
302* are set then add G or H resp. If both bits are set, add GH.
303*
304* The example below is a precomputed table for W=2. The flattened table
305* begins at (x_i,y_i) = (1,0), i.e. the identity element is omitted.
306* The indices in each cell refer to the cell's location in m_table.
307*
308* x-> 0 1 2 3
309* 0 |/ (ident) |0 x |1 2x |2 3x |
310* 1 |3 y |4 x+y |5 2x+y |6 3x+y |
311* y = 2 |7 2y |8 x+2y |9 2(x+y) |10 3x+2y |
312* 3 |11 3y |12 x+3y |13 2x+3y |14 3x+3y |
313*/
314
315template <typename C, size_t WindowBits>
316std::vector<typename C::ProjectivePoint> mul2_setup(const typename C::AffinePoint& p,
317 const typename C::AffinePoint& q) {
318 static_assert(WindowBits >= 1 && WindowBits <= 4);
319
320 // 2^(2*W) elements, less the identity element
321 constexpr size_t TableSize = (1 << (2 * WindowBits)) - 1;
322 constexpr size_t WindowSize = (1 << WindowBits);
323
324 std::vector<typename C::ProjectivePoint> table;
325 table.reserve(TableSize);
326
327 for(size_t i = 0; i != TableSize; ++i) {
328 const size_t t_i = (i + 1);
329 const size_t p_i = t_i % WindowSize;
330 const size_t q_i = (t_i >> WindowBits) % WindowSize;
331
332 // Conditionals ok: all based on t_i/p_i/q_i which in turn are derived from public i
333
334 // Returns x_i * x + y_i * y
335 auto next_tbl_e = [&]() {
336 if(p_i % 2 == 0 && q_i % 2 == 0) {
337 // Where possible using doubling (eg indices 1, 7, 9 in
338 // the table above)
339 return table[(t_i / 2) - 1].dbl();
340 } else if(p_i > 0 && q_i > 0) {
341 // A combination of p and q
342 if(p_i == 1) {
343 return p + table[(q_i << WindowBits) - 1];
344 } else if(q_i == 1) {
345 return table[p_i - 1] + q;
346 } else {
347 return table[p_i - 1] + table[(q_i << WindowBits) - 1];
348 }
349 } else if(p_i > 0 && q_i == 0) {
350 // A multiple of p without a q component
351 if(p_i == 1) {
352 // Just p
353 return C::ProjectivePoint::from_affine(p);
354 } else {
355 // p * p_{i-1}
356 return p + table[p_i - 1 - 1];
357 }
358 } else if(p_i == 0 && q_i > 0) {
359 if(q_i == 1) {
360 // Just q
361 return C::ProjectivePoint::from_affine(q);
362 } else {
363 // q * q_{i-1}
364 return q + table[((q_i - 1) << WindowBits) - 1];
365 }
366 } else {
368 }
369 };
370
371 table.emplace_back(next_tbl_e());
372 }
373
374 return table;
375}
376
377template <typename C, size_t WindowBits, typename BlindedScalar>
378typename C::ProjectivePoint mul2_exec(const AffinePointTable<C>& table,
379 const BlindedScalar& x,
380 const BlindedScalar& y,
382 const size_t Windows = (x.bits() + WindowBits - 1) / WindowBits;
383
384 auto accum = [&]() {
385 const size_t w_1 = x.get_window((Windows - 1) * WindowBits);
386 const size_t w_2 = y.get_window((Windows - 1) * WindowBits);
387 const size_t window = w_1 + (w_2 << WindowBits);
388 auto pt = C::ProjectivePoint::from_affine(table.ct_select(window));
389 CT::poison(pt);
390 pt.randomize_rep(rng);
391 return pt;
392 }();
393
394 for(size_t i = 1; i != Windows; ++i) {
395 accum = accum.dbl_n(WindowBits);
396
397 const size_t w_1 = x.get_window((Windows - i - 1) * WindowBits);
398 const size_t w_2 = y.get_window((Windows - i - 1) * WindowBits);
399 const size_t window = w_1 + (w_2 << WindowBits);
400 accum += table.ct_select(window);
401
402 // Conditional ok: loop iteration count is public
403 if(i <= 3) {
404 accum.randomize_rep(rng);
405 }
406 }
407
408 CT::unpoison(accum);
409 return accum;
410}
411
412} // namespace Botan
413
414#endif
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:75
#define BOTAN_DEBUG_ASSERT(expr)
Definition assert.h:129
#define BOTAN_ASSERT_UNREACHABLE()
Definition assert.h:163
typename C::ProjectivePoint ProjectivePoint
Definition pcurves_mul.h:41
AffinePoint ct_select(size_t idx) const
Definition pcurves_mul.h:60
typename C::AffinePoint AffinePoint
Definition pcurves_mul.h:40
AffinePoint ct_select(size_t idx, size_t iter) const
Definition pcurves_mul.h:81
static constexpr bool WholeRangeSearch
Definition pcurves_mul.h:44
AffinePointTable(std::span< const ProjectivePoint > pts)
Definition pcurves_mul.h:46
static constexpr Mask< T > is_equal(T x, T y)
Definition ct_utils.h:470
constexpr void unpoison(const T *p, size_t n)
Definition ct_utils.h:65
constexpr void poison(const T *p, size_t n)
Definition ct_utils.h:54
C::ProjectivePoint varpoint_exec(const AffinePointTable< C > &table, const BlindedScalar &scalar, RandomNumberGenerator &rng)
auto to_affine_batch(std::span< const typename C::ProjectivePoint > projective)
C::ProjectivePoint mul2_exec(const AffinePointTable< C > &table, const BlindedScalar &x, const BlindedScalar &y, RandomNumberGenerator &rng)
C::ProjectivePoint basemul_exec(std::span< const typename C::AffinePoint > table, const BlindedScalar &scalar, RandomNumberGenerator &rng)
std::vector< typename C::ProjectivePoint > mul2_setup(const typename C::AffinePoint &p, const typename C::AffinePoint &q)
AffinePointTable< C > varpoint_setup(const typename C::AffinePoint &p)
std::vector< typename C::AffinePoint > basemul_setup(const typename C::AffinePoint &p, size_t max_scalar_bits)