Botan 3.4.0
Crypto and TLS for C&
ed25519_fe.cpp
Go to the documentation of this file.
1/*
2* Ed25519 field element
3* (C) 2017 Ribose Inc
4*
5* Based on the public domain code from SUPERCOP ref10 by
6* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7*
8* Botan is released under the Simplified BSD License (see license.txt)
9*/
10
11#include <botan/internal/ed25519_fe.h>
12
13#include <botan/internal/ed25519_internal.h>
14
15namespace Botan {
16
17//static
19 FE_25519 t0;
20 FE_25519 t1;
21 FE_25519 t2;
22 FE_25519 t3;
23
24 fe_sq(t0, z);
25 fe_sq_iter(t1, t0, 2);
26 fe_mul(t1, z, t1);
27 fe_mul(t0, t0, t1);
28 fe_sq(t2, t0);
29 fe_mul(t1, t1, t2);
30 fe_sq_iter(t2, t1, 5);
31 fe_mul(t1, t2, t1);
32 fe_sq_iter(t2, t1, 10);
33 fe_mul(t2, t2, t1);
34 fe_sq_iter(t3, t2, 20);
35 fe_mul(t2, t3, t2);
36 fe_sq_iter(t2, t2, 10);
37 fe_mul(t1, t2, t1);
38 fe_sq_iter(t2, t1, 50);
39 fe_mul(t2, t2, t1);
40 fe_sq_iter(t3, t2, 100);
41 fe_mul(t2, t3, t2);
42 fe_sq_iter(t2, t2, 50);
43 fe_mul(t1, t2, t1);
44 fe_sq_iter(t1, t1, 5);
45
46 fe_mul(t0, t1, t0);
47 return t0;
48}
49
51 FE_25519 t0;
52 FE_25519 t1;
53 FE_25519 t2;
54
55 fe_sq(t0, z);
56 fe_sq_iter(t1, t0, 2);
57 fe_mul(t1, z, t1);
58 fe_mul(t0, t0, t1);
59 fe_sq(t0, t0);
60 fe_mul(t0, t1, t0);
61 fe_sq_iter(t1, t0, 5);
62 fe_mul(t0, t1, t0);
63 fe_sq_iter(t1, t0, 10);
64 fe_mul(t1, t1, t0);
65 fe_sq_iter(t2, t1, 20);
66 fe_mul(t1, t2, t1);
67 fe_sq_iter(t1, t1, 10);
68 fe_mul(t0, t1, t0);
69 fe_sq_iter(t1, t0, 50);
70 fe_mul(t1, t1, t0);
71 fe_sq_iter(t2, t1, 100);
72 fe_mul(t1, t2, t1);
73 fe_sq_iter(t1, t1, 50);
74 fe_mul(t0, t1, t0);
75 fe_sq_iter(t0, t0, 2);
76
77 fe_mul(t0, t0, z);
78 return t0;
79}
80
81/*
82h = f * g
83Can overlap h with f or g.
84
85Preconditions:
86|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
87|g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88
89Postconditions:
90|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
91*/
92
93/*
94Notes on implementation strategy:
95
96Using schoolbook multiplication.
97Karatsuba would save a little in some cost models.
98
99Most multiplications by 2 and 19 are 32-bit precomputations;
100cheaper than 64-bit postcomputations.
101
102There is one remaining multiplication by 19 in the carry chain;
103one *19 precomputation can be merged into this,
104but the resulting data flow is considerably less clean.
105
106There are 12 carries below.
10710 of them are 2-way parallelizable and vectorizable.
108Can get away with 11 carries, but then data flow is much deeper.
109
110With tighter constraints on inputs can squeeze carries into int32.
111*/
112
113//static
115 const int32_t f0 = f[0];
116 const int32_t f1 = f[1];
117 const int32_t f2 = f[2];
118 const int32_t f3 = f[3];
119 const int32_t f4 = f[4];
120 const int32_t f5 = f[5];
121 const int32_t f6 = f[6];
122 const int32_t f7 = f[7];
123 const int32_t f8 = f[8];
124 const int32_t f9 = f[9];
125
126 const int32_t g0 = g[0];
127 const int32_t g1 = g[1];
128 const int32_t g2 = g[2];
129 const int32_t g3 = g[3];
130 const int32_t g4 = g[4];
131 const int32_t g5 = g[5];
132 const int32_t g6 = g[6];
133 const int32_t g7 = g[7];
134 const int32_t g8 = g[8];
135 const int32_t g9 = g[9];
136
137 const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
138 const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
139 const int32_t g3_19 = 19 * g3;
140 const int32_t g4_19 = 19 * g4;
141 const int32_t g5_19 = 19 * g5;
142 const int32_t g6_19 = 19 * g6;
143 const int32_t g7_19 = 19 * g7;
144 const int32_t g8_19 = 19 * g8;
145 const int32_t g9_19 = 19 * g9;
146 const int32_t f1_2 = 2 * f1;
147 const int32_t f3_2 = 2 * f3;
148 const int32_t f5_2 = 2 * f5;
149 const int32_t f7_2 = 2 * f7;
150 const int32_t f9_2 = 2 * f9;
151
152 const int64_t f0g0 = f0 * static_cast<int64_t>(g0);
153 const int64_t f0g1 = f0 * static_cast<int64_t>(g1);
154 const int64_t f0g2 = f0 * static_cast<int64_t>(g2);
155 const int64_t f0g3 = f0 * static_cast<int64_t>(g3);
156 const int64_t f0g4 = f0 * static_cast<int64_t>(g4);
157 const int64_t f0g5 = f0 * static_cast<int64_t>(g5);
158 const int64_t f0g6 = f0 * static_cast<int64_t>(g6);
159 const int64_t f0g7 = f0 * static_cast<int64_t>(g7);
160 const int64_t f0g8 = f0 * static_cast<int64_t>(g8);
161 const int64_t f0g9 = f0 * static_cast<int64_t>(g9);
162 const int64_t f1g0 = f1 * static_cast<int64_t>(g0);
163 const int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1);
164 const int64_t f1g2 = f1 * static_cast<int64_t>(g2);
165 const int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3);
166 const int64_t f1g4 = f1 * static_cast<int64_t>(g4);
167 const int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5);
168 const int64_t f1g6 = f1 * static_cast<int64_t>(g6);
169 const int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7);
170 const int64_t f1g8 = f1 * static_cast<int64_t>(g8);
171 const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
172 const int64_t f2g0 = f2 * static_cast<int64_t>(g0);
173 const int64_t f2g1 = f2 * static_cast<int64_t>(g1);
174 const int64_t f2g2 = f2 * static_cast<int64_t>(g2);
175 const int64_t f2g3 = f2 * static_cast<int64_t>(g3);
176 const int64_t f2g4 = f2 * static_cast<int64_t>(g4);
177 const int64_t f2g5 = f2 * static_cast<int64_t>(g5);
178 const int64_t f2g6 = f2 * static_cast<int64_t>(g6);
179 const int64_t f2g7 = f2 * static_cast<int64_t>(g7);
180 const int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19);
181 const int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19);
182 const int64_t f3g0 = f3 * static_cast<int64_t>(g0);
183 const int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1);
184 const int64_t f3g2 = f3 * static_cast<int64_t>(g2);
185 const int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3);
186 const int64_t f3g4 = f3 * static_cast<int64_t>(g4);
187 const int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5);
188 const int64_t f3g6 = f3 * static_cast<int64_t>(g6);
189 const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
190 const int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19);
191 const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
192 const int64_t f4g0 = f4 * static_cast<int64_t>(g0);
193 const int64_t f4g1 = f4 * static_cast<int64_t>(g1);
194 const int64_t f4g2 = f4 * static_cast<int64_t>(g2);
195 const int64_t f4g3 = f4 * static_cast<int64_t>(g3);
196 const int64_t f4g4 = f4 * static_cast<int64_t>(g4);
197 const int64_t f4g5 = f4 * static_cast<int64_t>(g5);
198 const int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19);
199 const int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19);
200 const int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19);
201 const int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19);
202 const int64_t f5g0 = f5 * static_cast<int64_t>(g0);
203 const int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1);
204 const int64_t f5g2 = f5 * static_cast<int64_t>(g2);
205 const int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3);
206 const int64_t f5g4 = f5 * static_cast<int64_t>(g4);
207 const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
208 const int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19);
209 const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
210 const int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19);
211 const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
212 const int64_t f6g0 = f6 * static_cast<int64_t>(g0);
213 const int64_t f6g1 = f6 * static_cast<int64_t>(g1);
214 const int64_t f6g2 = f6 * static_cast<int64_t>(g2);
215 const int64_t f6g3 = f6 * static_cast<int64_t>(g3);
216 const int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19);
217 const int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19);
218 const int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19);
219 const int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19);
220 const int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19);
221 const int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19);
222 const int64_t f7g0 = f7 * static_cast<int64_t>(g0);
223 const int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1);
224 const int64_t f7g2 = f7 * static_cast<int64_t>(g2);
225 const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
226 const int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19);
227 const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
228 const int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19);
229 const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
230 const int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19);
231 const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
232 const int64_t f8g0 = f8 * static_cast<int64_t>(g0);
233 const int64_t f8g1 = f8 * static_cast<int64_t>(g1);
234 const int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19);
235 const int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19);
236 const int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19);
237 const int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19);
238 const int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19);
239 const int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19);
240 const int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19);
241 const int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19);
242 const int64_t f9g0 = f9 * static_cast<int64_t>(g0);
243 const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
244 const int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19);
245 const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
246 const int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19);
247 const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
248 const int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19);
249 const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
250 const int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19);
251 const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
252
253 int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
254 int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
255 int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
256 int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
257 int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
258 int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
259 int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
260 int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
261 int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
262 int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
263
264 /*
265 |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
266 i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
267 |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
268 i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
269 */
270 carry<26>(h0, h1);
271 carry<26>(h4, h5);
272
273 /* |h0| <= 2^25 */
274 /* |h4| <= 2^25 */
275 /* |h1| <= 1.71*2^59 */
276 /* |h5| <= 1.71*2^59 */
277
278 carry<25>(h1, h2);
279 carry<25>(h5, h6);
280
281 /* |h1| <= 2^24; from now on fits into int32 */
282 /* |h5| <= 2^24; from now on fits into int32 */
283 /* |h2| <= 1.41*2^60 */
284 /* |h6| <= 1.41*2^60 */
285
286 carry<26>(h2, h3);
287 carry<26>(h6, h7);
288 /* |h2| <= 2^25; from now on fits into int32 unchanged */
289 /* |h6| <= 2^25; from now on fits into int32 unchanged */
290 /* |h3| <= 1.71*2^59 */
291 /* |h7| <= 1.71*2^59 */
292
293 carry<25>(h3, h4);
294 carry<25>(h7, h8);
295 /* |h3| <= 2^24; from now on fits into int32 unchanged */
296 /* |h7| <= 2^24; from now on fits into int32 unchanged */
297 /* |h4| <= 1.72*2^34 */
298 /* |h8| <= 1.41*2^60 */
299
300 carry<26>(h4, h5);
301 carry<26>(h8, h9);
302 /* |h4| <= 2^25; from now on fits into int32 unchanged */
303 /* |h8| <= 2^25; from now on fits into int32 unchanged */
304 /* |h5| <= 1.01*2^24 */
305 /* |h9| <= 1.71*2^59 */
306
307 carry<25, 19>(h9, h0);
308
309 /* |h9| <= 2^24; from now on fits into int32 unchanged */
310 /* |h0| <= 1.1*2^39 */
311
312 carry<26>(h0, h1);
313 /* |h0| <= 2^25; from now on fits into int32 unchanged */
314 /* |h1| <= 1.01*2^24 */
315
316 return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
317}
318
319/*
320h = f * f
321Can overlap h with f.
322
323Preconditions:
324|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
325
326Postconditions:
327|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
328*/
329
330/*
331See fe_mul.c for discussion of implementation strategy.
332*/
333
334//static
335FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter) {
336 int32_t f0 = f[0];
337 int32_t f1 = f[1];
338 int32_t f2 = f[2];
339 int32_t f3 = f[3];
340 int32_t f4 = f[4];
341 int32_t f5 = f[5];
342 int32_t f6 = f[6];
343 int32_t f7 = f[7];
344 int32_t f8 = f[8];
345 int32_t f9 = f[9];
346
347 for(size_t i = 0; i != iter; ++i) {
348 const int32_t f0_2 = 2 * f0;
349 const int32_t f1_2 = 2 * f1;
350 const int32_t f2_2 = 2 * f2;
351 const int32_t f3_2 = 2 * f3;
352 const int32_t f4_2 = 2 * f4;
353 const int32_t f5_2 = 2 * f5;
354 const int32_t f6_2 = 2 * f6;
355 const int32_t f7_2 = 2 * f7;
356 const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
357 const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
358 const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
359 const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
360 const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
361
362 const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
363 const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
364 const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
365 const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
366 const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
367 const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
368 const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
369 const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
370 const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
371 const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
372 const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
373 const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
374 const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
375 const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
376 const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
377 const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
378 const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
379 const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
380 const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
381 const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
382 const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
383 const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
384 const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
385 const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
386 const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
387 const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
388 const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
389 const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
390 const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
391 const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
392 const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
393 const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
394 const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
395 const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
396 const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
397 const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
398 const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
399 const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
400 const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
401 const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
402 const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
403 const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
404 const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
405 const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
406 const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
407 const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
408 const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
409 const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
410 const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
411 const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
412 const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
413 const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
414 const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
415 const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
416 const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
417
418 int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
419 int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
420 int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
421 int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
422 int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
423 int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
424 int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
425 int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
426 int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
427 int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
428
429 carry<26>(h0, h1);
430 carry<26>(h4, h5);
431 carry<25>(h1, h2);
432 carry<25>(h5, h6);
433 carry<26>(h2, h3);
434 carry<26>(h6, h7);
435
436 carry<25>(h3, h4);
437 carry<25>(h7, h8);
438
439 carry<26>(h4, h5);
440 carry<26>(h8, h9);
441 carry<25, 19>(h9, h0);
442 carry<26>(h0, h1);
443
444 f0 = static_cast<int32_t>(h0);
445 f1 = static_cast<int32_t>(h1);
446 f2 = static_cast<int32_t>(h2);
447 f3 = static_cast<int32_t>(h3);
448 f4 = static_cast<int32_t>(h4);
449 f5 = static_cast<int32_t>(h5);
450 f6 = static_cast<int32_t>(h6);
451 f7 = static_cast<int32_t>(h7);
452 f8 = static_cast<int32_t>(h8);
453 f9 = static_cast<int32_t>(h9);
454 }
455
456 return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
457}
458
459/*
460h = 2 * f * f
461Can overlap h with f.
462
463Preconditions:
464|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
465
466Postconditions:
467|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
468*/
469
470/*
471See fe_mul.c for discussion of implementation strategy.
472*/
473
474//static
476 const int32_t f0 = f[0];
477 const int32_t f1 = f[1];
478 const int32_t f2 = f[2];
479 const int32_t f3 = f[3];
480 const int32_t f4 = f[4];
481 const int32_t f5 = f[5];
482 const int32_t f6 = f[6];
483 const int32_t f7 = f[7];
484 const int32_t f8 = f[8];
485 const int32_t f9 = f[9];
486 const int32_t f0_2 = 2 * f0;
487 const int32_t f1_2 = 2 * f1;
488 const int32_t f2_2 = 2 * f2;
489 const int32_t f3_2 = 2 * f3;
490 const int32_t f4_2 = 2 * f4;
491 const int32_t f5_2 = 2 * f5;
492 const int32_t f6_2 = 2 * f6;
493 const int32_t f7_2 = 2 * f7;
494 const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
495 const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
496 const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
497 const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
498 const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
499 const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
500 const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
501 const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
502 const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
503 const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
504 const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
505 const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
506 const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
507 const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
508 const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
509 const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
510 const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
511 const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
512 const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
513 const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
514 const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
515 const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
516 const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
517 const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
518 const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
519 const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
520 const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
521 const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
522 const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
523 const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
524 const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
525 const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
526 const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
527 const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
528 const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
529 const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
530 const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
531 const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
532 const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
533 const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
534 const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
535 const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
536 const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
537 const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
538 const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
539 const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
540 const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
541 const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
542 const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
543 const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
544 const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
545 const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
546 const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
547 const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
548 const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
549 const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
550 const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
551 const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
552 const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
553 const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
554
555 int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
556 int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
557 int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
558 int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
559 int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
560 int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
561 int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
562 int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
563 int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
564 int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
565
566 h0 += h0;
567 h1 += h1;
568 h2 += h2;
569 h3 += h3;
570 h4 += h4;
571 h5 += h5;
572 h6 += h6;
573 h7 += h7;
574 h8 += h8;
575 h9 += h9;
576
577 carry<26>(h0, h1);
578 carry<26>(h4, h5);
579
580 carry<25>(h1, h2);
581 carry<25>(h5, h6);
582
583 carry<26>(h2, h3);
584 carry<26>(h6, h7);
585
586 carry<25>(h3, h4);
587 carry<25>(h7, h8);
588 carry<26>(h4, h5);
589 carry<26>(h8, h9);
590 carry<25, 19>(h9, h0);
591 carry<26>(h0, h1);
592
593 return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
594}
595
596/*
597Ignores top bit of h.
598*/
599
600void FE_25519::from_bytes(const uint8_t s[32]) {
601 int64_t h0 = load_4(s);
602 int64_t h1 = load_3(s + 4) << 6;
603 int64_t h2 = load_3(s + 7) << 5;
604 int64_t h3 = load_3(s + 10) << 3;
605 int64_t h4 = load_3(s + 13) << 2;
606 int64_t h5 = load_4(s + 16);
607 int64_t h6 = load_3(s + 20) << 7;
608 int64_t h7 = load_3(s + 23) << 5;
609 int64_t h8 = load_3(s + 26) << 4;
610 int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
611
612 carry<25, 19>(h9, h0);
613 carry<25>(h1, h2);
614 carry<25>(h3, h4);
615 carry<25>(h5, h6);
616 carry<25>(h7, h8);
617
618 carry<26>(h0, h1);
619 carry<26>(h2, h3);
620 carry<26>(h4, h5);
621 carry<26>(h6, h7);
622 carry<26>(h8, h9);
623
624 m_fe[0] = static_cast<int32_t>(h0);
625 m_fe[1] = static_cast<int32_t>(h1);
626 m_fe[2] = static_cast<int32_t>(h2);
627 m_fe[3] = static_cast<int32_t>(h3);
628 m_fe[4] = static_cast<int32_t>(h4);
629 m_fe[5] = static_cast<int32_t>(h5);
630 m_fe[6] = static_cast<int32_t>(h6);
631 m_fe[7] = static_cast<int32_t>(h7);
632 m_fe[8] = static_cast<int32_t>(h8);
633 m_fe[9] = static_cast<int32_t>(h9);
634}
635
636/*
637Preconditions:
638|h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
639
640Write p=2^255-19; q=floor(h/p).
641Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
642
643Proof:
644Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
645Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
646
647Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
648Then 0<y<1.
649
650Write r=h-pq.
651Have 0<=r<=p-1=2^255-20.
652Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
653
654Write x=r+19(2^-255)r+y.
655Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
656
657Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
658so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
659*/
660
661void FE_25519::to_bytes(uint8_t s[32]) const {
662 const int64_t X25 = (1 << 25);
663
664 int32_t h0 = m_fe[0];
665 int32_t h1 = m_fe[1];
666 int32_t h2 = m_fe[2];
667 int32_t h3 = m_fe[3];
668 int32_t h4 = m_fe[4];
669 int32_t h5 = m_fe[5];
670 int32_t h6 = m_fe[6];
671 int32_t h7 = m_fe[7];
672 int32_t h8 = m_fe[8];
673 int32_t h9 = m_fe[9];
674 int32_t q;
675
676 q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
677 q = (h0 + q) >> 26;
678 q = (h1 + q) >> 25;
679 q = (h2 + q) >> 26;
680 q = (h3 + q) >> 25;
681 q = (h4 + q) >> 26;
682 q = (h5 + q) >> 25;
683 q = (h6 + q) >> 26;
684 q = (h7 + q) >> 25;
685 q = (h8 + q) >> 26;
686 q = (h9 + q) >> 25;
687
688 /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
689 h0 += 19 * q;
690 /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
691
692 carry0<26>(h0, h1);
693 carry0<25>(h1, h2);
694 carry0<26>(h2, h3);
695 carry0<25>(h3, h4);
696 carry0<26>(h4, h5);
697 carry0<25>(h5, h6);
698 carry0<26>(h6, h7);
699 carry0<25>(h7, h8);
700 carry0<26>(h8, h9);
701
702 int32_t carry9 = h9 >> 25;
703 h9 -= carry9 * X25;
704 /* h10 = carry9 */
705
706 /*
707 Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
708 Have h0+...+2^230 h9 between 0 and 2^255-1;
709 evidently 2^255 h10-2^255 q = 0.
710 Goal: Output h0+...+2^230 h9.
711 */
712
713 s[0] = static_cast<uint8_t>(h0 >> 0);
714 s[1] = static_cast<uint8_t>(h0 >> 8);
715 s[2] = static_cast<uint8_t>(h0 >> 16);
716 s[3] = static_cast<uint8_t>((h0 >> 24) | (h1 << 2));
717 s[4] = static_cast<uint8_t>(h1 >> 6);
718 s[5] = static_cast<uint8_t>(h1 >> 14);
719 s[6] = static_cast<uint8_t>((h1 >> 22) | (h2 << 3));
720 s[7] = static_cast<uint8_t>(h2 >> 5);
721 s[8] = static_cast<uint8_t>(h2 >> 13);
722 s[9] = static_cast<uint8_t>((h2 >> 21) | (h3 << 5));
723 s[10] = static_cast<uint8_t>(h3 >> 3);
724 s[11] = static_cast<uint8_t>(h3 >> 11);
725 s[12] = static_cast<uint8_t>((h3 >> 19) | (h4 << 6));
726 s[13] = static_cast<uint8_t>(h4 >> 2);
727 s[14] = static_cast<uint8_t>(h4 >> 10);
728 s[15] = static_cast<uint8_t>(h4 >> 18);
729 s[16] = static_cast<uint8_t>(h5 >> 0);
730 s[17] = static_cast<uint8_t>(h5 >> 8);
731 s[18] = static_cast<uint8_t>(h5 >> 16);
732 s[19] = static_cast<uint8_t>((h5 >> 24) | (h6 << 1));
733 s[20] = static_cast<uint8_t>(h6 >> 7);
734 s[21] = static_cast<uint8_t>(h6 >> 15);
735 s[22] = static_cast<uint8_t>((h6 >> 23) | (h7 << 3));
736 s[23] = static_cast<uint8_t>(h7 >> 5);
737 s[24] = static_cast<uint8_t>(h7 >> 13);
738 s[25] = static_cast<uint8_t>((h7 >> 21) | (h8 << 4));
739 s[26] = static_cast<uint8_t>(h8 >> 4);
740 s[27] = static_cast<uint8_t>(h8 >> 12);
741 s[28] = static_cast<uint8_t>((h8 >> 20) | (h9 << 6));
742 s[29] = static_cast<uint8_t>(h9 >> 2);
743 s[30] = static_cast<uint8_t>(h9 >> 10);
744 s[31] = static_cast<uint8_t>(h9 >> 18);
745}
746
747} // namespace Botan
void to_bytes(uint8_t b[32]) const
void from_bytes(const uint8_t b[32])
static FE_25519 invert(const FE_25519 &a)
static FE_25519 mul(const FE_25519 &a, const FE_25519 &b)
static FE_25519 pow_22523(const FE_25519 &a)
FE_25519(int init=0)
Definition ed25519_fe.h:29
static FE_25519 sqr_iter(const FE_25519 &a, size_t iter)
static FE_25519 sqr2(const FE_25519 &a)
uint64_t load_4(const uint8_t *in)
void fe_mul(fe &x, const fe &a, const fe &b)
Definition ed25519_fe.h:190
void fe_sq(fe &x, const fe &z)
Definition ed25519_fe.h:194
void fe_sq_iter(fe &x, const fe &z, size_t iter)
Definition ed25519_fe.h:198
uint64_t load_3(const uint8_t in[3])