Botan  2.4.0
Crypto and TLS for C++11
ed25519_fe.cpp
Go to the documentation of this file.
1 /*
2 * Ed25519 field element
3 * (C) 2017 Ribose Inc
4 *
5 * Based on the public domain code from SUPERCOP ref10 by
6 * Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7 *
8 * Botan is released under the Simplified BSD License (see license.txt)
9 */
10 
11 #include <botan/internal/ed25519_fe.h>
12 #include <botan/internal/ed25519_internal.h>
13 
14 namespace Botan {
15 
16 //static
18  {
19  fe t0;
20  fe t1;
21  fe t2;
22  fe t3;
23 
24  fe_sq(t0, z);
25  fe_sq_iter(t1, t0, 2);
26  fe_mul(t1, z, t1);
27  fe_mul(t0, t0, t1);
28  fe_sq(t2, t0);
29  fe_mul(t1, t1, t2);
30  fe_sq_iter(t2, t1, 5);
31  fe_mul(t1, t2, t1);
32  fe_sq_iter(t2, t1, 10);
33  fe_mul(t2, t2, t1);
34  fe_sq_iter(t3, t2, 20);
35  fe_mul(t2, t3, t2);
36  fe_sq_iter(t2, t2, 10);
37  fe_mul(t1, t2, t1);
38  fe_sq_iter(t2, t1, 50);
39  fe_mul(t2, t2, t1);
40  fe_sq_iter(t3, t2, 100);
41  fe_mul(t2, t3, t2);
42  fe_sq_iter(t2, t2, 50);
43  fe_mul(t1, t2, t1);
44  fe_sq_iter(t1, t1, 5);
45 
46  fe_mul(t0, t1, t0);
47  return t0;
48  }
49 
51  {
52  fe t0;
53  fe t1;
54  fe t2;
55 
56  fe_sq(t0, z);
57  fe_sq_iter(t1, t0, 2);
58  fe_mul(t1, z, t1);
59  fe_mul(t0, t0, t1);
60  fe_sq(t0, t0);
61  fe_mul(t0, t1, t0);
62  fe_sq_iter(t1, t0, 5);
63  fe_mul(t0, t1, t0);
64  fe_sq_iter(t1, t0, 10);
65  fe_mul(t1, t1, t0);
66  fe_sq_iter(t2, t1, 20);
67  fe_mul(t1, t2, t1);
68  fe_sq_iter(t1, t1, 10);
69  fe_mul(t0, t1, t0);
70  fe_sq_iter(t1, t0, 50);
71  fe_mul(t1, t1, t0);
72  fe_sq_iter(t2, t1, 100);
73  fe_mul(t1, t2, t1);
74  fe_sq_iter(t1, t1, 50);
75  fe_mul(t0, t1, t0);
76  fe_sq_iter(t0, t0, 2);
77 
78  fe_mul(t0, t0, z);
79  return t0;
80  }
81 
82 /*
83 h = f * g
84 Can overlap h with f or g.
85 
86 Preconditions:
87 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
89 
90 Postconditions:
91 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
92 */
93 
94 /*
95 Notes on implementation strategy:
96 
97 Using schoolbook multiplication.
98 Karatsuba would save a little in some cost models.
99 
100 Most multiplications by 2 and 19 are 32-bit precomputations;
101 cheaper than 64-bit postcomputations.
102 
103 There is one remaining multiplication by 19 in the carry chain;
104 one *19 precomputation can be merged into this,
105 but the resulting data flow is considerably less clean.
106 
107 There are 12 carries below.
108 10 of them are 2-way parallelizable and vectorizable.
109 Can get away with 11 carries, but then data flow is much deeper.
110 
111 With tighter constraints on inputs can squeeze carries into int32.
112 */
113 
114 //static
116  {
117  int32_t f0 = f[0];
118  int32_t f1 = f[1];
119  int32_t f2 = f[2];
120  int32_t f3 = f[3];
121  int32_t f4 = f[4];
122  int32_t f5 = f[5];
123  int32_t f6 = f[6];
124  int32_t f7 = f[7];
125  int32_t f8 = f[8];
126  int32_t f9 = f[9];
127 
128  int32_t g0 = g[0];
129  int32_t g1 = g[1];
130  int32_t g2 = g[2];
131  int32_t g3 = g[3];
132  int32_t g4 = g[4];
133  int32_t g5 = g[5];
134  int32_t g6 = g[6];
135  int32_t g7 = g[7];
136  int32_t g8 = g[8];
137  int32_t g9 = g[9];
138 
139  int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140  int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141  int32_t g3_19 = 19 * g3;
142  int32_t g4_19 = 19 * g4;
143  int32_t g5_19 = 19 * g5;
144  int32_t g6_19 = 19 * g6;
145  int32_t g7_19 = 19 * g7;
146  int32_t g8_19 = 19 * g8;
147  int32_t g9_19 = 19 * g9;
148  int32_t f1_2 = 2 * f1;
149  int32_t f3_2 = 2 * f3;
150  int32_t f5_2 = 2 * f5;
151  int32_t f7_2 = 2 * f7;
152  int32_t f9_2 = 2 * f9;
153  int64_t f0g0 = f0 * static_cast<int64_t>(g0);
154  int64_t f0g1 = f0 * static_cast<int64_t>(g1);
155  int64_t f0g2 = f0 * static_cast<int64_t>(g2);
156  int64_t f0g3 = f0 * static_cast<int64_t>(g3);
157  int64_t f0g4 = f0 * static_cast<int64_t>(g4);
158  int64_t f0g5 = f0 * static_cast<int64_t>(g5);
159  int64_t f0g6 = f0 * static_cast<int64_t>(g6);
160  int64_t f0g7 = f0 * static_cast<int64_t>(g7);
161  int64_t f0g8 = f0 * static_cast<int64_t>(g8);
162  int64_t f0g9 = f0 * static_cast<int64_t>(g9);
163  int64_t f1g0 = f1 * static_cast<int64_t>(g0);
164  int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1);
165  int64_t f1g2 = f1 * static_cast<int64_t>(g2);
166  int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3);
167  int64_t f1g4 = f1 * static_cast<int64_t>(g4);
168  int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5);
169  int64_t f1g6 = f1 * static_cast<int64_t>(g6);
170  int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7);
171  int64_t f1g8 = f1 * static_cast<int64_t>(g8);
172  int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
173  int64_t f2g0 = f2 * static_cast<int64_t>(g0);
174  int64_t f2g1 = f2 * static_cast<int64_t>(g1);
175  int64_t f2g2 = f2 * static_cast<int64_t>(g2);
176  int64_t f2g3 = f2 * static_cast<int64_t>(g3);
177  int64_t f2g4 = f2 * static_cast<int64_t>(g4);
178  int64_t f2g5 = f2 * static_cast<int64_t>(g5);
179  int64_t f2g6 = f2 * static_cast<int64_t>(g6);
180  int64_t f2g7 = f2 * static_cast<int64_t>(g7);
181  int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19);
182  int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19);
183  int64_t f3g0 = f3 * static_cast<int64_t>(g0);
184  int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1);
185  int64_t f3g2 = f3 * static_cast<int64_t>(g2);
186  int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3);
187  int64_t f3g4 = f3 * static_cast<int64_t>(g4);
188  int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5);
189  int64_t f3g6 = f3 * static_cast<int64_t>(g6);
190  int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
191  int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19);
192  int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
193  int64_t f4g0 = f4 * static_cast<int64_t>(g0);
194  int64_t f4g1 = f4 * static_cast<int64_t>(g1);
195  int64_t f4g2 = f4 * static_cast<int64_t>(g2);
196  int64_t f4g3 = f4 * static_cast<int64_t>(g3);
197  int64_t f4g4 = f4 * static_cast<int64_t>(g4);
198  int64_t f4g5 = f4 * static_cast<int64_t>(g5);
199  int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19);
200  int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19);
201  int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19);
202  int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19);
203  int64_t f5g0 = f5 * static_cast<int64_t>(g0);
204  int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1);
205  int64_t f5g2 = f5 * static_cast<int64_t>(g2);
206  int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3);
207  int64_t f5g4 = f5 * static_cast<int64_t>(g4);
208  int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
209  int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19);
210  int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
211  int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19);
212  int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
213  int64_t f6g0 = f6 * static_cast<int64_t>(g0);
214  int64_t f6g1 = f6 * static_cast<int64_t>(g1);
215  int64_t f6g2 = f6 * static_cast<int64_t>(g2);
216  int64_t f6g3 = f6 * static_cast<int64_t>(g3);
217  int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19);
218  int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19);
219  int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19);
220  int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19);
221  int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19);
222  int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19);
223  int64_t f7g0 = f7 * static_cast<int64_t>(g0);
224  int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1);
225  int64_t f7g2 = f7 * static_cast<int64_t>(g2);
226  int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
227  int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19);
228  int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
229  int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19);
230  int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
231  int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19);
232  int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
233  int64_t f8g0 = f8 * static_cast<int64_t>(g0);
234  int64_t f8g1 = f8 * static_cast<int64_t>(g1);
235  int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19);
236  int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19);
237  int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19);
238  int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19);
239  int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19);
240  int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19);
241  int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19);
242  int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19);
243  int64_t f9g0 = f9 * static_cast<int64_t>(g0);
244  int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
245  int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19);
246  int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
247  int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19);
248  int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
249  int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19);
250  int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
251  int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19);
252  int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
253  int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
254  int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
255  int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
256  int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
257  int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
258  int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
259  int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
260  int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
261  int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
262  int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
263  int64_t carry0;
264  int64_t carry1;
265  int64_t carry2;
266  int64_t carry3;
267  int64_t carry4;
268  int64_t carry5;
269  int64_t carry6;
270  int64_t carry7;
271  int64_t carry8;
272  int64_t carry9;
273 
274  /*
275  |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
276  i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
277  |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
278  i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
279  */
280  const int64_t X24 = (1 << 24);
281  const int64_t X25 = (1 << 25);
282  const int64_t X26 = (1 << 26);
283 
284  carry0 = (h0 + X25) >> 26;
285  h1 += carry0;
286  h0 -= carry0 * X26;
287  carry4 = (h4 + X25) >> 26;
288  h5 += carry4;
289  h4 -= carry4 * X26;
290  /* |h0| <= 2^25 */
291  /* |h4| <= 2^25 */
292  /* |h1| <= 1.71*2^59 */
293  /* |h5| <= 1.71*2^59 */
294 
295  carry1 = (h1 + X24) >> 25;
296  h2 += carry1;
297  h1 -= carry1 * X25;
298  carry5 = (h5 + X24) >> 25;
299  h6 += carry5;
300  h5 -= carry5 * X25;
301  /* |h1| <= 2^24; from now on fits into int32 */
302  /* |h5| <= 2^24; from now on fits into int32 */
303  /* |h2| <= 1.41*2^60 */
304  /* |h6| <= 1.41*2^60 */
305 
306  carry2 = (h2 + X25) >> 26;
307  h3 += carry2;
308  h2 -= carry2 * X26;
309  carry6 = (h6 + X25) >> 26;
310  h7 += carry6;
311  h6 -= carry6 * X26;
312  /* |h2| <= 2^25; from now on fits into int32 unchanged */
313  /* |h6| <= 2^25; from now on fits into int32 unchanged */
314  /* |h3| <= 1.71*2^59 */
315  /* |h7| <= 1.71*2^59 */
316 
317  carry3 = (h3 + X24) >> 25;
318  h4 += carry3;
319  h3 -= carry3 * X25;
320  carry7 = (h7 + X24) >> 25;
321  h8 += carry7;
322  h7 -= carry7 * X25;
323  /* |h3| <= 2^24; from now on fits into int32 unchanged */
324  /* |h7| <= 2^24; from now on fits into int32 unchanged */
325  /* |h4| <= 1.72*2^34 */
326  /* |h8| <= 1.41*2^60 */
327 
328  carry4 = (h4 + X25) >> 26;
329  h5 += carry4;
330  h4 -= carry4 * X26;
331  carry8 = (h8 + X25) >> 26;
332  h9 += carry8;
333  h8 -= carry8 * X26;
334  /* |h4| <= 2^25; from now on fits into int32 unchanged */
335  /* |h8| <= 2^25; from now on fits into int32 unchanged */
336  /* |h5| <= 1.01*2^24 */
337  /* |h9| <= 1.71*2^59 */
338 
339  carry9 = (h9 + X24) >> 25;
340  h0 += carry9 * 19;
341  h9 -= carry9 * X25;
342  /* |h9| <= 2^24; from now on fits into int32 unchanged */
343  /* |h0| <= 1.1*2^39 */
344 
345  carry0 = (h0 + X25) >> 26;
346  h1 += carry0;
347  h0 -= carry0 * X26;
348  /* |h0| <= 2^25; from now on fits into int32 unchanged */
349  /* |h1| <= 1.01*2^24 */
350 
351  return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
352  }
353 
354 /*
355 h = f * f
356 Can overlap h with f.
357 
358 Preconditions:
359 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
360 
361 Postconditions:
362 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
363 */
364 
365 /*
366 See fe_mul.c for discussion of implementation strategy.
367 */
368 
369 //static
370 FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
371  {
372  const int64_t X24 = (1 << 24);
373  const int64_t X25 = (1 << 25);
374  const int64_t X26 = (1 << 26);
375 
376  int32_t f0 = f[0];
377  int32_t f1 = f[1];
378  int32_t f2 = f[2];
379  int32_t f3 = f[3];
380  int32_t f4 = f[4];
381  int32_t f5 = f[5];
382  int32_t f6 = f[6];
383  int32_t f7 = f[7];
384  int32_t f8 = f[8];
385  int32_t f9 = f[9];
386 
387  for(size_t i = 0; i != iter; ++i)
388  {
389  const int32_t f0_2 = 2 * f0;
390  const int32_t f1_2 = 2 * f1;
391  const int32_t f2_2 = 2 * f2;
392  const int32_t f3_2 = 2 * f3;
393  const int32_t f4_2 = 2 * f4;
394  const int32_t f5_2 = 2 * f5;
395  const int32_t f6_2 = 2 * f6;
396  const int32_t f7_2 = 2 * f7;
397  const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
398  const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
399  const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
400  const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
401  const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
402 
403  const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
404  const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
405  const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
406  const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
407  const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
408  const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
409  const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
410  const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
411  const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
412  const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
413  const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
414  const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
415  const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
416  const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
417  const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
418  const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
419  const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
420  const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
421  const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
422  const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
423  const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
424  const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
425  const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
426  const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
427  const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
428  const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
429  const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
430  const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
431  const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
432  const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
433  const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
434  const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
435  const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
436  const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
437  const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
438  const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
439  const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
440  const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
441  const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
442  const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
443  const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
444  const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
445  const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
446  const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
447  const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
448  const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
449  const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
450  const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
451  const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
452  const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
453  const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
454  const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
455  const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
456  const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
457  const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
458 
459  int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
460  int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
461  int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
462  int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
463  int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
464  int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
465  int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
466  int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
467  int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
468  int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
469 
470  int64_t carry0;
471  int64_t carry1;
472  int64_t carry2;
473  int64_t carry3;
474  int64_t carry4;
475  int64_t carry5;
476  int64_t carry6;
477  int64_t carry7;
478  int64_t carry8;
479  int64_t carry9;
480 
481  carry0 = (h0 + X25) >> 26;
482  h1 += carry0;
483  h0 -= carry0 * X26;
484  carry4 = (h4 + X25) >> 26;
485  h5 += carry4;
486  h4 -= carry4 * X26;
487 
488  carry1 = (h1 + X24) >> 25;
489  h2 += carry1;
490  h1 -= carry1 * X25;
491  carry5 = (h5 + X24) >> 25;
492  h6 += carry5;
493  h5 -= carry5 * X25;
494 
495  carry2 = (h2 + X25) >> 26;
496  h3 += carry2;
497  h2 -= carry2 * X26;
498  carry6 = (h6 + X25) >> 26;
499  h7 += carry6;
500  h6 -= carry6 * X26;
501 
502  carry3 = (h3 + X24) >> 25;
503  h4 += carry3;
504  h3 -= carry3 * X25;
505  carry7 = (h7 + X24) >> 25;
506  h8 += carry7;
507  h7 -= carry7 * X25;
508 
509  carry4 = (h4 + X25) >> 26;
510  h5 += carry4;
511  h4 -= carry4 * X26;
512  carry8 = (h8 + X25) >> 26;
513  h9 += carry8;
514  h8 -= carry8 * X26;
515 
516  carry9 = (h9 + X24) >> 25;
517  h0 += carry9 * 19;
518  h9 -= carry9 * X25;
519  carry0 = (h0 + X25) >> 26;
520  h1 += carry0;
521  h0 -= carry0 * X26;
522 
523  f0 = h0;
524  f1 = h1;
525  f2 = h2;
526  f3 = h3;
527  f4 = h4;
528  f5 = h5;
529  f6 = h6;
530  f7 = h7;
531  f8 = h8;
532  f9 = h9;
533  }
534 
535  return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
536  }
537 
538 /*
539 h = 2 * f * f
540 Can overlap h with f.
541 
542 Preconditions:
543 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
544 
545 Postconditions:
546 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
547 */
548 
549 /*
550 See fe_mul.c for discussion of implementation strategy.
551 */
552 
553 //static
555  {
556  const int64_t X24 = (1 << 24);
557  const int64_t X25 = (1 << 25);
558  const int64_t X26 = (1 << 26);
559 
560  int32_t f0 = f[0];
561  int32_t f1 = f[1];
562  int32_t f2 = f[2];
563  int32_t f3 = f[3];
564  int32_t f4 = f[4];
565  int32_t f5 = f[5];
566  int32_t f6 = f[6];
567  int32_t f7 = f[7];
568  int32_t f8 = f[8];
569  int32_t f9 = f[9];
570  int32_t f0_2 = 2 * f0;
571  int32_t f1_2 = 2 * f1;
572  int32_t f2_2 = 2 * f2;
573  int32_t f3_2 = 2 * f3;
574  int32_t f4_2 = 2 * f4;
575  int32_t f5_2 = 2 * f5;
576  int32_t f6_2 = 2 * f6;
577  int32_t f7_2 = 2 * f7;
578  int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
579  int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
580  int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
581  int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
582  int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
583  int64_t f0f0 = f0 * static_cast<int64_t>(f0);
584  int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
585  int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
586  int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
587  int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
588  int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
589  int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
590  int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
591  int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
592  int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
593  int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
594  int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
595  int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
596  int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
597  int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
598  int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
599  int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
600  int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
601  int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
602  int64_t f2f2 = f2 * static_cast<int64_t>(f2);
603  int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
604  int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
605  int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
606  int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
607  int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
608  int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
609  int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
610  int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
611  int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
612  int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
613  int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
614  int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
615  int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
616  int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
617  int64_t f4f4 = f4 * static_cast<int64_t>(f4);
618  int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
619  int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
620  int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
621  int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
622  int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
623  int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
624  int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
625  int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
626  int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
627  int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
628  int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
629  int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
630  int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
631  int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
632  int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
633  int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
634  int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
635  int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
636  int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
637  int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
638  int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
639  int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
640  int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
641  int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
642  int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
643  int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
644  int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
645  int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
646  int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
647  int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
648  int64_t carry0;
649  int64_t carry1;
650  int64_t carry2;
651  int64_t carry3;
652  int64_t carry4;
653  int64_t carry5;
654  int64_t carry6;
655  int64_t carry7;
656  int64_t carry8;
657  int64_t carry9;
658 
659  h0 += h0;
660  h1 += h1;
661  h2 += h2;
662  h3 += h3;
663  h4 += h4;
664  h5 += h5;
665  h6 += h6;
666  h7 += h7;
667  h8 += h8;
668  h9 += h9;
669 
670  carry0 = (h0 + X25) >> 26;
671  h1 += carry0;
672  h0 -= carry0 * X26;
673  carry4 = (h4 + X25) >> 26;
674  h5 += carry4;
675  h4 -= carry4 * X26;
676 
677  carry1 = (h1 + X24) >> 25;
678  h2 += carry1;
679  h1 -= carry1 * X25;
680  carry5 = (h5 + X24) >> 25;
681  h6 += carry5;
682  h5 -= carry5 * X25;
683 
684  carry2 = (h2 + X25) >> 26;
685  h3 += carry2;
686  h2 -= carry2 * X26;
687  carry6 = (h6 + X25) >> 26;
688  h7 += carry6;
689  h6 -= carry6 * X26;
690 
691  carry3 = (h3 + X24) >> 25;
692  h4 += carry3;
693  h3 -= carry3 * X25;
694  carry7 = (h7 + X24) >> 25;
695  h8 += carry7;
696  h7 -= carry7 * X25;
697 
698  carry4 = (h4 + X25) >> 26;
699  h5 += carry4;
700  h4 -= carry4 * X26;
701  carry8 = (h8 + X25) >> 26;
702  h9 += carry8;
703  h8 -= carry8 * X26;
704 
705  carry9 = (h9 + X24) >> 25;
706  h0 += carry9 * 19;
707  h9 -= carry9 * X25;
708 
709  carry0 = (h0 + X25) >> 26;
710  h1 += carry0;
711  h0 -= carry0 * X26;
712 
713  return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
714  }
715 
716 /*
717 Ignores top bit of h.
718 */
719 
720 void FE_25519::from_bytes(const uint8_t s[32])
721  {
722  const int64_t X24 = (1 << 24);
723  const int64_t X25 = (1 << 25);
724  const int64_t X26 = (1 << 26);
725 
726  int64_t h0 = load_4(s);
727  int64_t h1 = load_3(s + 4) << 6;
728  int64_t h2 = load_3(s + 7) << 5;
729  int64_t h3 = load_3(s + 10) << 3;
730  int64_t h4 = load_3(s + 13) << 2;
731  int64_t h5 = load_4(s + 16);
732  int64_t h6 = load_3(s + 20) << 7;
733  int64_t h7 = load_3(s + 23) << 5;
734  int64_t h8 = load_3(s + 26) << 4;
735  int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
736 
737  const int64_t carry9 = (h9 + X24) >> 25;
738  h0 += carry9 * 19;
739  h9 -= carry9 * X25;
740  const int64_t carry1 = (h1 + X24) >> 25;
741  h2 += carry1;
742  h1 -= carry1 * X25;
743  const int64_t carry3 = (h3 + X24) >> 25;
744  h4 += carry3;
745  h3 -= carry3 * X25;
746  const int64_t carry5 = (h5 + X24) >> 25;
747  h6 += carry5;
748  h5 -= carry5 * X25;
749  const int64_t carry7 = (h7 + X24) >> 25;
750  h8 += carry7;
751  h7 -= carry7 * X25;
752 
753  const int64_t carry0 = (h0 + X25) >> 26;
754  h1 += carry0;
755  h0 -= carry0 * X26;
756  const int64_t carry2 = (h2 + X25) >> 26;
757  h3 += carry2;
758  h2 -= carry2 * X26;
759  const int64_t carry4 = (h4 + X25) >> 26;
760  h5 += carry4;
761  h4 -= carry4 * X26;
762  const int64_t carry6 = (h6 + X25) >> 26;
763  h7 += carry6;
764  h6 -= carry6 * X26;
765  const int64_t carry8 = (h8 + X25) >> 26;
766  h9 += carry8;
767  h8 -= carry8 * X26;
768 
769  m_fe[0] = h0;
770  m_fe[1] = h1;
771  m_fe[2] = h2;
772  m_fe[3] = h3;
773  m_fe[4] = h4;
774  m_fe[5] = h5;
775  m_fe[6] = h6;
776  m_fe[7] = h7;
777  m_fe[8] = h8;
778  m_fe[9] = h9;
779  }
780 
781 /*
782 Preconditions:
783 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
784 
785 Write p=2^255-19; q=floor(h/p).
786 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
787 
788 Proof:
789 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
790 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
791 
792 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
793 Then 0<y<1.
794 
795 Write r=h-pq.
796 Have 0<=r<=p-1=2^255-20.
797 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
798 
799 Write x=r+19(2^-255)r+y.
800 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
801 
802 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
803 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
804 */
805 
806 void FE_25519::to_bytes(uint8_t s[32]) const
807  {
808  const int64_t X25 = (1 << 25);
809  const int64_t X26 = (1 << 26);
810 
811  int32_t h0 = m_fe[0];
812  int32_t h1 = m_fe[1];
813  int32_t h2 = m_fe[2];
814  int32_t h3 = m_fe[3];
815  int32_t h4 = m_fe[4];
816  int32_t h5 = m_fe[5];
817  int32_t h6 = m_fe[6];
818  int32_t h7 = m_fe[7];
819  int32_t h8 = m_fe[8];
820  int32_t h9 = m_fe[9];
821  int32_t q;
822  int32_t carry0;
823  int32_t carry1;
824  int32_t carry2;
825  int32_t carry3;
826  int32_t carry4;
827  int32_t carry5;
828  int32_t carry6;
829  int32_t carry7;
830  int32_t carry8;
831  int32_t carry9;
832 
833  q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
834  q = (h0 + q) >> 26;
835  q = (h1 + q) >> 25;
836  q = (h2 + q) >> 26;
837  q = (h3 + q) >> 25;
838  q = (h4 + q) >> 26;
839  q = (h5 + q) >> 25;
840  q = (h6 + q) >> 26;
841  q = (h7 + q) >> 25;
842  q = (h8 + q) >> 26;
843  q = (h9 + q) >> 25;
844 
845  /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
846  h0 += 19 * q;
847  /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
848 
849  carry0 = h0 >> 26;
850  h1 += carry0;
851  h0 -= carry0 * X26;
852  carry1 = h1 >> 25;
853  h2 += carry1;
854  h1 -= carry1 * X25;
855  carry2 = h2 >> 26;
856  h3 += carry2;
857  h2 -= carry2 * X26;
858  carry3 = h3 >> 25;
859  h4 += carry3;
860  h3 -= carry3 * X25;
861  carry4 = h4 >> 26;
862  h5 += carry4;
863  h4 -= carry4 * X26;
864  carry5 = h5 >> 25;
865  h6 += carry5;
866  h5 -= carry5 * X25;
867  carry6 = h6 >> 26;
868  h7 += carry6;
869  h6 -= carry6 * X26;
870  carry7 = h7 >> 25;
871  h8 += carry7;
872  h7 -= carry7 * X25;
873  carry8 = h8 >> 26;
874  h9 += carry8;
875  h8 -= carry8 * X26;
876  carry9 = h9 >> 25;
877  h9 -= carry9 * X25;
878  /* h10 = carry9 */
879 
880  /*
881  Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
882  Have h0+...+2^230 h9 between 0 and 2^255-1;
883  evidently 2^255 h10-2^255 q = 0.
884  Goal: Output h0+...+2^230 h9.
885  */
886 
887  s[0] = h0 >> 0;
888  s[1] = h0 >> 8;
889  s[2] = h0 >> 16;
890  s[3] = (h0 >> 24) | (h1 << 2);
891  s[4] = h1 >> 6;
892  s[5] = h1 >> 14;
893  s[6] = (h1 >> 22) | (h2 << 3);
894  s[7] = h2 >> 5;
895  s[8] = h2 >> 13;
896  s[9] = (h2 >> 21) | (h3 << 5);
897  s[10] = h3 >> 3;
898  s[11] = h3 >> 11;
899  s[12] = (h3 >> 19) | (h4 << 6);
900  s[13] = h4 >> 2;
901  s[14] = h4 >> 10;
902  s[15] = h4 >> 18;
903  s[16] = h5 >> 0;
904  s[17] = h5 >> 8;
905  s[18] = h5 >> 16;
906  s[19] = (h5 >> 24) | (h6 << 1);
907  s[20] = h6 >> 7;
908  s[21] = h6 >> 15;
909  s[22] = (h6 >> 23) | (h7 << 3);
910  s[23] = h7 >> 5;
911  s[24] = h7 >> 13;
912  s[25] = (h7 >> 21) | (h8 << 4);
913  s[26] = h8 >> 4;
914  s[27] = h8 >> 12;
915  s[28] = (h8 >> 20) | (h9 << 6);
916  s[29] = h9 >> 2;
917  s[30] = h9 >> 10;
918  s[31] = h9 >> 18;
919  }
920 
921 }
static FE_25519 pow_22523(const FE_25519 &a)
Definition: ed25519_fe.cpp:50
static FE_25519 invert(const FE_25519 &a)
Definition: ed25519_fe.cpp:17
void from_bytes(const uint8_t b[32])
Definition: ed25519_fe.cpp:720
void fe_sq(fe &x, const fe &z)
Definition: ed25519_fe.h:201
uint64_t load_4(const uint8_t *in)
void to_bytes(uint8_t b[32]) const
Definition: ed25519_fe.cpp:806
uint64_t load_3(const uint8_t in[3])
static FE_25519 sqr_iter(const FE_25519 &a, size_t iter)
Definition: ed25519_fe.cpp:370
static FE_25519 sqr2(const FE_25519 &a)
Definition: ed25519_fe.cpp:554
void fe_sq_iter(fe &x, const fe &z, size_t iter)
Definition: ed25519_fe.h:206
static FE_25519 mul(const FE_25519 &a, const FE_25519 &b)
Definition: ed25519_fe.cpp:115
Definition: alg_id.cpp:13
FE_25519(int init=0)
Definition: ed25519_fe.h:29
void fe_mul(fe &x, const fe &a, const fe &b)
Definition: ed25519_fe.h:196