Botan  2.6.0
Crypto and TLS for C++11
ed25519_fe.cpp
Go to the documentation of this file.
1 /*
2 * Ed25519 field element
3 * (C) 2017 Ribose Inc
4 *
5 * Based on the public domain code from SUPERCOP ref10 by
6 * Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7 *
8 * Botan is released under the Simplified BSD License (see license.txt)
9 */
10 
11 #include <botan/internal/ed25519_fe.h>
12 #include <botan/internal/ed25519_internal.h>
13 
14 namespace Botan {
15 
16 //static
18  {
19  fe t0;
20  fe t1;
21  fe t2;
22  fe t3;
23 
24  fe_sq(t0, z);
25  fe_sq_iter(t1, t0, 2);
26  fe_mul(t1, z, t1);
27  fe_mul(t0, t0, t1);
28  fe_sq(t2, t0);
29  fe_mul(t1, t1, t2);
30  fe_sq_iter(t2, t1, 5);
31  fe_mul(t1, t2, t1);
32  fe_sq_iter(t2, t1, 10);
33  fe_mul(t2, t2, t1);
34  fe_sq_iter(t3, t2, 20);
35  fe_mul(t2, t3, t2);
36  fe_sq_iter(t2, t2, 10);
37  fe_mul(t1, t2, t1);
38  fe_sq_iter(t2, t1, 50);
39  fe_mul(t2, t2, t1);
40  fe_sq_iter(t3, t2, 100);
41  fe_mul(t2, t3, t2);
42  fe_sq_iter(t2, t2, 50);
43  fe_mul(t1, t2, t1);
44  fe_sq_iter(t1, t1, 5);
45 
46  fe_mul(t0, t1, t0);
47  return t0;
48  }
49 
51  {
52  fe t0;
53  fe t1;
54  fe t2;
55 
56  fe_sq(t0, z);
57  fe_sq_iter(t1, t0, 2);
58  fe_mul(t1, z, t1);
59  fe_mul(t0, t0, t1);
60  fe_sq(t0, t0);
61  fe_mul(t0, t1, t0);
62  fe_sq_iter(t1, t0, 5);
63  fe_mul(t0, t1, t0);
64  fe_sq_iter(t1, t0, 10);
65  fe_mul(t1, t1, t0);
66  fe_sq_iter(t2, t1, 20);
67  fe_mul(t1, t2, t1);
68  fe_sq_iter(t1, t1, 10);
69  fe_mul(t0, t1, t0);
70  fe_sq_iter(t1, t0, 50);
71  fe_mul(t1, t1, t0);
72  fe_sq_iter(t2, t1, 100);
73  fe_mul(t1, t2, t1);
74  fe_sq_iter(t1, t1, 50);
75  fe_mul(t0, t1, t0);
76  fe_sq_iter(t0, t0, 2);
77 
78  fe_mul(t0, t0, z);
79  return t0;
80  }
81 
82 /*
83 h = f * g
84 Can overlap h with f or g.
85 
86 Preconditions:
87 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
88 |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
89 
90 Postconditions:
91 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
92 */
93 
94 /*
95 Notes on implementation strategy:
96 
97 Using schoolbook multiplication.
98 Karatsuba would save a little in some cost models.
99 
100 Most multiplications by 2 and 19 are 32-bit precomputations;
101 cheaper than 64-bit postcomputations.
102 
103 There is one remaining multiplication by 19 in the carry chain;
104 one *19 precomputation can be merged into this,
105 but the resulting data flow is considerably less clean.
106 
107 There are 12 carries below.
108 10 of them are 2-way parallelizable and vectorizable.
109 Can get away with 11 carries, but then data flow is much deeper.
110 
111 With tighter constraints on inputs can squeeze carries into int32.
112 */
113 
114 //static
116  {
117  const int32_t f0 = f[0];
118  const int32_t f1 = f[1];
119  const int32_t f2 = f[2];
120  const int32_t f3 = f[3];
121  const int32_t f4 = f[4];
122  const int32_t f5 = f[5];
123  const int32_t f6 = f[6];
124  const int32_t f7 = f[7];
125  const int32_t f8 = f[8];
126  const int32_t f9 = f[9];
127 
128  const int32_t g0 = g[0];
129  const int32_t g1 = g[1];
130  const int32_t g2 = g[2];
131  const int32_t g3 = g[3];
132  const int32_t g4 = g[4];
133  const int32_t g5 = g[5];
134  const int32_t g6 = g[6];
135  const int32_t g7 = g[7];
136  const int32_t g8 = g[8];
137  const int32_t g9 = g[9];
138 
139  const int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
140  const int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
141  const int32_t g3_19 = 19 * g3;
142  const int32_t g4_19 = 19 * g4;
143  const int32_t g5_19 = 19 * g5;
144  const int32_t g6_19 = 19 * g6;
145  const int32_t g7_19 = 19 * g7;
146  const int32_t g8_19 = 19 * g8;
147  const int32_t g9_19 = 19 * g9;
148  const int32_t f1_2 = 2 * f1;
149  const int32_t f3_2 = 2 * f3;
150  const int32_t f5_2 = 2 * f5;
151  const int32_t f7_2 = 2 * f7;
152  const int32_t f9_2 = 2 * f9;
153 
154  const int64_t f0g0 = f0 * static_cast<int64_t>(g0);
155  const int64_t f0g1 = f0 * static_cast<int64_t>(g1);
156  const int64_t f0g2 = f0 * static_cast<int64_t>(g2);
157  const int64_t f0g3 = f0 * static_cast<int64_t>(g3);
158  const int64_t f0g4 = f0 * static_cast<int64_t>(g4);
159  const int64_t f0g5 = f0 * static_cast<int64_t>(g5);
160  const int64_t f0g6 = f0 * static_cast<int64_t>(g6);
161  const int64_t f0g7 = f0 * static_cast<int64_t>(g7);
162  const int64_t f0g8 = f0 * static_cast<int64_t>(g8);
163  const int64_t f0g9 = f0 * static_cast<int64_t>(g9);
164  const int64_t f1g0 = f1 * static_cast<int64_t>(g0);
165  const int64_t f1g1_2 = f1_2 * static_cast<int64_t>(g1);
166  const int64_t f1g2 = f1 * static_cast<int64_t>(g2);
167  const int64_t f1g3_2 = f1_2 * static_cast<int64_t>(g3);
168  const int64_t f1g4 = f1 * static_cast<int64_t>(g4);
169  const int64_t f1g5_2 = f1_2 * static_cast<int64_t>(g5);
170  const int64_t f1g6 = f1 * static_cast<int64_t>(g6);
171  const int64_t f1g7_2 = f1_2 * static_cast<int64_t>(g7);
172  const int64_t f1g8 = f1 * static_cast<int64_t>(g8);
173  const int64_t f1g9_38 = f1_2 * static_cast<int64_t>(g9_19);
174  const int64_t f2g0 = f2 * static_cast<int64_t>(g0);
175  const int64_t f2g1 = f2 * static_cast<int64_t>(g1);
176  const int64_t f2g2 = f2 * static_cast<int64_t>(g2);
177  const int64_t f2g3 = f2 * static_cast<int64_t>(g3);
178  const int64_t f2g4 = f2 * static_cast<int64_t>(g4);
179  const int64_t f2g5 = f2 * static_cast<int64_t>(g5);
180  const int64_t f2g6 = f2 * static_cast<int64_t>(g6);
181  const int64_t f2g7 = f2 * static_cast<int64_t>(g7);
182  const int64_t f2g8_19 = f2 * static_cast<int64_t>(g8_19);
183  const int64_t f2g9_19 = f2 * static_cast<int64_t>(g9_19);
184  const int64_t f3g0 = f3 * static_cast<int64_t>(g0);
185  const int64_t f3g1_2 = f3_2 * static_cast<int64_t>(g1);
186  const int64_t f3g2 = f3 * static_cast<int64_t>(g2);
187  const int64_t f3g3_2 = f3_2 * static_cast<int64_t>(g3);
188  const int64_t f3g4 = f3 * static_cast<int64_t>(g4);
189  const int64_t f3g5_2 = f3_2 * static_cast<int64_t>(g5);
190  const int64_t f3g6 = f3 * static_cast<int64_t>(g6);
191  const int64_t f3g7_38 = f3_2 * static_cast<int64_t>(g7_19);
192  const int64_t f3g8_19 = f3 * static_cast<int64_t>(g8_19);
193  const int64_t f3g9_38 = f3_2 * static_cast<int64_t>(g9_19);
194  const int64_t f4g0 = f4 * static_cast<int64_t>(g0);
195  const int64_t f4g1 = f4 * static_cast<int64_t>(g1);
196  const int64_t f4g2 = f4 * static_cast<int64_t>(g2);
197  const int64_t f4g3 = f4 * static_cast<int64_t>(g3);
198  const int64_t f4g4 = f4 * static_cast<int64_t>(g4);
199  const int64_t f4g5 = f4 * static_cast<int64_t>(g5);
200  const int64_t f4g6_19 = f4 * static_cast<int64_t>(g6_19);
201  const int64_t f4g7_19 = f4 * static_cast<int64_t>(g7_19);
202  const int64_t f4g8_19 = f4 * static_cast<int64_t>(g8_19);
203  const int64_t f4g9_19 = f4 * static_cast<int64_t>(g9_19);
204  const int64_t f5g0 = f5 * static_cast<int64_t>(g0);
205  const int64_t f5g1_2 = f5_2 * static_cast<int64_t>(g1);
206  const int64_t f5g2 = f5 * static_cast<int64_t>(g2);
207  const int64_t f5g3_2 = f5_2 * static_cast<int64_t>(g3);
208  const int64_t f5g4 = f5 * static_cast<int64_t>(g4);
209  const int64_t f5g5_38 = f5_2 * static_cast<int64_t>(g5_19);
210  const int64_t f5g6_19 = f5 * static_cast<int64_t>(g6_19);
211  const int64_t f5g7_38 = f5_2 * static_cast<int64_t>(g7_19);
212  const int64_t f5g8_19 = f5 * static_cast<int64_t>(g8_19);
213  const int64_t f5g9_38 = f5_2 * static_cast<int64_t>(g9_19);
214  const int64_t f6g0 = f6 * static_cast<int64_t>(g0);
215  const int64_t f6g1 = f6 * static_cast<int64_t>(g1);
216  const int64_t f6g2 = f6 * static_cast<int64_t>(g2);
217  const int64_t f6g3 = f6 * static_cast<int64_t>(g3);
218  const int64_t f6g4_19 = f6 * static_cast<int64_t>(g4_19);
219  const int64_t f6g5_19 = f6 * static_cast<int64_t>(g5_19);
220  const int64_t f6g6_19 = f6 * static_cast<int64_t>(g6_19);
221  const int64_t f6g7_19 = f6 * static_cast<int64_t>(g7_19);
222  const int64_t f6g8_19 = f6 * static_cast<int64_t>(g8_19);
223  const int64_t f6g9_19 = f6 * static_cast<int64_t>(g9_19);
224  const int64_t f7g0 = f7 * static_cast<int64_t>(g0);
225  const int64_t f7g1_2 = f7_2 * static_cast<int64_t>(g1);
226  const int64_t f7g2 = f7 * static_cast<int64_t>(g2);
227  const int64_t f7g3_38 = f7_2 * static_cast<int64_t>(g3_19);
228  const int64_t f7g4_19 = f7 * static_cast<int64_t>(g4_19);
229  const int64_t f7g5_38 = f7_2 * static_cast<int64_t>(g5_19);
230  const int64_t f7g6_19 = f7 * static_cast<int64_t>(g6_19);
231  const int64_t f7g7_38 = f7_2 * static_cast<int64_t>(g7_19);
232  const int64_t f7g8_19 = f7 * static_cast<int64_t>(g8_19);
233  const int64_t f7g9_38 = f7_2 * static_cast<int64_t>(g9_19);
234  const int64_t f8g0 = f8 * static_cast<int64_t>(g0);
235  const int64_t f8g1 = f8 * static_cast<int64_t>(g1);
236  const int64_t f8g2_19 = f8 * static_cast<int64_t>(g2_19);
237  const int64_t f8g3_19 = f8 * static_cast<int64_t>(g3_19);
238  const int64_t f8g4_19 = f8 * static_cast<int64_t>(g4_19);
239  const int64_t f8g5_19 = f8 * static_cast<int64_t>(g5_19);
240  const int64_t f8g6_19 = f8 * static_cast<int64_t>(g6_19);
241  const int64_t f8g7_19 = f8 * static_cast<int64_t>(g7_19);
242  const int64_t f8g8_19 = f8 * static_cast<int64_t>(g8_19);
243  const int64_t f8g9_19 = f8 * static_cast<int64_t>(g9_19);
244  const int64_t f9g0 = f9 * static_cast<int64_t>(g0);
245  const int64_t f9g1_38 = f9_2 * static_cast<int64_t>(g1_19);
246  const int64_t f9g2_19 = f9 * static_cast<int64_t>(g2_19);
247  const int64_t f9g3_38 = f9_2 * static_cast<int64_t>(g3_19);
248  const int64_t f9g4_19 = f9 * static_cast<int64_t>(g4_19);
249  const int64_t f9g5_38 = f9_2 * static_cast<int64_t>(g5_19);
250  const int64_t f9g6_19 = f9 * static_cast<int64_t>(g6_19);
251  const int64_t f9g7_38 = f9_2 * static_cast<int64_t>(g7_19);
252  const int64_t f9g8_19 = f9 * static_cast<int64_t>(g8_19);
253  const int64_t f9g9_38 = f9_2 * static_cast<int64_t>(g9_19);
254 
255  int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
256  int64_t h1 = f0g1+f1g0 +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
257  int64_t h2 = f0g2+f1g1_2 +f2g0 +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
258  int64_t h3 = f0g3+f1g2 +f2g1 +f3g0 +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
259  int64_t h4 = f0g4+f1g3_2 +f2g2 +f3g1_2 +f4g0 +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
260  int64_t h5 = f0g5+f1g4 +f2g3 +f3g2 +f4g1 +f5g0 +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
261  int64_t h6 = f0g6+f1g5_2 +f2g4 +f3g3_2 +f4g2 +f5g1_2 +f6g0 +f7g9_38+f8g8_19+f9g7_38;
262  int64_t h7 = f0g7+f1g6 +f2g5 +f3g4 +f4g3 +f5g2 +f6g1 +f7g0 +f8g9_19+f9g8_19;
263  int64_t h8 = f0g8+f1g7_2 +f2g6 +f3g5_2 +f4g4 +f5g3_2 +f6g2 +f7g1_2 +f8g0 +f9g9_38;
264  int64_t h9 = f0g9+f1g8 +f2g7 +f3g6 +f4g5 +f5g4 +f6g3 +f7g2 +f8g1 +f9g0 ;
265 
266  /*
267  |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
268  i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
269  |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
270  i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
271  */
272  carry<26>(h0, h1);
273  carry<26>(h4, h5);
274 
275  /* |h0| <= 2^25 */
276  /* |h4| <= 2^25 */
277  /* |h1| <= 1.71*2^59 */
278  /* |h5| <= 1.71*2^59 */
279 
280  carry<25>(h1, h2);
281  carry<25>(h5, h6);
282 
283  /* |h1| <= 2^24; from now on fits into int32 */
284  /* |h5| <= 2^24; from now on fits into int32 */
285  /* |h2| <= 1.41*2^60 */
286  /* |h6| <= 1.41*2^60 */
287 
288  carry<26>(h2, h3);
289  carry<26>(h6, h7);
290  /* |h2| <= 2^25; from now on fits into int32 unchanged */
291  /* |h6| <= 2^25; from now on fits into int32 unchanged */
292  /* |h3| <= 1.71*2^59 */
293  /* |h7| <= 1.71*2^59 */
294 
295  carry<25>(h3, h4);
296  carry<25>(h7, h8);
297  /* |h3| <= 2^24; from now on fits into int32 unchanged */
298  /* |h7| <= 2^24; from now on fits into int32 unchanged */
299  /* |h4| <= 1.72*2^34 */
300  /* |h8| <= 1.41*2^60 */
301 
302  carry<26>(h4, h5);
303  carry<26>(h8, h9);
304  /* |h4| <= 2^25; from now on fits into int32 unchanged */
305  /* |h8| <= 2^25; from now on fits into int32 unchanged */
306  /* |h5| <= 1.01*2^24 */
307  /* |h9| <= 1.71*2^59 */
308 
309  carry<25, 19>(h9, h0);
310 
311  /* |h9| <= 2^24; from now on fits into int32 unchanged */
312  /* |h0| <= 1.1*2^39 */
313 
314  carry<26>(h0, h1);
315  /* |h0| <= 2^25; from now on fits into int32 unchanged */
316  /* |h1| <= 1.01*2^24 */
317 
318  return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
319  }
320 
321 /*
322 h = f * f
323 Can overlap h with f.
324 
325 Preconditions:
326 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
327 
328 Postconditions:
329 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
330 */
331 
332 /*
333 See fe_mul.c for discussion of implementation strategy.
334 */
335 
336 //static
337 FE_25519 FE_25519::sqr_iter(const FE_25519& f, size_t iter)
338  {
339  int32_t f0 = f[0];
340  int32_t f1 = f[1];
341  int32_t f2 = f[2];
342  int32_t f3 = f[3];
343  int32_t f4 = f[4];
344  int32_t f5 = f[5];
345  int32_t f6 = f[6];
346  int32_t f7 = f[7];
347  int32_t f8 = f[8];
348  int32_t f9 = f[9];
349 
350  for(size_t i = 0; i != iter; ++i)
351  {
352  const int32_t f0_2 = 2 * f0;
353  const int32_t f1_2 = 2 * f1;
354  const int32_t f2_2 = 2 * f2;
355  const int32_t f3_2 = 2 * f3;
356  const int32_t f4_2 = 2 * f4;
357  const int32_t f5_2 = 2 * f5;
358  const int32_t f6_2 = 2 * f6;
359  const int32_t f7_2 = 2 * f7;
360  const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
361  const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
362  const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
363  const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
364  const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
365 
366  const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
367  const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
368  const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
369  const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
370  const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
371  const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
372  const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
373  const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
374  const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
375  const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
376  const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
377  const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
378  const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
379  const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
380  const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
381  const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
382  const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
383  const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
384  const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
385  const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
386  const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
387  const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
388  const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
389  const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
390  const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
391  const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
392  const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
393  const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
394  const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
395  const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
396  const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
397  const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
398  const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
399  const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
400  const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
401  const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
402  const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
403  const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
404  const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
405  const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
406  const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
407  const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
408  const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
409  const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
410  const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
411  const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
412  const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
413  const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
414  const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
415  const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
416  const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
417  const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
418  const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
419  const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
420  const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
421 
422  int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
423  int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
424  int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
425  int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
426  int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
427  int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
428  int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
429  int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
430  int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
431  int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
432 
433  carry<26>(h0, h1);
434  carry<26>(h4, h5);
435  carry<25>(h1, h2);
436  carry<25>(h5, h6);
437  carry<26>(h2, h3);
438  carry<26>(h6, h7);
439 
440  carry<25>(h3, h4);
441  carry<25>(h7, h8);
442 
443  carry<26>(h4, h5);
444  carry<26>(h8, h9);
445  carry<25,19>(h9, h0);
446  carry<26>(h0, h1);
447 
448  f0 = h0;
449  f1 = h1;
450  f2 = h2;
451  f3 = h3;
452  f4 = h4;
453  f5 = h5;
454  f6 = h6;
455  f7 = h7;
456  f8 = h8;
457  f9 = h9;
458  }
459 
460  return FE_25519(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9);
461  }
462 
463 /*
464 h = 2 * f * f
465 Can overlap h with f.
466 
467 Preconditions:
468 |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
469 
470 Postconditions:
471 |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
472 */
473 
474 /*
475 See fe_mul.c for discussion of implementation strategy.
476 */
477 
478 //static
480  {
481  const int32_t f0 = f[0];
482  const int32_t f1 = f[1];
483  const int32_t f2 = f[2];
484  const int32_t f3 = f[3];
485  const int32_t f4 = f[4];
486  const int32_t f5 = f[5];
487  const int32_t f6 = f[6];
488  const int32_t f7 = f[7];
489  const int32_t f8 = f[8];
490  const int32_t f9 = f[9];
491  const int32_t f0_2 = 2 * f0;
492  const int32_t f1_2 = 2 * f1;
493  const int32_t f2_2 = 2 * f2;
494  const int32_t f3_2 = 2 * f3;
495  const int32_t f4_2 = 2 * f4;
496  const int32_t f5_2 = 2 * f5;
497  const int32_t f6_2 = 2 * f6;
498  const int32_t f7_2 = 2 * f7;
499  const int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
500  const int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
501  const int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
502  const int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
503  const int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
504  const int64_t f0f0 = f0 * static_cast<int64_t>(f0);
505  const int64_t f0f1_2 = f0_2 * static_cast<int64_t>(f1);
506  const int64_t f0f2_2 = f0_2 * static_cast<int64_t>(f2);
507  const int64_t f0f3_2 = f0_2 * static_cast<int64_t>(f3);
508  const int64_t f0f4_2 = f0_2 * static_cast<int64_t>(f4);
509  const int64_t f0f5_2 = f0_2 * static_cast<int64_t>(f5);
510  const int64_t f0f6_2 = f0_2 * static_cast<int64_t>(f6);
511  const int64_t f0f7_2 = f0_2 * static_cast<int64_t>(f7);
512  const int64_t f0f8_2 = f0_2 * static_cast<int64_t>(f8);
513  const int64_t f0f9_2 = f0_2 * static_cast<int64_t>(f9);
514  const int64_t f1f1_2 = f1_2 * static_cast<int64_t>(f1);
515  const int64_t f1f2_2 = f1_2 * static_cast<int64_t>(f2);
516  const int64_t f1f3_4 = f1_2 * static_cast<int64_t>(f3_2);
517  const int64_t f1f4_2 = f1_2 * static_cast<int64_t>(f4);
518  const int64_t f1f5_4 = f1_2 * static_cast<int64_t>(f5_2);
519  const int64_t f1f6_2 = f1_2 * static_cast<int64_t>(f6);
520  const int64_t f1f7_4 = f1_2 * static_cast<int64_t>(f7_2);
521  const int64_t f1f8_2 = f1_2 * static_cast<int64_t>(f8);
522  const int64_t f1f9_76 = f1_2 * static_cast<int64_t>(f9_38);
523  const int64_t f2f2 = f2 * static_cast<int64_t>(f2);
524  const int64_t f2f3_2 = f2_2 * static_cast<int64_t>(f3);
525  const int64_t f2f4_2 = f2_2 * static_cast<int64_t>(f4);
526  const int64_t f2f5_2 = f2_2 * static_cast<int64_t>(f5);
527  const int64_t f2f6_2 = f2_2 * static_cast<int64_t>(f6);
528  const int64_t f2f7_2 = f2_2 * static_cast<int64_t>(f7);
529  const int64_t f2f8_38 = f2_2 * static_cast<int64_t>(f8_19);
530  const int64_t f2f9_38 = f2 * static_cast<int64_t>(f9_38);
531  const int64_t f3f3_2 = f3_2 * static_cast<int64_t>(f3);
532  const int64_t f3f4_2 = f3_2 * static_cast<int64_t>(f4);
533  const int64_t f3f5_4 = f3_2 * static_cast<int64_t>(f5_2);
534  const int64_t f3f6_2 = f3_2 * static_cast<int64_t>(f6);
535  const int64_t f3f7_76 = f3_2 * static_cast<int64_t>(f7_38);
536  const int64_t f3f8_38 = f3_2 * static_cast<int64_t>(f8_19);
537  const int64_t f3f9_76 = f3_2 * static_cast<int64_t>(f9_38);
538  const int64_t f4f4 = f4 * static_cast<int64_t>(f4);
539  const int64_t f4f5_2 = f4_2 * static_cast<int64_t>(f5);
540  const int64_t f4f6_38 = f4_2 * static_cast<int64_t>(f6_19);
541  const int64_t f4f7_38 = f4 * static_cast<int64_t>(f7_38);
542  const int64_t f4f8_38 = f4_2 * static_cast<int64_t>(f8_19);
543  const int64_t f4f9_38 = f4 * static_cast<int64_t>(f9_38);
544  const int64_t f5f5_38 = f5 * static_cast<int64_t>(f5_38);
545  const int64_t f5f6_38 = f5_2 * static_cast<int64_t>(f6_19);
546  const int64_t f5f7_76 = f5_2 * static_cast<int64_t>(f7_38);
547  const int64_t f5f8_38 = f5_2 * static_cast<int64_t>(f8_19);
548  const int64_t f5f9_76 = f5_2 * static_cast<int64_t>(f9_38);
549  const int64_t f6f6_19 = f6 * static_cast<int64_t>(f6_19);
550  const int64_t f6f7_38 = f6 * static_cast<int64_t>(f7_38);
551  const int64_t f6f8_38 = f6_2 * static_cast<int64_t>(f8_19);
552  const int64_t f6f9_38 = f6 * static_cast<int64_t>(f9_38);
553  const int64_t f7f7_38 = f7 * static_cast<int64_t>(f7_38);
554  const int64_t f7f8_38 = f7_2 * static_cast<int64_t>(f8_19);
555  const int64_t f7f9_76 = f7_2 * static_cast<int64_t>(f9_38);
556  const int64_t f8f8_19 = f8 * static_cast<int64_t>(f8_19);
557  const int64_t f8f9_38 = f8 * static_cast<int64_t>(f9_38);
558  const int64_t f9f9_38 = f9 * static_cast<int64_t>(f9_38);
559 
560  int64_t h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
561  int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
562  int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
563  int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
564  int64_t h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
565  int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
566  int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
567  int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
568  int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
569  int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
570 
571  h0 += h0;
572  h1 += h1;
573  h2 += h2;
574  h3 += h3;
575  h4 += h4;
576  h5 += h5;
577  h6 += h6;
578  h7 += h7;
579  h8 += h8;
580  h9 += h9;
581 
582  carry<26>(h0, h1);
583  carry<26>(h4, h5);
584 
585  carry<25>(h1, h2);
586  carry<25>(h5, h6);
587 
588  carry<26>(h2, h3);
589  carry<26>(h6, h7);
590 
591  carry<25>(h3, h4);
592  carry<25>(h7, h8);
593  carry<26>(h4, h5);
594  carry<26>(h8, h9);
595  carry<25,19>(h9, h0);
596  carry<26>(h0, h1);
597 
598  return FE_25519(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9);
599  }
600 
601 /*
602 Ignores top bit of h.
603 */
604 
605 void FE_25519::from_bytes(const uint8_t s[32])
606  {
607  int64_t h0 = load_4(s);
608  int64_t h1 = load_3(s + 4) << 6;
609  int64_t h2 = load_3(s + 7) << 5;
610  int64_t h3 = load_3(s + 10) << 3;
611  int64_t h4 = load_3(s + 13) << 2;
612  int64_t h5 = load_4(s + 16);
613  int64_t h6 = load_3(s + 20) << 7;
614  int64_t h7 = load_3(s + 23) << 5;
615  int64_t h8 = load_3(s + 26) << 4;
616  int64_t h9 = (load_3(s + 29) & 0x7fffff) << 2;
617 
618  carry<25,19>(h9, h0);
619  carry<25>(h1, h2);
620  carry<25>(h3, h4);
621  carry<25>(h5, h6);
622  carry<25>(h7, h8);
623 
624  carry<26>(h0, h1);
625  carry<26>(h2, h3);
626  carry<26>(h4, h5);
627  carry<26>(h6, h7);
628  carry<26>(h8, h9);
629 
630  m_fe[0] = h0;
631  m_fe[1] = h1;
632  m_fe[2] = h2;
633  m_fe[3] = h3;
634  m_fe[4] = h4;
635  m_fe[5] = h5;
636  m_fe[6] = h6;
637  m_fe[7] = h7;
638  m_fe[8] = h8;
639  m_fe[9] = h9;
640  }
641 
642 /*
643 Preconditions:
644 |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
645 
646 Write p=2^255-19; q=floor(h/p).
647 Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
648 
649 Proof:
650 Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
651 Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
652 
653 Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
654 Then 0<y<1.
655 
656 Write r=h-pq.
657 Have 0<=r<=p-1=2^255-20.
658 Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
659 
660 Write x=r+19(2^-255)r+y.
661 Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
662 
663 Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
664 so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
665 */
666 
667 void FE_25519::to_bytes(uint8_t s[32]) const
668  {
669  const int64_t X25 = (1 << 25);
670 
671  int32_t h0 = m_fe[0];
672  int32_t h1 = m_fe[1];
673  int32_t h2 = m_fe[2];
674  int32_t h3 = m_fe[3];
675  int32_t h4 = m_fe[4];
676  int32_t h5 = m_fe[5];
677  int32_t h6 = m_fe[6];
678  int32_t h7 = m_fe[7];
679  int32_t h8 = m_fe[8];
680  int32_t h9 = m_fe[9];
681  int32_t q;
682 
683  q = (19 * h9 + ((static_cast<int32_t>(1) << 24))) >> 25;
684  q = (h0 + q) >> 26;
685  q = (h1 + q) >> 25;
686  q = (h2 + q) >> 26;
687  q = (h3 + q) >> 25;
688  q = (h4 + q) >> 26;
689  q = (h5 + q) >> 25;
690  q = (h6 + q) >> 26;
691  q = (h7 + q) >> 25;
692  q = (h8 + q) >> 26;
693  q = (h9 + q) >> 25;
694 
695  /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
696  h0 += 19 * q;
697  /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
698 
699  carry0<26>(h0, h1);
700  carry0<25>(h1, h2);
701  carry0<26>(h2, h3);
702  carry0<25>(h3, h4);
703  carry0<26>(h4, h5);
704  carry0<25>(h5, h6);
705  carry0<26>(h6, h7);
706  carry0<25>(h7, h8);
707  carry0<26>(h8, h9);
708 
709  int32_t carry9 = h9 >> 25;
710  h9 -= carry9 * X25;
711  /* h10 = carry9 */
712 
713  /*
714  Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
715  Have h0+...+2^230 h9 between 0 and 2^255-1;
716  evidently 2^255 h10-2^255 q = 0.
717  Goal: Output h0+...+2^230 h9.
718  */
719 
720  s[0] = h0 >> 0;
721  s[1] = h0 >> 8;
722  s[2] = h0 >> 16;
723  s[3] = (h0 >> 24) | (h1 << 2);
724  s[4] = h1 >> 6;
725  s[5] = h1 >> 14;
726  s[6] = (h1 >> 22) | (h2 << 3);
727  s[7] = h2 >> 5;
728  s[8] = h2 >> 13;
729  s[9] = (h2 >> 21) | (h3 << 5);
730  s[10] = h3 >> 3;
731  s[11] = h3 >> 11;
732  s[12] = (h3 >> 19) | (h4 << 6);
733  s[13] = h4 >> 2;
734  s[14] = h4 >> 10;
735  s[15] = h4 >> 18;
736  s[16] = h5 >> 0;
737  s[17] = h5 >> 8;
738  s[18] = h5 >> 16;
739  s[19] = (h5 >> 24) | (h6 << 1);
740  s[20] = h6 >> 7;
741  s[21] = h6 >> 15;
742  s[22] = (h6 >> 23) | (h7 << 3);
743  s[23] = h7 >> 5;
744  s[24] = h7 >> 13;
745  s[25] = (h7 >> 21) | (h8 << 4);
746  s[26] = h8 >> 4;
747  s[27] = h8 >> 12;
748  s[28] = (h8 >> 20) | (h9 << 6);
749  s[29] = h9 >> 2;
750  s[30] = h9 >> 10;
751  s[31] = h9 >> 18;
752  }
753 
754 }
static FE_25519 pow_22523(const FE_25519 &a)
Definition: ed25519_fe.cpp:50
static FE_25519 invert(const FE_25519 &a)
Definition: ed25519_fe.cpp:17
void from_bytes(const uint8_t b[32])
Definition: ed25519_fe.cpp:605
void fe_sq(fe &x, const fe &z)
Definition: ed25519_fe.h:201
uint64_t load_4(const uint8_t *in)
void to_bytes(uint8_t b[32]) const
Definition: ed25519_fe.cpp:667
uint64_t load_3(const uint8_t in[3])
static FE_25519 sqr_iter(const FE_25519 &a, size_t iter)
Definition: ed25519_fe.cpp:337
static FE_25519 sqr2(const FE_25519 &a)
Definition: ed25519_fe.cpp:479
void fe_sq_iter(fe &x, const fe &z, size_t iter)
Definition: ed25519_fe.h:206
static FE_25519 mul(const FE_25519 &a, const FE_25519 &b)
Definition: ed25519_fe.cpp:115
Definition: alg_id.cpp:13
FE_25519(int init=0)
Definition: ed25519_fe.h:29
void fe_mul(fe &x, const fe &a, const fe &b)
Definition: ed25519_fe.h:196