Bitcoin Core  26.99.0 P2P Digital Currency
modinv32_impl.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Copyright (c) 2020 Peter Dettman *
5  **********************************************************************/
6
7 #ifndef SECP256K1_MODINV32_IMPL_H
8 #define SECP256K1_MODINV32_IMPL_H
9
10 #include "modinv32.h"
11
12 #include "util.h"
13
14 #include <stdlib.h>
15
16 /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17  * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18  *
19  * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20  * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21  */
22
23 #ifdef VERIFY
24 static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25
26 /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27 static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29  int64_t c = 0;
30  int i;
31  for (i = 0; i < 8; ++i) {
32  if (i < alen) c += (int64_t)a->v[i] * factor;
33  r->v[i] = (int32_t)c & M30; c >>= 30;
34  }
35  if (8 < alen) c += (int64_t)a->v[8] * factor;
36  VERIFY_CHECK(c == (int32_t)c);
37  r->v[8] = (int32_t)c;
38 }
39
40 /* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41 static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42  int i;
44  secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45  secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46  for (i = 0; i < 8; ++i) {
47  /* Verify that all but the top limb of a and b are normalized. */
48  VERIFY_CHECK(am.v[i] >> 30 == 0);
49  VERIFY_CHECK(bm.v[i] >> 30 == 0);
50  }
51  for (i = 8; i >= 0; --i) {
52  if (am.v[i] < bm.v[i]) return -1;
53  if (am.v[i] > bm.v[i]) return 1;
54  }
55  return 0;
56 }
57 #endif
58
59 /* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60  * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61  * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62  * [0,2^30). */
64  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65  int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66  r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
68
69 #ifdef VERIFY
70  /* Verify that all limbs are in range (-2^30,2^30). */
71  int i;
72  for (i = 0; i < 9; ++i) {
73  VERIFY_CHECK(r->v[i] >= -M30);
74  VERIFY_CHECK(r->v[i] <= M30);
75  }
76  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78 #endif
79
80  /* In a first step, add the modulus if the input is negative, and then negate if requested.
81  * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82  * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83  * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84  * indeed the behavior of the right shift operator). */
85  cond_add = r8 >> 31;
86  r0 += modinfo->modulus.v[0] & cond_add;
87  r1 += modinfo->modulus.v[1] & cond_add;
88  r2 += modinfo->modulus.v[2] & cond_add;
89  r3 += modinfo->modulus.v[3] & cond_add;
90  r4 += modinfo->modulus.v[4] & cond_add;
91  r5 += modinfo->modulus.v[5] & cond_add;
92  r6 += modinfo->modulus.v[6] & cond_add;
93  r7 += modinfo->modulus.v[7] & cond_add;
94  r8 += modinfo->modulus.v[8] & cond_add;
95  cond_negate = sign >> 31;
96  r0 = (r0 ^ cond_negate) - cond_negate;
97  r1 = (r1 ^ cond_negate) - cond_negate;
98  r2 = (r2 ^ cond_negate) - cond_negate;
99  r3 = (r3 ^ cond_negate) - cond_negate;
100  r4 = (r4 ^ cond_negate) - cond_negate;
101  r5 = (r5 ^ cond_negate) - cond_negate;
102  r6 = (r6 ^ cond_negate) - cond_negate;
103  r7 = (r7 ^ cond_negate) - cond_negate;
104  r8 = (r8 ^ cond_negate) - cond_negate;
105  /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106  r1 += r0 >> 30; r0 &= M30;
107  r2 += r1 >> 30; r1 &= M30;
108  r3 += r2 >> 30; r2 &= M30;
109  r4 += r3 >> 30; r3 &= M30;
110  r5 += r4 >> 30; r4 &= M30;
111  r6 += r5 >> 30; r5 &= M30;
112  r7 += r6 >> 30; r6 &= M30;
113  r8 += r7 >> 30; r7 &= M30;
114
115  /* In a second step add the modulus again if the result is still negative, bringing r to range
116  * [0,modulus). */
117  cond_add = r8 >> 31;
118  r0 += modinfo->modulus.v[0] & cond_add;
119  r1 += modinfo->modulus.v[1] & cond_add;
120  r2 += modinfo->modulus.v[2] & cond_add;
121  r3 += modinfo->modulus.v[3] & cond_add;
122  r4 += modinfo->modulus.v[4] & cond_add;
123  r5 += modinfo->modulus.v[5] & cond_add;
124  r6 += modinfo->modulus.v[6] & cond_add;
125  r7 += modinfo->modulus.v[7] & cond_add;
126  r8 += modinfo->modulus.v[8] & cond_add;
127  /* And propagate again. */
128  r1 += r0 >> 30; r0 &= M30;
129  r2 += r1 >> 30; r1 &= M30;
130  r3 += r2 >> 30; r2 &= M30;
131  r4 += r3 >> 30; r3 &= M30;
132  r5 += r4 >> 30; r4 &= M30;
133  r6 += r5 >> 30; r5 &= M30;
134  r7 += r6 >> 30; r6 &= M30;
135  r8 += r7 >> 30; r7 &= M30;
136
137  r->v[0] = r0;
138  r->v[1] = r1;
139  r->v[2] = r2;
140  r->v[3] = r3;
141  r->v[4] = r4;
142  r->v[5] = r5;
143  r->v[6] = r6;
144  r->v[7] = r7;
145  r->v[8] = r8;
146
147  VERIFY_CHECK(r0 >> 30 == 0);
148  VERIFY_CHECK(r1 >> 30 == 0);
149  VERIFY_CHECK(r2 >> 30 == 0);
150  VERIFY_CHECK(r3 >> 30 == 0);
151  VERIFY_CHECK(r4 >> 30 == 0);
152  VERIFY_CHECK(r5 >> 30 == 0);
153  VERIFY_CHECK(r6 >> 30 == 0);
154  VERIFY_CHECK(r7 >> 30 == 0);
155  VERIFY_CHECK(r8 >> 30 == 0);
156  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
157  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
158 }
159
160 /* Data type for transition matrices (see section 3 of explanation).
161  *
162  * t = [ u v ]
163  * [ q r ]
164  */
165 typedef struct {
166  int32_t u, v, q, r;
168
169 /* Compute the transition matrix and zeta for 30 divsteps.
170  *
171  * Input: zeta: initial zeta
172  * f0: bottom limb of initial f
173  * g0: bottom limb of initial g
174  * Output: t: transition matrix
175  * Return: final zeta
176  *
177  * Implements the divsteps_n_matrix function from the explanation.
178  */
179 static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
180  /* u,v,q,r are the elements of the transformation matrix being built up,
181  * starting with the identity matrix. Semantically they are signed integers
182  * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
183  * permits left shifting (which is UB for negative numbers). The range
184  * being inside [-2^31,2^31) means that casting to signed works correctly.
185  */
186  uint32_t u = 1, v = 0, q = 0, r = 1;
187  volatile uint32_t c1, c2;
188  uint32_t mask1, mask2, f = f0, g = g0, x, y, z;
189  int i;
190
191  for (i = 0; i < 30; ++i) {
192  VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
193  VERIFY_CHECK((u * f0 + v * g0) == f << i);
194  VERIFY_CHECK((q * f0 + r * g0) == g << i);
195  /* Compute conditional masks for (zeta < 0) and for (g & 1). */
196  c1 = zeta >> 31;
198  c2 = g & 1;
200  /* Compute x,y,z, conditionally negated versions of f,u,v. */
204  /* Conditionally add x,y,z to g,q,r. */
205  g += x & mask2;
206  q += y & mask2;
207  r += z & mask2;
208  /* In what follows, mask1 is a condition mask for (zeta < 0) and (g & 1). */
210  /* Conditionally change zeta into -zeta-2 or zeta-1. */
211  zeta = (zeta ^ mask1) - 1;
212  /* Conditionally add g,q,r to f,u,v. */
213  f += g & mask1;
214  u += q & mask1;
215  v += r & mask1;
216  /* Shifts */
217  g >>= 1;
218  u <<= 1;
219  v <<= 1;
220  /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
221  VERIFY_CHECK(zeta >= -601 && zeta <= 601);
222  }
223  /* Return data in t and return value. */
224  t->u = (int32_t)u;
225  t->v = (int32_t)v;
226  t->q = (int32_t)q;
227  t->r = (int32_t)r;
228  /* The determinant of t must be a power of two. This guarantees that multiplication with t
229  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
230  * will be divided out again). As each divstep's individual matrix has determinant 2, the
231  * aggregate of 30 of them will have determinant 2^30. */
232  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
233  return zeta;
234 }
235
236 /* secp256k1_modinv32_inv256[i] = -(2*i+1)^-1 (mod 256) */
237 static const uint8_t secp256k1_modinv32_inv256[128] = {
238  0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
239  0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
240  0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
241  0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
242  0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
243  0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
244  0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
245  0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
246  0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
247  0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
248  0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
249 };
250
251 /* Compute the transition matrix and eta for 30 divsteps (variable time).
252  *
253  * Input: eta: initial eta
254  * f0: bottom limb of initial f
255  * g0: bottom limb of initial g
256  * Output: t: transition matrix
257  * Return: final eta
258  *
259  * Implements the divsteps_n_matrix_var function from the explanation.
260  */
261 static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
262  /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
263  uint32_t u = 1, v = 0, q = 0, r = 1;
264  uint32_t f = f0, g = g0, m;
265  uint16_t w;
266  int i = 30, limit, zeros;
267
268  for (;;) {
269  /* Use a sentinel bit to count zeros only up to i. */
270  zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
271  /* Perform zeros divsteps at once; they all just divide g by two. */
272  g >>= zeros;
273  u <<= zeros;
274  v <<= zeros;
275  eta -= zeros;
276  i -= zeros;
277  /* We're done once we've done 30 divsteps. */
278  if (i == 0) break;
279  VERIFY_CHECK((f & 1) == 1);
280  VERIFY_CHECK((g & 1) == 1);
281  VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
282  VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
283  /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
284  VERIFY_CHECK(eta >= -751 && eta <= 751);
285  /* If eta is negative, negate it and replace f,g with g,-f. */
286  if (eta < 0) {
287  uint32_t tmp;
288  eta = -eta;
289  tmp = f; f = g; g = -tmp;
290  tmp = u; u = q; q = -tmp;
291  tmp = v; v = r; r = -tmp;
292  }
293  /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
294  * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
295  * can be done as its sign will flip once that happens. */
296  limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
297  /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
298  VERIFY_CHECK(limit > 0 && limit <= 30);
299  m = (UINT32_MAX >> (32 - limit)) & 255U;
300  /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
301  w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
302  /* Do so. */
303  g += f * w;
304  q += u * w;
305  r += v * w;
306  VERIFY_CHECK((g & m) == 0);
307  }
308  /* Return data in t and return value. */
309  t->u = (int32_t)u;
310  t->v = (int32_t)v;
311  t->q = (int32_t)q;
312  t->r = (int32_t)r;
313  /* The determinant of t must be a power of two. This guarantees that multiplication with t
314  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
315  * will be divided out again). As each divstep's individual matrix has determinant 2, the
316  * aggregate of 30 of them will have determinant 2^30. */
317  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
318  return eta;
319 }
320
321 /* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
322  * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
323  * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
324  *
325  * Input: eta: initial eta
326  * f0: bottom limb of initial f
327  * g0: bottom limb of initial g
328  * Output: t: transition matrix
329  * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
330  * by applying the returned transformation matrix to it. The other bits of *jacp may
331  * change, but are meaningless.
332  * Return: final eta
333  */
334 static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
335  /* Transformation matrix. */
336  uint32_t u = 1, v = 0, q = 0, r = 1;
337  uint32_t f = f0, g = g0, m;
338  uint16_t w;
339  int i = 30, limit, zeros;
340  int jac = *jacp;
341
342  for (;;) {
343  /* Use a sentinel bit to count zeros only up to i. */
344  zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
345  /* Perform zeros divsteps at once; they all just divide g by two. */
346  g >>= zeros;
347  u <<= zeros;
348  v <<= zeros;
349  eta -= zeros;
350  i -= zeros;
351  /* Update the bottom bit of jac: when dividing g by an odd power of 2,
352  * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
353  jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
354  /* We're done once we've done 30 posdivsteps. */
355  if (i == 0) break;
356  VERIFY_CHECK((f & 1) == 1);
357  VERIFY_CHECK((g & 1) == 1);
358  VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
359  VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
360  /* If eta is negative, negate it and replace f,g with g,f. */
361  if (eta < 0) {
362  uint32_t tmp;
363  eta = -eta;
364  /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
365  * if both f and g are 3 mod 4. */
366  jac ^= ((f & g) >> 1);
367  tmp = f; f = g; g = tmp;
368  tmp = u; u = q; q = tmp;
369  tmp = v; v = r; r = tmp;
370  }
371  /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
372  * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
373  * can be done as its sign will flip once that happens. */
374  limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
375  /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
376  VERIFY_CHECK(limit > 0 && limit <= 30);
377  m = (UINT32_MAX >> (32 - limit)) & 255U;
378  /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
379  w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
380  /* Do so. */
381  g += f * w;
382  q += u * w;
383  r += v * w;
384  VERIFY_CHECK((g & m) == 0);
385  }
386  /* Return data in t and return value. */
387  t->u = (int32_t)u;
388  t->v = (int32_t)v;
389  t->q = (int32_t)q;
390  t->r = (int32_t)r;
391  /* The determinant of t must be a power of two. This guarantees that multiplication with t
392  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
393  * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
394  * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
395  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
396  (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
397  *jacp = jac;
398  return eta;
399 }
400
401 /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
402  *
403  * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
404  * (-2^30,2^30).
405  *
406  * This implements the update_de function from the explanation.
407  */
409  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
410  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
411  int32_t di, ei, md, me, sd, se;
412  int64_t cd, ce;
413  int i;
414  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
415  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
416  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
417  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
418  VERIFY_CHECK(labs(u) <= (M30 + 1 - labs(v))); /* |u|+|v| <= 2^30 */
419  VERIFY_CHECK(labs(q) <= (M30 + 1 - labs(r))); /* |q|+|r| <= 2^30 */
420
421  /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
422  sd = d->v[8] >> 31;
423  se = e->v[8] >> 31;
424  md = (u & sd) + (v & se);
425  me = (q & sd) + (r & se);
426  /* Begin computing t*[d,e]. */
427  di = d->v[0];
428  ei = e->v[0];
429  cd = (int64_t)u * di + (int64_t)v * ei;
430  ce = (int64_t)q * di + (int64_t)r * ei;
431  /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
432  md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
433  me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
434  /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
435  cd += (int64_t)modinfo->modulus.v[0] * md;
436  ce += (int64_t)modinfo->modulus.v[0] * me;
437  /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
438  VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
439  VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
440  /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
441  * limb i-1 (shifting down by 30 bits). */
442  for (i = 1; i < 9; ++i) {
443  di = d->v[i];
444  ei = e->v[i];
445  cd += (int64_t)u * di + (int64_t)v * ei;
446  ce += (int64_t)q * di + (int64_t)r * ei;
447  cd += (int64_t)modinfo->modulus.v[i] * md;
448  ce += (int64_t)modinfo->modulus.v[i] * me;
449  d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
450  e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
451  }
452  /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
453  d->v[8] = (int32_t)cd;
454  e->v[8] = (int32_t)ce;
455
456  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
457  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
458  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
459  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
460 }
461
462 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
463  *
464  * This implements the update_fg function from the explanation.
465  */
467  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
468  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
469  int32_t fi, gi;
470  int64_t cf, cg;
471  int i;
472  /* Start computing t*[f,g]. */
473  fi = f->v[0];
474  gi = g->v[0];
475  cf = (int64_t)u * fi + (int64_t)v * gi;
476  cg = (int64_t)q * fi + (int64_t)r * gi;
477  /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
478  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
479  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
480  /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
481  * down by 30 bits). */
482  for (i = 1; i < 9; ++i) {
483  fi = f->v[i];
484  gi = g->v[i];
485  cf += (int64_t)u * fi + (int64_t)v * gi;
486  cg += (int64_t)q * fi + (int64_t)r * gi;
487  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
488  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
489  }
490  /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
491  f->v[8] = (int32_t)cf;
492  g->v[8] = (int32_t)cg;
493 }
494
495 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
496  *
497  * Version that operates on a variable number of limbs in f and g.
498  *
499  * This implements the update_fg function from the explanation in modinv64_impl.h.
500  */
502  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
503  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
504  int32_t fi, gi;
505  int64_t cf, cg;
506  int i;
507  VERIFY_CHECK(len > 0);
508  /* Start computing t*[f,g]. */
509  fi = f->v[0];
510  gi = g->v[0];
511  cf = (int64_t)u * fi + (int64_t)v * gi;
512  cg = (int64_t)q * fi + (int64_t)r * gi;
513  /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
514  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
515  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
516  /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
517  * down by 30 bits). */
518  for (i = 1; i < len; ++i) {
519  fi = f->v[i];
520  gi = g->v[i];
521  cf += (int64_t)u * fi + (int64_t)v * gi;
522  cg += (int64_t)q * fi + (int64_t)r * gi;
523  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
524  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
525  }
526  /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
527  f->v[len - 1] = (int32_t)cf;
528  g->v[len - 1] = (int32_t)cg;
529 }
530
531 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
534  secp256k1_modinv32_signed30 d = {{0}};
535  secp256k1_modinv32_signed30 e = {{1}};
538  int i;
539  int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
540
541  /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
542  for (i = 0; i < 20; ++i) {
543  /* Compute transition matrix and new zeta after 30 divsteps. */
545  zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
546  /* Update d,e using that transition matrix. */
547  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
548  /* Update f,g using that transition matrix. */
549  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
550  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
551  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
552  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
553
555
556  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
557  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
558  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
559  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
560  }
561
562  /* At this point sufficient iterations have been performed that g must have reached 0
563  * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
564  * values i.e. +/- 1, and d now contains +/- the modular inverse. */
565
566  /* g == 0 */
567  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
568  /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
569  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
570  secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
571  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
572  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
573  (secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
574  secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
575
576  /* Optionally negate d, normalize to [0,modulus), and return it. */
577  secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
578  *x = d;
579 }
580
581 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
584  secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
585  secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
588 #ifdef VERIFY
589  int i = 0;
590 #endif
591  int j, len = 9;
592  int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
593  int32_t cond, fn, gn;
594
595  /* Do iterations of 30 divsteps each until g=0. */
596  while (1) {
597  /* Compute transition matrix and new eta after 30 divsteps. */
599  eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
600  /* Update d,e using that transition matrix. */
601  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
602  /* Update f,g using that transition matrix. */
603
604  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
605  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
606  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
607  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
608
610  /* If the bottom limb of g is 0, there is a chance g=0. */
611  if (g.v[0] == 0) {
612  cond = 0;
613  /* Check if all other limbs are also 0. */
614  for (j = 1; j < len; ++j) {
615  cond |= g.v[j];
616  }
617  /* If so, we're done. */
618  if (cond == 0) break;
619  }
620
621  /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
622  fn = f.v[len - 1];
623  gn = g.v[len - 1];
624  cond = ((int32_t)len - 2) >> 31;
625  cond |= fn ^ (fn >> 31);
626  cond |= gn ^ (gn >> 31);
627  /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
628  if (cond == 0) {
629  f.v[len - 2] |= (uint32_t)fn << 30;
630  g.v[len - 2] |= (uint32_t)gn << 30;
631  --len;
632  }
633
634  VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
635  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
636  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
637  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
638  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
639  }
640
641  /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
642  * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
643
644  /* g == 0 */
645  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
646  /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
647  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
648  secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
649  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
650  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
651  (secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
652  secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
653
654  /* Optionally negate d, normalize to [0,modulus), and return it. */
655  secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
656  *x = d;
657 }
658
659 /* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
660  * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
661 #ifdef VERIFY
662 #define JACOBI32_ITERATIONS 25
663 #else
664 #define JACOBI32_ITERATIONS 50
665 #endif
666
667 /* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
672  int j, len = 9;
673  int32_t eta = -1; /* eta = -delta; delta is initially 1 */
674  int32_t cond, fn, gn;
675  int jac = 0;
676  int count;
677
678  /* The input limbs must all be non-negative. */
679  VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
680
681  /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
682  * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
683  * time out). */
684  VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
685
686  for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
687  /* Compute transition matrix and new eta after 30 posdivsteps. */
689  eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
690  /* Update f,g using that transition matrix. */
691  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
692  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
693  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
694  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
695
697  /* If the bottom limb of f is 1, there is a chance that f=1. */
698  if (f.v[0] == 1) {
699  cond = 0;
700  /* Check if the other limbs are also 0. */
701  for (j = 1; j < len; ++j) {
702  cond |= f.v[j];
703  }
704  /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
705  if (cond == 0) return 1 - 2*(jac & 1);
706  }
707
708  /* Determine if len>1 and limb (len-1) of both f and g is 0. */
709  fn = f.v[len - 1];
710  gn = g.v[len - 1];
711  cond = ((int32_t)len - 2) >> 31;
712  cond |= fn;
713  cond |= gn;
714  /* If so, reduce length. */
715  if (cond == 0) --len;
716
717  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
718  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
719  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
720  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
721  }
722
723  /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
724  return 0;
725 }
726
727 #endif /* SECP256K1_MODINV32_IMPL_H */
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo)
Definition: modinv32_impl.h:63
#define JACOBI32_ITERATIONS
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp)
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static const uint8_t secp256k1_modinv32_inv256[128]
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo *modinfo)
static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x)
Definition: util.h:305
#define VERIFY_CHECK(cond)
Definition: util.h:139
secp256k1_modinv32_signed30 modulus
Definition: modinv32.h:21
static int count