Bitcoin ABC  0.26.3 P2P Digital Currency
modinv32_impl.h
Go to the documentation of this file.
1 /***********************************************************************
2  * Copyright (c) 2020 Peter Dettman *
3  * Distributed under the MIT software license, see the accompanying *
4  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5  **********************************************************************/
6
7 #ifndef SECP256K1_MODINV32_IMPL_H
8 #define SECP256K1_MODINV32_IMPL_H
9
10 #include "modinv32.h"
11
12 #include "util.h"
13
14 #include <stdlib.h>
15
16 /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17  * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18  *
19  * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20  * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21  */
22
23 #ifdef VERIFY
24 static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25
26 /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27 static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29  int64_t c = 0;
30  int i;
31  for (i = 0; i < 8; ++i) {
32  if (i < alen) c += (int64_t)a->v[i] * factor;
33  r->v[i] = (int32_t)c & M30; c >>= 30;
34  }
35  if (8 < alen) c += (int64_t)a->v[8] * factor;
36  VERIFY_CHECK(c == (int32_t)c);
37  r->v[8] = (int32_t)c;
38 }
39
40 /* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41 static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42  int i;
44  secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45  secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46  for (i = 0; i < 8; ++i) {
47  /* Verify that all but the top limb of a and b are normalized. */
48  VERIFY_CHECK(am.v[i] >> 30 == 0);
49  VERIFY_CHECK(bm.v[i] >> 30 == 0);
50  }
51  for (i = 8; i >= 0; --i) {
52  if (am.v[i] < bm.v[i]) return -1;
53  if (am.v[i] > bm.v[i]) return 1;
54  }
55  return 0;
56 }
57 #endif
58
59 /* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60  * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61  * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62  * [0,2^30). */
64  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65  int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66  r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
67  int32_t cond_add, cond_negate;
68
69 #ifdef VERIFY
70  /* Verify that all limbs are in range (-2^30,2^30). */
71  int i;
72  for (i = 0; i < 9; ++i) {
73  VERIFY_CHECK(r->v[i] >= -M30);
74  VERIFY_CHECK(r->v[i] <= M30);
75  }
76  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78 #endif
79
80  /* In a first step, add the modulus if the input is negative, and then negate if requested.
81  * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82  * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83  * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84  * indeed the behavior of the right shift operator). */
85  cond_add = r8 >> 31;
86  r0 += modinfo->modulus.v[0] & cond_add;
87  r1 += modinfo->modulus.v[1] & cond_add;
88  r2 += modinfo->modulus.v[2] & cond_add;
89  r3 += modinfo->modulus.v[3] & cond_add;
90  r4 += modinfo->modulus.v[4] & cond_add;
91  r5 += modinfo->modulus.v[5] & cond_add;
92  r6 += modinfo->modulus.v[6] & cond_add;
93  r7 += modinfo->modulus.v[7] & cond_add;
94  r8 += modinfo->modulus.v[8] & cond_add;
95  cond_negate = sign >> 31;
96  r0 = (r0 ^ cond_negate) - cond_negate;
97  r1 = (r1 ^ cond_negate) - cond_negate;
98  r2 = (r2 ^ cond_negate) - cond_negate;
99  r3 = (r3 ^ cond_negate) - cond_negate;
100  r4 = (r4 ^ cond_negate) - cond_negate;
101  r5 = (r5 ^ cond_negate) - cond_negate;
102  r6 = (r6 ^ cond_negate) - cond_negate;
103  r7 = (r7 ^ cond_negate) - cond_negate;
104  r8 = (r8 ^ cond_negate) - cond_negate;
105  /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106  r1 += r0 >> 30; r0 &= M30;
107  r2 += r1 >> 30; r1 &= M30;
108  r3 += r2 >> 30; r2 &= M30;
109  r4 += r3 >> 30; r3 &= M30;
110  r5 += r4 >> 30; r4 &= M30;
111  r6 += r5 >> 30; r5 &= M30;
112  r7 += r6 >> 30; r6 &= M30;
113  r8 += r7 >> 30; r7 &= M30;
114
115  /* In a second step add the modulus again if the result is still negative, bringing r to range
116  * [0,modulus). */
117  cond_add = r8 >> 31;
118  r0 += modinfo->modulus.v[0] & cond_add;
119  r1 += modinfo->modulus.v[1] & cond_add;
120  r2 += modinfo->modulus.v[2] & cond_add;
121  r3 += modinfo->modulus.v[3] & cond_add;
122  r4 += modinfo->modulus.v[4] & cond_add;
123  r5 += modinfo->modulus.v[5] & cond_add;
124  r6 += modinfo->modulus.v[6] & cond_add;
125  r7 += modinfo->modulus.v[7] & cond_add;
126  r8 += modinfo->modulus.v[8] & cond_add;
127  /* And propagate again. */
128  r1 += r0 >> 30; r0 &= M30;
129  r2 += r1 >> 30; r1 &= M30;
130  r3 += r2 >> 30; r2 &= M30;
131  r4 += r3 >> 30; r3 &= M30;
132  r5 += r4 >> 30; r4 &= M30;
133  r6 += r5 >> 30; r5 &= M30;
134  r7 += r6 >> 30; r6 &= M30;
135  r8 += r7 >> 30; r7 &= M30;
136
137  r->v[0] = r0;
138  r->v[1] = r1;
139  r->v[2] = r2;
140  r->v[3] = r3;
141  r->v[4] = r4;
142  r->v[5] = r5;
143  r->v[6] = r6;
144  r->v[7] = r7;
145  r->v[8] = r8;
146
147 #ifdef VERIFY
148  VERIFY_CHECK(r0 >> 30 == 0);
149  VERIFY_CHECK(r1 >> 30 == 0);
150  VERIFY_CHECK(r2 >> 30 == 0);
151  VERIFY_CHECK(r3 >> 30 == 0);
152  VERIFY_CHECK(r4 >> 30 == 0);
153  VERIFY_CHECK(r5 >> 30 == 0);
154  VERIFY_CHECK(r6 >> 30 == 0);
155  VERIFY_CHECK(r7 >> 30 == 0);
156  VERIFY_CHECK(r8 >> 30 == 0);
157  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
158  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
159 #endif
160 }
161
162 /* Data type for transition matrices (see section 3 of explanation).
163  *
164  * t = [ u v ]
165  * [ q r ]
166  */
167 typedef struct {
168  int32_t u, v, q, r;
170
171 /* Compute the transition matrix and eta for 30 divsteps.
172  *
173  * Input: eta: initial eta
174  * f0: bottom limb of initial f
175  * g0: bottom limb of initial g
176  * Output: t: transition matrix
177  * Return: final eta
178  *
179  * Implements the divsteps_n_matrix function from the explanation.
180  */
181 static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
182  /* u,v,q,r are the elements of the transformation matrix being built up,
183  * starting with the identity matrix. Semantically they are signed integers
184  * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
185  * permits left shifting (which is UB for negative numbers). The range
186  * being inside [-2^31,2^31) means that casting to signed works correctly.
187  */
188  uint32_t u = 1, v = 0, q = 0, r = 1;
189  uint32_t c1, c2, f = f0, g = g0, x, y, z;
190  int i;
191
192  for (i = 0; i < 30; ++i) {
193  VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
194  VERIFY_CHECK((u * f0 + v * g0) == f << i);
195  VERIFY_CHECK((q * f0 + r * g0) == g << i);
196  /* Compute conditional masks for (eta < 0) and for (g & 1). */
197  c1 = eta >> 31;
198  c2 = -(g & 1);
199  /* Compute x,y,z, conditionally negated versions of f,u,v. */
200  x = (f ^ c1) - c1;
201  y = (u ^ c1) - c1;
202  z = (v ^ c1) - c1;
203  /* Conditionally add x,y,z to g,q,r. */
204  g += x & c2;
205  q += y & c2;
206  r += z & c2;
207  /* In what follows, c1 is a condition mask for (eta < 0) and (g & 1). */
208  c1 &= c2;
209  /* Conditionally negate eta, and unconditionally subtract 1. */
210  eta = (eta ^ c1) - (c1 + 1);
211  /* Conditionally add g,q,r to f,u,v. */
212  f += g & c1;
213  u += q & c1;
214  v += r & c1;
215  /* Shifts */
216  g >>= 1;
217  u <<= 1;
218  v <<= 1;
219  /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
220  VERIFY_CHECK(eta >= -751 && eta <= 751);
221  }
222  /* Return data in t and return value. */
223  t->u = (int32_t)u;
224  t->v = (int32_t)v;
225  t->q = (int32_t)q;
226  t->r = (int32_t)r;
227  /* The determinant of t must be a power of two. This guarantees that multiplication with t
228  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
229  * will be divided out again). As each divstep's individual matrix has determinant 2, the
230  * aggregate of 30 of them will have determinant 2^30. */
231  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
232  return eta;
233 }
234
235 /* Compute the transition matrix and eta for 30 divsteps (variable time).
236  *
237  * Input: eta: initial eta
238  * f0: bottom limb of initial f
239  * g0: bottom limb of initial g
240  * Output: t: transition matrix
241  * Return: final eta
242  *
243  * Implements the divsteps_n_matrix_var function from the explanation.
244  */
245 static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
246  /* inv256[i] = -(2*i+1)^-1 (mod 256) */
247  static const uint8_t inv256[128] = {
248  0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
249  0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
250  0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
251  0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
252  0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
253  0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
254  0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
255  0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
256  0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
257  0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
258  0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
259  };
260
261  /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
262  uint32_t u = 1, v = 0, q = 0, r = 1;
263  uint32_t f = f0, g = g0, m;
264  uint16_t w;
265  int i = 30, limit, zeros;
266
267  for (;;) {
268  /* Use a sentinel bit to count zeros only up to i. */
269  zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
270  /* Perform zeros divsteps at once; they all just divide g by two. */
271  g >>= zeros;
272  u <<= zeros;
273  v <<= zeros;
274  eta -= zeros;
275  i -= zeros;
276  /* We're done once we've done 30 divsteps. */
277  if (i == 0) break;
278  VERIFY_CHECK((f & 1) == 1);
279  VERIFY_CHECK((g & 1) == 1);
280  VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
281  VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
282  /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
283  VERIFY_CHECK(eta >= -751 && eta <= 751);
284  /* If eta is negative, negate it and replace f,g with g,-f. */
285  if (eta < 0) {
286  uint32_t tmp;
287  eta = -eta;
288  tmp = f; f = g; g = -tmp;
289  tmp = u; u = q; q = -tmp;
290  tmp = v; v = r; r = -tmp;
291  }
292  /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
293  * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
294  * can be done as its sign will flip once that happens. */
295  limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
296  /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
297  VERIFY_CHECK(limit > 0 && limit <= 30);
298  m = (UINT32_MAX >> (32 - limit)) & 255U;
299  /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
300  w = (g * inv256[(f >> 1) & 127]) & m;
301  /* Do so. */
302  g += f * w;
303  q += u * w;
304  r += v * w;
305  VERIFY_CHECK((g & m) == 0);
306  }
307  /* Return data in t and return value. */
308  t->u = (int32_t)u;
309  t->v = (int32_t)v;
310  t->q = (int32_t)q;
311  t->r = (int32_t)r;
312  /* The determinant of t must be a power of two. This guarantees that multiplication with t
313  * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
314  * will be divided out again). As each divstep's individual matrix has determinant 2, the
315  * aggregate of 30 of them will have determinant 2^30. */
316  VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
317  return eta;
318 }
319
320 /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
321  *
322  * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
323  * (-2^30,2^30).
324  *
325  * This implements the update_de function from the explanation.
326  */
328  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
329  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
330  int32_t di, ei, md, me, sd, se;
331  int64_t cd, ce;
332  int i;
333 #ifdef VERIFY
334  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
335  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
336  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
337  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
338  VERIFY_CHECK((labs(u) + labs(v)) >= 0); /* |u|+|v| doesn't overflow */
339  VERIFY_CHECK((labs(q) + labs(r)) >= 0); /* |q|+|r| doesn't overflow */
340  VERIFY_CHECK((labs(u) + labs(v)) <= M30 + 1); /* |u|+|v| <= 2^30 */
341  VERIFY_CHECK((labs(q) + labs(r)) <= M30 + 1); /* |q|+|r| <= 2^30 */
342 #endif
343  /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
344  sd = d->v[8] >> 31;
345  se = e->v[8] >> 31;
346  md = (u & sd) + (v & se);
347  me = (q & sd) + (r & se);
348  /* Begin computing t*[d,e]. */
349  di = d->v[0];
350  ei = e->v[0];
351  cd = (int64_t)u * di + (int64_t)v * ei;
352  ce = (int64_t)q * di + (int64_t)r * ei;
353  /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
354  md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
355  me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
356  /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
357  cd += (int64_t)modinfo->modulus.v[0] * md;
358  ce += (int64_t)modinfo->modulus.v[0] * me;
359  /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
360  VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
361  VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
362  /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
363  * limb i-1 (shifting down by 30 bits). */
364  for (i = 1; i < 9; ++i) {
365  di = d->v[i];
366  ei = e->v[i];
367  cd += (int64_t)u * di + (int64_t)v * ei;
368  ce += (int64_t)q * di + (int64_t)r * ei;
369  cd += (int64_t)modinfo->modulus.v[i] * md;
370  ce += (int64_t)modinfo->modulus.v[i] * me;
371  d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
372  e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
373  }
374  /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
375  d->v[8] = (int32_t)cd;
376  e->v[8] = (int32_t)ce;
377 #ifdef VERIFY
378  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
379  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
380  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
381  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
382 #endif
383 }
384
385 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
386  *
387  * This implements the update_fg function from the explanation.
388  */
390  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
391  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
392  int32_t fi, gi;
393  int64_t cf, cg;
394  int i;
395  /* Start computing t*[f,g]. */
396  fi = f->v[0];
397  gi = g->v[0];
398  cf = (int64_t)u * fi + (int64_t)v * gi;
399  cg = (int64_t)q * fi + (int64_t)r * gi;
400  /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
401  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
402  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
403  /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
404  * down by 30 bits). */
405  for (i = 1; i < 9; ++i) {
406  fi = f->v[i];
407  gi = g->v[i];
408  cf += (int64_t)u * fi + (int64_t)v * gi;
409  cg += (int64_t)q * fi + (int64_t)r * gi;
410  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
411  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
412  }
413  /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
414  f->v[8] = (int32_t)cf;
415  g->v[8] = (int32_t)cg;
416 }
417
418 /* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
419  *
420  * Version that operates on a variable number of limbs in f and g.
421  *
422  * This implements the update_fg function from the explanation in modinv64_impl.h.
423  */
425  const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
426  const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
427  int32_t fi, gi;
428  int64_t cf, cg;
429  int i;
430  VERIFY_CHECK(len > 0);
431  /* Start computing t*[f,g]. */
432  fi = f->v[0];
433  gi = g->v[0];
434  cf = (int64_t)u * fi + (int64_t)v * gi;
435  cg = (int64_t)q * fi + (int64_t)r * gi;
436  /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
437  VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
438  VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
439  /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
440  * down by 30 bits). */
441  for (i = 1; i < len; ++i) {
442  fi = f->v[i];
443  gi = g->v[i];
444  cf += (int64_t)u * fi + (int64_t)v * gi;
445  cg += (int64_t)q * fi + (int64_t)r * gi;
446  f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
447  g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
448  }
449  /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
450  f->v[len - 1] = (int32_t)cf;
451  g->v[len - 1] = (int32_t)cg;
452 }
453
454 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
456  /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
457  secp256k1_modinv32_signed30 d = {{0}};
458  secp256k1_modinv32_signed30 e = {{1}};
461  int i;
462  int32_t eta = -1;
463
464  /* Do 25 iterations of 30 divsteps each = 750 divsteps. 724 suffices for 256-bit inputs. */
465  for (i = 0; i < 25; ++i) {
466  /* Compute transition matrix and new eta after 30 divsteps. */
468  eta = secp256k1_modinv32_divsteps_30(eta, f.v[0], g.v[0], &t);
469  /* Update d,e using that transition matrix. */
470  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
471  /* Update f,g using that transition matrix. */
472 #ifdef VERIFY
473  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
474  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
475  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
476  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
477 #endif
479 #ifdef VERIFY
480  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
481  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
482  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
483  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
484 #endif
485  }
486
487  /* At this point sufficient iterations have been performed that g must have reached 0
488  * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
489  * values i.e. +/- 1, and d now contains +/- the modular inverse. */
490 #ifdef VERIFY
491  /* g == 0 */
492  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
493  /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
494  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
495  secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
496  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
497  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
498  (secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
499  secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
500 #endif
501
502  /* Optionally negate d, normalize to [0,modulus), and return it. */
503  secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
504  *x = d;
505 }
506
507 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
509  /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
510  secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
511  secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
514 #ifdef VERIFY
515  int i = 0;
516 #endif
517  int j, len = 9;
518  int32_t eta = -1;
519  int32_t cond, fn, gn;
520
521  /* Do iterations of 30 divsteps each until g=0. */
522  while (1) {
523  /* Compute transition matrix and new eta after 30 divsteps. */
525  eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
526  /* Update d,e using that transition matrix. */
527  secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
528  /* Update f,g using that transition matrix. */
529 #ifdef VERIFY
530  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
531  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
532  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
533  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
534 #endif
535  secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
536  /* If the bottom limb of g is 0, there is a chance g=0. */
537  if (g.v[0] == 0) {
538  cond = 0;
539  /* Check if all other limbs are also 0. */
540  for (j = 1; j < len; ++j) {
541  cond |= g.v[j];
542  }
543  /* If so, we're done. */
544  if (cond == 0) break;
545  }
546
547  /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
548  fn = f.v[len - 1];
549  gn = g.v[len - 1];
550  cond = ((int32_t)len - 2) >> 31;
551  cond |= fn ^ (fn >> 31);
552  cond |= gn ^ (gn >> 31);
553  /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
554  if (cond == 0) {
555  f.v[len - 2] |= (uint32_t)fn << 30;
556  g.v[len - 2] |= (uint32_t)gn << 30;
557  --len;
558  }
559 #ifdef VERIFY
560  VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
561  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
562  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
563  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
564  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
565 #endif
566  }
567
568  /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
569  * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
570 #ifdef VERIFY
571  /* g == 0 */
572  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
573  /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
574  VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
575  secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
576  (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
577  secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
578  (secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
579  secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
580 #endif
581
582  /* Optionally negate d, normalize to [0,modulus), and return it. */
583  secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
584  *x = d;
585 }
586
587 #endif /* SECP256K1_MODINV32_IMPL_H */
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo)
Definition: modinv32_impl.h:63
static int32_t secp256k1_modinv32_divsteps_30(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo)
static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t)
static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo *modinfo)
static SECP256K1_INLINE int secp256k1_ctz32_var(uint32_t x)
Definition: util.h:309
#define VERIFY_CHECK(cond)
Definition: util.h:68
secp256k1_modinv32_signed30 modulus
Definition: modinv32.h:25