Bitcoin ABC  0.24.7
P2P Digital Currency
sha256.cpp
Go to the documentation of this file.
1 // Copyright (c) 2014 The Bitcoin Core developers
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 
5 #include <crypto/sha256.h>
6 
7 #include <compat/cpuid.h>
8 #include <crypto/common.h>
9 
10 #include <cassert>
11 #include <cstring>
12 
// Forward declaration of the SSE4 block transform. It is only declared on
// x86 targets built with USE_ASM; no definition is visible in this file.
// The signature matches TransformType, and SHA256AutoDetect() refers to it
// when CPUID reports SSE4 support.
13 #if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
14 #if defined(USE_ASM)
15 namespace sha256_sse4 {
16 void Transform(uint32_t *s, const uint8_t *chunk, size_t blocks);
17 }
18 #endif
19 #endif
20 
// Forward declaration of the 4-way double-SHA256 routine (declaration only;
// no definition is visible in this file). SHA256D64() advances `in` by 256
// bytes and `out` by 128 bytes per call, i.e. four 64-byte inputs and four
// 32-byte outputs.
21 namespace sha256d64_sse41 {
22 void Transform_4way(uint8_t *out, const uint8_t *in);
23 }
24 
// Forward declaration of the 8-way double-SHA256 routine (declaration only;
// no definition is visible in this file). SHA256D64() advances `in` by 512
// bytes and `out` by 256 bytes per call, i.e. eight 64-byte inputs and eight
// 32-byte outputs.
25 namespace sha256d64_avx2 {
26 void Transform_8way(uint8_t *out, const uint8_t *in);
27 }
28 
// Forward declaration of the 2-way double-SHA256 routine (declaration only;
// no definition is visible in this file). SHA256D64() advances `in` by 128
// bytes and `out` by 64 bytes per call, i.e. two 64-byte inputs and two
// 32-byte outputs.
29 namespace sha256d64_shani {
30 void Transform_2way(uint8_t *out, const uint8_t *in);
31 }
32 
// Forward declaration of the SHA-NI block transform (declaration only; no
// definition is visible in this file). The signature matches TransformType,
// and SHA256AutoDetect() refers to it when CPUID reports SHA extensions.
33 namespace sha256_shani {
34 void Transform(uint32_t *s, const uint8_t *chunk, size_t blocks);
35 }
36 
37 // Internal implementation code.
38 namespace {
40 namespace sha256 {
/** SHA-256 "choose": each result bit comes from y where x is 1, else from z. */
inline uint32_t Ch(uint32_t x, uint32_t y, uint32_t z) {
    const uint32_t from_y = x & y;
    const uint32_t from_z = ~x & z;
    return from_y ^ from_z;
}
/** SHA-256 "majority": each result bit is set when at least two inputs have it set. */
inline uint32_t Maj(uint32_t x, uint32_t y, uint32_t z) {
    const uint32_t xy = x & y;
    const uint32_t xz = x & z;
    const uint32_t yz = y & z;
    return xy ^ xz ^ yz;
}
/** Big sigma 0: XOR of x rotated right by 2, 13 and 22 bits. */
inline uint32_t Sigma0(uint32_t x) {
    const uint32_t rot2 = (x >> 2) | (x << 30);
    const uint32_t rot13 = (x >> 13) | (x << 19);
    const uint32_t rot22 = (x >> 22) | (x << 10);
    return rot2 ^ rot13 ^ rot22;
}
/** Big sigma 1: XOR of x rotated right by 6, 11 and 25 bits. */
inline uint32_t Sigma1(uint32_t x) {
    const uint32_t rot6 = (x >> 6) | (x << 26);
    const uint32_t rot11 = (x >> 11) | (x << 21);
    const uint32_t rot25 = (x >> 25) | (x << 7);
    return rot6 ^ rot11 ^ rot25;
}
/** Small sigma 0: x rotated right by 7 and 18 bits, XORed with x shifted right by 3. */
inline uint32_t sigma0(uint32_t x) {
    const uint32_t rot7 = (x >> 7) | (x << 25);
    const uint32_t rot18 = (x >> 18) | (x << 14);
    const uint32_t shr3 = x >> 3;
    return rot7 ^ rot18 ^ shr3;
}
/** Small sigma 1: x rotated right by 17 and 19 bits, XORed with x shifted right by 10. */
inline uint32_t sigma1(uint32_t x) {
    const uint32_t rot17 = (x >> 17) | (x << 15);
    const uint32_t rot19 = (x >> 19) | (x << 13);
    const uint32_t shr10 = x >> 10;
    return rot17 ^ rot19 ^ shr10;
}
59 
61  inline void Round(uint32_t a, uint32_t b, uint32_t c, uint32_t &d,
62  uint32_t e, uint32_t f, uint32_t g, uint32_t &h,
63  uint32_t k) {
64  uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k;
65  uint32_t t2 = Sigma0(a) + Maj(a, b, c);
66  d += t1;
67  h = t1 + t2;
68  }
69 
/** Load the eight initial SHA-256 state words into s[0..7]. */
inline void Initialize(uint32_t *s) {
    static const uint32_t initial_state[8] = {
        0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul,
        0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul};
    for (int word = 0; word < 8; ++word) {
        s[word] = initial_state[word];
    }
}
81 
// Perform `blocks` SHA-256 compression steps, consuming one 64-byte chunk per
// step.
//
// s      : eight state words, read at the start of each step and updated in
//          place by adding the working variables at the end of the step.
// chunk  : input bytes; 64 * blocks bytes are read as big-endian 32-bit words.
// blocks : number of 64-byte chunks to process; 0 leaves s untouched.
//
// All 64 rounds are written out explicitly. Instead of shifting the working
// variables a..h after each round, the argument order passed to Round() is
// rotated by one position, so the pattern repeats every 8 rounds.
85  void Transform(uint32_t *s, const uint8_t *chunk, size_t blocks) {
86  while (blocks--) {
87  uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5],
88  g = s[6], h = s[7];
// Rolling 16-word message schedule; each word is overwritten in place once
// its previous value is no longer needed.
89  uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13,
90  w14, w15;
91 
// Rounds 0-15: the schedule words are the 16 big-endian input words.
92  Round(a, b, c, d, e, f, g, h,
93  0x428a2f98 + (w0 = ReadBE32(chunk + 0)));
94  Round(h, a, b, c, d, e, f, g,
95  0x71374491 + (w1 = ReadBE32(chunk + 4)));
96  Round(g, h, a, b, c, d, e, f,
97  0xb5c0fbcf + (w2 = ReadBE32(chunk + 8)));
98  Round(f, g, h, a, b, c, d, e,
99  0xe9b5dba5 + (w3 = ReadBE32(chunk + 12)));
100  Round(e, f, g, h, a, b, c, d,
101  0x3956c25b + (w4 = ReadBE32(chunk + 16)));
102  Round(d, e, f, g, h, a, b, c,
103  0x59f111f1 + (w5 = ReadBE32(chunk + 20)));
104  Round(c, d, e, f, g, h, a, b,
105  0x923f82a4 + (w6 = ReadBE32(chunk + 24)));
106  Round(b, c, d, e, f, g, h, a,
107  0xab1c5ed5 + (w7 = ReadBE32(chunk + 28)));
108  Round(a, b, c, d, e, f, g, h,
109  0xd807aa98 + (w8 = ReadBE32(chunk + 32)));
110  Round(h, a, b, c, d, e, f, g,
111  0x12835b01 + (w9 = ReadBE32(chunk + 36)));
112  Round(g, h, a, b, c, d, e, f,
113  0x243185be + (w10 = ReadBE32(chunk + 40)));
114  Round(f, g, h, a, b, c, d, e,
115  0x550c7dc3 + (w11 = ReadBE32(chunk + 44)));
116  Round(e, f, g, h, a, b, c, d,
117  0x72be5d74 + (w12 = ReadBE32(chunk + 48)));
118  Round(d, e, f, g, h, a, b, c,
119  0x80deb1fe + (w13 = ReadBE32(chunk + 52)));
120  Round(c, d, e, f, g, h, a, b,
121  0x9bdc06a7 + (w14 = ReadBE32(chunk + 56)));
122  Round(b, c, d, e, f, g, h, a,
123  0xc19bf174 + (w15 = ReadBE32(chunk + 60)));
124 
// Rounds 16-31: each schedule word is extended in place as
// w[i] += sigma1(w[i-2]) + w[i-7] + sigma0(w[i-15]) (indices modulo 16).
125  Round(a, b, c, d, e, f, g, h,
126  0xe49b69c1 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
127  Round(h, a, b, c, d, e, f, g,
128  0xefbe4786 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
129  Round(g, h, a, b, c, d, e, f,
130  0x0fc19dc6 + (w2 += sigma1(w0) + w11 + sigma0(w3)));
131  Round(f, g, h, a, b, c, d, e,
132  0x240ca1cc + (w3 += sigma1(w1) + w12 + sigma0(w4)));
133  Round(e, f, g, h, a, b, c, d,
134  0x2de92c6f + (w4 += sigma1(w2) + w13 + sigma0(w5)));
135  Round(d, e, f, g, h, a, b, c,
136  0x4a7484aa + (w5 += sigma1(w3) + w14 + sigma0(w6)));
137  Round(c, d, e, f, g, h, a, b,
138  0x5cb0a9dc + (w6 += sigma1(w4) + w15 + sigma0(w7)));
139  Round(b, c, d, e, f, g, h, a,
140  0x76f988da + (w7 += sigma1(w5) + w0 + sigma0(w8)));
141  Round(a, b, c, d, e, f, g, h,
142  0x983e5152 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
143  Round(h, a, b, c, d, e, f, g,
144  0xa831c66d + (w9 += sigma1(w7) + w2 + sigma0(w10)));
145  Round(g, h, a, b, c, d, e, f,
146  0xb00327c8 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
147  Round(f, g, h, a, b, c, d, e,
148  0xbf597fc7 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
149  Round(e, f, g, h, a, b, c, d,
150  0xc6e00bf3 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
151  Round(d, e, f, g, h, a, b, c,
152  0xd5a79147 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
153  Round(c, d, e, f, g, h, a, b,
154  0x06ca6351 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
155  Round(b, c, d, e, f, g, h, a,
156  0x14292967 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
157 
// Rounds 32-47.
158  Round(a, b, c, d, e, f, g, h,
159  0x27b70a85 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
160  Round(h, a, b, c, d, e, f, g,
161  0x2e1b2138 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
162  Round(g, h, a, b, c, d, e, f,
163  0x4d2c6dfc + (w2 += sigma1(w0) + w11 + sigma0(w3)));
164  Round(f, g, h, a, b, c, d, e,
165  0x53380d13 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
166  Round(e, f, g, h, a, b, c, d,
167  0x650a7354 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
168  Round(d, e, f, g, h, a, b, c,
169  0x766a0abb + (w5 += sigma1(w3) + w14 + sigma0(w6)));
170  Round(c, d, e, f, g, h, a, b,
171  0x81c2c92e + (w6 += sigma1(w4) + w15 + sigma0(w7)));
172  Round(b, c, d, e, f, g, h, a,
173  0x92722c85 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
174  Round(a, b, c, d, e, f, g, h,
175  0xa2bfe8a1 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
176  Round(h, a, b, c, d, e, f, g,
177  0xa81a664b + (w9 += sigma1(w7) + w2 + sigma0(w10)));
178  Round(g, h, a, b, c, d, e, f,
179  0xc24b8b70 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
180  Round(f, g, h, a, b, c, d, e,
181  0xc76c51a3 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
182  Round(e, f, g, h, a, b, c, d,
183  0xd192e819 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
184  Round(d, e, f, g, h, a, b, c,
185  0xd6990624 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
186  Round(c, d, e, f, g, h, a, b,
187  0xf40e3585 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
188  Round(b, c, d, e, f, g, h, a,
189  0x106aa070 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
190 
// Rounds 48-63. The last two rounds use plain `+` instead of `+=`: the
// extended w14/w15 values are never read again, so they are not stored.
191  Round(a, b, c, d, e, f, g, h,
192  0x19a4c116 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
193  Round(h, a, b, c, d, e, f, g,
194  0x1e376c08 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
195  Round(g, h, a, b, c, d, e, f,
196  0x2748774c + (w2 += sigma1(w0) + w11 + sigma0(w3)));
197  Round(f, g, h, a, b, c, d, e,
198  0x34b0bcb5 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
199  Round(e, f, g, h, a, b, c, d,
200  0x391c0cb3 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
201  Round(d, e, f, g, h, a, b, c,
202  0x4ed8aa4a + (w5 += sigma1(w3) + w14 + sigma0(w6)));
203  Round(c, d, e, f, g, h, a, b,
204  0x5b9cca4f + (w6 += sigma1(w4) + w15 + sigma0(w7)));
205  Round(b, c, d, e, f, g, h, a,
206  0x682e6ff3 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
207  Round(a, b, c, d, e, f, g, h,
208  0x748f82ee + (w8 += sigma1(w6) + w1 + sigma0(w9)));
209  Round(h, a, b, c, d, e, f, g,
210  0x78a5636f + (w9 += sigma1(w7) + w2 + sigma0(w10)));
211  Round(g, h, a, b, c, d, e, f,
212  0x84c87814 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
213  Round(f, g, h, a, b, c, d, e,
214  0x8cc70208 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
215  Round(e, f, g, h, a, b, c, d,
216  0x90befffa + (w12 += sigma1(w10) + w5 + sigma0(w13)));
217  Round(d, e, f, g, h, a, b, c,
218  0xa4506ceb + (w13 += sigma1(w11) + w6 + sigma0(w14)));
219  Round(c, d, e, f, g, h, a, b,
220  0xbef9a3f7 + (w14 + sigma1(w12) + w7 + sigma0(w15)));
221  Round(b, c, d, e, f, g, h, a,
222  0xc67178f2 + (w15 + sigma1(w13) + w8 + sigma0(w0)));
223 
// Feed-forward: fold the working variables back into the state, then move
// on to the next 64-byte chunk.
224  s[0] += a;
225  s[1] += b;
226  s[2] += c;
227  s[3] += d;
228  s[4] += e;
229  s[5] += f;
230  s[6] += g;
231  s[7] += h;
232  chunk += 64;
233  }
234  }
235 
// Compute the double SHA-256 of a single 64-byte input with all three
// compression steps unrolled in one function.
//
// out : receives 32 bytes, written as eight big-endian words.
// in  : 64 input bytes, read as sixteen big-endian words.
//
// Transform 1 compresses the input block from the initial state. Transform 2
// and the second half of Transform 3 take no data-dependent message words for
// the fixed parts of their blocks: those rounds use literal constants instead.
236  void TransformD64(uint8_t *out, const uint8_t *in) {
237  // Transform 1
// Working variables start at the initial SHA-256 state (same values as in
// Initialize()).
238  uint32_t a = 0x6a09e667ul;
239  uint32_t b = 0xbb67ae85ul;
240  uint32_t c = 0x3c6ef372ul;
241  uint32_t d = 0xa54ff53aul;
242  uint32_t e = 0x510e527ful;
243  uint32_t f = 0x9b05688cul;
244  uint32_t g = 0x1f83d9abul;
245  uint32_t h = 0x5be0cd19ul;
246 
247  uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13,
248  w14, w15;
249 
// Rounds 0-15 read the input words; rounds 16-63 extend the schedule in place
// exactly as in Transform().
250  Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + (w0 = ReadBE32(in + 0)));
251  Round(h, a, b, c, d, e, f, g, 0x71374491ul + (w1 = ReadBE32(in + 4)));
252  Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + (w2 = ReadBE32(in + 8)));
253  Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + (w3 = ReadBE32(in + 12)));
254  Round(e, f, g, h, a, b, c, d, 0x3956c25bul + (w4 = ReadBE32(in + 16)));
255  Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + (w5 = ReadBE32(in + 20)));
256  Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + (w6 = ReadBE32(in + 24)));
257  Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + (w7 = ReadBE32(in + 28)));
258  Round(a, b, c, d, e, f, g, h, 0xd807aa98ul + (w8 = ReadBE32(in + 32)));
259  Round(h, a, b, c, d, e, f, g, 0x12835b01ul + (w9 = ReadBE32(in + 36)));
260  Round(g, h, a, b, c, d, e, f, 0x243185beul + (w10 = ReadBE32(in + 40)));
261  Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul + (w11 = ReadBE32(in + 44)));
262  Round(e, f, g, h, a, b, c, d, 0x72be5d74ul + (w12 = ReadBE32(in + 48)));
263  Round(d, e, f, g, h, a, b, c, 0x80deb1feul + (w13 = ReadBE32(in + 52)));
264  Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul + (w14 = ReadBE32(in + 56)));
265  Round(b, c, d, e, f, g, h, a, 0xc19bf174ul + (w15 = ReadBE32(in + 60)));
266  Round(a, b, c, d, e, f, g, h,
267  0xe49b69c1ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
268  Round(h, a, b, c, d, e, f, g,
269  0xefbe4786ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
270  Round(g, h, a, b, c, d, e, f,
271  0x0fc19dc6ul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
272  Round(f, g, h, a, b, c, d, e,
273  0x240ca1ccul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
274  Round(e, f, g, h, a, b, c, d,
275  0x2de92c6ful + (w4 += sigma1(w2) + w13 + sigma0(w5)));
276  Round(d, e, f, g, h, a, b, c,
277  0x4a7484aaul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
278  Round(c, d, e, f, g, h, a, b,
279  0x5cb0a9dcul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
280  Round(b, c, d, e, f, g, h, a,
281  0x76f988daul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
282  Round(a, b, c, d, e, f, g, h,
283  0x983e5152ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
284  Round(h, a, b, c, d, e, f, g,
285  0xa831c66dul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
286  Round(g, h, a, b, c, d, e, f,
287  0xb00327c8ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
288  Round(f, g, h, a, b, c, d, e,
289  0xbf597fc7ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
290  Round(e, f, g, h, a, b, c, d,
291  0xc6e00bf3ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
292  Round(d, e, f, g, h, a, b, c,
293  0xd5a79147ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
294  Round(c, d, e, f, g, h, a, b,
295  0x06ca6351ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
296  Round(b, c, d, e, f, g, h, a,
297  0x14292967ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
298  Round(a, b, c, d, e, f, g, h,
299  0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
300  Round(h, a, b, c, d, e, f, g,
301  0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
302  Round(g, h, a, b, c, d, e, f,
303  0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
304  Round(f, g, h, a, b, c, d, e,
305  0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
306  Round(e, f, g, h, a, b, c, d,
307  0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
308  Round(d, e, f, g, h, a, b, c,
309  0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
310  Round(c, d, e, f, g, h, a, b,
311  0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
312  Round(b, c, d, e, f, g, h, a,
313  0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
314  Round(a, b, c, d, e, f, g, h,
315  0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
316  Round(h, a, b, c, d, e, f, g,
317  0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
318  Round(g, h, a, b, c, d, e, f,
319  0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
320  Round(f, g, h, a, b, c, d, e,
321  0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
322  Round(e, f, g, h, a, b, c, d,
323  0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
324  Round(d, e, f, g, h, a, b, c,
325  0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
326  Round(c, d, e, f, g, h, a, b,
327  0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
328  Round(b, c, d, e, f, g, h, a,
329  0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
330  Round(a, b, c, d, e, f, g, h,
331  0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
332  Round(h, a, b, c, d, e, f, g,
333  0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
334  Round(g, h, a, b, c, d, e, f,
335  0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
336  Round(f, g, h, a, b, c, d, e,
337  0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
338  Round(e, f, g, h, a, b, c, d,
339  0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
340  Round(d, e, f, g, h, a, b, c,
341  0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
342  Round(c, d, e, f, g, h, a, b,
343  0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
344  Round(b, c, d, e, f, g, h, a,
345  0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
346  Round(a, b, c, d, e, f, g, h,
347  0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
348  Round(h, a, b, c, d, e, f, g,
349  0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
350  Round(g, h, a, b, c, d, e, f,
351  0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
352  Round(f, g, h, a, b, c, d, e,
353  0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
354  Round(e, f, g, h, a, b, c, d,
355  0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
356  Round(d, e, f, g, h, a, b, c,
357  0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
358  Round(c, d, e, f, g, h, a, b,
359  0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
360  Round(b, c, d, e, f, g, h, a,
361  0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
362 
// Feed-forward for Transform 1: add the initial state back in.
363  a += 0x6a09e667ul;
364  b += 0xbb67ae85ul;
365  c += 0x3c6ef372ul;
366  d += 0xa54ff53aul;
367  e += 0x510e527ful;
368  f += 0x9b05688cul;
369  g += 0x1f83d9abul;
370  h += 0x5be0cd19ul;
371 
// Save the state after the first block; it is needed again for the
// feed-forward at the end of Transform 2.
372  uint32_t t0 = a, t1 = b, t2 = c, t3 = d, t4 = e, t5 = f, t6 = g, t7 = h;
373 
374  // Transform 2
// No message words are read here: every round takes a literal constant.
// Round 0 uses 0xc28a2f98 (= 0x428a2f98 + 0x80000000) and round 15 uses
// 0xc19bf374 (= 0xc19bf174 + 0x200), which corresponds to a block holding
// only the 0x80 padding byte and a 512-bit length, like padding1 in
// TransformD64Wrapper. The remaining constants are presumably the standard
// round constants with that fixed block's schedule words pre-added
// (NOTE(review): not re-derived here).
375  Round(a, b, c, d, e, f, g, h, 0xc28a2f98ul);
376  Round(h, a, b, c, d, e, f, g, 0x71374491ul);
377  Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful);
378  Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul);
379  Round(e, f, g, h, a, b, c, d, 0x3956c25bul);
380  Round(d, e, f, g, h, a, b, c, 0x59f111f1ul);
381  Round(c, d, e, f, g, h, a, b, 0x923f82a4ul);
382  Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul);
383  Round(a, b, c, d, e, f, g, h, 0xd807aa98ul);
384  Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
385  Round(g, h, a, b, c, d, e, f, 0x243185beul);
386  Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
387  Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
388  Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
389  Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
390  Round(b, c, d, e, f, g, h, a, 0xc19bf374ul);
391  Round(a, b, c, d, e, f, g, h, 0x649b69c1ul);
392  Round(h, a, b, c, d, e, f, g, 0xf0fe4786ul);
393  Round(g, h, a, b, c, d, e, f, 0x0fe1edc6ul);
394  Round(f, g, h, a, b, c, d, e, 0x240cf254ul);
395  Round(e, f, g, h, a, b, c, d, 0x4fe9346ful);
396  Round(d, e, f, g, h, a, b, c, 0x6cc984beul);
397  Round(c, d, e, f, g, h, a, b, 0x61b9411eul);
398  Round(b, c, d, e, f, g, h, a, 0x16f988faul);
399  Round(a, b, c, d, e, f, g, h, 0xf2c65152ul);
400  Round(h, a, b, c, d, e, f, g, 0xa88e5a6dul);
401  Round(g, h, a, b, c, d, e, f, 0xb019fc65ul);
402  Round(f, g, h, a, b, c, d, e, 0xb9d99ec7ul);
403  Round(e, f, g, h, a, b, c, d, 0x9a1231c3ul);
404  Round(d, e, f, g, h, a, b, c, 0xe70eeaa0ul);
405  Round(c, d, e, f, g, h, a, b, 0xfdb1232bul);
406  Round(b, c, d, e, f, g, h, a, 0xc7353eb0ul);
407  Round(a, b, c, d, e, f, g, h, 0x3069bad5ul);
408  Round(h, a, b, c, d, e, f, g, 0xcb976d5ful);
409  Round(g, h, a, b, c, d, e, f, 0x5a0f118ful);
410  Round(f, g, h, a, b, c, d, e, 0xdc1eeefdul);
411  Round(e, f, g, h, a, b, c, d, 0x0a35b689ul);
412  Round(d, e, f, g, h, a, b, c, 0xde0b7a04ul);
413  Round(c, d, e, f, g, h, a, b, 0x58f4ca9dul);
414  Round(b, c, d, e, f, g, h, a, 0xe15d5b16ul);
415  Round(a, b, c, d, e, f, g, h, 0x007f3e86ul);
416  Round(h, a, b, c, d, e, f, g, 0x37088980ul);
417  Round(g, h, a, b, c, d, e, f, 0xa507ea32ul);
418  Round(f, g, h, a, b, c, d, e, 0x6fab9537ul);
419  Round(e, f, g, h, a, b, c, d, 0x17406110ul);
420  Round(d, e, f, g, h, a, b, c, 0x0d8cd6f1ul);
421  Round(c, d, e, f, g, h, a, b, 0xcdaa3b6dul);
422  Round(b, c, d, e, f, g, h, a, 0xc0bbbe37ul);
423  Round(a, b, c, d, e, f, g, h, 0x83613bdaul);
424  Round(h, a, b, c, d, e, f, g, 0xdb48a363ul);
425  Round(g, h, a, b, c, d, e, f, 0x0b02e931ul);
426  Round(f, g, h, a, b, c, d, e, 0x6fd15ca7ul);
427  Round(e, f, g, h, a, b, c, d, 0x521afacaul);
428  Round(d, e, f, g, h, a, b, c, 0x31338431ul);
429  Round(c, d, e, f, g, h, a, b, 0x6ed41a95ul);
430  Round(b, c, d, e, f, g, h, a, 0x6d437890ul);
431  Round(a, b, c, d, e, f, g, h, 0xc39c91f2ul);
432  Round(h, a, b, c, d, e, f, g, 0x9eccabbdul);
433  Round(g, h, a, b, c, d, e, f, 0xb5c9a0e6ul);
434  Round(f, g, h, a, b, c, d, e, 0x532fb63cul);
435  Round(e, f, g, h, a, b, c, d, 0xd2c741c6ul);
436  Round(d, e, f, g, h, a, b, c, 0x07237ea3ul);
437  Round(c, d, e, f, g, h, a, b, 0xa4954b68ul);
438  Round(b, c, d, e, f, g, h, a, 0x4c191d76ul);
439 
// Feed-forward for Transform 2. The eight resulting words are stored in
// w0..w7 and become the first eight message words of Transform 3.
440  w0 = t0 + a;
441  w1 = t1 + b;
442  w2 = t2 + c;
443  w3 = t3 + d;
444  w4 = t4 + e;
445  w5 = t5 + f;
446  w6 = t6 + g;
447  w7 = t7 + h;
448 
449  // Transform 3
// Restart from the initial state for the second hash.
450  a = 0x6a09e667ul;
451  b = 0xbb67ae85ul;
452  c = 0x3c6ef372ul;
453  d = 0xa54ff53aul;
454  e = 0x510e527ful;
455  f = 0x9b05688cul;
456  g = 0x1f83d9abul;
457  h = 0x5be0cd19ul;
458 
// Rounds 0-7 consume w0..w7. Rounds 8-15 use literal constants: 0x5807aa98
// is 0xd807aa98 + 0x80000000 and 0xc19bf274 is 0xc19bf174 + 0x100, matching a
// 0x80 padding byte at offset 32 and a 256-bit length, like buffer2 in
// TransformD64Wrapper.
459  Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + w0);
460  Round(h, a, b, c, d, e, f, g, 0x71374491ul + w1);
461  Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + w2);
462  Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + w3);
463  Round(e, f, g, h, a, b, c, d, 0x3956c25bul + w4);
464  Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + w5);
465  Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + w6);
466  Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + w7);
467  Round(a, b, c, d, e, f, g, h, 0x5807aa98ul);
468  Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
469  Round(g, h, a, b, c, d, e, f, 0x243185beul);
470  Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
471  Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
472  Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
473  Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
474  Round(b, c, d, e, f, g, h, a, 0xc19bf274ul);
// Rounds 16-30: schedule extension with the fixed words folded into literals
// or dropped where they are zero. For example 0xa00000 equals sigma1(0x100)
// and 0x11002000 equals sigma0(0x80000000). w8..w15 are assigned (not
// accumulated) here because they hold no earlier value in this transform.
475  Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma0(w1)));
476  Round(h, a, b, c, d, e, f, g,
477  0xefbe4786ul + (w1 += 0xa00000ul + sigma0(w2)));
478  Round(g, h, a, b, c, d, e, f,
479  0x0fc19dc6ul + (w2 += sigma1(w0) + sigma0(w3)));
480  Round(f, g, h, a, b, c, d, e,
481  0x240ca1ccul + (w3 += sigma1(w1) + sigma0(w4)));
482  Round(e, f, g, h, a, b, c, d,
483  0x2de92c6ful + (w4 += sigma1(w2) + sigma0(w5)));
484  Round(d, e, f, g, h, a, b, c,
485  0x4a7484aaul + (w5 += sigma1(w3) + sigma0(w6)));
486  Round(c, d, e, f, g, h, a, b,
487  0x5cb0a9dcul + (w6 += sigma1(w4) + 0x100ul + sigma0(w7)));
488  Round(b, c, d, e, f, g, h, a,
489  0x76f988daul + (w7 += sigma1(w5) + w0 + 0x11002000ul));
490  Round(a, b, c, d, e, f, g, h,
491  0x983e5152ul + (w8 = 0x80000000ul + sigma1(w6) + w1));
492  Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 = sigma1(w7) + w2));
493  Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 = sigma1(w8) + w3));
494  Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 = sigma1(w9) + w4));
495  Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 = sigma1(w10) + w5));
496  Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 = sigma1(w11) + w6));
497  Round(c, d, e, f, g, h, a, b,
498  0x06ca6351ul + (w14 = sigma1(w12) + w7 + 0x400022ul));
499  Round(b, c, d, e, f, g, h, a,
500  0x14292967ul + (w15 = 0x100ul + sigma1(w13) + w8 + sigma0(w0)));
// From round 32 on, the generic in-place schedule recurrence applies again.
501  Round(a, b, c, d, e, f, g, h,
502  0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
503  Round(h, a, b, c, d, e, f, g,
504  0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
505  Round(g, h, a, b, c, d, e, f,
506  0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
507  Round(f, g, h, a, b, c, d, e,
508  0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
509  Round(e, f, g, h, a, b, c, d,
510  0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
511  Round(d, e, f, g, h, a, b, c,
512  0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
513  Round(c, d, e, f, g, h, a, b,
514  0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
515  Round(b, c, d, e, f, g, h, a,
516  0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
517  Round(a, b, c, d, e, f, g, h,
518  0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
519  Round(h, a, b, c, d, e, f, g,
520  0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
521  Round(g, h, a, b, c, d, e, f,
522  0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
523  Round(f, g, h, a, b, c, d, e,
524  0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
525  Round(e, f, g, h, a, b, c, d,
526  0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
527  Round(d, e, f, g, h, a, b, c,
528  0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
529  Round(c, d, e, f, g, h, a, b,
530  0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
531  Round(b, c, d, e, f, g, h, a,
532  0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
533  Round(a, b, c, d, e, f, g, h,
534  0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
535  Round(h, a, b, c, d, e, f, g,
536  0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
537  Round(g, h, a, b, c, d, e, f,
538  0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
539  Round(f, g, h, a, b, c, d, e,
540  0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
541  Round(e, f, g, h, a, b, c, d,
542  0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
543  Round(d, e, f, g, h, a, b, c,
544  0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
545  Round(c, d, e, f, g, h, a, b,
546  0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
547  Round(b, c, d, e, f, g, h, a,
548  0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
549  Round(a, b, c, d, e, f, g, h,
550  0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
551  Round(h, a, b, c, d, e, f, g,
552  0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
553  Round(g, h, a, b, c, d, e, f,
554  0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
555  Round(f, g, h, a, b, c, d, e,
556  0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
557  Round(e, f, g, h, a, b, c, d,
558  0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
559  Round(d, e, f, g, h, a, b, c,
560  0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
561  Round(c, d, e, f, g, h, a, b,
562  0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
563  Round(b, c, d, e, f, g, h, a,
564  0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
565 
566  // Output
// Feed-forward with the initial state, then serialize big-endian.
567  WriteBE32(out + 0, a + 0x6a09e667ul);
568  WriteBE32(out + 4, b + 0xbb67ae85ul);
569  WriteBE32(out + 8, c + 0x3c6ef372ul);
570  WriteBE32(out + 12, d + 0xa54ff53aul);
571  WriteBE32(out + 16, e + 0x510e527ful);
572  WriteBE32(out + 20, f + 0x9b05688cul);
573  WriteBE32(out + 24, g + 0x1f83d9abul);
574  WriteBE32(out + 28, h + 0x5be0cd19ul);
575  }
576 
577 } // namespace sha256
578 
// Function-pointer types used for runtime dispatch.
// TransformType: (state, input chunks, number of 64-byte blocks), the
// signature of sha256::Transform.
579 typedef void (*TransformType)(uint32_t *, const uint8_t *, size_t);
// TransformD64Type: (output, input), the signature of sha256::TransformD64
// and of the N-way Transform_*way routines.
580 typedef void (*TransformD64Type)(uint8_t *, const uint8_t *);
581 
582 template <TransformType tr>
583 void TransformD64Wrapper(uint8_t *out, const uint8_t *in) {
584  uint32_t s[8];
585  static const uint8_t padding1[64] = {
586  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
587  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0};
589  uint8_t buffer2[64] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
590  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
591  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0};
593  sha256::Initialize(s);
594  tr(s, in, 1);
595  tr(s, padding1, 1);
596  WriteBE32(buffer2 + 0, s[0]);
597  WriteBE32(buffer2 + 4, s[1]);
598  WriteBE32(buffer2 + 8, s[2]);
599  WriteBE32(buffer2 + 12, s[3]);
600  WriteBE32(buffer2 + 16, s[4]);
601  WriteBE32(buffer2 + 20, s[5]);
602  WriteBE32(buffer2 + 24, s[6]);
603  WriteBE32(buffer2 + 28, s[7]);
604  sha256::Initialize(s);
605  tr(s, buffer2, 1);
606  WriteBE32(out + 0, s[0]);
607  WriteBE32(out + 4, s[1]);
608  WriteBE32(out + 8, s[2]);
609  WriteBE32(out + 12, s[3]);
610  WriteBE32(out + 16, s[4]);
611  WriteBE32(out + 20, s[5]);
612  WriteBE32(out + 24, s[6]);
613  WriteBE32(out + 28, s[7]);
614 }
615 
// Currently selected implementations. Both 1-way pointers default to the
// portable C++ routines in namespace sha256; SHA256AutoDetect() may replace
// them.
616 TransformType Transform = sha256::Transform;
617 TransformD64Type TransformD64 = sha256::TransformD64;
// Optional multi-input variants. They stay null unless SHA256AutoDetect()
// installs one; SelfTest() and SHA256D64() check for null before using them.
618 TransformD64Type TransformD64_2way = nullptr;
619 TransformD64Type TransformD64_4way = nullptr;
620 TransformD64Type TransformD64_8way = nullptr;
621 
622 bool SelfTest() {
623  // Input state (equal to the initial SHA256 state)
624  static const uint32_t init[8] = {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul,
625  0xa54ff53aul, 0x510e527ful, 0x9b05688cul,
626  0x1f83d9abul, 0x5be0cd19ul};
627  // Some random input data to test with
628  static const uint8_t data[641] =
629  "-" // Intentionally not aligned
630  "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "
631  "eiusmod tempor incididunt ut labore et dolore magna aliqua. Et m"
632  "olestie ac feugiat sed lectus vestibulum mattis ullamcorper. Mor"
633  "bi blandit cursus risus at ultrices mi tempus imperdiet nulla. N"
634  "unc congue nisi vita suscipit tellus mauris. Imperdiet proin fer"
635  "mentum leo vel orci. Massa tempor nec feugiat nisl pretium fusce"
636  " id velit. Telus in metus vulputate eu scelerisque felis. Mi tem"
637  "pus imperdiet nulla malesuada pellentesque. Tristique magna sit.";
638  // Expected output state for hashing the i*64 first input bytes above
639  // (excluding SHA256 padding).
640  static const uint32_t result[9][8] = {
641  {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful,
642  0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul},
643  {0x91f8ec6bul, 0x4da10fe3ul, 0x1c9c292cul, 0x45e18185ul, 0x435cc111ul,
644  0x3ca26f09ul, 0xeb954caeul, 0x402a7069ul},
645  {0xcabea5acul, 0x374fb97cul, 0x182ad996ul, 0x7bd69cbful, 0x450ff900ul,
646  0xc1d2be8aul, 0x6a41d505ul, 0xe6212dc3ul},
647  {0xbcff09d6ul, 0x3e76f36eul, 0x3ecb2501ul, 0x78866e97ul, 0xe1c1e2fdul,
648  0x32f4eafful, 0x8aa6c4e5ul, 0xdfc024bcul},
649  {0xa08c5d94ul, 0x0a862f93ul, 0x6b7f2f40ul, 0x8f9fae76ul, 0x6d40439ful,
650  0x79dcee0cul, 0x3e39ff3aul, 0xdc3bdbb1ul},
651  {0x216a0895ul, 0x9f1a3662ul, 0xe99946f9ul, 0x87ba4364ul, 0x0fb5db2cul,
652  0x12bed3d3ul, 0x6689c0c7ul, 0x292f1b04ul},
653  {0xca3067f8ul, 0xbc8c2656ul, 0x37cb7e0dul, 0x9b6b8b0ful, 0x46dc380bul,
654  0xf1287f57ul, 0xc42e4b23ul, 0x3fefe94dul},
655  {0x3e4c4039ul, 0xbb6fca8cul, 0x6f27d2f7ul, 0x301e44a4ul, 0x8352ba14ul,
656  0x5769ce37ul, 0x48a1155ful, 0xc0e1c4c6ul},
657  {0xfe2fa9ddul, 0x69d0862bul, 0x1ae0db23ul, 0x471f9244ul, 0xf55c0145ul,
658  0xc30f9c3bul, 0x40a84ea0ul, 0x5b8a266cul},
659  };
660  // Expected output for each of the individual 8 64-byte messages under full
661  // double SHA256 (including padding).
662  static const uint8_t result_d64[256] = {
663  0x09, 0x3a, 0xc4, 0xd0, 0x0f, 0xf7, 0x57, 0xe1, 0x72, 0x85, 0x79, 0x42,
664  0xfe, 0xe7, 0xe0, 0xa0, 0xfc, 0x52, 0xd7, 0xdb, 0x07, 0x63, 0x45, 0xfb,
665  0x53, 0x14, 0x7d, 0x17, 0x22, 0x86, 0xf0, 0x52, 0x48, 0xb6, 0x11, 0x9e,
666  0x6e, 0x48, 0x81, 0x6d, 0xcc, 0x57, 0x1f, 0xb2, 0x97, 0xa8, 0xd5, 0x25,
667  0x9b, 0x82, 0xaa, 0x89, 0xe2, 0xfd, 0x2d, 0x56, 0xe8, 0x28, 0x83, 0x0b,
668  0xe2, 0xfa, 0x53, 0xb7, 0xd6, 0x6b, 0x07, 0x85, 0x83, 0xb0, 0x10, 0xa2,
669  0xf5, 0x51, 0x3c, 0xf9, 0x60, 0x03, 0xab, 0x45, 0x6c, 0x15, 0x6e, 0xef,
670  0xb5, 0xac, 0x3e, 0x6c, 0xdf, 0xb4, 0x92, 0x22, 0x2d, 0xce, 0xbf, 0x3e,
671  0xe9, 0xe5, 0xf6, 0x29, 0x0e, 0x01, 0x4f, 0xd2, 0xd4, 0x45, 0x65, 0xb3,
672  0xbb, 0xf2, 0x4c, 0x16, 0x37, 0x50, 0x3c, 0x6e, 0x49, 0x8c, 0x5a, 0x89,
673  0x2b, 0x1b, 0xab, 0xc4, 0x37, 0xd1, 0x46, 0xe9, 0x3d, 0x0e, 0x85, 0xa2,
674  0x50, 0x73, 0xa1, 0x5e, 0x54, 0x37, 0xd7, 0x94, 0x17, 0x56, 0xc2, 0xd8,
675  0xe5, 0x9f, 0xed, 0x4e, 0xae, 0x15, 0x42, 0x06, 0x0d, 0x74, 0x74, 0x5e,
676  0x24, 0x30, 0xce, 0xd1, 0x9e, 0x50, 0xa3, 0x9a, 0xb8, 0xf0, 0x4a, 0x57,
677  0x69, 0x78, 0x67, 0x12, 0x84, 0x58, 0xbe, 0xc7, 0x36, 0xaa, 0xee, 0x7c,
678  0x64, 0xa3, 0x76, 0xec, 0xff, 0x55, 0x41, 0x00, 0x2a, 0x44, 0x68, 0x4d,
679  0xb6, 0x53, 0x9e, 0x1c, 0x95, 0xb7, 0xca, 0xdc, 0x7f, 0x7d, 0x74, 0x27,
680  0x5c, 0x8e, 0xa6, 0x84, 0xb5, 0xac, 0x87, 0xa9, 0xf3, 0xff, 0x75, 0xf2,
681  0x34, 0xcd, 0x1a, 0x3b, 0x82, 0x2c, 0x2b, 0x4e, 0x6a, 0x46, 0x30, 0xa6,
682  0x89, 0x86, 0x23, 0xac, 0xf8, 0xa5, 0x15, 0xe9, 0x0a, 0xaa, 0x1e, 0x9a,
683  0xd7, 0x93, 0x6b, 0x28, 0xe4, 0x3b, 0xfd, 0x59, 0xc6, 0xed, 0x7c, 0x5f,
684  0xa5, 0x41, 0xcb, 0x51};
685 
686  // Test Transform() for 0 through 8 transformations.
687  for (size_t i = 0; i <= 8; ++i) {
688  uint32_t state[8];
689  std::copy(init, init + 8, state);
690  Transform(state, data + 1, i);
691  if (!std::equal(state, state + 8, result[i])) {
692  return false;
693  }
694  }
695 
696  // Test TransformD64
697  {
698  uint8_t out[32];
699  TransformD64(out, data + 1);
700  if (!std::equal(out, out + 32, result_d64)) {
701  return false;
702  }
703  }
704 
705  // Test TransformD64_2way, if available.
706  if (TransformD64_2way) {
707  uint8_t out[64];
708  TransformD64_2way(out, data + 1);
709  if (!std::equal(out, out + 64, result_d64)) {
710  return false;
711  }
712  }
713 
714  // Test TransformD64_4way, if available.
715  if (TransformD64_4way) {
716  uint8_t out[128];
717  TransformD64_4way(out, data + 1);
718  if (!std::equal(out, out + 128, result_d64)) {
719  return false;
720  }
721  }
722 
723  // Test TransformD64_8way, if available.
724  if (TransformD64_8way) {
725  uint8_t out[256];
726  TransformD64_8way(out, data + 1);
727  if (!std::equal(out, out + 256, result_d64)) {
728  return false;
729  }
730  }
731 
732  return true;
733 }
734 
735 #if defined(USE_ASM) && \
736  (defined(__x86_64__) || defined(__amd64__) || defined(__i386__))
737 
/**
 * Report whether bits 1 and 2 of the value returned by XGETBV (with ECX = 0)
 * are both set. Per the Intel SDM these XCR0 bits indicate that the OS has
 * enabled SSE and AVX register state.
 */
bool AVXEnabled() {
    uint32_t xcr_lo;
    uint32_t xcr_hi;
    __asm__("xgetbv" : "=a"(xcr_lo), "=d"(xcr_hi) : "c"(0));
    const uint32_t required_bits = 6;
    return (xcr_lo & required_bits) == required_bits;
}
743 #endif
744 } // namespace
745 
746 std::string SHA256AutoDetect() {
747  std::string ret = "standard";
748 #if defined(USE_ASM) && defined(HAVE_GETCPUID)
749  bool have_sse4 = false;
750  bool have_xsave = false;
751  bool have_avx = false;
752  bool have_avx2 = false;
753  bool have_shani = false;
754  bool enabled_avx = false;
755 
756  (void)AVXEnabled;
757  (void)have_sse4;
758  (void)have_avx;
759  (void)have_xsave;
760  (void)have_avx2;
761  (void)have_shani;
762  (void)enabled_avx;
763 
764  uint32_t eax, ebx, ecx, edx;
765  GetCPUID(1, 0, eax, ebx, ecx, edx);
766  have_sse4 = (ecx >> 19) & 1;
767  have_xsave = (ecx >> 27) & 1;
768  have_avx = (ecx >> 28) & 1;
769  if (have_xsave && have_avx) {
770  enabled_avx = AVXEnabled();
771  }
772  if (have_sse4) {
773  GetCPUID(7, 0, eax, ebx, ecx, edx);
774  have_avx2 = (ebx >> 5) & 1;
775  have_shani = (ebx >> 29) & 1;
776  }
777 
778 #if defined(ENABLE_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
779  if (have_shani) {
781  TransformD64 = TransformD64Wrapper<sha256_shani::Transform>;
782  TransformD64_2way = sha256d64_shani::Transform_2way;
783  ret = "shani(1way,2way)";
784  have_sse4 = false; // Disable SSE4/AVX2;
785  have_avx2 = false;
786  }
787 #endif
788 
789  if (have_sse4) {
790 #if defined(__x86_64__) || defined(__amd64__)
792  TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
793  ret = "sse4(1way)";
794 #endif
795 #if defined(ENABLE_SSE41) && !defined(BUILD_BITCOIN_INTERNAL)
796  TransformD64_4way = sha256d64_sse41::Transform_4way;
797  ret += ",sse41(4way)";
798 #endif
799  }
800 
801 #if defined(ENABLE_AVX2) && !defined(BUILD_BITCOIN_INTERNAL)
802  if (have_avx2 && have_avx && enabled_avx) {
803  TransformD64_8way = sha256d64_avx2::Transform_8way;
804  ret += ",avx2(8way)";
805  }
806 #endif
807 #endif
808 
809  assert(SelfTest());
810  return ret;
811 }
812 
814 
815 CSHA256::CSHA256() : bytes(0) {
816  sha256::Initialize(s);
817 }
818 
819 CSHA256 &CSHA256::Write(const uint8_t *data, size_t len) {
820  const uint8_t *end = data + len;
821  size_t bufsize = bytes % 64;
822  if (bufsize && bufsize + len >= 64) {
823  // Fill the buffer, and process it.
824  memcpy(buf + bufsize, data, 64 - bufsize);
825  bytes += 64 - bufsize;
826  data += 64 - bufsize;
827  Transform(s, buf, 1);
828  bufsize = 0;
829  }
830  if (end - data >= 64) {
831  size_t blocks = (end - data) / 64;
832  Transform(s, data, blocks);
833  data += 64 * blocks;
834  bytes += 64 * blocks;
835  }
836  if (end > data) {
837  // Fill the buffer with what remains.
838  memcpy(buf + bufsize, data, end - data);
839  bytes += end - data;
840  }
841  return *this;
842 }
843 
844 void CSHA256::Finalize(uint8_t hash[OUTPUT_SIZE]) {
845  static const uint8_t pad[64] = {0x80};
846  uint8_t sizedesc[8];
847  WriteBE64(sizedesc, bytes << 3);
848  Write(pad, 1 + ((119 - (bytes % 64)) % 64));
849  Write(sizedesc, 8);
850  WriteBE32(hash, s[0]);
851  WriteBE32(hash + 4, s[1]);
852  WriteBE32(hash + 8, s[2]);
853  WriteBE32(hash + 12, s[3]);
854  WriteBE32(hash + 16, s[4]);
855  WriteBE32(hash + 20, s[5]);
856  WriteBE32(hash + 24, s[6]);
857  WriteBE32(hash + 28, s[7]);
858 }
859 
861  bytes = 0;
862  sha256::Initialize(s);
863  return *this;
864 }
865 
866 void SHA256D64(uint8_t *out, const uint8_t *in, size_t blocks) {
867  if (TransformD64_8way) {
868  while (blocks >= 8) {
869  TransformD64_8way(out, in);
870  out += 256;
871  in += 512;
872  blocks -= 8;
873  }
874  }
875  if (TransformD64_4way) {
876  while (blocks >= 4) {
877  TransformD64_4way(out, in);
878  out += 128;
879  in += 256;
880  blocks -= 4;
881  }
882  }
883  if (TransformD64_2way) {
884  while (blocks >= 2) {
885  TransformD64_2way(out, in);
886  out += 64;
887  in += 128;
888  blocks -= 2;
889  }
890  }
891  while (blocks) {
892  TransformD64(out, in);
893  out += 32;
894  in += 64;
895  --blocks;
896  }
897 }
CSHA256::buf
uint8_t buf[64]
Definition: sha256.h:16
SHA256D64
void SHA256D64(uint8_t *out, const uint8_t *in, size_t blocks)
Compute multiple double-SHA256's of 64-byte blobs.
Definition: sha256.cpp:866
Maj
#define Maj(x, y, z)
Definition: hash_impl.h:18
CSHA256::s
uint32_t s[8]
Definition: sha256.h:15
Sigma0
#define Sigma0(x)
Definition: hash_impl.h:19
cpuid.h
sha256
Internal SHA-256 implementation.
Definition: sha256.cpp:40
CSHA256::Reset
CSHA256 & Reset()
Definition: sha256.cpp:860
Round
#define Round(a, b, c, d, e, f, g, h, k, w)
Definition: hash_impl.h:24
sha256_shani
Definition: sha256.cpp:33
SHA256AutoDetect
std::string SHA256AutoDetect()
Autodetect the best available SHA256 implementation.
Definition: sha256.cpp:746
sha256d64_sse41
Definition: sha256.cpp:21
CSHA256::Finalize
void Finalize(uint8_t hash[OUTPUT_SIZE])
Definition: sha256.cpp:844
sigma1
#define sigma1(x)
Definition: hash_impl.h:22
WriteBE64
static void WriteBE64(uint8_t *ptr, uint64_t x)
Definition: common.h:73
WriteBE32
static void WriteBE32(uint8_t *ptr, uint32_t x)
Definition: common.h:68
sha256d64_sse41::Transform_4way
void Transform_4way(uint8_t *out, const uint8_t *in)
sha256d64_avx2::Transform_8way
void Transform_8way(uint8_t *out, const uint8_t *in)
sha256.h
common.h
sha256d64_shani
Definition: sha256.cpp:29
sigma0
#define sigma0(x)
Definition: hash_impl.h:21
sha256d64_avx2
Definition: sha256.cpp:25
CSHA256::CSHA256
CSHA256()
Definition: sha256.cpp:815
CSHA256
A hasher class for SHA-256.
Definition: sha256.h:13
Ch
#define Ch(x, y, z)
Definition: hash_impl.h:17
sha256_shani::Transform
void Transform(uint32_t *s, const uint8_t *chunk, size_t blocks)
CSHA256::Write
CSHA256 & Write(const uint8_t *data, size_t len)
Definition: sha256.cpp:819
CSHA256::bytes
uint64_t bytes
Definition: sha256.h:17
sha256d64_shani::Transform_2way
void Transform_2way(uint8_t *out, const uint8_t *in)
Sigma1
#define Sigma1(x)
Definition: hash_impl.h:20
ReadBE32
static uint32_t ReadBE32(const uint8_t *ptr)
Definition: common.h:56