Bitcoin Core  24.99.0
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
1 // Copyright 2014 BitPay Inc.
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or https://opensource.org/licenses/mit-license.php.
4 
5 #include <univalue.h>
6 #include <univalue_utffilter.h>
7 
8 #include <cstdio>
9 #include <cstdint>
10 #include <cstring>
11 #include <string>
12 #include <vector>
13 
/*
 * Maximum container nesting accepted by the parser.
 *
 * According to stackexchange, the original json test suite wanted to
 * limit depth to 22, while widely-deployed PHP bails at depth 512.
 * We follow PHP's lead; that should be more than sufficient, since
 * further stackexchange comments indicate depth > 32 rarely occurs.
 */
static constexpr size_t MAX_JSON_DEPTH{512};
21 
// Returns true iff ch is one of the ASCII decimal digits '0'..'9'.
// Deliberately independent of the C locale (unlike isdigit()).
static bool json_isdigit(int ch)
{
    if (ch < '0')
        return false;
    return ch <= '9';
}
26 
27 // convert hexadecimal string to unsigned integer
28 static const char *hatoui(const char *first, const char *last,
29  unsigned int& out)
30 {
31  unsigned int result = 0;
32  for (; first != last; ++first)
33  {
34  int digit;
35  if (json_isdigit(*first))
36  digit = *first - '0';
37 
38  else if (*first >= 'a' && *first <= 'f')
39  digit = *first - 'a' + 10;
40 
41  else if (*first >= 'A' && *first <= 'F')
42  digit = *first - 'A' + 10;
43 
44  else
45  break;
46 
47  result = 16 * result + digit;
48  }
49  out = result;
50 
51  return first;
52 }
53 
54 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
55  const char *raw, const char *end)
56 {
57  tokenVal.clear();
58  consumed = 0;
59 
60  const char *rawStart = raw;
61 
62  while (raw < end && (json_isspace(*raw))) // skip whitespace
63  raw++;
64 
65  if (raw >= end)
66  return JTOK_NONE;
67 
68  switch (*raw) {
69 
70  case '{':
71  raw++;
72  consumed = (raw - rawStart);
73  return JTOK_OBJ_OPEN;
74  case '}':
75  raw++;
76  consumed = (raw - rawStart);
77  return JTOK_OBJ_CLOSE;
78  case '[':
79  raw++;
80  consumed = (raw - rawStart);
81  return JTOK_ARR_OPEN;
82  case ']':
83  raw++;
84  consumed = (raw - rawStart);
85  return JTOK_ARR_CLOSE;
86 
87  case ':':
88  raw++;
89  consumed = (raw - rawStart);
90  return JTOK_COLON;
91  case ',':
92  raw++;
93  consumed = (raw - rawStart);
94  return JTOK_COMMA;
95 
96  case 'n':
97  case 't':
98  case 'f':
99  if (!strncmp(raw, "null", 4)) {
100  raw += 4;
101  consumed = (raw - rawStart);
102  return JTOK_KW_NULL;
103  } else if (!strncmp(raw, "true", 4)) {
104  raw += 4;
105  consumed = (raw - rawStart);
106  return JTOK_KW_TRUE;
107  } else if (!strncmp(raw, "false", 5)) {
108  raw += 5;
109  consumed = (raw - rawStart);
110  return JTOK_KW_FALSE;
111  } else
112  return JTOK_ERR;
113 
114  case '-':
115  case '0':
116  case '1':
117  case '2':
118  case '3':
119  case '4':
120  case '5':
121  case '6':
122  case '7':
123  case '8':
124  case '9': {
125  // part 1: int
126  std::string numStr;
127 
128  const char *first = raw;
129 
130  const char *firstDigit = first;
131  if (!json_isdigit(*firstDigit))
132  firstDigit++;
133  if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
134  return JTOK_ERR;
135 
136  numStr += *raw; // copy first char
137  raw++;
138 
139  if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
140  return JTOK_ERR;
141 
142  while (raw < end && json_isdigit(*raw)) { // copy digits
143  numStr += *raw;
144  raw++;
145  }
146 
147  // part 2: frac
148  if (raw < end && *raw == '.') {
149  numStr += *raw; // copy .
150  raw++;
151 
152  if (raw >= end || !json_isdigit(*raw))
153  return JTOK_ERR;
154  while (raw < end && json_isdigit(*raw)) { // copy digits
155  numStr += *raw;
156  raw++;
157  }
158  }
159 
160  // part 3: exp
161  if (raw < end && (*raw == 'e' || *raw == 'E')) {
162  numStr += *raw; // copy E
163  raw++;
164 
165  if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
166  numStr += *raw;
167  raw++;
168  }
169 
170  if (raw >= end || !json_isdigit(*raw))
171  return JTOK_ERR;
172  while (raw < end && json_isdigit(*raw)) { // copy digits
173  numStr += *raw;
174  raw++;
175  }
176  }
177 
178  tokenVal = numStr;
179  consumed = (raw - rawStart);
180  return JTOK_NUMBER;
181  }
182 
183  case '"': {
184  raw++; // skip "
185 
186  std::string valStr;
187  JSONUTF8StringFilter writer(valStr);
188 
189  while (true) {
190  if (raw >= end || (unsigned char)*raw < 0x20)
191  return JTOK_ERR;
192 
193  else if (*raw == '\\') {
194  raw++; // skip backslash
195 
196  if (raw >= end)
197  return JTOK_ERR;
198 
199  switch (*raw) {
200  case '"': writer.push_back('\"'); break;
201  case '\\': writer.push_back('\\'); break;
202  case '/': writer.push_back('/'); break;
203  case 'b': writer.push_back('\b'); break;
204  case 'f': writer.push_back('\f'); break;
205  case 'n': writer.push_back('\n'); break;
206  case 'r': writer.push_back('\r'); break;
207  case 't': writer.push_back('\t'); break;
208 
209  case 'u': {
210  unsigned int codepoint;
211  if (raw + 1 + 4 >= end ||
212  hatoui(raw + 1, raw + 1 + 4, codepoint) !=
213  raw + 1 + 4)
214  return JTOK_ERR;
215  writer.push_back_u(codepoint);
216  raw += 4;
217  break;
218  }
219  default:
220  return JTOK_ERR;
221 
222  }
223 
224  raw++; // skip esc'd char
225  }
226 
227  else if (*raw == '"') {
228  raw++; // skip "
229  break; // stop scanning
230  }
231 
232  else {
233  writer.push_back(static_cast<unsigned char>(*raw));
234  raw++;
235  }
236  }
237 
238  if (!writer.finalize())
239  return JTOK_ERR;
240  tokenVal = valStr;
241  consumed = (raw - rawStart);
242  return JTOK_STRING;
243  }
244 
245  default:
246  return JTOK_ERR;
247  }
248 }
249 
// Bit flags describing what UniValue::read() will accept as the next token.
enum expect_bits : unsigned {
    EXP_OBJ_NAME  = 0x01U, // an object key (string) or '}'
    EXP_COLON     = 0x02U, // the ':' after an object key
    EXP_ARR_VALUE = 0x04U, // a value or ']'
    EXP_VALUE     = 0x08U, // a value
    EXP_NOT_VALUE = 0x10U, // anything that does not start a value
};

// Test / set / clear one flag.  All three operate on a variable named
// `expectMask` that must be in scope where the macro is used.
#define expect(bit)      (expectMask & (EXP_##bit))
#define setExpect(bit)   (expectMask |= EXP_##bit)
#define clearExpect(bit) (expectMask &= ~EXP_##bit)
261 
262 bool UniValue::read(const char *raw, size_t size)
263 {
264  clear();
265 
266  uint32_t expectMask = 0;
267  std::vector<UniValue*> stack;
268 
269  std::string tokenVal;
270  unsigned int consumed;
271  enum jtokentype tok = JTOK_NONE;
272  enum jtokentype last_tok = JTOK_NONE;
273  const char* end = raw + size;
274  do {
275  last_tok = tok;
276 
277  tok = getJsonToken(tokenVal, consumed, raw, end);
278  if (tok == JTOK_NONE || tok == JTOK_ERR)
279  return false;
280  raw += consumed;
281 
282  bool isValueOpen = jsonTokenIsValue(tok) ||
283  tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
284 
285  if (expect(VALUE)) {
286  if (!isValueOpen)
287  return false;
288  clearExpect(VALUE);
289 
290  } else if (expect(ARR_VALUE)) {
291  bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
292  if (!isArrValue)
293  return false;
294 
295  clearExpect(ARR_VALUE);
296 
297  } else if (expect(OBJ_NAME)) {
298  bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
299  if (!isObjName)
300  return false;
301 
302  } else if (expect(COLON)) {
303  if (tok != JTOK_COLON)
304  return false;
305  clearExpect(COLON);
306 
307  } else if (!expect(COLON) && (tok == JTOK_COLON)) {
308  return false;
309  }
310 
311  if (expect(NOT_VALUE)) {
312  if (isValueOpen)
313  return false;
314  clearExpect(NOT_VALUE);
315  }
316 
317  switch (tok) {
318 
319  case JTOK_OBJ_OPEN:
320  case JTOK_ARR_OPEN: {
321  VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
322  if (!stack.size()) {
323  if (utyp == VOBJ)
324  setObject();
325  else
326  setArray();
327  stack.push_back(this);
328  } else {
329  UniValue tmpVal(utyp);
330  UniValue *top = stack.back();
331  top->values.push_back(tmpVal);
332 
333  UniValue *newTop = &(top->values.back());
334  stack.push_back(newTop);
335  }
336 
337  if (stack.size() > MAX_JSON_DEPTH)
338  return false;
339 
340  if (utyp == VOBJ)
341  setExpect(OBJ_NAME);
342  else
343  setExpect(ARR_VALUE);
344  break;
345  }
346 
347  case JTOK_OBJ_CLOSE:
348  case JTOK_ARR_CLOSE: {
349  if (!stack.size() || (last_tok == JTOK_COMMA))
350  return false;
351 
352  VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
353  UniValue *top = stack.back();
354  if (utyp != top->getType())
355  return false;
356 
357  stack.pop_back();
358  clearExpect(OBJ_NAME);
359  setExpect(NOT_VALUE);
360  break;
361  }
362 
363  case JTOK_COLON: {
364  if (!stack.size())
365  return false;
366 
367  UniValue *top = stack.back();
368  if (top->getType() != VOBJ)
369  return false;
370 
371  setExpect(VALUE);
372  break;
373  }
374 
375  case JTOK_COMMA: {
376  if (!stack.size() ||
377  (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
378  return false;
379 
380  UniValue *top = stack.back();
381  if (top->getType() == VOBJ)
382  setExpect(OBJ_NAME);
383  else
384  setExpect(ARR_VALUE);
385  break;
386  }
387 
388  case JTOK_KW_NULL:
389  case JTOK_KW_TRUE:
390  case JTOK_KW_FALSE: {
391  UniValue tmpVal;
392  switch (tok) {
393  case JTOK_KW_NULL:
394  // do nothing more
395  break;
396  case JTOK_KW_TRUE:
397  tmpVal.setBool(true);
398  break;
399  case JTOK_KW_FALSE:
400  tmpVal.setBool(false);
401  break;
402  default: /* impossible */ break;
403  }
404 
405  if (!stack.size()) {
406  *this = tmpVal;
407  break;
408  }
409 
410  UniValue *top = stack.back();
411  top->values.push_back(tmpVal);
412 
413  setExpect(NOT_VALUE);
414  break;
415  }
416 
417  case JTOK_NUMBER: {
418  UniValue tmpVal(VNUM, tokenVal);
419  if (!stack.size()) {
420  *this = tmpVal;
421  break;
422  }
423 
424  UniValue *top = stack.back();
425  top->values.push_back(tmpVal);
426 
427  setExpect(NOT_VALUE);
428  break;
429  }
430 
431  case JTOK_STRING: {
432  if (expect(OBJ_NAME)) {
433  UniValue *top = stack.back();
434  top->keys.push_back(tokenVal);
435  clearExpect(OBJ_NAME);
436  setExpect(COLON);
437  } else {
438  UniValue tmpVal(VSTR, tokenVal);
439  if (!stack.size()) {
440  *this = tmpVal;
441  break;
442  }
443  UniValue *top = stack.back();
444  top->values.push_back(tmpVal);
445  }
446 
447  setExpect(NOT_VALUE);
448  break;
449  }
450 
451  default:
452  return false;
453  }
454  } while (!stack.empty ());
455 
456  /* Check that nothing follows the initial construct (parsed above). */
457  tok = getJsonToken(tokenVal, consumed, raw, end);
458  if (tok != JTOK_NONE)
459  return false;
460 
461  return true;
462 }
463 
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
void push_back(UniValue val)
Definition: univalue.cpp:104
enum VType getType() const
Definition: univalue.h:64
@ VOBJ
Definition: univalue.h:21
@ VSTR
Definition: univalue.h:21
@ VARR
Definition: univalue.h:21
@ VNUM
Definition: univalue.h:21
void setArray()
Definition: univalue.cpp:92
void clear()
Definition: univalue.cpp:18
size_t size() const
Definition: univalue.h:68
void setBool(bool val)
Definition: univalue.cpp:31
std::vector< UniValue > values
Definition: univalue.h:105
std::vector< std::string > keys
Definition: univalue.h:104
void setObject()
Definition: univalue.cpp:98
bool read(const char *raw, size_t len)
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:169
static bool json_isspace(int ch)
Definition: univalue.h:186
jtokentype
Definition: univalue.h:149
@ JTOK_OBJ_CLOSE
Definition: univalue.h:153
@ JTOK_STRING
Definition: univalue.h:162
@ JTOK_COLON
Definition: univalue.h:156
@ JTOK_OBJ_OPEN
Definition: univalue.h:152
@ JTOK_NUMBER
Definition: univalue.h:161
@ JTOK_KW_NULL
Definition: univalue.h:158
@ JTOK_COMMA
Definition: univalue.h:157
@ JTOK_ARR_CLOSE
Definition: univalue.h:155
@ JTOK_KW_TRUE
Definition: univalue.h:159
@ JTOK_ARR_OPEN
Definition: univalue.h:154
@ JTOK_KW_FALSE
Definition: univalue.h:160
@ JTOK_ERR
Definition: univalue.h:150
@ JTOK_NONE
Definition: univalue.h:151
static bool json_isdigit(int ch)
#define clearExpect(bit)
static constexpr size_t MAX_JSON_DEPTH
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
#define expect(bit)
expect_bits
@ EXP_ARR_VALUE
@ EXP_NOT_VALUE
@ EXP_COLON
@ EXP_VALUE
@ EXP_OBJ_NAME
static const char * hatoui(const char *first, const char *last, unsigned int &out)
#define setExpect(bit)