Bitcoin Core  27.99.0
P2P Digital Currency
univalue_read.cpp
Go to the documentation of this file.
1 // Copyright 2014 BitPay Inc.
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or https://opensource.org/licenses/mit-license.php.
4 
5 #include <univalue.h>
6 #include <univalue_utffilter.h>
7 
8 #include <cstdint>
9 #include <cstdio>
10 #include <cstring>
11 #include <string>
12 #include <string_view>
13 #include <vector>
14 
/*
 * Maximum container nesting depth accepted by the parser.
 *
 * According to stackexchange, the original json test suite wanted a limit
 * of 22, while widely-deployed PHP bails at depth 512. We follow PHP's
 * lead, which should be more than sufficient: further stackexchange
 * comments indicate that depth > 32 rarely occurs.
 */
static constexpr size_t MAX_JSON_DEPTH{512};
22 
// Return true iff ch is one of the ASCII decimal digits '0'..'9'.
// Deliberately locale-independent (no <cctype>).
static bool json_isdigit(int ch)
{
    return '0' <= ch && ch <= '9';
}
27 
28 // convert hexadecimal string to unsigned integer
29 static const char *hatoui(const char *first, const char *last,
30  unsigned int& out)
31 {
32  unsigned int result = 0;
33  for (; first != last; ++first)
34  {
35  int digit;
36  if (json_isdigit(*first))
37  digit = *first - '0';
38 
39  else if (*first >= 'a' && *first <= 'f')
40  digit = *first - 'a' + 10;
41 
42  else if (*first >= 'A' && *first <= 'F')
43  digit = *first - 'A' + 10;
44 
45  else
46  break;
47 
48  result = 16 * result + digit;
49  }
50  out = result;
51 
52  return first;
53 }
54 
55 enum jtokentype getJsonToken(std::string& tokenVal, unsigned int& consumed,
56  const char *raw, const char *end)
57 {
58  tokenVal.clear();
59  consumed = 0;
60 
61  const char *rawStart = raw;
62 
63  while (raw < end && (json_isspace(*raw))) // skip whitespace
64  raw++;
65 
66  if (raw >= end)
67  return JTOK_NONE;
68 
69  switch (*raw) {
70 
71  case '{':
72  raw++;
73  consumed = (raw - rawStart);
74  return JTOK_OBJ_OPEN;
75  case '}':
76  raw++;
77  consumed = (raw - rawStart);
78  return JTOK_OBJ_CLOSE;
79  case '[':
80  raw++;
81  consumed = (raw - rawStart);
82  return JTOK_ARR_OPEN;
83  case ']':
84  raw++;
85  consumed = (raw - rawStart);
86  return JTOK_ARR_CLOSE;
87 
88  case ':':
89  raw++;
90  consumed = (raw - rawStart);
91  return JTOK_COLON;
92  case ',':
93  raw++;
94  consumed = (raw - rawStart);
95  return JTOK_COMMA;
96 
97  case 'n':
98  case 't':
99  case 'f':
100  if (!strncmp(raw, "null", 4)) {
101  raw += 4;
102  consumed = (raw - rawStart);
103  return JTOK_KW_NULL;
104  } else if (!strncmp(raw, "true", 4)) {
105  raw += 4;
106  consumed = (raw - rawStart);
107  return JTOK_KW_TRUE;
108  } else if (!strncmp(raw, "false", 5)) {
109  raw += 5;
110  consumed = (raw - rawStart);
111  return JTOK_KW_FALSE;
112  } else
113  return JTOK_ERR;
114 
115  case '-':
116  case '0':
117  case '1':
118  case '2':
119  case '3':
120  case '4':
121  case '5':
122  case '6':
123  case '7':
124  case '8':
125  case '9': {
126  // part 1: int
127  std::string numStr;
128 
129  const char *first = raw;
130 
131  const char *firstDigit = first;
132  if (!json_isdigit(*firstDigit))
133  firstDigit++;
134  if ((*firstDigit == '0') && json_isdigit(firstDigit[1]))
135  return JTOK_ERR;
136 
137  numStr += *raw; // copy first char
138  raw++;
139 
140  if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
141  return JTOK_ERR;
142 
143  while (raw < end && json_isdigit(*raw)) { // copy digits
144  numStr += *raw;
145  raw++;
146  }
147 
148  // part 2: frac
149  if (raw < end && *raw == '.') {
150  numStr += *raw; // copy .
151  raw++;
152 
153  if (raw >= end || !json_isdigit(*raw))
154  return JTOK_ERR;
155  while (raw < end && json_isdigit(*raw)) { // copy digits
156  numStr += *raw;
157  raw++;
158  }
159  }
160 
161  // part 3: exp
162  if (raw < end && (*raw == 'e' || *raw == 'E')) {
163  numStr += *raw; // copy E
164  raw++;
165 
166  if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
167  numStr += *raw;
168  raw++;
169  }
170 
171  if (raw >= end || !json_isdigit(*raw))
172  return JTOK_ERR;
173  while (raw < end && json_isdigit(*raw)) { // copy digits
174  numStr += *raw;
175  raw++;
176  }
177  }
178 
179  tokenVal = numStr;
180  consumed = (raw - rawStart);
181  return JTOK_NUMBER;
182  }
183 
184  case '"': {
185  raw++; // skip "
186 
187  std::string valStr;
188  JSONUTF8StringFilter writer(valStr);
189 
190  while (true) {
191  if (raw >= end || (unsigned char)*raw < 0x20)
192  return JTOK_ERR;
193 
194  else if (*raw == '\\') {
195  raw++; // skip backslash
196 
197  if (raw >= end)
198  return JTOK_ERR;
199 
200  switch (*raw) {
201  case '"': writer.push_back('\"'); break;
202  case '\\': writer.push_back('\\'); break;
203  case '/': writer.push_back('/'); break;
204  case 'b': writer.push_back('\b'); break;
205  case 'f': writer.push_back('\f'); break;
206  case 'n': writer.push_back('\n'); break;
207  case 'r': writer.push_back('\r'); break;
208  case 't': writer.push_back('\t'); break;
209 
210  case 'u': {
211  unsigned int codepoint;
212  if (raw + 1 + 4 >= end ||
213  hatoui(raw + 1, raw + 1 + 4, codepoint) !=
214  raw + 1 + 4)
215  return JTOK_ERR;
216  writer.push_back_u(codepoint);
217  raw += 4;
218  break;
219  }
220  default:
221  return JTOK_ERR;
222 
223  }
224 
225  raw++; // skip esc'd char
226  }
227 
228  else if (*raw == '"') {
229  raw++; // skip "
230  break; // stop scanning
231  }
232 
233  else {
234  writer.push_back(static_cast<unsigned char>(*raw));
235  raw++;
236  }
237  }
238 
239  if (!writer.finalize())
240  return JTOK_ERR;
241  tokenVal = valStr;
242  consumed = (raw - rawStart);
243  return JTOK_STRING;
244  }
245 
246  default:
247  return JTOK_ERR;
248  }
249 }
250 
// Bit flags describing what the parser in UniValue::read() will accept as the
// next token. Several flags may be set in the mask at once.
enum expect_bits : unsigned {
    EXP_OBJ_NAME  = 0x01U, // set after '{' or after ',' inside an object: next is a key string or '}'
    EXP_COLON     = 0x02U, // set after an object key: next must be ':'
    EXP_ARR_VALUE = 0x04U, // set after '[' or after ',' inside an array: next is a value or ']'
    EXP_VALUE     = 0x08U, // set after ':': next must start a value
    EXP_NOT_VALUE = 0x10U, // set after a value, key or closing bracket: next must not start a value
};

// Helpers for testing, setting and clearing a flag. All three operate on a
// variable named `expectMask` that must be in scope where they are used, and
// take the flag name without its EXP_ prefix, e.g. expect(VALUE).
#define expect(bit)      (expectMask & (EXP_##bit))
#define setExpect(bit)   (expectMask |= (EXP_##bit))
#define clearExpect(bit) (expectMask &= ~(EXP_##bit))
262 
263 bool UniValue::read(std::string_view str_in)
264 {
265  clear();
266 
267  uint32_t expectMask = 0;
268  std::vector<UniValue*> stack;
269 
270  std::string tokenVal;
271  unsigned int consumed;
272  enum jtokentype tok = JTOK_NONE;
273  enum jtokentype last_tok = JTOK_NONE;
274  const char* raw{str_in.data()};
275  const char* end{raw + str_in.size()};
276  do {
277  last_tok = tok;
278 
279  tok = getJsonToken(tokenVal, consumed, raw, end);
280  if (tok == JTOK_NONE || tok == JTOK_ERR)
281  return false;
282  raw += consumed;
283 
284  bool isValueOpen = jsonTokenIsValue(tok) ||
285  tok == JTOK_OBJ_OPEN || tok == JTOK_ARR_OPEN;
286 
287  if (expect(VALUE)) {
288  if (!isValueOpen)
289  return false;
290  clearExpect(VALUE);
291 
292  } else if (expect(ARR_VALUE)) {
293  bool isArrValue = isValueOpen || (tok == JTOK_ARR_CLOSE);
294  if (!isArrValue)
295  return false;
296 
297  clearExpect(ARR_VALUE);
298 
299  } else if (expect(OBJ_NAME)) {
300  bool isObjName = (tok == JTOK_OBJ_CLOSE || tok == JTOK_STRING);
301  if (!isObjName)
302  return false;
303 
304  } else if (expect(COLON)) {
305  if (tok != JTOK_COLON)
306  return false;
307  clearExpect(COLON);
308 
309  } else if (!expect(COLON) && (tok == JTOK_COLON)) {
310  return false;
311  }
312 
313  if (expect(NOT_VALUE)) {
314  if (isValueOpen)
315  return false;
316  clearExpect(NOT_VALUE);
317  }
318 
319  switch (tok) {
320 
321  case JTOK_OBJ_OPEN:
322  case JTOK_ARR_OPEN: {
323  VType utyp = (tok == JTOK_OBJ_OPEN ? VOBJ : VARR);
324  if (!stack.size()) {
325  if (utyp == VOBJ)
326  setObject();
327  else
328  setArray();
329  stack.push_back(this);
330  } else {
331  UniValue tmpVal(utyp);
332  UniValue *top = stack.back();
333  top->values.push_back(tmpVal);
334 
335  UniValue *newTop = &(top->values.back());
336  stack.push_back(newTop);
337  }
338 
339  if (stack.size() > MAX_JSON_DEPTH)
340  return false;
341 
342  if (utyp == VOBJ)
343  setExpect(OBJ_NAME);
344  else
345  setExpect(ARR_VALUE);
346  break;
347  }
348 
349  case JTOK_OBJ_CLOSE:
350  case JTOK_ARR_CLOSE: {
351  if (!stack.size() || (last_tok == JTOK_COMMA))
352  return false;
353 
354  VType utyp = (tok == JTOK_OBJ_CLOSE ? VOBJ : VARR);
355  UniValue *top = stack.back();
356  if (utyp != top->getType())
357  return false;
358 
359  stack.pop_back();
360  clearExpect(OBJ_NAME);
361  setExpect(NOT_VALUE);
362  break;
363  }
364 
365  case JTOK_COLON: {
366  if (!stack.size())
367  return false;
368 
369  UniValue *top = stack.back();
370  if (top->getType() != VOBJ)
371  return false;
372 
373  setExpect(VALUE);
374  break;
375  }
376 
377  case JTOK_COMMA: {
378  if (!stack.size() ||
379  (last_tok == JTOK_COMMA) || (last_tok == JTOK_ARR_OPEN))
380  return false;
381 
382  UniValue *top = stack.back();
383  if (top->getType() == VOBJ)
384  setExpect(OBJ_NAME);
385  else
386  setExpect(ARR_VALUE);
387  break;
388  }
389 
390  case JTOK_KW_NULL:
391  case JTOK_KW_TRUE:
392  case JTOK_KW_FALSE: {
393  UniValue tmpVal;
394  switch (tok) {
395  case JTOK_KW_NULL:
396  // do nothing more
397  break;
398  case JTOK_KW_TRUE:
399  tmpVal.setBool(true);
400  break;
401  case JTOK_KW_FALSE:
402  tmpVal.setBool(false);
403  break;
404  default: /* impossible */ break;
405  }
406 
407  if (!stack.size()) {
408  *this = tmpVal;
409  break;
410  }
411 
412  UniValue *top = stack.back();
413  top->values.push_back(tmpVal);
414 
415  setExpect(NOT_VALUE);
416  break;
417  }
418 
419  case JTOK_NUMBER: {
420  UniValue tmpVal(VNUM, tokenVal);
421  if (!stack.size()) {
422  *this = tmpVal;
423  break;
424  }
425 
426  UniValue *top = stack.back();
427  top->values.push_back(tmpVal);
428 
429  setExpect(NOT_VALUE);
430  break;
431  }
432 
433  case JTOK_STRING: {
434  if (expect(OBJ_NAME)) {
435  UniValue *top = stack.back();
436  top->keys.push_back(tokenVal);
437  clearExpect(OBJ_NAME);
438  setExpect(COLON);
439  } else {
440  UniValue tmpVal(VSTR, tokenVal);
441  if (!stack.size()) {
442  *this = tmpVal;
443  break;
444  }
445  UniValue *top = stack.back();
446  top->values.push_back(tmpVal);
447  }
448 
449  setExpect(NOT_VALUE);
450  break;
451  }
452 
453  default:
454  return false;
455  }
456  } while (!stack.empty ());
457 
458  /* Check that nothing follows the initial construct (parsed above). */
459  tok = getJsonToken(tokenVal, consumed, raw, end);
460  if (tok != JTOK_NONE)
461  return false;
462 
463  return true;
464 }
465 
Filter that generates and validates UTF-8, as well as collates UTF-16 surrogate pairs as specified in...
void push_back(UniValue val)
Definition: univalue.cpp:104
enum VType getType() const
Definition: univalue.h:67
@ VOBJ
Definition: univalue.h:24
@ VSTR
Definition: univalue.h:24
@ VARR
Definition: univalue.h:24
@ VNUM
Definition: univalue.h:24
void setArray()
Definition: univalue.cpp:92
void clear()
Definition: univalue.cpp:18
void setBool(bool val)
Definition: univalue.cpp:31
std::vector< UniValue > values
Definition: univalue.h:106
std::vector< std::string > keys
Definition: univalue.h:105
bool read(std::string_view raw)
void setObject()
Definition: univalue.cpp:98
static bool jsonTokenIsValue(enum jtokentype jtt)
Definition: univalue.h:170
static bool json_isspace(int ch)
Definition: univalue.h:187
jtokentype
Definition: univalue.h:150
@ JTOK_OBJ_CLOSE
Definition: univalue.h:154
@ JTOK_STRING
Definition: univalue.h:163
@ JTOK_COLON
Definition: univalue.h:157
@ JTOK_OBJ_OPEN
Definition: univalue.h:153
@ JTOK_NUMBER
Definition: univalue.h:162
@ JTOK_KW_NULL
Definition: univalue.h:159
@ JTOK_COMMA
Definition: univalue.h:158
@ JTOK_ARR_CLOSE
Definition: univalue.h:156
@ JTOK_KW_TRUE
Definition: univalue.h:160
@ JTOK_ARR_OPEN
Definition: univalue.h:155
@ JTOK_KW_FALSE
Definition: univalue.h:161
@ JTOK_ERR
Definition: univalue.h:151
@ JTOK_NONE
Definition: univalue.h:152
static bool json_isdigit(int ch)
#define clearExpect(bit)
static constexpr size_t MAX_JSON_DEPTH
enum jtokentype getJsonToken(std::string &tokenVal, unsigned int &consumed, const char *raw, const char *end)
#define expect(bit)
expect_bits
@ EXP_ARR_VALUE
@ EXP_NOT_VALUE
@ EXP_COLON
@ EXP_VALUE
@ EXP_OBJ_NAME
static const char * hatoui(const char *first, const char *last, unsigned int &out)
#define setExpect(bit)