1 : /*
2 : * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3 : *
4 : * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5 : * Michael Clark <michael@metaparadigm.com>
6 : *
7 : * This library is free software; you can redistribute it and/or modify
8 : * it under the terms of the MIT license. See COPYING for details.
9 : *
10 : *
11 : * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
12 : * The copyrights to the contents of this file are licensed under the MIT License
13 : * (http://www.opensource.org/licenses/mit-license.php)
14 : */
15 :
16 : #include "config.h"
17 :
18 : #include "cpl_conv.h"
19 :
20 : #include <stdio.h>
21 : #include <stdlib.h>
22 : #include <stddef.h>
23 : #include <ctype.h>
24 : #include <string.h>
25 :
26 : #include "bits.h"
27 : #include "debug.h"
28 : #include "printbuf.h"
29 : #include "arraylist.h"
30 : #include "json_object.h"
31 : #include "json_tokener.h"
32 :
33 : #include <cpl_port.h> /* MIN and MAX macros */
34 :
35 : #if !HAVE_STRNCASECMP
36 : # if defined(_MSC_VER)
37 : /* MSC has the version as _strnicmp */
38 : # define strncasecmp _strnicmp
39 : # else
40 : # error You do not have strncasecmp on your system.
41 : # endif
42 : #endif /* HAVE_STRNCASECMP */
43 :
44 :
/* Literal keywords matched case-insensitively by the null/boolean states. */
static const char* json_null_str = "null";
static const char* json_true_str = "true";
static const char* json_false_str = "false";

/* Human-readable error messages, looked up with tok->err as the index
 * (see the MC_DEBUG call at the end of json_tokener_parse_ex()).
 * The order of the entries therefore must not change. */
const char* json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
65 :
66 :
67 220 : struct json_tokener* json_tokener_new(void)
68 : {
69 : struct json_tokener *tok;
70 :
71 220 : tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
72 220 : if (!tok) return NULL;
73 220 : tok->pb = printbuf_new();
74 220 : json_tokener_reset(tok);
75 220 : return tok;
76 : }
77 :
78 220 : void json_tokener_free(struct json_tokener *tok)
79 : {
80 220 : json_tokener_reset(tok);
81 220 : if(tok) printbuf_free(tok->pb);
82 220 : free(tok);
83 220 : }
84 :
85 39664 : static void json_tokener_reset_level(struct json_tokener *tok, int depth)
86 : {
87 39664 : tok->stack[depth].state = json_tokener_state_eatws;
88 39664 : tok->stack[depth].saved_state = json_tokener_state_start;
89 39664 : json_object_put(tok->stack[depth].current);
90 39664 : tok->stack[depth].current = NULL;
91 39664 : free(tok->stack[depth].obj_field_name);
92 39664 : tok->stack[depth].obj_field_name = NULL;
93 39664 : }
94 :
95 440 : void json_tokener_reset(struct json_tokener *tok)
96 : {
97 : int i;
98 440 : if (!tok)
99 0 : return;
100 :
101 880 : for(i = tok->depth; i >= 0; i--)
102 440 : json_tokener_reset_level(tok, i);
103 440 : tok->depth = 0;
104 440 : tok->err = json_tokener_success;
105 : }
106 :
107 114 : struct json_object* json_tokener_parse(const char *str)
108 : {
109 : struct json_tokener* tok;
110 : struct json_object* obj;
111 :
112 114 : tok = json_tokener_new();
113 114 : obj = json_tokener_parse_ex(tok, str, -1);
114 114 : if(tok->err != json_tokener_success)
115 0 : obj = (struct json_object*)error_ptr(-tok->err);
116 114 : json_tokener_free(tok);
117 114 : return obj;
118 : }
119 :
120 :
#if !HAVE_STRNDUP
/* CAW: compliant version of strndup()
 *
 * Duplicate at most n bytes of str into freshly malloc()ed storage and
 * NUL-terminate the copy.
 *
 * str: source bytes; need not be NUL-terminated as long as at least n
 *      bytes are readable.  NULL yields NULL.
 * n:   maximum number of bytes to copy.
 *
 * Returns the new string (caller frees it), or NULL if str is NULL or the
 * allocation fails.
 */
char* strndup(const char* str, size_t n)
{
  const char* terminator;
  size_t nn;
  char* s;

  if(!str)
    return NULL;

  /* memchr() never looks beyond the first n bytes, unlike strlen(), so
   * an unterminated buffer of n bytes is handled without over-reading. */
  terminator = (const char*)memchr(str, '\0', n);
  nn = terminator ? (size_t)(terminator - str) : n;

  s = (char*)malloc(nn + 1);
  if(s) {
    memcpy(s, str, nn);
    s[nn] = '\0';
  }

  return s;
}
#endif
141 :
142 :
143 : #define state tok->stack[tok->depth].state
144 : #define saved_state tok->stack[tok->depth].saved_state
145 : #define current tok->stack[tok->depth].current
146 : #define obj_field_name tok->stack[tok->depth].obj_field_name
147 :
148 : /* Optimization:
149 : * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
150 : * iterating character-by-character. A large performance boost is
151 : * achieved by using tighter loops to locally handle units such as
152 : * comments and strings. Loops that handle an entire token within
153 : * their scope also gather entire strings and pass them to
154 : * printbuf_memappend() in a single call, rather than calling
155 : * printbuf_memappend() one char at a time.
156 : *
157 : * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
158 : * common to both the main loop and the tighter loops.
159 : */
160 :
161 : /* POP_CHAR(dest, tok) macro:
162 : * Not really a pop()...peeks at the current char and stores it in dest.
163 : * Returns 1 on success, sets tok->err and returns 0 if no more chars.
164 : * Implicit inputs: str, len vars
165 : */
166 : #define POP_CHAR(dest, tok) \
167 : (((tok)->char_offset == len) ? \
168 : (((tok)->depth == 0 && state == json_tokener_state_eatws && saved_state == json_tokener_state_finish) ? \
169 : (((tok)->err = json_tokener_success), 0) \
170 : : \
171 : (((tok)->err = json_tokener_continue), 0) \
172 : ) : \
173 : (((dest) = *str), 1) \
174 : )
175 :
176 : /* ADVANCE_CHAR() macro:
177 : * Increments str & tok->char_offset.
178 : * For convenience of existing conditionals, returns the old value of c (0 on eof)
179 : * Implicit inputs: c var
180 : */
181 : #define ADVANCE_CHAR(str, tok) \
182 : ( ++(str), ((tok)->char_offset)++, c)
183 :
184 : /* End optimization macro defs */
185 :
186 :
187 220 : struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
188 : const char *str, int len)
189 : {
190 220 : struct json_object *obj = NULL;
191 220 : char c = '\1';
192 :
193 220 : tok->char_offset = 0;
194 220 : tok->err = json_tokener_success;
195 :
196 46625 : while (POP_CHAR(c, tok)) {
197 :
198 : redo_char:
199 216357 : switch(state) {
200 :
201 : case json_tokener_state_eatws:
202 : /* Advance until we change state */
203 191019 : while (isspace(c)) {
204 13587 : if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
205 : goto out;
206 : }
207 88716 : if(c == '/') {
208 0 : printbuf_reset(tok->pb);
209 0 : printbuf_memappend_fast(tok->pb, &c, 1);
210 0 : state = json_tokener_state_comment_start;
211 : } else {
212 88716 : state = saved_state;
213 88716 : goto redo_char;
214 : }
215 0 : break;
216 :
217 : case json_tokener_state_start:
218 19832 : switch(c) {
219 : case '{':
220 1271 : state = json_tokener_state_eatws;
221 1271 : saved_state = json_tokener_state_object_field_start;
222 1271 : current = json_object_new_object();
223 1271 : break;
224 : case '[':
225 5115 : state = json_tokener_state_eatws;
226 5115 : saved_state = json_tokener_state_array;
227 5115 : current = json_object_new_array();
228 5115 : break;
229 : case 'N':
230 : case 'n':
231 102 : state = json_tokener_state_null;
232 102 : printbuf_reset(tok->pb);
233 102 : tok->st_pos = 0;
234 102 : goto redo_char;
235 : case '"':
236 : case '\'':
237 2246 : state = json_tokener_state_string;
238 2246 : printbuf_reset(tok->pb);
239 2246 : tok->quote_char = c;
240 2246 : break;
241 : case 'T':
242 : case 't':
243 : case 'F':
244 : case 'f':
245 15 : state = json_tokener_state_boolean;
246 15 : printbuf_reset(tok->pb);
247 15 : tok->st_pos = 0;
248 15 : goto redo_char;
249 : #if defined(__GNUC__)
250 : case '0' ... '9':
251 : #else
252 : case '0':
253 : case '1':
254 : case '2':
255 : case '3':
256 : case '4':
257 : case '5':
258 : case '6':
259 : case '7':
260 : case '8':
261 : case '9':
262 : #endif
263 : case '-':
264 11083 : state = json_tokener_state_number;
265 11083 : printbuf_reset(tok->pb);
266 11083 : tok->is_double = 0;
267 11083 : goto redo_char;
268 : default:
269 0 : tok->err = json_tokener_error_parse_unexpected;
270 0 : goto out;
271 : }
272 8632 : break;
273 :
274 : case json_tokener_state_finish:
275 19832 : if(tok->depth == 0) goto out;
276 19612 : obj = json_object_get(current);
277 19612 : json_tokener_reset_level(tok, tok->depth);
278 19612 : tok->depth--;
279 19612 : goto redo_char;
280 :
281 : case json_tokener_state_null:
282 510 : printbuf_memappend_fast(tok->pb, &c, 1);
283 1020 : if(strncasecmp(json_null_str, tok->pb->buf,
284 1020 : json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
285 510 : if(tok->st_pos == strlen(json_null_str)) {
286 102 : current = NULL;
287 102 : saved_state = json_tokener_state_finish;
288 102 : state = json_tokener_state_eatws;
289 102 : goto redo_char;
290 : }
291 : } else {
292 0 : tok->err = json_tokener_error_parse_null;
293 0 : goto out;
294 : }
295 408 : tok->st_pos++;
296 408 : break;
297 :
298 : case json_tokener_state_comment_start:
299 0 : if(c == '*') {
300 0 : state = json_tokener_state_comment;
301 0 : } else if(c == '/') {
302 0 : state = json_tokener_state_comment_eol;
303 : } else {
304 0 : tok->err = json_tokener_error_parse_comment;
305 0 : goto out;
306 : }
307 0 : printbuf_memappend_fast(tok->pb, &c, 1);
308 0 : break;
309 :
310 : case json_tokener_state_comment:
311 : {
312 : /* Advance until we change state */
313 0 : const char *case_start = str;
314 0 : while(c != '*') {
315 0 : if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
316 0 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
317 0 : goto out;
318 : }
319 : }
320 0 : printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
321 0 : state = json_tokener_state_comment_end;
322 : }
323 0 : break;
324 :
325 : case json_tokener_state_comment_eol:
326 : {
327 : /* Advance until we change state */
328 0 : const char *case_start = str;
329 0 : while(c != '\n') {
330 0 : if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
331 0 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
332 0 : goto out;
333 : }
334 : }
335 0 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
336 : MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
337 0 : state = json_tokener_state_eatws;
338 : }
339 0 : break;
340 :
341 : case json_tokener_state_comment_end:
342 0 : printbuf_memappend_fast(tok->pb, &c, 1);
343 0 : if(c == '/') {
344 : MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
345 0 : state = json_tokener_state_eatws;
346 : } else {
347 0 : state = json_tokener_state_comment;
348 : }
349 0 : break;
350 :
351 : case json_tokener_state_string:
352 : {
353 : /* Advance until we change state */
354 2500 : const char *case_start = str;
355 : while(1) {
356 41283 : if(c == tok->quote_char) {
357 2246 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
358 2246 : current = json_object_new_string(tok->pb->buf);
359 2246 : saved_state = json_tokener_state_finish;
360 2246 : state = json_tokener_state_eatws;
361 2246 : break;
362 39037 : } else if(c == '\\') {
363 254 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
364 254 : saved_state = json_tokener_state_string;
365 254 : state = json_tokener_state_string_escape;
366 254 : break;
367 : }
368 38783 : if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
369 0 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
370 0 : goto out;
371 : }
372 38783 : }
373 : }
374 2500 : break;
375 :
376 : case json_tokener_state_string_escape:
377 254 : switch(c) {
378 : case '"':
379 : case '\\':
380 : case '/':
381 254 : printbuf_memappend_fast(tok->pb, &c, 1);
382 254 : state = saved_state;
383 254 : break;
384 : case 'b':
385 : case 'n':
386 : case 'r':
387 : case 't':
388 0 : if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
389 0 : else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
390 0 : else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
391 0 : else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
392 0 : state = saved_state;
393 0 : break;
394 : case 'u':
395 0 : tok->ucs_char = 0;
396 0 : tok->st_pos = 0;
397 0 : state = json_tokener_state_escape_unicode;
398 0 : break;
399 : default:
400 0 : tok->err = json_tokener_error_parse_string;
401 0 : goto out;
402 : }
403 254 : break;
404 :
405 : case json_tokener_state_escape_unicode:
406 : /* Note that the following code is inefficient for handling large
407 : * chunks of extended chars, calling printbuf_memappend() once
408 : * for each multi-byte character of input.
409 : * This is a good area for future optimization.
410 : */
411 : {
412 : /* Advance until we change state */
413 : while(1) {
414 0 : if(strchr(json_hex_chars, c)) {
415 0 : tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
416 0 : if(tok->st_pos == 4) {
417 : unsigned char utf_out[3];
418 0 : if (tok->ucs_char < 0x80) {
419 0 : utf_out[0] = tok->ucs_char;
420 0 : printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
421 0 : } else if (tok->ucs_char < 0x800) {
422 0 : utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
423 0 : utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
424 0 : printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
425 : } else {
426 0 : utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
427 0 : utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
428 0 : utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
429 0 : printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
430 : }
431 0 : state = saved_state;
432 : break;
433 : }
434 : } else {
435 0 : tok->err = json_tokener_error_parse_string;
436 0 : goto out;
437 : }
438 0 : if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
439 : goto out;
440 0 : }
441 : }
442 0 : break;
443 :
444 : case json_tokener_state_boolean:
445 90 : printbuf_memappend_fast(tok->pb, &c, 1);
446 180 : if(strncasecmp(json_true_str, tok->pb->buf,
447 180 : json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
448 0 : if(tok->st_pos == strlen(json_true_str)) {
449 0 : current = json_object_new_boolean(1);
450 0 : saved_state = json_tokener_state_finish;
451 0 : state = json_tokener_state_eatws;
452 0 : goto redo_char;
453 : }
454 180 : } else if(strncasecmp(json_false_str, tok->pb->buf,
455 180 : json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
456 90 : if(tok->st_pos == strlen(json_false_str)) {
457 15 : current = json_object_new_boolean(0);
458 15 : saved_state = json_tokener_state_finish;
459 15 : state = json_tokener_state_eatws;
460 15 : goto redo_char;
461 : }
462 : } else {
463 0 : tok->err = json_tokener_error_parse_boolean;
464 0 : goto out;
465 : }
466 75 : tok->st_pos++;
467 75 : break;
468 :
469 : case json_tokener_state_number:
470 : {
471 : /* Advance until we change state */
472 11083 : const char *case_start = str;
473 11083 : int case_len=0;
474 124562 : while(c && strchr(json_number_chars, c)) {
475 102396 : ++case_len;
476 102396 : if(c == '.' || c == 'e') tok->is_double = 1;
477 102396 : if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
478 0 : printbuf_memappend_fast(tok->pb, case_start, case_len);
479 0 : goto out;
480 : }
481 : }
482 11083 : if (case_len>0)
483 11083 : printbuf_memappend_fast(tok->pb, case_start, case_len);
484 : }
485 : {
486 : int numi;
487 : double numd;
488 11851 : if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
489 768 : current = json_object_new_int(numi);
490 10315 : } else if(tok->is_double) {
491 10315 : numd = CPLAtof(tok->pb->buf);
492 10315 : current = json_object_new_double(numd);
493 : } else {
494 0 : tok->err = json_tokener_error_parse_number;
495 0 : goto out;
496 : }
497 11083 : saved_state = json_tokener_state_finish;
498 11083 : state = json_tokener_state_eatws;
499 11083 : goto redo_char;
500 : }
501 : break;
502 :
503 : case json_tokener_state_array:
504 14748 : if(c == ']') {
505 12 : saved_state = json_tokener_state_finish;
506 12 : state = json_tokener_state_eatws;
507 : } else {
508 14736 : if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
509 0 : tok->err = json_tokener_error_depth;
510 0 : goto out;
511 : }
512 14736 : state = json_tokener_state_array_add;
513 14736 : tok->depth++;
514 14736 : json_tokener_reset_level(tok, tok->depth);
515 14736 : goto redo_char;
516 : }
517 12 : break;
518 :
519 : case json_tokener_state_array_add:
520 14736 : json_object_array_add(current, obj);
521 14736 : saved_state = json_tokener_state_array_sep;
522 14736 : state = json_tokener_state_eatws;
523 14736 : goto redo_char;
524 :
525 : case json_tokener_state_array_sep:
526 14736 : if(c == ']') {
527 5103 : saved_state = json_tokener_state_finish;
528 5103 : state = json_tokener_state_eatws;
529 9633 : } else if(c == ',') {
530 9633 : saved_state = json_tokener_state_array;
531 9633 : state = json_tokener_state_eatws;
532 : } else {
533 0 : tok->err = json_tokener_error_parse_array;
534 0 : goto out;
535 : }
536 14736 : break;
537 :
538 : case json_tokener_state_object_field_start:
539 4940 : if(c == '}') {
540 64 : saved_state = json_tokener_state_finish;
541 64 : state = json_tokener_state_eatws;
542 9752 : } else if (c == '"' || c == '\'') {
543 4876 : tok->quote_char = c;
544 4876 : printbuf_reset(tok->pb);
545 4876 : state = json_tokener_state_object_field;
546 : } else {
547 0 : tok->err = json_tokener_error_parse_object_key_name;
548 0 : goto out;
549 : }
550 4940 : break;
551 :
552 : case json_tokener_state_object_field:
553 : {
554 : /* Advance until we change state */
555 4876 : const char *case_start = str;
556 : while(1) {
557 32424 : if(c == tok->quote_char) {
558 4876 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
559 4876 : obj_field_name = strdup(tok->pb->buf);
560 4876 : saved_state = json_tokener_state_object_field_end;
561 4876 : state = json_tokener_state_eatws;
562 4876 : break;
563 27548 : } else if(c == '\\') {
564 0 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
565 0 : saved_state = json_tokener_state_object_field;
566 0 : state = json_tokener_state_string_escape;
567 0 : break;
568 : }
569 27548 : if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
570 0 : printbuf_memappend_fast(tok->pb, case_start, str-case_start);
571 0 : goto out;
572 : }
573 27548 : }
574 : }
575 4876 : break;
576 :
577 : case json_tokener_state_object_field_end:
578 4876 : if(c == ':') {
579 4876 : saved_state = json_tokener_state_object_value;
580 4876 : state = json_tokener_state_eatws;
581 : } else {
582 0 : tok->err = json_tokener_error_parse_object_key_sep;
583 0 : goto out;
584 : }
585 4876 : break;
586 :
587 : case json_tokener_state_object_value:
588 4876 : if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
589 0 : tok->err = json_tokener_error_depth;
590 0 : goto out;
591 : }
592 4876 : state = json_tokener_state_object_value_add;
593 4876 : tok->depth++;
594 4876 : json_tokener_reset_level(tok, tok->depth);
595 4876 : goto redo_char;
596 :
597 : case json_tokener_state_object_value_add:
598 4876 : json_object_object_add(current, obj_field_name, obj);
599 4876 : free(obj_field_name);
600 4876 : obj_field_name = NULL;
601 4876 : saved_state = json_tokener_state_object_sep;
602 4876 : state = json_tokener_state_eatws;
603 4876 : goto redo_char;
604 :
605 : case json_tokener_state_object_sep:
606 4876 : if(c == '}') {
607 1207 : saved_state = json_tokener_state_finish;
608 1207 : state = json_tokener_state_eatws;
609 3669 : } else if(c == ',') {
610 3669 : saved_state = json_tokener_state_object_field_start;
611 3669 : state = json_tokener_state_eatws;
612 : } else {
613 0 : tok->err = json_tokener_error_parse_object_value_sep;
614 0 : goto out;
615 : }
616 : break;
617 :
618 : }
619 46185 : if (!ADVANCE_CHAR(str, tok))
620 0 : goto out;
621 : } /* while(POP_CHAR) */
622 :
623 : out:
624 220 : if (!c) { /* We hit an eof char (0) */
625 220 : if(state != json_tokener_state_finish &&
626 0 : saved_state != json_tokener_state_finish)
627 0 : tok->err = json_tokener_error_parse_eof;
628 : }
629 :
630 220 : if(tok->err == json_tokener_success) return json_object_get(current);
631 : MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
632 : json_tokener_errors[tok->err], tok->char_offset);
633 0 : return NULL;
634 : }
|