3 * Minimal complete JSON generator and parser for FusionForge
5 * Copyright © 2010, 2011, 2012
6 * Thorsten “mirabilos” Glaser <t.glaser@tarent.de>
9 * This file is part of FusionForge. FusionForge is free software;
10 * you can redistribute it and/or modify it under the terms of the
11 * GNU General Public License as published by the Free Software
12 * Foundation; either version 2 of the License, or (at your option)
15 * FusionForge is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with FusionForge; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * Do *not* use PHP’s json_encode because it is broken.
25 * Note that JSON is case-sensitive. My notes are at:
26 * https://www.mirbsd.org/cvs.cgi/contrib/hosted/tg/json.txt?rev=HEAD
30 * I was really, really bad at writing parsers. I still am really bad at
36 * Encodes an array (indexed or associative) as JSON.
38 * in: array x (Value to be encoded)
39 * in: string indent or bool false to skip beautification
42 function minijson_encode($x, $ri="") {
/*
 * Recursive JSON encoder. $x is the value to encode; $ri is the
 * current indentation string, or boolean false to switch off
 * beautification. Output is accumulated in $rs; array members and
 * values that are cast to array or string at the end are encoded
 * by calling this function again.
 * NOTE(review): several statements of this function are not part
 * of this view, so the comments added below describe only what
 * the visible lines do.
 */
/* unset values, null and non-finite floats (NaN, infinity) share one test */
43 if (!isset($x) || is_null($x) || (is_float($x) &&
44 (is_nan($x) || is_infinite($x))))
/*
 * number formatting (presumably the float case; the guarding
 * condition is not visible here): print in scientific notation
 * with 14 fractional digits, split at the e into mantissa ($v[0])
 * and exponent ($v[1]), then drop trailing zeroes from the mantissa
 */
58 $rs = sprintf("%.14e", $x);
59 $v = explode("e", $rs);
60 $rs = rtrim($v[0], "0");
/* true when stripping the zeroes left the decimal point as last character */
61 if (substr($rs, -1) == ".") {
/* true for every exponent other than -0 and +0 */
64 if ($v[1] != "-0" && $v[1] != "+0") {
73 * A bit unbelievable: not only does mb_check_encoding
74 * not exist from the start, but also does it not check
75 * reliably – so converting forth and back is the way
76 * they recommend… also, JSON is not binary-safe either…
80 if (function_exists('mb_internal_encoding') &&
81 function_exists('mb_convert_encoding')) {
/* remember the current mbstring internal encoding, then work in UTF-8 */
82 $mb_encoding = mb_internal_encoding();
83 mb_internal_encoding("UTF-8");
/*
 * round trip UTF-8 -> UTF-16LE -> UTF-8; the input is treated as
 * Unicode only if it comes back equal to the original
 */
84 $z = mb_convert_encoding($x, "UTF-16LE", "UTF-8");
85 $y = mb_convert_encoding($z, "UTF-8", "UTF-16LE");
86 $isunicode = ($y == $x);
/* cut the UTF-16LE string into 2-byte pieces, one per 16-bit unit */
89 $z = str_split($z, 2);
/* merge the second byte of the piece in as the high byte (little endian) */
97 $y |= ord($v[1]) << 8;
/*
 * individually tested values: 8 backspace, 9 tab, 10 line feed,
 * 12 form feed, 13 carriage return, 34 double quote, 92 backslash
 */
101 } else if ($y == 8) {
103 } else if ($y == 9) {
105 } else if ($y == 10) {
107 } else if ($y == 12) {
109 } else if ($y == 13) {
111 } else if ($y == 34) {
113 } else if ($y == 92) {
115 } else if ($y < 0x20 || $y > 0xFFFD ||
116 ($y >= 0xD800 && $y <= 0xDFFF) ||
117 ($y > 0x7E && (!$isunicode || $y < 0xA0))) {
/*
 * written as a \uXXXX escape: values below 0x20, values above
 * 0xFFFD, the surrogate range 0xD800-0xDFFF, and anything above
 * 0x7E that is either below 0xA0 or comes from input that failed
 * the Unicode round trip
 */
118 $rs .= sprintf("\\u%04X", $y);
119 } else if ($isunicode && $y > 0x7E) {
/* remaining values above 0x7E from valid Unicode input are appended as UTF-8 */
120 $rs .= mb_convert_encoding($v, "UTF-8",
/* put the saved internal encoding back, unless the saved value is false */
126 if ($mb_encoding !== false) {
127 mb_internal_encoding($mb_encoding);
150 /* all array keys are integers */
152 sort($s, SORT_NUMERIC);
153 /* test keys for order and delta */
/*
 * indentation handed to nested values: false stays false, otherwise
 * $ri with the string literal below appended
 */
164 $si = $ri === false ? false : $ri . " ";
167 /* all array keys are integers 0‥n */
174 else if ($ri === false)
/* encode the member value recursively with the nested indentation */
180 $rs .= minijson_encode($x[$v], $si);
/* order $k using string comparison */
191 sort($k, SORT_STRING);
195 else if ($ri === false)
/* the key is cast to string and encoded without beautification */
201 $rs .= minijson_encode((string)$v, false);
/* the value belonging to that key uses the nested indentation */
206 $rs .= minijson_encode($x[$v], $si);
214 /* treat everything else as array or string */
/* NOTE(review): the condition choosing between the two casts is not visible in this view */
216 return minijson_encode((array)$x, $ri);
217 return minijson_encode((string)$x, $ri);
221 * Decodes a UTF-8 string from JSON (ECMA 262).
224 * in: reference output-variable (or error string)
225 * in: integer (optional) recursion depth (default: 32)
226 * out: boolean false if an error occurred, true = output is valid
228 function minijson_decode($sj, &$ov, $depth=32) {
/*
 * Entry point of the decoder. $sj is the JSON text, $ov is the
 * by-reference output variable that receives either the decoded
 * value or an error message, $depth is handed to
 * minijson_decode_value as recursion depth. The input is converted
 * to an array of 16-bit units ($j) that the minijson_decode_*
 * helpers walk using the position $p.
 * NOTE(review): several statements, including the return
 * statements, are not part of this view.
 */
/*
 * NOTE(review): besides unset and empty input, !$sj is also true
 * for the string consisting of the single digit 0, which is a valid
 * JSON document; confirm that treating it like empty input is
 * intended.
 */
229 if (!isset($sj) || !$sj) {
234 /* mb_convert_encoding simply must exist for the decoder */
/* remember the current mbstring internal encoding; it is set again by the last visible statement */
235 $mb_encoding = mb_internal_encoding();
236 mb_internal_encoding("UTF-8");
238 /* see note about mb_check_encoding in the JSON encoder… */
/* round trip UTF-8 -> UTF-16LE -> UTF-8 ($wj is the UTF-16LE form, $mj the result of converting back) */
239 $wj = mb_convert_encoding($sj, "UTF-16LE", "UTF-8");
240 $mj = mb_convert_encoding($wj, "UTF-8", "UTF-16LE");
246 /* convert UTF-16LE string to array of wchar_t */
248 foreach (str_split($wj, 2) as $v) {
/* first byte is the low half, second byte the high half of the 16-bit unit */
249 $wc = ord($v[0]) | (ord($v[1]) << 8);
255 /* skip Byte Order Mark if present */
257 if ($j[$p] == 0xFEFF)
260 /* parse recursively */
/* $rv holds the result of the top-level parse; on failure $ov carries the message set by the helpers */
261 $rv = minijson_decode_value($j, $p, $ov, $depth);
263 $ov = "input not valid UTF-8";
267 /* skip optional whitespace after tokens */
268 minijson_skip_wsp($j, $p);
272 /* no, trailing waste */
273 $ov = "expected EOS at wchar #" . $p;
/* hand the internal encoding saved at the top back to mbstring */
278 mb_internal_encoding($mb_encoding);
282 function minijson_skip_wsp(&$j, &$p) {
/*
 * $j is the array of wide characters being parsed and $p the
 * current position in it; both are taken by reference.
 * NOTE(review): the loop body that reads $wc and moves $p is not
 * part of this view.
 */
283 /* skip all wide characters that are JSON whitespace */
/* repeat while the character just read is tab (0x09), line feed (0x0A), carriage return (0x0D) or space (0x20) */
286 } while ($wc == 0x09 || $wc == 0x0A || $wc == 0x0D || $wc == 0x20);
290 function minijson_get_hexdigit(&$j, &$p, &$v, $i) {
/*
 * Classifies one wide character ($wc) as a hexadecimal digit.
 * $j and $p are the wide-character array and the position in it,
 * $v is a by-reference value owned by the caller, and $i is only
 * used in the error message (the caller visible in this file passes
 * its loop counter 1..4 for the digits of a unicode escape).
 * NOTE(review): the statements that read $wc and update $v are not
 * part of this view.
 */
/* 0x30-0x39: decimal digits 0-9 */
292 if ($wc >= 0x30 && $wc <= 0x39) {
/* 0x41-0x46: upper case A-F */
294 } else if ($wc >= 0x41 && $wc <= 0x46) {
/* 0x61-0x66: lower case a-f */
296 } else if ($wc >= 0x61 && $wc <= 0x66) {
/*
 * NOTE(review): $ov is not among the parameters of this function,
 * so unless it is declared global in a line not shown here, this
 * message is stored in a local variable and never reaches the
 * caller. Fixing that would need an extra by-reference parameter
 * and a matching change at the call site.
 */
299 $ov = sprintf("invalid hex in unicode escape" .
300 " sequence (%d) at wchar #%u", $i, $p);
306 function minijson_decode_array(&$j, &$p, &$ov, $depth) {
/*
 * Parses the members of a JSON array from the wide-character array
 * $j, starting at position $p. On a syntax error $ov is set to a
 * message naming the position; presumably $ov also receives the
 * decoded array on success, but that assignment is not part of this
 * view. $depth is handed to minijson_decode_value for each member.
 */
310 /* I wish there were a goto in PHP… */
312 /* skip optional whitespace between tokens */
313 minijson_skip_wsp($j, $p);
315 /* end of the array? */
/* 0x5D is the closing square bracket */
316 if ($j[$p] == 0x5D) {
317 /* regular exit point for the loop */
323 /* member separator? */
/* 0x2C is the comma */
324 if ($j[$p] == 0x2C) {
327 /* no comma before the first member */
328 $ov = "unexpected comma at wchar #" . $p;
331 } else if (!$first) {
333 * all but the first member require a separating
334 * comma; this also catches e.g. trailing
335 * rubbish after numbers
337 $ov = "expected comma at wchar #" . $p;
342 /* parse the member value */
/* the member is decoded into $v; a false result ends parsing of this array */
344 if (!minijson_decode_value($j, $p, $v, $depth)) {
345 /* pass through error code */
353 function minijson_decode_object(&$j, &$p, &$ov, $depth) {
/*
 * Parses the members of a JSON object from the wide-character array
 * $j, starting at position $p. Each member is a key string, a
 * colon and a value. On a syntax error $ov is set to a message
 * naming the position; presumably $ov also receives the decoded
 * object on success, but that assignment is not part of this view.
 * $depth is handed to minijson_decode_value for each member value.
 */
358 /* skip optional whitespace between tokens */
359 minijson_skip_wsp($j, $p);
361 /* end of the object? */
/* 0x7D is the closing curly brace */
362 if ($j[$p] == 0x7D) {
363 /* regular exit point for the loop */
369 /* member separator? */
/* 0x2C is the comma */
370 if ($j[$p] == 0x2C) {
373 /* no comma before the first member */
374 $ov = "unexpected comma at wchar #" . $p;
377 } else if (!$first) {
379 * all but the first member require a separating
380 * comma; this also catches e.g. trailing
381 * rubbish after numbers
383 $ov = "expected comma at wchar #" . $p;
388 /* skip optional whitespace between tokens */
389 minijson_skip_wsp($j, $p);
391 /* parse the member key */
/*
 * a key has to start with a double quote (0x22); $p is advanced by
 * the test itself, so the position in the message below is one past
 * the character that was examined
 */
392 if ($j[$p++] != 0x22) {
393 $ov = "expected key string at wchar #" . $p;
/* the key text (or, on failure, the error message of the string decoder) ends up in $k */
397 if (!minijson_decode_string($j, $p, $k)) {
398 /* pass through error code */
403 /* skip optional whitespace between tokens */
404 minijson_skip_wsp($j, $p);
406 /* key-value separator? */
/* 0x3A is the colon; as above, $p has already moved on when the message is built */
407 if ($j[$p++] != 0x3A) {
408 $ov = "expected colon at wchar #" . $p;
412 /* parse the member value */
/* the member value is decoded into $v; a false result ends parsing of this object */
414 if (!minijson_decode_value($j, $p, $v, $depth)) {
415 /* pass through error code */
423 function minijson_decode_value(&$j, &$p, &$ov, $depth) {
/*
 * Decodes one JSON value from the wide-character array $j at
 * position $p and dispatches on its first character $wc:
 *   0x6E (n), 0x74 (t), 0x66 (f): the keyword literals, whose
 *     remaining letters are then compared one by one
 *   0x5B (opening square bracket): minijson_decode_array
 *   0x7B (opening curly brace): minijson_decode_object
 *   0x22 (double quote): minijson_decode_string
 *   0x2D (minus) or 0x30-0x39 (digit): minijson_decode_number
 * Every error path stores a message in $ov. The two recursion limit
 * messages presumably belong to the case where $depth is used up;
 * the tests on $depth are not part of this view.
 */
424 /* skip optional whitespace between tokens */
425 minijson_skip_wsp($j, $p);
427 /* parse begin of Value token */
430 /* style: falling through exits with false */
432 $ov = "unexpected EOS at wchar #" . $p;
433 } else if ($wc == 0x6E) {
/* 0x75 is u, the letter expected after n */
435 if ($j[$p++] == 0x75 &&
441 $ov = "expected ull after n near wchar #" . $p;
442 } else if ($wc == 0x74) {
/* 0x72 is r, the letter expected after t */
444 if ($j[$p++] == 0x72 &&
450 $ov = "expected rue after t near wchar #" . $p;
451 } else if ($wc == 0x66) {
/* 0x61 is a, the letter expected after f */
453 if ($j[$p++] == 0x61 &&
460 $ov = "expected alse after f near wchar #" . $p;
461 } else if ($wc == 0x5B) {
463 return minijson_decode_array($j, $p, $ov, $depth);
465 $ov = "recursion limit exceeded at wchar #" . $p;
466 } else if ($wc == 0x7B) {
468 return minijson_decode_object($j, $p, $ov, $depth);
470 $ov = "recursion limit exceeded at wchar #" . $p;
471 } else if ($wc == 0x22) {
472 return minijson_decode_string($j, $p, $ov);
473 } else if ($wc == 0x2D || ($wc >= 0x30 && $wc <= 0x39)) {
475 return minijson_decode_number($j, $p, $ov);
/* any other first character is reported with its code point in hexadecimal */
477 $ov = sprintf("unexpected U+%04X at wchar #%u", $wc, $p);
482 function minijson_decode_string(&$j, &$p, &$ov) {
/*
 * Decodes the body of a JSON string from the wide-character array
 * $j, starting at position $p (the callers visible in this file
 * have already consumed the opening double quote). Characters are
 * collected in $s as UTF-16LE byte pairs, low byte first; at the
 * closing double quote $s is converted to UTF-8 and stored in $ov.
 * On failure $ov holds an error message instead.
 * NOTE(review): several statements, including the loop header and
 * the return statements, are not part of this view.
 */
483 /* UTF-16LE string buffer */
489 $ov = "unescaped control character $wc at wchar #" . $p;
/* 0x22 is the double quote that terminates the string */
491 } else if ($wc == 0x22) {
492 /* regular exit point for the loop */
494 /* convert to UTF-8, then re-check against UTF-16 */
495 $ov = mb_convert_encoding($s, "UTF-8", "UTF-16LE");
496 $tmp = mb_convert_encoding($ov, "UTF-16LE", "UTF-8");
498 $ov = "no Unicode string before wchar #" . $p;
/* 0x5C is the backslash that introduces an escape sequence */
502 } else if ($wc == 0x5C) {
/* here the character after the backslash is stored as is, with a zero high byte */
507 $s .= chr($wc) . chr(0);
/* b, f, n, r and t stand for backspace, form feed, line feed, carriage return and tab */
508 } else if ($wc == 0x62) {
509 $s .= chr(0x08) . chr(0);
510 } else if ($wc == 0x66) {
511 $s .= chr(0x0C) . chr(0);
512 } else if ($wc == 0x6E) {
513 $s .= chr(0x0A) . chr(0);
514 } else if ($wc == 0x72) {
515 $s .= chr(0x0D) . chr(0);
516 } else if ($wc == 0x74) {
517 $s .= chr(0x09) . chr(0);
/* 0x75 is u: four hexadecimal digits follow, fetched one per loop pass */
518 } else if ($wc == 0x75) {
520 for ($tmp = 1; $tmp <= 4; $tmp++) {
522 if (!minijson_get_hexdigit($j, $p,
524 /* pass through error code */
/*
 * the escaped value must lie within 1..0xFFFD
 * NOTE(review): escaped values in the surrogate range pass this
 * test; presumably the UTF-8/UTF-16 round trip at the closing quote
 * is what validates them. Confirm.
 */
528 if ($v < 1 || $v > 0xFFFD) {
529 $ov = "non-Unicode escape $v before wchar #" . $p;
/* append the escaped value, low byte first */
532 $s .= chr($v & 0xFF) . chr($v >> 8);
534 $ov = "invalid escape sequence at wchar #" . $p;
/* unescaped units in the surrogate range 0xD800-0xDFFF are rejected */
537 } else if ($wc > 0xD7FF && $wc < 0xE000) {
538 $ov = "surrogate $wc at wchar #" . $p;
/* so are the values 0xFFFE and 0xFFFF */
540 } else if ($wc > 0xFFFD) {
541 $ov = "non-Unicode char $wc at wchar #" . $p;
/* every other character is copied into the buffer, low byte first */
544 $s .= chr($wc & 0xFF) . chr($wc >> 8);
549 function minijson_decode_number(&$j, &$p, &$ov) {
553 /* check for an optional minus sign */
561 /* begins with zero (0 or 0.x) */
564 if ($wc >= 0x30 && $wc <= 0x39) {
565 $ov = "no leading zeroes please at wchar #" . $p;
568 } else if ($wc >= 0x31 && $wc <= 0x39) {
569 /* begins with 1‥9 */
570 while ($wc >= 0x30 && $wc <= 0x39) {
575 $ov = "decimal digit expected at wchar #" . $p;
577 /* we had none, so it’s allowed to prepend one */
578 $ov = "minus sign or " . $ov;
583 /* do we have a fractional part? */
588 if ($wc < 0x30 || $wc > 0x39) {
589 $ov = "fractional digit expected at wchar #" . $p;
592 while ($wc >= 0x30 && $wc <= 0x39) {
598 /* do we have an exponent, treat number as mantissa? */
599 if ($wc == 0x45 || $wc == 0x65) {
603 if ($wc == 0x2B || $wc == 0x2D) {
607 if ($wc < 0x30 || $wc > 0x39) {
608 $ov = "exponent digit expected at wchar #" . $p;
611 while ($wc >= 0x30 && $wc <= 0x39) {
619 /* no fractional part, no exponent */
622 if ((string)$v == $s) {