3 * Minimal complete JSON generator and parser for FusionForge
5 * Copyright © 2010, 2011
6 * Thorsten “mirabilos” Glaser <t.glaser@tarent.de>
9 * This file is part of FusionForge. FusionForge is free software;
10 * you can redistribute it and/or modify it under the terms of the
11 * GNU General Public License as published by the Free Software
12 * Foundation; either version 2 of the License, or (at your option)
15 * FusionForge is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with FusionForge; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * Do *not* use PHP’s json_encode because it is broken.
25 * Note that JSON is case-sensitive. My notes are at:
26 * https://www.mirbsd.org/cvs.cgi/contrib/hosted/tg/json.txt?rev=HEAD
30 * I was really, really bad at writing parsers. I still am really bad at
36 * Encodes an array (indexed or associative) as JSON.
38 * in: array x (Value to be encoded)
39 * in: string indent or bool false to skip beautification
42 function minijson_encode($x, $ri="") {
43 if (!isset($x) || is_null($x) || (is_float($x) &&
44 (is_nan($x) || is_infinite($x))))
58 $rs = sprintf("%.14e", $x);
59 $v = explode("e", $rs);
60 $rs = rtrim($v[0], "0");
61 if (substr($rs, -1) == ".") {
64 if ($v[1] != "-0" && $v[1] != "+0") {
73 * A bit unbelievable: not only has mb_check_encoding
74 * not existed from the start, it also does not check
75 * reliably – so converting back and forth is the way
76 * they recommend… also, JSON is not binary-safe either…
80 if (function_exists('mb_internal_encoding') &&
81 function_exists('mb_convert_encoding')) {
82 $mb_encoding = mb_internal_encoding();
83 mb_internal_encoding("UTF-8");
84 $z = mb_convert_encoding($x, "UTF-16LE", "UTF-8");
85 $y = mb_convert_encoding($z, "UTF-8", "UTF-16LE");
86 $isunicode = ($y == $x);
89 $z = str_split($z, 2);
97 $y |= ord($v[1]) << 8;
101 } else if ($y == 8) {
103 } else if ($y == 9) {
105 } else if ($y == 10) {
107 } else if ($y == 12) {
109 } else if ($y == 13) {
111 } else if ($y == 34) {
113 } else if ($y == 92) {
115 } else if ($y < 0x20 || $y > 0xFFFD ||
116 ($y >= 0xD800 && $y <= 0xDFFF) ||
117 ($y > 0x7E && (!$isunicode || $y < 0xA0))) {
118 $rs .= sprintf("\\u%04X", $y);
119 } else if ($isunicode && $y > 0x7E) {
120 $rs .= mb_convert_encoding($v, "UTF-8",
126 if ($mb_encoding !== false) {
127 mb_internal_encoding($mb_encoding);
150 /* all array keys are integers */
152 sort($s, SORT_NUMERIC);
153 /* test keys for order and delta */
164 $si = $ri === false ? false : $ri . " ";
167 /* all array keys are integers 0‥n */
174 else if ($ri === false)
180 $rs .= minijson_encode($x[$v], $si);
194 else if ($ri === false)
200 $rs .= minijson_encode((string)$v, false);
205 $rs .= minijson_encode($x[$v], $si);
213 /* treat everything else as array or string */
215 return minijson_encode((array)$x, $ri);
216 return minijson_encode((string)$x, $ri);
220 * Decodes a UTF-8 string from JSON (ECMA 262).
223 * in: reference output-variable (or error string)
224 * in: integer (optional) recursion depth (default: 32)
225 * out: boolean false if an error occurred, true = output is valid
227 function minijson_decode($sj, &$ov, $depth=32) {
228 if (!isset($sj) || !$sj) {
233 /* mb_convert_encoding simply must exist for the decoder */
234 $mb_encoding = mb_internal_encoding();
235 mb_internal_encoding("UTF-8");
237 /* see note about mb_check_encoding in the JSON encoder… */
238 $wj = mb_convert_encoding($sj, "UTF-16LE", "UTF-8");
239 $mj = mb_convert_encoding($wj, "UTF-8", "UTF-16LE");
245 /* convert UTF-16LE string to array of wchar_t */
247 foreach (str_split($wj, 2) as $v) {
248 $wc = ord($v[0]) | (ord($v[1]) << 8);
254 /* skip Byte Order Mark if present */
256 if ($j[$p] == 0xFEFF)
259 /* parse recursively */
260 $rv = minijson_decode_value($j, $p, $ov, $depth);
262 $ov = "input not valid UTF-8";
266 /* skip optional whitespace after tokens */
267 minijson_skip_wsp($j, $p);
271 /* no, trailing waste */
272 $ov = "expected EOS at wchar #" . $p;
277 mb_internal_encoding($mb_encoding);
281 function minijson_skip_wsp(&$j, &$p) {
282 /* skip all wide characters that are JSON whitespace */
285 } while ($wc == 0x09 || $wc == 0x0A || $wc == 0x0D || $wc == 0x20);
289 function minijson_get_hexdigit(&$j, &$p, &$v, $i) {
291 if ($wc >= 0x30 && $wc <= 0x39) {
293 } else if ($wc >= 0x41 && $wc <= 0x46) {
295 } else if ($wc >= 0x61 && $wc <= 0x66) {
298 $ov = sprintf("invalid hex in unicode escape" .
299 " sequence (%d) at wchar #%u", $i, $p);
305 function minijson_decode_array(&$j, &$p, &$ov, $depth) {
309 /* I wish there were a goto in PHP… */
311 /* skip optional whitespace between tokens */
312 minijson_skip_wsp($j, $p);
314 /* end of the array? */
315 if ($j[$p] == 0x5D) {
316 /* regular exit point for the loop */
322 /* member separator? */
323 if ($j[$p] == 0x2C) {
326 /* no comma before the first member */
327 $ov = "unexpected comma at wchar #" . $p;
330 } else if (!$first) {
332 * all but the first member require a separating
333 * comma; this also catches e.g. trailing
334 * rubbish after numbers
336 $ov = "expected comma at wchar #" . $p;
341 /* parse the member value */
343 if (!minijson_decode_value($j, $p, $v, $depth)) {
344 /* pass through error code */
352 function minijson_decode_object(&$j, &$p, &$ov, $depth) {
357 /* skip optional whitespace between tokens */
358 minijson_skip_wsp($j, $p);
360 /* end of the object? */
361 if ($j[$p] == 0x7D) {
362 /* regular exit point for the loop */
368 /* member separator? */
369 if ($j[$p] == 0x2C) {
372 /* no comma before the first member */
373 $ov = "unexpected comma at wchar #" . $p;
376 } else if (!$first) {
378 * all but the first member require a separating
379 * comma; this also catches e.g. trailing
380 * rubbish after numbers
382 $ov = "expected comma at wchar #" . $p;
387 /* skip optional whitespace between tokens */
388 minijson_skip_wsp($j, $p);
390 /* parse the member key */
391 if ($j[$p++] != 0x22) {
392 $ov = "expected key string at wchar #" . $p;
396 if (!minijson_decode_string($j, $p, $k)) {
397 /* pass through error code */
402 /* skip optional whitespace between tokens */
403 minijson_skip_wsp($j, $p);
405 /* key-value separator? */
406 if ($j[$p++] != 0x3A) {
407 $ov = "expected colon at wchar #" . $p;
411 /* parse the member value */
413 if (!minijson_decode_value($j, $p, $v, $depth)) {
414 /* pass through error code */
422 function minijson_decode_value(&$j, &$p, &$ov, $depth) {
423 /* skip optional whitespace between tokens */
424 minijson_skip_wsp($j, $p);
426 /* parse begin of Value token */
429 /* style: falling through exits with false */
431 $ov = "unexpected EOS at wchar #" . $p;
432 } else if ($wc == 0x6E) {
434 if ($j[$p++] == 0x75 &&
440 $ov = "expected ull after n near wchar #" . $p;
441 } else if ($wc == 0x74) {
443 if ($j[$p++] == 0x72 &&
449 $ov = "expected rue after t near wchar #" . $p;
450 } else if ($wc == 0x66) {
452 if ($j[$p++] == 0x61 &&
459 $ov = "expected alse after f near wchar #" . $p;
460 } else if ($wc == 0x5B) {
462 return minijson_decode_array($j, $p, $ov, $depth);
464 $ov = "recursion limit exceeded at wchar #" . $p;
465 } else if ($wc == 0x7B) {
467 return minijson_decode_object($j, $p, $ov, $depth);
469 $ov = "recursion limit exceeded at wchar #" . $p;
470 } else if ($wc == 0x22) {
471 return minijson_decode_string($j, $p, $ov);
472 } else if ($wc == 0x2D || ($wc >= 0x30 && $wc <= 0x39)) {
474 return minijson_decode_number($j, $p, $ov);
476 $ov = sprintf("unexpected U+%04X at wchar #%u", $wc, $p);
481 function minijson_decode_string(&$j, &$p, &$ov) {
482 /* UTF-16LE string buffer */
488 $ov = "unescaped control character $wc at wchar #" . $p;
490 } else if ($wc == 0x22) {
491 /* regular exit point for the loop */
493 /* convert to UTF-8, then re-check against UTF-16 */
494 $ov = mb_convert_encoding($s, "UTF-8", "UTF-16LE");
495 $tmp = mb_convert_encoding($ov, "UTF-16LE", "UTF-8");
497 $ov = "no Unicode string before wchar #" . $p;
501 } else if ($wc == 0x5C) {
506 $s .= chr($wc) . chr(0);
507 } else if ($wc == 0x62) {
508 $s .= chr(0x08) . chr(0);
509 } else if ($wc == 0x66) {
510 $s .= chr(0x0C) . chr(0);
511 } else if ($wc == 0x6E) {
512 $s .= chr(0x0A) . chr(0);
513 } else if ($wc == 0x72) {
514 $s .= chr(0x0D) . chr(0);
515 } else if ($wc == 0x74) {
516 $s .= chr(0x09) . chr(0);
517 } else if ($wc == 0x75) {
519 for ($tmp = 1; $tmp <= 4; $tmp++) {
521 if (!minijson_get_hexdigit($j, $p,
523 /* pass through error code */
527 if ($v < 1 || $v > 0xFFFD) {
528 $ov = "non-Unicode escape $v before wchar #" . $p;
531 $s .= chr($v & 0xFF) . chr($v >> 8);
533 $ov = "invalid escape sequence at wchar #" . $p;
536 } else if ($wc > 0xD7FF && $wc < 0xE000) {
537 $ov = "surrogate $wc at wchar #" . $p;
539 } else if ($wc > 0xFFFD) {
540 $ov = "non-Unicode char $wc at wchar #" . $p;
543 $s .= chr($wc & 0xFF) . chr($wc >> 8);
548 function minijson_decode_number(&$j, &$p, &$ov) {
552 /* check for an optional minus sign */
560 /* begins with zero (0 or 0.x) */
563 if ($wc >= 0x30 && $wc <= 0x39) {
564 $ov = "no leading zeroes please at wchar #" . $p;
567 } else if ($wc >= 0x31 && $wc <= 0x39) {
568 /* begins with 1‥9 */
569 while ($wc >= 0x30 && $wc <= 0x39) {
574 $ov = "decimal digit expected at wchar #" . $p;
576 /* we had none, so it’s allowed to prepend one */
577 $ov = "minus sign or " . $ov;
582 /* do we have a fractional part? */
587 if ($wc < 0x30 || $wc > 0x39) {
588 $ov = "fractional digit expected at wchar #" . $p;
591 while ($wc >= 0x30 && $wc <= 0x39) {
597 /* do we have an exponent, treat number as mantissa? */
598 if ($wc == 0x45 || $wc == 0x65) {
602 if ($wc == 0x2B || $wc == 0x2D) {
606 if ($wc < 0x30 || $wc > 0x39) {
607 $ov = "exponent digit expected at wchar #" . $p;
610 while ($wc >= 0x30 && $wc <= 0x39) {
618 /* no fractional part, no exponent */
621 if ((string)$v == $s) {