4 * Does not extract triples, needs sub-class for RDF extraction
6 * @author Benjamin Nowack <bnowack@semsol.com>
7 * @license http://arc.semsol.org/license
8 * @homepage <http://arc.semsol.org/>
13 ARC2::inc('RDFParser');
15 class ARC2_JSONParser extends ARC2_RDFParser {
/**
 * Builds the parser by forwarding both arguments unchanged to the parent constructor.
 *
 * NOTE(review): $a has a default value but the by-reference $caller that follows
 * it does not, so the default can never be used on its own. PHP 8.0 and later
 * report an optional parameter placed before a required one as deprecated.
 */
17 function __construct($a = '', &$caller) {
18 parent::__construct($a, $caller);
/**
 * Method named after the class (PHP 4 constructor convention) that simply
 * forwards its arguments to __construct.
 *
 * NOTE(review): methods named after the class are no longer treated as
 * constructors from PHP 8.0 on, so this only runs when called explicitly.
 */
21 function ARC2_JSONParser($a = '', &$caller) {
22 $this->__construct($a, $caller);
/**
 * Regex helper used by the extract methods of this class.
 *
 * While $v begins (after optional whitespace) with a C-style block comment, the
 * loop matches that comment together with the text following it. Afterwards the
 * shorter of the stored unparsed_code and $v is kept in unparsed_code, and the
 * real match is delegated to ARC2::x with the same pattern, input and options.
 *
 * @param string $re      Pattern fragment handed to ARC2::x.
 * @param string $v       Remaining input text.
 * @param string $options Flags handed to ARC2::x (defaults to si).
 * @return mixed Result of ARC2::x; callers in this class use it as a match
 *               array or a falsy value (TODO confirm against ARC2::x).
 */
31 function x($re, $v, $options = 'si') {
32 while (preg_match('/^\s*(\/\*.*\*\/)(.*)$/Usi', $v, $m)) {/* comment removal */
35 $this->unparsed_code = (strlen($this->unparsed_code) > strlen($v)) ? $v : $this->unparsed_code;
36 return ARC2::x($re, $v, $options);
/**
 * Reads a JSON document through the reader and decodes it into the struct property.
 *
 * Steps visible here:
 *  - if no reader is available via v(reader), an ARC2_Reader is created from
 *    the parser configuration and this parser;
 *  - the reader gets an Accept header that prefers application/json, then is
 *    activated with $path and $data;
 *  - x_base becomes the configured base entry when it is set and truthy,
 *    otherwise the base reported by the reader;
 *  - the stream is read until readStream returns a falsy chunk, then closed;
 *  - everything before the first opening brace and after the last closing
 *    brace is dropped from $doc, the result is stored in unparsed_code and
 *    passed to extractObject, whose first return element becomes struct.
 *
 * NOTE(review): how $doc is filled from the chunks is presumably done inside
 * the read loop; confirm that it is initialised before the loop.
 * NOTE(review): assigning the result of new by reference (the reader creation
 * below) is a parse error from PHP 7.0 on; plain assignment is equivalent.
 *
 * @param string $path Location handed to the reader.
 * @param string $data Optional inline data handed to the reader.
 */
39 function parse($path, $data = '') {
42 if (!$this->v('reader')) {
44 $this->reader = & new ARC2_Reader($this->a, $this);
46 $this->reader->setAcceptHeader('Accept: application/json; q=0.9, */*; q=0.1');
47 $this->reader->activate($path, $data);
48 $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
51 while ($d = $this->reader->readStream()) {
54 $this->reader->closeStream();
56 $doc = preg_replace('/^[^\{]*(.*\})[^\}]*$/is', '\\1', $doc);
57 $this->unparsed_code = $doc;
58 list($this->struct, $rest) = $this->extractObject($doc);
/**
 * Decodes the JSON value at the start of $v.
 *
 * When json_decode exists the whole input is decoded with it (second argument
 * 1, i.e. objects become associative arrays) and returned as the pair
 * (decoded value, empty remainder); nothing below that line is reached.
 *
 * Otherwise a hand-written fallback is used:
 *  - opening brace: entries are pulled with extractEntry and stored under
 *    their key until no further entry is found, then a closing brace is consumed;
 *  - opening bracket: elements are pulled by calling extractObject again,
 *    then a closing bracket is consumed;
 *  - anything else: a scalar is pulled with extractValue, where false marks
 *    that no value was found.
 *
 * NOTE(review): the bracket loop stops as soon as an element is falsy, so in
 * the fallback path elements such as 0, an empty string or null would end the
 * list early; verify whether that is intended.
 *
 * @param string $v Remaining input text.
 * @return array Pair of (value, remaining text), as shown by the json_decode
 *               branch; the fallback returns are presumably of the same shape.
 */
64 function extractObject($v) {
65 if (function_exists('json_decode')) return array(json_decode($v, 1), '');
68 if ($sub_r = $this->x('\{', $v)) {
70 while ((list($sub_r, $v) = $this->extractEntry($v)) && $sub_r) {
71 $r[$sub_r['key']] = $sub_r['value'];
73 if ($sub_r = $this->x('\}', $v)) $v = $sub_r[1];
76 elseif ($sub_r = $this->x('\[', $v)) {
78 while ((list($sub_r, $v) = $this->extractObject($v)) && $sub_r) {
82 if ($sub_r = $this->x('\]', $v)) $v = $sub_r[1];
85 elseif ((list($sub_r, $v) = $this->extractValue($v)) && ($sub_r !== false)) {
/**
 * Pulls one key/value entry of a JSON object from the start of $v.
 *
 * An optional leading comma is skipped first. The key must be a non-empty,
 * double-quoted run of characters without a double quote, followed by
 * optional whitespace and a colon. The value is then obtained through
 * extractObject and returned inside an array with the entries key and value.
 *
 * NOTE(review): $k and $sub_v are presumably taken from the key match (capture
 * and remainder); confirm where they are assigned.
 *
 * @param string $v Remaining input text.
 * @return array First element is the key/value array on success; callers
 *               read the pair as (entry, remaining text).
 */
91 function extractEntry($v) {
92 if ($r = $this->x('\,', $v)) $v = $r[1];
94 if ($r = $this->x('\"([^\"]+)\"\s*\:', $v)) {
97 if (list($sub_r, $sub_v) = $this->extractObject($sub_v)) {
99 array('key' => $k, 'value' => $sub_r),
/**
 * Pulls one scalar JSON value from the start of $v.
 *
 * An optional leading comma is skipped, then the alternatives are tried in
 * this order: the null literal, true or false, a number (optional sign
 * followed by digits and dots; no exponent part in the pattern), and a
 * double-quoted string. If none applies the pair (false, $v) is returned.
 *
 * NOTE(review): for true/false and numbers the returned value is element 1 of
 * the match result, presumably the matched text rather than a PHP boolean or
 * number; confirm against ARC2::x.
 *
 * @param string $v Remaining input text.
 * @return array Pair of (value or false, remaining text).
 */
107 function extractValue($v) {
108 if ($r = $this->x('\,', $v)) $v = $r[1];
109 if ($sub_r = $this->x('null', $v)) {
110 return array(null, $sub_r[1]);
112 if ($sub_r = $this->x('(true|false)', $v)) {
113 return array($sub_r[1], $sub_r[2]);
115 if ($sub_r = $this->x('([\-\+]?[0-9\.]+)', $v)) {
116 return array($sub_r[1], $sub_r[2]);
118 if ($sub_r = $this->x('\"', $v)) {
// Find the closing quote: the text before it must contain no backslash, or end
// in a non-backslash, or end in two backslashes (x5c is the backslash).
120 if (preg_match('/^([^\x5c]*|.*[^\x5c]|.*\x5c{2})\"(.*)$/sU', $rest, $m)) {
122 /* unescape chars (single-byte) */
// NOTE(review): the e modifier evaluates the replacement as PHP code built from
// input text; it was removed in PHP 7.0 (preg_replace_callback is the
// replacement). chr also cannot represent code points above FF.
123 $val = preg_replace('/\\\u(.{4})/e', 'chr(hexdec("\\1"))', $val);
124 //$val = preg_replace('/\\\u00(.{2})/e', 'rawurldecode("%\\1")', $val);
125 /* other escaped chars */
// NOTE(review): str_replace applies the pairs one after another, so an escaped
// backslash followed by a letter such as n is first reduced to backslash-n and
// then turned into a newline by the later pair; verify.
126 $from = array('\\\\', '\r', '\t', '\n', '\"', '\b', '\f', '\/');
127 $to = array("\\", "\r", "\t", "\n", '"', "\b", "\f", "/");
128 $val = str_replace($from, $to, $val);
129 return array($val, $m[2]);
132 return array(false, $v);
/**
 * Returns the result of v(struct, empty array); presumably the decoded
 * structure stored by parse, or an empty array when none is set (TODO confirm
 * the semantics of the inherited v helper).
 */
137 function getObject() {
138 return $this->v('struct', array());
/**
 * Returns the result of v(triples, empty array); presumably the collected
 * triples, or an empty array when none are set (TODO confirm the semantics of
 * the inherited v helper).
 */
141 function getTriples() {
142 return $this->v('triples', array());
/**
 * Returns the current value of the t_count property, which addT uses as the
 * index for stored triples.
 */
145 function countTriples() {
146 return $this->t_count;
/**
 * Stores one triple in the triples property at index t_count.
 *
 * The object value is first passed through toUTF8. The triple is an array with
 * the keys s, p, o, s_type, o_type, o_datatype and o_lang. When skip_dupes is
 * set, an md5 hash of the serialized triple is looked up in added_triples and
 * the triple is stored (and its hash recorded) only if that hash is not yet
 * present; otherwise the triple is stored unconditionally.
 *
 * NOTE(review): t_count is presumably advanced after each store; confirm.
 *
 * @param string $s       Subject.
 * @param string $p       Predicate.
 * @param string $o       Object value.
 * @param string $s_type  Stored under s_type.
 * @param string $o_type  Stored under o_type.
 * @param string $o_dt    Stored under o_datatype.
 * @param string $o_lang  Stored under o_lang.
 */
149 function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '') {
150 $o = $this->toUTF8($o);
151 //echo str_replace($this->base, '', "-----\n adding $s / $p / $o\n-----\n");
152 $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
153 if ($this->skip_dupes) {
154 $h = md5(serialize($t));
155 if (!isset($this->added_triples[$h])) {
156 $this->triples[$this->t_count] = $t;
158 $this->added_triples[$h] = true;
162 $this->triples[$this->t_count] = $t;