3 * ARC2 streaming SPOG parser
5 * @author Benjamin Nowack
6 * @license <http://arc.semsol.org/license>
7 * @homepage <http://arc.semsol.org/>
12 ARC2::inc('RDFParser');
/**
 * Streaming parser that turns an XML result document into quads.
 *
 * The document is read chunk-wise through an ARC2_Reader and fed to PHP's
 * expat-based XML parser. Each <result> element is expected to carry
 * <binding> children named "s", "p", "o" and optionally "g"; when a result
 * closes, the collected bindings are stored as one triple via addT().
 */
class ARC2_SPOGParser extends ARC2_RDFParser {

  /**
   * Creates the parser by delegating to the parent constructor.
   *
   * The former `$a = ''` default was dropped: it preceded a required
   * parameter, so it could never be used, and PHP 8 reports it as deprecated.
   *
   * @param mixed $a      Configuration, handed to the parent unchanged.
   * @param mixed $caller Calling component, passed on by reference.
   */
  public function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * PHP 4 style constructor, kept so existing explicit calls keep working.
   *
   * @param mixed $a      See __construct().
   * @param mixed $caller See __construct().
   */
  public function ARC2_SPOGParser($a, &$caller) {
    $this->__construct($a, $caller);
  }

  /**
   * Sets the parser's initial state: configured encoding, the XML and RDF
   * namespace URIs with their default prefixes, and an empty target encoding.
   */
  public function __init() {
    /* NOTE(review): the parent initialiser is chained so that inherited
       state (presumably the triple buffer and counters used by addT()) is
       set up before this class adds its own - confirm against
       ARC2_RDFParser. */
    if (method_exists('ARC2_RDFParser', '__init')) {
      parent::__init();
    }
    $this->encoding = $this->v('encoding', false, $this->a);
    $this->xml = 'http://www.w3.org/XML/1998/namespace';
    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
    $this->target_encoding = '';
  }

  /**
   * Reads the document at $path (or the given $data) and parses it chunk by
   * chunk. Triples are collected through addT() while parsing.
   *
   * @param string $path         Location handed to the reader.
   * @param string $data         Optional inline data handed to the reader.
   * @param bool   $iso_fallback When true, the XML declaration of the first
   *                             chunk is replaced by an ISO-8859-1 one.
   * @return mixed The result of addError() when the XML is not well-formed;
   *               nothing otherwise.
   */
  public function parse($path, $data = '', $iso_fallback = false) {
    $this->state = '';
    /* reader */
    if (!$this->v('reader')) {
      /* load the reader class the same way the parent class is loaded */
      ARC2::inc('Reader');
      /* plain assignment: "= & new" is a parse error since PHP 7 */
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: sparql-results+xml; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = !empty($this->a['base']) ? $this->a['base'] : $this->reader->base;
    /* xml parser */
    $this->initXMLParser();
    /* parse */
    $first = true;
    while ($d = $this->reader->readStream()) {
      if ($iso_fallback && $first) {
        /* only the first chunk can contain the XML declaration */
        $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
      }
      $first = false;
      if (!xml_parse($this->xml_parser, $d, false)) {
        $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
        $line = xml_get_current_line_number($this->xml_parser);
        $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
        /* NOTE(review): appending the raw and url-encoded chunk looks like
           debugging residue; kept so the error text stays unchanged. */
        $this->tmp_error .= $d . urlencode($d);
        /* An automatic retry with $iso_fallback on "Invalid character"
           errors used to sit here behind an "if (0 && ...)" guard; the
           unreachable branch was removed. Callers can still request the
           fallback explicitly through the third argument. */
        $this->releaseParserAndStream();
        return $this->addError($this->tmp_error);
      }
    }
    $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
    $this->releaseParserAndStream();
  }

  /**
   * Frees the XML parser, forgets it so that initXMLParser() builds a fresh
   * one for the next document, and closes the reader stream.
   */
  private function releaseParserAndStream() {
    if (isset($this->xml_parser)) {
      /* parsers are resources before PHP 8 and objects afterwards; only the
         resource form needs (and tolerates) an explicit free */
      if (is_resource($this->xml_parser)) {
        xml_parser_free($this->xml_parser);
      }
      unset($this->xml_parser);
    }
    $this->reader->closeStream();
  }

  /**
   * Creates and configures the XML parser unless one is already set.
   *
   * The parser is created for UTF-8, ISO-8859-1 or US-ASCII if
   * getEncoding() reports one of them, and for UTF-8 otherwise. Whitespace
   * skipping and case folding are switched off.
   */
  public function initXMLParser() {
    if (isset($this->xml_parser)) {
      return;
    }
    $enc = 'UTF-8';
    if (preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', (string) $this->getEncoding(), $m)) {
      $enc = strtoupper($m[1]);
    }
    $parser = xml_parser_create($enc);
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    /* array callables bind the handlers to this object directly, which
       makes xml_set_object() (deprecated in PHP 8.4) unnecessary */
    xml_set_element_handler($parser, array($this, 'open'), array($this, 'close'));
    xml_set_character_data_handler($parser, array($this, 'cData'));
    xml_set_start_namespace_decl_handler($parser, array($this, 'nsDecl'));
    $this->xml_parser = $parser;
  }

  /**
   * Returns an encoding name.
   *
   * @param string $src 'parser' returns the target encoding recorded after
   *                    the last successful parse; 'config' (default) returns
   *                    the configured encoding when one is set. In every
   *                    other case the reader is asked.
   * @return mixed
   */
  public function getEncoding($src = 'config') {
    if ($src === 'parser') {
      return $this->target_encoding;
    }
    if ($src === 'config' && $this->encoding) {
      return $this->encoding;
    }
    return $this->reader->getEncoding();
  }

  /**
   * Returns the collected triples, or an empty array if there are none.
   *
   * @return array
   */
  public function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * Returns the triple counter.
   *
   * @return int
   */
  public function countTriples() {
    return $this->t_count;
  }

  /**
   * Stores one triple.
   *
   * When duplicate skipping is enabled, a triple whose serialised form has
   * been seen before is ignored.
   *
   * @param string $s      Subject.
   * @param string $p      Predicate.
   * @param string $o      Object.
   * @param string $s_type Subject type.
   * @param string $o_type Object type.
   * @param string $o_dt   Object datatype.
   * @param string $o_lang Object language.
   * @param string $g      Graph.
   * @return int|null 0 when subject, predicate or object is missing.
   */
  public function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '', $g = '') {
    /* the object is compared against "empty" explicitly: a plain truth
       test would also throw away the valid literal "0" */
    $has_object = ($o !== '' && $o !== null && $o !== false);
    if (!$s || !$p || !$has_object) {
      return 0;
    }
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang, 'g' => $g);
    if ($this->skip_dupes) {
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) {
        return;
      }
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    /* advance the write index so the next triple gets its own slot */
    $this->t_count++;
  }

  /**
   * Start-element handler.
   *
   * Records the element name as the current state, which cData() consults.
   * A <result> starts a new binding set, a <binding> selects the slot that
   * following values are written to, and <literal>, <uri> and <bnode>
   * record the value type of the current binding.
   *
   * @param mixed  $p XML parser.
   * @param string $t Element name.
   * @param array  $a Element attributes.
   */
  public function open($p, $t, $a) {
    $this->state = $t;
    switch ($t) {
      case 'result':
        /* forget the bindings of the previous result */
        $this->t = array();
        break;
      case 'binding':
        $this->binding = $this->v('name', '', $a);
        $this->t[$this->binding] = '';
        break;
      case 'literal':
        $this->t[$this->binding . '_dt'] = $this->v('datatype', '', $a);
        $this->t[$this->binding . '_lang'] = $this->v('xml:lang', '', $a);
        $this->t[$this->binding . '_type'] = 'literal';
        break;
      case 'uri':
        $this->t[$this->binding . '_type'] = 'uri';
        break;
      case 'bnode':
        $this->t[$this->binding . '_type'] = 'bnode';
        /* the label arrives as character data and is appended to this */
        $this->t[$this->binding] = '_:';
        break;
    }
  }

  /**
   * End-element handler.
   *
   * Remembers the state that was active and clears it, so that text between
   * elements is not appended to a binding. When a <result> closes, its
   * bindings are handed to addT().
   *
   * @param mixed  $p XML parser.
   * @param string $t Element name.
   */
  public function close($p, $t) {
    $this->prev_state = $this->state;
    $this->state = '';
    if ($t === 'result') {
      $this->addT(
        $this->v('s', '', $this->t),
        $this->v('p', '', $this->t),
        $this->v('o', '', $this->t),
        $this->v('s_type', '', $this->t),
        $this->v('o_type', '', $this->t),
        $this->v('o_dt', '', $this->t),
        $this->v('o_lang', '', $this->t),
        $this->v('g', '', $this->t)
      );
    }
  }

  /**
   * Character-data handler: appends text to the current binding while a
   * <uri>, <bnode> or <literal> element is open.
   *
   * @param mixed  $p XML parser.
   * @param string $d Text; may arrive in several pieces.
   */
  public function cData($p, $d) {
    if (in_array($this->state, array('uri', 'bnode', 'literal'), true)) {
      $this->t[$this->binding] .= $d;
    }
  }

  /**
   * Namespace-declaration handler: remembers the prefix of a namespace URI
   * unless that URI already has one.
   *
   * @param mixed  $p   XML parser.
   * @param string $prf Declared prefix.
   * @param string $uri Namespace URI.
   */
  public function nsDecl($p, $prf, $uri) {
    if (!isset($this->nsp[$uri])) {
      $this->nsp[$uri] = $prf;
    }
  }

}