3 homepage: http://arc.semsol.org/
4 license: http://arc.semsol.org/license
6 class: ARC2 Legacy XML Parser
7 author: Benjamin Nowack
8 version: 2008-10-04 (Fix: nsDecl led to warnings when uri was an array.)
/**
 * Event-driven XML parser that records a document as a list of node arrays.
 *
 * Built on PHP's XML extension (xml_parser_create_ns and related functions):
 * element, character-data and namespace-declaration events are delivered to
 * the open(), close(), cData() and nsDecl() methods of this class, which
 * store one array per node in $this->nodes.
 */
13 class ARC2_LegacyXMLParser extends ARC2_Class {
/**
 * Creates the parser and forwards both arguments to the parent constructor.
 *
 * $a was previously declared with a default value ('') although it precedes
 * the required by-reference parameter $caller. Such a default can never take
 * effect, and PHP 8.0+ reports the declaration as deprecated, so both
 * parameters are now declared as required. Calls that pass both arguments
 * behave exactly as before.
 *
 * @param mixed $a      Forwarded to the parent constructor (presumably the
 *                      configuration later read through $this->a; confirm
 *                      in ARC2_Class).
 * @param mixed $caller Forwarded to the parent constructor; taken by reference.
 */
function __construct($a, &$caller) {
  parent::__construct($a, $caller);
}
/**
 * PHP 4 style constructor (a method named after the class) that simply
 * delegates to __construct().
 *
 * $a was previously declared with a default value ('') although it precedes
 * the required by-reference parameter $caller. Such a default can never take
 * effect, and PHP 8.0+ reports the declaration as deprecated, so both
 * parameters are now declared as required. Calls that pass both arguments
 * behave exactly as before.
 *
 * @param mixed $a      Forwarded unchanged to __construct().
 * @param mixed $caller Forwarded to __construct(); taken by reference.
 */
function ARC2_LegacyXMLParser($a, &$caller) {
  $this->__construct($a, $caller);
}
/**
 * Sets the parser's encoding, base, namespace and character-data settings
 * to their initial values.
 */
23 function __init() {/* reader */
/* encoding taken from the settings in $this->a; false is passed as the fallback (presumably returned by v() when the key is absent; confirm in ARC2_Class) */
25 $this->encoding = $this->v('encoding', false, $this->a);
/* x_base starts out as a copy of the current base */
27 $this->x_base = $this->base;
/* namespace URIs for XML and RDF, plus the URI => prefix map seeded with both */
28 $this->xml = 'http://www.w3.org/XML/1998/namespace';
29 $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
30 $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
/* cData() checks this flag before it creates stand-alone 'cdata' nodes */
31 $this->allowCDataNodes = 1;
/* overwritten by parse() with the XML parser's target encoding */
32 $this->target_encoding = '';
/* when truthy, getNodeContent() returns content without trimming it */
33 $this->keep_cdata_ws = $this->v('keep_cdata_whitespace', 0, $this->a);
/**
 * Installs the reader that parse() will use to fetch the document.
 *
 * @param mixed $reader Reader object; taken by reference and bound to
 *                      $this->reader by reference.
 */
38 function setReader(&$reader) {
39 $this->reader =& $reader;
/**
 * Streams a document through the reader into the XML parser.
 *
 * If the XML parser reports an "Invalid character" error on the first
 * attempt, the parser and the stream are torn down and the document is
 * parsed once more with the encoding forced to ISO-8859-1. Any other parse
 * error is reported through addError().
 *
 * @param mixed $path         Passed to the reader's activate().
 * @param mixed $data         Passed to the reader's activate() together with $path.
 * @param bool  $iso_fallback True only on the internal ISO-8859-1 retry.
 */
42 function parse($path, $data = '', $iso_fallback = false) {
/* start with an empty node list */
43 $this->nodes = array();
44 $this->node_count = 0;
/* create a default reader unless one is already available (e.g. via setReader()) */
47 if (!$this->v('reader')) {
/* NOTE(review): assigning the result of "new" by reference was deprecated in PHP 5.3 and is a parse error as of PHP 7.0; a plain "=" assignment is the replacement */
49 $this->reader = & new ARC2_Reader($this->a, $this);
51 $this->reader->setAcceptHeader('Accept: application/xml; q=0.9, */*; q=0.1');
52 $this->reader->activate($path, $data);
/* a non-empty configured 'base' wins over the base reported by the reader */
53 $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
54 $this->base = $this->x_base;
55 $this->doc_url = $this->reader->base;
57 $this->initXMLParser();
/* feed the parser chunk by chunk for as long as the reader returns a truthy chunk */
60 while ($d = $this->reader->readStream(1)) {
/* on the retry pass, replace any XML declaration at the start of the chunk with one that announces ISO-8859-1 */
/* NOTE(review): confirm that $first is initialised before the loop and cleared after the first chunk */
61 if ($iso_fallback && $first) {
62 $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
/* third argument false: this chunk is not announced as the final one */
64 if (!xml_parse($this->xml_parser, $d, false)) {
65 $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
66 $line = xml_get_current_line_number($this->xml_parser);
/* first attempt only: an "Invalid character" error triggers the ISO-8859-1 retry */
67 if (!$iso_fallback && preg_match("/Invalid character/i", $error_str)) {
/* discard the parser and close the stream so the retry starts from scratch */
68 xml_parser_free($this->xml_parser);
69 unset($this->xml_parser);
70 $this->reader->closeStream();
73 $this->encoding = 'ISO-8859-1';
74 $this->initXMLParser();
75 return $this->parse($path, $data, true);
/* any other error ends parsing; the message includes the encoding returned by getEncoding() */
78 return $this->addError('XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')');
/* remember the parser's target encoding (returned by getEncoding('parser')), then release the parser and the stream */
83 $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
84 xml_parser_free($this->xml_parser);
85 $this->reader->closeStream();
/**
 * Returns the encoding as seen by the requested source.
 *
 * - 'parser': the target encoding recorded in $this->target_encoding.
 * - 'config': the explicitly set $this->encoding, if it is truthy.
 * - anything else, or 'config' without a set encoding: whatever the
 *   reader's getEncoding() reports.
 *
 * @param string $src Which source to ask; defaults to 'config'.
 * @return mixed The encoding value of the selected source.
 */
function getEncoding($src = 'config') {
  if ($src == 'parser') {
    return $this->target_encoding;
  }
  $use_configured = ($src == 'config') && $this->encoding;
  return $use_configured ? $this->encoding : $this->reader->getEncoding();
}
/**
 * Returns the recorded nodes wrapped in an array under the key 'nodes'.
 *
 * The node list is fetched through v() with an empty array as the fallback
 * (presumably used when no nodes have been recorded; confirm in ARC2_Class).
 *
 * @return array Array with a single 'nodes' entry.
 */
function getStructure() {
  $nodes = $this->v('nodes', array());
  return array('nodes' => $nodes);
}
/**
 * Returns the node index: nodes grouped by parent id and, within each
 * parent, keyed by position. The index is built on the first call and
 * cached in $this->node_index.
 */
116 function getNodeIndex(){
/* build the index only once; later calls return the cached copy */
117 if (!isset($this->node_index)) {
118 /* index by parent */
120 for ($i = 0, $i_max = count($this->nodes); $i < $i_max; $i++) {
121 $node = $this->nodes[$i];
/* stamp every node with the document base and, if known, the document URL, then write the changed copy back */
123 $node['doc_base'] = $this->base;
124 if (isset($this->doc_url)) $node['doc_url'] = $this->doc_url;
125 $this->updateNode($node);
126 $p_id = $node['p_id'];
/* first child seen for this parent: start its bucket */
127 if (!isset($index[$p_id])) {
128 $index[$p_id] = array();
/* children are keyed by their position under the parent */
130 $index[$p_id][$node['pos']] = $node;
132 $this->node_index = $index;
134 return $this->node_index;
137 function getNodes() {
/**
 * Returns the child nodes of the given node.
 *
 * Looks up $n['id'] in the parent-keyed node index through v(), passing an
 * empty array as the fallback (presumably returned when the node has no
 * children; confirm in ARC2_Class).
 *
 * @param array $n Node array; its 'id' entry is used as the lookup key.
 * @return mixed The index entry for the node, as returned by v().
 */
function getSubNodes($n) {
  $by_parent = $this->getNodeIndex();
  return $this->v($n['id'], array(), $by_parent);
}
/**
 * Serialises a node, and recursively its sub-nodes, to a string.
 *
 * @param array $n     Node array as stored in $this->nodes.
 * @param int   $outer Passed as 1 for every sub-node in the recursive call below.
 * @param int   $trim  When truthy (and keep_cdata_ws is off) the result is trimmed.
 * @return string The serialised content.
 */
145 function getNodeContent($n, $outer = 0, $trim = 1) {
146 //echo '<pre>' . htmlspecialchars(print_r($n, 1)) . '</pre>';
/* 'cdata' pseudo-nodes carry their text in the 'value' attribute */
147 if ($n['tag'] == 'cdata') {
148 $r = $n['a']['value'];
/* start tag */
153 $r .= '<' . $n['tag'];
/* default namespace declaration, if the node has a non-empty one */
/* NOTE(review): ['xmlns'][''] is read without isset(); an 'xmlns' entry that lacks the '' key raises a notice/warning */
155 if (isset($n['a']['xmlns']) && $n['a']['xmlns']['']) {
156 $r .= ' xmlns="' . $n['a']['xmlns'][''] . '"';
/* emit every attribute whose name contains no whitespace and whose value is not an array */
/* NOTE(review): addslashes() is not XML escaping; values containing a double quote, "<" or "&" are written with backslashes instead of entities */
158 foreach ($n['a'] as $a => $val) {
159 $r .= preg_match('/^[^\s]+$/', $a) && !is_array($val) ? ' ' . $a . '="' . addslashes($val) . '"' : '';
/* empty elements are closed inside the start tag */
161 $r .= $n['empty'] ? '/>' : '>';
/* the node's own text first, then each sub-node including its tags, untrimmed */
164 $r .= $this->v('cdata', '', $n);
165 $sub_nodes = $this->getSubNodes($n);
166 foreach ($sub_nodes as $sub_n) {
167 $r .= $this->getNodeContent($sub_n, 1, 0);
/* end tag */
170 $r .= '</' . $n['tag'] . '>';
/* trimming is skipped when the keep_cdata_whitespace setting is on */
174 return ($trim && !$this->keep_cdata_ws) ? trim($r) : $r;
/**
 * Stores a new node in the node list.
 *
 * @param array $n Node array; its 'id' entry is set here.
 */
179 function pushNode($n) {
/* the node's id is its index in $this->nodes, i.e. the current node count */
180 $n['id'] = $this->node_count;
181 $this->nodes[$this->node_count] = $n;
/**
 * Looks up a recently recorded node, optionally restricted to a tag name.
 *
 * @param string $t Tag name to match; empty (the default) accepts any node.
 */
185 function getCurNode($t = '') {
/* candidate: the node $i entries below the current node count, or 0 when no node has been recorded yet */
/* NOTE(review): $i is presumably a counter that starts at 1 and grows with each iteration; confirm */
188 $r = $this->node_count ? $this->nodes[$this->node_count - $i] : 0;
/* a match when no tag was requested or the candidate's tag equals $t */
189 $found = (!$t || ($r['tag'] == $t)) ? 1 : 0;
/* keep searching while nothing matched and a node still exists at the next offset */
191 } while (!$found && isset($this->nodes[$this->node_count - $i]));
/**
 * Writes a (modified) node array back into the node list under its own id.
 *
 * Callers such as close() and cData() change a copy of a node obtained from
 * getCurNode() and use this method to store the change.
 *
 * @param array $node Node array; must carry the 'id' it was stored under.
 */
195 function updateNode($node) {/* php4-save */
196 $this->nodes[$node['id']] = $node;
/**
 * Creates and configures the namespace-aware XML parser, unless one is
 * already stored in $this->xml_parser.
 *
 * Handlers are registered as array($this, 'method') callables rather than
 * bare method names bound through xml_set_object(): xml_set_object() and
 * non-callable string handlers are deprecated as of PHP 8.4, whereas the
 * XML extension has long accepted object/method array callables, so the
 * dispatch to open(), close(), cData() and nsDecl() is unchanged.
 */
function initXMLParser() {
  if (isset($this->xml_parser)) {
    return;
  }
  /* only these three encodings are passed on to the parser; anything else falls back to UTF-8 */
  $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
  /* '' as separator: namespace URI and local name are joined without a delimiter */
  $parser = xml_parser_create_ns($enc, '');
  /* keep whitespace and keep element names in their original case */
  xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
  xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
  xml_set_element_handler($parser, array($this, 'open'), array($this, 'close'));
  xml_set_character_data_handler($parser, array($this, 'cData'));
  xml_set_start_namespace_decl_handler($parser, array($this, 'nsDecl'));
  /* a plain assignment suffices; the former reference binding to the local variable had no effect */
  $this->xml_parser = $parser;
}
/**
 * Start-element handler (registered in initXMLParser(); also called by
 * cData() to create 'cdata' pseudo-nodes). Builds a node array for the
 * element, works out its parent and position, and pushes it onto the list.
 *
 * @param mixed  $p Parser handle; only forwarded to nsDecl() here.
 * @param string $t Element name.
 * @param array  $a Attributes as name => value.
 */
217 function open($p, $t, $a) {
219 //echo "<br />\n".'opening '.$t . ' ' . print_r($a, 1); flush();
220 //echo "<br />\n".'opening '.$t; flush();
/* names with a ':' after the first character are kept as they are; all others are lower-cased */
221 $t = strpos($t, ':') ? $t : strtolower($t);
/* a base element with an href changes the base used from here on */
224 if (($t == 'base') && isset($a['href'])) {
225 $this->base = $a['href'];
/* add a resolved "<name> uri" companion attribute for href, src and id; ids are resolved as '#' fragments */
229 foreach (array('href', 'src', 'id') as $uri_a) {
230 if (isset($a[$uri_a])) {
231 $a[$uri_a . ' uri'] = ($uri_a == 'id') ? $this->calcURI('#'.$a[$uri_a]) : $this->calcURI($a[$uri_a]);
/* attributes whose name starts with "xmlns" are handed to nsDecl(); the prefix is what follows "xmlns", with ':' trimmed off */
236 foreach ($a as $k => $v) {
237 if (strpos($k, 'xmlns') === 0) {
238 $this->nsDecl($p, trim(substr($k, 5), ':'), $v);
245 'tag_exact' => $t_exact,
247 'level' => $this->level,
/* provisional parent: the most recently pushed node; corrected below */
249 'p_id' => $this->node_count-1,
255 $node['base'] = $base;
/* only needed when there is at least one earlier node to compare against */
258 if ($this->node_count) {
260 $prev_node = $this->getCurNode();
/* NOTE(review): $l is presumably the level of the new node; confirm */
/* previous node on the same level: the new node is its next sibling */
261 if ($prev_node['level'] == $l) {
262 $node['p_id'] = $prev_node['p_id'];
263 $node['pos'] = $prev_node['pos']+1;
/* previous node is nested deeper: climb through the parents until the level is no longer deeper than $l */
265 elseif($prev_node['level'] > $l) {
266 while($prev_node['level'] > $l) {
/* a missing parent entry means the nesting is inconsistent; the error report for it is commented out */
267 if (!isset($this->nodes[$prev_node['p_id']])) {
268 //$this->addError('nesting mismatch: tag is ' . $t . ', level is ' . $l . ', prev_level is ' . $prev_node['level'] . ', prev_node p_id is ' . $prev_node['p_id']);
271 $prev_node = $this->nodes[$prev_node['p_id']];
/* the node reached by climbing is treated as the preceding sibling */
273 $node['p_id'] = $prev_node['p_id'];
274 $node['pos'] = $prev_node['pos']+1;
277 $this->pushNode($node);
/**
 * End-element handler (registered in initXMLParser(); also called by
 * cData() right after it opens a 'cdata' pseudo-node). Marks the node
 * returned by getCurNode($t) as closed and stores the $empty flag on it.
 *
 * @param mixed  $p     Parser handle (not used in the lines below).
 * @param string $t     Element name being closed.
 * @param int    $empty Stored as the node's 'empty' entry, which
 *                      getNodeContent() reads when it serialises the node.
 */
283 function close($p, $t, $empty = 0) {
284 //echo "<br />\n".'closing '.$t; flush();
285 $node = $this->getCurNode($t);
286 $node['state'] = 'closed';
287 $node['empty'] = $empty;
/* $node is a copy; write the change back into the node list */
288 $this->updateNode($node);
/**
 * Character-data handler: routes a chunk of text to the right node.
 *
 * Text that arrives while the current node is still open is appended to
 * that node's 'cdata' entry. Text that arrives after the current node was
 * closed is a sibling of that node; it becomes a separate 'cdata'
 * pseudo-node (opened and closed immediately), provided allowCDataNodes is
 * enabled.
 *
 * @param mixed  $p Parser handle; forwarded to open() and close().
 * @param string $d The text chunk.
 */
function cData($p, $d) {
  $cur = $this->getCurNode();
  if ($cur['state'] == 'open') {
    $cur['cdata'] .= $d;
    $this->updateNode($cur);
    return;
  }
  /* cdata is sibling of node */
  if (!$this->allowCDataNodes) {
    return;
  }
  $this->open($p, 'cdata', array('value' => $d));
  $this->close($p, 'cdata');
}
/**
 * Namespace-declaration handler (registered in initXMLParser(); also called
 * by open() for attributes whose name starts with "xmlns"). Records the
 * declaration in both lookup directions.
 *
 * @param mixed  $p   Parser handle (not used in the lines below).
 * @param string $prf Namespace prefix.
 * @param mixed  $uri Namespace URI; array values are ignored.
 */
307 function nsDecl($p, $prf, $uri) {
/* arrays cannot be used as array keys below, so they are skipped (see the warning fix mentioned in the file header) */
308 if (is_array($uri)) return 1;
/* prefix => URI */
309 $this->ns[$prf] = $uri;
/* URI => prefix; the first prefix recorded for a URI is kept */
310 $this->nsp[$uri] = isset($this->nsp[$uri]) ? $this->nsp[$uri] : $prf;