5 * @author Benjamin Nowack
6 * @license <http://arc.semsol.org/license>
7 * @homepage <http://arc.semsol.org/>
12 ARC2::inc('TurtleParser');
14 class ARC2_SPARQLParser extends ARC2_TurtleParser {
/**
 * Constructor: hands both arguments unchanged to the ARC2_TurtleParser constructor.
 *
 * NOTE(review): the optional parameter $a precedes the required by-reference
 * parameter $caller, so its default can never take effect positionally; PHP 8.0
 * and later report this ordering as deprecated.
 */
16 function __construct($a = '', &$caller) {
17 parent::__construct($a, $caller);
/**
 * PHP 4 style constructor (a method named after the class); it only delegates
 * to __construct() with the same arguments.
 *
 * NOTE(review): PHP 7 deprecated same-named-method constructors and PHP 8 treats
 * such a method as an ordinary method, so it then runs only when called explicitly.
 */
20 function ARC2_SPARQLParser($a = '', &$caller) {
21 $this->__construct($a, $caller);
/* Blank-node prefix: looked up through v() under the key bnode_prefix in $this->a.
   The fallback passed to v() is arc, then the first 4 hex characters of an md5 over
   uniqid(rand()), then b. NOTE(review): presumably v() returns the fallback when the
   key is missing; v() is defined elsewhere, confirm. */
26 $this->bnode_prefix = $this->v('bnode_prefix', 'arc'.substr(md5(uniqid(rand())), 0, 4).'b', $this->a);
/* Starts with two empty maps; indexBnodes() appends to patterns and writes to bnodes. */
28 $this->bnode_pattern_index = array('patterns' => array(), 'bnodes' => array());
/**
 * Parses the SPARQL query in $q. The outcome is recorded on $this->r (keys written
 * in the visible lines: query, prefixes, base) and problems are reported via addError().
 *
 * @param string $q   Query text to parse.
 * @param string $src Optional source location; when non-empty the base becomes
 *                    calcBase($src), otherwise ARC2::getRequestURI().
 */
33 function parse($q, $src = '') {
34 $this->setDefaultPrefixes();
35 $this->base = $src ? $this->calcBase($src) : ARC2::getRequestURI();
41 $this->unparsed_code = $q;
/* xQuery() hands back the parsed query structure and the text it did not consume. */
42 list($r, $v) = $this->xQuery($q);
44 $this->r['query'] = $r;
45 $this->unparsed_code = trim($v);
/* Fallback branch: with no error recorded and no unparsed input left, the query is
   reported as not properly closed. */
47 elseif (!$this->getErrors() && !$this->unparsed_code) {
48 $this->addError('Query not properly closed');
50 $this->r['prefixes'] = $this->prefixes;
51 $this->r['base'] = $this->base;
52 /* remove trailing comments */
/* Each pass strips one leading hash-comment (optional whitespace, the hash sign and
   everything up to the next CR or LF) and keeps the remainder captured in $m[2]. */
53 while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) $this->unparsed_code = $m[2];
/* Leftover text without a recorded error becomes an error that quotes its first 30 bytes. */
54 if ($this->unparsed_code && !$this->getErrors()) {
/* NOTE(review): the character class lists LF, a literal pipe and CR, so pipe characters
   in the excerpt are replaced by spaces as well; the pipe looks unintended. */
55 $rest = preg_replace('/[\x0a|\x0d]/i', ' ', substr($this->unparsed_code, 0, 30));
/* An excerpt that is blank after trimming gets the generic syntax-error message instead. */
56 $msg = trim($rest) ? 'Could not properly handle "' . $rest . '"' : 'Syntax error, probably an incomplete pattern';
57 $this->addError($msg);
/**
 * Returns the parse result via v() using the key r and an empty array as fallback.
 * NOTE(review): presumably v() reads the property of that name from this object;
 * v() is defined elsewhere, confirm.
 */
61 function getQueryInfos() {
62 return $this->v('r', array());
/* NOTE(review): the function header for these lines is not visible; parse() calls
   xQuery(), which is presumably this method. */
/* The prologue is consumed first. */
68 list($r, $v) = $this->xPrologue($v);
/* Each query form is then tried in the listed order through a method name built as
   x + form + Query, i.e. xSelectQuery, xConstructQuery, xDescribeQuery, xAskQuery. */
69 foreach (array('Select', 'Construct', 'Describe', 'Ask') as $type) {
70 $m = 'x' . $type . 'Query';
71 if ((list($r, $v) = $this->$m($v)) && $r) {
/**
 * Consumes the query prologue from $v: one attempt at a base declaration via
 * xBaseDecl(), then as many prefix declarations as xPrefixDecl() can match.
 */
80 function xPrologue($v) {
82 if ((list($sub_r, $v) = $this->xBaseDecl($v)) && $sub_r) {
/* Every matched prefix declaration is stored as prefix => uri; a repeated prefix
   overwrites the earlier entry. */
86 while ((list($sub_r, $v) = $this->xPrefixDecl($v)) && $sub_r) {
87 $this->prefixes[$sub_r['prefix']] = $sub_r['uri'];
/**
 * Parses a SELECT query from $v: the keyword, an optional DISTINCT or REDUCED flag,
 * a star or a list of result variables, dataset clauses, the where clause and the
 * solution modifier. The visible success path returns array($r, $sub_v), i.e. the
 * query structure and the unconsumed text.
 */
95 function xSelectQuery($v) {
96 if ($sub_r = $this->x('SELECT\s+', $v)) {
99 'result_vars' => array(),
100 'dataset' => array(),
104 /* distinct, reduced */
/* The matched keyword, lowercased, becomes a key on $r with the value 1. */
105 if ($sub_r = $this->x('(DISTINCT|REDUCED)\s+', $sub_v)) {
106 $r[strtolower($sub_r[1])] = 1;
/* A star must be followed by whitespace to match here. */
110 if ($sub_r = $this->x('\*\s+', $sub_v)) {
115 while ((list($sub_r, $sub_v) = $this->xResultVar($sub_v)) && $sub_r) {
116 $r['result_vars'][] = $sub_r;
/* NOTE(review): $all_vars is assigned on a line not visible here; presumably it is
   set when the star matched. */
119 if (!$all_vars && !count($r['result_vars'])) {
120 $this->addError('No result bindings specified.');
123 while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
124 $r['dataset'][] = $sub_r;
127 if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
128 $r['pattern'] = $sub_r;
133 /* solution modifier */
/* Modifier entries are merged into the top level of $r. */
134 if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
135 $r = array_merge($r, $sub_r);
/* Every variable collected in $this->r[vars] is added as a result variable with
   aggregate 0 and an empty alias. NOTE(review): the guarding condition is not
   visible; presumably this is the star case. */
139 foreach ($this->r['vars'] as $var) {
140 $r['result_vars'][] = array('var' => $var, 'aggregate' => 0, 'alias' => '');
/* If the list is still empty, a literal star string is stored instead. */
142 if (!$r['result_vars']) {
143 $r['result_vars'][] = '*';
146 return array($r, $sub_v);
/** Parses one result variable; this is a direct pass-through to xVar(). */
151 function xResultVar($v) {
152 return $this->xVar($v);
/**
 * Parses a CONSTRUCT query from $v: the keyword (trailing whitespace optional),
 * the construct template, dataset clauses, the where clause and the solution
 * modifier. The visible success path returns array($r, $sub_v).
 */
157 function xConstructQuery($v) {
158 if ($sub_r = $this->x('CONSTRUCT\s*', $v)) {
160 'type' => 'construct',
161 'dataset' => array(),
164 /* construct template */
/* Tested with is_array rather than truthiness, so an empty array from
   xConstructTemplate() is still accepted as a template. */
165 if ((list($sub_r, $sub_v) = $this->xConstructTemplate($sub_v)) && is_array($sub_r)) {
166 $r['construct_triples'] = $sub_r;
169 $this->addError('Construct Template not found');
173 while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
174 $r['dataset'][] = $sub_r;
177 if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
178 $r['pattern'] = $sub_r;
183 /* solution modifier */
/* Modifier entries are merged into the top level of $r. */
184 if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
185 $r = array_merge($r, $sub_r);
187 return array($r, $sub_v);
/**
 * Parses a DESCRIBE query from $v: the keyword, a star or a mix of result variables
 * and IRI references, dataset clauses, an optional where clause and the solution
 * modifier. The visible success path returns array($r, $sub_v).
 */
194 function xDescribeQuery($v) {
195 if ($sub_r = $this->x('DESCRIBE\s+', $v)) {
197 'type' => 'describe',
198 'result_vars' => array(),
199 'result_uris' => array(),
200 'dataset' => array(),
204 /* result vars/uris */
205 if ($sub_r = $this->x('\*\s+', $sub_v)) {
/* Variables go to result_vars, IRI references to result_uris. */
212 if ((list($sub_r, $sub_v) = $this->xResultVar($sub_v)) && $sub_r) {
213 $r['result_vars'][] = $sub_r;
216 if ((list($sub_r, $sub_v) = $this->xIRIref($sub_v)) && $sub_r) {
217 $r['result_uris'][] = $sub_r;
/* NOTE(review): $all_vars is assigned on a line not visible here; presumably it is
   set when the star matched. */
222 if (!$all_vars && !count($r['result_vars']) && !count($r['result_uris'])) {
223 $this->addError('No result bindings specified.');
226 while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
227 $r['dataset'][] = $sub_r;
230 if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
231 $r['pattern'] = $sub_r;
233 /* solution modifier */
/* Modifier entries are merged into the top level of $r. */
234 if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
235 $r = array_merge($r, $sub_r);
/* Every variable collected in $this->r[vars] is added as a result variable with
   aggregate 0 and an empty alias. NOTE(review): the guarding condition is not
   visible; presumably this is the star case. */
239 foreach ($this->r['vars'] as $var) {
240 $r['result_vars'][] = array('var' => $var, 'aggregate' => 0, 'alias' => '');
243 return array($r, $sub_v);
/**
 * Parses an ASK query from $v: the keyword, dataset clauses and the where clause.
 * Returns array($r, $sub_v) once the where clause has been parsed; an error is
 * recorded through addError() on the failure path.
 */
250 function xAskQuery($v) {
251 if ($sub_r = $this->x('ASK\s+', $v)) {
254 'dataset' => array(),
258 while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
259 $r['dataset'][] = $sub_r;
262 if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
263 $r['pattern'] = $sub_r;
264 return array($r, $sub_v);
267 $this->addError('Missing or invalid WHERE clause.');
/**
 * Parses one dataset clause: FROM, optionally followed by NAMED, then an IRI reference.
 * On success returns array(array(graph, named), remaining text).
 */
275 function xDatasetClause($v) {
276 if ($r = $this->x('FROM(\s+NAMED)?\s+', $v)) {
/* Capture group 1 holds the NAMED part; it is reduced to a 1 or 0 flag. */
277 $named = $r[1] ? 1 : 0;
/* $r[2] is the text after the match; $r is then reused for the parsed IRI. */
278 if ((list($r, $sub_v) = $this->xIRIref($r[2])) && $r) {
279 return array(array('graph' => $r, 'named' => $named), $sub_v);
/**
 * Parses a where clause: the WHERE keyword is optional in the pattern, and what
 * follows must parse as a group graph pattern. On success returns the pattern
 * and the remaining text.
 */
287 function xWhereClause($v) {
288 if ($r = $this->x('(WHERE)?', $v)) {
291 if ((list($r, $v) = $this->xGroupGraphPattern($v)) && $r) {
292 return array($r, $v);
/**
 * Parses the solution modifier: one attempt at an order clause, then any number
 * of limit or offset clauses. NOTE(review): the initial value of $r is set on a
 * line not visible here.
 */
299 function xSolutionModifier($v) {
301 if ((list($sub_r, $sub_v) = $this->xOrderClause($v)) && $sub_r) {
302 $r['order_infos'] = $sub_r;
/* Each limit or offset result is a one-entry array that is merged into $r. */
304 while ((list($sub_r, $sub_v) = $this->xLimitOrOffsetClause($sub_v)) && $sub_r) {
305 $r = array_merge($r, $sub_r);
/* When no text was consumed the result is array(0, $v), otherwise the collected
   modifiers together with the remaining text. */
307 return ($v == $sub_v) ? array(0, $v) : array($r, $sub_v);
/**
 * Parses one LIMIT or OFFSET clause. On success returns a one-entry array keyed
 * by the lowercased keyword (limit or offset) plus the remaining text.
 */
312 function xLimitOrOffsetClause($v) {
313 if ($sub_r = $this->x('(LIMIT|OFFSET)', $v)) {
314 $key = strtolower($sub_r[1]);
/* Strict comparison against false, so a falsy value such as 0 still counts as a match. */
316 if ((list($sub_r, $sub_v) = $this->xINTEGER($sub_v)) && ($sub_r !== false)) {
317 return array(array($key =>$sub_r), $sub_v);
/* A placeholder is accepted in place of the integer. */
319 if ((list($sub_r, $sub_v) = $this->xPlaceholder($sub_v)) && ($sub_r !== false)) {
320 return array(array($key =>$sub_r), $sub_v);
/**
 * Parses an order clause: the ORDER BY keyword followed by order conditions taken
 * from xOrderCondition() for as long as they match. Returns array($r, $sub_v) on
 * the visible success path; an error is recorded when no condition is given.
 */
328 function xOrderClause($v) {
329 if ($sub_r = $this->x('ORDER BY\s+', $v)) {
332 while ((list($sub_r, $sub_v) = $this->xOrderCondition($sub_v)) && $sub_r) {
336 return array($r, $sub_v);
339 $this->addError('No order conditions specified.');
/**
 * Parses a single order condition. Accepted forms, in the order tried: ASC or DESC
 * followed by a bracketted expression, a variable, a bracketted expression, a
 * built-in call, a function call. The result is the parsed element, in most
 * branches with a direction key added.
 */
347 function xOrderCondition($v) {
348 if ($sub_r = $this->x('(ASC|DESC)', $v)) {
349 $dir = strtolower($sub_r[1]);
351 if ((list($sub_r, $sub_v) = $this->xBrackettedExpression($sub_v)) && $sub_r) {
352 $sub_r['direction'] = $dir;
353 return array($sub_r, $sub_v);
/* Without an explicit keyword the direction is asc. */
356 elseif ((list($sub_r, $sub_v) = $this->xVar($v)) && $sub_r) {
357 $sub_r['direction'] = 'asc';
358 return array($sub_r, $sub_v);
/* NOTE(review): unlike the other keyword-less branches, this one returns the
   expression without setting a direction key; confirm that consumers cope. */
360 elseif ((list($sub_r, $sub_v) = $this->xBrackettedExpression($v)) && $sub_r) {
361 return array($sub_r, $sub_v);
363 elseif ((list($sub_r, $sub_v) = $this->xBuiltInCall($v)) && $sub_r) {
364 $sub_r['direction'] = 'asc';
365 return array($sub_r, $sub_v);
367 elseif ((list($sub_r, $sub_v) = $this->xFunctionCall($v)) && $sub_r) {
368 $sub_r['direction'] = 'asc';
369 return array($sub_r, $sub_v);
/**
 * Parses a group graph pattern: an opening brace, then triples blocks, non-triples
 * patterns, filters and placeholders, then a closing brace. On success returns a
 * structure of type group with the collected patterns, plus the remaining text.
 */
376 function xGroupGraphPattern($v) {
/* 4 hex characters used as the id under which the triples blocks of this group
   are registered with indexBnodes(). */
377 $pattern_id = substr(md5(uniqid(rand())), 0, 4);
378 if ($sub_r = $this->x('\{', $v)) {
379 $r = array('type' => 'group', 'patterns' => array());
381 if ((list($sub_r, $sub_v) = $this->xTriplesBlock($sub_v)) && $sub_r) {
382 $this->indexBnodes($sub_r, $pattern_id);
383 $r['patterns'][] = array('type' => 'triples', 'patterns' => $sub_r);
387 if ((list($sub_r, $sub_v) = $this->xGraphPatternNotTriples($sub_v)) && $sub_r) {
388 $r['patterns'][] = $sub_r;
/* A fresh id is drawn after a non-triples pattern, so triples blocks that follow
   it are indexed under a different id than those before it. */
389 $pattern_id = substr(md5(uniqid(rand())), 0, 4);
392 elseif ((list($sub_r, $sub_v) = $this->xFilter($sub_v)) && $sub_r) {
393 $r['patterns'][] = array('type' => 'filter', 'constraint' => $sub_r);
396 if ($sub_r = $this->x('\.', $sub_v)) {
399 if ((list($sub_r, $sub_v) = $this->xTriplesBlock($sub_v)) && $sub_r) {
400 $this->indexBnodes($sub_r, $pattern_id);
401 $r['patterns'][] = array('type' => 'triples', 'patterns' => $sub_r);
404 if ((list($sub_r, $sub_v) = $this->xPlaceholder($sub_v)) && $sub_r) {
405 $r['patterns'][] = $sub_r;
409 if ($sub_r = $this->x('\}', $sub_v)) {
411 return array($r, $sub_v);
/* Error path: quote the first 30 bytes of the text that could not be handled.
   NOTE(review): the character class lists LF, a literal pipe and CR, so pipe
   characters in the excerpt are replaced by spaces as well; the pipe looks unintended. */
413 $rest = preg_replace('/[\x0a|\x0d]/i', ' ', substr($sub_v, 0, 30));
414 $this->addError('Incomplete or invalid Group Graph pattern. Could not handle "' . $rest . '"');
/**
 * Registers the blank nodes of a triples block under $pattern_id and records an
 * error when a blank node label was already registered under a different id.
 *
 * @param array  $triples    Triples; the visible code reads the keys s_type,
 *                           p_type and o_type of each entry.
 * @param string $pattern_id Id of the graph pattern the triples belong to.
 */
419 function indexBnodes($triples, $pattern_id) {
/* NOTE(review): this count-based value is overwritten by the very next statement
   and never used; the assignment is dead. */
420 $index_id = count($this->bnode_pattern_index['patterns']);
421 $index_id = $pattern_id;
422 $this->bnode_pattern_index['patterns'][] = $triples;
423 foreach ($triples as $t) {
424 foreach (array('s', 'p', 'o') as $term) {
425 if ($t[$term . '_type'] == 'bnode') {
/* NOTE(review): $val is assigned on a line not visible here; presumably it is the
   blank node label taken from the current term. */
427 if (isset($this->bnode_pattern_index['bnodes'][$val]) && ($this->bnode_pattern_index['bnodes'][$val] != $index_id)) {
428 $this->addError('Re-used bnode label "' .$val. '" across graph patterns');
431 $this->bnode_pattern_index['bnodes'][$val] = $index_id;
/**
 * Parses a non-triples graph pattern. Tried in order: an OPTIONAL pattern, a GRAPH
 * pattern, then one or more group graph patterns joined by the UNION keyword.
 */
440 function xGraphPatternNotTriples($v) {
441 if ((list($sub_r, $sub_v) = $this->xOptionalGraphPattern($v)) && $sub_r) {
442 return array($sub_r, $sub_v);
444 if ((list($sub_r, $sub_v) = $this->xGraphGraphPattern($v)) && $sub_r) {
445 return array($sub_r, $sub_v);
447 $r = array('type' => 'union', 'patterns' => array());
451 if ((list($sub_r, $sub_v) = $this->xGroupGraphPattern($sub_v)) && $sub_r) {
452 $r['patterns'][] = $sub_r;
453 if ($sub_r = $this->x('UNION', $sub_v)) {
/* Two return shapes follow: the first collected pattern on its own, or the whole
   union structure. NOTE(review): the conditions choosing between them are not
   visible; presumably the first applies when exactly one pattern was collected. */
459 $pc = count($r['patterns']);
461 return array($r['patterns'][0], $sub_v);
464 return array($r, $sub_v);
/**
 * Parses the OPTIONAL keyword followed by a group graph pattern. On success
 * returns a structure of type optional that carries the patterns of the parsed
 * group directly (the group wrapper itself is dropped), plus the remaining text.
 */
471 function xOptionalGraphPattern($v) {
472 if ($sub_r = $this->x('OPTIONAL', $v)) {
474 if ((list($sub_r, $sub_v) = $this->xGroupGraphPattern($sub_v)) && $sub_r) {
475 return array(array('type' => 'optional', 'patterns' => $sub_r['patterns']), $sub_v);
477 $this->addError('Missing or invalid Group Graph Pattern after OPTIONAL');
/**
 * Parses the GRAPH keyword, then a variable or an IRI reference naming the graph,
 * then a group graph pattern. On success returns a structure of type graph plus
 * the remaining text.
 */
484 function xGraphGraphPattern($v) {
485 if ($sub_r = $this->x('GRAPH', $v)) {
487 $r = array('type' => 'graph', 'var' => '', 'uri' => '', 'patterns' => array());
/* NOTE(review): the lines that store the match in $r are not visible; presumably
   a variable fills the var key and an IRI reference fills the uri key. */
488 if ((list($sub_r, $sub_v) = $this->xVar($sub_v)) && $sub_r) {
491 elseif ((list($sub_r, $sub_v) = $this->xIRIref($sub_v)) && $sub_r) {
494 if ($r['var'] || $r['uri']) {
/* The parsed group is appended whole, so patterns holds the group as one element. */
495 if ((list($sub_r, $sub_v) = $this->xGroupGraphPattern($sub_v)) && $sub_r) {
496 $r['patterns'][] = $sub_r;
497 return array($r, $sub_v);
499 $this->addError('Missing or invalid Graph Pattern');
/**
 * Parses the FILTER keyword followed by a constraint. The constraint is the first
 * of these that matches: a bracketted expression, a built-in call, a function call.
 * Returns the constraint and the remaining text; an error is recorded via
 * addError() on the failure path.
 */
507 function xFilter($v) {
508 if ($r = $this->x('FILTER', $v)) {
510 if ((list($r, $sub_v) = $this->xBrackettedExpression($sub_v)) && $r) {
511 return array($r, $sub_v);
513 if ((list($r, $sub_v) = $this->xBuiltInCall($sub_v)) && $r) {
514 return array($r, $sub_v);
516 if ((list($r, $sub_v) = $this->xFunctionCall($sub_v)) && $r) {
517 return array($r, $sub_v);
519 $this->addError('Incomplete FILTER');
/**
 * Parses a function call: an IRI reference followed by an argument list. On
 * success returns a structure of type function_call with uri and args, plus
 * the remaining text.
 */
526 function xFunctionCall($v) {
527 if ((list($r, $sub_v) = $this->xIRIref($v)) && $r) {
/* NOTE(review): the argument list is tested for truthiness here, whereas
   xBuiltInCall() and xIRIrefOrFunction() test is_array; if xArgList() can yield
   an empty array, a call without arguments is rejected only here. */
528 if ((list($sub_r, $sub_v) = $this->xArgList($sub_v)) && $sub_r) {
529 return array(array('type' => 'function_call', 'uri' => $r, 'args' => $sub_r), $sub_v);
/**
 * Parses a parenthesised argument list: an opening parenthesis, expressions
 * separated by commas, and a closing parenthesis.
 */
537 function xArgList($v) {
541 if ($sub_r = $this->x('\(', $sub_v)) {
545 if ((list($sub_r, $sub_v) = $this->xExpression($sub_v)) && $sub_r) {
547 if ($sub_r = $this->x('\,', $sub_v)) {
552 if ($sub_r = $this->x('\)', $sub_v)) {
/* Only a closed list counts: otherwise the caller gets 0 and the original,
   unconsumed input back. NOTE(review): $closed is assigned on lines not visible
   here; presumably it is set when the closing parenthesis matched. */
559 return $closed ? array($r, $sub_v) : array(0, $v);
/**
 * Parses a construct template: an opening brace, a triples block and a closing
 * brace. On success returns $r and the text after the closing brace.
 * NOTE(review): $r is assigned on lines not visible here; presumably it holds
 * the parsed triples.
 */
564 function xConstructTemplate($v) {
565 if ($sub_r = $this->x('\{', $v)) {
/* $sub_r[1] is the text after the opening brace; is_array lets an empty
   triples result pass. */
567 if ((list($sub_r, $sub_v) = $this->xTriplesBlock($sub_r[1])) && is_array($sub_r)) {
570 if ($sub_r = $this->x('\}', $sub_v)) {
571 return array($r, $sub_r[1]);
/**
 * Parses an or-expression: conditional-and expressions joined by the double-pipe
 * operator. Operands are collected under patterns in a structure of type
 * expression with sub_type or.
 */
579 function xExpression($v) {
580 if ((list($sub_r, $sub_v) = $this->xConditionalAndExpression($v)) && $sub_r) {
581 $r = array('type' => 'expression', 'sub_type' => 'or', 'patterns' => array($sub_r));
584 if ($sub_r = $this->x('\|\|', $sub_v)) {
586 if ((list($sub_r, $sub_v) = $this->xConditionalAndExpression($sub_v)) && $sub_r) {
587 $r['patterns'][] = $sub_r;
/* A single operand is returned as is, without the wrapping or-structure. */
592 return count($r['patterns']) == 1 ? array($r['patterns'][0], $sub_v) : array($r, $sub_v);
/**
 * Parses an and-expression: relational expressions joined by the double-ampersand
 * operator. Operands are collected under patterns in a structure of type
 * expression with sub_type and.
 */
599 function xConditionalAndExpression($v) {
600 if ((list($sub_r, $sub_v) = $this->xRelationalExpression($v)) && $sub_r) {
601 $r = array('type' => 'expression', 'sub_type' => 'and', 'patterns' => array($sub_r));
604 if ($sub_r = $this->x('\&\&', $sub_v)) {
606 if ((list($sub_r, $sub_v) = $this->xRelationalExpression($sub_v)) && $sub_r) {
607 $r['patterns'][] = $sub_r;
/* A single operand is returned as is, without the wrapping and-structure. */
612 return count($r['patterns']) == 1 ? array($r['patterns'][0], $sub_v) : array($r, $sub_v);
/**
 * Parses a relational expression: an additive expression, optionally followed by
 * a comparison operator and another additive expression. The operator is stored
 * on the wrapping structure (sub_type relational), not on the operands.
 */
619 function xRelationalExpression($v) {
620 if ((list($sub_r, $sub_v) = $this->xAdditiveExpression($v)) && $sub_r) {
621 $r = array('type' => 'expression', 'sub_type' => 'relational', 'patterns' => array($sub_r));
624 /* don't mistake '<' + uriref with '<'-operator ("longest token" rule) */
/* An IRI reference found where an operator is expected is reported as an error. */
625 if ((list($sub_r, $sub_v) = $this->xIRI_REF($sub_v)) && $sub_r) {
626 $this->addError('Expected operator, found IRIref: "'.$sub_r.'".');
/* In the alternation the two-character operators are listed before the bare
   angle brackets. */
628 if ($sub_r = $this->x('(\!\=|\=\=|\=|\<\=|\>\=|\<|\>)', $sub_v)) {
/* NOTE(review): $op is assigned on a line not visible here; presumably it is the
   matched operator. */
631 $r['operator'] = $op;
632 if ((list($sub_r, $sub_v) = $this->xAdditiveExpression($sub_v)) && $sub_r) {
633 //$sub_r['operator'] = $op;
634 $r['patterns'][] = $sub_r;
/* A single operand is returned as is, without the wrapping structure. */
639 return count($r['patterns']) == 1 ? array($r['patterns'][0], $sub_v) : array($r, $sub_v);
/**
 * Parses an additive expression: multiplicative expressions joined by plus or
 * minus. Each operand after the first carries its own operator key.
 */
646 function xAdditiveExpression($v) {
647 if ((list($sub_r, $sub_v) = $this->xMultiplicativeExpression($v)) && $sub_r) {
648 $r = array('type' => 'expression', 'sub_type' => 'additive', 'patterns' => array($sub_r));
651 if ($sub_r = $this->x('(\+|\-)', $sub_v)) {
/* NOTE(review): $op is assigned on a line not visible here; presumably it is the
   matched sign. */
654 if ((list($sub_r, $sub_v) = $this->xMultiplicativeExpression($sub_v)) && $sub_r) {
655 $sub_r['operator'] = $op;
656 $r['patterns'][] = $sub_r;
/* Second attempt: a numeric literal is wrapped in a structure of type numeric. */
659 elseif ((list($sub_r, $sub_v) = $this->xNumericLiteral($sub_v)) && $sub_r) {
660 $r['patterns'][] = array('type' => 'numeric', 'operator' => $op, 'value' => $sub_r);
665 //return array($r, $sub_v);
/* A single operand is returned as is, without the wrapping structure. */
666 return count($r['patterns']) == 1 ? array($r['patterns'][0], $sub_v) : array($r, $sub_v);
/**
 * Parses a multiplicative expression: unary expressions joined by an asterisk or
 * a slash. Each operand after the first carries its own operator key.
 */
673 function xMultiplicativeExpression($v) {
674 if ((list($sub_r, $sub_v) = $this->xUnaryExpression($v)) && $sub_r) {
675 $r = array('type' => 'expression', 'sub_type' => 'multiplicative', 'patterns' => array($sub_r));
678 if ($sub_r = $this->x('(\*|\/)', $sub_v)) {
/* NOTE(review): $op is assigned on a line not visible here; presumably it is the
   matched operator. */
681 if ((list($sub_r, $sub_v) = $this->xUnaryExpression($sub_v)) && $sub_r) {
682 $sub_r['operator'] = $op;
683 $r['patterns'][] = $sub_r;
/* A single operand is returned as is, without the wrapping structure. */
688 return count($r['patterns']) == 1 ? array($r['patterns'][0], $sub_v) : array($r, $sub_v);
/**
 * Parses a unary expression: an optional prefix operator (exclamation mark, plus
 * or minus) followed by a primary expression. The result is the primary
 * expression with an operator key set.
 */
695 function xUnaryExpression($v) {
/* NOTE(review): $op and $sub_v are initialised on lines not visible here. */
698 if ($sub_r = $this->x('(\!|\+|\-)', $sub_v)) {
702 if ((list($sub_r, $sub_v) = $this->xPrimaryExpression($sub_v)) && $sub_r) {
/* A non-array result is wrapped so the operator key can be attached. */
703 if (!is_array($sub_r)) {
704 $sub_r = array('type' => 'unary', 'expression' => $sub_r);
/* The primary already carries an operator: combine the prefix with it. */
706 elseif ($sub_op = $this->v1('operator', '', $sub_r)) {
/* Known pairs are collapsed (double negation cancels, equal signs give plus,
   mixed signs give minus); any other pair is kept as the concatenation. */
707 $ops = array('!!' => '', '++' => '+', '--' => '+', '+-' => '-', '-+' => '-');
708 $op = isset($ops[$op . $sub_op]) ? $ops[$op . $sub_op] : $op . $sub_op;
710 $sub_r['operator'] = $op;
711 return array($sub_r, $sub_v);
/**
 * Parses a primary expression by trying the listed sub-parsers in order and
 * returning the first truthy result together with the remaining text.
 */
718 function xPrimaryExpression($v) {
719 foreach (array('BrackettedExpression', 'BuiltInCall', 'IRIrefOrFunction', 'RDFLiteral', 'NumericLiteral', 'BooleanLiteral', 'Var', 'Placeholder') as $type) {
/* NOTE(review): $m is assigned on a line not visible here; presumably it is the
   method name x + $type. Every attempt starts from the original $v. */
721 if ((list($sub_r, $sub_v) = $this->$m($v)) && $sub_r) {
722 return array($sub_r, $sub_v);
/**
 * Parses an expression enclosed in parentheses. On success returns the inner
 * expression (the parentheses leave no trace in the result) and the text after
 * the closing parenthesis.
 */
730 function xBrackettedExpression($v) {
731 if ($r = $this->x('\(', $v)) {
/* $r[1] is the text after the opening parenthesis; $r is then reused for the
   parsed expression. */
732 if ((list($r, $sub_v) = $this->xExpression($r[1])) && $r) {
733 if ($sub_r = $this->x('\)', $sub_v)) {
734 return array($r, $sub_r[1]);
/**
 * Parses a call to one of the built-in functions named in the pattern (str, lang,
 * langmatches, datatype, bound, sameterm, isiri, isuri, isblank, isliteral, regex).
 * Returns a structure of type built_in_call with the lowercased name under call.
 */
743 function xBuiltInCall($v) {
744 if ($sub_r = $this->x('(str|lang|langmatches|datatype|bound|sameterm|isiri|isuri|isblank|isliteral|regex)\s*\(', $v)) {
745 $r = array('type' => 'built_in_call', 'call' => strtolower($sub_r[1]));
/* The pattern above consumed the opening parenthesis, so it is put back in front
   of the remaining text before xArgList() parses the arguments. */
746 if ((list($sub_r, $sub_v) = $this->xArgList('(' . $sub_r[2])) && is_array($sub_r)) {
748 return array($r, $sub_v);
/**
 * Parses an IRI reference that may be followed by an argument list. With an
 * argument list the result is a structure of type function (uri, args), otherwise
 * a structure of type uri.
 */
756 function xIRIrefOrFunction($v) {
757 if ((list($r, $v) = $this->xIRIref($v)) && $r) {
758 if ((list($sub_r, $sub_v) = $this->xArgList($v)) && is_array($sub_r)) {
759 return array(array('type' => 'function', 'uri' => $r, 'args' => $sub_r), $sub_v);
/* $sub_v comes from the failed xArgList() attempt, which hands its input back
   unchanged, so this is the text right after the IRI. */
761 return array(array('type' => 'uri', 'uri' => $r), $sub_v);
765 /* 70.. @@sync with TurtleParser */
767 function xIRI_REF($v) {
768 if (($r = $this->x('\<(\$\{[^\>]*\})\>', $v)) && ($sub_r = $this->xPlaceholder($r[1]))) {
769 return array($r[1], $r[2]);
771 elseif ($r = $this->x('\<([^\<\>\s\"\|\^`]*)\>', $v)) {
772 return array($r[1] ? $r[1] : true, $r[2]);
774 /* allow reserved chars in obvious IRIs */
775 elseif ($r = $this->x('\<(https?\:[^\s][^\<\>]*)\>', $v)) {
776 return array($r[1] ? $r[1] : true, $r[2]);