src/install/install4 -text
src/install/install5_post -text
src/install/install6_upgrade -text
+src/lib/vendor/arc/ARC2.php -text
+src/lib/vendor/arc/ARC2_Class.php -text
+src/lib/vendor/arc/ARC2_Reader.php -text
+src/lib/vendor/arc/ARC2_Resource.php -text
+src/lib/vendor/arc/ARC2_getFormat.php -text
+src/lib/vendor/arc/ARC2_getPreferredFormat.php -text
+src/lib/vendor/arc/extractors/ARC2_DcExtractor.php -text
+src/lib/vendor/arc/extractors/ARC2_ErdfExtractor.php -text
+src/lib/vendor/arc/extractors/ARC2_MicroformatsExtractor.php -text
+src/lib/vendor/arc/extractors/ARC2_OpenidExtractor.php -text
+src/lib/vendor/arc/extractors/ARC2_PoshRdfExtractor.php -text
+src/lib/vendor/arc/extractors/ARC2_RDFExtractor.php -text
+src/lib/vendor/arc/extractors/ARC2_RdfaExtractor.php -text
+src/lib/vendor/arc/extractors/ARC2_TwitterProfilePicExtractor.php -text
+src/lib/vendor/arc/parsers/ARC2_AtomParser.php -text
+src/lib/vendor/arc/parsers/ARC2_CBJSONParser.php -text
+src/lib/vendor/arc/parsers/ARC2_JSONParser.php -text
+src/lib/vendor/arc/parsers/ARC2_LegacyXMLParser.php -text
+src/lib/vendor/arc/parsers/ARC2_RDFParser.php -text
+src/lib/vendor/arc/parsers/ARC2_RDFXMLParser.php -text
+src/lib/vendor/arc/parsers/ARC2_RSSParser.php -text
+src/lib/vendor/arc/parsers/ARC2_SGAJSONParser.php -text
+src/lib/vendor/arc/parsers/ARC2_SPARQLParser.php -text
+src/lib/vendor/arc/parsers/ARC2_SPARQLPlusParser.php -text
+src/lib/vendor/arc/parsers/ARC2_SPARQLXMLResultParser.php -text
+src/lib/vendor/arc/parsers/ARC2_SPOGParser.php -text
+src/lib/vendor/arc/parsers/ARC2_SemHTMLParser.php -text
+src/lib/vendor/arc/parsers/ARC2_TurtleParser.php -text
+src/lib/vendor/arc/serializers/ARC2_LegacyHTMLSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_LegacyJSONSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_LegacyXMLSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_MicroRDFSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_NTriplesSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_POSHRDFSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_RDFJSONSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_RDFSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_RDFXMLSerializer.php -text
+src/lib/vendor/arc/serializers/ARC2_RSS10Serializer.php -text
+src/lib/vendor/arc/serializers/ARC2_TurtleSerializer.php -text
+src/lib/vendor/arc/sparqlscript/ARC2_SPARQLScriptParser.php -text
+src/lib/vendor/arc/sparqlscript/ARC2_SPARQLScriptProcessor.php -text
+src/lib/vendor/arc/store/ARC2_RemoteStore.php -text
+src/lib/vendor/arc/store/ARC2_Store.php -text
+src/lib/vendor/arc/store/ARC2_StoreAskQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreAtomLoader.php -text
+src/lib/vendor/arc/store/ARC2_StoreCBJSONLoader.php -text
+src/lib/vendor/arc/store/ARC2_StoreConstructQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreDeleteQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreDescribeQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreDumpQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreDumper.php -text
+src/lib/vendor/arc/store/ARC2_StoreEndpoint.php -text
+src/lib/vendor/arc/store/ARC2_StoreHelper.php -text
+src/lib/vendor/arc/store/ARC2_StoreInsertQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreLoadQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreRDFXMLLoader.php -text
+src/lib/vendor/arc/store/ARC2_StoreRSSLoader.php -text
+src/lib/vendor/arc/store/ARC2_StoreSGAJSONLoader.php -text
+src/lib/vendor/arc/store/ARC2_StoreSPOGLoader.php -text
+src/lib/vendor/arc/store/ARC2_StoreSelectQueryHandler.php -text
+src/lib/vendor/arc/store/ARC2_StoreSemHTMLLoader.php -text
+src/lib/vendor/arc/store/ARC2_StoreTableManager.php -text
+src/lib/vendor/arc/store/ARC2_StoreTurtleLoader.php -text
src/lib/vendor/coolfieldset/css/jquery.coolfieldset.css -text
src/lib/vendor/coolfieldset/images/collapsed.gif -text svneol=unset#unset
src/lib/vendor/coolfieldset/images/expanded.gif -text svneol=unset#unset
--- /dev/null
+<?php
+/**
+ * ARC2 core class (static, not instantiated)
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-07-06
+*/
+
+class ARC2 {
+
+ /* Returns the ARC2 release identifier (a date string). */
+ function getVersion() {
+ return '2010-07-06';
+ }
+
+ /* */
+
+ /* Combined setter/getter for a single value kept in a function-static variable:
+ * a truthy $val is stored (nothing is returned), a falsy $val returns the stored value. */
+ function setStatic($val) {
+ static $arc_static = '';
+ if ($val) $arc_static = $val; /* set */
+ if (!$val) return $arc_static; /* get */
+ }
+
+ /* Reads the stored value by calling setStatic() with an empty (falsy) argument. */
+ function getStatic() {
+ return ARC2::setStatic('');
+ }
+
+ /* */
+
+ /**
+  * Returns the directory (with trailing slash) that holds the file for component $f.
+  *
+  * The directory of this file is the default. If $f contains one of the known
+  * keywords (case-insensitive), the matching sub-directory is appended instead.
+  * Keywords are tried in a fixed order and the first hit wins.
+  */
+ function getIncPath($f = '') {
+   $base_dir = realpath(dirname(__FILE__)) . '/';
+   /* parallel lists: keyword => sub-directory, in lookup order */
+   $keywords = array('plugin', 'trigger', 'store', 'serializer', 'extractor', 'sparqlscript', 'parser');
+   $sub_dirs = array('plugins', 'triggers', 'store', 'serializers', 'extractors', 'sparqlscript', 'parsers');
+   for ($i = 0, $i_max = count($keywords); $i < $i_max; $i++) {
+     if (!preg_match('/' . $keywords[$i] . '/i', $f)) continue;
+     return $base_dir . $sub_dirs[$i] . '/';
+   }
+   return $base_dir;
+ }
+
+ /**
+  * Returns the absolute URI of the running script.
+  *
+  * Web context (SERVER_NAME set): scheme://host[:port]/script, where the port is
+  * left out when it is the default port of the scheme. HTTPS is recognised via
+  * the server's HTTPS flag (any port) or, as before, via port 443.
+  * CLI context: a file:// URI built from SCRIPT_FILENAME.
+  * Otherwise: the placeholder 'http://localhost/unknown_path'.
+  */
+ function getScriptURI() {
+   if (isset($_SERVER) && isset($_SERVER['SERVER_NAME'])) {
+     /* SERVER_PROTOCOL looks like "HTTP/1.1"; keep only the lower-cased name part */
+     $proto = isset($_SERVER['SERVER_PROTOCOL']) ? preg_replace('/^([a-z]+)\/.*$/', '\\1', strtolower($_SERVER['SERVER_PROTOCOL'])) : 'http';
+     $port = isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : 80;
+     $server = $_SERVER['SERVER_NAME'];
+     $script = isset($_SERVER['SCRIPT_NAME']) ? $_SERVER['SCRIPT_NAME'] : '';
+     /* https: flagged by the web server (some servers report "off" for plain http) or implied by port 443 */
+     $https_flag = isset($_SERVER['HTTPS']) ? strtolower($_SERVER['HTTPS']) : '';
+     $is_https = ($https_flag && ($https_flag != 'off')) || ($port == 443);
+     if (($proto == 'http') && $is_https) {
+       $proto = 'https';
+     }
+     /* only mention the port when it is not the scheme's default */
+     $default_port = ($proto == 'https') ? 443 : 80;
+     return $proto . '://' . $server . ($port != $default_port ? ':' . $port : '') . $script;
+   }
+   elseif (isset($_SERVER['SCRIPT_FILENAME'])) {
+     return 'file://' . realpath($_SERVER['SCRIPT_FILENAME']);
+   }
+   return 'http://localhost/unknown_path';
+ }
+
+ /**
+  * Returns the absolute URI of the current request (including the query string).
+  *
+  * Built as scheme://host[:port] + REQUEST_URI, using the same scheme and
+  * default-port rules as getScriptURI(): HTTPS is recognised via the server's
+  * HTTPS flag or port 443, and the scheme's default port is left out.
+  * Falls back to getScriptURI() when REQUEST_URI or SERVER_NAME is not available.
+  */
+ function getRequestURI() {
+   if (isset($_SERVER) && isset($_SERVER['REQUEST_URI']) && isset($_SERVER['SERVER_NAME'])) {
+     /* SERVER_PROTOCOL looks like "HTTP/1.1"; keep only the lower-cased name part */
+     $proto = isset($_SERVER['SERVER_PROTOCOL']) ? preg_replace('/^([a-z]+)\/.*$/', '\\1', strtolower($_SERVER['SERVER_PROTOCOL'])) : 'http';
+     $port = isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : 80;
+     $https_flag = isset($_SERVER['HTTPS']) ? strtolower($_SERVER['HTTPS']) : '';
+     $is_https = ($https_flag && ($https_flag != 'off')) || ($port == 443);
+     if (($proto == 'http') && $is_https) {
+       $proto = 'https';
+     }
+     $default_port = ($proto == 'https') ? 443 : 80;
+     return $proto . '://' . $_SERVER['SERVER_NAME'] . ($port != $default_port ? ':' . $port : '') . $_SERVER['REQUEST_URI'];
+   }
+   return ARC2::getScriptURI();
+ }
+
+ /* Includes the PHP file of component $f.
+ * A leading "Prefix_" in $f replaces the default "ARC2" file-name prefix; $path, when given,
+ * replaces the directory otherwise chosen by getIncPath().
+ * Returns include_once()'s result or 1 on success, 0 when no file could be included. */
+ function inc($f, $path = '') {
+ $prefix = 'ARC2';
+ /* split "Prefix_Name" into file-name prefix and component name */
+ if (preg_match('/^([^\_]+)\_(.*)$/', $f, $m)) {
+ $prefix = $m[1];
+ $f = $m[2];
+ }
+ $inc_path = $path ? $path : ARC2::getIncPath($f);
+ /* the component name is url-encoded before it becomes part of the file name */
+ $path = $inc_path . $prefix . '_' . urlencode($f) . '.php';
+ if (file_exists($path)) return include_once($path);
+ /* safe-mode hack */
+ if (@include_once($path)) return 1;
+ /* try other path */
+ /* non-ARC2 prefixes are also looked up in a sub-directory named after the lower-cased prefix */
+ if ($prefix != 'ARC2') {
+ $path = $inc_path . strtolower($prefix) . '/' . $prefix . '_' . urlencode($f) . '.php';
+ if (file_exists($path)) return include_once($path);
+ /* safe-mode hack */
+ if (@include_once($path)) return 1;
+ }
+ return 0;
+ }
+
+ /* */
+
+ /**
+  * Returns the current Unix time as a float, including the sub-second fraction.
+  */
+ function mtime(){
+   /* microtime() yields "<fraction> <seconds>" */
+   $parts = explode(" ", microtime());
+   $fraction = (float)$parts[0];
+   $seconds = (float)$parts[1];
+   return ($fraction + $seconds);
+ }
+
+ /* Matches the regex fragment $re at the start of $v (leading whitespace is skipped).
+ * Returns the preg_match() result array, whose last group is the unmatched remainder,
+ * or false when $re does not match. $options are appended as pattern modifiers. */
+ function x($re, $v, $options = 'si') {
+ return preg_match("/^\s*" . $re . "(.*)$/" . $options, $v, $m) ? $m : false;
+ }
+
+ /* */
+
+ /* Loads the ARC2_getFormat file on demand and delegates to the ARC2_getFormat() function. */
+ function getFormat($val, $mtype = '', $ext = '') {
+ ARC2::inc('getFormat');
+ return ARC2_getFormat($val, $mtype, $ext);
+ }
+
+ /* Loads the ARC2_getPreferredFormat file on demand and delegates to ARC2_getPreferredFormat(). */
+ function getPreferredFormat($default = 'plain') {
+ ARC2::inc('getPreferredFormat');
+ return ARC2_getPreferredFormat($default);
+ }
+
+ /* */
+
+ /* Heuristic clean-up that tries to turn $v into valid UTF-8.
+ * NOTE(review): the individual steps are byte-level heuristics; the comments below only
+ * describe what each statement does, not which inputs it was designed for. */
+ function toUTF8($v) {
+ /* strings that survive urlencode() unchanged are returned as-is */
+ if (urlencode($v) === $v) return $v;
+ //if (utf8_decode($v) == $v) return $v;
+ /* decode once if utf8_decode() introduces no '?' (literal question marks are ignored for this check) */
+ $v = (strpos(utf8_decode(str_replace('?', '', $v)), '?') === false) ? utf8_decode($v) : $v;
+ /* custom hacks, mainly caused by bugs in PHP's json_decode */
+ $mappings = array(
+ '%18' => '‘',
+ '%19' => '’',
+ '%1C' => '“',
+ '%1D' => '”',
+ '%1E' => '„',
+ '%10' => '‐',
+ '%12' => '−',
+ '%13' => '–',
+ '%14' => '—',
+ '%26' => '&',
+ );
+ $froms = array_keys($mappings);
+ $tos = array_values($mappings);
+ /* the keys are url-encoded bytes; decode them to get the raw search strings */
+ foreach ($froms as $i => $from) $froms[$i] = urldecode($from);
+ $v = str_replace($froms, $tos, $v);
+ /* utf8 tweaks */
+ /* every matched byte sequence is passed to ARC2::getUTF8Char() and replaced by its result */
+ return preg_replace_callback('/([\x00-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3}|[\xf8-\xfb][\x80-\xbf]{4}|[\xfc-\xfd][\x80-\xbf]{5}|[^\x00-\x7f])/', array('ARC2', 'getUTF8Char'), $v);
+ }
+
+ /* preg_replace_callback() handler used by toUTF8(); $v is the match array.
+ * - a match that is a single byte (after trim) is passed through utf8_encode()
+ * - a match starting with an ASCII byte keeps that byte and re-runs toUTF8() on the rest
+ * - anything else is returned unchanged */
+ function getUTF8Char($v) {
+ $val = $v[1];
+ if (strlen(trim($val)) === 1) return utf8_encode($val);
+ if (preg_match('/^([\x00-\x7f])(.+)/', $val, $m)) return $m[1] . ARC2::toUTF8($m[2]);
+ return $val;
+ }
+
+ /* */
+
+ /**
+  * Splits a URI into array(namespace, local name).
+  *
+  * A few W3C namespaces that do not end in "/" or "#" are split at a fixed
+  * position. Everything else is split after the last "/" or "#", or failing
+  * that after the last ":" or "/" (e.g. urn:foo:bar). When no split point is
+  * found, the whole value is returned as namespace with an empty local name.
+  */
+ function splitURI($v) {
+   /* namespaces that would otherwise be conflated with the local name */
+   $w3c_namespaces = array(
+     'http://www.w3.org/XML/1998/namespace',
+     'http://www.w3.org/2005/Atom',
+     'http://www.w3.org/1999/xhtml',
+   );
+   if (strpos($v, 'www.w3.org')) {
+     foreach ($w3c_namespaces as $w3c_ns) {
+       if (strpos($v, $w3c_ns) !== 0) continue;
+       $rest = substr($v, strlen($w3c_ns));
+       /* a separator right after the namespace means normal splitting applies */
+       if (preg_match('/^[\/\#]/', $rest)) continue;
+       return array($w3c_ns, $rest);
+     }
+   }
+   /* first: split on / or #, then: split on the last special char */
+   $patterns = array(
+     '/^(.*[\/\#])([^\/\#]+)$/',
+     '/^(.*[\:\/])([^\:\/]+)$/',
+   );
+   foreach ($patterns as $pattern) {
+     if (preg_match($pattern, $v, $m)) return array($m[1], $m[2]);
+   }
+   return array($v, '');
+ }
+
+ /* */
+
+ /* Converts a list of triples into an index of the form subject => predicate => objects.
+ * $flatten_objects: when truthy, objects are stored as plain values; otherwise as arrays
+ * with 'value' plus any non-empty 'lang', 'type' and 'datatype'.
+ * $vals: replacement values for terms whose "<term>_type" is 'var'; triples with an
+ * unbound variable are skipped. Duplicate objects are not added twice. */
+ function getSimpleIndex($triples, $flatten_objects = 1, $vals = '') {
+ $r = array();
+ foreach ($triples as $t) {
+ $skip_t = 0;
+ foreach (array('s', 'p', 'o') as $term) {
+ /* variable-variable: creates/overwrites the locals $s, $p and $o */
+ $$term = $t[$term];
+ /* template var */
+ if (isset($t[$term . '_type']) && ($t[$term . '_type'] == 'var')) {
+ $val = isset($vals[$$term]) ? $vals[$$term] : '';
+ $skip_t = isset($vals[$$term]) ? $skip_t : 1;
+ /* type detection: each step only applies while $type is still empty */
+ $type = '';
+ /* an explicit "<name> type" entry in $vals takes precedence */
+ $type = !$type && isset($vals[$$term . ' type']) ? $vals[$$term . ' type'] : $type;
+ $type = !$type && preg_match('/^\_\:/', $val) ? 'bnode' : $type;
+ if ($term == 'o') {
+ /* objects with whitespace, without a colon or without a slash are treated as literals */
+ $type = !$type && (preg_match('/\s/s', $val) || !preg_match('/\:/', $val)) ? 'literal' : $type;
+ $type = !$type && !preg_match('/[\/]/', $val) ? 'literal' : $type;
+ }
+ $type = !$type ? 'uri' : $type;
+ $t[$term . '_type'] = $type;
+ $$term = $val;
+ }
+ }
+ if ($skip_t) {
+ continue;
+ }
+ if (!isset($r[$s])) $r[$s] = array();
+ if (!isset($r[$s][$p])) $r[$s][$p] = array();
+ if ($flatten_objects) {
+ if (!in_array($o, $r[$s][$p])) $r[$s][$p][] = $o;
+ }
+ else {
+ $o = array('value' => $o);
+ /* object details are read from "o_<suffix>" keys first, then from "o <suffix>" keys */
+ foreach (array('lang', 'type', 'datatype') as $suffix) {
+ if (isset($t['o_' . $suffix]) && $t['o_' . $suffix]) {
+ $o[$suffix] = $t['o_' . $suffix];
+ }
+ elseif (isset($t['o ' . $suffix]) && $t['o ' . $suffix]) {
+ $o[$suffix] = $t['o ' . $suffix];
+ }
+ }
+ if (!in_array($o, $r[$s][$p])) {
+ $r[$s][$p][] = $o;
+ }
+ }
+ }
+ return $r;
+ }
+
+ /**
+  * Converts an index (subject => predicate => objects) into a flat list of triples.
+  *
+  * Each triple has the keys s, p, o, s_type, o_type, o_datatype and o_lang.
+  * Objects are normally arrays with 'value' and 'type'; missing details default
+  * to an empty string. Flattened objects (plain values) are accepted as well,
+  * their type is guessed: "_:" prefix => bnode, scheme-like value without
+  * whitespace or angle brackets => uri, anything else => literal.
+  */
+ function getTriplesFromIndex($index) {
+   $r = array();
+   foreach ($index as $s => $ps) {
+     $s_type = preg_match('/^\_\:/', $s) ? 'bnode' : 'uri';
+     foreach ($ps as $p => $os) {
+       /* tolerate a single object that is not wrapped in a list */
+       if (!is_array($os)) $os = array($os);
+       foreach ($os as $o) {
+         if (!is_array($o)) {
+           /* flattened object: derive the type from the value */
+           $o_val = (string) $o;
+           if (preg_match('/^\_\:/', $o_val)) {
+             $o_type = 'bnode';
+           }
+           elseif (preg_match('/^[a-z]+\:[^\s\<\>]+$/si', $o_val)) {
+             $o_type = 'uri';
+           }
+           else {
+             $o_type = 'literal';
+           }
+           $o = array('value' => $o, 'type' => $o_type);
+         }
+         $r[] = array(
+           's' => $s,
+           'p' => $p,
+           'o' => isset($o['value']) ? $o['value'] : '',
+           's_type' => $s_type,
+           'o_type' => isset($o['type']) ? $o['type'] : '',
+           'o_datatype' => isset($o['datatype']) ? $o['datatype'] : '',
+           'o_lang' => isset($o['lang']) ? $o['lang'] : '',
+         );
+       }
+     }
+   }
+   return $r;
+ }
+
+ /**
+  * Merges any number of indexes (passed as separate arguments) into one.
+  *
+  * Subjects and predicates are created as they are encountered; an object is
+  * only appended when an equal one is not yet present for that subject/predicate.
+  */
+ function getMergedIndex() {
+   $merged = array();
+   $indexes = func_get_args();
+   for ($i = 0, $i_max = count($indexes); $i < $i_max; $i++) {
+     foreach ($indexes[$i] as $subj => $props) {
+       if (!isset($merged[$subj])) $merged[$subj] = array();
+       foreach ($props as $prop => $objs) {
+         if (!isset($merged[$subj][$prop])) $merged[$subj][$prop] = array();
+         foreach ($objs as $obj) {
+           if (in_array($obj, $merged[$subj][$prop])) continue;
+           $merged[$subj][$prop][] = $obj;
+         }
+       }
+     }
+   }
+   return $merged;
+ }
+
+ /**
+  * Removes triples from a given index.
+  *
+  * The first argument is the index to clean; every further argument is an index
+  * whose triples are removed from it. Objects may be plain values or arrays with
+  * a 'value' key; they are matched on their value only, compared as exact
+  * strings (so the literals "1.0" and "1" are different objects).
+  * Returns the cleaned index, or an empty array when no triples are left.
+  */
+ function getCleanedIndex() {
+   $indexes = func_get_args();
+   if (!$indexes) return array();
+   $r = $indexes[0];
+   for ($i = 1, $i_max = count($indexes); $i < $i_max; $i++) {
+     foreach ($indexes[$i] as $s => $ps) {
+       if (!isset($r[$s])) continue;
+       foreach ($ps as $p => $os) {
+         if (!isset($r[$s][$p])) continue;
+         /* collect the values to delete as strings, a loose == would treat numeric strings as numbers */
+         $del_vals = array();
+         foreach ($os as $o) {
+           $del_vals[] = (string) (is_array($o) ? $o['value'] : $o);
+         }
+         $new_os = array();
+         foreach ($r[$s][$p] as $r_o) {
+           $r_o_val = (string) (is_array($r_o) ? $r_o['value'] : $r_o);
+           if (!in_array($r_o_val, $del_vals, true)) $new_os[] = $r_o;
+         }
+         if ($new_os) {
+           $r[$s][$p] = $new_os;
+         }
+         else {
+           unset($r[$s][$p]);
+         }
+       }
+     }
+   }
+   /* check r: subjects without predicates may remain, they do not count as data */
+   foreach ($r as $s => $ps) {
+     if ($ps) return $r;
+   }
+   return array();
+ }
+
+ /* */
+
+ /**
+  * Guesses the kind of structure held by $v.
+  *
+  * Returns 'string' for strings, 'triples' for a list whose first entry is an
+  * array with a 'p' key, 'index' for a subject => predicate => objects array
+  * whose first object is an array with a 'value' key, and 'array' for
+  * everything else (including non-array values).
+  */
+ function getStructType($v) {
+   /* string */
+   if (is_string($v)) return 'string';
+   if (!is_array($v)) return 'array';
+   /* list (has an element 0); key lookup instead of a loose in_array(), which matched any string key on older PHP versions */
+   if (array_key_exists(0, $v)) {
+     /* simple array */
+     if (!is_array($v[0])) return 'array';
+     /* triples */
+     if (array_key_exists('p', $v[0])) return 'triples';
+     return 'array';
+   }
+   /* associative array: index? */
+   foreach ($v as $s => $ps) {
+     if (!is_array($ps)) break;
+     foreach ($ps as $p => $os) {
+       if (!is_array($os) || !isset($os[0]) || !is_array($os[0])) break;
+       if (array_key_exists('value', $os[0])) return 'index';
+     }
+   }
+   /* array */
+   return 'array';
+ }
+
+ /* */
+
+ /* Generic factory: includes the class file for $name and returns a new instance of it.
+ * $name may carry a "Prefix_" that replaces the default "ARC2" class-name prefix.
+ * $a is passed to the constructor as first argument, $caller as second; when no caller
+ * is given, an empty stdClass object is passed instead. */
+ function getComponent($name, $a = '', $caller = '') {
+ ARC2::inc($name);
+ $prefix = 'ARC2';
+ if (preg_match('/^([^\_]+)\_(.+)$/', $name, $m)) {
+ $prefix = $m[1];
+ $name = $m[2];
+ }
+ $cls = $prefix . '_' . $name;
+ if (!$caller) $caller = new stdClass();
+ return new $cls($a, $caller);
+ }
+
+ /* resource */
+
+ /* Returns a new 'Resource' component, created via getComponent() with $a as first constructor argument. */
+ function getResource($a = '') {
+ return ARC2::getComponent('Resource', $a);
+ }
+
+ /* reader */
+
+ /* Returns a new 'Reader' component, created via getComponent() with $a as first constructor argument. */
+ function getReader($a = '') {
+ return ARC2::getComponent('Reader', $a);
+ }
+
+ /* parsers */
+
+ /* Returns the component named "<prefix>Parser", e.g. getParser('Turtle') => 'TurtleParser'.
+ * The functions below are shortcuts that fix the prefix; $a is always the
+ * first constructor argument handed to getComponent(). */
+ function getParser($prefix, $a = '') {
+ return ARC2::getComponent($prefix . 'Parser', $a);
+ }
+
+ function getRDFParser($a = '') {
+ return ARC2::getParser('RDF', $a);
+ }
+
+ function getRDFXMLParser($a = '') {
+ return ARC2::getParser('RDFXML', $a);
+ }
+
+ function getTurtleParser($a = '') {
+ return ARC2::getParser('Turtle', $a);
+ }
+
+ function getRSSParser($a = '') {
+ return ARC2::getParser('RSS', $a);
+ }
+
+ function getSemHTMLParser($a = '') {
+ return ARC2::getParser('SemHTML', $a);
+ }
+
+ /* calls getComponent() directly with the full name; same result as getParser('SPARQL', $a) */
+ function getSPARQLParser($a = '') {
+ return ARC2::getComponent('SPARQLParser', $a);
+ }
+
+ function getSPARQLPlusParser($a = '') {
+ return ARC2::getParser('SPARQLPlus', $a);
+ }
+
+ function getSPARQLXMLResultParser($a = '') {
+ return ARC2::getParser('SPARQLXMLResult', $a);
+ }
+
+ function getJSONParser($a = '') {
+ return ARC2::getParser('JSON', $a);
+ }
+
+ function getSGAJSONParser($a = '') {
+ return ARC2::getParser('SGAJSON', $a);
+ }
+
+ function getCBJSONParser($a = '') {
+ return ARC2::getParser('CBJSON', $a);
+ }
+
+ function getSPARQLScriptParser($a = '') {
+ return ARC2::getParser('SPARQLScript', $a);
+ }
+
+ /* store */
+
+ /* Store factories: each returns the named component via getComponent().
+ * $a is the first constructor argument, $caller (where accepted) the second. */
+ function getStore($a = '', $caller = '') {
+ return ARC2::getComponent('Store', $a, $caller);
+ }
+
+ function getStoreEndpoint($a = '', $caller = '') {
+ return ARC2::getComponent('StoreEndpoint', $a, $caller);
+ }
+
+ function getRemoteStore($a = '', $caller = '') {
+ return ARC2::getComponent('RemoteStore', $a, $caller);
+ }
+
+ /* NOTE(review): no ARC2_MemStore file is visible among the store files listed with this change - confirm it exists */
+ function getMemStore($a = '') {
+ return ARC2::getComponent('MemStore', $a);
+ }
+
+ /* serializers */
+
+ /* Returns the component named "<prefix>Serializer", e.g. getSer('Turtle') => 'TurtleSerializer'.
+ * The functions below are shortcuts that fix the prefix; $a is always the
+ * first constructor argument handed to getComponent(). */
+ function getSer($prefix, $a = '') {
+ return ARC2::getComponent($prefix . 'Serializer', $a);
+ }
+
+ function getTurtleSerializer($a = '') {
+ return ARC2::getSer('Turtle', $a);
+ }
+
+ function getRDFXMLSerializer($a = '') {
+ return ARC2::getSer('RDFXML', $a);
+ }
+
+ function getNTriplesSerializer($a = '') {
+ return ARC2::getSer('NTriples', $a);
+ }
+
+ function getRDFJSONSerializer($a = '') {
+ return ARC2::getSer('RDFJSON', $a);
+ }
+
+ function getPOSHRDFSerializer($a = '') {/* deprecated */
+ return ARC2::getSer('POSHRDF', $a);
+ }
+
+ function getMicroRDFSerializer($a = '') {
+ return ARC2::getSer('MicroRDF', $a);
+ }
+
+ function getRSS10Serializer($a = '') {
+ return ARC2::getSer('RSS10', $a);
+ }
+
+ /* sparqlscript */
+
+ /* Returns a new 'SPARQLScriptProcessor' component, created via getComponent() with $a as first constructor argument. */
+ function getSPARQLScriptProcessor($a = '') {
+ return ARC2::getComponent('SPARQLScriptProcessor', $a);
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 base class
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-25
+*/
+
+class ARC2_Class {
+
+ /* */
+
+ /* Stores the configuration array $a (anything that is not an array becomes an empty array)
+ * and a reference to the calling object, then runs __init().
+ * NOTE(review): $a has a default but the following parameter $caller does not,
+ * so both arguments always have to be passed. */
+ function __construct($a = '', &$caller) {
+ $a = is_array($a) ? $a : array();
+ $this->a = $a;
+ $this->caller = &$caller;
+ $this->__init();
+ }
+
+ /* old-style constructor (method named like the class), forwards to __construct() */
+ function ARC2_Class($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* intentionally empty, only holds a commented-out debug statement */
+ function __destruct() {
+ //echo "\ndestructing " . get_class($this);
+ }
+
+ /* Initialises the instance state from the configuration array $this->a.
+ * Options read here: ns, base, adjust_utf8, max_errors. */
+ function __init() {/* base, time_limit */
+ /* fill an empty $_POST from the raw request body, if the global raw body variable is set */
+ if (!$_POST && isset($GLOBALS['HTTP_RAW_POST_DATA'])) parse_str($GLOBALS['HTTP_RAW_POST_DATA'], $_POST); /* php5 bug */
+ $this->inc_path = ARC2::getIncPath();
+ /* namespace bookkeeping: counter for generated prefixes, namespace => prefix map, namespaces in use */
+ $this->ns_count = 0;
+ $this->nsp = array('http://www.w3.org/1999/02/22-rdf-syntax-ns#' => 'rdf');
+ $this->used_ns = array('http://www.w3.org/1999/02/22-rdf-syntax-ns#');
+ /* prefix => namespace map from the configuration */
+ $this->ns = $this->v('ns', array(), $this->a);
+
+ /* the base URI defaults to the URI of the current request */
+ $this->base = $this->v('base', ARC2::getRequestURI(), $this->a);
+ $this->errors = array();
+ $this->warnings = array();
+ $this->adjust_utf8 = $this->v('adjust_utf8', 0, $this->a);
+ /* addError() terminates the script once more than this many errors were collected */
+ $this->max_errors = $this->v('max_errors', 25, $this->a);
+ }
+
+ /* */
+
+ /**
+  * Returns a value if it is set, otherwise $default.
+  *
+  * $o may be an array (looked up by key) or an object (looked up by property);
+  * when $o is omitted, the property is read from this object.
+  */
+ function v($name, $default = false, $o = false) {/* value if set */
+   if ($o === false) {
+     return isset($this->$name) ? $this->$name : $default;
+   }
+   if (is_array($o)) {
+     if (isset($o[$name])) return $o[$name];
+     return $default;
+   }
+   if (isset($o->$name)) return $o->$name;
+   return $default;
+ }
+
+ /**
+  * Returns a value if it is set and not empty (truthy), otherwise $default.
+  *
+  * $o may be an array (looked up by key) or an object (looked up by property);
+  * when $o is omitted, the property is read from this object.
+  */
+ function v1($name, $default = false, $o = false) {/* value if 1 (= not empty) */
+   if ($o === false) {
+     return (isset($this->$name) && $this->$name) ? $this->$name : $default;
+   }
+   if (is_array($o)) {
+     if (isset($o[$name]) && $o[$name]) return $o[$name];
+     return $default;
+   }
+   if (isset($o->$name) && $o->$name) return $o->$name;
+   return $default;
+ }
+
+ /**
+  * Calls method $name with the single argument $a and returns its result,
+  * or returns $default when the method does not exist.
+  * The method is looked up on $o, or on this object when $o is omitted.
+  */
+ function m($name, $a = false, $default = false, $o = false) {/* call method */
+   if ($o === false) {
+     if (!method_exists($this, $name)) return $default;
+     return $this->$name($a);
+   }
+   if (!method_exists($o, $name)) return $default;
+   return $o->$name($a);
+ }
+
+ /* */
+
+ /* Turns $v into CamelCase by removing every non-alphanumeric character and upper-casing
+ * the character that follows it.
+ * $lc_first: lower-case the first character, unless the second one is upper-case.
+ * $keep_boundaries: put "_" where removing a separator would fuse an upper-case character
+ * (or a digit, which strtoupper() leaves unchanged) with the following word. */
+ function camelCase($v, $lc_first = 0, $keep_boundaries = 0) {
+ $r = ucfirst($v);
+ /* the greedy first group makes each pass remove the LAST remaining separator */
+ while (preg_match('/^(.*)[^a-z0-9](.*)$/si', $r, $m)) {
+ /* don't fuse 2 upper-case chars */
+ if ($keep_boundaries && $m[1]) {
+ $boundary = substr($m[1], -1);
+ /* alphanumeric placeholder, so that the loop does not treat it as another separator */
+ if (strtoupper($boundary) == $boundary) $m[1] .= 'CAMELCASEBOUNDARY';
+ }
+ $r = $m[1] . ucfirst($m[2]);
+ }
+ $r = str_replace('CAMELCASEBOUNDARY', '_', $r);
+ if ((strlen($r) > 1) && $lc_first && !preg_match('/[A-Z]/', $r[1])) $r = strtolower($r[0]) . substr($r, 1);
+ return $r;
+ }
+
+ /**
+  * Turns a camelCase/underscore term into space-separated words,
+  * e.g. "fooBar_baz" => "foo bar baz".
+  *
+  * Underscores become spaces; an upper-case letter that follows a lower-case
+  * letter or digit starts a new, lower-cased word. With $uc_first the first
+  * character of the result is upper-cased.
+  *
+  * Uses a callback instead of the "e" pattern modifier, which newer PHP
+  * versions no longer support.
+  */
+ function deCamelCase($v, $uc_first = 0) {
+   $r = str_replace('_', ' ', $v);
+   $r = preg_replace_callback('/([a-z0-9])([A-Z])/', array($this, 'deCamelCaseBoundary'), $r);
+   return $uc_first ? ucfirst($r) : $r;
+ }
+
+ /* preg_replace_callback() handler for deCamelCase(): "aB" => "a b" */
+ function deCamelCaseBoundary($m) {
+   return $m[1] . ' ' . strtolower($m[2]);
+ }
+
+ /* Derives a readable label from the local name of $uri (split via splitURI(), then
+ * camelCase() with kept boundaries, then deCamelCase()).
+ * $loops counts the retries below and limits the recursion. */
+ function extractTermLabel($uri, $loops = 0) {
+ list($ns, $r) = $this->splitURI($uri);
+ $r = $this->deCamelCase($this->camelCase($r, 1, 1));
+ /* generic names such as "#me" or "#this": retry once with the fragment removed */
+ if (($loops < 1) && preg_match('/^(self|it|this|me)$/i', $r)) {
+ return $this->extractTermLabel(preg_replace('/\#.+$/', '', $uri), $loops + 1);
+ }
+ /* empty local name: retry with a trailing "#" or "/" removed */
+ if ($uri && !$r && ($loops < 2)) {
+ return $this->extractTermLabel(preg_replace('/[\#\/]$/', '', $uri), $loops + 1);
+ }
+ return $r;
+ }
+
+ /* */
+
+ /**
+  * Records an error message and always returns false.
+  *
+  * The message is stored once per instance. It is also forwarded to the caller
+  * (if that object has an addError() method), extended by the name of this
+  * class. Once more than max_errors messages are stored, the script is
+  * terminated with a dump of all messages.
+  */
+ function addError($v) {
+   $is_known = in_array($v, $this->errors);
+   if (!$is_known) {
+     $this->errors[] = $v;
+   }
+   $can_forward = $this->caller && method_exists($this->caller, 'addError');
+   if ($can_forward) {
+     /* messages that already name an origin are chained with "via" */
+     $glue = ' in ';
+     if (strpos($v, ' in ')) $glue = ' via ';
+     $this->caller->addError($v . $glue . get_class($this));
+   }
+   $error_count = count($this->errors);
+   if ($error_count > $this->max_errors) {
+     die('Too many errors (limit: ' . $this->max_errors . '): ' . print_r($this->errors, 1));
+   }
+   return false;
+ }
+
+ /* Returns the list of error messages collected by addError(). */
+ function getErrors() {
+ return $this->errors;
+ }
+
+ /* Returns the list of warnings (initialised as an empty array in __init()). */
+ function getWarnings() {
+ return $this->warnings;
+ }
+
+ /* Clears the collected errors, here and - if the caller has a resetErrors() method - on the caller as well. */
+ function resetErrors() {
+ $this->errors = array();
+ if ($this->caller && method_exists($this->caller, 'resetErrors')) {
+ $this->caller->resetErrors();
+ }
+ }
+
+ /* */
+
+ /* Instance-level shortcut for ARC2::splitURI(); returns its result unchanged. */
+ function splitURI($v) {
+ return ARC2::splitURI($v);
+ }
+
+ /* */
+
+ /* Returns the prefixed name (prefix + $connector + local name) for $v.
+ * Namespaces that get used are recorded in $this->used_ns. */
+ function getPName($v, $connector = ':') {
+ /* is already a pname */
+ if ($ns = $this->getPNameNamespace($v, $connector)) {
+ if (!in_array($ns, $this->used_ns)) $this->used_ns[] = $ns;
+ return $v;
+ }
+ /* new pname */
+ /* splitURI() returns array(namespace, local name) */
+ if ($parts = $this->splitURI($v)) {
+ /* known prefix */
+ foreach ($this->ns as $prefix => $ns) {
+ if ($parts[0] == $ns) {
+ if (!in_array($ns, $this->used_ns)) $this->used_ns[] = $ns;
+ return $prefix . $connector . $parts[1];
+ }
+ }
+ /* new prefix */
+ /* getPrefix() registers a generated prefix when the namespace is not known yet */
+ $prefix = $this->getPrefix($parts[0]);
+ return $prefix . $connector . $parts[1];
+ }
+ /* NOTE(review): looks unreachable, ARC2::splitURI() returns a non-empty array on every path - confirm */
+ return $v;
+ }
+
+ /**
+  * Returns the namespace URI for the prefix of a prefixed name, or 0 when $v
+  * is not a prefixed name or its prefix is not registered in $this->ns.
+  *
+  * With a custom $connector (e.g. "-"), the prefix may contain the other
+  * connector characters but not the connector itself, so the name is split
+  * at the first connector - the same rule that expandPName() applies.
+  */
+ function getPNameNamespace($v, $connector = ':') {
+   $re = '/^([a-z0-9\_\-]+)\:([a-z0-9\_\-\.\%]+)$/i';
+   if ($connector != ':') {
+     /* compare unescaped characters first, escape afterwards; comparing escaped ones never removed the connector */
+     $connectors = array(':', '-', '_', '.');
+     $chars = '\\' . join('\\', array_diff($connectors, array($connector)));
+     $re = '/^([a-z0-9' . $chars . ']+)\\' . $connector . '([a-z0-9\_\-\.\%]+)$/i';
+   }
+   if (!preg_match($re, $v, $m)) return 0;
+   if (!isset($this->ns[$m[1]])) return 0;
+   return $this->ns[$m[1]];
+ }
+
+ /**
+  * Returns the prefix registered for namespace $ns.
+  *
+  * Unknown namespaces get a generated prefix ("ns0", "ns1", ...) which is
+  * added to both lookup maps. The namespace is also recorded as used.
+  */
+ function getPrefix($ns) {
+   $is_new = !isset($this->nsp[$ns]);
+   if ($is_new) {
+     $generated = 'ns' . $this->ns_count;
+     $this->ns[$generated] = $ns;
+     $this->nsp[$ns] = $generated;
+     $this->ns_count++;
+   }
+   if (!in_array($ns, $this->used_ns)) {
+     $this->used_ns[] = $ns;
+   }
+   return $this->nsp[$ns];
+ }
+
+ /* Expands a prefixed name to a full URI (namespace + local name) when its prefix is
+ * registered in $this->ns; any other value is returned unchanged. */
+ function expandPName($v, $connector = ':') {
+ $re = '/^([a-z0-9\_\-]+)\:([a-z0-9\_\-\.\%]+)$/i';
+ if ($connector != ':') {
+ /* custom connector: the prefix may contain the other connector characters, but not the connector itself */
+ $connectors = array(':', '-', '_', '.');
+ $chars = '\\' . join('\\', array_diff($connectors, array($connector)));
+ /* this pattern additionally carries the U (ungreedy) modifier */
+ $re = '/^([a-z0-9' . $chars . ']+)\\' . $connector . '([a-z0-9\_\-\.\%]+)$/Ui';
+ }
+ if (preg_match($re, $v, $m) && isset($this->ns[$m[1]])) {
+ return $this->ns[$m[1]] . $m[2];
+ }
+ return $v;
+ }
+
+ /**
+  * Expands all prefixed names in an index and normalises its objects.
+  *
+  * Subjects, predicates and plain object values are run through expandPName().
+  * Plain objects are turned into array('value' => ..., 'type' => ...), where
+  * the type is 'uri' for scheme-like values without whitespace or angle
+  * brackets and 'literal' otherwise. Objects that already are arrays are kept.
+  */
+ function expandPNames($index) {
+   $expanded = array();
+   foreach ($index as $subj => $props) {
+     $subj = $this->expandPName($subj);
+     $expanded[$subj] = array();
+     foreach ($props as $prop => $objs) {
+       $prop = $this->expandPName($prop);
+       /* a single object may be given without the surrounding list */
+       $objs = is_array($objs) ? $objs : array($objs);
+       foreach (array_keys($objs) as $k) {
+         if (is_array($objs[$k])) continue;
+         $val = $this->expandPName($objs[$k]);
+         $is_uri = preg_match('/^[a-z]+\:[^\s\<\>]+$/si', $val);
+         $objs[$k] = array('value' => $val, 'type' => $is_uri ? 'uri' : 'literal');
+       }
+       $expanded[$subj][$prop] = $objs;
+     }
+   }
+   return $expanded;
+ }
+
+ /* */
+
+ /* Resolves the (possibly relative) reference $path against $base and returns the result.
+ * $base defaults to $this->base; a fragment in the base is ignored. */
+ function calcURI($path, $base = "") {
+ /* quick check */
+ if (preg_match("/^[a-z0-9\_]+\:/i", $path)) {/* abs path or bnode */
+ return $path;
+ }
+ if (preg_match('/^\$\{.*\}/', $path)) {/* placeholder, assume abs URI */
+ return $path;
+ }
+ if (preg_match("/^\/\//", $path)) {/* net path, assume http */
+ return 'http:' . $path;
+ }
+ /* other URIs */
+ $base = $base ? $base : $this->base;
+ $base = preg_replace('/\#.*$/', '', $base);
+ if ($path === true) {/* empty (but valid) URIref via turtle parser: <> */
+ return $base;
+ }
+ /* a leading "./" carries no information */
+ $path = preg_replace("/^\.\//", '', $path);
+ /* root = scheme + authority; NOTE(review): [\/|$] is a character class of the literal characters / | $, not an alternation with end-of-string - confirm intent */
+ $root = preg_match('/(^[a-z0-9]+\:[\/]{1,3}[^\/]+)[\/|$]/i', $base, $m) ? $m[1] : $base; /* w/o trailing slash */
+ $base .= ($base == $root) ? '/' : '';
+ if (preg_match('/^\//', $path)) {/* leading slash */
+ return $root . $path;
+ }
+ if (!$path) {
+ return $base;
+ }
+ /* fragment or query reference: replaces the same part of the base */
+ if (preg_match('/^([\#\?])/', $path, $m)) {
+ return preg_replace('/\\' .$m[1]. '.*$/', '', $base) . $path;
+ }
+ /* "&..." reference: appended to the query string of the base, or turned into one */
+ if (preg_match('/^(\&)(.*)$/', $path, $m)) {/* not perfect yet */
+ return preg_match('/\?/', $base) ? $base . $m[1] . $m[2] : $base . '?' . $m[2];
+ }
+ if (preg_match("/^[a-z0-9]+\:/i", $path)) {/* abs path */
+ return $path;
+ }
+ /* rel path: remove stuff after last slash */
+ $base = substr($base, 0, strrpos($base, '/')+1);
+ /* resolve ../ */
+ /* each leading "../" removes one directory from the base, but never goes above the root */
+ while (preg_match('/^(\.\.\/)(.*)$/', $path, $m)) {
+ $path = $m[2];
+ $base = ($base == $root.'/') ? $base : preg_replace('/^(.*\/)[^\/]+\/$/', '\\1', $base);
+ }
+ return $base . $path;
+ }
+
+ /* */
+
+ /**
+  * Derives a base URI from $path.
+  *
+  * The fragment is removed and a network path ("//host/...") is assumed to be
+  * http. For values with a scheme, "../" segments are resolved against the
+  * part that precedes them (e.g. "http://a/b/../c" => "http://a/c").
+  * Values without a scheme are treated as local paths and returned as
+  * "file://" plus their real path.
+  */
+ function calcBase($path) {
+   $r = $path;
+   $r = preg_replace('/\#.*$/', '', $r);/* remove hash */
+   $r = preg_replace('/^\/\//', 'http://', $r);/* net path (//), assume http */
+   if (preg_match('/^[a-z0-9]+\:/', $r)) {/* scheme, abs path */
+     /* $m[1]: everything up to the first "../", $m[2]: the relative rest */
+     while (preg_match('/^(.+\/)(\.\.\/.*)$/U', $r, $m)) {
+       /* calcURI() expects (path, base); the reversed order simply returned $m[1] and dropped the rest */
+       $r = $this->calcURI($m[2], $m[1]);
+     }
+     return $r;
+   }
+   return 'file://' . realpath($r);/* real path */
+ }
+
+ /* */
+
+ /* Creates a Resource component (configured with $this->a) for $uri.
+ * $store_or_props: an array is passed to the resource's setProps(), anything else to setStore(). */
+ function getResource($uri, $store_or_props = '') {
+ $res = ARC2::getResource($this->a);
+ $res->setURI($uri);
+ if (is_array($store_or_props)) {
+ $res->setProps($store_or_props);
+ }
+ else {
+ $res->setStore($store_or_props);
+ }
+ return $res;
+ }
+
+ /**
+  * Converts $v into an index.
+  *
+  * Arrays: a list of triples is converted via ARC2::getSimpleIndex(), any other
+  * array is returned unchanged. Strings are handed to the RDF parser, either as
+  * the graph URI (non-empty, no whitespace) or as the data to parse.
+  */
+ function toIndex($v) {
+   if (is_array($v)) {
+     $is_triples = isset($v[0]) && isset($v[0]['s']);
+     return $is_triples ? ARC2::getSimpleIndex($v, 0) : $v;
+   }
+   $parser = ARC2::getRDFParser($this->a);
+   $looks_like_uri = $v && !preg_match('/\s/', $v);
+   if ($looks_like_uri) {
+     /* assume graph URI */
+     $parser->parse($v);
+   }
+   else {
+     $parser->parse('', $v);
+   }
+   return $parser->getSimpleIndex(0);
+ }
+
+ /**
+  * Converts $v into a list of triples.
+  *
+  * Arrays: a list of triples is returned unchanged, any other array is treated
+  * as an index and converted via ARC2::getTriplesFromIndex(). Strings are handed
+  * to the RDF parser, either as the graph URI (non-empty, no whitespace) or as
+  * the data to parse.
+  */
+ function toTriples($v) {
+   if (is_array($v)) {
+     $is_triples = isset($v[0]) && isset($v[0]['s']);
+     return $is_triples ? $v : ARC2::getTriplesFromIndex($v);
+   }
+   $parser = ARC2::getRDFParser($this->a);
+   $looks_like_uri = $v && !preg_match('/\s/', $v);
+   if ($looks_like_uri) {
+     /* assume graph URI */
+     $parser->parse($v);
+   }
+   else {
+     $parser->parse('', $v);
+   }
+   return $parser->getTriples();
+ }
+
+ /* */
+
+ /* Serializer shortcuts. Each one loads its serializer class, creates an instance that is
+ * configured with $this->a plus the prefix map $ns (default: the 'ns' entry of $this->a)
+ * and has this object as caller, and returns the serializer's output for $v.
+ * Where both are supported, a list of triples (first entry has an 's' key) goes to
+ * getSerializedTriples(), anything else to getSerializedIndex(). */
+ function toNTriples($v, $ns = '', $raw = 0) {
+ ARC2::inc('NTriplesSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_NTriplesSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return (isset($v[0]) && isset($v[0]['s'])) ? $ser->getSerializedTriples($v, $raw) : $ser->getSerializedIndex($v, $raw);
+ }
+
+ function toTurtle($v, $ns = '', $raw = 0) {
+ ARC2::inc('TurtleSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_TurtleSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return (isset($v[0]) && isset($v[0]['s'])) ? $ser->getSerializedTriples($v, $raw) : $ser->getSerializedIndex($v, $raw);
+ }
+
+ function toRDFXML($v, $ns = '', $raw = 0) {
+ ARC2::inc('RDFXMLSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_RDFXMLSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return (isset($v[0]) && isset($v[0]['s'])) ? $ser->getSerializedTriples($v, $raw) : $ser->getSerializedIndex($v, $raw);
+ }
+
+ function toRDFJSON($v, $ns = '') {
+ ARC2::inc('RDFJSONSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_RDFJSONSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return (isset($v[0]) && isset($v[0]['s'])) ? $ser->getSerializedTriples($v) : $ser->getSerializedIndex($v);
+ }
+
+ function toRSS10($v, $ns = '') {
+ ARC2::inc('RSS10Serializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_RSS10Serializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return (isset($v[0]) && isset($v[0]['s'])) ? $ser->getSerializedTriples($v) : $ser->getSerializedIndex($v);
+ }
+
+ /* the three legacy serializers always go through getSerializedArray() */
+ function toLegacyXML($v, $ns = '') {
+ ARC2::inc('LegacyXMLSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_LegacyXMLSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return $ser->getSerializedArray($v);
+ }
+
+ function toLegacyJSON($v, $ns = '') {
+ ARC2::inc('LegacyJSONSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_LegacyJSONSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return $ser->getSerializedArray($v);
+ }
+
+ function toLegacyHTML($v, $ns = '') {
+ ARC2::inc('LegacyHTMLSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $ser = new ARC2_LegacyHTMLSerializer(array_merge($this->a, array('ns' => $ns)), $this);
+ return $ser->getSerializedArray($v);
+ }
+
+ /* HTML output is produced by the MicroRDF serializer; $label_store is passed on as 'label_store' option when given */
+ function toHTML($v, $ns = '', $label_store = '') {
+ ARC2::inc('MicroRDFSerializer');
+ if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+ $conf = array_merge($this->a, array('ns' => $ns));
+ if ($label_store) $conf['label_store'] = $label_store;
+ $ser = new ARC2_MicroRDFSerializer($conf, $this);
+ return (isset($v[0]) && isset($v[0]['s'])) ? $ser->getSerializedTriples($v) : $ser->getSerializedIndex($v);
+ }
+
+ /* */
+
+ /* Parses the Turtle template $t (preceded by the @prefix lines from getTurtleHead()) with
+ * $g as first parse() argument, and returns the parser's simple index with $vals passed
+ * as second getSimpleIndex() argument.
+ * NOTE(review): presumably $vals supplies the values for the template's variables - confirm in the parser. */
+ function getFilledTemplate($t, $vals, $g = '') {
+ $parser = ARC2::getTurtleParser();
+ $parser->parse($g, $this->getTurtleHead() . $t);
+ return $parser->getSimpleIndex(0, $vals);
+ }
+
+ /**
+  * Returns one Turtle "@prefix" line for each entry of the configured
+  * prefix => namespace map ('ns' option), or an empty string when none is set.
+  */
+ function getTurtleHead() {
+   $ns = $this->v('ns', array(), $this->a);
+   $lines = array();
+   foreach ($ns as $prefix => $uri) {
+     $lines[] = "@prefix " . $prefix . ": <" . $uri . "> .\n";
+   }
+   return join('', $lines);
+ }
+
+ /**
+  * Prepends the PREFIX declarations that query $q uses but does not declare.
+  *
+  * $ns is a prefix => namespace map (default: the 'ns' option). A declaration
+  * is added when "prefix:" occurs at the start of the query or after
+  * whitespace, and the query has no "PREFIX prefix:" declaration yet.
+  * Returns the prologue, a line break and the original query.
+  */
+ function completeQuery($q, $ns = '') {
+   if (!$ns) $ns = isset($this->a['ns']) ? $this->a['ns'] : array();
+   $added_prefixes = array();
+   $prologue = '';
+   foreach ($ns as $k => $v) {
+     /* prefixes may be configured with or without the trailing colon */
+     $k = rtrim($k, ':');
+     if (in_array($k, $added_prefixes)) continue;
+     /* the prefix is data, not a pattern: quote it before it goes into the regex */
+     $k_re = preg_quote($k, '/');
+     if (preg_match('/(^|\s)' . $k_re . ':/s', $q) && !preg_match('/PREFIX\s+' . $k_re . '\:/is', $q)) {
+       $prologue .= "\n" . 'PREFIX ' . $k . ': <' . $v . '>';
+     }
+     $added_prefixes[] = $k;
+   }
+   return $prologue . "\n" . $q;
+ }
+
+ /* */
+
+ /* Runs $str through ARC2::toUTF8() when the adjust_utf8 option is enabled, otherwise returns it unchanged. */
+ function toUTF8($str) {
+ return $this->adjust_utf8 ? ARC2::toUTF8($str) : $str;
+ }
+
+ /* Wraps $str in a "data:" URI (text/plain, utf-8), with the payload encoded via rawurlencode(). */
+ function toDataURI($str) {
+ return 'data:text/plain;charset=utf-8,' . rawurlencode($str);
+ }
+
+ /**
+  * Returns the decoded payload of a "data:text/plain;charset=utf-8," URI.
+  *
+  * Only a header at the very start of $str is removed, and it is removed
+  * before decoding, so a payload that itself contains the header text is
+  * returned intact. Values without the header are just url-decoded.
+  */
+ function fromDataURI($str) {
+   $head = 'data:text/plain;charset=utf-8,';
+   if (strpos($str, $head) === 0) {
+     $str = substr($str, strlen($head));
+   }
+   return rawurldecode($str);
+ }
+
+ /* prevent SQL injections via SPARQL REGEX */
+
+ /* Returns $str with quotes, backslashes and NUL bytes escaped by addslashes().
+ * NOTE(review): addslashes() is not aware of the connection's character set; whether this is
+ * sufficient depends on how callers embed the result into SQL - see the todo below. */
+ function checkRegex($str) {
+ return addslashes($str); // @@todo extend
+ }
+
+ /* Microdata methods */
+
+ /* Returns the HTML attribute string itemscope/itemtype/itemid for an item.
+ * $type may be a prefixed name (expanded via expandPName()) and defaults to owl:Thing;
+ * type and $id are HTML-escaped. */
+ function getMicrodataAttrs($id, $type = '') {
+ $type = $type ? $this->expandPName($type) : $this->expandPName('owl:Thing');
+ return 'itemscope="" itemtype="' . htmlspecialchars($type) . '" itemid="' . htmlspecialchars($id) . '"';
+ }
+
+ /* short alias for getMicrodataAttrs() */
+ function mdAttrs($id, $type = '') {
+ return $this->getMicrodataAttrs($id, $type);
+ }
+
+ /* central DB query hook */
+
+ /* Runs $sql on the MySQL connection $con and returns the mysql_query() result.
+ * With $log_errors, a MySQL error message is recorded via addError().
+ * NOTE(review): relies on the mysql_* functions, which are not available in PHP 7 and later. */
+ function queryDB($sql, $con, $log_errors = 0) {
+ $t1 = ARC2::mtime();
+ $r = mysql_query($sql, $con);
+ /* duration of the query in seconds */
+ $t2 = ARC2::mtime() - $t1;
+ /* hook for slow-query debugging, the output statement is commented out */
+ if ($t2 > 1) {
+ //echo "\n needed " . $t2 . ' secs for ' . $sql;
+ }
+ if ($log_errors && ($er = mysql_error($con))) $this->addError($er);
+ return $r;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 Web Client
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-07-06
+*/
+
+ARC2::inc('Class');
+
+class ARC2_Reader extends ARC2_Class {
+
+ /* Passes the configuration array and the caller on to the parent constructor. */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* old-style constructor (method named like the class), forwards to __construct() */
+ function ARC2_Reader($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* Initialises the reader state on top of the parent's state.
+ * Options read here from $this->a: http_method, message_body, http_accept_header,
+ * http_user_agent_header, http_custom_headers, max_redirects, format, reader_timeout,
+ * reader_auth_infos. The inline list below also names options that are not read in this method. */
+ function __init() {/* inc_path, proxy_host, proxy_port, proxy_skip, http_accept_header, http_user_agent_header, max_redirects */
+ parent::__init();
+ $this->http_method = $this->v('http_method', 'GET', $this->a);
+ $this->message_body = $this->v('message_body', '', $this->a);;
+ /* both header options hold complete header lines, including the header name */
+ $this->http_accept_header = $this->v('http_accept_header', 'Accept: application/rdf+xml; q=0.9, */*; q=0.1', $this->a);
+ $this->http_user_agent_header = $this->v('http_user_agent_header', 'User-Agent: ARC Reader (http://arc.semsol.org/)', $this->a);
+ $this->http_custom_headers = $this->v('http_custom_headers', '', $this->a);
+ $this->max_redirects = $this->v('max_redirects', 3, $this->a);
+ $this->format = $this->v('format', false, $this->a);
+ $this->redirects = array();
+ /* set by activate() to a hash of the path and data last activated */
+ $this->stream_id = '';
+ $this->timeout = $this->v('reader_timeout', 30, $this->a);
+ $this->response_headers = array();
+ $this->digest_auth = 0;
+ $this->auth_infos = $this->v('reader_auth_infos', array(), $this->a);
+ }
+
+ /* */
+
+ function setHTTPMethod($v) {
+ $this->http_method = $v;
+ }
+
+ function setMessageBody($v) {
+ $this->message_body = $v;
+ }
+
+  /**
+   * Replaces the Accept header line sent with requests.
+   *
+   * @param string $v complete header line, including the "Accept: " prefix
+   */
+  function setAcceptHeader($v) {
+    $this->http_accept_header = $v;
+  }
+
+  /**
+   * Replaces all custom request headers.
+   *
+   * @param string $v header lines (addCustomHeaders() joins lines with CRLF)
+   */
+  function setCustomHeaders($v) {
+    $this->http_custom_headers = $v;
+  }
+
+  /**
+   * Appends header line(s) to the custom request headers, inserting a CRLF
+   * separator when headers are already present.
+   *
+   * @param string $v header line(s) to append
+   */
+  function addCustomHeaders($v) {
+    $separator = $this->http_custom_headers ? "\r\n" : '';
+    $this->http_custom_headers .= $separator . $v;
+  }
+
+ /* */
+
+  /**
+   * Prepares the reader for a document: applies credentials, opens a data or
+   * socket stream and, unless only pinging, detects the format.
+   *
+   * Nothing is re-opened when the path/data pair equals the one that is
+   * already active (compared via an md5 id).
+   *
+   * @param string $path      document location, or a "data:" URI
+   * @param string $data      inline data; when non-empty no socket is opened
+   * @param int    $ping_only when truthy, format detection is skipped
+   * @param int    $timeout   overrides the configured timeout when non-zero
+   */
+  function activate($path, $data = '', $ping_only = 0, $timeout = 0) {
+    $this->setCredentials($path);
+    $this->ping_only = $ping_only;
+    if ($timeout) {
+      $this->timeout = $timeout;
+    }
+    $id = md5($path . ' ' . $data);
+    /* same stream as before: keep it */
+    if ($this->stream_id == $id) {
+      return;
+    }
+    $this->stream_id = $id;
+    /* data uri? */
+    if (!$data && preg_match('/^data\:([^\,]+)\,(.*)$/', $path, $m)) {
+      $path = '';
+      if (preg_match('/base64/', $m[1])) {
+        $data = base64_decode($m[2]);
+      }
+      else {
+        $data = rawurldecode($m[2]);
+      }
+    }
+    $this->base = $this->calcBase($path);
+    $this->uri = $this->calcURI($path, $this->base);
+    if ($data) {
+      $this->stream = $this->getDataStream($data);
+    }
+    else {
+      $this->stream = $this->getSocketStream($this->base, $ping_only);
+    }
+    if ($this->stream && !$this->ping_only) {
+      $this->getFormat();
+    }
+  }
+
+ /*
+ * HTTP Basic/Digest + Proxy authorization can be defined in the
+ * arc_reader_credentials config setting:
+
+ 'arc_reader_credentials' => array(
+ 'http://basic.example.com/' => 'user:pass', // shortcut for type=basic
+ 'http://digest.example.com/' => 'user::pass', // shortcut for type=digest
+ 'http://proxy.example.com/' => array('type' => 'basic', 'proxy', 'user' => 'user', 'pass' => 'pass'),
+ ),
+
+ */
+
+  /**
+   * Applies the credentials from the "arc_reader_credentials" setting whose
+   * URL pattern matches $url, by calling set{Type}AuthCredentials().
+   *
+   * Entries are either an array with "type", "user" and "pass" keys, or a
+   * string shortcut: "user::pass" (digest) or "user:pass" (basic). The user
+   * part ends at the first colon, so passwords may contain colons.
+   * Malformed entries are skipped instead of aborting the remaining ones.
+   *
+   * @param string $url request URL to match the configured patterns against
+   * @return int|null 0 when no credentials are configured, otherwise nothing
+   */
+  function setCredentials($url) {
+    $entries = $this->v('arc_reader_credentials', array(), $this->a);
+    if (!$entries) return 0;
+    foreach ($entries as $pattern => $creds) {
+      if (!is_array($creds)) {
+        /* digest shortcut (user::pass) */
+        if (preg_match('/^([^\:]+)\:\:(.+)$/', $creds, $m)) {
+          $creds = array('type' => 'digest', 'user' => $m[1], 'pass' => $m[2]);
+        }
+        /* basic shortcut (user:pass) */
+        elseif (preg_match('/^([^\:]+)\:(.+)$/', $creds, $m)) {
+          $creds = array('type' => 'basic', 'user' => $m[1], 'pass' => $m[2]);
+        }
+        /* unusable entry: ignore it, but keep checking the other patterns */
+        else {
+          continue;
+        }
+      }
+      if (!isset($creds['type']) || !$creds['type']) continue;
+      /* only ":", "/", "." and "?" are escaped; other characters keep their regex meaning */
+      $regex = '/' . preg_replace('/([\:\/\.\?])/', '\\\\\1', $pattern) . '/';
+      if (!preg_match($regex, $url)) continue;
+      $mthd = 'set' . $this->camelCase($creds['type']) . 'AuthCredentials';
+      if (method_exists($this, $mthd)) $this->$mthd($creds, $url);
+    }
+  }
+
+  /**
+   * Adds an HTTP Basic (Proxy-)Authorization header to the custom headers.
+   *
+   * The proxy variant is selected by a bare 'proxy' flag entry in $creds
+   * (as in the config example) or by a truthy 'proxy' key. Only the flag is
+   * inspected, so a user name or password that happens to be "proxy" no
+   * longer switches the header.
+   *
+   * @param array $creds credentials with "user" and "pass" keys
+   */
+  function setBasicAuthCredentials($creds) {
+    $auth = 'Basic ' . base64_encode($creds['user'] . ':' . $creds['pass']);
+    $is_proxy = !empty($creds['proxy']);
+    foreach ($creds as $k => $v) {
+      if (is_int($k) && ($v === 'proxy')) $is_proxy = true;
+    }
+    $h = $is_proxy ? 'Proxy-Authorization' : 'Authorization';
+    $this->addCustomHeaders($h . ': ' . $auth);
+  }
+
+  /**
+   * Adds an HTTP Digest (Proxy-)Authorization header, once a Digest challenge
+   * has been received (or was stored in $this->auth_infos by an earlier request).
+   *
+   * Differences to the previous implementation:
+   * - the "uri" directive is sent once instead of twice,
+   * - "nc" is written as 8 hex digits,
+   * - the request method is upper-cased for the A2 hash, as on the request line,
+   * - the digest uri includes the query string, matching the request target,
+   * - the proxy flag is read from the flag entry only, not from any value,
+   * - a repeated WWW-Authenticate header (array value) is supported.
+   *
+   * @param array  $creds credentials with "user" and "pass" keys
+   * @param string $url   request URL
+   * @return int|null 0 when no challenge is known yet, otherwise nothing
+   */
+  function setDigestAuthCredentials($creds, $url) {
+    $url_parts = parse_url($url);
+    $path = $this->v1('path', '/', $url_parts);
+    if (is_array($url_parts) && isset($url_parts['query']) && ($url_parts['query'] !== '')) {
+      $path .= '?' . $url_parts['query'];
+    }
+    $auth = '';
+    $hs = $this->getResponseHeaders();
+    /* initial 401 */
+    $h = $this->v('www-authenticate', '', $hs);
+    if (is_array($h)) {/* several challenges: use the Digest one */
+      $challenges = $h;
+      $h = '';
+      foreach ($challenges as $challenge) {
+        if (preg_match('/Digest/i', $challenge)) {
+          $h = $challenge;
+          break;
+        }
+      }
+    }
+    if ($h && preg_match('/Digest/i', $h)) {
+      $auth = 'Digest ';
+      /* Digest realm="$realm", nonce="$nonce", qop="auth", opaque="$opaque" */
+      $ks = array('realm', 'nonce', 'opaque');/* skipping qop, assuming "auth" */
+      foreach ($ks as $i => $k) {
+        $val = preg_match('/\b' . $k . '=\"?([^\"]+)\"?/i', $h, $m) ? $m[1] : '';
+        $auth .= ($i ? ', ' : '') . $k . '="' . $val . '"';
+        $this->auth_infos[$k] = $val;
+      }
+      $this->auth_infos['auth'] = $auth;
+      $this->auth_infos['request_count'] = 1;
+    }
+    /* initial 401 or repeated request */
+    if ($this->v('auth', 0, $this->auth_infos)) {
+      $qop = 'auth';
+      $auth = $this->auth_infos['auth'];
+      $rc = $this->auth_infos['request_count'];
+      $realm = $this->auth_infos['realm'];
+      $nonce = $this->auth_infos['nonce'];
+      $ha1 = md5($creds['user'] . ':' . $realm . ':' . $creds['pass']);
+      $ha2 = md5(strtoupper($this->http_method) . ':' . $path);
+      $nc = sprintf('%08x', $rc);/* nonce count: 8 hex digits */
+      $cnonce = dechex($rc * 2);
+      $resp = md5($ha1 . ':' . $nonce . ':' . $nc . ':' . $cnonce . ':' . $qop . ':' . $ha2);
+      $auth .= ', username="' . $creds['user'] . '"' .
+        ', uri="' . $path . '"' .
+        ', qop=' . $qop .
+        ', nc=' . $nc .
+        ', cnonce="' . $cnonce . '"' .
+        ', response="' . $resp . '"';
+      $this->auth_infos['request_count'] = $rc + 1;
+    }
+    if (!$auth) return 0;
+    $is_proxy = !empty($creds['proxy']);
+    foreach ($creds as $k => $v) {
+      if (is_int($k) && ($v === 'proxy')) $is_proxy = true;
+    }
+    $h = $is_proxy ? 'Proxy-Authorization' : 'Authorization';
+    $this->addCustomHeaders($h . ': ' . $auth);
+  }
+
+ /* */
+
+  /**
+   * Tells whether a request for $url goes through the configured proxy.
+   *
+   * @param string $url request URL
+   * @return bool false when no "proxy_host" is configured or when the URL
+   *              contains one of the "proxy_skip" strings, true otherwise
+   */
+  function useProxy($url) {
+    $use_proxy = $this->v1('proxy_host', 0, $this->a) ? true : false;
+    if ($use_proxy) {
+      foreach ($this->v1('proxy_skip', array(), $this->a) as $skip) {
+        if (strpos($url, $skip) !== false) {
+          $use_proxy = false;
+          break;
+        }
+      }
+    }
+    return $use_proxy;
+  }
+
+ /* */
+
+  /**
+   * Sets the base from $path and opens a stream: a data stream when $data is
+   * given, otherwise a socket stream for the base.
+   *
+   * @param string $path document location
+   * @param string $data optional inline data
+   */
+  function createStream($path, $data = '') {
+    $this->base = $this->calcBase($path);
+    if ($data) {
+      $this->stream = $this->getDataStream($data);
+    }
+    else {
+      $this->stream = $this->getSocketStream($this->base);
+    }
+  }
+
+  /**
+   * Wraps a string in the stream structure used by readStream().
+   *
+   * @param string $data content to serve
+   * @return array stream of type "data", positioned at 0, with no headers and an empty buffer
+   */
+  function getDataStream($data) {
+    $stream = array();
+    $stream['type'] = 'data';
+    $stream['pos'] = 0;
+    $stream['headers'] = array();
+    $stream['size'] = strlen($data);
+    $stream['data'] = $data;
+    $stream['buffer'] = '';
+    return $stream;
+  }
+
+  /**
+   * Opens a socket stream for a file:, http: or https: URL by dispatching to
+   * getFileSocket() / getHTTPSocket().
+   *
+   * URLs without a scheme (or that parse_url() rejects) are now treated like
+   * any other unsupported scheme instead of raising an undefined-index notice.
+   *
+   * @param string $url absolute URL
+   * @return mixed the opener's result; the addError() result for a bare
+   *               "file://"; null for unsupported schemes
+   */
+  function getSocketStream($url) {
+    if ($url == 'file://') {
+      return $this->addError('Error: file does not exists or is not accessible');
+    }
+    $parts = parse_url($url);
+    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
+    $mappings = array('file' => 'File', 'http' => 'HTTP', 'https' => 'HTTP');
+    if (isset($mappings[$scheme])) {
+      return $this->m('get' . $mappings[$scheme] . 'Socket', $url, $this->getDataStream(''));
+    }
+    return null;
+  }
+
+  /**
+   * Opens a local file, named by the path component of $url, as a socket stream.
+   *
+   * @param string $url file URL
+   * @return mixed stream array, or the addError() result when the file is
+   *               missing or cannot be opened
+   */
+  function getFileSocket($url) {
+    $parts = parse_url($url);
+    $handle = false;
+    if (file_exists($parts['path'])) {
+      $handle = @fopen($parts['path'], 'rb');
+    }
+    if (!$handle) {
+      return $this->addError('Socket error: Could not open "' . $parts['path'] . '"');
+    }
+    return array('type' => 'socket', 'socket' =>& $handle, 'headers' => array(), 'pos' => 0, 'size' => filesize($parts['path']), 'buffer' => '');
+  }
+
+  /**
+   * Sends an HTTP/1.0 request for $url and returns a socket stream positioned
+   * at the start of the response body.
+   *
+   * Unless the reader is in ping-only mode, the response headers are parsed
+   * into $this->response_headers, redirects are followed, and a Digest
+   * challenge is answered once.
+   *
+   * Differences to the previous implementation:
+   * - the socket is closed on the timeout, HTTP-error and digest-retry paths,
+   * - the digest retry keeps the redirect counter and the previous URL parts,
+   * - unsupported schemes fail with the regular connect error instead of
+   *   reading undefined variables,
+   * - a repeated WWW-Authenticate header (array value) is supported.
+   *
+   * @param string       $url        absolute URL, or a relative redirect target
+   * @param int          $redirs     number of redirects followed so far
+   * @param array|string $prev_parts parse_url() parts of the redirecting URL, used
+   *                                 to complete relative redirect targets
+   * @return mixed stream array on success, otherwise the addError() result
+   */
+  function getHTTPSocket($url, $redirs = 0, $prev_parts = '') {
+    $parts = parse_url($url);
+    if (!is_array($parts)) $parts = array();
+    /* relative redirect */
+    if (!isset($parts['scheme']) && $prev_parts) $parts['scheme'] = $prev_parts['scheme'];
+    if (!isset($parts['host']) && $prev_parts) $parts['host'] = $prev_parts['host'];
+    /* no scheme */
+    if (!$this->v('scheme', '', $parts)) return $this->addError('Socket error: Missing URI scheme.');
+    /* port tweaks */
+    $parts['port'] = ($parts['scheme'] == 'https') ? $this->v1('port', 443, $parts) : $this->v1('port', 80, $parts);
+    $nl = "\r\n";
+    $http_mthd = strtoupper($this->http_method);
+    $via_proxy = $this->useProxy($url);
+    /* request line: absolute URL for proxies and URLs with user info, otherwise path + query + fragment */
+    if ($this->v1('user', 0, $parts) || $via_proxy) {
+      $h_code = $http_mthd . ' ' . $url;
+    }
+    else {
+      $h_code = $http_mthd . ' ' . $this->v1('path', '/', $parts) . (($v = $this->v1('query', 0, $parts)) ? '?' . $v : '') . (($v = $this->v1('fragment', 0, $parts)) ? '#' . $v : '');
+    }
+    $port_code = ($parts['port'] != 80) ? ':' . $parts['port'] : '';
+    $h_code .= ' HTTP/1.0' . $nl.
+      'Host: ' . $parts['host'] . $port_code . $nl .
+      (($v = $this->http_accept_header) ? $v . $nl : '') .
+      (($v = $this->http_user_agent_header) && !preg_match('/User\-Agent\:/', $this->http_custom_headers) ? $v . $nl : '') .
+      (($http_mthd == 'POST') ? 'Content-Length: ' . strlen($this->message_body) . $nl : '') .
+      ($this->http_custom_headers ? trim($this->http_custom_headers) . $nl : '') .
+      $nl .
+    '';
+    /* post body */
+    if ($http_mthd == 'POST') {
+      $h_code .= $this->message_body . $nl;
+    }
+    /* connect */
+    $s = false;
+    $errno = 0;
+    $errstr = '';
+    if ($via_proxy) {
+      $s = @fsockopen($this->a['proxy_host'], $this->a['proxy_port'], $errno, $errstr, $this->timeout);
+    }
+    elseif (($parts['scheme'] == 'https') && function_exists('stream_socket_client')) {
+      // SSL options via config array, code by Hannes Muehleisen (muehleis@informatik.hu-berlin.de)
+      $context = stream_context_create();
+      foreach ($this->a as $k => $v) {
+        if (preg_match('/^arc_reader_ssl_(.+)$/', $k, $m)) {
+          stream_context_set_option($context, 'ssl', $m[1], $v);
+        }
+      }
+      $s = stream_socket_client('ssl://' . $parts['host'] . $port_code, $errno, $errstr, $this->timeout, STREAM_CLIENT_CONNECT, $context);
+    }
+    elseif ($parts['scheme'] == 'https') {
+      $s = @fsockopen('ssl://' . $parts['host'], $parts['port'], $errno, $errstr, $this->timeout);
+    }
+    elseif ($parts['scheme'] == 'http') {
+      $s = @fsockopen($parts['host'], $parts['port'], $errno, $errstr, $this->timeout);
+    }
+    if (!$s) {
+      return $this->addError('Socket error: Could not connect to "' . $url . '" (proxy: ' . ($via_proxy ? '1' : '0') . '): ' . $errstr);
+    }
+    /* request */
+    fwrite($s, $h_code);
+    /* timeout */
+    if ($this->timeout) {
+      //stream_set_blocking($s, false);
+      stream_set_timeout($s, $this->timeout);
+    }
+    /* response headers */
+    $h = array();
+    $this->response_headers = $h;
+    if (empty($this->ping_only)) {
+      do {
+        $line = trim(fgets($s, 4096));
+        $info = stream_get_meta_data($s);
+        if (preg_match("/^HTTP[^\s]+\s+([0-9]{1})([0-9]{2})(.*)$/i", $line, $m)) {/* response code */
+          $error = in_array($m[1], array('4', '5')) ? $m[1] . $m[2] . ' ' . $m[3] : '';
+          $error = ($m[1].$m[2] == '304') ? '304 '.$m[3] : $error;
+          $h['response-code'] = $m[1] . $m[2];
+          $h['error'] = $error;
+          $h['redirect'] = ($m[1] == '3') ? true : false;
+        }
+        elseif (preg_match('/^([^\:]+)\:\s*(.*)$/', $line, $m)) {/* header */
+          $h_name = strtolower($m[1]);
+          if (!isset($h[$h_name])) {/* 1st value */
+            $h[$h_name] = trim($m[2]);
+          }
+          elseif (!is_array($h[$h_name])) {/* 2nd value */
+            $h[$h_name] = array($h[$h_name], trim($m[2]));
+          }
+          else {/* more values */
+            $h[$h_name][] = trim($m[2]);
+          }
+        }
+      } while(!$info['timed_out'] && !feof($s) && $line);
+      $h['format'] = strtolower(preg_replace('/^([^\s]+).*$/', '\\1', $this->v('content-type', '', $h)));
+      $h['encoding'] = preg_match('/(utf\-8|iso\-8859\-1|us\-ascii)/', $this->v('content-type', '', $h), $m) ? strtoupper($m[1]) : '';
+      $h['encoding'] = preg_match('/charset=\s*([^\s]+)/si', $this->v('content-type', '', $h), $m) ? strtoupper($m[1]) : $h['encoding'];
+      $this->response_headers = $h;
+      /* result */
+      if ($info['timed_out']) {
+        fclose($s);
+        return $this->addError('Connection timed out after ' . $this->timeout . ' seconds');
+      }
+      /* error */
+      if ($v = $this->v('error', 0, $h)) {
+        /* digest auth */
+        /* 401 received */
+        $www_auth = $this->v('www-authenticate', '', $h);
+        if (is_array($www_auth)) $www_auth = join(', ', $www_auth);
+        if (preg_match('/Digest/i', $www_auth) && !$this->digest_auth) {
+          $this->setCredentials($url);
+          $this->digest_auth = 1;
+          fclose($s);
+          return $this->getHTTPSocket($url, $redirs, $prev_parts);
+        }
+        /* quote the first bytes of the error page, then release the connection */
+        $excerpt = !feof($s) ? trim(strip_tags(fread($s, 128))) . '..."' : '';
+        fclose($s);
+        return $this->addError($v . ' "' . $excerpt);
+      }
+      /* redirect */
+      if ($this->v('redirect', 0, $h) && ($new_url = $this->v1('location', 0, $h))) {
+        fclose($s);
+        $this->redirects[$url] = $new_url;
+        $this->base = $new_url;
+        if ($redirs > $this->max_redirects) {
+          return $this->addError('Max numbers of redirects exceeded.');
+        }
+        return $this->getHTTPSocket($new_url, $redirs+1, $parts);
+      }
+    }
+    if ($this->timeout) {
+      stream_set_blocking($s, true);
+    }
+    return array('type' => 'socket', 'url' => $url, 'socket' =>& $s, 'headers' => $h, 'pos' => 0, 'size' => $this->v('content-length', 0, $h), 'buffer' => '');
+  }
+
+ function readStream($buffer_xml = true, $d_size = 1024) {/*
+  * Reads the next piece of $this->stream and returns it, prefixed with the text
+  * that a previous call left in the stream's buffer. The stream array is worked
+  * on as a local copy and written back to $this->stream before returning.
+  *
+  * $buffer_xml  when true, the result ends after the last ">" of the piece read;
+  *              the remainder is kept in the buffer for the next call
+  * $d_size      maximum number of bytes to read in this call
+  *
+  * Returns the text read, or the addError() result when no stream is set.
+  * NOTE(review): $d stays undefined for stream types other than "data" and "socket".
+  */
+ //if (!$s = $this->v('stream')) return '';
+ if (!$s = $this->v('stream')) return $this->addError('missing stream in "readStream" ' . $this->uri);
+ $s_type = $this->v('type', '', $s);
+ $r = $s['buffer']; /* text left over from the previous call */
+ $s['buffer'] = '';
+ if ($s['size']) $d_size = min($d_size, $s['size'] - $s['pos']); /* known size: never read past it */
+ /* data */
+ if ($s_type == 'data') {
+ $d = ($d_size > 0) ? substr($s['data'], $s['pos'], $d_size) : '';
+ }
+ /* socket */
+ elseif ($s_type == 'socket') {
+ $d = ($d_size > 0) && !feof($s['socket']) ? fread($s['socket'], $d_size) : '';
+ }
+ $eof = $d ? false : true; /* an empty (or "0") piece counts as end of stream */
+ /* chunked despite HTTP 1.0 request */
+ if (isset($s['headers']) && isset($s['headers']['transfer-encoding']) && ($s['headers']['transfer-encoding'] == 'chunked')) {
+ $d = preg_replace('/(^|[\r\n]+)[0-9a-f]{1,4}[\r\n]+/', '', $d); /* drops lines made of 1-4 hex digits (the chunk sizes) */
+ }
+ $s['pos'] += strlen($d); /* counted after the chunk-size lines were removed */
+ if ($buffer_xml) {/* stop after last closing xml tag (if available) */
+ if (preg_match('/^(.*\>)([^\>]*)$/s', $d, $m)) {
+ $d = $m[1];
+ $s['buffer'] = $m[2];
+ }
+ elseif (!$eof) { /* no ">" in this piece: stash everything and read on */
+ $s['buffer'] = $r . $d;
+ $this->stream = $s;
+ return $this->readStream(true, $d_size);
+ }
+ }
+ $this->stream = $s;
+ return $r . $d;
+ }
+
+  /**
+   * Discards the current stream, closing its socket handle first when the
+   * stream is of type "socket".
+   */
+  function closeStream() {
+    if (!isset($this->stream)) {
+      return;
+    }
+    $is_socket = ($this->v('type', 0, $this->stream) == 'socket');
+    if ($is_socket) {
+      @fclose($this->stream['socket']);
+    }
+    unset($this->stream);
+  }
+
+ /* */
+
+  /**
+   * Returns the document format, detecting it on first use from the first
+   * piece of the stream, the response media type and the URI's extension.
+   * The piece read for detection is put back into the stream buffer.
+   *
+   * @return mixed format as determined by ARC2::getFormat(), or the
+   *               addError() result when there is no stream
+   */
+  function getFormat() {
+    if ($this->format) {
+      return $this->format;
+    }
+    if (!$this->v('stream')) {
+      return $this->addError('missing stream in "getFormat"');
+    }
+    $sample = $this->readStream(false);
+    $mtype = $this->v('format', '', $this->stream['headers']);
+    /* un-read the sample so that parsers still see the complete document */
+    $this->stream['buffer'] = $sample . $this->stream['buffer'];
+    $ext = '';
+    if (preg_match('/\.([^\.]+)$/', $this->uri, $m)) {
+      $ext = $m[1];
+    }
+    $this->format = ARC2::getFormat($sample, $mtype, $ext);
+    return $this->format;
+  }
+
+ /* */
+
+  /**
+   * Returns the headers of the current stream, or the headers recorded by the
+   * last HTTP request when no stream (with headers) is available.
+   *
+   * @return array
+   */
+  function getResponseHeaders() {
+    $has_stream_headers = isset($this->stream) && isset($this->stream['headers']);
+    return $has_stream_headers ? $this->stream['headers'] : $this->response_headers;
+  }
+
+  /**
+   * Returns the encoding announced in the current stream's headers.
+   *
+   * @param string $default returned when the headers name no encoding, or when
+   *                        there is no stream (e.g. after closeStream())
+   * @return string
+   */
+  function getEncoding($default = 'UTF-8') {
+    if (!isset($this->stream) || !isset($this->stream['headers'])) {
+      return $default;
+    }
+    return $this->v1('encoding', $default, $this->stream['headers']);
+  }
+
+  /**
+   * Returns the redirects recorded so far.
+   *
+   * @return array map of requested URL => redirect target
+   */
+  function getRedirects() {
+    return $this->redirects;
+  }
+
+  /**
+   * Returns the stored authentication details (filled by the digest handling).
+   *
+   * @return array
+   */
+  function getAuthInfos() {
+    return $this->auth_infos;
+  }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 Resource object
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-02-23
+*/
+
+ARC2::inc('Class');
+
+/**
+ * A resource (URI) together with an index of property values, keyed by
+ * subject and then by property URI. Missing subjects are fetched on demand,
+ * from the attached store if one is set.
+ */
+class ARC2_Resource extends ARC2_Class {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* class-named (PHP 4 style) constructor */
+  function ARC2_Resource($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  /* starts with no URI, an empty index, nothing fetched and no store */
+  function __init() {
+    parent::__init();
+    $this->uri = '';
+    $this->index = array();
+    $this->fetched = array();
+    $this->store = '';
+  }
+
+  /* */
+
+  /* sets the default subject used when a method is called without $s */
+  function setURI($uri) {
+    $this->uri = $uri;
+  }
+
+  /* replaces the whole index */
+  function setIndex($index) {
+    $this->index = $index;
+  }
+
+  /* replaces all properties of subject $s (default: the resource's URI) */
+  function setProps($props, $s = '') {
+    if (!$s) $s = $this->uri;
+    $this->index[$s] = $props;
+  }
+
+  /**
+   * Sets the value(s) of property $p for subject $s.
+   *
+   * $os may be a plain value, a single array('value' => ...) entry, or a list
+   * of either; plain values are stored as literals.
+   */
+  function setProp($p, $os, $s = '') {
+    if (!$s) $s = $this->uri;
+    /* single plain value */
+    if (!is_array($os)) $os = array('value' => $os, 'type' => 'literal');
+    /* single array value */
+    if (isset($os['value'])) $os = array($os);
+    /* list of values */
+    foreach ($os as $i => $o) {
+      if (!is_array($o)) $os[$i] = array('value' => $o, 'type' => 'literal');
+    }
+    $this->index[$s][$this->expandPName($p)] = $os;
+  }
+
+  /* attaches the store that fetchData() queries */
+  function setStore($store) {
+    $this->store = $store;
+  }
+
+  /* */
+
+  /**
+   * Loads the description of $uri (default: the resource's URI) into the
+   * index: via a DESCRIBE query when a store is set, via toIndex() otherwise.
+   * Each URI is fetched only once.
+   *
+   * @return int|null 0 when there is nothing to do, otherwise nothing
+   */
+  function fetchData($uri = '') {
+    if (!$uri) $uri = $this->uri;
+    if (!$uri) return 0;
+    if (in_array($uri, $this->fetched)) return 0;
+    $this->index[$uri] = array();
+    if ($this->store) {
+      $index = $this->store->query('DESCRIBE <' . $uri . '>', 'raw');
+    }
+    else {
+      $index = $this->toIndex($uri);
+    }
+    $this->index = ARC2::getMergedIndex($this->index, $index);
+    $this->fetched[] = $uri;
+  }
+
+  /* */
+
+  /**
+   * Returns the values of property $p for subject $s, or all of the subject's
+   * properties when $p is empty. Unknown subjects are fetched first.
+   *
+   * @return array always an array; empty when nothing is known
+   */
+  function getProps($p = '', $s = '') {
+    if (!$s) $s = $this->uri;
+    if (!$s) return array();
+    if (!isset($this->index[$s])) $this->fetchData($s);
+    /* fetchData() does nothing for an already-fetched URI, so the entry can still be missing (e.g. after setIndex()) */
+    $props = isset($this->index[$s]) ? $this->index[$s] : array();
+    if (!$p) return $props;
+    return $this->v($this->expandPName($p), array(), $props);
+  }
+
+  /* first value entry of property $p, or '' when there is none */
+  function getProp($p, $s = '') {
+    $props = $this->getProps($p, $s);
+    return $props ? $props[0] : '';
+  }
+
+  /* 'value' of the first entry of property $p, or '' when there is none */
+  function getPropValue($p, $s = '') {
+    $prop = $this->getProp($p, $s);
+    return $prop ? $prop['value'] : '';
+  }
+
+  /* list of the 'value' fields of all entries of property $p */
+  function getPropValues($p, $s = '') {
+    $r = array();
+    foreach ($this->getProps($p, $s) as $prop) {
+      $r[] = $prop['value'];
+    }
+    return $r;
+  }
+
+  /* 1 when property $p has the value $o (expanded via expandPName() first), else 0 */
+  function hasPropValue($p, $o, $s = '') {
+    $props = $this->getProps($p, $s);
+    $o = $this->expandPName($o);
+    foreach ($props as $prop) {
+      if ($prop['value'] == $o) return 1;
+    }
+    return 0;
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 format detection function
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @package ARC2
+ * @version 2010-01-18
+*/
+
+/**
+ * Guesses the format of a document from its media type, its content and,
+ * as a last resort, its file extension.
+ *
+ * Differences to the previous implementation:
+ * - an RDF DOCTYPE is recognised for prefixes of any length (e.g. "rdf:RDF"),
+ * - the DOCTYPE is removed up to its own end only, instead of up to the last
+ *   "]>" of the document (which could be the end of a CDATA section),
+ * - DOCTYPEs without an internal subset are removed as well, so that the
+ *   root element check still applies to such documents.
+ *
+ * @param string $v     (start of the) document content
+ * @param string $mtype media type, e.g. taken from a Content-Type header
+ * @param string $ext   file extension without the dot
+ * @return string|false one of atom, rdfxml, turtle, n3, sparqlxml, srx, html,
+ *                      rss, opml, xml, sgajson, cbjson, json, ntriples;
+ *                      false when nothing matches
+ */
+function ARC2_getFormat($v, $mtype = '', $ext = '') {
+  $r = false;
+  /* mtype check (atom, rdf/xml, turtle, n3, sparql results) */
+  $mtype_formats = array(
+    '/\/atom\+xml/' => 'atom',
+    '/\/rdf\+xml/' => 'rdfxml',
+    '/\/(x\-)?turtle/' => 'turtle',
+    '/\/rdf\+n3/' => 'n3',
+    '/\/sparql-results\+xml/' => 'sparqlxml',
+  );
+  foreach ($mtype_formats as $pattern => $format) {
+    if (preg_match($pattern, $mtype)) {
+      $r = $format;
+      break;
+    }
+  }
+  /* xml sniffing */
+  if (
+    !$r &&
+    /* starts with angle brackets */
+    preg_match('/^\s*\<[^\s]/s', $v) &&
+    /* has an xmlns:* declaration or a matching pair of tags */
+    (preg_match('/\sxmlns\:?/', $v) || preg_match('/\<([^\s]+).+\<\/\\1\>/s', $v)) &&
+    /* not a typical ntriples/turtle/n3 file */
+    !preg_match('/[\>\"\']\s*\.\s*$/s', $v)
+  ) {
+    /* drop leading xml declarations, then leading comments */
+    while (preg_match('/^\s*\<\?xml[^\r\n]+\?\>\s*/s', $v)) {
+      $v = preg_replace('/^\s*\<\?xml[^\r\n]+\?\>\s*/s', '', $v);
+    }
+    while (preg_match('/^\s*\<\!--.+?--\>\s*/s', $v)) {
+      $v = preg_replace('/^\s*\<\!--.+?--\>\s*/s', '', $v);
+    }
+    /* doctype checks (html, rdf) */
+    if (preg_match('/^\s*\<\!DOCTYPE\s+html[\s\>]/is', $v)) {
+      $r = 'html';
+    }
+    elseif (preg_match('/^\s*\<\!DOCTYPE\s+[a-z0-9\_\-]+\:RDF\s/is', $v)) {
+      $r = 'rdfxml';
+    }
+    /* markup checks, on the content after the doctype (with or without internal subset) */
+    $v = preg_replace('/^\s*\<\!DOCTYPE\s[^\[\>]*(\[.*?\]\s*)?\>/is', '', $v);
+    $markup_formats = array(
+      '/^\s*\<rss\s+[^\>]*version/s' => 'rss',
+      '/^\s*\<feed\s+[^\>]+http\:\/\/www\.w3\.org\/2005\/Atom/s' => 'atom',
+      '/^\s*\<opml\s/s' => 'opml',
+      '/^\s*\<html[\s\>]/is' => 'html',
+      '/^\s*\<sparql\s+[^\>]+http\:\/\/www\.w3\.org\/2005\/sparql\-results\#/s' => 'sparqlxml',
+      '/^\s*\<[^\>]+http\:\/\/www\.w3\.org\/2005\/sparql\-results#/s' => 'srx',
+      '/^\s*\<[^\s]*RDF[\s\>]/s' => 'rdfxml',
+      '/^\s*\<[^\>]+http\:\/\/www\.w3\.org\/1999\/02\/22\-rdf/s' => 'rdfxml',
+    );
+    foreach ($markup_formats as $pattern => $format) {
+      if ($r) break;
+      if (preg_match($pattern, $v)) $r = $format;
+    }
+    /* some other kind of xml */
+    if (!$r) $r = 'xml';
+  }
+  /* json|jsonp */
+  if (!$r && preg_match('/^[a-z0-9\.\(]*\s*[\{\[].*/s', trim($v))) {
+    if (preg_match('/\"canonical_mapping\"/', $v)) {/* google social graph api */
+      $r = 'sgajson';
+    }
+    elseif (preg_match('/\"permalink\"/', $v)) {/* crunchbase api */
+      $r = 'cbjson';
+    }
+    else {
+      $r = 'json';
+    }
+  }
+  /* turtle/n3 */
+  $r = (!$r && preg_match('/\@(prefix|base)/i', $v)) ? 'turtle' : $r;
+  $r = (!$r && preg_match('/^(ttl)$/', $ext)) ? 'turtle' : $r;
+  $r = (!$r && preg_match('/^(n3)$/', $ext)) ? 'n3' : $r;
+  /* ntriples */
+  $r = (!$r && preg_match('/^\s*(_:|<).+?\s+<[^>]+?>\s+\S.+?\s*\.\s*$/sm', $v)) ? 'ntriples' : $r;
+  $r = (!$r && preg_match('/^(nt)$/', $ext)) ? 'ntriples' : $r;
+  return $r;
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+function: result format detection
+author: Benjamin Nowack
+version: 2008-08-04
+*/
+
+/**
+ * Picks the result format preferred by the client, from the HTTP Accept
+ * header and the "format" GET argument (which outranks the header).
+ *
+ * Differences to the previous implementation:
+ * - a missing Accept header no longer raises a notice,
+ * - quality values are also honoured when written with whitespace
+ *   ("text/html; q=0.5"),
+ * - "text/turtle" is recognised,
+ * - a non-string "format" argument is ignored.
+ *
+ * @param string $default format key used when nothing else matches
+ * @return string|null serializer name (HTML, RDFXML, NTriples, Turtle,
+ *                     RDFJSON, XML, LegacyJSON); null when neither the
+ *                     request nor $default names a known format
+ */
+function ARC2_getPreferredFormat($default = 'plain') {
+  $formats = array(
+    'html' => 'HTML', 'text/html' => 'HTML', 'xhtml+xml' => 'HTML',
+    'rdfxml' => 'RDFXML', 'rdf+xml' => 'RDFXML',
+    'ntriples' => 'NTriples', 'rdf+n3' => 'Turtle', 'x-turtle' => 'Turtle', 'turtle' => 'Turtle',
+    'rdfjson' => 'RDFJSON', 'json' => 'RDFJSON',
+    'xml' => 'XML',
+    'legacyjson' => 'LegacyJSON'
+  );
+  $o_vals = array();
+  /* accept header */
+  $accept = isset($_SERVER['HTTP_ACCEPT']) ? $_SERVER['HTTP_ACCEPT'] : '';
+  foreach (explode(',', $accept) as $val) {
+    if (!preg_match('/(rdf\+n3|x\-turtle|turtle|rdf\+xml|text\/html|xhtml\+xml|xml|json)/', $val, $m)) continue;
+    /* priority is the q value, or 1 when none is given */
+    $o_vals[$m[1]] = preg_match('/\;\s*q\s*\=\s*([0-9\.]+)/i', $val, $sub_m) ? (float) $sub_m[1] : 1;
+  }
+  /* arg */
+  if (isset($_GET['format']) && is_string($_GET['format'])) $o_vals[$_GET['format']] = 1.1;
+  /* rank */
+  arsort($o_vals);
+  $prefs = array_keys($o_vals);
+  /* default */
+  $prefs[] = $default;
+  foreach ($prefs as $pref) {
+    if (isset($formats[$pref])) {
+      return $formats[$pref];
+    }
+  }
+  return null;
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 DC Extractor
+author: Benjamin Nowack
+version: 2008-04-09 (Fix: base URL (not doc URL) was used for annotations)
+*/
+
+ARC2::inc('RDFExtractor');
+
+/**
+ * Extracts Dublin Core style statements from the title, link and meta
+ * elements of a parsed document.
+ */
+class ARC2_DcExtractor extends ARC2_RDFExtractor {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* class-named (PHP 4 style) constructor */
+  function ARC2_DcExtractor($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  /* registers the "dc" namespace prefix */
+  function __init() {
+    parent::__init();
+    $this->a['ns']['dc'] = 'http://purl.org/dc/elements/1.1/';
+  }
+
+  /* */
+
+  /**
+   * Runs the per-tag extractors over all nodes; they build up one template
+   * plus its values, which is then filled and added as triples.
+   */
+  function extractRDF() {
+    /* tag name => extractor method */
+    $extractors = array('title' => 'extractTitle', 'link' => 'extractLink', 'meta' => 'extractMeta');
+    $t_vals = array();
+    $t = '';
+    foreach ($this->nodes as $n) {
+      foreach ($extractors as $tag => $mthd) {
+        if ($n['tag'] != $tag) continue;
+        list ($t_vals, $t) = $this->$mthd($n, $t_vals, $t);
+      }
+    }
+    if (!$t) {
+      return;
+    }
+    /* $n is the last node visited; its doc_base is used for the template */
+    $doc = $this->getFilledTemplate($t, $t_vals, $n['doc_base']);
+    $this->addTs(ARC2::getTriplesFromIndex($doc));
+  }
+
+  /* */
+
+  /* title element: dc:title of the document */
+  function extractTitle($n, $t_vals, $t) {
+    $t_vals['title'] = $this->getPlainContent($n);
+    if ($t_vals['title']) {
+      $t .= '<' . $n['doc_url'] . '> dc:title ?title . ';
+    }
+    return array($t_vals, $t);
+  }
+
+  /* */
+
+  /* link element with rel alternate/meta: rdfs:seeAlso, plus format and title of the target */
+  function extractLink($n, $t_vals, $t) {
+    $is_pointer = $this->hasRel($n, 'alternate') || $this->hasRel($n, 'meta');
+    if ($is_pointer && ($href = $this->v('href uri', '', $n['a']))) {
+      $t .= '<' . $n['doc_url'] . '> rdfs:seeAlso <' . $href . '> . ';
+      $type = $this->v('type', '', $n['a']);
+      if ($type) {
+        $t .= '<' .$href. '> dc:format "' . $type . '" . ';
+      }
+      $title = $this->v('title', '', $n['a']);
+      if ($title) {
+        $t .= '<' .$href. '> dc:title "' . $title . '" . ';
+      }
+    }
+    return array($t_vals, $t);
+  }
+
+  /* meta element with http-equiv Content-Type: dc:format of the document */
+  function extractMeta($n, $t_vals, $t) {
+    $is_ctype = $this->hasAttribute('http-equiv', $n, 'Content-Type') || $this->hasAttribute('http-equiv', $n, 'content-type');
+    if ($is_ctype) {
+      $content = $this->v('content', '', $n['a']);
+      if ($content) {
+        $t .= '<' . $n['doc_url'] . '> dc:format "' . $content . '" . ';
+      }
+    }
+    return array($t_vals, $t);
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 eRDF Extractor (w/o link title generation)
+author: Benjamin Nowack
+version: 2009-02-09 (Tweak: getRootNode returns 1st node if html tag is not found)
+*/
+
+ARC2::inc('RDFExtractor');
+
+/**
+ * Extracts eRDF statements (class/rel/rev/name hooks plus "schema." namespace
+ * links) from a parsed document.
+ */
+class ARC2_ErdfExtractor extends ARC2_RDFExtractor {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* class-named (PHP 4 style) constructor */
+  function ARC2_ErdfExtractor($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+  }
+
+  /* */
+
+  /**
+   * Walks the document from its root node and adds the resulting triples.
+   * Does nothing unless the caller flagged "erdf" in detected_formats, or
+   * when the document has no nodes.
+   */
+  function extractRDF() {
+    if (!isset($this->caller->detected_formats['erdf'])) return 0;
+    $root_node = $this->getRootNode();
+    if (!$root_node) return 0;
+    $base = $this->getDocBase();
+    $ns = $this->getNamespaces();
+    $context = array(
+      'base' => $base,
+      'prev_res' => $base,
+      'cur_res' => $base,
+      'ns' => $ns,
+      'lang' => '',
+    );
+    $this->processNode($root_node, $context);
+  }
+
+  /* */
+
+  /* the html node; failing that the node with index 0; null when there is neither */
+  function getRootNode() {
+    foreach ($this->nodes as $id => $node) {
+      if ($node['tag'] == 'html') {
+        return $node;
+      }
+    }
+    return isset($this->nodes[0]) ? $this->nodes[0] : null;
+  }
+
+  /* rdf + rdfs, plus every prefix declared via <link|a rel="schema.PREFIX" href="..."> */
+  function getNamespaces() {
+    $r = array(
+      'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+      'rdfs' => 'http://www.w3.org/2000/01/rdf-schema#'
+    );
+    foreach ($this->nodes as $id => $node) {
+      if (!preg_match('/^(link|a)$/', $node['tag'])) continue;
+      if (!isset($node['a']['rel']) || !isset($node['a']['href uri'])) continue;
+      if (preg_match('/schema\.([^\s]+)/is', $node['a']['rel'], $m)) {
+        $r[$m[1]] = $node['a']['href uri'];
+      }
+    }
+    return $r;
+  }
+
+  /* */
+
+  /**
+   * Produces the triples for node $n and recurses into its sub-nodes.
+   *
+   * @param array $n  node
+   * @param array $ct context (base, prev_res, cur_res, ns, lang), passed by
+   *                  value so that changes only affect the node's subtree
+   */
+  function processNode($n, $ct) {
+    /* context */
+    //$ct['lang'] = $this->v('xml:lang', $ct['lang'], $n['a']);
+    $ct['lang'] = '';
+    $ct['prop_uris'] = $this->getPropertyURIs($n, $ct);
+    $ct['prev_res'] = $ct['cur_res'];
+    $ct['cur_res'] = $this->getCurrentResourceURI($n, $ct);
+    $ct['cur_obj_id'] = $this->getCurrentObjectID($n, $ct);
+    $ct['cur_obj_literal'] = $this->getCurrentObjectLiteral($n, $ct);
+    /* object parts shared by the triples below */
+    $lit = $ct['cur_obj_literal'];
+    $lit_o = array(
+      'o' => $lit['value'],
+      'o_type' => 'literal',
+      'o_lang' => $lit['datatype'] ? '' : $lit['lang'],
+      'o_datatype' => $lit['datatype'],
+    );
+    $uri_o = array('o_type' => 'uri', 'o_lang' => '', 'o_datatype' => '');
+    /* resource the node links to (src wins over href) */
+    $link = $this->v('src uri', '', $n['a']);
+    if (!$link) $link = $this->v('href uri', '', $n['a']);
+    /* triple production (http://research.talis.com/2005/erdf/wiki/Main/SummaryOfTripleProductionRules) */
+    foreach ($ct['prop_uris'] as $type => $uris) {
+      foreach ($uris as $uri) {
+        /* getPropertyURIs() marks rdf:type values with a leading blank */
+        $rdf_type = preg_match('/^ /', $uri) ? 1 : 0;
+        /* meta + name */
+        if (($type == 'name') && ($n['tag'] == 'meta')) {
+          $this->addT(array('s' => $ct['cur_res'], 's_type' => 'uri', 'p' => $uri) + $lit_o);
+        }
+        /* class */
+        if ($type == 'class') {
+          if ($rdf_type) {
+            $s = $this->v('href uri', $ct['cur_res'], $n['a']);
+            $s = $this->v('src uri', $s, $n['a']);
+            $t = array('s' => $s, 's_type' => 'uri', 'p' => $ct['ns']['rdf'] . 'type', 'o' => trim($uri)) + $uri_o;
+          }
+          elseif (isset($n['a']['id'])) {/* used as object */
+            $t = array('s' => $ct['prev_res'], 's_type' => 'uri', 'p' => $uri, 'o' => $ct['cur_res']) + $uri_o;
+          }
+          elseif ($link && !$ct['prop_uris']['rel'] && !$ct['prop_uris']['rev']) {/* linked resource as object */
+            $t = array('s' => $ct['cur_res'], 's_type' => 'uri', 'p' => $uri, 'o' => $link) + $uri_o;
+          }
+          else {
+            $t = array('s' => $ct['cur_res'], 's_type' => 'uri', 'p' => $uri) + $lit_o;
+          }
+          $this->addT($t);
+        }
+        /* rel */
+        if (($type == 'rel') && $link) {
+          $this->addT(array('s' => $ct['cur_res'], 's_type' => 'uri', 'p' => $uri, 'o' => $link) + $uri_o);
+        }
+        /* rev */
+        if (($type == 'rev') && $link) {
+          $this->addT(array('s' => $link, 's_type' => 'uri', 'p' => $uri, 'o' => $ct['cur_res']) + $uri_o);
+        }
+      }
+    }
+    /* imgs + anchors: label the target with the node's literal */
+    $label_attrs = array('img' => 'src uri', 'a' => 'href uri');
+    foreach ($label_attrs as $tag => $attr) {
+      if ($n['tag'] != $tag) continue;
+      if (($s = $this->v($attr, '', $n['a'])) && $lit['value']) {
+        $this->addT(array('s' => $s, 's_type' => 'uri', 'p' => $ct['ns']['rdfs'] . 'label') + $lit_o);
+      }
+    }
+    /* recurse */
+    if ($n['tag'] == 'a') {
+      $ct['cur_res'] = $ct['cur_obj_id'];
+    }
+    foreach ($this->getSubNodes($n) as $sub_node) {
+      $this->processNode($sub_node, $ct);
+    }
+  }
+
+  /* */
+
+  /**
+   * Resolves the node's rel, rev, class, name and src tokens to property URIs.
+   *
+   * @return array attribute name => list of URIs; tokens written with a
+   *               leading "-" yield URIs prefixed with a blank (rdf:type marker)
+   */
+  function getPropertyURIs($n, $ct) {
+    $r = array();
+    foreach (array('rel', 'rev', 'class', 'name', 'src') as $type) {
+      $r[$type] = array();
+      foreach ($this->v($type . ' m', array(), $n['a']) as $val) {
+        $token = trim($val);
+        if (!$token) continue;
+        list($uri, $sub_v) = $this->xQname(trim($val, '- '), $ct['base'], $ct['ns'], $type);
+        if (!$uri) continue;
+        $r[$type][] = preg_match('/^-/', $token) ? ' ' . $uri : $uri;
+      }
+    }
+    return $r;
+  }
+
+  /* the node's own "#id" URI when it has an id, else the inherited resource */
+  function getCurrentResourceURI($n, $ct) {
+    if (!isset($n['a']['id'])) {
+      return $ct['cur_res'];
+    }
+    list($r, $sub_v) = $this->xURI('#' . $n['a']['id'], $ct['base'], $ct['ns']);
+    return $r;
+  }
+
+  /* URI from href (preferred) or src, else a fresh bnode id */
+  function getCurrentObjectID($n, $ct) {
+    foreach (array('href', 'src') as $a) {
+      if (isset($n['a'][$a])) {
+        list($r, $sub_v) = $this->xURI($n['a'][$a], $ct['base'], $ct['ns']);
+        return $r;
+      }
+    }
+    return $this->createBnodeID();
+  }
+
+  /* literal for the node: content attribute, else title attribute, else plain text content */
+  function getCurrentObjectLiteral($n, $ct) {
+    if (isset($n['a']['content'])) {
+      $value = $n['a']['content'];
+    }
+    elseif (isset($n['a']['title'])) {
+      $value = $n['a']['title'];
+    }
+    else {
+      $value = $this->getPlainContent($n);
+    }
+    return array('value' => $value, 'lang' => $ct['lang'], 'datatype' => '');
+  }
+
+  /* */
+
+  /**
+   * Resolves $v to a URI: as a prefixed name if possible, otherwise relative
+   * to $base. Plain lowercase tokens in rel/rev/class/name attributes are not
+   * resolved.
+   *
+   * @return array array(URI or 0, unconsumed value)
+   */
+  function xURI($v, $base, $ns, $attr_type = '') {
+    list($sub_r, $sub_v) = $this->xQname($v, $base, $ns);
+    if ($sub_r) {
+      return array($sub_r, $sub_v);
+    }
+    if (preg_match('/^(rel|rev|class|name)$/', $attr_type) && preg_match('/^[a-z0-9]+$/', $v)) {
+      return array(0, $v);
+    }
+    return array($this->calcURI($v, $base), '');
+  }
+
+  /**
+   * Expands "prefix-name" / "prefix.name" using the known namespaces.
+   *
+   * @return array array(URI, '') on success, array(0, $v) otherwise
+   */
+  function xQname($v, $base, $ns) {
+    $sub_r = $this->x('([a-z0-9\-\_]+)[\-\.]([a-z0-9\-\_]+)', $v);
+    if ($sub_r && isset($ns[$sub_r[1]])) {
+      return array($ns[$sub_r[1]] . $sub_r[2], '');
+    }
+    return array(0, $v);
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 microformats Extractor
+author: Benjamin Nowack
+version:
+*/
+
+ARC2::inc('ARC2_PoshRdfExtractor');
+
+class ARC2_MicroformatsExtractor extends ARC2_PoshRdfExtractor {
+
+  /**
+   * Creates the extractor; all work is delegated to the parent constructor.
+   *
+   * @param mixed $a      configuration handed through to the parent
+   * @param mixed $caller calling component (taken by reference)
+   */
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /**
+   * Class-named (PHP 4 style) constructor; forwards to __construct().
+   */
+  function ARC2_MicroformatsExtractor($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  /**
+   * Loads the microformat term table, registers the "mf" namespace and flags
+   * the caller's document as containing poshRDF.
+   */
+  function __init() {
+    parent::__init();
+    $this->terms = $this->getTerms();
+    /* namespace used for all generated terms */
+    $this->ns_prefix = 'mf';
+    $this->a['ns']['mf'] = 'http://poshrdf.org/ns/mf#';
+    /* needed so that the poshRDF extraction runs on the pre-processed nodes - TODO confirm */
+    $this->caller->detected_formats['posh-rdf'] = 1;
+  }
+
+ /* */
+
+  /**
+   * Rewrites a node's class/rel attributes so that the poshRDF extractor
+   * picks up microformat terms: existing rdf-s/p/o/o-xml hooks are removed,
+   * matching terms are renamed to "mf-TERM" and the hooks defined for the
+   * term in getTerms() are appended to the class attribute.
+   *
+   * Differences to the previous implementation:
+   * - explode() replaces split(), which current PHP versions no longer provide,
+   * - an "rdf-o-xml" hook is removed completely (it used to leave "-xml") and
+   *   only whole class tokens are removed,
+   * - term names are quoted before being used in a regular expression,
+   * - a node without attribute array no longer raises a notice.
+   *
+   * @param array $n node
+   * @return array the node with adjusted "class", "rel", "class m" and "rel m" attributes
+   */
+  function preProcessNode($n) {
+    if (!$n) return $n;
+    /* remove existing poshRDF hooks */
+    if (!isset($n['a']) || !is_array($n['a'])) $n['a'] = array();
+    $n['a']['class'] = isset($n['a']['class']) ? preg_replace('/(^|\s)rdf\-(o\-xml|s|p|o)(?=\s|$)/', '', $n['a']['class']) : '';
+    if (!isset($n['a']['rel'])) $n['a']['rel'] = '';
+    /* inject poshRDF hooks */
+    foreach ($this->terms as $term => $infos) {
+      if (!((!in_array('rel', $infos) && $this->hasClass($n, $term)) || $this->hasRel($n, $term))) continue;
+      /* a scoped term is always a property */
+      if ($this->v('scope', '', $infos)) $infos[] = 'p';
+      $hooked = 0;
+      foreach (array('s', 'p', 'o', 'o-xml') as $type) {
+        if (!in_array($type, $infos)) continue;
+        $n['a']['class'] .= ' rdf-' . $type;
+        $hooked = 1;
+      }
+      if ($hooked) {/* TERM => mf-TERM */
+        $term_regex = '/(^|\s)' . preg_quote($term, '/') . '(\s|$)/s';
+        $n['a']['class'] = preg_replace($term_regex, '\\1mf-' . $term . '\\2', $n['a']['class']);
+        $n['a']['rel'] = preg_replace($term_regex, '\\1mf-' . $term . '\\2', $n['a']['rel']);
+      }
+    }
+    $n['a']['class m'] = explode(' ', $n['a']['class']);
+    $n['a']['rel m'] = explode(' ', $n['a']['rel']);
+    return $n;
+  }
+
+  /**
+   * Like the parent implementation, but restricted to the "mf" namespace.
+   *
+   * @param array $n  node
+   * @param array $ns known namespaces; only the "mf" entry is passed on
+   */
+  function getPredicates($n, $ns) {
+    $mf_only = array('mf' => $ns['mf']);
+    return parent::getPredicates($n, $mf_only);
+  }
+
+  /**
+   * Adjusts an object value for certain predicates: for mf:tag and mf:skill
+   * the URL-decoded last path segment of the value is used; every other
+   * value is returned unchanged.
+   *
+   * @param string $o  object value
+   * @param string $p  predicate URI
+   * @param array  $ct context; $ct['ns']['mf'] holds the mf namespace URI
+   * @return string
+   */
+  function tweakObject($o, $p, $ct) {
+    $mf = $ct['ns']['mf'];
+    /* rel-tag, skill => extract from URL */
+    $url_valued = array($mf . 'tag', $mf . 'skill');
+    if (!in_array($p, $url_valued)) {
+      return $o;
+    }
+    $last_segment = preg_replace('/^.*\/([^\/]+)/', '\\1', trim($o, '/'));
+    return urldecode(rawurldecode($last_segment));
+  }
+
+ /* */
+
+ /*
+ Returns the table of recognised terms, keyed by term name.
+ Each entry is a list of flags plus an optional 'scope' list:
+ - 's', 'o', 'o-xml': preProcessNode() appends the matching rdf-<flag> hook
+   to nodes carrying the term
+ - 'rel': preProcessNode() skips the class-attribute match for the term, so
+   it is only picked up via the rel attribute
+ - 'scope': when non-empty, preProcessNode() additionally treats the term as
+   a predicate ('p')
+ - 'plain' / 'xplain': not interpreted in this method.
+   NOTE(review): 'plain' presumably marks literal-only (datatype) properties
+   and 'xplain' (on 'org') looks like a deliberately disabled 'plain' --
+   confirm against the parent extractor.
+ */
+ function getTerms() {
+ /* no need to define 'p' if scope is not empty */
+ return array(
+ 'acquaintance' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'additional-name' => array('o', 'scope' => array('n')),
+ 'adr' => array('s', 'o', 'scope' => array('_doc', 'vcard')),
+ 'affiliation' => array('s', 'o', 'scope' => array('hresume')),
+ 'author' => array('s', 'o', 'scope' => array('hentry')),
+ 'bday' => array('o', 'scope' => array('vcard')),
+ 'bio' => array('o', 'scope' => array('vcard')),
+ 'best' => array('o', 'scope' => array('hreview')),
+ 'bookmark' => array('o', 'scope' => array('_doc', 'hentry', 'hreview')),
+ 'class' => array('o', 'scope' => array('vcard', 'vevent')),
+ 'category' => array('o', 's', 'scope' => array('vcard', 'vevent')),
+ 'child' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'co-resident' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'co-worker' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'colleague' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'contact' => array('o', 'scope' => array('_doc', 'hresume', 'hentry')),
+ 'country-name' => array('o', 'scope' => array('adr')),
+ 'crush' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'date' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'description' => array('o', 'scope' => array('vevent', 'hreview', 'xfolkentry')),
+ 'directory' => array('o', 'rel', 'scope' => array('_doc', 'hfeed', 'hentry', 'hreview')),
+ 'dtend' => array('o', 'scope' => array('vevent')),
+ 'dtreviewed' => array('o', 'scope' => array('hreview')),
+ 'dtstamp' => array('o', 'scope' => array('vevent')),
+ 'dtstart' => array('o', 'scope' => array('vevent')),
+ 'duration' => array('o', 'scope' => array('vevent')),
+ 'education' => array('s', 'o', 'scope' => array('hresume')),
+ 'email' => array('s', 'o', 'scope' => array('vcard')),
+ 'entry-title' => array('o', 'scope' => array('hentry')),
+ 'entry-content' => array('o-xml', 'scope' => array('hentry')),
+ 'entry-summary' => array('o', 'scope' => array('hentry')),
+ 'experience' => array('s', 'o', 'scope' => array('hresume')),
+ 'extended-address' => array('o', 'scope' => array('adr')),
+ 'family-name' => array('o', 'scope' => array('n')),
+ 'fn' => array('o', 'plain', 'scope' => array('vcard', 'item')),
+ 'friend' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'geo' => array('s', 'scope' => array('_doc', 'vcard', 'vevent')),
+ 'given-name' => array('o', 'scope' => array('n')),
+ 'hentry' => array('s', 'o', 'scope' => array('_doc', 'hfeed')),
+ 'hfeed' => array('s', 'scope' => array('_doc')),
+ 'honorific-prefix' => array('o', 'scope' => array('n')),
+ 'honorific-suffix' => array('o', 'scope' => array('n')),
+ 'hresume' => array('s', 'scope' => array('_doc')),
+ 'hreview' => array('s', 'scope' => array('_doc')),
+ 'item' => array('s', 'scope' => array('hreview')),
+ 'key' => array('o', 'scope' => array('vcard')),
+ 'kin' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'label' => array('o', 'scope' => array('vcard')),
+ 'last-modified' => array('o', 'scope' => array('vevent')),
+ 'latitude' => array('o', 'scope' => array('geo')),
+ 'license' => array('o', 'rel', 'scope' => array('_doc', 'hfeed', 'hentry', 'hreview')),
+ 'locality' => array('o', 'scope' => array('adr')),
+ 'location' => array('o', 'scope' => array('vevent')),
+ 'logo' => array('o', 'scope' => array('vcard')),
+ 'longitude' => array('o', 'scope' => array('geo')),
+ 'mailer' => array('o', 'scope' => array('vcard')),
+ 'me' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'met' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'muse' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'n' => array('s', 'o', 'scope' => array('vcard')),
+ 'neighbor' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'nickname' => array('o', 'plain', 'scope' => array('vcard')),
+ 'nofollow' => array('o', 'rel', 'scope' => array('_doc')),
+ 'note' => array('o', 'scope' => array('vcard')),
+ 'org' => array('o', 'xplain', 'scope' => array('vcard')),
+ 'parent' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'permalink' => array('o', 'scope' => array('hreview')),
+ 'photo' => array('o', 'scope' => array('vcard', 'item')),
+ 'post-office-box' => array('o', 'scope' => array('adr')),
+ 'postal-code' => array('o', 'scope' => array('adr')),
+ 'publication' => array('s', 'o', 'scope' => array('hresume')),
+ 'published' => array('o', 'scope' => array('hentry')),
+ 'rating' => array('o', 'scope' => array('hreview')),
+ 'region' => array('o', 'scope' => array('adr')),
+ 'rev' => array('o', 'scope' => array('vcard')),
+ 'reviewer' => array('s', 'o', 'scope' => array('hreview')),
+ 'role' => array('o', 'plain', 'scope' => array('vcard')),
+ 'sibling' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'skill' => array('o', 'scope' => array('hresume')),
+ 'sort-string' => array('o', 'scope' => array('vcard')),
+ 'sound' => array('o', 'scope' => array('vcard')),
+ 'spouse' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'status' => array('o', 'plain', 'scope' => array('vevent')),
+ 'street-address' => array('o', 'scope' => array('adr')),
+ 'summary' => array('o', 'scope' => array('vevent', 'hreview', 'hresume')),
+ 'sweetheart' => array('o', 'rel', 'scope' => array('_doc', 'hentry')),
+ 'tag' => array('o', 'rel', 'scope' => array('_doc', 'category', 'hfeed', 'hentry', 'skill', 'hreview', 'xfolkentry')),
+ 'taggedlink' => array('o', 'scope' => array('xfolkentry')),
+ 'title' => array('o', 'scope' => array('vcard')),
+ 'type' => array('o', 'scope' => array('adr', 'email', 'hreview', 'tel')),
+ 'tz' => array('o', 'scope' => array('vcard')),
+ 'uid' => array('o', 'scope' => array('vcard', 'vevent')),
+ 'updated' => array('o', 'scope' => array('hentry')),
+ 'url' => array('o', 'scope' => array('vcard', 'vevent', 'item')),
+ 'value' => array('o', 'scope' => array('email', 'adr', 'tel')),
+ 'vcard' => array('s', 'scope' => array('author', 'reviewer', 'affiliation', 'contact')),
+ 'version' => array('o', 'scope' => array('hreview')),
+ 'vevent' => array('s', 'scope' => array('_doc')),
+ 'worst' => array('o', 'scope' => array('hreview')),
+ 'xfolkentry' => array('s', 'scope' => array('_doc')),
+ );
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 foaf:openid Extractor
+author: Benjamin Nowack
+version: 2007-10-08
+*/
+
+ARC2::inc('RDFExtractor');
+
+class ARC2_OpenidExtractor extends ARC2_RDFExtractor {
+
+ /* PHP 5 constructor: passes config and caller through to the base extractor */
+ function __construct($a = '', &$caller) {
+   parent::__construct($a, $caller);
+ }
+
+ /* PHP 4 style constructor, forwards to __construct() */
+ function ARC2_OpenidExtractor($a = '', &$caller) {
+   $this->__construct($a, $caller);
+ }
+
+ /* registers the foaf prefix on top of the parent's setup */
+ function __init() {
+   parent::__init();
+   $this->a['ns']['foaf'] = 'http://xmlns.com/foaf/0.1/';
+ }
+
+ /* */
+
+ /*
+ Collects a triple template from every <link> node and, if anything was
+ collected, fills the template and hands the resulting triples to the caller.
+ */
+ function extractRDF() {
+   $t_vals = array();
+   $t = '';
+   foreach ($this->nodes as $n) {
+     if (!isset($n['tag']) || ($n['tag'] != 'link')) continue;
+     $res = $this->extractLink($n, $t_vals, $t);
+     $t_vals = $res[0];
+     $t = $res[1];
+   }
+   if (!$t) return;
+   /* $n is whichever node the loop visited last; its doc_base is used as template base */
+   $doc = $this->getFilledTemplate($t, $t_vals, $n['doc_base']);
+   $this->addTs(ARC2::getTriplesFromIndex($doc));
+ }
+
+ /* */
+
+ /*
+ Extends the template ($t) and its placeholder values ($t_vals) for one
+ <link> node: rel="openid.server" ties the document owner to the document
+ itself, rel="openid.delegate" ties the owner to the link target.
+ Returns array($t_vals, $t).
+ */
+ function extractLink($n, $t_vals, $t) {
+   if ($this->hasRel($n, 'openid.server') && $this->v('href uri', '', $n['a'])) {
+     $t_vals['doc_owner'] = $this->getDocOwnerID($n);
+     $t_vals['doc_id'] = $this->getDocID($n);
+     $t .= '?doc_owner foaf:homepage ?doc_id ; foaf:openid ?doc_id . ';
+   }
+   if ($this->hasRel($n, 'openid.delegate') && ($href = $this->v('href uri', '', $n['a']))) {
+     $t_vals['doc_owner'] = $this->getDocOwnerID($n);
+     $target = '<' . $href . '>';
+     $t .= '?doc_owner foaf:homepage ' . $target . ' ; foaf:openid ' . $target . ' . ';
+   }
+   return array($t_vals, $t);
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 poshRDF Extractor
+author: Benjamin Nowack
+version:
+*/
+
+ARC2::inc('ARC2_RDFExtractor');
+
+/*
+Generic poshRDF extractor: walks the node tree starting at the root node and
+emits triples for nodes whose class/rel attributes carry rdf-s / rdf-p /
+rdf-o hooks. preProcessNode() and tweakObject() are no-op hooks in this class.
+*/
+class ARC2_PoshRdfExtractor extends ARC2_RDFExtractor {
+
+ /* PHP 5 constructor: forwards config ($a) and caller to the parent */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4 style constructor, forwards to __construct() */
+ function ARC2_PoshRdfExtractor($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /*
+ Reads the term table from the 'posh_terms' config entry, sets the prefix
+ used for term URIs and adds a set of default namespaces. The array union
+ (+=) keeps any prefix that is already present in $this->a['ns'].
+ */
+ function __init() {
+ parent::__init();
+ $this->terms = $this->v('posh_terms', array(), $this->a);
+ $this->ns_prefix = 'posh';
+ $this->a['ns'] += array(
+ 'an' => 'http://www.w3.org/2000/10/annotation-ns#',
+ 'content' => 'http://purl.org/rss/1.0/modules/content/',
+ 'dc' => 'http://purl.org/dc/elements/1.1/',
+ 'dct' => 'http://purl.org/dc/terms/',
+ 'foaf' => 'http://xmlns.com/foaf/0.1/',
+ 'geo' => 'http://www.w3.org/2003/01/geo/wgs84_pos#',
+ 'ical' => 'http://www.w3.org/2002/12/cal/icaltzd#',
+ 'owl' => 'http://www.w3.org/2002/07/owl#',
+ 'posh' => 'http://poshrdf.org/ns/posh/',
+ 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+ 'rdfs' => 'http://www.w3.org/2000/01/rdf-schema#',
+ 'rev' => 'http://www.purl.org/stuff/rev#',
+ 'rss' => 'http://purl.org/rss/1.0/',
+ 'sioc' => 'http://rdfs.org/sioc/ns#',
+ 'skos' => 'http://www.w3.org/2008/05/skos#',
+ 'uri' => 'http://www.w3.org/2006/uri#',
+ 'vcard' => 'http://www.w3.org/2006/vcard/ns#',
+ 'xfn' => 'http://gmpg.org/xfn/11#',
+ 'xml' => 'http://www.w3.org/XML/1998/namespace',
+ 'xsd' => 'http://www.w3.org/2001/XMLSchema#',
+ );
+ }
+
+ /* */
+
+ /*
+ Entry point. Returns 0 without doing anything unless the caller has flagged
+ 'posh-rdf' in detected_formats; otherwise builds the initial context (the
+ document itself is the first subject, keyed '_doc') and processes the tree
+ from the root node. Nothing is returned in that case.
+ */
+ function extractRDF() {
+ if (!isset($this->caller->detected_formats['posh-rdf'])) return 0;
+ $n = $this->getRootNode();
+ $base = $this->getDocBase();
+ $context = array(
+ 'id' => $n['id'],
+ 'tag' => $n['tag'],
+ 'base' => $base,
+ 's' => array(array('_doc', $base)),
+ 'next_s' => array('_doc', $base),
+ 'ps' => array(),
+ 'ns' => $this->a['ns'],
+ 'lang' => '',
+ 'rpointer' => '',
+ );
+ $ct = $this->processNode($n, $context, 0, 1);
+ }
+
+ /* */
+
+ /* Returns the first node whose tag is 'html', falling back to $this->nodes[0]. */
+ function getRootNode() {
+ foreach ($this->nodes as $id => $node) {
+ if ($node['tag'] == 'html') {
+ return $node;
+ }
+ }
+ return $this->nodes[0];
+ }
+
+ /* */
+
+ /*
+ Processes one node and, recursively, its sub-nodes.
+ $n      node array
+ $ct     context inherited from the parent (or from the previous sibling, see below)
+ $level  nesting depth, only passed on (incremented) to sub-nodes
+ $pos    1-based position among the processed siblings, used for the rpointer
+ Returns the node's local context. The caller copies 'next_s' and 'ps' from
+ it into the context handed to the following sibling.
+ */
+ function processNode($n, $ct, $level, $pos) {
+ $n = $this->preProcessNode($n);
+ /* local context */
+ /* rpointer: the node's id attribute if set, '' for cdata, otherwise the parent's rpointer plus '/' and the position */
+ $lct = array_merge($ct, array(
+ 'ns' => array_merge($ct['ns'], $this->v('xmlns', array(), $n['a'])),
+ 'rpointer' => isset($n['a']['id']) ? $n['a']['id'] : ($n['tag'] == 'cdata' ? '' : $ct['rpointer'] . '/' . $pos),
+ 'tag' => $n['tag'],
+ 'id' => $n['id'],
+ 'lang' => $this->v('xml:lang', $ct['lang'], $n['a']),
+ ));
+ /* s stack */
+ /* push the pending subject if its key differs from the stack top, otherwise just refresh the top's value */
+ $next_s_key = $lct['next_s'][0];
+ $next_s_val = $lct['next_s'][1];
+ if ($lct['s'][0][0] != $next_s_key) {
+ $lct['s'] = array_merge(array($lct['next_s']), $lct['s']);
+ }
+ else {
+ $lct['s'][0][1] = $next_s_val;
+ }
+ /* new s */
+ /* the full class string serves as the stack key; getContainerSubject() matches scopes against it */
+ if ($this->hasClass($n, 'rdf-s')) {
+ $lct['next_s'] = array($n['a']['class'], $this->getSubject($n, $lct));
+ //echo "\ns: " . print_r($lct['next_s'], 1);
+ }
+ /* p */
+ if ($this->hasClass($n, 'rdf-p') || $this->hasRel($n, 'rdf-p')) {
+ if ($ps = $this->getPredicates($n, $lct['ns'])) {
+ $lct['ps'] = $ps;
+ $this->addPoshTypes($lct);
+ }
+ }
+ /* o */
+ /* $m[3] is the typed-hook suffix (xml, dateTime, float, integer, boolean) or '' for a plain rdf-o */
+ $cls = $this->v('class', '', $n['a']);
+ if ($lct['ps'] && preg_match('/(^|\s)rdf\-(o|o\-(xml|dateTime|float|integer|boolean))($|\s)/s', $cls, $m)) {
+ $this->addTriples($n, $lct, $m[3]);
+ }
+ /* sub-nodes */
+ /* cdata and comment nodes are skipped and do not advance the position counter */
+ if ($sub_nodes = $this->getSubNodes($n)) {
+ $cur_ct = $lct;
+ $sub_pos = 1;
+ foreach ($sub_nodes as $i => $sub_node) {
+ if (in_array($sub_node['tag'], array('cdata', 'comment'))) continue;
+ $sub_ct = $this->processNode($sub_node, $cur_ct, $level + 1, $sub_pos);
+ $sub_pos++;
+ $cur_ct['next_s'] = $sub_ct['next_s'];
+ $cur_ct['ps'] = $sub_ct['ps'];
+ }
+ }
+ return $lct;
+ }
+
+ /* */
+
+ /*
+ Subject for an rdf-s node: the first attribute present among 'href uri',
+ 'src uri', 'title', 'value'; otherwise a URI built from the document base
+ and the node's rpointer.
+ */
+ function getSubject($n, $ct) {
+ foreach (array('href uri', 'src uri', 'title', 'value') as $k) {
+ if (isset($n['a'][$k])) return $n['a'][$k];
+ }
+ /* rpointer */
+ return $ct['base'] . '#resource(' . $ct['rpointer'] . ')';
+ }
+
+ /*
+ Predicate URIs for a node. Class/rel tokens of the form "prefix-local" whose
+ prefix is a key of $ns are expanded to namespace + local name; the rdf-*
+ hook tokens themselves are ignored. If no token qualifies, the first of the
+ 'href uri' / 'title' attributes is used as the single predicate.
+ */
+ function getPredicates($n, $ns) {
+ $r = array();
+ /* try pnames */
+ $vals = array_merge($this->v('class m', array(), $n['a']), $this->v('rel m', array(), $n['a']));
+ foreach ($vals as $val) {
+ if (!preg_match('/^([a-z0-9]+)\-([a-z0-9\-\_]+)$/i', $val, $m)) continue;
+ if (!isset($ns[$m[1]])) continue;
+ if (preg_match('/^rdf-(s|p|o|o-(xml|dateTime|float|integer|boolean))$/', $val)) continue;
+ $r[] = $ns[$m[1]] . $m[2];
+ }
+ /* try other attributes */
+ if (!$r) {
+ foreach (array('href uri', 'title') as $k) {
+ if (isset($n['a'][$k])) {
+ $r[] = $n['a'][$k];
+ break;
+ }
+ }
+ }
+ return $r;
+ }
+
+ /*
+ Emits one triple per predicate in $ct['ps'] for an rdf-o node.
+ $o_type is the typed-hook suffix ('xml', 'dateTime', ...) or '' for plain
+ objects. The object is the node's attribute value (or, for nodes that are
+ also rdf-s, the new subject) unless the term is a datatype property, in
+ which case the node's text content is used. Predicates with an empty object
+ or without a matching container subject are skipped.
+ */
+ function addTriples($n, $ct, $o_type) {
+ foreach (array('href uri', 'src uri', 'title', 'value') as $k) {
+ if (isset($n['a'][$k])) {
+ $node_o = $n['a'][$k];
+ break;
+ }
+ }
+ if (!isset($node_o) && $this->hasClass($n, 'rdf-s')) {
+ $node_o = $ct['next_s'][1];
+ }
+ $lit_o = ($o_type == 'xml') ? $this->getContent($n) : $this->getPlainContent($n);
+ $posh_ns = $ct['ns'][$this->ns_prefix];
+ $rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ $xsd = 'http://www.w3.org/2001/XMLSchema#';
+ foreach ($ct['ps'] as $p) {
+ $p_key = str_replace($posh_ns, '', $p);
+ /* dt or obj */
+ $o = $this->isDatatypeProperty($p_key) ? $lit_o : (isset($node_o) ? $node_o : $lit_o);
+ if (!$o) continue;
+ if (!$s = $this->getContainerSubject($ct, $p_key)) continue;
+ /* the language tag is only kept for untyped objects equal to the literal content */
+ $lang = (($o == $lit_o) && !$o_type) ? $ct['lang'] : '';
+ $o = $this->tweakObject($o, $p, $ct);
+ $this->addT(array(
+ 's' => $this->getContainerSubject($ct, $p_key),
+ 's_type' => preg_match('/^\_\:/', $s) ? 'bnode' : 'uri',
+ 'p' => $p,
+ 'o' => $o,
+ 'o_type' => $this->getObjectType($o, $p_key),
+ 'o_lang' => $lang,
+ 'o_datatype' => ($o_type == 'xml') ? $rdf . 'XMLLiteral' : ($o_type ? $xsd . $o_type : ''),
+ ));
+ }
+ }
+
+ /*
+ For every predicate in $ct['ps'] whose term is flagged as a subject, emits
+ an rdf:type triple for the pending subject ($ct['next_s']); the type URI is
+ the term namespace plus the ucfirst()'ed term.
+ */
+ function addPoshTypes($ct) {
+ $posh_ns = $ct['ns'][$this->ns_prefix];
+ foreach ($ct['ps'] as $p) {
+ $p_key = str_replace($posh_ns, '', $p);
+ if (!$this->isSubject($p_key)) continue;
+ $s = $ct['next_s'][1];
+ $this->addT(array(
+ 's' => $s,
+ 's_type' => preg_match('/^\_\:/', $s) ? 'bnode' : 'uri',
+ 'p' => $ct['ns']['rdf'] . 'type',
+ 'o' => $posh_ns . ucfirst($p_key),
+ 'o_type' => 'uri',
+ 'o_lang' => '',
+ 'o_datatype' => '',
+ ));
+ }
+ }
+
+ /* */
+
+ /* Hook called at the start of processNode(); returns the node unchanged in this class. */
+ function preProcessNode($n) {
+ return $n;
+ }
+
+ /*
+ Picks the subject a term's triples attach to. Terms without an entry in
+ $this->terms, or without a scope, use the top of the subject stack.
+ Otherwise the stack is searched top-down for an entry whose key (with the
+ "<ns_prefix>-" prefix removed) contains one of the scope names as a token.
+ Returns 0 when no entry matches.
+ */
+ function getContainerSubject($ct, $term) {
+ if (!isset($this->terms[$term])) return $ct['s'][0][1];
+ $scope = $this->v('scope', array(), $this->terms[$term]);
+ if (!$scope) return $ct['s'][0][1];
+ $scope_re = join('|', $scope);
+ foreach ($ct['s'] as $s) {
+ if (preg_match('/(^|\s)(' . $scope_re. ')($|\s)/s', str_replace($this->ns_prefix . '-', '', $s[0]))) return $s[1];
+ }
+ return 0;
+ }
+
+ /* True-ish if the term is known and carries the 's' flag; 0 for unknown terms. */
+ function isSubject($term) {
+ if (!isset($this->terms[$term])) return 0;
+ return in_array('s', $this->terms[$term]);
+ }
+
+ /* True-ish if the term is known and carries the 'plain' flag; 0 for unknown terms. */
+ function isDatatypeProperty($term) {
+ if (!isset($this->terms[$term])) return 0;
+ return in_array('plain', $this->terms[$term]);
+ }
+
+ /*
+ Classifies an object value as 'literal', 'bnode' or 'uri': datatype
+ properties and values containing a space are literals; otherwise a value of
+ the form "scheme:rest" is a bnode when the scheme is "_" and a uri for any
+ other scheme; everything else is a literal.
+ */
+ function getObjectType($o, $term) {
+ if ($this->isDatatypeProperty($term)) return 'literal';
+ if (strpos($o, ' ')) return 'literal';
+ return preg_match('/^([a-z0-9\_]+)\:[^\s]+$/s', $o, $m) ? ($m[1] == '_' ? 'bnode' : 'uri') : 'literal';
+ }
+
+ /* Hook applied to each object before the triple is added; returns $o unchanged in this class. */
+ function tweakObject($o, $p, $ct) {
+ return $o;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 RDF Extractor
+author: Benjamin Nowack
+version: 2008-11-18 (Fix: Skip comments. Thanks to Masahide Kanzaki)
+*/
+
+ARC2::inc('Class');
+
+/*
+Base class for the extractors: provides access to the caller's node list and
+sub-node index, node/attribute helpers, content serialisation and triple
+forwarding. extractRDF() is an empty stub here.
+*/
+class ARC2_RDFExtractor extends ARC2_Class {
+
+ /* PHP 5 constructor: forwards config ($a) and caller to ARC2_Class */
+ function __construct($a = '', &$caller) {
+   parent::__construct($a, $caller);
+ }
+
+ /* PHP 4 style constructor, forwards to __construct() */
+ function ARC2_RDFExtractor($a = '', &$caller) {
+   $this->__construct($a, $caller);
+ }
+
+ /*
+ Fetches nodes and the node index from the caller and initialises bnode
+ generation ('bnode_prefix' config or a random "arcXXXXb" prefix), the
+ 'keep_cdata_whitespace' switch and, if none is configured, a namespace map
+ holding only the rdf prefix.
+ */
+ function __init() {
+   parent::__init();
+   $this->nodes = $this->caller->getNodes();
+   $this->index = $this->caller->getNodeIndex();
+   $this->bnode_prefix = $this->v('bnode_prefix', 'arc' . substr(md5(uniqid(rand())), 0, 4) . 'b', $this->a);
+   $this->bnode_id = 0;
+   $this->keep_cdata_ws = $this->v('keep_cdata_whitespace', 0, $this->a);
+   if (!isset($this->a['ns'])) $this->a['ns'] = array('rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
+ }
+
+ /* */
+
+ /* Thin wrapper around ARC2::x(); default options are 'si'. */
+ function x($re, $v, $options = 'si') {
+   return ARC2::x($re, $v, $options);
+ }
+
+ /* Upper-cases the first character and removes every '-', '_' or ' ' while upper-casing the character that follows it. */
+ function camelCase($v) {
+   $r = ucfirst($v);
+   while (preg_match('/^(.*)[\-\_ ](.*)$/', $r, $m)) {
+     $r = $m[1] . ucfirst($m[2]);
+   }
+   return $r;
+ }
+
+ /* Returns a fresh blank node id: "_:" + prefix + running counter. */
+ function createBnodeID(){
+   $this->bnode_id++;
+   return '_:' . $this->bnode_prefix . $this->bnode_id;
+ }
+
+ /* */
+
+ /* Extraction entry point; intentionally empty in the base class. */
+ function extractRDF() {
+ }
+
+ /* */
+
+ /* Forwards each triple of $ts to the caller. */
+ function addTs($ts) {
+   foreach ($ts as $t) {
+     $this->caller->addT($t);
+   }
+ }
+
+ /* Forwards a single triple to the caller and returns the caller's result. */
+ function addT($t) {
+   return $this->caller->addT($t);
+ }
+
+ /* */
+
+ /* Sub-nodes of $n as listed in the index under the node's id; empty array if there are none. */
+ function getSubNodes($n) {
+   return $this->v($n['id'], array(), $this->index);
+ }
+
+ /* Parent node of $n (looked up via 'p_id'), or 0 when $n has no known parent. */
+ function getParentNode($n) {
+   /* guard the p_id lookup: a node without a parent id simply has no parent */
+   if (!isset($n['p_id'])) return 0;
+   return isset($this->nodes[$n['p_id']]) ? $this->nodes[$n['p_id']] : 0;
+ }
+
+ /* */
+
+ /*
+ Collects the topmost nodes carrying class $cls in the subtree of $n. A
+ matching node is returned without descending into it; with $skip_self set,
+ $n itself is not tested.
+ */
+ function getSubNodesByClass($n, $cls, $skip_self = 0) {
+   if (!$skip_self && $this->hasClass($n, $cls)) {
+     return array($n);
+   }
+   $r = array();
+   foreach ($this->getSubNodes($n) as $sn) {
+     if ($sub_r = $this->getSubNodesByClass($sn, $cls, 0)) {
+       $r = array_merge($r, $sub_r);
+     }
+   }
+   return $r;
+ }
+
+ /* First node (depth-first) carrying class $cls in the subtree of $n, or 0. */
+ function getSubNodeByClass($n, $cls, $skip_self = 0) {
+   if (!$skip_self && $this->hasClass($n, $cls)) {
+     return $n;
+   }
+   foreach ($this->getSubNodes($n) as $sn) {
+     if ($sub_r = $this->getSubNodeByClass($sn, $cls, 0)) {
+       return $sub_r;
+     }
+   }
+   return 0;
+ }
+
+ /* Nearest ancestor (or $n itself unless $skip_self) carrying class $cls, or 0. */
+ function getParentNodeByClass($n, $cls, $skip_self = 0) {
+   if (!$skip_self && $this->hasClass($n, $cls)) {
+     return $n;
+   }
+   if ($pn = $this->getParentNode($n)) {
+     if ($sub_r = $this->getParentNodeByClass($pn, $cls, 0)) {
+       return $sub_r;
+     }
+   }
+   return 0;
+ }
+
+ /* */
+
+ /*
+ Returns 1 if the tokenised attribute list "<$a> m" of node $n contains $v
+ (or, when $v is an array, at least one of its values), else 0.
+ */
+ function hasAttribute($a, $n, $v) {
+   $vs = is_array($v) ? $v : array($v);
+   $a_vs = $this->v($a . ' m', array(), $n['a']);
+   return array_intersect($vs, $a_vs) ? 1 : 0;
+ }
+
+ /* hasAttribute() for the class attribute */
+ function hasClass($n, $v) {
+   return $this->hasAttribute('class', $n, $v);
+ }
+
+ /* hasAttribute() for the rel attribute */
+ function hasRel($n, $v) {
+   return $this->hasAttribute('rel', $n, $v);
+ }
+
+ /* */
+
+ /*
+ Document base: the root node's 'doc_base', overridden by the href of a
+ <base> element found directly under <head>.
+ NOTE(review): getRootNode() is not defined in this class in the code
+ visible here; it appears to be supplied by the subclasses that call this.
+ */
+ function getDocBase() {
+   $root_node = $this->getRootNode();
+   $r = $root_node['doc_base'];
+   foreach ($this->getSubNodes($root_node) as $root_child) {
+     if ($root_child['tag'] != 'head') continue;
+     foreach ($this->getSubNodes($root_child) as $head_child) {
+       if ($head_child['tag'] != 'base') continue;
+       /* <base target="..."> is valid without href: skip it instead of replacing the base with an undefined value */
+       if (!isset($head_child['a']['href']) || ($head_child['a']['href'] === '')) continue;
+       $r = $head_child['a']['href'];
+       break;
+     }
+   }
+   return $r;
+ }
+
+ /* */
+
+ /*
+ Text content of a node with all whitespace runs collapsed to single spaces.
+ Comments yield '', cdata nodes their value, <img> its alt text (when
+ $use_img_alt is set and the node has no own cdata); other elements their
+ own cdata (if non-blank) followed by the text of all sub-nodes.
+ $trim  trim the result (sub-node calls pass 0 so inner spacing survives)
+ */
+ function getPlainContent($n, $trim = 1, $use_img_alt = 1) {
+   if ($n['tag'] == 'comment') {
+     $r = '';
+   }
+   elseif ($n['tag'] == 'cdata') {
+     $r = $n['a']['value'];
+   }
+   elseif (trim($this->v('cdata', '', $n))) {
+     $r = $n['cdata'];
+     foreach ($this->getSubNodes($n) as $sub_n) {
+       $r .= $this->getPlainContent($sub_n, 0, $use_img_alt);
+     }
+   }
+   elseif (($n['tag'] == 'img') && $use_img_alt && isset($n['a']['alt'])) {
+     $r = $n['a']['alt'];
+   }
+   else {
+     $r = '';
+     foreach ($this->getSubNodes($n) as $sub_n) {
+       $r .= $this->getPlainContent($sub_n, 0, $use_img_alt);
+     }
+   }
+   /* one pass: every run of whitespace becomes a single space */
+   $r = preg_replace('/\s+/s', ' ', $r);
+   return $trim ? trim($r) : $r;
+ }
+
+ /*
+ Markup content of a node. Comments are re-wrapped in <!-- -->, cdata is
+ returned as-is; elements are serialised recursively, including their own
+ tag when $outer is set (sub-nodes are always serialised with their tags).
+ The result is trimmed when $trim is set and whitespace keeping is off.
+ */
+ function getContent($n, $outer = 0, $trim = 1) {
+   if ($n['tag'] == 'comment') {
+     $r = '<!-- ' . $n['a']['value'] . ' -->';
+   }
+   elseif ($n['tag'] == 'cdata') {
+     $r = $n['a']['value'];
+   }
+   else {
+     $r = '';
+     if ($outer) {
+       $r .= '<' . $n['tag'];
+       asort($n['a']);
+       /* only a declared, non-empty default namespace is written out; !empty() also covers a missing '' key */
+       if (!empty($n['a']['xmlns'][''])) {
+         $r .= ' xmlns="' . $n['a']['xmlns'][''] . '"';
+       }
+       foreach ($n['a'] as $a => $val) {
+         /* prefer the resolved "<attr> uri" value where one exists */
+         if (!is_array($val) && isset($n['a'][$a . ' uri'])) $val = $n['a'][$a . ' uri'];
+         /* helper entries (names containing whitespace, e.g. "class m") and array values are not real attributes */
+         $r .= preg_match('/^[^\s]+$/', $a) && !is_array($val) ? ' ' . $a . '="' . addslashes($val) . '"' : '';
+       }
+       $r .= $n['empty'] ? '/>' : '>';
+     }
+     if (!$n['empty']) {
+       $r .= $this->v('cdata', '', $n);
+       foreach ($this->getSubNodes($n) as $sub_n) {
+         $r .= $this->getContent($sub_n, 1, 0);
+       }
+       if ($outer) {
+         $r .= '</' . $n['tag'] . '>';
+       }
+     }
+   }
+   return ($trim && !$this->keep_cdata_ws) ? trim($r) : $r;
+ }
+
+ /* */
+
+ /* Document identifier for a node: its 'doc_url', memoised in the caller's cache under "doc_<node id>". */
+ function getDocID($n) {
+   $id = $n['id'];
+   $k = 'doc_' . $id;
+   if (!isset($this->caller->cache[$k])) {
+     $this->caller->cache[$k] = $n['doc_url'];
+   }
+   return $this->caller->cache[$k];
+ }
+
+ /* Blank node id standing for the owner of the node's document. */
+ function getDocOwnerID($n) {
+   return '_:owner_of_' . $this->normalize($this->getDocID($n));
+ }
+
+ /* */
+
+ /*
+ Turns a value into an identifier-safe token: lower-cased, tags stripped,
+ non-word runs replaced by '_', every "http" removed, repeated underscores
+ collapsed and leading/trailing underscores trimmed.
+ */
+ function normalize($v) {
+   $v = preg_replace('/[\W\s]+/is', '_', strip_tags(strtolower($v)));
+   $v = preg_replace('/http/', '', $v);
+   $v = preg_replace('/[\_]+/', '_', $v);
+   //$v = substr($v, 0, 30);
+   $v = trim($v, '_');
+   return $v;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 RDFa Extractor
+author: Benjamin Nowack
+version: 2009-05-29 (Fix: CURIEs support DOTs now)
+*/
+
+ARC2::inc('RDFExtractor');
+
+class ARC2_RdfaExtractor extends ARC2_RDFExtractor {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ function ARC2_RdfaExtractor($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ function extractRDF() {
+ //echo '<pre>' . htmlspecialchars(print_r($this->nodes, 1)) . '</pre>';
+ if (!isset($this->caller->detected_formats['rdfa'])) return 0;
+ $root_node = $this->getRootNode();
+ //$base = $this->v('xml:base', $this->getDocBase(), $root_node['a']);
+ $base = $this->getDocBase();
+ $context = array(
+ 'base' => $base,
+ 'p_s' => $base,
+ 'p_o' => '',
+ 'ns' => array(),
+ 'inco_ts' => array(),
+ 'lang' => '',
+ );
+ $this->processNode($root_node, $context, 0);
+ }
+
+ /* */
+
+ function getRootNode() {
+ foreach ($this->nodes as $id => $node) {
+ if ($node['tag'] == 'html') {
+ return $node;
+ }
+ }
+ return $this->nodes[0];
+ }
+
+ /* */
+
+ function processNode($n, $ct, $level) {
+ if ($n['tag']=='cdata' || $n['tag']=='comment') return null; /* patch by tobyink */
+ $ts_added = 0;
+ /* step 1 */
+ $lct = array();
+ $lct['prev_s'] = $this->v('prev_s', $this->v('p_s', '', $ct), $ct);
+ $lct['recurse'] = 1;
+ $lct['skip'] = 0;
+ $lct['new_s'] = '';
+ $lct['cur_o_res'] = '';
+ $lct['inco_ts'] = array();
+ $lct['base'] = $ct['base'];
+ //$lct['base'] = $this->v('xml:base', $ct['base'], $n['a']);
+ /* step 2 */
+ $lct['ns'] = array_merge($ct['ns'], $this->v('xmlns', array(), $n['a']));
+ /* step 3 */
+ $lct['lang'] = $this->v('xml:lang', $ct['lang'], $n['a']);
+ /* step 4 */
+ $rel_uris = $this->getAttributeURIs($n, $ct, $lct, 'rel');
+ $rev_uris = $this->getAttributeURIs($n, $ct, $lct, 'rev');
+ if (!$rel_uris && !$rev_uris) {
+ foreach (array('about', 'src', 'resource', 'href') as $attr) {
+ if (isset($n['a'][$attr]) && (list($uri, $sub_v) = $this->xURI($n['a'][$attr], $lct['base'], $lct['ns'], '', $lct)) && $uri) {
+ $lct['new_s'] = $uri;
+ break;
+ }
+ }
+ if (!$lct['new_s']) {
+ if (preg_match('/(head|body)/i', $n['tag'])) {
+ $lct['new_s'] = $lct['base'];
+ }
+ elseif ($this->getAttributeURIs($n, $ct, $lct, 'typeof')) {
+ $lct['new_s'] = $this->createBnodeID();
+ }
+ elseif ($ct['p_o']) {
+ $lct['new_s'] = $ct['p_o'];
+ //$lct['skip'] = 1;
+ if(!isset($n['a']['property'])) $lct['skip'] = 1;/* patch by masaka */
+ }
+ }
+ }
+ /* step 5 */
+ else {
+ foreach (array('about', 'src') as $attr) {
+ if (isset($n['a'][$attr]) && (list($uri, $sub_v) = $this->xURI($n['a'][$attr], $lct['base'], $lct['ns'], '', $lct)) && $uri) {
+ $lct['new_s'] = $uri;
+ break;
+ }
+ }
+ if (!$lct['new_s']) {
+ if (preg_match('/(head|body)/i', $n['tag'])) {
+ $lct['new_s'] = $lct['base'];
+ }
+ elseif ($this->getAttributeURIs($n, $ct, $lct, 'typeof')) {
+ $lct['new_s'] = $this->createBnodeID();
+ }
+ elseif ($ct['p_o']) {
+ $lct['new_s'] = $ct['p_o'];
+ }
+ }
+ foreach (array('resource', 'href') as $attr) {
+ if (isset($n['a'][$attr]) && (list($uri, $sub_v) = $this->xURI($n['a'][$attr], $lct['base'], $lct['ns'], '', $lct)) && $uri) {
+ $lct['cur_o_res'] = $uri;
+ break;
+ }
+ }
+ }
+ /* step 6 */
+ if ($lct['new_s']) {
+ if ($uris = $this->getAttributeURIs($n, $ct, $lct, 'typeof')) {
+ foreach ($uris as $uri) {
+ $this->addT(array(
+ 's' => $lct['new_s'],
+ 's_type' => preg_match('/^\_\:/', $lct['new_s']) ? 'bnode' : 'uri',
+ 'p' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
+ 'o' => $uri,
+ 'o_type' => 'uri',
+ 'o_lang' => '',
+ 'o_datatype' => '',
+ ));
+ $ts_added = 1;
+ }
+ }
+ /* step 7 */
+ if ($lct['cur_o_res']) {
+ if ($rel_uris) {
+ foreach ($rel_uris as $uri) {
+ $this->addT(array(
+ 's' => $lct['new_s'],
+ 's_type' => preg_match('/^\_\:/', $lct['new_s']) ? 'bnode' : 'uri',
+ 'p' => $uri,
+ 'o' => $lct['cur_o_res'],
+ 'o_type' => preg_match('/^\_\:/', $lct['cur_o_res']) ? 'bnode' : 'uri',
+ 'o_lang' => '',
+ 'o_datatype' => '',
+ ));
+ $ts_added = 1;
+ }
+ }
+ if ($rev_uris) {
+ foreach ($rev_uris as $uri) {
+ $this->addT(array(
+ 's' => $lct['cur_o_res'],
+ 's_type' => preg_match('/^\_\:/', $lct['cur_o_res']) ? 'bnode' : 'uri',
+ 'p' => $uri,
+ 'o' => $lct['new_s'],
+ 'o_type' => preg_match('/^\_\:/', $lct['new_s']) ? 'bnode' : 'uri',
+ 'o_lang' => '',
+ 'o_datatype' => '',
+ ));
+ $ts_added = 1;
+ }
+ }
+ }
+ }
+ /* step 8 */
+ if (!$lct['cur_o_res']) {
+ if ($rel_uris || $rev_uris) {
+ $lct['cur_o_res'] = $this->createBnodeID();
+ foreach ($rel_uris as $uri) {
+ $lct['inco_ts'][] = array('p' => $uri, 'dir' => 'fwd');
+ }
+ foreach ($rev_uris as $uri) {
+ $lct['inco_ts'][] = array('p' => $uri, 'dir' => 'rev');
+ }
+ }
+ }
+ /* step 10 */
+ if (!$lct['skip'] && ($new_s = $lct['new_s'])) {
+ //if ($new_s = $lct['new_s']) {
+ if ($uris = $this->getAttributeURIs($n, $ct, $lct, 'property')) {
+ foreach ($uris as $uri) {
+ $lct['cur_o_lit'] = $this->getCurrentObjectLiteral($n, $lct, $ct);
+ $this->addT(array(
+ 's' => $lct['new_s'],
+ 's_type' => preg_match('/^\_\:/', $lct['new_s']) ? 'bnode' : 'uri',
+ 'p' => $uri,
+ 'o' => $lct['cur_o_lit']['value'],
+ 'o_type' => 'literal',
+ 'o_lang' => $lct['cur_o_lit']['lang'],
+ 'o_datatype' => $lct['cur_o_lit']['datatype'],
+ ));
+ $ts_added = 1;
+ if ($lct['cur_o_lit']['datatype'] == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral') {
+ $lct['recurse'] = 0;
+ }
+ }
+ }
+ }
+ /* step 11 (10) */
+ $complete_triples = 0;
+ if ($lct['recurse']) {
+ if ($lct['skip']) {
+ $new_ct = array_merge($ct, array('base' => $lct['base'], 'lang' => $lct['lang'], 'ns' => $lct['ns']));
+ }
+ else {
+ $new_ct = array(
+ 'base' => $lct['base'],
+ 'p_s' => $lct['new_s'] ? $lct['new_s'] : $ct['p_s'],
+ 'p_o' => $lct['cur_o_res'] ? $lct['cur_o_res'] : ($lct['new_s'] ? $lct['new_s'] : $ct['p_s']),
+ 'ns' => $lct['ns'],
+ 'inco_ts' => $lct['inco_ts'],
+ 'lang' => $lct['lang']
+ );
+ }
+ $sub_nodes = $this->getSubNodes($n);
+ foreach ($sub_nodes as $sub_node) {
+ if ($this->processNode($sub_node, $new_ct, $level+1)) {
+ $complete_triples = 1;
+ }
+ }
+ }
+ /* step 12 (11) */
+ $other = 0;
+ if ($ts_added || $complete_triples || ($lct['new_s'] && !preg_match('/^\_\:/', $lct['new_s'])) || ($other == 1)) {
+ //if (!$lct['skip'] && ($complete_triples || ($lct['new_s'] && !preg_match('/^\_\:/', $lct['new_s'])))) {
+ foreach ($ct['inco_ts'] as $inco_t) {
+ if ($inco_t['dir'] == 'fwd') {
+ $this->addT(array(
+ 's' => $ct['p_s'],
+ 's_type' => preg_match('/^\_\:/', $ct['p_s']) ? 'bnode' : 'uri',
+ 'p' => $inco_t['p'],
+ 'o' => $lct['new_s'],
+ 'o_type' => preg_match('/^\_\:/', $lct['new_s']) ? 'bnode' : 'uri',
+ 'o_lang' => '',
+ 'o_datatype' => '',
+ ));
+ }
+ elseif ($inco_t['dir'] == 'rev') {
+ $this->addT(array(
+ 's' => $lct['new_s'],
+ 's_type' => preg_match('/^\_\:/', $lct['new_s']) ? 'bnode' : 'uri',
+ 'p' => $inco_t['p'],
+ 'o' => $ct['p_s'],
+ 'o_type' => preg_match('/^\_\:/', $ct['p_s']) ? 'bnode' : 'uri',
+ 'o_lang' => '',
+ 'o_datatype' => '',
+ ));
+ }
+ }
+ }
+ /* step 13 (12) (result flag) */
+ if ($ts_added) return 1;
+ if ($lct['new_s'] && !preg_match('/^\_\:/', $lct['new_s'])) return 1;
+ if ($complete_triples) return 1;
+ return 0;
+ }
+
+ /* */
+
+ function getAttributeURIs($n, $ct, $lct, $attr) {
+ $vals = ($val = $this->v($attr, '', $n['a'])) ? explode(' ', $val) : array();
+ $r = array();
+ foreach ($vals as $val) {
+ if(!trim($val)) continue;
+ if ((list($uri, $sub_v) = $this->xURI(trim($val), $lct['base'], $lct['ns'], $attr, $lct)) && $uri) {
+ $r[] = $uri;
+ }
+ }
+ return $r;
+ }
+
+ /* */
+
+ function getCurrentObjectLiteral($n, $lct, $ct) {
+ $xml_val = $this->getContent($n);
+ $plain_val = $this->getPlainContent($n, 0, 0);
+ if (function_exists('html_entity_decode')) {
+ $plain_val = html_entity_decode($plain_val, ENT_QUOTES);
+ }
+ $dt = $this->v('datatype', '', $n['a']);
+ list($dt_uri, $sub_v) = $this->xURI($dt, $lct['base'], $lct['ns'], '', $lct);
+ $dt = $dt ? $dt_uri : $dt;
+ $r = array('value' => '', 'lang' => $lct['lang'], 'datatype' => $dt);
+ if (isset($n['a']['content'])) {
+ $r['value'] = $n['a']['content'];
+ if (function_exists('html_entity_decode')) {
+ $r['value'] = html_entity_decode($r['value'], ENT_QUOTES);
+ }
+ }
+ elseif ($xml_val == $plain_val) {
+ $r['value'] = $plain_val;
+ }
+ elseif (!preg_match('/[\<\>]/', $xml_val)) {
+ $r['value'] = $xml_val;
+ }
+ elseif (isset($n['a']['datatype']) && ($dt != 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral')) {
+ $r['value'] = $plain_val;
+ }
+ elseif (!isset($n['a']['datatype']) || ($dt == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral')) {
+ $r['value'] = $this->injectXMLDeclarations($xml_val, $lct['ns'], $lct['lang']);
+ $r['datatype'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral';
+ }
+ return $r;
+ }
+
+ function injectXMLDeclarations($val, $ns, $lang) {//@@todo proper node rebuilding */
+ $lang_code = $lang ? ' xml:lang="' . $lang . '"' : '';
+ /* ns */
+ $val = preg_replace('/<([a-z0-9]+)([\>\s])/is', '<\\1 xmlns="http://www.w3.org/1999/xhtml"' . $lang_code . '\\2', $val);
+ foreach ($ns as $prefix => $uri) {
+ if ($prefix && ($pos = strpos(' ' . $val, '<' . $prefix . ':'))) {
+ $val = substr($val, 0, $pos - 1) . preg_replace('/^(<' . $prefix . '\:[^\>\s]+)/', '\\1 xmlns:' . $prefix. '="' . $uri . '"' . $lang_code, substr($val, $pos - 1));
+ }
+ }
+ /* remove accidentally added xml:lang and xmlns= */
+ $val = preg_replace('/(\<[^\>]*)( xml\:lang[^\s\>]+)([^\>]*)(xml\:lang[^\s\>]+)/s', '\\1\\3\\4', $val);
+ $val = preg_replace('/(\<[^\>]*)( xmlns=[^\s\>]+)([^\>]*)(xmlns=[^\s\>]+)/s', '\\1\\3\\4', $val);
+ return $val;
+ }
+
+ /* */
+
+ function xURI($v, $base, $ns, $attr_type = '', $lct = '') {
+ if ((list($sub_r, $sub_v) = $this->xBlankCURIE($v, $base, $ns)) && $sub_r) {
+ return array($sub_r, $sub_v);
+ }
+ if ((list($sub_r, $sub_v) = $this->xSafeCURIE($v, $base, $ns, $lct)) && $sub_r) {
+ return array($sub_r, $sub_v);
+ }
+ if ((list($sub_r, $sub_v) = $this->xCURIE($v, $base, $ns)) && $sub_r) {
+ return array($sub_r, $sub_v);
+ }
+ if (preg_match('/^(rel|rev)$/', $attr_type) && preg_match('/^\s*(alternate|appendix|bookmark|cite|chapter|contents|copyright|glossary|help|icon|index|last|license|meta|next|p3pv1|prev|role|section|stylesheet|subsection|start|up)(\s|$)/is', $v, $m)) {
+ return array('http://www.w3.org/1999/xhtml/vocab#' . strtolower($m[1]), preg_replace('/^\s*' . $m[1]. '/is', '', $v));
+ }
+ if (preg_match('/^(rel|rev)$/', $attr_type) && preg_match('/^[a-z0-9\.]+$/i', $v)) {
+ return array(0, $v);
+ }
+ return array($this->calcURI($v, $base), '');
+ }
+
+ function xBlankCURIE($v, $base, $ns) {
+ if ($sub_r = $this->x('\[\_\:\]', $v)) {
+ $this->empty_bnode = isset($this->empty_bnode) ? $this->empty_bnode : $this->createBnodeID();
+ return array($this->empty_bnode, '');
+ }
+ if ($sub_r = $this->x('\[?(\_\:[a-z0-9\_\-]+)\]?', $v)) {
+ return array($sub_r[1], '');
+ }
+ return array(0, $v);
+ }
+
+ /**
+  * Recognises safe (bracketed) CURIEs: "[]" and "[prefix:reference]".
+  *
+  * @param string $v    value to inspect
+  * @param string $base used as the term for "[]" when no local context is given
+  * @param array  $ns   prefix => namespace URI map
+  * @param mixed  $lct  local context; when truthy its 'prev_s' entry is the term for "[]"
+  * @return array array(term, rest) on a match, array(0, $v) otherwise
+  */
+ function xSafeCURIE($v, $base, $ns, $lct = '') {
+   /* empty */
+   $m = $this->x('\[\]', $v);
+   if ($m) {
+     $term = $lct ? $lct['prev_s'] : $base;/* should be current subject value */
+     $rest = $m[1] ? $m[1] : '';
+     return array($term, $rest);
+   }
+   $m = $this->x('\[([^\:]*)\:([^\]]*)\]', $v);
+   if ($m) {
+     $prefix = $m[1];
+     /* empty prefix: XHTML vocabulary */
+     if (!$prefix) {
+       return array('http://www.w3.org/1999/xhtml/vocab#' . $m[2], '');
+     }
+     if (isset($ns[$prefix])) {
+       return array($ns[$prefix] . $m[2], '');
+     }
+   }
+   return array(0, $v);
+ }
+
+ /**
+  * Recognises unbracketed CURIEs of the form "prefix:reference".
+  *
+  * @param string $v    value to inspect
+  * @param string $base unused here
+  * @param array  $ns   prefix => namespace URI map
+  * @return array array(expanded_uri, '') for an empty or declared prefix, array(0, $v) otherwise
+  */
+ function xCURIE($v, $base, $ns) {
+   $m = $this->x('([a-z0-9\-\_]*)\:([^\s]+)', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   $prefix = $m[1];
+   /* empty prefix: XHTML vocabulary */
+   if (!$prefix) {
+     return array('http://www.w3.org/1999/xhtml/vocab#' . $m[2], '');
+   }
+   if (isset($ns[$prefix])) {
+     return array($ns[$prefix] . $m[2], '');
+   }
+   return array(0, $v);
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Twitter Profile Picture Extractor
+author: Benjamin Nowack
+version: 2008-12-09
+*/
+
+ARC2::inc('RDFExtractor');
+
+/**
+ * Extractor that emits one mf:photo triple for the <img id="profile-image"> node of a page.
+ */
+class ARC2_TwitterProfilePicExtractor extends ARC2_RDFExtractor {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct() */
+  function ARC2_TwitterProfilePicExtractor($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  /* registers the foaf and mf prefixes on top of the parent's setup */
+  function __init() {
+    parent::__init();
+    $this->a['ns']['foaf'] = 'http://xmlns.com/foaf/0.1/';
+    $this->a['ns']['mf'] = 'http://poshrdf.org/ns/mf#';
+  }
+
+  /* */
+
+  /**
+   * Looks for the first img node whose id attribute is "profile-image" and,
+   * if one exists, adds a "?vcard_id mf:photo <src>" triple for it.
+   */
+  function extractRDF() {
+    $pic = null;
+    foreach ($this->nodes as $n) {
+      if (!isset($n['tag']) || ($n['tag'] != 'img')) continue;
+      if ($this->v('id', '', $n['a']) != 'profile-image') continue;
+      $pic = $n;
+      break;
+    }
+    /* no profile image on this page */
+    if ($pic === null) return;
+    $vals = array('vcard_id' => $this->getDocID($pic) . '#resource(side/1/2/1)');
+    $tpl = '?vcard_id mf:photo <' . $pic['a']['src'] . '> . ';
+    $doc = $this->getFilledTemplate($tpl, $vals, $pic['doc_base']);
+    $this->addTs(ARC2::getTriplesFromIndex($doc));
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Atom Parser
+author: Benjamin Nowack
+version: 2009-04-21 (Addition: support for link types)
+*/
+
+ARC2::inc('LegacyXMLParser');
+
+/**
+ * Atom parser: reads a feed through the legacy XML parser and maps feed/entry
+ * elements onto RSS 1.0, Dublin Core, SIOC and content-module properties.
+ */
+class ARC2_AtomParser extends ARC2_LegacyXMLParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct() */
+  function ARC2_AtomParser($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  /* resets triple buffers and bnode counter; character data is not kept as separate nodes */
+  function __init() {/* reader */
+    parent::__init();
+    $this->triples = array();
+    $this->target_encoding = '';
+    $this->t_count = 0;
+    $this->added_triples = array();
+    $this->skip_dupes = false;
+    $this->bnode_prefix = $this->v('bnode_prefix', 'arc'.substr(md5(uniqid(rand())), 0, 4).'b', $this->a);
+    $this->bnode_id = 0;
+    $this->cache = array();
+    $this->allowCDataNodes = 0;
+  }
+
+  /* */
+
+  /* called by parse() once the document has been read */
+  function done() {
+    $this->extractRDF();
+  }
+
+  /* */
+
+  function setReader(&$reader) {
+    $this->reader =& $reader;
+  }
+
+  /* returns a new blank node label of the form "_:<prefix><counter>" */
+  function createBnodeID(){
+    $this->bnode_id++;
+    return '_:' . $this->bnode_prefix . $this->bnode_id;
+  }
+
+  /**
+   * Appends a triple; with skip_dupes enabled, a triple whose serialized form
+   * was already added is ignored.
+   *
+   * @param array $t triple structure
+   */
+  function addT($t) {
+    if ($this->skip_dupes) {
+      $h = md5(serialize($t));
+      if (!isset($this->added_triples[$h])) {
+        $this->triples[$this->t_count] = $t;
+        $this->t_count++;
+        $this->added_triples[$h] = true;
+      }
+    }
+    else {
+      $this->triples[$this->t_count] = $t;
+      $this->t_count++;
+    }
+  }
+
+  function getTriples() {
+    return $this->v('triples', array());
+  }
+
+  function countTriples() {
+    return $this->t_count;
+  }
+
+  function getSimpleIndex($flatten_objects = 1, $vals = '') {
+    return ARC2::getSimpleIndex($this->getTriples(), $flatten_objects, $vals);
+  }
+
+  /* */
+
+  /**
+   * Walks the node index and turns every feed and entry element into triples.
+   * Also (re)defines the namespace shortcuts and the Atom tag => property map.
+   */
+  function extractRDF() {
+    $index = $this->getNodeIndex();
+    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+    $this->atom = 'http://www.w3.org/2005/Atom';
+    $this->rss = 'http://purl.org/rss/1.0/';
+    $this->dc = 'http://purl.org/dc/elements/1.1/';
+    $this->sioc = 'http://rdfs.org/sioc/ns#';
+    $this->dct = 'http://purl.org/dc/terms/';
+    $this->content = 'http://purl.org/rss/1.0/modules/content/';
+    $this->enc = 'http://purl.oclc.org/net/rss_2.0/enc#';
+    $this->mappings = array(
+      'feed' => $this->rss . 'channel',
+      'entry' => $this->rss . 'item',
+      'title' => $this->rss . 'title',
+      'link' => $this->rss . 'link',
+      'summary' => $this->rss . 'description',
+      'content' => $this->content . 'encoded',
+      'id' => $this->dc . 'identifier',
+      'author' => $this->dc . 'creator',
+      'category' => $this->dc . 'subject',
+      'updated' => $this->dc . 'date',
+      'source' => $this->dc . 'source',
+    );
+    /* properties whose values are always emitted as literals */
+    $this->dt_props = array(
+      $this->dc . 'identifier',
+      $this->rss . 'link'
+    );
+    foreach ($index as $p_id => $nodes) {
+      foreach ($nodes as $pos => $node) {
+        $tag = $this->v('tag', '', $node);
+        if (($tag != 'feed') && ($tag != 'entry')) continue;
+        /* an element without children has no entry in the index */
+        $els = isset($index[$node['id']]) ? $index[$node['id']] : array();
+        $struct = ($tag == 'feed') ? $this->extractChannel($els) : $this->extractItem($els);
+        $triples = ARC2::getTriplesFromIndex($struct);
+        foreach ($triples as $t) {
+          $this->addT($t);
+        }
+      }
+    }
+  }
+
+  /**
+   * Picks the subject for a feed or entry: first rss:link value, else first
+   * dc:identifier value, else a fresh blank node (previously a missing link
+   * produced an undefined-index notice and an empty subject).
+   *
+   * @param array $props property index as returned by extractProps()
+   * @return string subject identifier
+   */
+  function getSubjectID($props) {
+    foreach (array($this->rss . 'link', $this->dc . 'identifier') as $p) {
+      if (isset($props[$p][0]['value']) && $props[$p][0]['value']) {
+        return $props[$p][0]['value'];
+      }
+    }
+    return $this->createBnodeID();
+  }
+
+  /* builds the resource index for a feed element from its child nodes */
+  function extractChannel($els) {
+    list($props, $sub_index) = $this->extractProps($els, 'channel');
+    $uri = $this->getSubjectID($props);
+    return ARC2::getMergedIndex(array($uri => $props), $sub_index);
+  }
+
+  /* builds the resource index for an entry element from its child nodes */
+  function extractItem($els) {
+    list($props, $sub_index) = $this->extractProps($els, 'item');
+    $uri = $this->getSubjectID($props);
+    return ARC2::getMergedIndex(array($uri => $props), $sub_index);
+  }
+
+  /**
+   * Converts the child elements of a feed/entry into a property index.
+   *
+   * @param array  $els       child nodes of the container element
+   * @param string $container 'channel' or 'item' (appended to the RSS namespace for rdf:type)
+   * @return array array(props, sub_index); sub_index holds dc:format statements about typed links
+   */
+  function extractProps($els, $container) {
+    $r = array($this->rdf . 'type' => array(array('value' => $this->rss . $container, 'type' => 'uri')));
+    $sub_index = array();
+    foreach ($els as $info) {
+      /* key */
+      $tag = $info['tag'];
+      if (!preg_match('/^[a-z0-9]+\:/i', $tag)) {
+        $k = isset($this->mappings[$tag]) ? $this->mappings[$tag] : '';
+      }
+      elseif (isset($this->mappings[$tag])) {
+        $k = $this->mappings[$tag];
+      }
+      else {/* qname */
+        $k = $this->expandPName($tag);
+      }
+      /* entries are handled on their own, not as channel properties */
+      if (($container == 'channel') && ($k == $this->rss . 'item')) continue;
+      /* val */
+      $v = trim($info['cdata']);
+      if (!$v) $v = $this->v('href uri', '', $info['a']);
+      /* prop */
+      if ($k) {
+        /* content handling */
+        if (in_array($k, array($this->rss . 'description', $this->content . 'encoded'))) {
+          $v = $this->getNodeContent($info);
+        }
+        /* source handling */
+        elseif ($k == $this->dc . 'source') {
+          /* getSubNodes() yields an empty list for a childless element */
+          $sub_nodes = $this->getSubNodes($info);
+          foreach ($sub_nodes as $sub_pos => $sub_info) {
+            if ($sub_info['tag'] == 'id') {
+              $v = trim($sub_info['cdata']);
+            }
+          }
+        }
+        /* link handling */
+        elseif ($k == $this->rss . 'link') {
+          if ($link_type = $this->v('type', '', $info['a'])) {
+            $k2 = $this->dc . 'format';
+            if (!isset($sub_index[$v])) $sub_index[$v] = array();
+            if (!isset($sub_index[$v][$k2])) $sub_index[$v][$k2] = array();
+            $sub_index[$v][$k2][] = array('value' => $link_type, 'type' => 'literal');
+          }
+        }
+        /* author handling */
+        elseif ($k == $this->dc . 'creator') {
+          $sub_nodes = $this->getSubNodes($info);
+          foreach ($sub_nodes as $sub_pos => $sub_info) {
+            if ($sub_info['tag'] == 'name') {
+              $v = trim($sub_info['cdata']);
+            }
+            if ($sub_info['tag'] == 'uri') {
+              $k2 = $this->sioc . 'has_creator';
+              $v2 = trim($sub_info['cdata']);
+              if (!isset($r[$k2])) $r[$k2] = array();
+              $r[$k2][] = array('value' => $v2, 'type' => 'uri');
+            }
+          }
+        }
+        /* date handling */
+        elseif (in_array($k, array($this->dc . 'date', $this->dct . 'modified'))) {
+          if (!preg_match('/^[0-9]{4}/', $v) && ($sub_v = strtotime($v)) && ($sub_v != -1)) {
+            /* gmdate() formats in UTC directly, no manual offset arithmetic */
+            $v = gmdate('Y-m-d\TH:i:s\Z', $sub_v);
+          }
+        }
+        /* tag handling */
+        elseif ($k == $this->dc . 'subject') {
+          $v = $this->v('term', '', $info['a']);
+        }
+        /* other attributes in closed tags */
+        elseif (!$v && ($info['state'] == 'closed') && $info['a']) {
+          foreach ($info['a'] as $sub_k => $sub_v) {
+            if (!preg_match('/(xmlns|\:|type)/', $sub_k)) {
+              $v = $sub_v;
+              break;
+            }
+          }
+        }
+        if (!isset($r[$k])) $r[$k] = array();
+        /* dt_props and values that do not look like an absolute URI become literals */
+        $r[$k][] = array('value' => $v, 'type' => in_array($k, $this->dt_props) || !preg_match('/^[a-z0-9]+\:[^\s]+$/is', $v) ? 'literal' : 'uri');
+      }
+    }
+    return array($r, $sub_index);
+  }
+
+  /* creates the expat parser once (xml_parser_create, not the _ns variant) and wires the handlers to this object */
+  function initXMLParser() {
+    if (!isset($this->xml_parser)) {
+      $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
+      $parser = xml_parser_create($enc);
+      xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
+      xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
+      xml_set_element_handler($parser, 'open', 'close');
+      xml_set_character_data_handler($parser, 'cData');
+      xml_set_start_namespace_decl_handler($parser, 'nsDecl');
+      xml_set_object($parser, $this);
+      $this->xml_parser =& $parser;
+    }
+  }
+
+  /* */
+
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 CrunchBase API JSON Parser
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-03-25
+*/
+
+ARC2::inc('JSONParser');
+
+/**
+ * CrunchBase API JSON parser: turns the decoded JSON structure into triples
+ * in the http://cb.semsol.org/ns# vocabulary.
+ */
+class ARC2_CBJSONParser extends ARC2_JSONParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct() */
+  function ARC2_CBJSONParser($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* reader */
+    parent::__init();
+    $this->base = 'http://cb.semsol.org/';
+    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+    $this->default_ns = $this->base . 'ns#';
+    $this->nsp = array($this->rdf => 'rdf');
+  }
+
+  /* */
+
+  /* called by parse() once the JSON document has been decoded */
+  function done() {
+    $this->extractRDF();
+  }
+
+  /* types the top-level structure and extracts its properties; does nothing if the type is unknown */
+  function extractRDF() {
+    $struct = $this->struct;
+    if ($type = $this->getStructType($struct)) {
+      $s = $this->getResourceID($struct, $type);
+      /* rdf:type */
+      $this->addT($s, $this->rdf . 'type', $this->default_ns . $this->camelCase($type), $this->getTermType($s), 'uri');
+      /* explicit triples */
+      $this->extractResourceRDF($struct, $s);
+    }
+  }
+
+  /**
+   * Guesses the resource type of a structure.
+   *
+   * @param array  $struct decoded JSON object
+   * @param string $rel    name of the property that pointed to the structure, if any
+   * @return string type name, or '' when nothing matches
+   */
+  function getStructType($struct, $rel = '') {
+    /* url-based */
+    if ($url = $this->v('crunchbase_url', '', $struct)) {
+      return preg_replace('/^.*crunchbase\.com\/([^\/]+)\/.*$/', '\\1', $url);
+    }
+    /* rel-based */
+    if ($rel == 'person') return 'person';
+    if ($rel == 'company') return 'company';
+    if ($rel == 'acquiring_company') return 'company';
+    if ($rel == 'firm') return 'company';
+    if ($rel == 'provider') return 'service-provider';
+    /* struct-based */
+    if (isset($struct['_type'])) return $struct['_type'];
+    if (isset($struct['round_code'])) return 'funding_round';
+    if (isset($struct['products'])) return 'company';
+    if (isset($struct['first_name'])) return 'person';
+    if (isset($struct['investments'])) return 'financial-organization';
+    if (isset($struct['launched_year'])) return 'product';
+    if (isset($struct['providerships']) && is_array($struct['providerships'])) return 'service-provider';
+    return '';
+  }
+
+  /* returns "<base><type>/<permalink>#self" when a permalink exists, otherwise a new blank node label */
+  function getResourceID($struct, $type) {
+    if ($type && isset($struct['permalink'])) {
+      return $this->base . $type . '/' . $struct['permalink'] . '#self';
+    }
+    return $this->createBnodeID();
+  }
+
+  /**
+   * Term type for an identifier produced by getResourceID()/createSubURI():
+   * 'bnode' for "_:" labels, 'uri' otherwise.
+   */
+  function getTermType($id) {
+    return preg_match('/^\_\:/', $id) ? 'bnode' : 'uri';
+  }
+
+  /* maps a JSON key to a property URI; known plural keys are reduced to their singular */
+  function getPropertyURI($name, $ns = '') {
+    if (!$ns) $ns = $this->default_ns;
+    if (preg_match('/^(product|funding_round|investment|acquisition|.+ship|office|milestone|.+embed|.+link|degree|fund)s/', $name, $m)) $name = $m[1];
+    if ($name == 'tag_list') $name = 'tag';
+    if ($name == 'competitions') $name = 'competitor';
+    return $ns . $name;
+  }
+
+  /* derives "<subject path>/<key>-<pos + 1>#self" for a nested structure without its own identity */
+  function createSubURI($s, $k, $pos) {
+    $s = str_replace('#self', '/', $s);
+    if (preg_match('/(office|ship|investment|milestone|fund|embed|link)s$/', $k)) $k = substr($k, 0, -1);
+    return $s . $k . '-' . ($pos + 1) . '#self';
+  }
+
+  /* */
+
+  /**
+   * Emits triples for every key of a structure.
+   *
+   * Keys with a matching extract<CamelCaseKey>RDF method are delegated to it
+   * (before the empty-value check); scalars become literal/URI objects; nested
+   * objects become linked resources; numerically keyed lists are unrolled.
+   *
+   * @param array  $struct decoded JSON object
+   * @param string $s      subject identifier
+   * @param int    $pos    position of $struct in its parent list, used for sub-URIs
+   */
+  function extractResourceRDF($struct, $s, $pos = 0) {
+    $s_type = $this->getTermType($s);
+    $date_prefixes = array();
+    foreach ($struct as $k => $v) {
+      if ($k == 'acquisition') $k = 'exit';
+      /* remember "<prefix>_year/_month/_day" groups for inferDate() */
+      if (preg_match('/^(.*)\_(year|month|day)$/', $k, $m)) {
+        if (!in_array($m[1], $date_prefixes)) $date_prefixes[] = $m[1];
+      }
+      $sub_m = 'extract' . $this->camelCase($k) . 'RDF';
+      if (method_exists($this, $sub_m)) {
+        $this->$sub_m($s, $s_type, $v);
+        continue;
+      }
+      $p = $this->getPropertyURI($k);
+      if (!$v) continue;
+      /* simple, single v */
+      if (!is_array($v)) {
+        $o_type = preg_match('/^[a-z]+\:[^\s]+$/is', $v) ? 'uri' : 'literal';
+        $v = trim($v);
+        /* bare host URLs get a trailing slash */
+        if (preg_match('/^https?\:\/\/[^\/]+$/', $v)) $v .= '/';
+        $this->addT($s, $p, $v, $s_type, $o_type);
+        /* rdfs:label */
+        if ($k == 'name') $this->addT($s, 'http://www.w3.org/2000/01/rdf-schema#label', $v, $s_type, $o_type);
+      }
+      /* structured, single v */
+      elseif (!$this->isFlatArray($v)) {
+        if ($o_type = $this->getStructType($v, $k)) {/* known type */
+          $o = $this->getResourceID($v, $o_type);
+          /* getResourceID() may hand back a blank node */
+          $o_term_type = $this->getTermType($o);
+          $this->addT($s, $p, $o, $s_type, $o_term_type);
+          $this->addT($o, $this->rdf . 'type', $this->default_ns . $this->camelCase($o_type), $o_term_type, 'uri');
+        }
+        else {/* unknown type */
+          $o = $this->createSubURI($s, $k, $pos);
+          $this->addT($s, $p, $o, $s_type, 'uri');
+          $this->extractResourceRDF($v, $o);
+        }
+      }
+      /* value list */
+      else {
+        foreach ($v as $sub_pos => $sub_v) {
+          $this->extractResourceRDF(array($k => $sub_v), $s, $sub_pos);
+        }
+      }
+    }
+    /* infer XSD triples */
+    foreach ($date_prefixes as $prefix) {
+      $this->inferDate($prefix, $s, $struct);
+    }
+  }
+
+  /* 1 when the first key of the array is numeric, 0 otherwise (including for an empty array) */
+  function isFlatArray($v) {
+    foreach ($v as $k => $sub_v) {
+      return is_numeric($k) ? 1 : 0;
+    }
+    return 0;
+  }
+
+  /* */
+
+  /* one tag triple per non-blank entry of the ", "-separated list */
+  function extractTagListRDF($s, $s_type, $v) {
+    if (!$v) return 0;
+    /* explode() replaces split(), which no longer exists in PHP 7+ */
+    $tags = explode(', ', $v);
+    foreach ($tags as $tag) {
+      if (!trim($tag)) continue;
+      $this->addT($s, $this->getPropertyURI('tag'), $tag, $s_type, 'literal');
+    }
+  }
+
+  /* links every available image size to the subject and records its width and height */
+  function extractImageRDF($s, $s_type, $v, $rel = 'image') {
+    if (!$v || !isset($v['available_sizes']) || !is_array($v['available_sizes'])) return 1;
+    $sizes = $v['available_sizes'];
+    foreach ($sizes as $size) {
+      $w = $size[0][0];
+      $h = $size[0][1];
+      $img = 'http://www.crunchbase.com/' . $size[1];
+      $this->addT($s, $this->getPropertyURI($rel), $img, $s_type, 'uri');
+      $this->addT($img, $this->getPropertyURI('width'), $w, 'uri', 'literal');
+      $this->addT($img, $this->getPropertyURI('height'), $h, 'uri', 'literal');
+    }
+  }
+
+  function extractScreenshotsRDF($s, $s_type, $v) {
+    if (!$v) return 1;
+    foreach ($v as $sub_v) {
+      $this->extractImageRDF($s, $s_type, $sub_v, 'screenshot');
+    }
+  }
+
+  function extractProductsRDF($s, $s_type, $v) {
+    /* called before the caller's empty check, so null/empty lists arrive here */
+    if (!$v) return 1;
+    foreach ($v as $sub_v) {
+      $o = $this->getResourceID($sub_v, 'product');
+      $this->addT($s, $this->getPropertyURI('product'), $o, $s_type, $this->getTermType($o));
+    }
+  }
+
+  function extractCompetitionsRDF($s, $s_type, $v) {
+    if (!$v) return 1;
+    foreach ($v as $sub_v) {
+      if (!isset($sub_v['competitor'])) continue;
+      $o = $this->getResourceID($sub_v['competitor'], 'company');
+      $this->addT($s, $this->getPropertyURI('competitor'), $o, $s_type, $this->getTermType($o));
+    }
+  }
+
+  function extractFundingRoundsRDF($s, $s_type, $v) {
+    if (!$v) return 1;
+    foreach ($v as $pos => $sub_v) {
+      $o = $this->createSubURI($s, 'funding_round', $pos);
+      $this->addT($s, $this->getPropertyURI('funding_round'), $o, $s_type, 'uri');
+      $this->extractResourceRDF($sub_v, $o, $pos);
+    }
+  }
+
+  function extractInvestmentsRDF($s, $s_type, $v) {
+    if (!$v) return 1;
+    foreach ($v as $pos => $sub_v) {
+      /* incoming */
+      foreach (array('person' => 'person', 'company' => 'company', 'financial_org' => 'financial-organization') as $k => $type) {
+        if (isset($sub_v[$k])) {
+          $o = $this->getResourceID($sub_v[$k], $type);
+          $this->addT($s, $this->getPropertyURI('investment'), $o, $s_type, $this->getTermType($o));
+        }
+      }
+      /* outgoing */
+      if (isset($sub_v['funding_round'])) {
+        $o = $this->createSubURI($s, 'investment', $pos);
+        $this->addT($s, $this->getPropertyURI('investment'), $o, $s_type, 'uri');
+        $this->extractResourceRDF($sub_v['funding_round'], $o, $pos);
+      }
+    }
+  }
+
+  /**
+   * Shared body of the two link extractors: links each external_url to the
+   * subject via $rel and attaches the link title to the URL itself.
+   */
+  function addLinkTriples($s, $s_type, $v, $rel) {
+    if (!$v) return 1;
+    foreach ($v as $sub_v) {
+      $href = $sub_v['external_url'];
+      if (preg_match('/^https?\:\/\/[^\/]+$/', $href)) $href .= '/';
+      $this->addT($s, $this->getPropertyURI($rel), $href, $s_type, 'uri');
+      /* the subject here is the link URL, so its type is always 'uri' (was: the parent's $s_type) */
+      $this->addT($href, $this->getPropertyURI('title'), $sub_v['title'], 'uri', 'literal');
+    }
+  }
+
+  function extractExternalLinksRDF($s, $s_type, $v) {
+    return $this->addLinkTriples($s, $s_type, $v, 'external_link');
+  }
+
+  function extractWebPresencesRDF($s, $s_type, $v) {
+    return $this->addLinkTriples($s, $s_type, $v, 'web_presence');
+  }
+
+  function extractCreatedAtRDF($s, $s_type, $v) {
+    $v = $this->getAPIDateXSD($v);
+    $this->addT($s, $this->getPropertyURI('created_at'), $v, $s_type, 'literal');
+  }
+
+  function extractUpdatedAtRDF($s, $s_type, $v) {
+    $v = $this->getAPIDateXSD($v);
+    $this->addT($s, $this->getPropertyURI('updated_at'), $v, $s_type, 'literal');
+  }
+
+  /**
+   * Converts an API timestamp such as "Fri Jan 16 21:11:48 UTC 2009" to
+   * "2009-01-16T21:11:48Z".
+   *
+   * @param string $val API date string
+   * @return string converted date-time, or '2000-01-01' when the input is not recognised
+   */
+  function getAPIDateXSD($val) {
+    if (preg_match('/^[a-z]+ ([a-z]+) ([0-9]+) ([0-9]{2}\:[0-9]{2}\:[0-9]{2}) UTC ([0-9]{4})/i', (string) $val, $m)) {
+      $months = array('Jan' => '01', 'Feb' => '02', 'Mar' =>'03', 'Apr' => '04', 'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12');
+      /* the pattern is case-insensitive, so normalise before the lookup */
+      $month = ucfirst(strtolower($m[1]));
+      if (isset($months[$month])) {
+        /* single-digit days need a leading zero */
+        return $m[4] . '-' . $months[$month] . '-' . str_pad($m[2], 2, '0', STR_PAD_LEFT) . 'T' . $m[3] . 'Z';
+      }
+    }
+    return '2000-01-01';
+  }
+
+  /* */
+
+  /**
+   * Combines "<prefix>_year", "<prefix>_month" and "<prefix>_day" into one
+   * "<prefix>_date" literal; missing parts become "00", and nothing is added
+   * when all three are missing.
+   */
+  function inferDate($prefix, $s, $struct) {
+    $s_type = $this->getTermType($s);
+    $r = '';
+    foreach (array('year', 'month', 'day') as $suffix) {
+      $val = $this->v1($prefix . '_' . $suffix, '00', $struct);
+      $r .= ($r ? '-' : '') . str_pad($val, 2, '0', STR_PAD_LEFT);
+    }
+    if ($r != '00-00-00') {
+      $this->addT($s, $this->getPropertyURI($prefix . '_date'), $r, $s_type, 'literal');
+    }
+  }
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 JSON Parser
+ * Does not extract triples, needs sub-class for RDF extraction
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-07
+*/
+
+ARC2::inc('RDFParser');
+
+/**
+ * Generic JSON parser: reads a document into a PHP structure ($this->struct).
+ * Triple extraction is left to sub-classes via done().
+ */
+class ARC2_JSONParser extends ARC2_RDFParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct() */
+  function ARC2_JSONParser($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+  }
+
+  /* */
+
+  /**
+   * Strips leading block comments, records the shortest remaining input seen
+   * so far in $this->unparsed_code, then delegates the match to ARC2::x().
+   */
+  function x($re, $v, $options = 'si') {
+    while (preg_match('/^\s*(\/\*.*\*\/)(.*)$/Usi', $v, $m)) {/* comment removal */
+      $v = $m[2];
+    }
+    $this->unparsed_code = (strlen($this->unparsed_code) > strlen($v)) ? $v : $this->unparsed_code;
+    return ARC2::x($re, $v, $options);
+  }
+
+  /**
+   * Reads the whole document, cuts it down to the outermost {...} span,
+   * decodes it into $this->struct and calls done().
+   *
+   * @param string $path location handed to the reader
+   * @param string $data inline data handed to the reader
+   * @return mixed whatever done() returns
+   */
+  function parse($path, $data = '') {
+    $this->state = 0;
+    /* reader */
+    if (!$this->v('reader')) {
+      ARC2::inc('Reader');
+      /* plain assignment: "= & new" is a parse error on PHP 7+ */
+      $this->reader = new ARC2_Reader($this->a, $this);
+    }
+    $this->reader->setAcceptHeader('Accept: application/json; q=0.9, */*; q=0.1');
+    $this->reader->activate($path, $data);
+    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
+    /* parse */
+    $doc = '';
+    while ($d = $this->reader->readStream()) {
+      $doc .= $d;
+    }
+    $this->reader->closeStream();
+    unset($this->reader);
+    /* drop anything before the first "{" and after the last "}" */
+    $doc = preg_replace('/^[^\{]*(.*\})[^\}]*$/is', '\\1', $doc);
+    $this->unparsed_code = $doc;
+    list($this->struct, $rest) = $this->extractObject($doc);
+    return $this->done();
+  }
+
+  /* */
+
+  /**
+   * Decodes one JSON value. Uses json_decode() when available; the hand-written
+   * parser below is the fallback.
+   *
+   * @param string $v JSON text
+   * @return array array(decoded value, unparsed rest)
+   */
+  function extractObject($v) {
+    if (function_exists('json_decode')) return array(json_decode($v, 1), '');
+    $r = array();
+    /* sub-object */
+    if ($sub_r = $this->x('\{', $v)) {
+      $v = $sub_r[1];
+      while ((list($sub_r, $v) = $this->extractEntry($v)) && $sub_r) {
+        $r[$sub_r['key']] = $sub_r['value'];
+      }
+      if ($sub_r = $this->x('\}', $v)) $v = $sub_r[1];
+    }
+    /* sub-list */
+    elseif ($sub_r = $this->x('\[', $v)) {
+      $v = $sub_r[1];
+      while ((list($sub_r, $v) = $this->extractObject($v)) && $sub_r) {
+        $r[] = $sub_r;
+        $v = ltrim($v, ',');
+      }
+      if ($sub_r = $this->x('\]', $v)) $v = $sub_r[1];
+    }
+    /* sub-value */
+    elseif ((list($sub_r, $v) = $this->extractValue($v)) && ($sub_r !== false)) {
+      $r = $sub_r;
+    }
+    return array($r, $v);
+  }
+
+  /* parses one '"key": value' pair; returns array(array('key' =>, 'value' =>), rest) or array(0, $v) */
+  function extractEntry($v) {
+    if ($r = $this->x('\,', $v)) $v = $r[1];
+    /* k */
+    if ($r = $this->x('\"([^\"]+)\"\s*\:', $v)) {
+      $k = $r[1];
+      $sub_v = $r[2];
+      if (list($sub_r, $sub_v) = $this->extractObject($sub_v)) {
+        return array(
+          array('key' => $k, 'value' => $sub_r),
+          $sub_v
+        );
+      }
+    }
+    return array(0, $v);
+  }
+
+  /**
+   * Parses a scalar: null, true/false (kept as strings), a number (kept as a
+   * string) or a double-quoted string.
+   *
+   * @param string $v JSON text
+   * @return array array(value, rest); value is false when nothing matched
+   */
+  function extractValue($v) {
+    if ($r = $this->x('\,', $v)) $v = $r[1];
+    if ($sub_r = $this->x('null', $v)) {
+      return array(null, $sub_r[1]);
+    }
+    if ($sub_r = $this->x('(true|false)', $v)) {
+      return array($sub_r[1], $sub_r[2]);
+    }
+    if ($sub_r = $this->x('([\-\+]?[0-9\.]+)', $v)) {
+      return array($sub_r[1], $sub_r[2]);
+    }
+    if ($sub_r = $this->x('\"', $v)) {
+      $rest = $sub_r[1];
+      if (preg_match('/^([^\x5c]*|.*[^\x5c]|.*\x5c{2})\"(.*)$/sU', $rest, $m)) {
+        /* single left-to-right pass, so an escaped backslash is never re-read as the start of another escape */
+        $val = preg_replace_callback('/\\\\(?:u([0-9a-fA-F]{4})|(.))/s', array($this, 'unescapeSequence'), $m[1]);
+        return array($val, $m[2]);
+      }
+    }
+    return array(false, $v);
+  }
+
+  /**
+   * preg_replace_callback handler for extractValue(): decodes one backslash
+   * escape. Replaces the former /e-modifier call, which PHP 7 removed.
+   *
+   * @param array $m match; $m[1] = hex digits of a \u escape, $m[2] = escaped character
+   * @return string decoded character; unknown escapes are returned unchanged
+   */
+  function unescapeSequence($m) {
+    /* \uXXXX: single-byte decoding, as before */
+    if (isset($m[1]) && ($m[1] !== '')) {
+      return chr(hexdec($m[1]));
+    }
+    $map = array('\\' => "\\", 'r' => "\r", 't' => "\t", 'n' => "\n", '"' => '"', 'b' => chr(8), 'f' => "\f", '/' => '/');
+    return isset($map[$m[2]]) ? $map[$m[2]] : $m[0];
+  }
+
+  /* */
+
+  function getObject() {
+    return $this->v('struct', array());
+  }
+
+  function getTriples() {
+    return $this->v('triples', array());
+  }
+
+  function countTriples() {
+    return $this->t_count;
+  }
+
+  /**
+   * Appends a triple built from its parts; the object value is passed through
+   * toUTF8() first. With skip_dupes enabled, an identical triple is added only once.
+   */
+  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '') {
+    $o = $this->toUTF8($o);
+    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
+    if ($this->skip_dupes) {
+      $h = md5(serialize($t));
+      if (!isset($this->added_triples[$h])) {
+        $this->triples[$this->t_count] = $t;
+        $this->t_count++;
+        $this->added_triples[$h] = true;
+      }
+    }
+    else {
+      $this->triples[$this->t_count] = $t;
+      $this->t_count++;
+    }
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Legacy XML Parser
+author: Benjamin Nowack
+version: 2008-10-04 (Fix: nsDecl led to warnings when uri was an array.)
+*/
+
+ARC2::inc('Class');
+
+/**
+ * Legacy XML parser: feeds a document through expat and records every element
+ * as a flat node list ($this->nodes) carrying level, parent id and sibling position.
+ */
+class ARC2_LegacyXMLParser extends ARC2_Class {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct() */
+  function ARC2_LegacyXMLParser($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* reader */
+    parent::__init();
+    $this->encoding = $this->v('encoding', false, $this->a);
+    $this->state = 0;
+    $this->x_base = $this->base;
+    $this->xml = 'http://www.w3.org/XML/1998/namespace';
+    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+    $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
+    $this->allowCDataNodes = 1;
+    $this->target_encoding = '';
+    $this->keep_cdata_ws = $this->v('keep_cdata_whitespace', 0, $this->a);
+  }
+
+  /* */
+
+  function setReader(&$reader) {
+    $this->reader =& $reader;
+  }
+
+  /**
+   * Parses a document chunk by chunk.
+   *
+   * On an "Invalid character" error the parse is restarted once with an
+   * ISO-8859-1 XML declaration forced onto the first chunk.
+   *
+   * @param string $path         location handed to the reader
+   * @param string $data         inline data handed to the reader
+   * @param bool   $iso_fallback true on the ISO-8859-1 retry
+   * @return mixed result of done(), or of addError() on an XML error
+   */
+  function parse($path, $data = '', $iso_fallback = false) {
+    $this->nodes = array();
+    $this->node_count = 0;
+    $this->level = 0;
+    /* the cached index belongs to the previous document */
+    unset($this->node_index);
+    /* reader */
+    if (!$this->v('reader')) {
+      ARC2::inc('Reader');
+      /* plain assignment: "= & new" is a parse error on PHP 7+ */
+      $this->reader = new ARC2_Reader($this->a, $this);
+    }
+    $this->reader->setAcceptHeader('Accept: application/xml; q=0.9, */*; q=0.1');
+    $this->reader->activate($path, $data);
+    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
+    $this->base = $this->x_base;
+    $this->doc_url = $this->reader->base;
+    /* xml parser */
+    $this->initXMLParser();
+    /* parse */
+    $first = true;
+    while ($d = $this->reader->readStream(1)) {
+      if ($iso_fallback && $first) {
+        /* replace any existing XML declaration with an ISO-8859-1 one */
+        $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
+      }
+      if (!xml_parse($this->xml_parser, $d, false)) {
+        $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
+        $line = xml_get_current_line_number($this->xml_parser);
+        if (!$iso_fallback && preg_match("/Invalid character/i", $error_str)) {
+          xml_parser_free($this->xml_parser);
+          unset($this->xml_parser);
+          $this->reader->closeStream();
+          unset($this->reader);
+          $this->__init();
+          $this->encoding = 'ISO-8859-1';
+          $this->initXMLParser();
+          return $this->parse($path, $data, true);
+        }
+        else {
+          /* build the message first: getEncoding() may still need the reader */
+          $msg = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
+          /* drop the failed parser so a later parse() starts with a fresh one */
+          xml_parser_free($this->xml_parser);
+          unset($this->xml_parser);
+          return $this->addError($msg);
+        }
+      }
+      $first = false;
+    }
+    $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
+    xml_parser_free($this->xml_parser);
+    /* without the unset, initXMLParser() would keep the freed parser on the next parse() */
+    unset($this->xml_parser);
+    $this->reader->closeStream();
+    unset($this->reader);
+    return $this->done();
+  }
+
+  /* */
+
+  /**
+   * @param string $src 'parser' for the parser's target encoding, 'config' for
+   *                    the configured encoding (if set); otherwise the reader is asked
+   */
+  function getEncoding($src = 'config') {
+    if ($src == 'parser') {
+      return $this->target_encoding;
+    }
+    elseif (($src == 'config') && $this->encoding) {
+      return $this->encoding;
+    }
+    return $this->reader->getEncoding();
+  }
+
+  /* */
+
+  /* hook called at the end of parse(); empty here */
+  function done() {
+
+  }
+
+  /* */
+
+  function getStructure() {
+    return array('nodes' => $this->v('nodes', array()));
+  }
+
+  /* */
+
+  /**
+   * Builds (once) and returns the nodes indexed by parent id and sibling
+   * position. Each node is also stamped with id, doc_base and doc_url.
+   */
+  function getNodeIndex(){
+    if (!isset($this->node_index)) {
+      /* index by parent */
+      $index = array();
+      for ($i = 0, $i_max = count($this->nodes); $i < $i_max; $i++) {
+        $node = $this->nodes[$i];
+        $node['id'] = $i;
+        $node['doc_base'] = $this->base;
+        if (isset($this->doc_url)) $node['doc_url'] = $this->doc_url;
+        $this->updateNode($node);
+        $p_id = $node['p_id'];
+        if (!isset($index[$p_id])) {
+          $index[$p_id] = array();
+        }
+        $index[$p_id][$node['pos']] = $node;
+      }
+      $this->node_index = $index;
+    }
+    return $this->node_index;
+  }
+
+  function getNodes() {
+    return $this->nodes;
+  }
+
+  /* child nodes of $n, or an empty array when it has none */
+  function getSubNodes($n) {
+    return $this->v($n['id'], array(), $this->getNodeIndex());
+  }
+
+  /**
+   * Re-serializes a node.
+   *
+   * @param array $n     node
+   * @param int   $outer include the node's own start/end tag
+   * @param int   $trim  trim the result (ignored when keep_cdata_whitespace is set)
+   * @return string markup/text content
+   */
+  function getNodeContent($n, $outer = 0, $trim = 1) {
+    if ($n['tag'] == 'cdata') {
+      $r = $n['a']['value'];
+    }
+    else {
+      $r = '';
+      if ($outer) {
+        $r .= '<' . $n['tag'];
+        asort($n['a']);
+        if (isset($n['a']['xmlns']) && $n['a']['xmlns']['']) {
+          $r .= ' xmlns="' . $n['a']['xmlns'][''] . '"';
+        }
+        foreach ($n['a'] as $a => $val) {
+          /* skips synthetic keys containing whitespace (e.g. "href uri") and array values */
+          $r .= preg_match('/^[^\s]+$/', $a) && !is_array($val) ? ' ' . $a . '="' . addslashes($val) . '"' : '';
+        }
+        $r .= $n['empty'] ? '/>' : '>';
+      }
+      if (!$n['empty']) {
+        $r .= $this->v('cdata', '', $n);
+        $sub_nodes = $this->getSubNodes($n);
+        foreach ($sub_nodes as $sub_n) {
+          $r .= $this->getNodeContent($sub_n, 1, 0);
+        }
+        if ($outer) {
+          $r .= '</' . $n['tag'] . '>';
+        }
+      }
+    }
+    return ($trim && !$this->keep_cdata_ws) ? trim($r) : $r;
+  }
+
+  /* */
+
+  /* appends a node and assigns it the next id */
+  function pushNode($n) {
+    $n['id'] = $this->node_count;
+    $this->nodes[$this->node_count] = $n;
+    $this->node_count++;
+  }
+
+  /**
+   * Returns the most recently pushed node, or, when $t is given, walks
+   * backwards to the latest node with that tag.
+   *
+   * @param string $t optional tag name
+   * @return mixed node array; 0 when no node exists yet
+   */
+  function getCurNode($t = '') {
+    $i = 1;
+    do {
+      $r = $this->node_count ? $this->nodes[$this->node_count - $i] : 0;
+      /* $r is 0 before the first node; do not index into it */
+      $found = (!$t || ($r && ($r['tag'] == $t))) ? 1 : 0;
+      $i++;
+    } while (!$found && isset($this->nodes[$this->node_count - $i]));
+    return $r;
+  }
+
+  function updateNode($node) {/* php4-safe */
+    $this->nodes[$node['id']] = $node;
+  }
+
+  /* */
+
+  /* creates the namespace-aware expat parser once and wires the handlers to this object */
+  function initXMLParser() {
+    if (!isset($this->xml_parser)) {
+      $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
+      $parser = xml_parser_create_ns($enc, '');
+      xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
+      xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
+      xml_set_element_handler($parser, 'open', 'close');
+      xml_set_character_data_handler($parser, 'cData');
+      xml_set_start_namespace_decl_handler($parser, 'nsDecl');
+      xml_set_object($parser, $this);
+      $this->xml_parser =& $parser;
+    }
+  }
+
+  /* */
+
+  /**
+   * Start-element handler: records the element as a new node.
+   *
+   * @param mixed  $p parser handle
+   * @param string $t tag name
+   * @param array  $a attributes
+   */
+  function open($p, $t, $a) {
+    $t_exact = $t;
+    /* tags without a colon are lower-cased */
+    $t = strpos($t, ':') ? $t : strtolower($t);
+    /* base check */
+    $base = '';
+    if (($t == 'base') && isset($a['href'])) {
+      $this->base = $a['href'];
+      $base = $a['href'];
+    }
+    /* URIs */
+    foreach (array('href', 'src', 'id') as $uri_a) {
+      if (isset($a[$uri_a])) {
+        /* resolved value is stored next to the raw one under "<name> uri" */
+        $a[$uri_a . ' uri'] = ($uri_a == 'id') ? $this->calcURI('#'.$a[$uri_a]) : $this->calcURI($a[$uri_a]);
+      }
+    }
+    /* ns */
+    if ($a) {
+      foreach ($a as $k => $v) {
+        if (strpos($k, 'xmlns') === 0) {
+          $this->nsDecl($p, trim(substr($k, 5), ':'), $v);
+        }
+      }
+    }
+    /* node */
+    $node = array(
+      'tag' => $t,
+      'tag_exact' => $t_exact,
+      'a' => $a,
+      'level' => $this->level,
+      'pos' => 0,
+      'p_id' => $this->node_count-1,
+      'state' => 'open',
+      'empty' => 0,
+      'cdata' =>''
+    );
+    if ($base) {
+      $node['base'] = $base;
+    }
+    /* parent/sibling */
+    if ($this->node_count) {
+      $l = $this->level;
+      $prev_node = $this->getCurNode();
+      if ($prev_node['level'] == $l) {
+        /* previous node is a sibling */
+        $node['p_id'] = $prev_node['p_id'];
+        $node['pos'] = $prev_node['pos']+1;
+      }
+      elseif($prev_node['level'] > $l) {
+        /* previous node is deeper: climb to the ancestor on this level */
+        while($prev_node['level'] > $l) {
+          if (!isset($this->nodes[$prev_node['p_id']])) {
+            break;
+          }
+          $prev_node = $this->nodes[$prev_node['p_id']];
+        }
+        $node['p_id'] = $prev_node['p_id'];
+        $node['pos'] = $prev_node['pos']+1;
+      }
+    }
+    $this->pushNode($node);
+    $this->level++;
+    /* cdata */
+    $this->cur_cdata="";
+  }
+
+  /* end-element handler: marks the latest node with tag $t as closed */
+  function close($p, $t, $empty = 0) {
+    $node = $this->getCurNode($t);
+    $node['state'] = 'closed';
+    $node['empty'] = $empty;
+    $this->updateNode($node);
+    $this->level--;
+  }
+
+  /**
+   * Character-data handler: text inside a still-open node is appended to its
+   * cdata; text after a closed node becomes a synthetic "cdata" node when
+   * allowCDataNodes is set, and is dropped otherwise.
+   */
+  function cData($p, $d) {
+    $node = $this->getCurNode();
+    if($node['state'] == 'open') {
+      $node['cdata'] .= $d;
+      $this->updateNode($node);
+    }
+    else {/* cdata is sibling of node */
+      if ($this->allowCDataNodes) {
+        $this->open($p, 'cdata', array('value' => $d));
+        $this->close($p, 'cdata');
+      }
+    }
+  }
+
+  /* records a namespace declaration; the first prefix seen for a URI is kept in $this->nsp */
+  function nsDecl($p, $prf, $uri) {
+    if (is_array($uri)) return 1;
+    $this->ns[$prf] = $uri;
+    $this->nsp[$uri] = isset($this->nsp[$uri]) ? $this->nsp[$uri] : $prf;
+  }
+
+  /* */
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Parser (generic)
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2009-12-03
+*/
+
+ARC2::inc('Class');
+
+class ARC2_RDFParser extends ARC2_Class {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ function ARC2_RDFParser($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {/* proxy_host, proxy_port, proxy_skip, http_accept_header, http_user_agent_header, max_redirects, reader, skip_dupes */
+ parent::__init();
+ $this->a['format'] = $this->v('format', false, $this->a);
+ $this->keep_time_limit = $this->v('keep_time_limit', 0, $this->a);
+ $this->triples = array();
+ $this->t_count = 0;
+ $this->added_triples = array();
+ $this->skip_dupes = $this->v('skip_dupes', false, $this->a);
+ $this->bnode_prefix = $this->v('bnode_prefix', 'arc'.substr(md5(uniqid(rand())), 0, 4).'b', $this->a);
+ $this->bnode_id = 0;
+ $this->format = '';
+ }
+
+ /* */
+
+ function setReader(&$reader) {
+ $this->reader =& $reader;
+ }
+
+ /**
+  * Activates the reader for the given document, detects its format and
+  * delegates to the matching format-specific parser.
+  *
+  * @param string $path location handed to the reader
+  * @param string $data optional inline data handed to the reader
+  * @return mixed result of the format parser's parse(), or of addError() when no parser is mapped to the detected format
+  */
+ function parse($path, $data = '') {
+   /* reader */
+   if (!isset($this->reader)) {
+     ARC2::inc('Reader');
+     /* plain assignment: "= & new" is a parse error as of PHP 7 */
+     $this->reader = new ARC2_Reader($this->a, $this);
+   }
+   $this->reader->activate($path, $data) ;
+   /* format detection */
+   $mappings = array(
+     'rdfxml' => 'RDFXML',
+     'turtle' => 'Turtle',
+     'sparqlxml' => 'SPOG',
+     'ntriples' => 'Turtle',
+     'html' => 'SemHTML',
+     'rss' => 'RSS',
+     'atom' => 'Atom',
+     'sgajson' => 'SGAJSON',
+     'cbjson' => 'CBJSON'
+   );
+   $format = $this->reader->getFormat();
+   if (!$format || !isset($mappings[$format])) {
+     return $this->addError('No parser available for "' . $format . '".');
+   }
+   $this->format = $format;
+   /* format parser */
+   $suffix = $mappings[$format] . 'Parser';
+   ARC2::inc($suffix);
+   $cls = 'ARC2_' . $suffix;
+   $this->parser = new $cls($this->a, $this);
+   /* the sub-parser shares this reader instead of opening the document again */
+   $this->parser->setReader($this->reader);
+   return $this->parser->parse($path, $data);
+ }
+
+ /* parses inline $data, using the script URI (ARC2::getScriptURI()) as the path */
+ function parseData($data) {
+ return $this->parse(ARC2::getScriptURI(), $data);
+ }
+
+ /* */
+
+ /* empty in this generic class */
+ function done() {
+ }
+
+ /* */
+
+ /* returns a new blank node id: "_:" + bnode prefix + incremented counter */
+ function createBnodeID(){
+   return '_:' . $this->bnode_prefix . ++$this->bnode_id;
+ }
+
+ /* returns the triples of the format parser, or an empty array when no parser has been created */
+ function getTriples() {
+   $parser = $this->v('parser');
+   if (!$parser) {
+     return array();
+   }
+   return $this->m('getTriples', false, array(), $parser);
+ }
+
+ /* returns the triple count of the format parser, or 0 when no parser has been created */
+ function countTriples() {
+   $parser = $this->v('parser');
+   if (!$parser) {
+     return 0;
+   }
+   return $this->m('countTriples', false, 0, $parser);
+ }
+
+ /* passes the current triples and both arguments on to ARC2::getSimpleIndex() */
+ function getSimpleIndex($flatten_objects = 1, $vals = '') {
+ return ARC2::getSimpleIndex($this->getTriples(), $flatten_objects, $vals);
+ }
+
+ /* re-initialises this parser and drops the reader and the format parser (the latter is re-initialised first) */
+ function reset() {
+   $this->__init();
+   if (isset($this->reader)) {
+     unset($this->reader);
+   }
+   if (!isset($this->parser)) {
+     return;
+   }
+   $this->parser->__init();
+   unset($this->parser);
+ }
+
+ /* */
+
+ /**
+  * Forwards to the format parser's extractRDF() when it has one.
+  *
+  * @param mixed $formats passed through unchanged
+  * @return mixed the parser's result, or null when no parser exists or it has no extractRDF()
+  */
+ function extractRDF($formats = '') {
+   /* isset() first: before parse() there is no parser, and method_exists() on null is a TypeError as of PHP 8 */
+   if (isset($this->parser) && method_exists($this->parser, 'extractRDF')) {
+     return $this->parser->extractRDF($formats);
+   }
+ }
+
+ /* */
+
+ /**
+  * Forwards to the format parser's getEncoding() when it has one.
+  *
+  * @param string $src passed through unchanged
+  * @return mixed the parser's result, or null when no parser exists or it has no getEncoding()
+  */
+ function getEncoding($src = 'config') {
+   /* isset() first: before parse() there is no parser, and method_exists() on null is a TypeError as of PHP 8 */
+   if (isset($this->parser) && method_exists($this->parser, 'getEncoding')) {
+     return $this->parser->getEncoding($src);
+   }
+ }
+
+ /**
+ * returns the array of namespace prefixes encountered during parsing
+ * @return array (keys = namespace URI / values = prefix used)
+ */
+
+ function getParsedNamespacePrefixes() {
+   /* prefer the prefixes collected by the format parser, fall back to this object's own */
+   return isset($this->parser)
+     ? $this->v('nsp', array(), $this->parser)
+     : $this->v('nsp', array());
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF/XML Parser
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2009-12-03
+*/
+
+ARC2::inc('RDFParser');
+
+class ARC2_RDFXMLParser extends ARC2_RDFParser {
+
+ /* constructor: forwards $a and the $caller reference to the parent constructor */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4-style constructor (named like the class): delegates to __construct() */
+ function ARC2_RDFXMLParser($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* resets the RDF/XML parser: configured encoding, state machine (state 0), xml:lang/xml:base defaults, the two built-in namespace prefixes and the subject stack */
+ function __init() {/* reader */
+ parent::__init();
+ $this->encoding = $this->v('encoding', false, $this->a);
+ $this->state = 0;
+ $this->x_lang = '';
+ $this->x_base = $this->base;
+ $this->xml = 'http://www.w3.org/XML/1998/namespace';
+ $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ /* namespace URI => prefix; extended by nsDecl() */
+ $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
+ $this->s_stack = array();
+ $this->s_count = 0;
+ $this->target_encoding = '';
+ }
+
+ /* */
+
+ /**
+  * Streams the document through an expat parser whose handlers emit the triples.
+  *
+  * When expat reports an "Invalid character" error and no fallback was tried yet,
+  * the parser state is reset and the document is parsed once more with a forced
+  * ISO-8859-1 XML declaration.
+  *
+  * @param string $path location handed to the reader
+  * @param string $data optional inline data handed to the reader
+  * @param bool $iso_fallback set on the internal retry
+  * @return mixed result of done(), or of addError() on an XML error
+  */
+ function parse($path, $data = '', $iso_fallback = false) {
+   /* reader */
+   if (!$this->v('reader')) {
+     ARC2::inc('Reader');
+     /* plain assignment: "= & new" is a parse error as of PHP 7 */
+     $this->reader = new ARC2_Reader($this->a, $this);
+   }
+   $this->reader->setAcceptHeader('Accept: application/rdf+xml; q=0.9, */*; q=0.1');
+   $this->reader->activate($path, $data);
+   $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
+   /* xml parser */
+   $this->initXMLParser();
+   /* parse */
+   $first = true;
+   while ($d = $this->reader->readStream()) {
+     if (!$this->keep_time_limit) @set_time_limit($this->v('time_limit', 60, $this->a));
+     if ($iso_fallback && $first) {
+       /* replace the document's own XML declaration in the first chunk */
+       $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
+       $first = false;
+     }
+     if (!xml_parse($this->xml_parser, $d, false)) {
+       $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
+       $line = xml_get_current_line_number($this->xml_parser);
+       /* built before the clean-up below because getEncoding() may consult the reader */
+       $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
+       /* release parser and stream on every error path, not only before the retry */
+       xml_parser_free($this->xml_parser);
+       unset($this->xml_parser);
+       $this->reader->closeStream();
+       if (!$iso_fallback && preg_match("/Invalid character/i", $error_str)) {
+         $this->__init();
+         $this->encoding = 'ISO-8859-1';
+         unset($this->reader);
+         return $this->parse($path, $data, true);
+       }
+       return $this->addError($this->tmp_error);
+     }
+   }
+   $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
+   xml_parser_free($this->xml_parser);
+   /* drop the freed handle so that initXMLParser() builds a fresh parser on the next parse() */
+   unset($this->xml_parser);
+   $this->reader->closeStream();
+   unset($this->reader);
+   return $this->done();
+ }
+
+ /* */
+
+ /* creates the namespace-aware expat parser once and registers open/close/cdata/nsDecl of this object as handlers */
+ function initXMLParser() {
+   if (isset($this->xml_parser)) {
+     return;
+   }
+   /* only these three encodings are passed on, anything else becomes UTF-8 */
+   $enc = 'UTF-8';
+   if (preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m)) {
+     $enc = $m[1];
+   }
+   $parser = xml_parser_create_ns($enc, '');
+   xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
+   xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
+   xml_set_element_handler($parser, 'open', 'close');
+   xml_set_character_data_handler($parser, 'cdata');
+   xml_set_start_namespace_decl_handler($parser, 'nsDecl');
+   xml_set_object($parser, $this);
+   $this->xml_parser =& $parser;
+ }
+
+ /* */
+
+ /**
+  * Returns the encoding according to the requested source.
+  *
+  * @param string $src 'parser' for the expat target encoding, 'config' for the configured encoding (if any); anything else asks the reader
+  * @return mixed encoding name; '' when the reader would be asked but is not set
+  */
+ function getEncoding($src = 'config') {
+   if ($src == 'parser') {
+     return $this->target_encoding;
+   }
+   elseif (($src == 'config') && $this->encoding) {
+     return $this->encoding;
+   }
+   /* parse() unsets the reader when it finishes, so it may be gone here */
+   return isset($this->reader) ? $this->reader->getEncoding() : '';
+ }
+
+ /* */
+
+ /* returns the collected triples (empty array when none are set) */
+ function getTriples() {
+ return $this->v('triples', array());
+ }
+
+ /* returns the number of triples stored by addT() */
+ function countTriples() {
+ return $this->t_count;
+ }
+
+ /* */
+
+ /* pushes $s on the subject stack and records its slot in $s['pos'] (also on the caller's variable, as $s is passed by reference) */
+ function pushS(&$s) {
+   $pos = $this->s_count;
+   $s['pos'] = $pos;
+   $this->s_stack[$pos] = $s;
+   $this->s_count = $pos + 1;
+ }
+
+ /* removes the top entry of the subject stack; a pop on an empty stack leaves it empty instead of driving s_count negative */
+ function popS(){/* php 4.0.x-safe */
+   if ($this->s_count < 1) {
+     /* underflow guard: a negative counter would make the next pushS() write to a negative slot */
+     $this->s_count = 0;
+     $this->s_stack = array();
+     return;
+   }
+   $r = array();
+   $this->s_count--;
+   for ($i = 0, $i_max = $this->s_count; $i < $i_max; $i++) {
+     $r[$i] = $this->s_stack[$i];
+   }
+   $this->s_stack = $r;
+ }
+
+ /* writes $s back into the stack slot stored in $s['pos'] */
+ function updateS($s) {
+ $this->s_stack[$s['pos']] = $s;
+ }
+
+ /* returns the top entry of the subject stack, or false when there is none */
+ function getParentS() {
+   $top = $this->s_count - 1;
+   if ($this->s_count && isset($this->s_stack[$top])) {
+     return $this->s_stack[$top];
+   }
+   return false;
+ }
+
+ /* base URI in scope: the parent's property-level base, else its node-level base, else (empty stack) the document base */
+ function getParentXBase() {
+   $p = $this->getParentS();
+   if (!$p) {
+     return $this->x_base;
+   }
+   if (isset($p['p_x_base']) && $p['p_x_base']) {
+     return $p['p_x_base'];
+   }
+   return isset($p['x_base']) ? $p['x_base'] : '';
+ }
+
+ /* language in scope: the parent's property-level lang, else its node-level lang, else (empty stack) the document lang */
+ function getParentXLang() {
+   $p = $this->getParentS();
+   if (!$p) {
+     return $this->x_lang;
+   }
+   if (isset($p['p_x_lang']) && $p['p_x_lang']) {
+     return $p['p_x_lang'];
+   }
+   return isset($p['x_lang']) ? $p['x_lang'] : '';
+ }
+
+ /* */
+
+ /* stores one triple; with skip_dupes enabled a triple whose serialisation hash is already known is dropped */
+ function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
+   $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
+   if ($this->skip_dupes) {
+     $h = md5(serialize($t));
+     if (isset($this->added_triples[$h])) {
+       return;
+     }
+     $this->added_triples[$h] = true;
+   }
+   $this->triples[$this->t_count] = $t;
+   $this->t_count++;
+ }
+
+ /* adds the four reification triples (rdf:type rdf:Statement, rdf:subject, rdf:predicate, rdf:object) describing s/p/o with $t as the statement URI */
+ function reify($t, $s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
+   $rdf = $this->rdf;
+   $this->addT($t, $rdf . 'type', $rdf . 'Statement', 'uri', 'uri');
+   $this->addT($t, $rdf . 'subject', $s, 'uri', $s_type);
+   $this->addT($t, $rdf . 'predicate', $p, 'uri', 'uri');
+   $this->addT($t, $rdf . 'object', $o, 'uri', $o_type, $o_dt, $o_lang);
+ }
+
+ /* */
+
+ /* start-tag handler: dispatches to h<state>Open(); states without an open handler are reported as an error */
+ function open($p, $t, $a) {
+   $state = $this->state;
+   if ($state == 0) return $this->h0Open($t, $a);
+   if ($state == 1) return $this->h1Open($t, $a);
+   if ($state == 2) return $this->h2Open($t, $a);
+   if ($state == 4) return $this->h4Open($t, $a);
+   if ($state == 5) return $this->h5Open($t, $a);
+   if ($state == 6) return $this->h6Open($t, $a);
+   $this->addError('open() called at state ' . $this->state . ' in '.$t);
+ }
+
+ /* end-tag handler: dispatches to h<state>Close(); states without a close handler are reported as an error */
+ function close($p, $t) {
+   $state = $this->state;
+   if ($state == 1) return $this->h1Close($t);
+   if ($state == 2) return $this->h2Close($t);
+   if ($state == 3) return $this->h3Close($t);
+   if ($state == 4) return $this->h4Close($t);
+   if ($state == 5) return $this->h5Close($t);
+   if ($state == 6) return $this->h6Close($t);
+   $this->addError('close() called at state ' . $this->state . ' in '.$t);
+ }
+
+ /* character data handler: only states 4 and 6 collect text, everything else is ignored (returns false) */
+ function cdata($p, $d) {
+   $state = $this->state;
+   if ($state == 4) {
+     return $this->h4Cdata($d);
+   }
+   if ($state == 6) {
+     return $this->h6Cdata($d);
+   }
+   return false;
+ }
+
+ /* namespace declaration handler: remembers the first prefix seen for each namespace URI */
+ function nsDecl($p, $prf, $uri) {
+   if (!isset($this->nsp[$uri])) {
+     $this->nsp[$uri] = $prf;
+   }
+ }
+
+ /* */
+
+ /* state 0 (document start): picks up xml:lang / xml:base of the root element, moves to state 1 and treats a root other than rdf:RDF as a node element */
+ function h0Open($t, $a) {
+   $lang_attr = $this->xml . 'lang';
+   $base_attr = $this->xml . 'base';
+   $this->x_lang = $this->v($lang_attr, $this->x_lang, $a);
+   $this->x_base = $this->calcURI($this->v($base_attr, $this->x_base, $a));
+   $this->state = 1;
+   if ($t === $this->rdf . 'RDF') {
+     return;
+   }
+   $this->h1Open($t, $a);
+ }
+
+ /* */
+
+ /*
+  * Start tag of a node element.
+  * Determines the subject (rdf:ID, rdf:about, rdf:nodeID or a generated bnode),
+  * links it to the parent property or collection when called in state 4,
+  * adds rdf:type and attribute triples, pushes the subject on the stack
+  * and switches to state 2 (expecting property elements).
+  */
+ function h1Open($t, $a) {
+ $s = array(
+ 'x_base' => isset($a[$this->xml.'base']) ? $this->calcURI($a[$this->xml.'base']) : $this->getParentXBase(),
+ 'x_lang' => isset($a[$this->xml.'lang']) ? $a[$this->xml.'lang'] : $this->getParentXLang(),
+ 'li_count' => 0,
+ );
+ /* ID */
+ if (isset($a[$this->rdf.'ID'])) {
+ $s['type'] = 'uri';
+ $s['value'] = $this->calcURI('#'.$a[$this->rdf.'ID'], $s['x_base']);
+ }
+ /* about */
+ elseif (isset($a[$this->rdf.'about'])) {
+ $s['type'] = 'uri';
+ $s['value'] = $this->calcURI($a[$this->rdf.'about'], $s['x_base']);
+ }
+ /* bnode */
+ else {
+ $s['type'] = 'bnode';
+ if (isset($a[$this->rdf.'nodeID'])) {
+ $s['value'] = '_:'.$a[$this->rdf.'nodeID'];
+ }
+ else {
+ $s['value'] = $this->createBnodeID();
+ }
+ }
+ /* sub-node */
+ if ($this->state === 4) {
+ $sup_s = $this->getParentS();
+ /* new collection */
+ if (isset($sup_s['o_is_coll']) && $sup_s['o_is_coll']) {
+ $coll = array('value' => $this->createBnodeID(), 'type' => 'bnode', 'is_coll' => true, 'x_base' => $s['x_base'], 'x_lang' => $s['x_lang']);
+ $this->addT($sup_s['value'], $sup_s['p'], $coll['value'], $sup_s['type'], $coll['type']);
+ $this->addT($coll['value'], $this->rdf . 'first', $s['value'], $coll['type'], $s['type']);
+ $this->pushS($coll);
+ }
+ /* new entry in existing coll */
+ elseif (isset($sup_s['is_coll']) && $sup_s['is_coll']) {
+ $coll = array('value' => $this->createBnodeID(), 'type' => 'bnode', 'is_coll' => true, 'x_base' => $s['x_base'], 'x_lang' => $s['x_lang']);
+ $this->addT($sup_s['value'], $this->rdf . 'rest', $coll['value'], $sup_s['type'], $coll['type']);
+ $this->addT($coll['value'], $this->rdf . 'first', $s['value'], $coll['type'], $s['type']);
+ $this->pushS($coll);
+ }
+ /* normal sub-node */
+ elseif(isset($sup_s['p']) && $sup_s['p']) {
+ $this->addT($sup_s['value'], $sup_s['p'], $s['value'], $sup_s['type'], $s['type']);
+ }
+ }
+ /* typed node */
+ if ($t !== $this->rdf.'Description') {
+ $this->addT($s['value'], $this->rdf.'type', $t, $s['type'], 'uri');
+ }
+ /* (additional) typing attr */
+ if (isset($a[$this->rdf.'type'])) {
+ $this->addT($s['value'], $this->rdf.'type', $a[$this->rdf.'type'], $s['type'], 'uri');
+ }
+ /* Seq|Bag|Alt */
+ if (in_array($t, array($this->rdf.'Seq', $this->rdf.'Bag', $this->rdf.'Alt'))) {
+ $s['is_con'] = true;
+ }
+ /* any other attrs (skip rdf and xml, except rdf:_, rdf:value, rdf:Seq) */
+ foreach($a as $k => $v) {
+ if (((strpos($k, $this->xml) === false) && (strpos($k, $this->rdf) === false)) || preg_match('/(\_[0-9]+|value|Seq|Bag|Alt|Statement|Property|List)$/', $k)) {
+ /* only attribute names containing a colon after the first character are turned into literal triples */
+ if (strpos($k, ':')) {
+ $this->addT($s['value'], $k, $v, $s['type'], 'literal', '', $s['x_lang']);
+ }
+ }
+ }
+ $this->pushS($s);
+ $this->state = 2;
+ }
+
+ /* */
+
+ /*
+  * Start tag of a property element (state 2).
+  * Works on a copy of the current subject: resolves the property-level base and lang,
+  * maps rdf:li to rdf:_n, stores the property in $s['p'], and then, depending on the
+  * attributes, either emits the triple right away (rdf:resource, rdf:nodeID: state 3),
+  * prepares an XML literal (state 6), opens a parseType="Resource" bnode (state 2),
+  * or waits for a sub-node, collection or literal (state 4).
+  * The modified subject is written back with updateS() at the end.
+  */
+ function h2Open($t, $a) {
+ $s = $this->getParentS();
+ /* clear leftovers from the previous property of this subject */
+ foreach (array('p_x_base', 'p_x_lang', 'p_id', 'o_is_coll') as $k) {
+ unset($s[$k]);
+ }
+ /* base */
+ if (isset($a[$this->xml.'base'])) {
+ $s['p_x_base'] = $this->calcURI($a[$this->xml.'base'], $s['x_base']);
+ }
+ $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : $s['x_base'];
+ /* lang */
+ if (isset($a[$this->xml.'lang'])) {
+ $s['p_x_lang'] = $a[$this->xml.'lang'];
+ }
+ $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : $s['x_lang'];
+ /* adjust li */
+ if ($t === $this->rdf.'li') {
+ $s['li_count']++;
+ $t = $this->rdf.'_'.$s['li_count'];
+ }
+ /* set p */
+ $s['p'] = $t;
+ /* reification */
+ if (isset($a[$this->rdf.'ID'])) {
+ $s['p_id'] = $a[$this->rdf.'ID'];
+ }
+ $o = array('value' => '', 'type' => '', 'x_base' => $b, 'x_lang' => $l);
+ /* resource/rdf:resource */
+ if (isset($a['resource'])) {
+ $a[$this->rdf . 'resource'] = $a['resource'];
+ unset($a['resource']);
+ }
+ if (isset($a[$this->rdf.'resource'])) {
+ $o['value'] = $this->calcURI($a[$this->rdf.'resource'], $b);
+ $o['type'] = 'uri';
+ $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ /* type */
+ if (isset($a[$this->rdf.'type'])) {
+ $this->addT($o['value'], $this->rdf.'type', $a[$this->rdf.'type'], 'uri', 'uri');
+ }
+ /* reification */
+ if (isset($s['p_id'])) {
+ $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ unset($s['p_id']);
+ }
+ $this->state = 3;
+ }
+ /* named bnode */
+ elseif (isset($a[$this->rdf.'nodeID'])) {
+ $o['value'] = '_:' . $a[$this->rdf.'nodeID'];
+ $o['type'] = 'bnode';
+ $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ $this->state = 3;
+ /* reification */
+ if (isset($s['p_id'])) {
+ $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ }
+ }
+ /* parseType */
+ elseif (isset($a[$this->rdf.'parseType'])) {
+ if ($a[$this->rdf.'parseType'] === 'Literal') {
+ $s['o_xml_level'] = 0;
+ $s['o_xml_data'] = '';
+ $s['p_xml_literal_level'] = 0;
+ $s['ns'] = array();
+ $this->state = 6;
+ }
+ elseif ($a[$this->rdf.'parseType'] === 'Resource') {
+ $o['value'] = $this->createBnodeID();
+ $o['type'] = 'bnode';
+ $o['has_closing_tag'] = 0;
+ $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ $this->pushS($o);
+ /* reification */
+ if (isset($s['p_id'])) {
+ $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ unset($s['p_id']);
+ }
+ $this->state = 2;
+ }
+ elseif ($a[$this->rdf.'parseType'] === 'Collection') {
+ $s['o_is_coll'] = true;
+ $this->state = 4;
+ }
+ }
+ /* sub-node or literal */
+ else {
+ $s['o_cdata'] = '';
+ if (isset($a[$this->rdf.'datatype'])) {
+ $s['o_datatype'] = $a[$this->rdf.'datatype'];
+ }
+ $this->state = 4;
+ }
+ /* any other attrs (skip rdf and xml) */
+ foreach($a as $k => $v) {
+ if (((strpos($k, $this->xml) === false) && (strpos($k, $this->rdf) === false)) || preg_match('/(\_[0-9]+|value)$/', $k)) {
+ if (strpos($k, ':')) {
+ /* property attributes describe the object: create a bnode object if none exists yet */
+ if (!$o['value']) {
+ $o['value'] = $this->createBnodeID();
+ $o['type'] = 'bnode';
+ $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ }
+ /* reification */
+ if (isset($s['p_id'])) {
+ $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
+ unset($s['p_id']);
+ }
+ $this->addT($o['value'], $k, $v, $o['type'], 'literal');
+ $this->state = 3;
+ }
+ }
+ }
+ $this->updateS($s);
+ }
+
+ /* */
+
+ /* start tag in state 4: handled as a node element by h1Open() */
+ function h4Open($t, $a) {
+ return $this->h1Open($t, $a);
+ }
+
+ /* */
+
+ /* start tag in state 5: switches to state 4 and handles the tag there */
+ function h5Open($t, $a) {
+ $this->state = 4;
+ return $this->h4Open($t, $a);
+ }
+
+ /* */
+
+ /*
+  * Start tag inside an XML literal (state 6).
+  * Re-serialises the element (prefixed name, namespace declaration on first use,
+  * attributes) into $s['o_xml_data'] and increments the nesting level.
+  * Attribute values are escaped because they arrive entity-decoded; writing them
+  * back raw would produce malformed XML for values containing &, <, > or ".
+  */
+ function h6Open($t, $a) {
+   $s = $this->getParentS();
+   $data = isset($s['o_xml_data']) ? $s['o_xml_data'] : '';
+   $ns = isset($s['ns']) ? $s['ns'] : array();
+   $parts = $this->splitURI($t);
+   if (count($parts) === 1) {
+     $data .= '<'.$t;
+   }
+   else {
+     $ns_uri = $parts[0];
+     $name = $parts[1];
+     if (!isset($this->nsp[$ns_uri])) {
+       /* splitURI() cut at an unknown position: look for a declared namespace that prefixes the tag */
+       foreach ($this->nsp as $tmp1 => $tmp2) {
+         if (strpos($t, $tmp1) === 0) {
+           $ns_uri = $tmp1;
+           $name = substr($t, strlen($tmp1));
+           break;
+         }
+       }
+     }
+     /* an undeclared namespace is treated like the empty prefix (default namespace) */
+     $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
+     $data .= $nsp ? '<' . $nsp . ':' . $name : '<' . $name;
+     /* ns */
+     if (!isset($ns[$nsp.'='.$ns_uri]) || !$ns[$nsp.'='.$ns_uri]) {
+       $data .= $nsp ? ' xmlns:'.$nsp.'="'.$ns_uri.'"' : ' xmlns="'.$ns_uri.'"';
+       $ns[$nsp.'='.$ns_uri] = true;
+       $s['ns'] = $ns;
+     }
+   }
+   foreach ($a as $k => $v) {
+     /* byte-wise escaping, independent of the parser's target encoding */
+     $v = str_replace(array('&', '<', '>', '"'), array('&amp;', '&lt;', '&gt;', '&quot;'), $v);
+     $parts = $this->splitURI($k);
+     if (count($parts) === 1) {
+       $data .= ' '.$k.'="'.$v.'"';
+     }
+     else {
+       $ns_uri = $parts[0];
+       $name = $parts[1];
+       $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
+       $data .= $nsp ? ' '.$nsp.':'.$name.'="'.$v.'"' : ' '.$name.'="'.$v.'"' ;
+     }
+   }
+   $data .= '>';
+   $s['o_xml_data'] = $data;
+   $s['o_xml_level'] = isset($s['o_xml_level']) ? $s['o_xml_level'] + 1 : 1;
+   if ($t == $s['p']) {/* xml container prop */
+     $s['p_xml_literal_level'] = isset($s['p_xml_literal_level']) ? $s['p_xml_literal_level'] + 1 : 1;
+   }
+   $this->updateS($s);
+ }
+
+ /* */
+
+ /* end tag in state 1: back to state 0 */
+ function h1Close($t) {/* end of doc */
+ $this->state = 0;
+ }
+
+ /* */
+
+ /*
+  * End tag while a property was expected (state 2).
+  * Pops the current subject and picks the next state from the new stack top:
+  * 5 by default; 4 (or 1 when the stack is empty) if the new top has no open property;
+  * 2 whenever the popped entry was flagged has_closing_tag = 0.
+  */
+ function h2Close($t) {/* expecting a prop, getting a close */
+ if ($s = $this->getParentS()) {
+ /* only entries that carry has_closing_tag with a falsy value yield 0 here */
+ $has_closing_tag = (isset($s['has_closing_tag']) && !$s['has_closing_tag']) ? 0 : 1;
+ $this->popS();
+ $this->state = 5;
+ if ($s = $this->getParentS()) {/* new s */
+ if (!isset($s['p']) || !$s['p']) {/* p close after collection|parseType=Resource|node close after p close */
+ $this->state = $this->s_count ? 4 : 1;
+ if (!$has_closing_tag) {
+ $this->state = 2;
+ }
+ }
+ elseif (!$has_closing_tag) {
+ $this->state = 2;
+ }
+ }
+ }
+ }
+
+ /* */
+
+ /* end tag in state 3: back to state 2 */
+ function h3Close($t) {/* p close */
+ $this->state = 2;
+ }
+
+ /* */
+
+ /*
+  * End tag of a property element in state 4.
+  * Collection: terminates the list with rdf:rest rdf:nil and unwinds the stack to the
+  * subject whose open property is $t. Otherwise: emits the collected character data as
+  * a literal. Both paths reify if an rdf:ID was given, clear the property and return to state 2.
+  */
+ function h4Close($t) {/* empty p | pClose after cdata | pClose after collection */
+   if ($s = $this->getParentS()) {
+     $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : (isset($s['x_base']) ? $s['x_base'] : '');
+     if (isset($s['is_coll']) && $s['is_coll']) {
+       $this->addT($s['value'], $this->rdf . 'rest', $this->rdf . 'nil', $s['type'], 'uri');
+       /* back to collection start */
+       /* the "$s &&" guard ends the loop once the stack is exhausted; without it a property */
+       /* that never matches $t (e.g. rdf:li, stored as rdf:_n) would loop forever */
+       while ($s && (!isset($s['p']) || ($s['p'] != $t))) {
+         $sub_s = $s;
+         $this->popS();
+         $s = $this->getParentS();
+       }
+       if (!$s) {
+         $this->state = 1;
+         $this->addError('h4Close() could not find the collection property ' . $t . ' on the subject stack');
+         return;
+       }
+       /* reification */
+       if (isset($s['p_id']) && $s['p_id']) {
+         $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $sub_s['value'], $s['type'], $sub_s['type']);
+       }
+       unset($s['p']);
+       $this->updateS($s);
+     }
+     else {
+       $dt = isset($s['o_datatype']) ? $s['o_datatype'] : '';
+       $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : (isset($s['x_lang']) ? $s['x_lang'] : '');
+       $o = array('type' => 'literal', 'value' => $s['o_cdata']);
+       $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type'], $dt, $l);
+       /* reification */
+       if (isset($s['p_id']) && $s['p_id']) {
+         $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type'], $dt, $l);
+       }
+       unset($s['o_cdata']);
+       unset($s['o_datatype']);
+       unset($s['p']);
+       $this->updateS($s);
+     }
+     $this->state = 2;
+   }
+ }
+
+ /* */
+
+ /* end tag in state 5: clears the open property of the current subject and returns to state 2 (no-op on an empty stack) */
+ function h5Close($t) {/* p close */
+   $s = $this->getParentS();
+   if (!$s) {
+     return;
+   }
+   unset($s['p']);
+   $this->updateS($s);
+   $this->state = 2;
+ }
+
+ /* */
+
+ /*
+  * End tag inside an XML literal (state 6).
+  * At nesting level 0 the tag closes the property itself: the collected markup is added
+  * as a literal with datatype rdf:XMLLiteral and the parser returns to state 2.
+  * Otherwise the closing tag is appended to the collected markup and the level is decremented.
+  */
+ function h6Close($t) {
+ if ($s = $this->getParentS()) {
+ $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : (isset($s['x_lang']) ? $s['x_lang'] : '');
+ $data = $s['o_xml_data'];
+ $level = $s['o_xml_level'];
+ if ($level === 0) {/* pClose */
+ $this->addT($s['value'], $s['p'], trim($data, ' '), $s['type'], 'literal', $this->rdf.'XMLLiteral', $l);
+ unset($s['o_xml_data']);
+ $this->state = 2;
+ }
+ else {
+ $parts = $this->splitURI($t);
+ if (count($parts) == 1) {
+ $data .= '</'.$t.'>';
+ }
+ else {
+ $ns_uri = $parts[0];
+ $name = $parts[1];
+ /* same namespace lookup as in h6Open(): fall back to a declared namespace that prefixes the tag */
+ if (!isset($this->nsp[$ns_uri])) {
+ foreach ($this->nsp as $tmp1 => $tmp2) {
+ if (strpos($t, $tmp1) === 0) {
+ $ns_uri = $tmp1;
+ $name = substr($t, strlen($tmp1));
+ break;
+ }
+ }
+ }
+ /* NOTE(review): undefined index here if the namespace is still unknown after the lookup above */
+ $nsp = $this->nsp[$ns_uri];
+ $data .= $nsp ? '</'.$nsp.':'.$name.'>' : '</'.$name.'>';
+ }
+ $s['o_xml_data'] = $data;
+ $s['o_xml_level'] = $level - 1;
+ if ($t == $s['p']) {/* xml container prop */
+ $s['p_xml_literal_level']--;
+ }
+ }
+ $this->updateS($s);
+ }
+ }
+
+ /* */
+
+ /* character data in state 4: appended to the o_cdata buffer of the current subject (no-op on an empty stack) */
+ function h4Cdata($d) {
+   $s = $this->getParentS();
+   if (!$s) {
+     return;
+   }
+   if (isset($s['o_cdata'])) {
+     $s['o_cdata'] .= $d;
+   }
+   else {
+     $s['o_cdata'] = $d;
+   }
+   $this->updateS($s);
+ }
+
+ /* */
+
+ /* character data inside an XML literal: escaped with htmlspecialchars (quotes untouched) and appended to the collected markup */
+ function h6Cdata($d) {
+   $s = $this->getParentS();
+   if (!$s) {
+     return;
+   }
+   $has_data = isset($s['o_xml_data']);
+   if ($has_data || preg_match("/[\n\r]/", $d) || trim($d)) {
+     $escaped = htmlspecialchars($d, ENT_NOQUOTES);
+     $s['o_xml_data'] = $has_data ? $s['o_xml_data'] . $escaped : $escaped;
+   }
+   $this->updateS($s);
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 RSS Parser
+author: Benjamin Nowack
+version: 2008-02-10
+*/
+
+ARC2::inc('LegacyXMLParser');
+
+class ARC2_RSSParser extends ARC2_LegacyXMLParser {
+
+ /* constructor: forwards $a and the $caller reference to the parent constructor */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4-style constructor (named like the class): delegates to __construct() */
+ function ARC2_RSSParser($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* resets the triple buffers, counters and bnode id generator; duplicate skipping is switched off */
+ function __init() {/* reader */
+ parent::__init();
+ $this->triples = array();
+ $this->target_encoding = '';
+ $this->t_count = 0;
+ $this->added_triples = array();
+ $this->skip_dupes = false;
+ /* random default prefix for generated bnode ids, used when no bnode_prefix is configured */
+ $this->bnode_prefix = $this->v('bnode_prefix', 'arc'.substr(md5(uniqid(rand())), 0, 4).'b', $this->a);
+ $this->bnode_id = 0;
+ $this->cache = array();
+ $this->allowCDataNodes = 0;
+ }
+
+ /* */
+
+ /* runs extractRDF(), which turns the node index into triples */
+ function done() {
+ $this->extractRDF();
+ }
+
+ /* */
+
+ /* binds $this->reader to the passed reader (by reference) */
+ function setReader(&$reader) {
+ $this->reader =& $reader;
+ }
+
+ /* returns a new blank node id: "_:" + bnode prefix + incremented counter */
+ function createBnodeID(){
+   return '_:' . $this->bnode_prefix . ++$this->bnode_id;
+ }
+
+ /* stores the given triple array; with skip_dupes enabled a triple whose serialisation hash is already known is dropped */
+ function addT($t) {
+   if ($this->skip_dupes) {
+     $h = md5(serialize($t));
+     if (isset($this->added_triples[$h])) {
+       return;
+     }
+     $this->added_triples[$h] = true;
+   }
+   $this->triples[$this->t_count] = $t;
+   $this->t_count++;
+ }
+
+ /* returns the collected triples (empty array when none are set) */
+ function getTriples() {
+ return $this->v('triples', array());
+ }
+
+ /* returns the number of triples stored by addT() */
+ function countTriples() {
+ return $this->t_count;
+ }
+
+ /* passes the current triples and both arguments on to ARC2::getSimpleIndex() */
+ function getSimpleIndex($flatten_objects = 1, $vals = '') {
+ return ARC2::getSimpleIndex($this->getTriples(), $flatten_objects, $vals);
+ }
+
+ /* */
+
+ /*
+  * Sets up the vocabulary URIs and the tag => property mappings, then walks the node
+  * index and converts every "channel" and "item" node into triples.
+  */
+ function extractRDF() {
+   $index = $this->getNodeIndex();
+   $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+   $this->rss = 'http://purl.org/rss/1.0/';
+   $this->dc = 'http://purl.org/dc/elements/1.1/';
+   $this->dct = 'http://purl.org/dc/terms/';
+   $this->content = 'http://purl.org/rss/1.0/modules/content/';
+   $this->enc = 'http://purl.oclc.org/net/rss_2.0/enc#';
+   /* un-prefixed tag name => property URI */
+   $this->mappings = array(
+     'channel' => $this->rss . 'channel',
+     'item' => $this->rss . 'item',
+     'title' => $this->rss . 'title',
+     'link' => $this->rss . 'link',
+     'description' => $this->rss . 'description',
+     'guid' => $this->dc . 'identifier',
+     'author' => $this->dc . 'creator',
+     'category' => $this->dc . 'subject',
+     'pubDate' => $this->dc . 'date',
+     'pubdate' => $this->dc . 'date',
+     'source' => $this->dc . 'source',
+     'enclosure' => $this->enc . 'enclosure',
+   );
+   /* properties whose values are always stored as literals */
+   $this->dt_props = array(
+     $this->dc . 'identifier',
+     $this->rss . 'link'
+   );
+   foreach ($index as $p_id => $nodes) {
+     foreach ($nodes as $pos => $node) {
+       $tag = $this->v('tag', '', $node);
+       if ($tag == 'channel') {
+         $struct = $this->extractChannel($index[$node['id']]);
+       }
+       elseif ($tag == 'item') {
+         $struct = $this->extractItem($index[$node['id']]);
+       }
+       else {
+         continue;
+       }
+       foreach (ARC2::getTriplesFromIndex($struct) as $t) {
+         $this->addT($t);
+       }
+     }
+   }
+ }
+
+ /**
+  * Builds the resource index for a channel, keyed by the channel's link.
+  *
+  * @param array $els child nodes of the channel element
+  * @return array subject => properties; empty when the channel has no link to act as subject
+  */
+ function extractChannel($els) {
+   $res = array($this->rdf . 'type' => array(array('value' => $this->rss . 'channel', 'type' => 'uri')));
+   $res = array_merge($res, $this->extractProps($els, 'channel'));
+   $link = $this->rss . 'link';
+   /* without a link there is no subject: return nothing instead of triples with an empty subject */
+   if (!isset($res[$link])) {
+     return array();
+   }
+   return array($res[$link][0]['value'] => $res);
+ }
+
+ /**
+  * Builds the resource index for an item, keyed by its link or, failing that, its identifier.
+  *
+  * @param array $els child nodes of the item element
+  * @return array subject => properties; empty when the item has neither link nor identifier
+  */
+ function extractItem($els) {
+   $res = array($this->rdf . 'type' => array(array('value' => $this->rss . 'item', 'type' => 'uri')));
+   $res = array_merge($res, $this->extractProps($els, 'item'));
+   if (isset($res[$this->rss . 'link'])) return array($res[$this->rss . 'link'][0]['value'] => $res);
+   if (isset($res[$this->dc . 'identifier'])) return array($res[$this->dc . 'identifier'][0]['value'] => $res);
+   /* always return an array so that callers can iterate the result */
+   return array();
+ }
+
+ /**
+  * Converts the child nodes of a channel or item into a property => values structure.
+  *
+  * Un-prefixed tags are translated through $this->mappings (unmapped ones are skipped),
+  * prefixed tags are used as they are. The value is the node's character data, or its
+  * url / href attribute when there is none. Dates that do not start with a four-digit
+  * year are normalised to a UTC timestamp.
+  *
+  * @param array $els child nodes (arrays with 'tag', 'cdata' and 'a' entries)
+  * @param string $container 'channel' or 'item'; items nested in a channel are skipped
+  * @return array property URI => list of array('value' => ..., 'type' => 'literal'|'uri')
+  */
+ function extractProps($els, $container) {
+   $res = array();
+   foreach ($els as $info) {
+     /* key */
+     $tag = $info['tag'];
+     if (!preg_match('/^[a-z0-9]+\:/i', $tag)) {
+       $k = isset($this->mappings[$tag]) ? $this->mappings[$tag] : '';
+     }
+     else {
+       $k = $tag;
+     }
+     if (($container == 'channel') && ($k == $this->rss . 'item')) continue;
+     if (!$k) continue;
+     /* val */
+     $attrs = isset($info['a']) ? $info['a'] : array();
+     $v = isset($info['cdata']) ? $info['cdata'] : '';
+     if (!$v) $v = $this->v('url', '', $attrs);
+     if (!$v) $v = $this->v('href', '', $attrs);
+     /* NOTE(review): the former enclosure branch only filled a local array that was never read (length/type were never turned into triples); it has been removed */
+     /* date handling */
+     if (in_array($k, array($this->dc . 'date', $this->dct . 'modified'))) {
+       if (!preg_match('/^[0-9]{4}/', $v) && ($sub_v = strtotime($v)) && ($sub_v != -1)) {
+         /* gmdate() formats in UTC directly; subtracting date('Z') and formatting in local time is off by the DST difference near a DST change */
+         $v = gmdate('Y-m-d\TH:i:s\Z', $sub_v);
+       }
+     }
+     if (!isset($res[$k])) $res[$k] = array();
+     /* literal for the listed literal-only properties and for anything that does not look like an absolute URI */
+     $is_literal = in_array($k, $this->dt_props) || !preg_match('/^[a-z0-9]+\:[^\s]+$/is', $v);
+     $res[$k][] = array('value' => $v, 'type' => $is_literal ? 'literal' : 'uri');
+   }
+   return $res;
+ }
+
+ /* */
+
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 SG API JSON Parser
+author: Benjamin Nowack
+version: 2008-07-17 (Tweak: Moved re-usable code to new ARC2_JSONParser)
+*/
+
+ARC2::inc('JSONParser');
+
+class ARC2_SGAJSONParser extends ARC2_JSONParser {
+
+ /* constructor: forwards $a and the $caller reference to the parent constructor */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4-style constructor (named like the class): delegates to __construct() */
+ function ARC2_SGAJSONParser($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* sets the rdf namespace URI and registers its prefix in $this->nsp */
+ function __init() {/* reader */
+ parent::__init();
+ $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ $this->nsp = array($this->rdf => 'rdf');
+ }
+
+ /* */
+
+ /* runs extractRDF(), which turns the parsed structure into triples */
+ function done() {
+ $this->extractRDF();
+ }
+
+ /* adds an rdfs:seeAlso triple from the context URL to every other URL found in the parsed structure */
+ function extractRDF() {
+   $context = $this->getContext();
+   foreach ($this->getURLs($this->struct) as $url) {
+     if ($url == $context) {
+       continue;
+     }
+     $this->addT($context, 'http://www.w3.org/2000/01/rdf-schema#seeAlso', $url, 'uri', 'uri');
+   }
+ }
+
+ /* returns the first value of the structure's canonical_mapping entry; '' when that entry is missing */
+ function getContext() {
+   if (!isset($this->struct['canonical_mapping'])) {
+     return '';
+   }
+   foreach ($this->struct['canonical_mapping'] as $first) {
+     return $first;
+   }
+ }
+
+ /**
+  * Recursively collects every string key and string value that starts with "http://".
+  *
+  * @param mixed $struct decoded structure (array or scalar leaf)
+  * @return array list of distinct URLs in order of first appearance
+  */
+ function getURLs($struct) {
+   /* URL => true; array keys give de-duplication without repeated in_array() scans and keep insertion order */
+   $seen = array();
+   if (is_array($struct)) {
+     foreach ($struct as $k => $v) {
+       if (is_string($k) && preg_match('/^http:\/\//', $k)) $seen[$k] = true;
+       foreach ($this->getURLs($v) as $sub_v) {
+         $seen[$sub_v] = true;
+       }
+     }
+   }
+   /* is_string(): leaves may be null, booleans or numbers, which must not be handed to preg_match() */
+   elseif (is_string($struct) && preg_match('/^http:\/\//', $struct)) {
+     $seen[$struct] = true;
+   }
+   return array_keys($seen);
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 SPARQL Parser
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-04-11
+*/
+
+ARC2::inc('TurtleParser');
+
+class ARC2_SPARQLParser extends ARC2_TurtleParser {
+
+ /* constructor: forwards $a and the $caller reference to the parent constructor */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4-style constructor (named like the class): delegates to __construct() */
+ function ARC2_SPARQLParser($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* resets the bnode id generator and the bnode pattern index */
+ function __init() {
+ parent::__init();
+ /* random default prefix for generated bnode ids, used when no bnode_prefix is configured */
+ $this->bnode_prefix = $this->v('bnode_prefix', 'arc'.substr(md5(uniqid(rand())), 0, 4).'b', $this->a);
+ $this->bnode_id = 0;
+ $this->bnode_pattern_index = array('patterns' => array(), 'bnodes' => array());
+ }
+
+ /* */
+
+ /**
+  * Parses a SPARQL query string into $this->r (see getQueryInfos()).
+  *
+  * Anything left over after the query, apart from trailing comments, is reported
+  * through addError() together with its first 30 characters.
+  *
+  * @param string $q query text
+  * @param string $src optional source used to calculate the base; the request URI is used otherwise
+  */
+ function parse($q, $src = '') {
+   $this->setDefaultPrefixes();
+   $this->base = $src ? $this->calcBase($src) : ARC2::getRequestURI();
+   $this->r = array(
+     'base' => '',
+     'vars' => array(),
+     'prefixes' => array()
+   );
+   $this->unparsed_code = $q;
+   list($r, $v) = $this->xQuery($q);
+   if ($r) {
+     $this->r['query'] = $r;
+     $this->unparsed_code = trim($v);
+   }
+   elseif (!$this->getErrors() && !$this->unparsed_code) {
+     $this->addError('Query not properly closed');
+   }
+   $this->r['prefixes'] = $this->prefixes;
+   $this->r['base'] = $this->base;
+   /* remove trailing comments */
+   while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) $this->unparsed_code = $m[2];
+   if ($this->unparsed_code && !$this->getErrors()) {
+     /* flatten line breaks only; the former class [\x0a|\x0d] also replaced literal "|" characters in the snippet */
+     $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
+     $msg = trim($rest) ? 'Could not properly handle "' . $rest . '"' : 'Syntax error, probably an incomplete pattern';
+     $this->addError($msg);
+   }
+ }
+
+ /* returns the structure built by parse() (empty array when nothing has been parsed) */
+ function getQueryInfos() {
+ return $this->v('r', array());
+ }
+
+ /* 1 */
+
+ /* consumes the prologue, then tries the Select, Construct, Describe and Ask parsers in turn; returns array(query, rest) or array(0, rest) */
+ function xQuery($v) {
+   list($r, $v) = $this->xPrologue($v);
+   foreach (array('Select', 'Construct', 'Describe', 'Ask') as $type) {
+     $method = 'x' . $type . 'Query';
+     list($r, $v) = $this->$method($v);
+     if ($r) {
+       return array($r, $v);
+     }
+   }
+   return array(0, $v);
+ }
+
+ /* 2 */
+
+ /* consumes an optional base declaration and any number of prefix declarations; returns array(1|0, rest) depending on whether anything was found */
+ function xPrologue($v) {
+   $found = 0;
+   list($base, $v) = $this->xBaseDecl($v);
+   if ($base) {
+     $this->base = $base;
+     $found = 1;
+   }
+   while (true) {
+     list($decl, $v) = $this->xPrefixDecl($v);
+     if (!$decl) {
+       break;
+     }
+     $this->prefixes[$decl['prefix']] = $decl['uri'];
+     $found = 1;
+   }
+   return array($found, $v);
+ }
+
+ /* 5.. */
+
+ /*
+  * Parses a SELECT query: optional DISTINCT/REDUCED, "*" or a list of result vars,
+  * dataset clauses, the (mandatory) where clause and solution modifiers.
+  * Returns array(query infos, rest) or array(0, original input) when this is not a SELECT
+  * query or the where clause cannot be parsed.
+  */
+ function xSelectQuery($v) {
+ if ($sub_r = $this->x('SELECT\s+', $v)) {
+ $r = array(
+ 'type' => 'select',
+ 'result_vars' => array(),
+ 'dataset' => array(),
+ );
+ $all_vars = 0;
+ $sub_v = $sub_r[1];
+ /* distinct, reduced */
+ if ($sub_r = $this->x('(DISTINCT|REDUCED)\s+', $sub_v)) {
+ $r[strtolower($sub_r[1])] = 1;
+ $sub_v = $sub_r[2];
+ }
+ /* result vars */
+ if ($sub_r = $this->x('\*\s+', $sub_v)) {
+ $all_vars = 1;
+ $sub_v = $sub_r[1];
+ }
+ else {
+ while ((list($sub_r, $sub_v) = $this->xResultVar($sub_v)) && $sub_r) {
+ $r['result_vars'][] = $sub_r;
+ }
+ }
+ if (!$all_vars && !count($r['result_vars'])) {
+ $this->addError('No result bindings specified.');
+ }
+ /* dataset */
+ while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
+ $r['dataset'][] = $sub_r;
+ }
+ /* where */
+ if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
+ $r['pattern'] = $sub_r;
+ }
+ else {
+ return array(0, $v);
+ }
+ /* solution modifier */
+ if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
+ $r = array_merge($r, $sub_r);
+ }
+ /* all vars */
+ /* "*" is expanded to the variables recorded in $this->r['vars']; a literal '*' is kept when there are none */
+ if ($all_vars) {
+ foreach ($this->r['vars'] as $var) {
+ $r['result_vars'][] = array('var' => $var, 'aggregate' => 0, 'alias' => '');
+ }
+ if (!$r['result_vars']) {
+ $r['result_vars'][] = '*';
+ }
+ }
+ return array($r, $sub_v);
+ }
+ return array(0, $v);
+ }
+
+ /* a result var is parsed like any other var: delegates to xVar() */
+ function xResultVar($v) {
+ return $this->xVar($v);
+ }
+
+ /* 6.. */
+
+ /*
+  * Parses a CONSTRUCT query: the construct template, dataset clauses, the (mandatory)
+  * where clause and solution modifiers.
+  * Returns array(query infos, rest) or array(0, original input); a missing template is
+  * also reported through addError().
+  */
+ function xConstructQuery($v) {
+ if ($sub_r = $this->x('CONSTRUCT\s*', $v)) {
+ $r = array(
+ 'type' => 'construct',
+ 'dataset' => array(),
+ );
+ $sub_v = $sub_r[1];
+ /* construct template */
+ /* is_array() rather than a truth test, so an empty template array is accepted */
+ if ((list($sub_r, $sub_v) = $this->xConstructTemplate($sub_v)) && is_array($sub_r)) {
+ $r['construct_triples'] = $sub_r;
+ }
+ else {
+ $this->addError('Construct Template not found');
+ return array(0, $v);
+ }
+ /* dataset */
+ while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
+ $r['dataset'][] = $sub_r;
+ }
+ /* where */
+ if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
+ $r['pattern'] = $sub_r;
+ }
+ else {
+ return array(0, $v);
+ }
+ /* solution modifier */
+ if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
+ $r = array_merge($r, $sub_r);
+ }
+ return array($r, $sub_v);
+ }
+ return array(0, $v);
+ }
+
+ /* 7.. */
+
+ /*
+  * Parses a DESCRIBE query: "*" or a mixed list of result vars and IRIs, dataset clauses,
+  * an optional where clause and solution modifiers.
+  * Returns array(query infos, rest) or array(0, original input) when this is not a
+  * DESCRIBE query.
+  */
+ function xDescribeQuery($v) {
+ if ($sub_r = $this->x('DESCRIBE\s+', $v)) {
+ $r = array(
+ 'type' => 'describe',
+ 'result_vars' => array(),
+ 'result_uris' => array(),
+ 'dataset' => array(),
+ );
+ $sub_v = $sub_r[1];
+ $all_vars = 0;
+ /* result vars/uris */
+ if ($sub_r = $this->x('\*\s+', $sub_v)) {
+ $all_vars = 1;
+ $sub_v = $sub_r[1];
+ }
+ else {
+ /* keep going as long as a round finds at least one var or IRI */
+ do {
+ $proceed = 0;
+ if ((list($sub_r, $sub_v) = $this->xResultVar($sub_v)) && $sub_r) {
+ $r['result_vars'][] = $sub_r;
+ $proceed = 1;
+ }
+ if ((list($sub_r, $sub_v) = $this->xIRIref($sub_v)) && $sub_r) {
+ $r['result_uris'][] = $sub_r;
+ $proceed =1;
+ }
+ } while ($proceed);
+ }
+ if (!$all_vars && !count($r['result_vars']) && !count($r['result_uris'])) {
+ $this->addError('No result bindings specified.');
+ }
+ /* dataset */
+ while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
+ $r['dataset'][] = $sub_r;
+ }
+ /* where */
+ if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
+ $r['pattern'] = $sub_r;
+ }
+ /* solution modifier */
+ if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
+ $r = array_merge($r, $sub_r);
+ }
+ /* all vars */
+ if ($all_vars) {
+ foreach ($this->r['vars'] as $var) {
+ $r['result_vars'][] = array('var' => $var, 'aggregate' => 0, 'alias' => '');
+ }
+ }
+ return array($r, $sub_v);
+ }
+ return array(0, $v);
+ }
+
+ /* 8.. */
+
+ /* Parses an ASK query: optional dataset clauses followed by a mandatory WHERE pattern. */
+ /* Logs an error and returns array(0, $v) when the pattern is missing or invalid. */
+ function xAskQuery($v) {
+   $m = $this->x('ASK\s+', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   $query = array(
+     'type' => 'ask',
+     'dataset' => array(),
+   );
+   $rest = $m[1];
+   /* dataset */
+   while (true) {
+     list($clause, $rest) = $this->xDatasetClause($rest);
+     if (!$clause) {
+       break;
+     }
+     $query['dataset'][] = $clause;
+   }
+   /* where */
+   list($pattern, $rest) = $this->xWhereClause($rest);
+   if ($pattern) {
+     $query['pattern'] = $pattern;
+     return array($query, $rest);
+   }
+   $this->addError('Missing or invalid WHERE clause.');
+   return array(0, $v);
+ }
+
+ /* 9, 10, 11, 12 */
+
+ /* Parses "FROM <iri>" or "FROM NAMED <iri>"; returns array(array('graph' => ..., 'named' => 0|1), rest). */
+ function xDatasetClause($v) {
+   $m = $this->x('FROM(\s+NAMED)?\s+', $v);
+   if ($m) {
+     $named = $m[1] ? 1 : 0;
+     list($graph, $rest) = $this->xIRIref($m[2]);
+     if ($graph) {
+       $clause = array('graph' => $graph, 'named' => $named);
+       return array($clause, $rest);
+     }
+   }
+   return array(0, $v);
+ }
+
+ /* 13 */
+
+ /* Skips the optional WHERE keyword, then parses the group graph pattern that follows. */
+ function xWhereClause($v) {
+   $m = $this->x('(WHERE)?', $v);
+   if ($m) {
+     $v = $m[2];
+   }
+   list($pattern, $v) = $this->xGroupGraphPattern($v);
+   if ($pattern) {
+     return array($pattern, $v);
+   }
+   /* note: $v is whatever xGroupGraphPattern handed back, i.e. without the WHERE keyword */
+   return array(0, $v);
+ }
+
+ /* 14, 15 */
+
+ /* Collects ORDER BY infos and any number of LIMIT/OFFSET clauses; array(0, $v) if nothing was consumed. */
+ function xSolutionModifier($v) {
+   $modifiers = array();
+   list($order, $rest) = $this->xOrderClause($v);
+   if ($order) {
+     $modifiers['order_infos'] = $order;
+   }
+   while (true) {
+     list($slice, $rest) = $this->xLimitOrOffsetClause($rest);
+     if (!$slice) {
+       break;
+     }
+     $modifiers = array_merge($modifiers, $slice);
+   }
+   if ($v == $rest) {
+     return array(0, $v);
+   }
+   return array($modifiers, $rest);
+ }
+
+ /* 18, 19 */
+
+ /* Parses "LIMIT n" / "OFFSET n" where n is an integer or a placeholder; returns array(array(key => n), rest). */
+ function xLimitOrOffsetClause($v) {
+   $m = $this->x('(LIMIT|OFFSET)', $v);
+   if ($m) {
+     $key = strtolower($m[1]);
+     /* strict comparison against false so that a value of 0 is still accepted */
+     $parsed = $this->xINTEGER($m[2]);
+     list($value, $rest) = $parsed;
+     if ($parsed && ($value !== false)) {
+       return array(array($key => $value), $rest);
+     }
+     $parsed = $this->xPlaceholder($rest);
+     list($value, $rest) = $parsed;
+     if ($parsed && ($value !== false)) {
+       return array(array($key => $value), $rest);
+     }
+   }
+   return array(0, $v);
+ }
+
+ /* 16 */
+
+ /* Parses "ORDER BY" followed by one or more order conditions; logs an error when none follow. */
+ function xOrderClause($v) {
+   $m = $this->x('ORDER BY\s+', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   $rest = $m[1];
+   $conditions = array();
+   while (true) {
+     list($condition, $rest) = $this->xOrderCondition($rest);
+     if (!$condition) {
+       break;
+     }
+     $conditions[] = $condition;
+   }
+   if (!count($conditions)) {
+     $this->addError('No order conditions specified.');
+     return array(0, $v);
+   }
+   return array($conditions, $rest);
+ }
+
+ /* 17, 27 */
+
+ /* Parses one ORDER BY condition: ASC(...)/DESC(...), a variable, a bracketted expression, */
+ /* a built-in call or a function call. Every successful result carries a 'direction' key */
+ /* ('asc' unless DESC was given). Returns array(condition, rest) or array(0, $v). */
+ function xOrderCondition($v) {
+   if ($sub_r = $this->x('(ASC|DESC)', $v)) {
+     $dir = strtolower($sub_r[1]);
+     $sub_v = $sub_r[2];
+     if ((list($sub_r, $sub_v) = $this->xBrackettedExpression($sub_v)) && $sub_r) {
+       $sub_r['direction'] = $dir;
+       return array($sub_r, $sub_v);
+     }
+   }
+   elseif ((list($sub_r, $sub_v) = $this->xVar($v)) && $sub_r) {
+     $sub_r['direction'] = 'asc';
+     return array($sub_r, $sub_v);
+   }
+   elseif ((list($sub_r, $sub_v) = $this->xBrackettedExpression($v)) && $sub_r) {
+     /* previously the only branch without a direction; default to ascending like its siblings */
+     $sub_r['direction'] = 'asc';
+     return array($sub_r, $sub_v);
+   }
+   elseif ((list($sub_r, $sub_v) = $this->xBuiltInCall($v)) && $sub_r) {
+     $sub_r['direction'] = 'asc';
+     return array($sub_r, $sub_v);
+   }
+   elseif ((list($sub_r, $sub_v) = $this->xFunctionCall($v)) && $sub_r) {
+     $sub_r['direction'] = 'asc';
+     return array($sub_r, $sub_v);
+   }
+   return array(0, $v);
+ }
+
+ /* 20 */
+
+ /* Parses a group graph pattern "{ ... }": an optional leading triples block followed by any mix of */
+ /* OPTIONAL/GRAPH/UNION patterns, FILTERs, further triples blocks and placeholders. */
+ /* Returns array(group_infos, rest); logs an error and returns array(0, $v) if the group is not closed. */
+ function xGroupGraphPattern($v) {
+   /* random id under which the bnode labels of the current triples blocks are indexed */
+   $pattern_id = substr(md5(uniqid(rand())), 0, 4);
+   if ($sub_r = $this->x('\{', $v)) {
+     $r = array('type' => 'group', 'patterns' => array());
+     $sub_v = $sub_r[1];
+     if ((list($sub_r, $sub_v) = $this->xTriplesBlock($sub_v)) && $sub_r) {
+       $this->indexBnodes($sub_r, $pattern_id);
+       $r['patterns'][] = array('type' => 'triples', 'patterns' => $sub_r);
+     }
+     do {
+       $proceed = 0;
+       if ((list($sub_r, $sub_v) = $this->xGraphPatternNotTriples($sub_v)) && $sub_r) {
+         $r['patterns'][] = $sub_r;
+         /* a fresh id is drawn after each non-triples pattern, so later triples are indexed separately */
+         $pattern_id = substr(md5(uniqid(rand())), 0, 4);
+         $proceed = 1;
+       }
+       elseif ((list($sub_r, $sub_v) = $this->xFilter($sub_v)) && $sub_r) {
+         $r['patterns'][] = array('type' => 'filter', 'constraint' => $sub_r);
+         $proceed = 1;
+       }
+       /* optional "." separator */
+       if ($sub_r = $this->x('\.', $sub_v)) {
+         $sub_v = $sub_r[1];
+       }
+       if ((list($sub_r, $sub_v) = $this->xTriplesBlock($sub_v)) && $sub_r) {
+         $this->indexBnodes($sub_r, $pattern_id);
+         $r['patterns'][] = array('type' => 'triples', 'patterns' => $sub_r);
+         $proceed = 1;
+       }
+       if ((list($sub_r, $sub_v) = $this->xPlaceholder($sub_v)) && $sub_r) {
+         $r['patterns'][] = $sub_r;
+         $proceed = 1;
+       }
+     } while ($proceed);
+     if ($sub_r = $this->x('\}', $sub_v)) {
+       $sub_v = $sub_r[1];
+       return array($r, $sub_v);
+     }
+     /* flatten line breaks in the 30-char excerpt; the old class [\x0a|\x0d] also blanked out literal pipes */
+     $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($sub_v, 0, 30));
+     $this->addError('Incomplete or invalid Group Graph pattern. Could not handle "' . $rest . '"');
+   }
+   return array(0, $v);
+ }
+
+ /* Records the triples block and remembers, per bnode label, the pattern id it was first seen in. */
+ /* A label that shows up again under a different pattern id is reported as an error. */
+ function indexBnodes($triples, $pattern_id) {
+   /* the pattern id is the index key (the former count()-based value was overwritten before use) */
+   $index_id = $pattern_id;
+   $this->bnode_pattern_index['patterns'][] = $triples;
+   foreach ($triples as $t) {
+     foreach (array('s', 'p', 'o') as $term) {
+       if ($t[$term . '_type'] == 'bnode') {
+         $val = $t[$term];
+         if (isset($this->bnode_pattern_index['bnodes'][$val]) && ($this->bnode_pattern_index['bnodes'][$val] != $index_id)) {
+           $this->addError('Re-used bnode label "' .$val. '" across graph patterns');
+         }
+         else {
+           $this->bnode_pattern_index['bnodes'][$val] = $index_id;
+         }
+       }
+     }
+   }
+ }
+
+ /* 22.., 25.. */
+
+ /* Tries OPTIONAL, then GRAPH, then a chain of group patterns joined by UNION. */
+ /* A single group is returned as-is; two or more are wrapped in a 'union' node. */
+ function xGraphPatternNotTriples($v) {
+   list($optional, $rest) = $this->xOptionalGraphPattern($v);
+   if ($optional) {
+     return array($optional, $rest);
+   }
+   list($graph, $rest) = $this->xGraphGraphPattern($v);
+   if ($graph) {
+     return array($graph, $rest);
+   }
+   $branches = array();
+   $rest = $v;
+   while (true) {
+     list($group, $rest) = $this->xGroupGraphPattern($rest);
+     if (!$group) {
+       break;
+     }
+     $branches[] = $group;
+     $union = $this->x('UNION', $rest);
+     if (!$union) {
+       break;
+     }
+     $rest = $union[1];
+   }
+   $total = count($branches);
+   if ($total == 1) {
+     return array($branches[0], $rest);
+   }
+   if ($total > 1) {
+     return array(array('type' => 'union', 'patterns' => $branches), $rest);
+   }
+   return array(0, $v);
+ }
+
+ /* 23 */
+
+ /* Parses "OPTIONAL { ... }"; the group's patterns are lifted into an 'optional' node. */
+ function xOptionalGraphPattern($v) {
+   $m = $this->x('OPTIONAL', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   list($group, $rest) = $this->xGroupGraphPattern($m[1]);
+   if ($group) {
+     $optional = array('type' => 'optional', 'patterns' => $group['patterns']);
+     return array($optional, $rest);
+   }
+   $this->addError('Missing or invalid Group Graph Pattern after OPTIONAL');
+   return array(0, $v);
+ }
+
+ /* 24.. */
+
+ /* Parses "GRAPH (var | iri) { ... }" into a 'graph' node holding the nested group pattern. */
+ function xGraphGraphPattern($v) {
+   $m = $this->x('GRAPH', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   $node = array('type' => 'graph', 'var' => '', 'uri' => '', 'patterns' => array());
+   list($var, $rest) = $this->xVar($m[1]);
+   if ($var) {
+     $node['var'] = $var;
+   }
+   else {
+     list($iri, $rest) = $this->xIRIref($rest);
+     if ($iri) {
+       $node['uri'] = $iri;
+     }
+   }
+   /* neither a variable nor an IRI after GRAPH: give up without logging */
+   if (!$node['var'] && !$node['uri']) {
+     return array(0, $v);
+   }
+   list($group, $rest) = $this->xGroupGraphPattern($rest);
+   if ($group) {
+     $node['patterns'][] = $group;
+     return array($node, $rest);
+   }
+   $this->addError('Missing or invalid Graph Pattern');
+   return array(0, $v);
+ }
+
+ /* 26.., 27.. */
+
+ /* Parses "FILTER" followed by a bracketted expression, a built-in call or a function call (tried in that order). */
+ function xFilter($v) {
+   $m = $this->x('FILTER', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   $rest = $m[1];
+   foreach (array('xBrackettedExpression', 'xBuiltInCall', 'xFunctionCall') as $method) {
+     list($constraint, $rest) = $this->$method($rest);
+     if ($constraint) {
+       return array($constraint, $rest);
+     }
+   }
+   $this->addError('Incomplete FILTER');
+   return array(0, $v);
+ }
+
+ /* 28.. */
+
+ /* Parses "IRIref ArgList" into a 'function_call' node; returns array(0, $v) if either part is missing. */
+ function xFunctionCall($v) {
+   if ((list($r, $sub_v) = $this->xIRIref($v)) && $r) {
+     /* xArgList yields an empty array for "()", so test with is_array (as xBuiltInCall and */
+     /* xIRIrefOrFunction do) instead of truthiness, which rejected zero-argument calls */
+     if ((list($sub_r, $sub_v) = $this->xArgList($sub_v)) && is_array($sub_r)) {
+       return array(array('type' => 'function_call', 'uri' => $r, 'args' => $sub_r), $sub_v);
+     }
+   }
+   return array(0, $v);
+ }
+
+ /* 29 */
+
+ /* Parses a parenthesised, comma-separated list of expressions. */
+ /* Returns array(list_of_args, rest) once ")" is found (the list may be empty), else array(0, $v). */
+ function xArgList($v) {
+   $open = $this->x('\(', $v);
+   if (!$open) {
+     return array(0, $v);
+   }
+   $args = array();
+   $rest = $open[1];
+   while (true) {
+     $more = 0;
+     list($expr, $rest) = $this->xExpression($rest);
+     if ($expr) {
+       $args[] = $expr;
+       $comma = $this->x('\,', $rest);
+       if ($comma) {
+         $rest = $comma[1];
+         $more = 1;
+       }
+     }
+     /* a closing bracket ends the list even directly after a comma */
+     $close = $this->x('\)', $rest);
+     if ($close) {
+       return array($args, $close[1]);
+     }
+     if (!$more) {
+       return array(0, $v);
+     }
+   }
+ }
+
+ /* 30, 31 */
+
+ /* Parses "{ triples }" and returns array(triples, rest); an empty template gives an empty array. */
+ function xConstructTemplate($v) {
+   $open = $this->x('\{', $v);
+   if (!$open) {
+     return array(0, $v);
+   }
+   list($triples, $rest) = $this->xTriplesBlock($open[1]);
+   if (!is_array($triples)) {
+     $triples = array();
+   }
+   $close = $this->x('\}', $rest);
+   if ($close) {
+     return array($triples, $close[1]);
+   }
+   return array(0, $v);
+ }
+
+ /* 46, 47 */
+
+ /* Parses operands joined by "||". A single operand is returned unwrapped; */
+ /* several are wrapped in an 'or' expression node. */
+ function xExpression($v) {
+   list($first, $rest) = $this->xConditionalAndExpression($v);
+   if (!$first) {
+     return array(0, $v);
+   }
+   $operands = array($first);
+   while ($m = $this->x('\|\|', $rest)) {
+     list($next, $rest) = $this->xConditionalAndExpression($m[1]);
+     if (!$next) {
+       break;
+     }
+     $operands[] = $next;
+   }
+   if (count($operands) == 1) {
+     return array($operands[0], $rest);
+   }
+   return array(array('type' => 'expression', 'sub_type' => 'or', 'patterns' => $operands), $rest);
+ }
+
+ /* 48.., 49.. */
+
+ /* Parses operands joined by "&&". A single operand is returned unwrapped; */
+ /* several are wrapped in an 'and' expression node. */
+ function xConditionalAndExpression($v) {
+   list($first, $rest) = $this->xRelationalExpression($v);
+   if (!$first) {
+     return array(0, $v);
+   }
+   $operands = array($first);
+   while ($m = $this->x('\&\&', $rest)) {
+     list($next, $rest) = $this->xRelationalExpression($m[1]);
+     if (!$next) {
+       break;
+     }
+     $operands[] = $next;
+   }
+   if (count($operands) == 1) {
+     return array($operands[0], $rest);
+   }
+   return array(array('type' => 'expression', 'sub_type' => 'and', 'patterns' => $operands), $rest);
+ }
+
+ /* 50, 51 */
+
+ /* Parses additive expressions joined by a comparison operator (!=, ==, =, <=, >=, <, >). */
+ /* A single operand is returned unwrapped; otherwise a 'relational' node carrying the last operator seen. */
+ function xRelationalExpression($v) {
+   list($first, $rest) = $this->xAdditiveExpression($v);
+   if (!$first) {
+     return array(0, $v);
+   }
+   $node = array('type' => 'expression', 'sub_type' => 'relational', 'patterns' => array($first));
+   do {
+     $again = 0;
+     /* "longest token" rule: "<" followed by an IRI body is an IRIref, not the less-than operator */
+     list($iri, $rest) = $this->xIRI_REF($rest);
+     if ($iri) {
+       $this->addError('Expected operator, found IRIref: "'.$iri.'".');
+     }
+     $m = $this->x('(\!\=|\=\=|\=|\<\=|\>\=|\<|\>)', $rest);
+     if ($m) {
+       $node['operator'] = $m[1];
+       list($operand, $rest) = $this->xAdditiveExpression($m[2]);
+       if ($operand) {
+         $node['patterns'][] = $operand;
+         $again = 1;
+       }
+     }
+   } while ($again);
+   if (count($node['patterns']) == 1) {
+     return array($node['patterns'][0], $rest);
+   }
+   return array($node, $rest);
+ }
+
+ /* 52 */
+
+ /* Parses multiplicative expressions joined by "+" or "-"; each right-hand operand stores its operator. */
+ /* A single operand is returned unwrapped, several are wrapped in an 'additive' node. */
+ function xAdditiveExpression($v) {
+   list($first, $rest) = $this->xMultiplicativeExpression($v);
+   if (!$first) {
+     return array(0, $v);
+   }
+   $node = array('type' => 'expression', 'sub_type' => 'additive', 'patterns' => array($first));
+   do {
+     $again = 0;
+     $m = $this->x('(\+|\-)', $rest);
+     if ($m) {
+       $op = $m[1];
+       list($operand, $rest) = $this->xMultiplicativeExpression($m[2]);
+       if ($operand) {
+         $operand['operator'] = $op;
+         $node['patterns'][] = $operand;
+         $again = 1;
+       }
+       else {
+         /* fall back to a bare numeric literal after the sign */
+         list($number, $rest) = $this->xNumericLiteral($rest);
+         if ($number) {
+           $node['patterns'][] = array('type' => 'numeric', 'operator' => $op, 'value' => $number);
+           $again = 1;
+         }
+       }
+     }
+   } while ($again);
+   if (count($node['patterns']) == 1) {
+     return array($node['patterns'][0], $rest);
+   }
+   return array($node, $rest);
+ }
+
+ /* 53 */
+
+ /* Parses unary expressions joined by "*" or "/"; each right-hand operand stores its operator. */
+ /* A single operand is returned unwrapped, several are wrapped in a 'multiplicative' node. */
+ function xMultiplicativeExpression($v) {
+   list($first, $rest) = $this->xUnaryExpression($v);
+   if (!$first) {
+     return array(0, $v);
+   }
+   $node = array('type' => 'expression', 'sub_type' => 'multiplicative', 'patterns' => array($first));
+   while ($m = $this->x('(\*|\/)', $rest)) {
+     $op = $m[1];
+     list($operand, $rest) = $this->xUnaryExpression($m[2]);
+     if (!$operand) {
+       break;
+     }
+     $operand['operator'] = $op;
+     $node['patterns'][] = $operand;
+   }
+   if (count($node['patterns']) == 1) {
+     return array($node['patterns'][0], $rest);
+   }
+   return array($node, $rest);
+ }
+
+ /* 54 */
+
+ /* Parses an optional "!", "+" or "-" prefix followed by a primary expression. */
+ /* The prefix is merged with an operator already present on the operand and stored under 'operator'. */
+ function xUnaryExpression($v) {
+   $op = '';
+   $rest = $v;
+   $m = $this->x('(\!|\+|\-)', $rest);
+   if ($m) {
+     $op = $m[1];
+     $rest = $m[2];
+   }
+   list($expr, $rest) = $this->xPrimaryExpression($rest);
+   if (!$expr) {
+     return array(0, $v);
+   }
+   if (!is_array($expr)) {
+     $expr = array('type' => 'unary', 'expression' => $expr);
+   }
+   elseif ($inner = $this->v1('operator', '', $expr)) {
+     /* collapse operator pairs where a mapping exists, otherwise concatenate them */
+     $ops = array('!!' => '', '++' => '+', '--' => '+', '+-' => '-', '-+' => '-');
+     $pair = $op . $inner;
+     $op = isset($ops[$pair]) ? $ops[$pair] : $pair;
+   }
+   $expr['operator'] = $op;
+   return array($expr, $rest);
+ }
+
+ /* 55 */
+
+ /* Tries each primary-expression parser in a fixed order and returns the first successful result. */
+ function xPrimaryExpression($v) {
+   $types = array('BrackettedExpression', 'BuiltInCall', 'IRIrefOrFunction', 'RDFLiteral', 'NumericLiteral', 'BooleanLiteral', 'Var', 'Placeholder');
+   foreach ($types as $type) {
+     $method = 'x' . $type;
+     list($parsed, $rest) = $this->$method($v);
+     if ($parsed) {
+       return array($parsed, $rest);
+     }
+   }
+   return array(0, $v);
+ }
+
+ /* 56 */
+
+ /* Parses "( expression )" and returns the inner expression with the text after ")". */
+ function xBrackettedExpression($v) {
+   $open = $this->x('\(', $v);
+   if ($open) {
+     list($expr, $rest) = $this->xExpression($open[1]);
+     if ($expr) {
+       $close = $this->x('\)', $rest);
+       if ($close) {
+         return array($expr, $close[1]);
+       }
+     }
+   }
+   return array(0, $v);
+ }
+
+ /* 57.., 58.. */
+
+ /* Parses a built-in call such as str(...), bound(...) or regex(...) into a 'built_in_call' node. */
+ function xBuiltInCall($v) {
+   $m = $this->x('(str|lang|langmatches|datatype|bound|sameterm|isiri|isuri|isblank|isliteral|regex)\s*\(', $v);
+   if ($m) {
+     $call = array('type' => 'built_in_call', 'call' => strtolower($m[1]));
+     /* the opening bracket was consumed by the match above; put it back for xArgList */
+     list($args, $rest) = $this->xArgList('(' . $m[2]);
+     if (is_array($args)) {
+       $call['args'] = $args;
+       return array($call, $rest);
+     }
+   }
+   return array(0, $v);
+ }
+
+ /* 59.. */
+
+ /* Parses an IRIref, optionally followed by an argument list. */
+ /* Returns a 'function' node when an ArgList follows, a 'uri' node otherwise, */
+ /* and array(0, $v) when no IRIref is found (previously the function fell through and returned null). */
+ function xIRIrefOrFunction($v) {
+   list($iri, $rest) = $this->xIRIref($v);
+   if ($iri) {
+     list($args, $after) = $this->xArgList($rest);
+     if (is_array($args)) {
+       return array(array('type' => 'function', 'uri' => $iri, 'args' => $args), $after);
+     }
+     return array(array('type' => 'uri', 'uri' => $iri), $after);
+   }
+   return array(0, $v);
+ }
+
+ /* 70.. @@sync with TurtleParser */
+
+ /* Parses "<...>": a placeholder IRI, a regular IRI body, or an http(s) IRI with reserved characters. */
+ /* An empty "<>" is reported as true so that the result stays truthy. */
+ function xIRI_REF($v) {
+   $m = $this->x('\<(\$\{[^\>]*\})\>', $v);
+   if ($m && $this->xPlaceholder($m[1])) {
+     return array($m[1], $m[2]);
+   }
+   $m = $this->x('\<([^\<\>\s\"\|\^`]*)\>', $v);
+   if ($m) {
+     return array($m[1] ? $m[1] : true, $m[2]);
+   }
+   /* allow reserved chars in obvious IRIs */
+   $m = $this->x('\<(https?\:[^\s][^\<\>]*)\>', $v);
+   if ($m) {
+     return array($m[1] ? $m[1] : true, $m[2]);
+   }
+   return array(0, $v);
+ }
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 SPARQL+ Parser (SPARQL + Aggregates + LOAD + INSERT + DELETE)
+author: Benjamin Nowack
+version: 2008-05-30 (Tweak: CONSTRUCT keyword is now optional)
+*/
+
+ARC2::inc('SPARQLParser');
+
+/* SPARQL+ parser: extends the SPARQL parser with aggregates, GROUP BY, LOAD, INSERT and DELETE. */
+class ARC2_SPARQLPlusParser extends ARC2_SPARQLParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor, delegates to __construct() */
+  function ARC2_SPARQLPlusParser($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+  }
+
+  /* +1 */
+
+  /* Parses the prologue, then tries each supported query form in turn; returns the first that matches. */
+  function xQuery($v) {
+    list($r, $v) = $this->xPrologue($v);
+    foreach (array('Select', 'Construct', 'Describe', 'Ask', 'Insert', 'Delete', 'Load') as $type) {
+      $m = 'x' . $type . 'Query';
+      if ((list($r, $v) = $this->$m($v)) && $r) {
+        return array($r, $v);
+      }
+    }
+    return array(0, $v);
+  }
+
+  /* +3 */
+
+  /* Parses a result variable, optionally wrapped in an aggregate: "AGG(?x) AS ?alias" or "AGG(*) AS ?alias". */
+  /* Returns array(array('var' => ..., 'aggregate' => ..., 'alias' => ...), rest) or array(0, $v). */
+  function xResultVar($v) {
+    $aggregate = '';
+    $result_var = '';
+    /* aggregate */
+    if ($sub_r = $this->x('\(?(AVG|COUNT|MAX|MIN|SUM)\s*\(\s*([^\)]+)\)\s+AS\s+([^\s\)]+)\)?', $v)) {
+      $aggregate = $sub_r[1];
+      $result_var = $sub_r[3];
+      /* continue with the aggregate's argument, followed by whatever came after the alias */
+      $v = $sub_r[2] . $sub_r[4];
+      /* reduce an alias written as a variable to its bare name */
+      list($alias_r, $alias_v) = $this->xVar($result_var);
+      if ($alias_r) {
+        $result_var = $alias_r['value'];
+      }
+    }
+    $alias = $aggregate ? $result_var : '';
+    /* * or var */
+    /* x() returns the regex match array, so the remainder is element 1; the old code destructured the */
+    /* match with list() and indexed the matched string with ['value'], which only gave "*" by accident */
+    if ($sub_r = $this->x('\*', $v)) {
+      return array(array('var' => '*', 'aggregate' => $aggregate, 'alias' => $alias), $sub_r[1]);
+    }
+    if ((list($sub_r, $sub_v) = $this->xVar($v)) && $sub_r) {
+      return array(array('var' => $sub_r['value'], 'aggregate' => $aggregate, 'alias' => $alias), $sub_v);
+    }
+    return array(0, $v);
+  }
+
+  /* +4 */
+
+  /* Parses "LOAD <url>" with an optional "INTO <graph>". */
+  function xLoadQuery($v) {
+    if ($sub_r = $this->x('LOAD\s+', $v)) {
+      $sub_v = $sub_r[1];
+      if ((list($sub_r, $sub_v) = $this->xIRIref($sub_v)) && $sub_r) {
+        $r = array('type' => 'load', 'url' => $sub_r, 'target_graph' => '');
+        if ($sub_r = $this->x('INTO\s+', $sub_v)) {
+          $sub_v = $sub_r[1];
+          if ((list($sub_r, $sub_v) = $this->xIRIref($sub_v)) && $sub_r) {
+            $r['target_graph'] = $sub_r;
+          }
+        }
+        return array($r, $sub_v);
+      }
+    }
+    return array(0, $v);
+  }
+
+  /* +5 */
+
+  /* Parses "INSERT INTO <graph> [CONSTRUCT] { template }" plus optional dataset, WHERE and modifiers. */
+  function xInsertQuery($v) {
+    if ($sub_r = $this->x('INSERT\s+', $v)) {
+      $r = array(
+        'type' => 'insert',
+        'dataset' => array(),
+      );
+      $sub_v = $sub_r[1];
+      /* target */
+      if ($sub_r = $this->x('INTO\s+', $sub_v)) {
+        $sub_v = $sub_r[1];
+        if ((list($sub_r, $sub_v) = $this->xIRIref($sub_v)) && $sub_r) {
+          $r['target_graph'] = $sub_r;
+          /* CONSTRUCT keyword, optional */
+          if ($sub_r = $this->x('CONSTRUCT\s+', $sub_v)) {
+            $sub_v = $sub_r[1];
+          }
+          /* construct template */
+          if ((list($sub_r, $sub_v) = $this->xConstructTemplate($sub_v)) && is_array($sub_r)) {
+            $r['construct_triples'] = $sub_r;
+          }
+          else {
+            $this->addError('Construct Template not found');
+            return array(0, $v);
+          }
+          /* dataset */
+          while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
+            $r['dataset'][] = $sub_r;
+          }
+          /* where */
+          if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
+            $r['pattern'] = $sub_r;
+          }
+          /* solution modifier */
+          if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
+            $r = array_merge($r, $sub_r);
+          }
+          return array($r, $sub_v);
+        }
+      }
+    }
+    return array(0, $v);
+  }
+
+  /* +6 */
+
+  /* Parses "DELETE [FROM <graph>]* [[CONSTRUCT] { template } ...]". The template part is optional; */
+  /* when present it may be followed by dataset clauses, a WHERE pattern and solution modifiers. */
+  function xDeleteQuery($v) {
+    if ($sub_r = $this->x('DELETE\s+', $v)) {
+      $r = array(
+        'type' => 'delete',
+        'target_graphs' => array(),
+        /* always present, matching the shape produced by xInsertQuery */
+        'dataset' => array(),
+      );
+      $sub_v = $sub_r[1];
+      /* target */
+      do {
+        $proceed = false;
+        if ($sub_r = $this->x('FROM\s+', $sub_v)) {
+          $sub_v = $sub_r[1];
+          if ((list($sub_r, $sub_v) = $this->xIRIref($sub_v)) && $sub_r) {
+            $r['target_graphs'][] = $sub_r;
+            $proceed = 1;
+          }
+        }
+      } while ($proceed);
+      /* CONSTRUCT keyword, optional */
+      if ($sub_r = $this->x('CONSTRUCT\s+', $sub_v)) {
+        $sub_v = $sub_r[1];
+      }
+      /* construct template */
+      if ((list($sub_r, $sub_v) = $this->xConstructTemplate($sub_v)) && is_array($sub_r)) {
+        $r['construct_triples'] = $sub_r;
+        /* dataset */
+        while ((list($sub_r, $sub_v) = $this->xDatasetClause($sub_v)) && $sub_r) {
+          $r['dataset'][] = $sub_r;
+        }
+        /* where */
+        if ((list($sub_r, $sub_v) = $this->xWhereClause($sub_v)) && $sub_r) {
+          $r['pattern'] = $sub_r;
+        }
+        /* solution modifier */
+        if ((list($sub_r, $sub_v) = $this->xSolutionModifier($sub_v)) && $sub_r) {
+          $r = array_merge($r, $sub_r);
+        }
+      }
+      return array($r, $sub_v);
+    }
+    return array(0, $v);
+  }
+
+  /* +7 */
+
+  /* Like the parent's solution modifier, with an optional GROUP BY clause in front of ORDER BY. */
+  function xSolutionModifier($v) {
+    $r = array();
+    if ((list($sub_r, $sub_v) = $this->xGroupClause($v)) && $sub_r) {
+      $r['group_infos'] = $sub_r;
+    }
+    if ((list($sub_r, $sub_v) = $this->xOrderClause($sub_v)) && $sub_r) {
+      $r['order_infos'] = $sub_r;
+    }
+    while ((list($sub_r, $sub_v) = $this->xLimitOrOffsetClause($sub_v)) && $sub_r) {
+      $r = array_merge($r, $sub_r);
+    }
+    /* nothing consumed means no modifier was present */
+    return ($v == $sub_v) ? array(0, $v) : array($r, $sub_v);
+  }
+
+  /* +8 */
+
+  /* Parses "GROUP BY" followed by one or more variables, optionally separated by commas. */
+  function xGroupClause($v) {
+    if ($sub_r = $this->x('GROUP BY\s+', $v)) {
+      $sub_v = $sub_r[1];
+      $r = array();
+      do {
+        $proceed = 0;
+        if ((list($sub_r, $sub_v) = $this->xVar($sub_v)) && $sub_r) {
+          $r[] = $sub_r;
+          $proceed = 1;
+          if ($sub_r = $this->x('\,', $sub_v)) {
+            $sub_v = $sub_r[1];
+          }
+        }
+      } while ($proceed);
+      if (count($r)) {
+        return array($r, $sub_v);
+      }
+      else {
+        $this->addError('No columns specified in GROUP BY clause.');
+      }
+    }
+    return array(0, $v);
+  }
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 SPARQL Result XML Parser
+author: Benjamin Nowack
+version: 2008-08-28 (Addition: Support for "inserted" and "deleted")
+*/
+
+ARC2::inc('LegacyXMLParser');
+
+/* Reads a SPARQL XML result document from the node list built by the legacy XML parser. */
+class ARC2_SPARQLXMLResultParser extends ARC2_LegacyXMLParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor, delegates to __construct() */
+  function ARC2_SPARQLXMLResultParser($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    /* SPARQL results namespace, registered under the "srx" prefix */
+    $this->srx = 'http://www.w3.org/2005/sparql-results#';
+    $this->nsp[$this->srx] = 'srx';
+    $this->allowCDataNodes = 0;
+  }
+
+  /*  */
+
+  /* intentionally empty */
+  function done() {
+  }
+
+  /*  */
+
+  /* Returns the "name" attribute of every srx:variable node, in document order. */
+  function getVariables() {
+    $r = array();
+    foreach ($this->nodes as $node) {
+      if ($node['tag'] == $this->srx . 'variable') {
+        $r[] = $node['a']['name'];
+      }
+    }
+    return $r;
+  }
+
+  /* Returns one associative array per srx:result node; results without any binding are left out. */
+  function getRows() {
+    $r = array();
+    $index = $this->getNodeIndex();
+    foreach ($this->nodes as $node) {
+      if ($node['tag'] == $this->srx . 'result') {
+        $row = array();
+        $row_id = $node['id'];
+        $bindings = isset($index[$row_id])? $index[$row_id] : array();
+        foreach ($bindings as $binding) {
+          $row = array_merge($row, $this->getBinding($binding));
+        }
+        if ($row) {
+          $r[] = $row;
+        }
+      }
+    }
+    return $r;
+  }
+
+  /* Converts one binding node into row entries keyed "<var>", "<var> type" and, where present, */
+  /* "<var> datatype" or "<var> lang". A binding without a child term yields an empty array. */
+  function getBinding($node) {
+    $r = array();
+    $index = $this->getNodeIndex();
+    /* guard against a binding element that has no term child */
+    if (!isset($index[$node['id']][0])) {
+      return $r;
+    }
+    $var = $node['a']['name'];
+    $term = $index[$node['id']][0];
+    /* the term type is the tag name without the namespace, e.g. "uri", "bnode" or "literal" */
+    $r[$var . ' type'] = (string) substr($term['tag'], strlen($this->srx));
+    $r[$var] = ($r[$var . ' type'] == 'bnode') ? '_:' . $term['cdata'] : $term['cdata'];
+    if (isset($term['a']['datatype'])) {
+      $r[$var . ' datatype'] = $term['a']['datatype'];
+    }
+    elseif (isset($term['a'][$this->xml . 'lang'])) {
+      $r[$var . ' lang'] = $term['a'][$this->xml . 'lang'];
+    }
+    return $r;
+  }
+
+  /* Returns array('boolean' => bool), array('inserted' => n) or array('deleted' => n) for the first such */
+  /* node; returns '' when an srx:results node comes first or none of these nodes exists. */
+  function getBooleanInsertedDeleted() {
+    foreach ($this->nodes as $node) {
+      if ($node['tag'] == $this->srx . 'boolean') {
+        return ($node['cdata'] == 'true') ? array('boolean' => true) : array('boolean' => false);
+      }
+      elseif ($node['tag'] == $this->srx . 'inserted') {
+        return array('inserted' => $node['cdata']);
+      }
+      elseif ($node['tag'] == $this->srx . 'deleted') {
+        return array('deleted' => $node['cdata']);
+      }
+      elseif ($node['tag'] == $this->srx . 'results') {
+        return '';
+      }
+    }
+    return '';
+  }
+
+  /*  */
+
+  /* Returns array('variables' => ..., 'rows' => ...) plus a boolean/inserted/deleted entry when present. */
+  function getStructure() {
+    $r = array('variables' => $this->getVariables(), 'rows' => $this->getRows());
+    /* boolean|inserted|deleted */
+    if ($sub_r = $this->getBooleanInsertedDeleted()) {
+      foreach ($sub_r as $k => $v) {
+        $r[$k] = $v;
+      }
+    }
+    return $r;
+  }
+
+  /*  */
+
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 streaming SPOG parser
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-08
+*/
+
+ARC2::inc('RDFParser');
+
+/* Streaming parser that turns SPARQL XML result rows with s/p/o/g bindings into triples. */
+class ARC2_SPOGParser extends ARC2_RDFParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor, delegates to __construct() */
+  function ARC2_SPOGParser($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    $this->encoding = $this->v('encoding', false, $this->a);
+    $this->xml = 'http://www.w3.org/XML/1998/namespace';
+    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+    $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
+    $this->target_encoding = '';
+  }
+
+  /*  */
+
+  /* Streams the document at $path (or the given $data) through an expat parser. */
+  /* With $iso_fallback set, the XML declaration of the first chunk is replaced by an ISO-8859-1 one. */
+  /* Returns the result of done() on success, or of addError() on the first XML error. */
+  function parse($path, $data = '', $iso_fallback = false) {
+    $this->state = 0;
+    /* reader */
+    if (!$this->v('reader')) {
+      ARC2::inc('Reader');
+      /* plain assignment: "=& new" is a parse error on PHP 7+ */
+      $this->reader = new ARC2_Reader($this->a, $this);
+    }
+    $this->reader->setAcceptHeader('Accept: sparql-results+xml; q=0.9, */*; q=0.1');
+    $this->reader->activate($path, $data);
+    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
+    /* xml parser */
+    $this->initXMLParser();
+    /* parse */
+    $first = true;
+    while ($d = $this->reader->readStream()) {
+      if ($iso_fallback && $first) {
+        $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
+        $first = false;
+      }
+      if (!xml_parse($this->xml_parser, $d, false)) {
+        $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
+        $line = xml_get_current_line_number($this->xml_parser);
+        /* message built before cleanup; the raw chunk is no longer appended to it */
+        $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
+        /* release parser and stream on failure as well; the reader object itself stays in place, as before. */
+        /* (an automatic ISO-8859-1 retry existed here but was disabled with "if (0 && ...)") */
+        xml_parser_free($this->xml_parser);
+        unset($this->xml_parser);
+        $this->reader->closeStream();
+        return $this->addError($this->tmp_error);
+      }
+    }
+    $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
+    xml_parser_free($this->xml_parser);
+    /* unset so that initXMLParser() creates a fresh parser on the next parse() call */
+    unset($this->xml_parser);
+    $this->reader->closeStream();
+    unset($this->reader);
+    return $this->done();
+  }
+
+  /*  */
+
+  /* Creates the expat parser once and wires it to open(), close(), cData() and nsDecl(). */
+  function initXMLParser() {
+    if (!isset($this->xml_parser)) {
+      /* only these three encodings are passed on to the parser; anything else becomes UTF-8 */
+      $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
+      $parser = xml_parser_create($enc);
+      xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
+      xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
+      xml_set_element_handler($parser, 'open', 'close');
+      xml_set_character_data_handler($parser, 'cdata');
+      xml_set_start_namespace_decl_handler($parser, 'nsDecl');
+      xml_set_object($parser, $this);
+      $this->xml_parser =& $parser;
+    }
+  }
+
+  /*  */
+
+  /* Returns the encoding for the given source: 'parser' gives the parser's target encoding, */
+  /* 'config' the configured encoding if one is set; otherwise the reader is asked, and 'UTF-8' */
+  /* is used when no reader is available (parse() unsets it when it finishes). */
+  function getEncoding($src = 'config') {
+    if ($src == 'parser') {
+      return $this->target_encoding;
+    }
+    elseif (($src == 'config') && $this->encoding) {
+      return $this->encoding;
+    }
+    if (isset($this->reader) && $this->reader) {
+      return $this->reader->getEncoding();
+    }
+    return 'UTF-8';
+  }
+
+  /*  */
+
+  function getTriples() {
+    return $this->v('triples', array());
+  }
+
+  function countTriples() {
+    return $this->t_count;
+  }
+
+  /* Appends a triple unless s, p or o is empty (returns 0 then); with skip_dupes set, */
+  /* triples whose serialised form was already added are ignored. */
+  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '', $g = '') {
+    if (!($s && $p && $o)) return 0;
+    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang, 'g' => $g);
+    if ($this->skip_dupes) {
+      $h = md5(serialize($t));
+      if (!isset($this->added_triples[$h])) {
+        $this->triples[$this->t_count] = $t;
+        $this->t_count++;
+        $this->added_triples[$h] = true;
+      }
+    }
+    else {
+      $this->triples[$this->t_count] = $t;
+      $this->t_count++;
+    }
+  }
+
+  /*  */
+
+  /* Start-element handler: tracks the current tag and prepares the fields of the current binding. */
+  function open($p, $t, $a) {
+    $this->state = $t;
+    if ($t == 'result') {
+      $this->t = array();
+    }
+    elseif ($t == 'binding') {
+      $this->binding = $a['name'];
+      $this->t[$this->binding] = '';
+    }
+    elseif ($t == 'literal') {
+      $this->t[$this->binding . '_dt'] = $this->v('datatype', '', $a);
+      $this->t[$this->binding . '_lang'] = $this->v('xml:lang', '', $a);
+      $this->t[$this->binding . '_type'] = 'literal';
+    }
+    elseif ($t == 'uri') {
+      $this->t[$this->binding . '_type'] = 'uri';
+    }
+    elseif ($t == 'bnode') {
+      $this->t[$this->binding . '_type'] = 'bnode';
+      /* character data is appended in cData(), giving "_:" + label */
+      $this->t[$this->binding] = '_:';
+    }
+  }
+
+  /* End-element handler: a closing result element turns the collected bindings into a triple. */
+  function close($p, $t) {
+    $this->prev_state = $this->state;
+    $this->state = '';
+    if ($t == 'result') {
+      $this->addT(
+        $this->v('s', '', $this->t),
+        $this->v('p', '', $this->t),
+        $this->v('o', '', $this->t),
+        $this->v('s_type', '', $this->t),
+        $this->v('o_type', '', $this->t),
+        $this->v('o_dt', '', $this->t),
+        $this->v('o_lang', '', $this->t),
+        $this->v('g', '', $this->t)
+      );
+    }
+  }
+
+  /* Character-data handler: text inside uri/bnode/literal elements is appended to the current binding. */
+  function cData($p, $d) {
+    if (in_array($this->state, array('uri', 'bnode', 'literal'))) {
+      $this->t[$this->binding] .= $d;
+    }
+  }
+
+  /* Namespace handler: remembers the first prefix seen for each namespace URI. */
+  function nsDecl($p, $prf, $uri) {
+    $this->nsp[$uri] = isset($this->nsp[$uri]) ? $this->nsp[$uri] : $prf;
+  }
+
+  /*  */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 SemHTML Parser
+author: Benjamin Nowack
+version: 2009-02-09 (Addition: Support for RDFa detection via typeof/property/about)
+*/
+
+ARC2::inc('LegacyXMLParser');
+
+class ARC2_SemHTMLParser extends ARC2_LegacyXMLParser {
+
+ function __construct($a = '', &$caller) { /* forwards both arguments unchanged to the parent constructor */
+ parent::__construct($a, $caller);
+ }
+
+ function ARC2_SemHTMLParser($a = '', &$caller) { /* PHP 4 style constructor, delegates to __construct() */
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {/* runs the parent's __init(), then sets this parser's defaults; several are looked up in $this->a via v() */
+ parent::__init();
+ $this->default_sem_html_formats = 'dc openid erdf rdfa posh-rdf microformats'; /* space-separated format names */
+ $this->triples = array();
+ $this->target_encoding = '';
+ $this->t_count = 0;
+ $this->added_triples = array(); /* keyed by md5 of the serialised triple, see addT() */
+ $this->skip_dupes = false;
+ $this->bnode_prefix = $this->v('bnode_prefix', 'arc'.substr(md5(uniqid(rand())), 0, 4).'b', $this->a); /* fallback: "arc" + 4 random hex chars + "b" */
+ $this->bnode_id = 0;
+ $this->auto_extract = $this->v('auto_extract', 1, $this->a); /* when truthy, done() calls extractRDF() */
+ $this->extracted_formats = array();
+ $this->cache = array();
+ $this->detected_formats = array();
+ $this->keep_cdata_ws = $this->v('keep_cdata_whitespace', 0, $this->a);
+ }
+
+ /* */
+
+ /* Matches $re at the start of $v after skipping leading whitespace. */
+ /* Returns the preg match array with the unmatched remainder as its last element, or false. */
+ /* With $keep_ws set, the skipped whitespace is prepended to element 1. */
+ function x($re, $v, $options = 'si', $keep_ws = 0) {
+   $ws = '';
+   if (preg_match('/^(\s*)(.*)$/s', $v, $lead)) {
+     $ws = $lead[1];
+     $v = $lead[2];
+   }
+   if (!preg_match("/^" . $re . "(.*)$/" . $options, $v, $m)) {
+     return false;
+   }
+   if ($keep_ws) {
+     $m[1] = $ws . $m[1];
+   }
+   return $m;
+ }
+
+ /* Upper-cases the first letter and the letter after each "-", "_" or space, removing the separators. */
+ function camelCase($v) {
+   $out = ucfirst($v);
+   for (;;) {
+     /* the greedy first group finds the last remaining separator on each pass */
+     if (!preg_match('/^(.*)[\-\_ ](.*)$/', $out, $parts)) {
+       return $out;
+     }
+     $out = $parts[1] . ucfirst($parts[2]);
+   }
+ }
+
+ /* */
+
+ function setReader(&$reader) { /* stores the given reader by reference in $this->reader */
+ $this->reader =& $reader;
+ }
+
+ function createBnodeID(){ /* increments the counter and returns "_:" + bnode prefix + counter */
+ $this->bnode_id++;
+ return '_:' . $this->bnode_prefix . $this->bnode_id;
+ }
+
+ /* Stores a triple after HTML-entity-decoding its object value. */
+ /* With skip_dupes set, a triple whose serialised form was already stored is ignored. */
+ function addT($t) {
+   if (function_exists('html_entity_decode')) {
+     $t['o'] = html_entity_decode($t['o']);
+   }
+   if ($this->skip_dupes) {
+     $hash = md5(serialize($t));
+     if (isset($this->added_triples[$hash])) {
+       return;
+     }
+     $this->added_triples[$hash] = true;
+   }
+   $this->triples[$this->t_count] = $t;
+   $this->t_count++;
+ }
+
+ function getTriples() { /* looks up 'triples' through v(), passing an empty array as the default */
+ return $this->v('triples', array());
+ }
+
+ function countTriples() { /* returns the counter that addT() increments for each stored triple */
+ return $this->t_count;
+ }
+
+ function getSimpleIndex($flatten_objects = 1, $vals = '') { /* passes this parser's triples and both arguments to ARC2::getSimpleIndex() */
+ return ARC2::getSimpleIndex($this->getTriples(), $flatten_objects, $vals);
+ }
+
+ /* */
+
+ /* Reads the document at $path (or the given $data) through the reader and feeds it chunk-wise to */
+ /* processData(), carrying the unprocessed remainder over to the next chunk. Returns the result of done(). */
+ function parse($path, $data = '') {
+   $this->nodes = array();
+   $this->node_count = 0;
+   $this->level = 0;
+   /* reader */
+   if (!$this->v('reader')) {
+     ARC2::inc('Reader');
+     /* plain assignment: "=& new" is a parse error on PHP 7+ */
+     $this->reader = new ARC2_Reader($this->a, $this);
+   }
+   $this->reader->setAcceptHeader('Accept: text/html, application/xhtml, */*; q=0.9');
+   $this->reader->activate($path, $data);
+   $this->target_encoding = $this->reader->getEncoding(false);
+   /* a configured base takes precedence over the reader's base */
+   $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
+   $this->base = $this->x_base;
+   $this->doc_url = $this->reader->base;
+   /* parse */
+   $rest = '';
+   $this->cur_tag = '';
+   while ($d = $this->reader->readStream(1)) {
+     $rest = $this->processData($rest . $d);
+   }
+   $this->reader->closeStream();
+   unset($this->reader);
+   return $this->done();
+ }
+
+ /* */
+
+ /* Returns the encoding that parse() obtained from the reader ('' before parse() has run). */
+ function getEncoding() {
+ return $this->target_encoding;
+ }
+
+ /* */
+
+ /* Called at the end of parse(); runs extractRDF() when auto_extract is enabled. Has no return value. */
+ function done() {
+ if ($this->auto_extract) {
+ $this->extractRDF();
+ }
+ }
+
+ /* */
+
+ /*
+  * Tokenizes as much of $v as possible, calling open()/close()/cData() for each token,
+  * and records format hints in $this->detected_formats along the way.
+  * Returns the part of $v that could not be consumed.
+  */
+ function processData($v) {
+ $sub_v = $v;
+ do {
+ $proceed = 1;
+ /* comments are reported as an opened and immediately closed 'comment' node */
+ if ((list($sub_r, $sub_v) = $this->xComment($sub_v)) && $sub_r) {
+ $this->open(0, 'comment', array('value' => $sub_r));
+ $this->close(0, 'comment');
+ continue;
+ }
+ if ((list($sub_r, $sub_v) = $this->xDoctype($sub_v)) && $sub_r) {
+ $this->open(0, 'doctype', array('value' => $sub_r));
+ $this->close(0, 'doctype');
+ /* RDFa detection */
+ if (preg_match('/rdfa /i', $sub_r)) $this->detected_formats['rdfa'] = 1;
+ continue;
+ }
+ /* whitespace is only reported as character data once $this->level is non-zero */
+ if ($this->level && ((list($sub_r, $sub_v) = $this->xWS($sub_v)) && $sub_r)) {
+ $this->cData(0, $sub_r);
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xOpen($sub_v)) && $sub_r) {
+ $this->open(0, $sub_r['tag'], $sub_r['a']);
+ $this->cur_tag = $sub_r['tag'];
+ /* empty elements are closed right away */
+ if ($sub_r['empty']) {
+ $this->close(0, $sub_r['tag'], 1);
+ $this->cur_tag = '';
+ }
+ /* eRDF detection */
+ if (!isset($this->detected_formats['erdf']) && isset($sub_r['a']['profile m']) && in_array('http://purl.org/NET/erdf/profile', $sub_r['a']['profile m'])) $this->detected_formats['erdf'] = 1;
+ /* poshRDF detection */
+ if (!isset($this->detected_formats['posh-rdf']) && isset($sub_r['a']['class m']) && in_array('rdf-p', $sub_r['a']['class m'])) $this->detected_formats['posh-rdf'] = 1;
+ /* RDFa detection */
+ if (!isset($this->detected_formats['rdfa']) && ($this->cur_tag == 'html') && isset($sub_r['a']['version m']) && in_array('XHTML+RDFa', $sub_r['a']['version m'])) $this->detected_formats['rdfa'] = 1;
+ if (!isset($this->detected_formats['rdfa']) && isset($sub_r['a']['xmlns']) && $sub_r['a']['xmlns'] && $this->isRDFNSDecl($sub_r['a']['xmlns'])) $this->detected_formats['rdfa'] = 1;
+ if (!isset($this->detected_formats['rdfa']) && array_intersect(array('about', 'typeof', 'property'), array_keys($sub_r['a']))) $this->detected_formats['rdfa'] = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xClose($sub_v)) && $sub_r) {
+ /* same tag list as isEmpty(): these were closed when they were opened */
+ if (preg_match('/^(area|base|br|col|frame|hr|input|img|link|xmeta|param)$/', $sub_r['tag'])) {
+ /* already implicitly closed */
+ }
+ else {
+ $this->close(0, $sub_r['tag']);
+ $this->cur_tag = '';
+ }
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xCData($sub_v)) && $sub_r) {
+ $this->cData(0, $sub_r);
+ }
+ else {
+ /* nothing matched: stop and hand the remainder back to the caller */
+ $proceed = 0;
+ }
+ } while ($proceed);
+ return $sub_v;
+ }
+
+ /* */
+
+ /* Returns 1 when $ns contains at least one entry with a truthy key (a non-empty prefix), otherwise 0. */
+ function isRDFNSDecl($ns) {
+   $found = 0;
+   foreach ($ns as $prefix => $uri) {
+     if (!$prefix) {
+       continue;
+     }
+     $found = 1;
+     break;
+   }
+   return $found;
+ }
+
+ /* */
+
+ /* Extracts a leading "<!-- ... -->" comment. Returns array(comment text, rest) or array(0, $v). */
+ function xComment($v) {
+   $start = $this->x('\<\!\-\-', $v);
+   if (!$start) {
+     return array(0, $v);
+   }
+   /* ungreedy: stop at the first closing marker */
+   $body = $this->x('(.*)\-\-\>', $start[1], 'Us');
+   if (!$body) {
+     return array(0, $v);
+   }
+   return array($body[1], $body[2]);
+ }
+
+ /* Extracts a leading "<!DOCTYPE ...>" declaration. Returns array(declaration body, rest) or array(0, $v). */
+ function xDoctype($v) {
+   $start = $this->x('\<\!DOCTYPE', $v);
+   if (!$start) {
+     return array(0, $v);
+   }
+   $body = $this->x('([^\>]+)\>', $start[1]);
+   if (!$body) {
+     return array(0, $v);
+   }
+   return array($body[1], $body[2]);
+ }
+
+ /* Extracts leading whitespace using ARC2::x() (not $this->x()). Returns array(whitespace, rest) or array(0, $v). */
+ function xWS($v) {
+   $m = ARC2::x('(\s+)', $v);
+   return $m ? array($m[1], $m[2]) : array(0, $v);
+ }
+
+ /* */
+
+ /*
+  * Extracts a leading opening tag.
+  * Returns array(array('tag' => lower-cased name, 'a' => attributes, 'empty' => 0|1), rest) or array(0, $v).
+  */
+ function xOpen($v) {
+   $m = $this->x('\<([^\s\/\>]+)([^\>]*)\>', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   /* only the attribute map is needed; the unparsed attribute rest is ignored */
+   $parsed = $this->xAttributes($m[2]);
+   $node = array('tag' => strtolower($m[1]), 'a' => $parsed[0], 'empty' => $this->isEmpty($m[1], $m[2]));
+   return array($node, $m[3]);
+ }
+
+ /* */
+
+ /*
+  * Parses all attributes at the start of $v.
+  * xmlns declarations are reported via nsDecl() and collected under 'xmlns'; any other
+  * attribute is stored as name => value plus "name m" => list of values.
+  * Returns array(attribute map, rest).
+  */
+ function xAttributes($v) {
+   $attrs = array();
+   for (;;) {
+     list($attr, $v) = $this->xAttribute($v);
+     if (!$attr) {
+       break;
+     }
+     $ns = $this->x('xmlns\:?(.*)', $attr['k']);
+     if ($ns) {
+       $this->nsDecl(0, $ns[1], $attr['value']);
+       $attrs['xmlns'][$ns[1]] = $attr['value'];
+       continue;
+     }
+     $attrs[$attr['k']] = $attr['value'];
+     $attrs[$attr['k'] . ' m'] = $attr['values'];
+   }
+   return array($attrs, $v);
+ }
+
+ /* */
+
+ /*
+  * Parses one attribute at the start of $v.
+  * Returns array(array('k' => name, 'value' => value, 'values' => space-separated parts), rest),
+  * or array(0, rest) when no attribute is found (a lone "/" is consumed and reported as "none").
+  */
+ function xAttribute($v) {
+   if ($r = $this->x('([^\s\=]+)\s*(\=)?\s*([\'\"]?)', $v)) {
+     if (!$r[2]) {/* no '=' */
+       if ($r[1] == '/') {
+         return array(0, $r[4]);
+       }
+       return array(array('k' => $r[1], 'value' => 1, 'values' => array(1)), $r[4]);
+     }
+     if (!$r[3]) {/* no quots */
+       if ($sub_r = $this->x('([^\s]+)', $r[4])) {
+         return array(array('k' => $r[1], 'value' => $sub_r[1], 'values' => array($sub_r[1])), $sub_r[2]);
+       }
+       return array(array('k' => $r[1], 'value' => '', 'values' => array()), $r[4]);
+     }
+     /* quoted value: collect characters one by one until x() finds the quote character */
+     $val = '';
+     $sub_v = $r[4];
+     while ($sub_v && (!$sub_r = $this->x('(\x5c\\' .$r[3]. '|\\' .$r[3]. ')', $sub_v))) {
+       $val .= substr($sub_v, 0, 1);
+       $sub_v = substr($sub_v, 1);
+     }
+     $sub_v = $sub_v ? $sub_r[2] : $sub_v;
+     /* explode() instead of split(): split() was deprecated in PHP 5.3 and removed in PHP 7 */
+     $vals = explode(' ', $val);
+     return array(array('k' => $r[1], 'value' => $val, 'values' => $vals), $sub_v);
+   }
+   return array(0, $v);
+ }
+
+ /* */
+
+ /*
+  * Tells whether an opening tag is an empty element: 1 when the tag name $t is in the
+  * fixed list below or the attribute string $v ends with "/", otherwise 0.
+  */
+ function isEmpty($t, $v) {
+   $is_listed = preg_match('/^(area|base|br|col|frame|hr|input|img|link|xmeta|param)$/', $t);
+   $is_self_closed = preg_match('/\/$/', $v);
+   return ($is_listed || $is_self_closed) ? 1 : 0;
+ }
+
+ /* */
+
+ /* Extracts a leading closing tag. Returns array(array('tag' => lower-cased name), rest) or array(0, $v). */
+ function xClose($v) {
+   $m = $this->x('\<\/([^\s\>]+)\>', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   return array(array('tag' => strtolower($m[1])), $m[2]);
+ }
+
+ /* */
+
+ /*
+  * Extracts character data from the start of $v.
+  * Inside a script/style element everything up to the matching closing tag is taken (the
+  * closing tag itself stays in the rest); elsewhere everything up to the next "<" is taken.
+  * Returns array(text, rest) or array(0, $v).
+  */
+ function xCData($v) {
+   if (preg_match('/(script|style)/i', $this->cur_tag)) {
+     $m = $this->x('(.+)(\<\/' . $this->cur_tag . '\>)', $v, 'Uis');
+     if ($m) {
+       return array($m[1], $m[2] . $m[3]);
+     }
+     return array(0, $v);
+   }
+   $m = $this->x('([^\<]+)', $v, 'si', $this->keep_cdata_ws);
+   if ($m) {
+     return array($m[1], $m[2]);
+   }
+   return array(0, $v);
+ }
+
+ /* */
+
+ /*
+  * Runs the extractor component of every requested format that has not been processed yet.
+  *
+  * $formats  space-separated format names; when empty, the 'sem_html_formats' value
+  *           obtained through v1() is used, with default_sem_html_formats as fallback
+  */
+ function extractRDF($formats = '') {
+   $this->node_index = $this->getNodeIndex();
+   $formats = !$formats ? $this->v1('sem_html_formats', $this->default_sem_html_formats, $this->a) : $formats;
+   /* explode() instead of split(): split() was deprecated in PHP 5.3 and removed in PHP 7 */
+   $formats = explode(' ', $formats);
+   foreach ($formats as $format) {
+     if (!in_array($format, $this->extracted_formats)) {
+       /* e.g. "posh-rdf" becomes the component name "PoshRdfExtractor" */
+       $comp = $this->camelCase($format) . 'Extractor';
+       if (ARC2::inc($comp)) {
+         $cls = 'ARC2_' . $comp;
+         $e = new $cls($this->a, $this);
+         $e->extractRDF();
+       }
+       /* marked as done even when the component could not be included */
+       $this->extracted_formats[] = $format;
+     }
+   }
+ }
+
+ /* Returns the entry of $this->nodes stored under $id, or 0 when there is none. */
+ function getNode($id) {
+   if (!isset($this->nodes[$id])) {
+     return 0;
+   }
+   return $this->nodes[$id];
+ }
+
+ /* */
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+ * ARC2 SPARQL-enhanced Turtle Parser
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-04-11
+*/
+
+ARC2::inc('RDFParser');
+
+class ARC2_TurtleParser extends ARC2_RDFParser {
+
+ /* Forwards the configuration $a and the calling object to the parent constructor. */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* Class-named (PHP 4 style) constructor; delegates to __construct(). */
+ function ARC2_TurtleParser($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* Initialises namespace constants, the namespace-to-prefix map and the parsing loop limit. */
+ function __init() {/* reader */
+ parent::__init();
+ $this->state = 0;
+ $this->xml = 'http://www.w3.org/XML/1998/namespace';
+ $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ $this->xsd = 'http://www.w3.org/2001/XMLSchema#';
+ /* namespace URI => prefix */
+ $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf', $this->xsd => 'xsd');
+ $this->unparsed_code = '';
+ /* parse() gives up once this many read iterations pass without parsing progress */
+ $this->max_parsing_loops = $this->v('turtle_max_parsing_loops', 500, $this->a);
+ }
+
+ /* */
+
+ /*
+  * Matching helper: replaces a leading run of 0xA0/0xC2 bytes by a single space, strips
+  * leading "#" comments and then delegates to ARC2::x().
+  */
+ function x($re, $v, $options = 'si') {
+   $v = preg_replace('/^[\xA0\xC2]+/', ' ', $v);
+   /* comment removal */
+   for (;;) {
+     if (!preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $v, $m)) {
+       break;
+     }
+     $v = $m[2];
+   }
+   return ARC2::x($re, $v, $options);
+ }
+
+ /* Returns a fresh blank node label: "_:" + bnode prefix + incremented counter. */
+ function createBnodeID(){
+   $next = ++$this->bnode_id;
+   return '_:' . $this->bnode_prefix . $next;
+ }
+
+ /* */
+
+ /*
+  * Appends triple $t to $this->triples.
+  * With skip_dupes enabled, a triple whose serialized form was stored before is ignored.
+  */
+ function addT($t) {
+   if ($this->skip_dupes) {
+     $hash = md5(serialize($t));
+     if (isset($this->added_triples[$hash])) {
+       return;
+     }
+     $this->added_triples[$hash] = true;
+   }
+   $this->triples[$this->t_count] = $t;
+   $this->t_count++;
+ }
+
+ /* */
+
+ /* Returns the collected triples via v(), with an empty array as the default. */
+ function getTriples() {
+ return $this->v('triples', array());
+ }
+
+ /* Returns the triple counter maintained by addT(). */
+ function countTriples() {
+ return $this->t_count;
+ }
+
+ /* */
+
+ /* Returns the input that parse() left unconsumed ('' as the default). */
+ function getUnparsedCode() {
+ return $this->v('unparsed_code', '');
+ }
+
+ /* */
+
+ /*
+  * Resets $this->prefixes to the built-in rdf/rdfs/owl/xsd prefixes and adds the entries
+  * of the 'ns' option (prefix => URI); keys are stored with a trailing ":".
+  */
+ function setDefaultPrefixes() {
+   $defaults = array(
+     'rdf:' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
+     'rdfs:' => 'http://www.w3.org/2000/01/rdf-schema#',
+     'owl:' => 'http://www.w3.org/2002/07/owl#',
+     'xsd:' => 'http://www.w3.org/2001/XMLSchema#',
+   );
+   $this->prefixes = $defaults;
+   $custom = $this->v('ns', array(), $this->a);
+   if (!$custom) {
+     return;
+   }
+   foreach ($custom as $p => $u) {
+     $this->prefixes[$p . ':'] = $u;
+   }
+ }
+
+
+ /*
+  * Parses the Turtle document at $path (or the inline $data).
+  * The input is read in chunks; the prologue (base/prefix declarations) is handled first,
+  * then triples are extracted block by block and stored with addT(). Input that cannot be
+  * parsed ends up in $this->unparsed_code and is reported through addError().
+  *
+  * $path          location handed to the reader's activate()
+  * $data          optional document source handed to the reader's activate()
+  * $iso_fallback  not used by this method
+  *
+  * Returns whatever done() returns.
+  */
+ function parse($path, $data = '', $iso_fallback = false) {
+   $this->setDefaultPrefixes();
+   /* reader */
+   if (!$this->v('reader')) {
+     ARC2::inc('Reader');
+     /* plain assignment: "= & new" is deprecated since PHP 5.3 and a parse error as of PHP 7 */
+     $this->reader = new ARC2_Reader($this->a, $this);
+   }
+   $this->reader->setAcceptHeader('Accept: application/x-turtle; q=0.9, */*; q=0.1');
+   $this->reader->activate($path, $data);
+   $this->base = $this->v1('base', $this->reader->base, $this->a);
+   $this->r = array('vars' => array());
+   /* parse */
+   $buffer = '';
+   $more_triples = array();
+   $sub_v = '';
+   $sub_v2 = '';
+   $loops = 0;
+   $prologue_done = 0;
+   while ($d = $this->reader->readStream(0)) {
+     $buffer .= $d;
+     $sub_v = $buffer;
+     do {
+       $proceed = 0;
+       if (!$prologue_done) {
+         $proceed = 1;
+         if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
+           $loops = 0;
+           $sub_v .= $this->reader->readStream(0, 128);
+           /* we might have missed the final DOT in the previous prologue loop */
+           if ($sub_r = $this->x('\.', $sub_v)) $sub_v = $sub_r[1];
+           if ($this->x("\@?(base|prefix)", $sub_v)) {/* more prologue to come, use outer loop */
+             $proceed = 0;
+           }
+         }
+         else {
+           $prologue_done = 1;
+         }
+       }
+       if ($prologue_done && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v)) && is_array($sub_r)) {
+         $proceed = 1;
+         $loops = 0;
+         foreach ($sub_r as $t) {
+           $this->addT($t);
+         }
+       }
+     } while ($proceed);
+     /* $loops counts consecutive chunks that brought no parsing progress */
+     $loops++;
+     $buffer = $sub_v;
+     if ($loops > $this->max_parsing_loops) {/* most probably a parser or code bug, might also be a huge object value, though */
+       $this->addError('too many loops: ' . $loops . '. Could not parse "' . substr($buffer, 0, 200) . '..."');
+       break;
+     }
+   }
+   /* triples of a final statement that was never terminated are still stored */
+   foreach ($more_triples as $t) {
+     $this->addT($t);
+   }
+   $sub_v = count($more_triples) ? $sub_v2 : $sub_v;
+   $buffer = $sub_v;
+   $this->unparsed_code = $buffer;
+   $this->reader->closeStream();
+   unset($this->reader);
+   /* remove trailing comments */
+   while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) $this->unparsed_code = $m[2];
+   if ($this->unparsed_code && !$this->getErrors()) {
+     $rest = preg_replace('/[\x0a|\x0d]/i', ' ', substr($this->unparsed_code, 0, 30));
+     if (trim($rest)) $this->addError('Could not parse "' . $rest . '"');
+   }
+   return $this->done();
+ }
+
+ /*
+  * Consumes an optional base declaration followed by any number of prefix declarations,
+  * but only while no triple has been stored yet. Updates $this->base / $this->prefixes.
+  * Returns array(1 if anything was declared else 0, rest).
+  */
+ function xPrologue($v) {
+   $found = 0;
+   if ($this->t_count) {
+     return array($found, $v);
+   }
+   list($base, $v) = $this->xBaseDecl($v);
+   if ($base) {
+     $this->base = $base;
+     $found = 1;
+   }
+   for (;;) {
+     list($decl, $v) = $this->xPrefixDecl($v);
+     if (!$decl) {
+       break;
+     }
+     $this->prefixes[$decl['prefix']] = $decl['uri'];
+     $found = 1;
+   }
+   return array($found, $v);
+ }
+
+ /* 3 */
+
+ /* Parses "@base <iri>" (the "@" and the trailing "." are optional). Returns array(iri, rest) or array(0, $v). */
+ function xBaseDecl($v) {
+   $kw = $this->x("\@?base\s+", $v);
+   if (!$kw) {
+     return array(0, $v);
+   }
+   list($iri, $rest) = $this->xIRI_REF($kw[1]);
+   if (!$iri) {
+     return array(0, $v);
+   }
+   $dot = $this->x('\.', $rest);
+   if ($dot) {
+     $rest = $dot[1];
+   }
+   return array($iri, $rest);
+ }
+
+ /* 4 */
+
+ /*
+  * Parses "@prefix name: <iri>" (the "@" and the trailing "." are optional).
+  * Returns array(array('prefix' => "name:", 'uri_ref' => iri as written, 'uri' => resolved iri), rest)
+  * or array(0, $v).
+  */
+ function xPrefixDecl($v) {
+   $kw = $this->x("\@?prefix\s+", $v);
+   if (!$kw) {
+     return array(0, $v);
+   }
+   list($prefix, $rest) = $this->xPNAME_NS($kw[1]);
+   if (!$prefix) {
+     return array(0, $v);
+   }
+   list($ref, $rest) = $this->xIRI_REF($rest);
+   if (!$ref) {
+     return array(0, $v);
+   }
+   $uri = $this->calcURI($ref, $this->base);
+   $dot = $this->x('\.', $rest);
+   if ($dot) {
+     $rest = $dot[1];
+   }
+   return array(array('prefix' => $prefix, 'uri_ref' => $ref, 'uri' => $uri), $rest);
+ }
+
+ /* 21.., 32.. */
+
+ /*
+  * Parses as many triple statements as possible from the start of $v.
+  * State machine: 1 = expecting subject, 2 = predicate, 3 = object,
+  * 4 = separator (. ; ,) or the end of the block.
+  * Returns array(triples|0, buffer, pending, rest):
+  *   triples  triples that were committed (0 when there are none)
+  *   buffer   input position at which triples were last committed
+  *   pending  triples collected since the last commit
+  *   rest     input position reached while collecting the pending triples
+  */
+ function xTriplesBlock($v) {
+ $pre_r = array();
+ $r = array();
+ $state = 1;
+ $sub_v = $v;
+ $buffer = $sub_v;
+ do {
+ $proceed = 0;
+ if ($state == 1) {/* expecting subject */
+ $t = array('type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
+ if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
+ $t['s'] = $sub_r['value'];
+ $t['s_type'] = $sub_r['type'];
+ $state = 2;
+ $proceed = 1;
+ /* only a placeholder may stand alone as a statement */
+ if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
+ if ($t['s_type'] == 'placeholder') {
+ $state = 4;
+ }
+ else {
+ $this->addError('"' . $sub_r[1]. '" after subject found.');
+ }
+ }
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
+ $t['s'] = $sub_r['id'];
+ $t['s_type'] = $sub_r['type'];
+ $pre_r = array_merge($pre_r, $sub_r['triples']);
+ $state = 2;
+ $proceed = 1;
+ if ($sub_r = $this->x('\.', $sub_v)) {
+ $this->addError('DOT after subject found.');
+ }
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
+ $t['s'] = $sub_r['id'];
+ $t['s_type'] = $sub_r['type'];
+ $pre_r = array_merge($pre_r, $sub_r['triples']);
+ $state = 2;
+ $proceed = 1;
+ }
+ elseif ($sub_r = $this->x('\.', $sub_v)) {
+ $this->addError('Subject expected, DOT found.' . $sub_v);
+ }
+ }
+ if ($state == 2) {/* expecting predicate */
+ /* "a" is shorthand for rdf:type */
+ if ($sub_r = $this->x('a\s+', $sub_v)) {
+ $sub_v = $sub_r[1];
+ $t['p'] = $this->rdf . 'type';
+ $t['p_type'] = 'uri';
+ $state = 3;
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
+ if ($sub_r['type'] == 'bnode') {
+ $this->addError('Blank node used as triple predicate');
+ }
+ $t['p'] = $sub_r['value'];
+ $t['p_type'] = $sub_r['type'];
+ $state = 3;
+ $proceed = 1;
+ }
+ elseif ($sub_r = $this->x('\.', $sub_v)) {
+ $state = 4;
+ }
+ elseif ($sub_r = $this->x('\}', $sub_v)) {
+ /* block end: commit what was collected and stop */
+ $buffer = $sub_v;
+ $r = array_merge($r, $pre_r);
+ $pre_r = array();
+ $proceed = 0;
+ }
+ }
+ if ($state == 3) {/* expecting object */
+ if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
+ $t['o'] = $sub_r['value'];
+ $t['o_type'] = $sub_r['type'];
+ $t['o_lang'] = $this->v('lang', '', $sub_r);
+ $t['o_datatype'] = $this->v('datatype', '', $sub_r);
+ $pre_r[] = $t;
+ $state = 4;
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
+ $t['o'] = $sub_r['id'];
+ $t['o_type'] = $sub_r['type'];
+ $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
+ $state = 4;
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
+ $t['o'] = $sub_r['id'];
+ $t['o_type'] = $sub_r['type'];
+ $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
+ $state = 4;
+ $proceed = 1;
+ }
+ }
+ if ($state == 4) {/* expecting . or ; or , or } */
+ if ($sub_r = $this->x('\.', $sub_v)) {
+ /* statement complete: commit the pending triples, next comes a new subject */
+ $sub_v = $sub_r[1];
+ $buffer = $sub_v;
+ $r = array_merge($r, $pre_r);
+ $pre_r = array();
+ $state = 1;
+ $proceed = 1;
+ }
+ elseif ($sub_r = $this->x('\;', $sub_v)) {
+ /* same subject, new predicate */
+ $sub_v = $sub_r[1];
+ $state = 2;
+ $proceed = 1;
+ }
+ elseif ($sub_r = $this->x('\,', $sub_v)) {
+ /* same subject and predicate, new object */
+ $sub_v = $sub_r[1];
+ $state = 3;
+ $proceed = 1;
+ if ($sub_r = $this->x('\}', $sub_v)) {
+ $this->addError('Object expected, } found.');
+ }
+ }
+ /* a brace or one of these keywords ends the block: commit and stop */
+ if ($sub_r = $this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
+ $buffer = $sub_v;
+ $r = array_merge($r, $pre_r);
+ $pre_r = array();
+ $proceed = 0;
+ }
+ }
+ } while ($proceed);
+ return count($r) ? array($r, $buffer, $pre_r, $sub_v) : array(0, $buffer, $pre_r, $sub_v);
+ }
+
+ /* 39.. */
+
+ /*
+  * Parses a "[ predicate object ... ]" property list and creates a new blank node as its subject.
+  * Uses the predicate (2) / object (3) / separator (4) states.
+  * Returns array(array('id' => bnode id, 'type' => 'bnode', 'triples' => triples), rest),
+  * or array(0, $v) when $v does not start with "[" or the closing "]" is not reached.
+  */
+ function xBlankNodePropertyList($v) {
+ if ($sub_r = $this->x('\[', $v)) {
+ $sub_v = $sub_r[1];
+ $s = $this->createBnodeID();
+ $r = array('id' => $s, 'type' => 'bnode', 'triples' => array());
+ $t = array('type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
+ $state = 2;
+ $closed = 0;
+ do {
+ $proceed = 0;
+ if ($state == 2) {/* expecting predicate */
+ /* "a" is shorthand for rdf:type */
+ if ($sub_r = $this->x('a\s+', $sub_v)) {
+ $sub_v = $sub_r[1];
+ $t['p'] = $this->rdf . 'type';
+ $t['p_type'] = 'uri';
+ $state = 3;
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
+ $t['p'] = $sub_r['value'];
+ $t['p_type'] = $sub_r['type'];
+ $state = 3;
+ $proceed = 1;
+ }
+ }
+ if ($state == 3) {/* expecting object */
+ if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
+ $t['o'] = $sub_r['value'];
+ $t['o_type'] = $sub_r['type'];
+ $t['o_lang'] = $this->v('lang', '', $sub_r);
+ $t['o_datatype'] = $this->v('datatype', '', $sub_r);
+ $r['triples'][] = $t;
+ $state = 4;
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
+ $t['o'] = $sub_r['id'];
+ $t['o_type'] = $sub_r['type'];
+ $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
+ $state = 4;
+ $proceed = 1;
+ }
+ elseif((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
+ $t['o'] = $sub_r['id'];
+ $t['o_type'] = $sub_r['type'];
+ $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
+ $state = 4;
+ $proceed = 1;
+ }
+ }
+ if ($state == 4) {/* expecting . or ; or , or ] */
+ /* independent ifs (not elseif): several separators can be consumed in one pass */
+ if ($sub_r = $this->x('\.', $sub_v)) {
+ $sub_v = $sub_r[1];
+ $state = 1;
+ $proceed = 1;
+ }
+ if ($sub_r = $this->x('\;', $sub_v)) {
+ $sub_v = $sub_r[1];
+ $state = 2;
+ $proceed = 1;
+ }
+ if ($sub_r = $this->x('\,', $sub_v)) {
+ $sub_v = $sub_r[1];
+ $state = 3;
+ $proceed = 1;
+ }
+ if ($sub_r = $this->x('\]', $sub_v)) {
+ $sub_v = $sub_r[1];
+ $proceed = 0;
+ $closed = 1;
+ }
+ }
+ } while ($proceed);
+ if ($closed) {
+ return array($r, $sub_v);
+ }
+ /* unterminated list: report failure with the untouched input */
+ return array(0, $v);
+ }
+ return array(0, $v);
+ }
+
+ /* 40.. */
+
+ /*
+  * Parses a "( item item ... )" collection into an rdf:first / rdf:rest chain of blank nodes.
+  * Returns array(array('id' => id of the node created first, 'type' => 'bnode', 'triples' => triples), rest),
+  * or array(0, $v) when $v does not start with "(" or the closing ")" is not reached.
+  */
+ function xCollection($v) {
+ if ($sub_r = $this->x('\(', $v)) {
+ $sub_v = $sub_r[1];
+ $s = $this->createBnodeID();
+ $r = array('id' => $s, 'type' => 'bnode', 'triples' => array());
+ $closed = 0;
+ do {
+ $proceed = 0;
+ /* each item becomes the rdf:first value of the current list node $s */
+ if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
+ $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'first', 'o' => $sub_r['value'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $sub_r['type'], 'o_lang' => $this->v('lang', '', $sub_r), 'o_datatype' => $this->v('datatype', '', $sub_r));
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
+ $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'first', 'o' => $sub_r['id'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $sub_r['type'], 'o_lang' => '', 'o_datatype' => '');
+ $r['triples'] = array_merge($r['triples'], $sub_r['triples']);
+ $proceed = 1;
+ }
+ elseif((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
+ $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'first', 'o' => $sub_r['id'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $sub_r['type'], 'o_lang' => '', 'o_datatype' => '');
+ $r['triples'] = array_merge($r['triples'], $sub_r['triples']);
+ $proceed = 1;
+ }
+ if ($proceed) {
+ if ($sub_r = $this->x('\)', $sub_v)) {
+ /* last item: terminate the chain with rdf:nil */
+ $sub_v = $sub_r[1];
+ $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'rest', 'o' => $this->rdf . 'nil', 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'uri', 'o_lang' => '', 'o_datatype' => '');
+ $closed = 1;
+ $proceed = 0;
+ }
+ else {
+ /* more items follow: link to a new list node and continue from there */
+ $next_s = $this->createBnodeID();
+ $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'rest', 'o' => $next_s, 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'bnode', 'o_lang' => '', 'o_datatype' => '');
+ $s = $next_s;
+ }
+ }
+ } while ($proceed);
+ if ($closed) {
+ return array($r, $sub_v);
+ }
+ }
+ return array (0, $v);
+ }
+
+ /* 42 */
+
+ /* Tries a variable first, then a graph term. Returns array(term info, rest) or array(0, $v). */
+ function xVarOrTerm($v) {
+   list($var, $rest) = $this->xVar($v);
+   if ($var) {
+     return array($var, $rest);
+   }
+   list($term, $rest) = $this->xGraphTerm($v);
+   if ($term) {
+     return array($term, $rest);
+   }
+   return array(0, $v);
+ }
+
+ /* 44, 74.., 75.. */
+
+ /*
+  * Parses a "?name" / "$name" variable and registers the name in $this->r['vars'].
+  * Returns array(array('value' => name, 'type' => 'var'), rest) or array(0, $v).
+  */
+ function xVar($v) {
+   $m = $this->x('(\?|\$)([^\s]+)', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   list($name, $tail) = $this->xVARNAME($m[2]);
+   if (!$name) {
+     return array(0, $v);
+   }
+   if (!in_array($name, $this->r['vars'])) {
+     $this->r['vars'][] = $name;
+   }
+   /* the rest is what xVARNAME left over plus everything after the token */
+   return array(array('value' => $name, 'type' => 'var'), $tail . $m[3]);
+ }
+
+ /* 45 */
+
+ /*
+  * Tries the term parsers in a fixed order and returns the first hit.
+  * A scalar result is wrapped as array('value' => ...); 'type' is set through v1() with
+  * the type listed for that parser as the default.
+  * Returns array(term info, rest) or array(0, $v).
+  */
+ function xGraphTerm($v) {
+   $default_types = array(
+     'IRIref' => 'uri',
+     'RDFLiteral' => 'literal',
+     'NumericLiteral' => 'literal',
+     'BooleanLiteral' => 'literal',
+     'BlankNode' => 'bnode',
+     'NIL' => 'uri',
+     'Placeholder' => 'placeholder'
+   );
+   foreach ($default_types as $term => $type) {
+     $method = 'x' . $term;
+     list($hit, $rest) = $this->$method($v);
+     if (!$hit) {
+       continue;
+     }
+     if (!is_array($hit)) {
+       $hit = array('value' => $hit);
+     }
+     $hit['type'] = $this->v1('type', $type, $hit);
+     return array($hit, $rest);
+   }
+   return array(0, $v);
+ }
+
+ /* 60 */
+
+ /*
+  * Parses a string literal with an optional language tag or "^^datatype" suffix.
+  * Returns array(literal info, rest) or array(0, $v); the literal info is the xString()
+  * result with its value unescaped, plus 'lang' or 'datatype' when present.
+  *
+  * The datatype is accepted whenever xIRIref() yields a result. (The previous check
+  * indexed that result with [1], i.e. tested the second character of the IRI, and a
+  * failed lookup still consumed the "^^".) If no datatype IRI can be read, the rest now
+  * still starts at "^^".
+  */
+ function xRDFLiteral($v) {
+   list($str, $rest) = $this->xString($v);
+   if (!$str) {
+     return array(0, $v);
+   }
+   $str['value'] = $this->unescapeNtripleUTF($str['value']);
+   $r = $str;
+   list($lang, $after_lang) = $this->xLANGTAG($rest);
+   if ($lang) {
+     $r['lang'] = $lang;
+     $rest = $after_lang;
+   }
+   elseif (!$this->x('\s', $rest) && ($marker = $this->x('\^\^', $rest))) {
+     list($datatype, $after_datatype) = $this->xIRIref($marker[1]);
+     if ($datatype) {
+       $r['datatype'] = $datatype;
+       $rest = $after_datatype;
+     }
+   }
+   return array($r, $rest);
+ }
+
+ /* 61.., 62.., 63.., 64.. */
+
+ /*
+  * Parses an optionally signed number, trying double, then decimal, then integer.
+  * Returns array(array('value' => sign + digits, 'type' => 'literal', 'datatype' => xsd type), rest)
+  * or array(0, $v).
+  */
+ function xNumericLiteral($v) {
+   $sign_m = $this->x('(\-|\+)?', $v);
+   $sign = $sign_m[1];
+   $rest = $sign_m[2];
+   $kinds = array('DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer');
+   foreach ($kinds as $name => $xsd) {
+     $method = 'x' . $name;
+     list($num, $rest) = $this->$method($rest);
+     /* strict check: the number parsers signal failure with false */
+     if ($num === false) {
+       continue;
+     }
+     $literal = array('value' => $sign . $num, 'type' => 'literal', 'datatype' => $this->xsd . $xsd);
+     return array($literal, $rest);
+   }
+   return array(0, $v);
+ }
+
+ /* 65.. */
+
+ /* Parses "true" or "false". Returns array(matched word, rest) or array(0, $v). */
+ function xBooleanLiteral($v) {
+   $m = $this->x('(true|false)', $v);
+   return $m ? array($m[1], $m[2]) : array(0, $v);
+ }
+
+ /* 66.., 87.., 88.., 89.., 90.., 91.. */
+
+ /*
+  * Parses a quoted string that uses one of the four delimiters ''' " """ '.
+  * Returns array(array('value' => toUTF8(content), 'type' => 'literal', 'sub_type' => delimiter kind), rest)
+  * or array(0, $v) when there is no opening or no closing delimiter.
+  */
+ function xString($v) {/* largely simplified, may need some tweaks in following revisions */
+ $sub_v = $v;
+ if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $sub_v, $m)) return array(0, $v);
+ $delim = $m[1];
+ $rest = $m[2];
+ $sub_types = array("'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2');
+ $sub_type = $sub_types[$delim];
+ $pos = 0;
+ $r = false;
+ do {
+ $proceed = 0;
+ $delim_pos = strpos($rest, $delim, $pos);
+ if ($delim_pos === false) break;
+ $new_rest = substr($rest, $delim_pos + strlen($delim));
+ $r = substr($rest, 0, $delim_pos);
+ /* accept the delimiter unless it is preceded by an odd number of backslashes (i.e. escaped) */
+ if (!preg_match('/([\x5c]+)$/s', $r, $m) || !(strlen($m[1]) % 2)) {
+ $rest = $new_rest;
+ }
+ else {
+ /* escaped delimiter: keep searching behind it */
+ $r = false;
+ $pos = $delim_pos + 1;
+ $proceed = 1;
+ }
+ } while ($proceed);
+ if ($r !== false) {
+ return array(array('value' => $this->toUTF8($r) , 'type' => 'literal', 'sub_type' => $sub_type), $rest);
+ }
+ return array(0, $v);
+ }
+
+ /* 67 */
+
+ /*
+  * Parses either an "<iri>" reference (resolved against $this->base with calcURI()) or a
+  * prefixed name. Returns array(uri, rest) or array(0, $v).
+  */
+ function xIRIref($v) {
+   list($ref, $v) = $this->xIRI_REF($v);
+   if ($ref) {
+     return array($this->calcURI($ref, $this->base), $v);
+   }
+   list($name, $v) = $this->xPrefixedName($v);
+   if ($name) {
+     return array($name, $v);
+   }
+   return array(0, $v);
+ }
+
+ /* 68 */
+
+ /*
+  * Parses "prefix:local" or a bare "prefix:" and expands it with $this->prefixes.
+  * Returns array(uri, rest) or array(0, $v); a bare prefix that is not declared fails.
+  */
+ function xPrefixedName($v) {
+   list($full, $v) = $this->xPNAME_LN($v);
+   if ($full) {
+     return array($full, $v);
+   }
+   list($ns, $after_ns) = $this->xPNAME_NS($v);
+   if ($ns && isset($this->prefixes[$ns])) {
+     return array($this->prefixes[$ns], $after_ns);
+   }
+   return array(0, $v);
+ }
+
+ /* 69.., 73.., 93, 94.. */
+
+ /*
+  * Parses a labelled blank node ("_:label") or an anonymous one ("[]", for which a new
+  * id is created). Returns array(array('type' => 'bnode', 'value' => id), rest) or array(0, $v).
+  */
+ function xBlankNode($v) {
+   $marker = $this->x('\_\:', $v);
+   if ($marker) {
+     list($label, $rest) = $this->xPN_LOCAL($marker[1]);
+     if ($label) {
+       return array(array('type' => 'bnode', 'value' => '_:' . $label), $rest);
+     }
+   }
+   $anon = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
+   if ($anon) {
+     return array(array('type' => 'bnode', 'value' => $this->createBnodeID()), $anon[1]);
+   }
+   return array(0, $v);
+ }
+
+ /* 70.. @@sync with SPARQLParser */
+
+ /*
+  * Parses an "<...>" IRI reference. Returns array(iri text, rest), array(true, rest) for
+  * an empty "<>", or array(0, $v).
+  * A "<${...}>" reference is additionally passed to xPlaceholder(), which records the
+  * placeholder name when it can parse one.
+  */
+ function xIRI_REF($v) {
+   $ph = $this->x('\<(\$\{[^\>]*\})\>', $v);
+   if ($ph && $this->xPlaceholder($ph[1])) {
+     return array($ph[1], $ph[2]);
+   }
+   $empty = $this->x('\<\>', $v);
+   if ($empty) {
+     return array(true, $empty[1]);
+   }
+   $ref = $this->x('\<([^\s][^\<\>]*)\>', $v);
+   if ($ref) {
+     return array($ref[1] ? $ref[1] : true, $ref[2]);
+   }
+   return array(0, $v);
+ }
+
+ /* 71 */
+
+ /* Parses an optional prefix followed by ":". Returns array("prefix:", rest) or array(0, $v). */
+ function xPNAME_NS($v) {
+   list($name, $rest) = $this->xPN_PREFIX($v);
+   if (!$name) {
+     $name = '';
+   }
+   $colon = $this->x("\:", $rest);
+   if (!$colon) {
+     return array(0, $v);
+   }
+   return array($name . ':', $colon[1]);
+ }
+
+ /* 72 */
+
+ /*
+  * Parses "prefix:local" (no whitespace after the colon) and expands a declared prefix.
+  * Returns array(namespace uri + local name, rest) or array(0, $v).
+  */
+ function xPNAME_LN($v) {
+   list($ns, $rest) = $this->xPNAME_NS($v);
+   if (!$ns) {
+     return array(0, $v);
+   }
+   if ($this->x('\s', $rest)) {
+     return array(0, $v);
+   }
+   list($local, $rest) = $this->xPN_LOCAL($rest);
+   if (!$local) {
+     return array(0, $v);
+   }
+   if (!isset($this->prefixes[$ns])) {
+     return array(0, $v);
+   }
+   return array($this->prefixes[$ns] . $local, $rest);
+ }
+
+ /* 76 */
+
+ /* Parses an "@lang" tag such as "@en" or "@en-us". Returns array(tag without "@", rest) or array(0, $v). */
+ function xLANGTAG($v) {
+   if ($this->x('\s', $v)) {
+     return array(0, $v);
+   }
+   $m = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   /* group 2 is the last subtag, so the rest sits in group 3 */
+   return array($m[1], $m[3]);
+ }
+
+ /* 77.. */
+
+ /* Parses a run of digits. Returns array(digits, rest) or array(false, $v). */
+ function xINTEGER($v) {
+   $m = $this->x('([0-9]+)', $v);
+   return $m ? array($m[1], $m[2]) : array(false, $v);
+ }
+
+ /* 78.. */
+
+ /* Parses a decimal number ("1.", "1.5" or ".5"). Returns array(number text, rest) or array(false, $v). */
+ function xDECIMAL($v) {
+   $patterns = array('([0-9]+\.[0-9]*)', '(\.[0-9]+)');
+   foreach ($patterns as $re) {
+     $m = $this->x($re, $v);
+     if ($m) {
+       return array($m[1], $m[2]);
+     }
+   }
+   return array(false, $v);
+ }
+
+ /* 79.., 86.. */
+
+ /* Parses a number with an exponent ("1.5E3", ".5E-3" or "1E3"). Returns array(number text, rest) or array(false, $v). */
+ function xDOUBLE($v) {
+   $patterns = array(
+     '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
+     '(\.[0-9]+E[\+\-]?[0-9]+)',
+     '([0-9]+E[\+\-]?[0-9]+)'
+   );
+   foreach ($patterns as $re) {
+     $m = $this->x($re, $v);
+     if ($m) {
+       return array($m[1], $m[2]);
+     }
+   }
+   return array(false, $v);
+ }
+
+ /* 92 */
+
+ /* Parses an empty collection "()" as rdf:nil. Returns array(array('type' => 'uri', 'value' => rdf:nil), rest) or array(0, $v). */
+ function xNIL($v) {
+   $m = $this->x('\([\x20\x9\xd\xa]*\)', $v);
+   if (!$m) {
+     return array(0, $v);
+   }
+   return array(array('type' => 'uri', 'value' => $this->rdf . 'nil'), $m[1]);
+ }
+
+ /* 95.. */
+
+ /* Parses a run of letters or one backslash-u escape with 1-4 hex digits. Returns array(match, rest) or array(0, $v). */
+ function xPN_CHARS_BASE($v) {
+   $m = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);
+   return $m ? array($m[1], $m[2]) : array(0, $v);
+ }
+
+ /* 96 */
+
+ /* Parses what xPN_CHARS_BASE() accepts, or a single underscore. Returns array(match, rest) or array(0, $v). */
+ function xPN_CHARS_U($v) {
+   list($base, $rest) = $this->xPN_CHARS_BASE($v);
+   if ($base) {
+     return array($base, $rest);
+   }
+   $m = $this->x("(_)", $v);
+   if ($m) {
+     return array($m[1], $m[2]);
+   }
+   return array(0, $v);
+ }
+
+ /* 97.. */
+
+ /*
+  * Collects a variable name from the start of $v: digit runs, xPN_CHARS_U() matches and,
+  * once the name is non-empty, characters of the class in the third branch.
+  * Returns array(name, rest); the name is '' when nothing matched.
+  */
+ function xVARNAME($v) {
+ $r = '';
+ do {
+ $proceed = 0;
+ if ($sub_r = $this->x('([0-9]+)', $v)) {
+ $r .= $sub_r[1];
+ $v = $sub_r[2];
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xPN_CHARS_U($v)) && $sub_r) {
+ $r .= $sub_r;
+ $v = $sub_v;
+ $proceed = 1;
+ }
+ /* NOTE(review): PCRE reads \x with at most two hex digits, so this class is probably not the U+0300 to U+036F range it appears to aim for; confirm */
+ elseif ($r && ($sub_r = $this->x('([\xb7\x300-\x36f]+)', $v))) {
+ $r .= $sub_r[1];
+ $v = $sub_r[2];
+ $proceed = 1;
+ }
+ } while ($proceed);
+ return array($r, $v);
+ }
+
+ /* 98.. */
+
+ /* Parses what xPN_CHARS_U() accepts, or one character of the class below. Returns array(match, rest) or array(false, $v). */
+ function xPN_CHARS($v) {
+   list($hit, $rest) = $this->xPN_CHARS_U($v);
+   if ($hit) {
+     return array($hit, $rest);
+   }
+   $m = $this->x('([\-0-9\xb7\x300-\x36f])', $v);
+   if ($m) {
+     return array($m[1], $m[2]);
+   }
+   return array(false, $v);
+ }
+
+ /* 99 */
+
+ /*
+  * Parses a namespace prefix (the part before the colon).
+  * Returns array(prefix, rest); the prefix is falsy when nothing matched.
+  */
+ function xPN_PREFIX($v) {
+ /* fast path: any run of characters other than whitespace : ( ) { } ; , is taken as the prefix */
+ if ($sub_r = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's')) {/* accelerator */
+ return array($sub_r[1], $sub_r[2]);/* @@testing */
+ }
+ /* character-by-character path, only reached when the fast path did not match */
+ if ((list($r, $sub_v) = $this->xPN_CHARS_BASE($v)) && $r) {
+ do {
+ $proceed = 0;
+ list($sub_r, $sub_v) = $this->xPN_CHARS($sub_v);
+ if ($sub_r !== false) {
+ $r .= $sub_r;
+ $proceed = 1;
+ }
+ elseif ($sub_r = $this->x("\.", $sub_v)) {
+ $r .= '.';
+ $sub_v = $sub_r[1];
+ $proceed = 1;
+ }
+ } while ($proceed);
+ list($sub_r, $sub_v) = $this->xPN_CHARS($sub_v);
+ $r .= $sub_r ? $sub_r : '';
+ }
+ return array($r, $sub_v);
+ }
+
+ /* 100 */
+
+ /*
+  * Parses the local part of a prefixed name or blank node label.
+  * Returns array(local name, rest); the name is '' when nothing matched.
+  */
+ function xPN_LOCAL($v) {
+ /* fast path: a run of characters other than whitespace ( ) { } [ ] ; , . that is not directly followed by "." */
+ if (($sub_r = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's')) && !preg_match('/^\./', $sub_r[2])) {/* accelerator */
+ return array($sub_r[1], $sub_r[2]);/* @@testing */
+ }
+ $r = '';
+ $sub_v = $v;
+ do {
+ $proceed = 0;
+ /* the whitespace check ends the name */
+ if ($this->x('\s', $sub_v)) {
+ return array($r, $sub_v);
+ }
+ if ($sub_r = $this->x('([0-9])', $sub_v)) {
+ $r .= $sub_r[1];
+ $sub_v = $sub_r[2];
+ $proceed = 1;
+ }
+ elseif ((list($sub_r, $sub_v) = $this->xPN_CHARS_U($sub_v)) && $sub_r) {
+ $r .= $sub_r;
+ $proceed = 1;
+ }
+ elseif ($r) {
+ /* a "." belongs to the name only when it is not followed by whitespace or "}" */
+ if (($sub_r = $this->x('(\.)', $sub_v)) && !preg_match('/^[\s\}]/s', $sub_r[2])) {
+ $r .= $sub_r[1];
+ $sub_v = $sub_r[2];
+ }
+ if ((list($sub_r, $sub_v) = $this->xPN_CHARS($sub_v)) && $sub_r) {
+ $r .= $sub_r;
+ $proceed = 1;
+ }
+ }
+ } while ($proceed);
+ return array($r, $sub_v);
+ }
+
+ /* */
+
+ /*
+  * Replaces backslash escapes in a literal value: \t \n \r \" \' and the Unicode escapes
+  * \uXXXX / \UXXXXXXXX, which are turned into UTF-8 byte sequences.
+  * Hex digits are accepted in upper and lower case (previously lower-case digits passed
+  * the guard but were never decoded).
+  *
+  * $v  string to unescape
+  *
+  * Returns the unescaped string; a code point of 0x200000 or above is replaced by ''.
+  * NOTE(review): an escaped backslash (two backslashes) is not collapsed here and its
+  * second backslash can still take part in a following escape; confirm whether callers rely on that.
+  */
+ function unescapeNtripleUTF($v) {
+   if (strpos($v, '\\') === false) return $v;
+   $mappings = array('t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'");
+   foreach ($mappings as $in => $out) {
+     $v = preg_replace('/\x5c([' . $in . '])/', $out, $v);
+   }
+   if (strpos(strtolower($v), '\u') === false) return $v;
+   /* the 8-digit form is tried first so that it is not cut short by the 4-digit pattern */
+   while (preg_match('/\\\(U)([0-9A-Fa-f]{8})/', $v, $m) || preg_match('/\\\(u)([0-9A-Fa-f]{4})/', $v, $m)) {
+     $no = hexdec($m[2]);
+     /* manual UTF-8 encoding: 1, 2, 3 or 4 bytes depending on the code point */
+     if ($no < 128) $char = chr($no);
+     else if ($no < 2048) $char = chr(($no >> 6) + 192) . chr(($no & 63) + 128);
+     else if ($no < 65536) $char = chr(($no >> 12) + 224) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
+     else if ($no < 2097152) $char = chr(($no >> 18) + 240) . chr((($no >> 12) & 63) + 128) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
+     else $char= '';
+     /* replaces every occurrence of this exact escape sequence at once */
+     $v = str_replace('\\' . $m[1] . $m[2], $char, $v);
+   }
+   return $v;
+ }
+
+ /* */
+
+ /*
+  * Parses a "?{...}" / "${...}" placeholder with balanced braces and records its content
+  * in $this->r['placeholders'].
+  * Returns array(array('value' => content, 'type' => 'placeholder'), rest) or array(0, $v).
+  */
+ function xPlaceholder($v) {
+   $lead = $this->x('(\?|\$)', $v);
+   if (!$lead) {
+     return array(0, $v);
+   }
+   /* recursive pattern: matches a brace group including nested groups */
+   if (!preg_match('/(\{(?:[^{}]+|(?R))*\})/', $lead[2], $m)) {
+     return array(0, $v);
+   }
+   $tail = trim($lead[2]);
+   /* the brace group has to start right after the marker */
+   if (strpos($tail, $m[1]) !== 0) {
+     return array(0, $v);
+   }
+   $ph = substr($m[1], 1, -1);
+   $rest = substr($tail, strlen($m[1]));
+   if (!isset($this->r['placeholders'])) {
+     $this->r['placeholders'] = array();
+   }
+   if (!in_array($ph, $this->r['placeholders'])) {
+     $this->r['placeholders'][] = $ph;
+   }
+   return array(array('value' => $ph, 'type' => 'placeholder'), $rest);
+ }
+
+ /* */
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Legacy HTML Serializer
+author: Benjamin Nowack
+version: 2008-08-04
+*/
+
+ARC2::inc('Class');
+
+/* Serializes (nested) PHP arrays as HTML definition lists. */
+class ARC2_LegacyHTMLSerializer extends ARC2_Class {
+
+ function __construct($a = '', &$caller) {
+   parent::__construct($a, $caller);
+ }
+
+ /* class-named (PHP 4 style) constructor; delegates to __construct() */
+ function ARC2_LegacyHTMLSerializer($a = '', &$caller) {/* ns */
+   $this->__construct($a, $caller);
+ }
+
+ function __init() {
+   parent::__init();
+   $this->content_header = 'text/html';
+ }
+
+ /* */
+
+ /*
+  * Renders $struct as a <dl> list. Keys are written as <dt> elements unless the array is
+  * a plain list; nested arrays become nested <dl> lists inside the <dd>.
+  * Keys and scalar values are both passed through htmlspecialchars() (keys used to be
+  * written unescaped, which produced broken markup for keys containing <, > or &).
+  *
+  * $struct  array to serialize
+  * $root    not used by this method
+  * $ind     indentation unit placed in front of each generated line
+  *
+  * Returns the HTML string.
+  */
+ function getSerializedArray($struct, $root = 1, $ind = ' ') {
+   $n = "\n";
+   $r = '';
+   $is_flat = $this->isAssociativeArray($struct) ? 0 : 1;
+   foreach ($struct as $k => $v) {
+     if (!$is_flat) $r .= $n . $ind . $ind . '<dt>' . htmlspecialchars($k) . '</dt>';
+     $r .= $n . $ind . $ind . '<dd>' . (is_array($v) ? $this->getSerializedArray($v, 0, $ind . $ind . $ind) . $n . $ind . $ind : htmlspecialchars($v)) . '</dd>';
+   }
+   return $n . $ind . '<dl>' . $r . $n . $ind . '</dl>';
+ }
+
+ /* */
+
+ /* Returns 1 unless the keys of $v are exactly 0, 1, 2, ... in order. */
+ function isAssociativeArray($v) {
+   foreach (array_keys($v) as $k => $val) {
+     if ($k !== $val) return 1;
+   }
+   return 0;
+ }
+
+ /* */
+
+}
+
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Legacy JSON Serializer
+author: Benjamin Nowack
+version: 2008-08-04
+*/
+
+ARC2::inc('Class');
+
+/**
+ * Serializes plain PHP arrays as JSON.
+ */
+class ARC2_LegacyJSONSerializer extends ARC2_Class {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_LegacyJSONSerializer($a = '', &$caller) {/* ns */
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    $this->content_header = 'application/json';
+  }
+
+  /* */
+
+  /**
+   * Returns $struct as JSON text.
+   *
+   * Uses json_encode() when available and inserts a line break after each
+   * '","' boundary. Otherwise falls back to a hand-written encoder that
+   * emits every scalar as a JSON string.
+   *
+   * @param array  $struct Data to serialize.
+   * @param string $ind    Indentation prefix (fallback encoder only).
+   * @return string JSON text.
+   */
+  function getSerializedArray($struct, $ind = '') {
+    $n = "\n";
+    if (function_exists('json_encode')) return str_replace('","', '",' . $n . '"', json_encode($struct));
+    $r = '';
+    /* backslash must be first so the escapes added below are not escaped again; */
+    /* "\x08" is the backspace character (PHP has no "\b" string escape) */
+    $from = array("\\", "\r", "\t", "\n", '"', "\x08", "\f", "/");
+    $to = array('\\\\', '\r', '\t', '\n', '\"', '\b', '\f', '\/');
+    $is_flat = $this->isAssociativeArray($struct) ? 0 : 1;
+    foreach ($struct as $k => $v) {
+      $r .= $r ? ',' . $n . $ind . $ind : $ind . $ind;
+      /* keys need the same escaping as values to keep the output parseable */
+      $r .= $is_flat ? '' : '"' . str_replace($from, $to, $k) . '": ';
+      $r .= is_array($v) ? $this->getSerializedArray($v, $ind . ' ') : '"' . str_replace($from, $to, $v) . '"';
+    }
+    return $is_flat ? $ind . '[' . $n . $r . $n . $ind . ']' : $ind . '{' . $n . $r . $n . $ind . '}';
+  }
+
+  /* */
+
+  /**
+   * Returns 1 unless the keys of $v are exactly 0..n-1 in order, 0 otherwise.
+   */
+  function isAssociativeArray($v) {
+    foreach (array_keys($v) as $k => $val) {
+      if ($k !== $val) return 1;
+    }
+    return 0;
+  }
+
+  /* */
+
+}
+
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Legacy XML Serializer
+author: Benjamin Nowack
+version: 2008-08-04
+*/
+
+ARC2::inc('Class');
+
+/**
+ * Serializes plain PHP arrays as a simple XML document rooted at <items>.
+ */
+class ARC2_LegacyXMLSerializer extends ARC2_Class {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_LegacyXMLSerializer($a = '', &$caller) {/* ns */
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    $this->content_header = 'text/xml';
+  }
+
+  /* */
+
+  /**
+   * Turns $struct into XML elements, one per array entry.
+   *
+   * Entries of list-like arrays are named "item"; for associative arrays
+   * the element name is derived from the key (whitespace replaced by "_",
+   * then reduced to its trailing run of name characters).
+   *
+   * @param array  $struct Data to serialize.
+   * @param int    $root   When true, the result is wrapped in head and footer.
+   * @param string $ind    Indentation prefix for this nesting level.
+   * @return string XML text.
+   */
+  function getSerializedArray($struct, $root = 1, $ind = ' ') {
+    $nl = "\n";
+    $body = '';
+    $assoc = $this->isAssociativeArray($struct);
+    foreach ($struct as $key => $val) {
+      if ($assoc) {
+        $name = preg_replace('/[\s]/s', '_', $key);
+      }
+      else {
+        $name = 'item';
+      }
+      $name = preg_replace('/^.*([a-z0-9\-\_]+)$/Uis', '\\1', $name);
+      if (is_array($val)) {
+        $inner = $this->getSerializedArray($val, 0, $ind . ' ') . $nl . $ind;
+      }
+      else {
+        $inner = htmlspecialchars($val);
+      }
+      $body .= $nl . $ind . '<' . $name . '>' . $inner . '</' . $name . '>';
+    }
+    if (!$root) return $body;
+    return $this->getHead() . $body . $this->getFooter();
+  }
+
+  /* */
+
+  /**
+   * Returns the XML declaration followed by the opening <items> tag.
+   */
+  function getHead() {
+    return '<?xml version="1.0"?>' . "\n" . '<items>';
+  }
+
+  /**
+   * Returns the closing </items> tag on a new line.
+   */
+  function getFooter() {
+    return "\n" . '</items>';
+  }
+
+  /* */
+
+  /**
+   * Returns 0 when the keys of $v are exactly 0..n-1 in order, 1 otherwise.
+   */
+  function isAssociativeArray($v) {
+    $keys = array_keys($v);
+    for ($i = 0, $i_max = count($keys); $i < $i_max; $i++) {
+      if ($keys[$i] !== $i) return 1;
+    }
+    return 0;
+  }
+
+  /* */
+
+}
+
--- /dev/null
+<?php
+/**
+ * ARC2 MicroRDF Serializer
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-24
+*/
+
+ARC2::inc('RDFSerializer');
+
+/**
+ * Serializes an RDF index as HTML annotated with microdata-style attributes.
+ */
+class ARC2_MicroRDFSerializer extends ARC2_RDFSerializer {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_MicroRDFSerializer($a = '', &$caller) {/* ns */
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    $this->content_header = 'text/html';
+    /* optional store that getObjectLabel() queries for labels */
+    $this->label_store = $this->v('label_store', '', $this->a);
+  }
+
+  /* */
+
+  /**
+   * Picks a display label for a resource.
+   *
+   * Uses the first value of the first property whose URI ends in
+   * name/label/summary/title/fn; otherwise a fixed text for bnodes, or the
+   * label extracted from the URI.
+   *
+   * @param string $res Resource URI or bnode id.
+   * @param array  $ps  Property => objects map of the resource (optional).
+   * @return string Label text.
+   */
+  function getLabel($res, $ps = '') {
+    if (!$ps) $ps = array();
+    foreach ($ps as $p => $os) {
+      if (preg_match('/[\/\#](name|label|summary|title|fn)$/i', $p)) {
+        return $os[0]['value'];
+      }
+    }
+    if (preg_match('/^\_\:/', $res)) return "An unnamed resource";
+    return $this->extractTermLabel($res);
+  }
+
+  /**
+   * Renders the index (or only resource $res of it) as HTML.
+   *
+   * Subject and property URIs are HTML-escaped before being written into
+   * href attributes.
+   *
+   * @param array  $index Subject => property => objects structure.
+   * @param string $res   When given, only this subject is rendered.
+   * @return string HTML fragment.
+   */
+  function getSerializedIndex($index, $res = '') {
+    $r = '';
+    $n = "\n";
+    if ($res) $index = array($res => $index[$res]);
+    $types = $this->v($this->expandPName('rdf:type'), array(), $index);
+    $main_type = $types ? $types[0]['value'] : '';
+    foreach ($index as $s => $ps) {
+      /* node */
+      $r .= '
+ <div class="rdf-item" ' . $this->mdAttrs($s, $main_type) . '>
+ <h3 class="rdf-itemlabel"><a href="' . htmlspecialchars($s) . '">' . ucfirst($this->getLabel($s, $ps)) . '</a></h3>
+ ';
+      /* arcs */
+      foreach ($ps as $p => $os) {
+        $p_cls = strtolower($this->getPName($p));
+        $p_cls = str_replace(':', '-', $p_cls);
+        $r .= '
+ <div class="rdf-prop ' . $p_cls . '">
+ <a class="rdf-proplabel" href="' . htmlspecialchars($p) . '">' . ucfirst($this->getLabel($p)) . ':</a>
+ <ul class="rdf-values">
+ ';
+        $oc = count($os);
+        foreach ($os as $i => $o) {
+          $val = $this->getObjectValue($o, $p);
+          /* mark the first and last value for styling */
+          $cls = '';
+          if ($i == 0) $cls .= ($cls ? ' ' : '') . 'first';
+          if ($i == $oc - 1) $cls .= ($cls ? ' ' : '') . 'last';
+          $r .= $n . '<li' . ($cls ? ' class="' . $cls . '"' : '') . '>' . $val . '</li>';
+        }
+        $r .= '
+ </ul>
+ <div class="clb"></div>
+ </div>
+ ';
+      }
+      /* /node */
+      $r .= '
+ <div class="clb"></div>
+ </div>
+ ';
+    }
+    return $r;
+  }
+
+  /**
+   * Dispatches to the renderer matching the object's type.
+   *
+   * URIs ending in an image extension are rendered as images.
+   *
+   * @param array  $o Object term ('type', 'value', ...).
+   * @param string $p Property URI, used for the itemprop attribute.
+   * @return string HTML fragment.
+   */
+  function getObjectValue($o, $p) {
+    if ($o['type'] == 'uri') {
+      if (preg_match('/(jpe?g|gif|png)$/i', $o['value'])) {
+        return $this->getImageObjectValue($o, $p);
+      }
+      return $this->getURIObjectValue($o, $p);
+    }
+    if ($o['type'] == "bnode") {
+      return $this->getBNodeObjectValue($o, $p);
+    }
+    return $this->getLiteralObjectValue($o, $p);
+  }
+
+  /**
+   * Renders an image URI as an <img> element.
+   */
+  function getImageObjectValue($o, $p) {
+    return '<img class="rdf-value" itemprop="' . htmlspecialchars($p) . '" src="' . htmlspecialchars($o['value']) . '" alt="img" />';
+  }
+
+  /**
+   * Renders a URI object as a link.
+   *
+   * When the term carries an 'href' that differs from its value, the value
+   * stays in the href attribute and the differing target is applied via
+   * an onclick handler.
+   */
+  function getURIObjectValue($o, $p) {
+    $id = htmlspecialchars($o['value']);
+    $label = $this->getObjectLabel($o['value']);
+    $prop = htmlspecialchars($p);
+    /* differing href */
+    $href = htmlspecialchars($this->v('href', $o['value'], $o));
+    if ($id != $href) {
+      return '<a class="rdf-value" itemprop="' . $prop . '" href="' . $id . '" onclick="location.href=\'' . $href . '\';return false">' . $label . '</a>';
+    }
+    return '<a class="rdf-value" itemprop="' . $prop . '" href="' . $id . '">' . $label . '</a>';
+  }
+
+  /**
+   * Renders a blank node as a nested item showing its id.
+   */
+  function getBNodeObjectValue($o, $p) {
+    return '<div class="rdf-value" itemprop="' . htmlspecialchars($p) . '" itemscope="">' . $o['value'] . '</div>';
+  }
+
+  /**
+   * Renders a literal value.
+   *
+   * NOTE(review): the literal text is emitted without HTML escaping; this
+   * may be deliberate for markup literals - confirm before changing.
+   */
+  function getLiteralObjectValue($o, $p) {
+    return '<div class="rdf-value" itemprop="' . htmlspecialchars($p) . '">' . $o['value'] . '</div>';
+  }
+
+  /* */
+
+  /**
+   * Returns a label for the resource $id.
+   *
+   * Falls back to the label extracted from the URI when no label store is
+   * configured or the store has no matching value.
+   *
+   * NOTE(review): $id is interpolated into the query text as-is; verify
+   * that callers only pass well-formed URIs.
+   */
+  function getObjectLabel($id) {
+    $r = $this->extractTermLabel($id);
+    if (!$this->label_store) return $r;
+    $q = '
+ SELECT ?val WHERE {
+ <' . $id . '> ?p ?val .
+ FILTER(REGEX(str(?p), "(label|title|name|summary)$"))
+ } LIMIT 1
+ ';
+    $row = $this->label_store->query($q, 'row');
+    return $row ? $row['val'] : $r;
+  }
+
+  /* */
+
+}
+
--- /dev/null
+<?php
+/**
+ * ARC2 N-Triples Serializer
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-03-29
+*/
+
+ARC2::inc('RDFSerializer');
+
+/**
+ * Serializes an RDF index as N-Triples (one "s p o ." statement per line).
+ */
+class ARC2_NTriplesSerializer extends ARC2_RDFSerializer {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_NTriplesSerializer($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    /* per-character cache filled by escape() */
+    $this->esc_chars = array();
+    /* when set, escape() skips the per-character escaping */
+    $this->raw = 0;
+  }
+
+  /* */
+
+  /**
+   * Returns the serialized form of a single term.
+   *
+   * Plain strings are treated as bnode ids ("_:" prefix), as URIs (scheme
+   * followed by ":" without whitespace or quotes) or else as literals.
+   * Term arrays that are not literals are serialized via their 'value'.
+   *
+   * @param string|array $v Term string or term array ('type', 'value',
+   *                        optional 'lang' / 'datatype').
+   * @return string Serialized term.
+   */
+  function getTerm($v) {
+    if (!is_array($v)) {
+      if (preg_match('/^\_\:/', $v)) {
+        return $v;
+      }
+      if (preg_match('/^[a-z0-9]+\:[^\s\"]*$/is', $v)) {
+        return '<' . $this->escape($v) . '>';
+      }
+      return $this->getTerm(array('type' => 'literal', 'value' => $v));
+    }
+    if ($v['type'] != 'literal') {
+      return $this->getTerm($v['value']);
+    }
+    /* literal; alternative quote styles are only chosen in raw mode */
+    $quot = '"';
+    if ($this->raw && preg_match('/\"/', $v['value'])) {
+      $quot = "'";
+      if (preg_match('/\'/', $v['value'])) {
+        $quot = '"""';
+        if (preg_match('/\"\"\"/', $v['value']) || preg_match('/\"$/', $v['value']) || preg_match('/^\"/', $v['value'])) {
+          $quot = "'''";
+          /* keep quotes at the value's edges from merging with the delimiter */
+          $v['value'] = preg_replace("/'$/", "' ", $v['value']);
+          $v['value'] = preg_replace("/^'/", " '", $v['value']);
+          $v['value'] = str_replace("'''", '\\\'\\\'\\\'', $v['value']);
+        }
+      }
+    }
+    if ($this->raw && (strlen($quot) == 1) && preg_match('/[\x0d\x0a]/', $v['value'])) {
+      $quot = $quot . $quot . $quot;
+    }
+    /* a datatype takes precedence over a language tag */
+    $suffix = isset($v['lang']) && $v['lang'] ? '@' . $v['lang'] : '';
+    $suffix = isset($v['datatype']) && $v['datatype'] ? '^^' . $this->getTerm($v['datatype']) : $suffix;
+    return $quot . $this->escape($v['value']) . $quot . $suffix;
+  }
+
+  /**
+   * Serializes a subject => property => objects index.
+   *
+   * @param array $index Index to serialize; a non-array object entry is
+   *                     treated as a single literal.
+   * @param int   $raw   Stored in $this->raw for getTerm() and escape().
+   * @return string Statements separated by newlines, with a trailing newline.
+   */
+  function getSerializedIndex($index, $raw = 0) {
+    $this->raw = $raw;
+    $r = '';
+    $nl = "\n";
+    foreach ($index as $s => $ps) {
+      $s = $this->getTerm($s);
+      foreach ($ps as $p => $os) {
+        $p = $this->getTerm($p);
+        if (!is_array($os)) {/* single literal o */
+          $os = array(array('value' => $os, 'type' => 'literal'));
+        }
+        foreach ($os as $o) {
+          $o = $this->getTerm($o);
+          $r .= $r ? $nl : '';
+          $r .= $s . ' ' . $p . ' ' . $o . ' .';
+        }
+      }
+    }
+    return $r . $nl;
+  }
+
+  /* */
+
+  /**
+   * Escapes a string for use inside a URI reference or literal.
+   *
+   * The value is first converted from UTF-8 to ISO-8859-1 when that is
+   * lossless (no "?" substitutes appear). In raw mode it is returned at
+   * that point; otherwise every byte is replaced by its escaped form.
+   *
+   * NOTE(review): when the conversion is not lossless the UTF-8 string is
+   * processed byte by byte, so each byte of a multi-byte character is
+   * escaped separately - confirm whether that is intended.
+   */
+  function escape($v) {
+    $r = '';
+    $v = (strpos(utf8_decode(str_replace('?', '', $v)), '?') === false) ? utf8_decode($v) : $v;
+    if ($this->raw) return $v;
+    for ($i = 0, $i_max = strlen($v); $i < $i_max; $i++) {
+      $c = $v[$i];
+      if (!isset($this->esc_chars[$c])) {
+        $this->esc_chars[$c] = $this->getEscapedChar($c, $this->getCharNo($c));
+      }
+      $r .= $this->esc_chars[$c];
+    }
+    return $r;
+  }
+
+  /* */
+
+  /**
+   * Returns the code point of $c, decoded from the result of utf8_encode($c).
+   */
+  function getCharNo($c) {
+    $c_utf = utf8_encode($c);
+    $bl = strlen($c_utf);/* binary length */
+    $r = 0;
+    switch ($bl) {
+      case 1:/* 0####### (0-127) */
+        $r = ord($c_utf);
+        break;
+      case 2:/* 110##### 10###### = 192+x 128+x */
+        $r = ((ord($c_utf[0]) - 192) * 64) + (ord($c_utf[1]) - 128);
+        break;
+      case 3:/* 1110#### 10###### 10###### = 224+x 128+x 128+x */
+        $r = ((ord($c_utf[0]) - 224) * 4096) + ((ord($c_utf[1]) - 128) * 64) + (ord($c_utf[2]) - 128);
+        break;
+      case 4:/* 1111#### 10###### 10###### 10###### = 240+x 128+x 128+x 128+x */
+        $r = ((ord($c_utf[0]) - 240) * 262144) + ((ord($c_utf[1]) - 128) * 4096) + ((ord($c_utf[2]) - 128) * 64) + (ord($c_utf[3]) - 128);
+        break;
+    }
+    return $r;
+  }
+
+  /**
+   * Returns the N-Triples representation of character $c with code point $no.
+   *
+   * @param string $c  The character itself.
+   * @param int    $no Its code point.
+   * @return string Escaped character, or '' for code points above #x10FFFF.
+   */
+  function getEscapedChar($c, $no) {/*see http://www.w3.org/TR/rdf-testcases/#ntrip_strings */
+    if ($no < 9) return "\\u" . sprintf('%04X', $no); /* #x0-#x8 (0-8) */
+    if ($no == 9) return '\t'; /* #x9 (9) */
+    if ($no == 10) return '\n'; /* #xA (10) */
+    if ($no < 13) return "\\u" . sprintf('%04X', $no); /* #xB-#xC (11-12) */
+    if ($no == 13) return '\r'; /* #xD (13) */
+    if ($no < 32) return "\\u" . sprintf('%04X', $no); /* #xE-#x1F (14-31) */
+    if ($no < 34) return $c; /* #x20-#x21 (32-33) */
+    if ($no == 34) return '\"'; /* #x22 (34) */
+    if ($no < 92) return $c; /* #x23-#x5B (35-91) */
+    /* a backslash is written as two backslashes; the PHP literal '\\\\' is that 2-char string */
+    if ($no == 92) return '\\\\'; /* #x5C (92) */
+    if ($no < 127) return $c; /* #x5D-#x7E (93-126) */
+    if ($no < 65536) return "\\u" . sprintf('%04X', $no); /* #x7F-#xFFFF (127-65535) */
+    if ($no < 1114112) return "\\U" . sprintf('%08X', $no); /* #x10000-#x10FFFF (65536-1114111) */
+    return ''; /* not defined => ignore */
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 POSH RDF Serializer
+author: Benjamin Nowack
+version: 2008-11-18 (Tweak: Updated to poshRDF spec draft)
+*/
+
+ARC2::inc('RDFSerializer');
+
+/**
+ * Serializes an RDF index as HTML using rdf-* class names (poshRDF).
+ */
+class ARC2_POSHRDFSerializer extends ARC2_RDFSerializer {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_POSHRDFSerializer($a = '', &$caller) {/* ns */
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    $this->content_header = 'text/html';
+  }
+
+  /* */
+
+  /**
+   * Picks a display label for a resource.
+   *
+   * Uses the first value of the first property whose URI ends in
+   * name/label/summary/title/fn; otherwise a fixed text for bnodes, or the
+   * last path/fragment segment of the URI with "_" replaced by spaces.
+   *
+   * @param string $res Resource URI or bnode id.
+   * @param array  $ps  Property => objects map of the resource (optional).
+   * @return string Label text.
+   */
+  function getLabel($res, $ps = '') {
+    if (!$ps) $ps = array();
+    foreach ($ps as $p => $os) {
+      if (preg_match('/[\/\#](name|label|summary|title|fn)$/i', $p)) {
+        return $os[0]['value'];
+      }
+    }
+    if (preg_match('/^\_\:/', $res)) return "An unnamed resource";
+    return preg_replace("/^(.*[\/\#])([^\/\#]+)$/", '\\2', str_replace('_', ' ', $res));
+  }
+
+  /**
+   * Renders the index (or only resource $res of it) as HTML.
+   *
+   * Subject and property URIs are HTML-escaped before being written into
+   * href attributes, matching how object URIs are handled.
+   *
+   * @param array  $index Subject => property => objects structure.
+   * @param string $res   When given, only this subject is rendered.
+   * @return string HTML fragment.
+   */
+  function getSerializedIndex($index, $res = '') {
+    $r = '';
+    $n = "\n";
+    if ($res) $index = array($res => $index[$res]);
+    foreach ($index as $s => $ps) {
+      /* node */
+      $r .= '
+ <div class="rdf-view">
+ <h3><a class="rdf-s" href="' . htmlspecialchars($s) . '">' . $this->getLabel($s, $ps) . '</a></h3>
+ ';
+      /* arcs */
+      foreach ($ps as $p => $os) {
+        $r .= '
+ <div class="rdf-o-list">
+ <a class="rdf-p" href="' . htmlspecialchars($p) . '">' . ucfirst($this->getLabel($p)) . '</a>
+ ';
+        foreach ($os as $o) {
+          $r .= $n . $this->getObjectValue($o);
+        }
+        $r .= '
+ </div>
+ ';
+      }
+      /* node */
+      $r .= '
+ <div class="clb"></div>
+ </div>
+ ';
+    }
+    return $r;
+  }
+
+  /**
+   * Dispatches to the renderer matching the object's type; URIs ending in
+   * an image extension are rendered as images.
+   */
+  function getObjectValue($o) {
+    if ($o['type'] == 'uri') {
+      if (preg_match('/(jpe?g|gif|png)$/i', $o['value'])) {
+        return $this->getImageObjectValue($o);
+      }
+      return $this->getURIObjectValue($o);
+    }
+    if ($o['type'] == "bnode") {
+      return $this->getBNodeObjectValue($o);
+    }
+    return $this->getLiteralObjectValue($o);
+  }
+
+  /**
+   * Renders an image URI as an <img> element.
+   */
+  function getImageObjectValue($o) {
+    return '<img class="rdf-o" src="' . htmlspecialchars($o['value']) . '" alt="img" />';
+  }
+
+  /**
+   * Renders a URI as a link; the visible text is the URI without its
+   * http(s):// and optional www. prefix.
+   */
+  function getURIObjectValue($o) {
+    $href = htmlspecialchars($o['value']);
+    $label = $o['value'];
+    $label = preg_replace('/^https?\:\/\/(www\.)?/', '', $label);
+    return '<a class="rdf-o" href="' . $href . '">' . $label . '</a>';
+  }
+
+  /**
+   * Renders a blank node as a placeholder text; its id goes into the
+   * (escaped) title attribute.
+   */
+  function getBNodeObjectValue($o) {
+    return '<div class="rdf-o" title="' . htmlspecialchars($o['value']) . '">An unnamed resource</div>';
+  }
+
+  /**
+   * Renders a literal value.
+   *
+   * NOTE(review): the literal text is emitted without HTML escaping; this
+   * may be deliberate for markup literals - confirm before changing.
+   */
+  function getLiteralObjectValue($o) {
+    return '<div class="rdf-o">' . $o['value'] . '</div>';
+  }
+
+  /* */
+
+}
+
--- /dev/null
+<?php
+/**
+ * ARC2 RDF/JSON Serializer
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-03-10
+*/
+
+ARC2::inc('RDFSerializer');
+
+/**
+ * Serializes an RDF index as RDF/JSON text.
+ */
+class ARC2_RDFJSONSerializer extends ARC2_RDFSerializer {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_RDFJSONSerializer($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+    $this->content_header = 'application/json';
+  }
+
+  /* */
+
+  /**
+   * Returns the JSON form of a term.
+   *
+   * Subjects and predicates ($term other than 'o') become a quoted string;
+   * objects become a { "value", "type", ... } structure.
+   *
+   * @param string|array $v    Term string or term array.
+   * @param string       $term Position of the term; 'o' selects object form.
+   * @return string JSON fragment.
+   */
+  function getTerm($v, $term = 's') {
+    if (!is_array($v)) {
+      if (preg_match('/^\_\:/', $v)) {
+        return ($term == 'o') ? $this->getTerm(array('value' => $v, 'type' => 'bnode'), 'o') : '"' . $v . '"';
+      }
+      return ($term == 'o') ? $this->getTerm(array('value' => $v, 'type' => 'uri'), 'o') : '"' . $v . '"';
+    }
+    if (!isset($v['type']) || ($v['type'] != 'literal')) {
+      if ($term != 'o') {
+        return $this->getTerm($v['value'], $term);
+      }
+      if (preg_match('/^\_\:/', $v['value'])) {
+        return '{ "value" : "' . $this->jsonEscape($v['value']) . '", "type" : "bnode" }';
+      }
+      return '{ "value" : "' . $this->jsonEscape($v['value']) . '", "type" : "uri" }';
+    }
+    /* literal; when both are set, "lang" replaces the "datatype" entry */
+    $r = '{ "value" : "' . $this->jsonEscape($v['value']) . '", "type" : "literal"';
+    $suffix = isset($v['datatype']) ? ', "datatype" : "' . $v['datatype'] . '"' : '';
+    $suffix = isset($v['lang']) ? ', "lang" : "' . $v['lang'] . '"' : $suffix;
+    $r .= $suffix . ' }';
+    return $r;
+  }
+
+  /**
+   * Escapes $v for use between the double quotes of a JSON string.
+   *
+   * With json_encode() available, exactly the two delimiting quotes of the
+   * encoded string are removed. Stripping every trailing quote character
+   * would also remove the quote of an escaped '\"' at the end of the value
+   * and leave a dangling backslash.
+   *
+   * @param mixed $v Value to escape (normally a string).
+   * @return string Escaped text without surrounding quotes; '' when
+   *                json_encode() fails.
+   */
+  function jsonEscape($v) {
+    if (function_exists('json_encode')) {
+      $json = json_encode($v);
+      if (!is_string($json)) return '';
+      $len = strlen($json);
+      if (($len >= 2) && ($json[0] === '"') && ($json[$len - 1] === '"')) {
+        return (string) substr($json, 1, -1);
+      }
+      /* non-string input has no delimiters to strip */
+      return $json;
+    }
+    /* backslash must be first; "\x08" is backspace (PHP has no "\b" string escape) */
+    $from = array("\\", "\r", "\t", "\n", '"', "\x08", "\f", "/");
+    $to = array('\\\\', '\r', '\t', '\n', '\"', '\b', '\f', '\/');
+    return str_replace($from, $to, $v);
+  }
+
+  /**
+   * Serializes a subject => property => objects index as one JSON object.
+   *
+   * @param array $index Index to serialize; a non-array object entry is
+   *                     treated as a single literal.
+   * @return string JSON text.
+   */
+  function getSerializedIndex($index) {
+    $r = '';
+    $nl = "\n";
+    foreach ($index as $s => $ps) {
+      $r .= $r ? ',' . $nl . $nl : '';
+      $r .= ' ' . $this->getTerm($s). ' : {';
+      $first_p = 1;
+      foreach ($ps as $p => $os) {
+        $r .= $first_p ? $nl : ',' . $nl;
+        $r .= ' ' . $this->getTerm($p). ' : [';
+        $first_o = 1;
+        if (!is_array($os)) {/* single literal o */
+          $os = array(array('value' => $os, 'type' => 'literal'));
+        }
+        foreach ($os as $o) {
+          $r .= $first_o ? $nl : ',' . $nl;
+          $r .= ' ' . $this->getTerm($o, 'o');
+          $first_o = 0;
+        }
+        $first_p = 0;
+        $r .= $nl . ' ]';
+      }
+      $r .= $nl . ' }';
+    }
+    $r .= $r ? ' ' : '';
+    return '{' . $nl . $r . $nl . '}';
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Serializer
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2009-11-09
+*/
+
+ARC2::inc('Class');
+
+/**
+ * Common base of the RDF serializers.
+ */
+class ARC2_RDFSerializer extends ARC2_Class {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_RDFSerializer($a = '', &$caller) {/* ns */
+    $this->__construct($a, $caller);
+  }
+
+  /**
+   * Builds $this->nsp, the namespace-URI => prefix reverse of $this->ns.
+   */
+  function __init() {
+    parent::__init();
+    foreach ($this->ns as $prefix => $uri) {
+      $this->nsp[$uri] = $prefix;
+    }
+  }
+
+  /* */
+
+  /**
+   * Returns a prefixed name for $v, or 0 when none can be derived.
+   *
+   * A value that already is a prefixed name with a known prefix is returned
+   * unchanged and its namespace is recorded in $this->used_ns. A URI is
+   * split after its last "/" or "#" and prefixed via getPrefix().
+   */
+  function xgetPName($v) {/* moved to merged getPName in ARC2_Class */
+    $is_pname = preg_match('/^([a-z0-9\_\-]+)\:([a-z\_][a-z0-9\_\-]*)$/i', $v, $m);
+    if ($is_pname && isset($this->ns[$m[1]])) {
+      $uri = $this->ns[$m[1]];
+      if (!in_array($uri, $this->used_ns)) {
+        $this->used_ns = array_merge($this->used_ns, array($uri));
+      }
+      return $v;
+    }
+    if (!preg_match('/^(.*[\/\#])([a-z\_][a-z0-9\-\_]*)$/i', $v, $m)) {
+      return 0;
+    }
+    return $this->getPrefix($m[1]) . ':' . $m[2];
+  }
+
+  /* */
+
+  /**
+   * Indexes the triples via ARC2::getSimpleIndex() and serializes the result.
+   *
+   * @param array $triples Triples to serialize.
+   * @param int   $raw     Passed through to getSerializedIndex().
+   * @return string Serialized text.
+   */
+  function getSerializedTriples($triples, $raw = 0) {
+    return $this->getSerializedIndex(ARC2::getSimpleIndex($triples, 0), $raw);
+  }
+
+  /**
+   * Stub that serializes nothing; the concrete serializers override it.
+   *
+   * NOTE(review): the overrides declare parameters this stub does not have;
+   * check that the signatures are accepted by the targeted PHP version.
+   */
+  function getSerializedIndex() {
+    return '';
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF/XML Serializer
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-01-30
+ *
+*/
+
+ARC2::inc('RDFSerializer');
+
+/* Serializes an RDF index as RDF/XML. */
+class ARC2_RDFXMLSerializer extends ARC2_RDFSerializer {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ function ARC2_RDFXMLSerializer($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* Reads the serializer options from the configuration array $this->a. */
+ function __init() {
+ parent::__init();
+ $this->content_header = 'application/rdf+xml';
+ /* when set, rdf:Bag/Seq/Alt nodes are written as container elements (see getSerializedIndex) */
+ $this->pp_containers = $this->v('serializer_prettyprint_containers', 0, $this->a);
+ /* namespace whose terms are written without a prefix (see getPName) */
+ $this->default_ns = $this->v('serializer_default_ns', '', $this->a);
+ /* when set, the first rdf:type is used as the node's element name (see getNodeTag) */
+ $this->type_nodes = $this->v('serializer_type_nodes', 0, $this->a);
+ }
+
+ /* */
+
+ /*
+ Returns the RDF/XML fragment for a term.
+ $type selects the role of a plain string: 's' (rdf:about attribute),
+ 'p' (element name, 0 if no prefixed name exists), 'o' (rdf:resource
+ attribute, or a literal when the value does not look like a URI),
+ 'datatype' or 'lang' (attribute). Strings starting with "_:" always
+ become an rdf:nodeID attribute.
+ For literal term arrays the result contains the ">" that closes the start
+ tag followed by the element content; getSerializedIndex() relies on that
+ ">" to decide whether a closing tag is needed.
+ */
+ function getTerm($v, $type) {
+ if (!is_array($v)) {/* uri or bnode */
+ if (preg_match('/^\_\:(.*)$/', $v, $m)) {
+ return ' rdf:nodeID="' . $m[1] . '"';
+ }
+ if ($type == 's') {
+ return ' rdf:about="' . htmlspecialchars($v) . '"';
+ }
+ if ($type == 'p') {
+ if ($pn = $this->getPName($v)) {
+ return $pn;
+ }
+ return 0;
+ }
+ if ($type == 'o') {
+ $v = $this->expandPName($v);
+ /* anything that is not scheme ":" non-whitespace is serialized as a literal */
+ if (!preg_match('/^[a-z0-9]{2,}\:[^\s]+$/is', $v)) return $this->getTerm(array('value' => $v, 'type' => 'literal'), $type);
+ return ' rdf:resource="' . htmlspecialchars($v) . '"';
+ }
+ if ($type == 'datatype') {
+ $v = $this->expandPName($v);
+ return ' rdf:datatype="' . htmlspecialchars($v) . '"';
+ }
+ if ($type == 'lang') {
+ return ' xml:lang="' . htmlspecialchars($v) . '"';
+ }
+ }
+ if ($v['type'] != 'literal') {
+ return $this->getTerm($v['value'], 'o');
+ }
+ /* literal */
+ $dt = isset($v['datatype']) ? $v['datatype'] : '';
+ $lang = isset($v['lang']) ? $v['lang'] : '';
+ /* XML literals are written unescaped; a datatype takes precedence over a language tag */
+ if ($dt == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral') {
+ return ' rdf:parseType="Literal">' . $v['value'];
+ }
+ elseif ($dt) {
+ return $this->getTerm($dt, 'datatype') . '>' . htmlspecialchars($v['value']);
+ }
+ elseif ($lang) {
+ return $this->getTerm($lang, 'lang') . '>' . htmlspecialchars($v['value']);
+ }
+ return '>' . htmlspecialchars($v['value']);
+ }
+
+ /*
+ Returns the name to use for URI $v. Terms directly inside the default
+ namespace (no "/" after the namespace part) are returned without a
+ prefix; everything else is delegated to the parent implementation.
+ */
+ function getPName($v, $connector = ':') {
+ if ($this->default_ns && (strpos($v, $this->default_ns) === 0)) {
+ $pname = substr($v, strlen($this->default_ns));
+ if (!preg_match('/\//', $pname)) return $pname;
+ }
+ return parent::getPName($v, $connector);
+ }
+
+ /*
+ Returns the XML declaration and the opening rdf:RDF tag with one xmlns
+ declaration per entry of $this->used_ns, plus the default namespace.
+ getSerializedIndex() calls this only after the body has been built.
+ */
+ function getHead() {
+ $r = '';
+ $nl = "\n";
+ $r .= '<?xml version="1.0" encoding="UTF-8"?>';
+ $r .= $nl . '<rdf:RDF';
+ $first_ns = 1;
+ foreach ($this->used_ns as $v) {
+ $r .= $first_ns ? ' ' : $nl . ' ';
+ $r .= 'xmlns:' . $this->nsp[$v] . '="' .$v. '"';
+ $first_ns = 0;
+ }
+ if ($this->default_ns) {
+ $r .= $first_ns ? ' ' : $nl . ' ';
+ $r .= 'xmlns="' . $this->default_ns . '"';
+ }
+ $r .= '>';
+ return $r;
+ }
+
+ /* Returns the closing rdf:RDF tag preceded by two newlines. */
+ function getFooter() {
+ $r = '';
+ $nl = "\n";
+ $r .= $nl . $nl . '</rdf:RDF>';
+ return $r;
+ }
+
+ /*
+ Serializes a subject => property => objects index.
+ With $raw set only the node elements are returned; otherwise they are
+ wrapped in the output of getHead() and getFooter().
+ */
+ function getSerializedIndex($index, $raw = 0) {
+ $r = '';
+ $nl = "\n";
+ foreach ($index as $raw_s => $ps) {
+ $r .= $r ? $nl . $nl : '';
+ $s = $this->getTerm($raw_s, 's');
+ $tag = 'rdf:Description';
+ /* getNodeTag() may replace the element name and removes the type it used from $ps */
+ list($tag, $ps) = $this->getNodeTag($ps);
+ $sub_ps = 0;
+ /* pretty containers */
+ if ($this->pp_containers && ($ctag = $this->getContainerTag($ps))) {
+ $tag = 'rdf:' . $ctag;
+ list($ps, $sub_ps) = $this->splitContainerEntries($ps);
+ }
+ $r .= ' <' . $tag . '' .$s . '>';
+ $first_p = 1;
+ foreach ($ps as $p => $os) {
+ if (!$os) continue;
+ /* properties without a usable element name are skipped */
+ if ($p = $this->getTerm($p, 'p')) {
+ $r .= $nl . str_pad('', 4);
+ $first_o = 1;
+ if (!is_array($os)) {/* single literal o */
+ $os = array(array('value' => $os, 'type' => 'literal'));
+ }
+ foreach ($os as $o) {
+ $o = $this->getTerm($o, 'o');
+ $r .= $first_o ? '' : $nl . ' ';
+ $r .= '<' . $p;
+ $r .= $o;
+ /* a ">" in the term means it carries element content and needs a closing tag */
+ $r .= preg_match('/\>/', $o) ? '</' . $p . '>' : '/>';
+ $first_o = 0;
+ }
+ $first_p = 0;
+ }
+ }
+ $r .= $r ? $nl . ' </' . $tag . '>' : '';
+ /* properties split off from a container are written as a separate node for the same subject */
+ if ($sub_ps) $r .= $nl . $nl . $this->getSerializedIndex(array($raw_s => $sub_ps), 1);
+ }
+ if ($raw) {
+ return $r;
+ }
+ return $this->getHead() . $nl . $nl . $r . $this->getFooter();
+ }
+
+ /*
+ Returns array(element name, remaining properties) for a node.
+ Without the type_nodes option, or without any rdf:type, the name is
+ rdf:Description and $ps is unchanged. Otherwise the first rdf:type is
+ removed from $ps and its prefixed name becomes the element name.
+ */
+ function getNodeTag($ps) {
+ if (!$this->type_nodes) return array('rdf:Description', $ps);
+ $rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ $types = $this->v($rdf . 'type', array(), $ps);
+ if (!$types) return array('rdf:Description', $ps);
+ $type = array_shift($types);
+ $ps[$rdf . 'type'] = $types;
+ if (!is_array($type)) $type = array('value' => $type);
+ return array($this->getPName($type['value']), $ps);
+ }
+
+ /* */
+
+ /*
+ Returns "Bag", "Seq" or "Alt" when the first rdf:type of the node is
+ that container class, '' otherwise. Only the first type is inspected
+ (both branches inside the loop return).
+ */
+ function getContainerTag($ps) {
+ $rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ if (!isset($ps[$rdf . 'type'])) return '';
+ $types = $ps[$rdf . 'type'];
+ foreach ($types as $type) {
+ if (!in_array($type['value'], array($rdf . 'Bag', $rdf . 'Seq', $rdf . 'Alt'))) return '';
+ return str_replace($rdf, '', $type['value']);
+ }
+ }
+
+ /*
+ Splits container properties into membership entries and the rest.
+ Objects of rdf:_1, rdf:_2, ... are collected under a single rdf:li
+ property, rdf:type is dropped, all other properties are kept as "rest".
+ Returns array(li-properties, rest) when members exist, array(rest, 0)
+ otherwise.
+ */
+ function splitContainerEntries($ps) {
+ $rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+ $items = array();
+ $rest = array();
+ foreach ($ps as $p => $os) {
+ $p_short = str_replace($rdf, '', $p);
+ if ($p_short === 'type') continue;
+ if (preg_match('/^\_([0-9]+)$/', $p_short, $m)) {
+ $items = array_merge($items, $os);
+ }
+ else {
+ $rest[$p] = $os;
+ }
+ }
+ if ($items) return array(array($rdf . 'li' => $items), $rest);
+ return array($rest, 0);
+ }
+
+ /* */
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RSS 1.0 Serializer
+ *
+ * @author Toby Inkster
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2009-11-09
+*/
+
+ARC2::inc('RDFXMLSerializer');
+
+/**
+ * RDF/XML serializer preconfigured for RSS 1.0 output.
+ */
+class ARC2_RSS10Serializer extends ARC2_RDFXMLSerializer {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  function ARC2_RSS10Serializer($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  /**
+   * Applies the parent's setup, then overrides the content type, the
+   * default namespace and the type_nodes setting.
+   */
+  function __init() {
+    parent::__init();
+    $this->type_nodes = true;
+    $this->default_ns = 'http://purl.org/rss/1.0/';
+    $this->content_header = 'application/rss+xml';
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 Turtle Serializer
+ *
+ * @author Benjamin Nowack
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2009-11-23
+ *
+*/
+
+ARC2::inc('RDFSerializer');
+
+/* Serializes an RDF index as Turtle. */
+class ARC2_TurtleSerializer extends ARC2_RDFSerializer {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ function ARC2_TurtleSerializer($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {
+ parent::__init();
+ $this->content_header = 'application/x-turtle';
+ }
+
+ /* */
+
+ /*
+ Returns the Turtle form of a term.
+ $term is the position ('s', 'p', 'o', 'dt'); $qualifier is the already
+ serialized predicate when an object is written. Predicates, and objects
+ of rdf:type / rdfs:domain / rdfs:range / rdfs:subClassOf, are written as
+ prefixed names when one exists; other URIs are wrapped in <>. Strings
+ that are neither bnode ids nor URIs are serialized as literals.
+ */
+ function getTerm($v, $term = '', $qualifier = '') {
+ if (!is_array($v)) {
+ if (preg_match('/^\_\:/', $v)) {
+ return $v;
+ }
+ if (($term === 'p') && ($pn = $this->getPName($v))) {
+ return $pn;
+ }
+ if (
+ ($term === 'o') &&
+ in_array($qualifier, array('rdf:type', 'rdfs:domain', 'rdfs:range', 'rdfs:subClassOf')) &&
+ ($pn = $this->getPName($v))
+ ) {
+ return $pn;
+ }
+ if (preg_match('/^[a-z0-9]+\:[^\s]*$/is', $v)) {
+ return '<' .$v. '>';
+ }
+ return $this->getTerm(array('type' => 'literal', 'value' => $v), $term, $qualifier);
+ }
+ if (!isset($v['type']) || ($v['type'] != 'literal')) {
+ return $this->getTerm($v['value'], $term, $qualifier);
+ }
+ /* literal */
+ /* pick a delimiter that does not clash with quotes inside the value: " -> ' -> """ -> ''' */
+ $quot = '"';
+ if (preg_match('/\"/', $v['value'])) {
+ $quot = "'";
+ if (preg_match('/\'/', $v['value'])) {
+ $quot = '"""';
+ if (preg_match('/\"\"\"/', $v['value']) || preg_match('/\"$/', $v['value']) || preg_match('/^\"/', $v['value'])) {
+ $quot = "'''";
+ /* pad single quotes at the edges and escape embedded ''' so they cannot merge with the delimiter */
+ $v['value'] = preg_replace("/'$/", "' ", $v['value']);
+ $v['value'] = preg_replace("/^'/", " '", $v['value']);
+ $v['value'] = str_replace("'''", '\\\'\\\'\\\'', $v['value']);
+ }
+ }
+ }
+ /* values with line breaks need the tripled form of the chosen quote */
+ if ((strlen($quot) == 1) && preg_match('/[\x0d\x0a]/', $v['value'])) {
+ $quot = $quot . $quot . $quot;
+ }
+ /* a datatype takes precedence over a language tag */
+ $suffix = isset($v['lang']) && $v['lang'] ? '@' . $v['lang'] : '';
+ $suffix = isset($v['datatype']) && $v['datatype'] ? '^^' . $this->getTerm($v['datatype'], 'dt') : $suffix;
+ /* NOTE(review): the value is written as-is (no backslash escaping) - confirm this is intended */
+ return $quot . $v['value'] . $quot . $suffix;
+ }
+
+ /*
+ Returns one @prefix line per entry of $this->used_ns.
+ getSerializedIndex() calls this only after the body has been built.
+ */
+ function getHead() {
+ $r = '';
+ $nl = "\n";
+ foreach ($this->used_ns as $v) {
+ $r .= $r ? $nl : '';
+ $r .= '@prefix ' . $this->nsp[$v] . ': <' .$v. '> .';
+ }
+ return $r;
+ }
+
+ /*
+ Serializes a subject => property => objects index.
+ Properties of a subject are joined with ";", objects of a property with
+ ",", and continuation lines are padded to align under the first entry.
+ With $raw set the @prefix head is omitted; an empty index gives ''.
+ */
+ function getSerializedIndex($index, $raw = 0) {
+ $r = '';
+ $nl = "\n";
+ foreach ($index as $s => $ps) {
+ $r .= $r ? ' .' . $nl . $nl : '';
+ $s = $this->getTerm($s, 's');
+ $r .= $s;
+ $first_p = 1;
+ foreach ($ps as $p => $os) {
+ if (!$os) continue;
+ $p = $this->getTerm($p, 'p');
+ $r .= $first_p ? ' ' : ' ;' . $nl . str_pad('', strlen($s) + 1);
+ $r .= $p;
+ $first_o = 1;
+ if (!is_array($os)) {/* single literal o */
+ $os = array(array('value' => $os, 'type' => 'literal'));
+ }
+ foreach ($os as $o) {
+ $r .= $first_o ? ' ' : ' ,' . $nl . str_pad('', strlen($s) + strlen($p) + 2);
+ /* the serialized predicate is passed as qualifier (see getTerm) */
+ $o = $this->getTerm($o, 'o', $p);
+ $r .= $o;
+ $first_o = 0;
+ }
+ $first_p = 0;
+ }
+ }
+ $r .= $r ? ' .' : '';
+ if ($raw) {
+ return $r;
+ }
+ return $r ? $this->getHead() . $nl . $nl . $r : '';
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 SPARQLScript Parser (SPARQL+ + functions)
+author: Benjamin Nowack
+version: 2008-09-22 (Addition: support for FunctionCall)
+*/
+
+ARC2::inc('ARC2_SPARQLPlusParser');
+
+class ARC2_SPARQLScriptParser extends ARC2_SPARQLPlusParser {
+
+ /* Passes the configuration $a and the calling object on to the parent constructor. */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* Class-named method that forwards to __construct(). */
+ function ARC2_SPARQLScriptParser($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* Runs the parent's initialization; nothing is added here. */
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ /**
+  * Parses a SPARQLScript source into a list of blocks.
+  *
+  * Blocks are collected in $this->blocks; whatever could not be consumed
+  * is kept in $this->unparsed_code. If input is left over and no error has
+  * been recorded yet, an error quoting the start of the leftover is added.
+  *
+  * @param string $v   Script source.
+  * @param string $src Source location used to calculate the base; the
+  *                    script URI is used when empty.
+  */
+ function parse($v, $src = '') {
+   $this->setDefaultPrefixes();
+   $this->base = $src ? $this->calcBase($src) : ARC2::getScriptURI();
+   $this->blocks = array();
+   $this->r = array('base' => '', 'vars' => array(), 'prefixes' => $this->prefixes);
+   do {
+     $proceed = 0;
+     if ((list($r, $v) = $this->xScriptBlock($v)) && $r) {
+       $this->blocks[] = $r;
+       $proceed = 1;
+     }
+     $this->unparsed_code = trim($v);
+   } while ($proceed);
+   if (trim($this->unparsed_code) && !$this->getErrors()) {
+     /* flatten line breaks only; a "|" inside the class would also blank out pipes in the snippet */
+     $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
+     $msg = trim($rest) ? 'Could not properly handle "' . $rest . '"' : 'Syntax Error';
+     $this->addError($msg);
+   }
+ }
+
+ /**
+  * Returns the value stored under 'blocks', or an empty array when unset.
+  */
+ function getScriptBlocks() {
+   $fallback = array();
+   return $this->v('blocks', $fallback);
+ }
+
+ /* */
+
+ /**
+  * Reads one script block from the start of $v.
+  *
+  * Leading "#" comments, a base declaration and prefix declarations are
+  * consumed first. Then the statement kinds are tried in a fixed order:
+  * endpoint declaration, return, assignment, IF, FOR, string, function
+  * call, and finally a query.
+  *
+  * @param string $v Unparsed input.
+  * @return array array(block, rest) on success, array(0, rest) otherwise.
+  */
+ function xScriptBlock($v) {
+   /* comment removal */
+   while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $v, $m)) {
+     $v = $m[2];
+   }
+   /* BaseDecl */
+   list($base, $v) = $this->xBaseDecl($v);
+   if ($base) $this->base = $base;
+   /* PrefixDecl */
+   while (true) {
+     list($decl, $v) = $this->xPrefixDecl($v);
+     if (!$decl) break;
+     $this->prefixes[$decl['prefix']] = $decl['uri'];
+   }
+   /* statement kinds that are returned as-is, in order of precedence */
+   $rules = array('xEndpointDecl', 'xReturn', 'xAssignment', 'xIFBlock', 'xFORBlock', 'xString');
+   foreach ($rules as $rule) {
+     list($block, $v) = $this->$rule($v);
+     if ($block) return array($block, $v);
+   }
+   /* FunctionCall (leading semicolons of the rest are dropped) */
+   list($call, $v) = $this->xFunctionCall($v);
+   if ($call) return array($call, ltrim($v, ';'));
+   /* Query: parsed with a fresh result structure, restored on failure */
+   $saved = $this->r;
+   $this->r = array('base' => '', 'vars' => array(), 'prefixes' => $this->prefixes);
+   list($query, $rest) = $this->xQuery($v);
+   if (!$query) {
+     $this->r = $saved;
+     return array(0, $v);
+   }
+   /* the query text is the part of the input that xQuery() consumed */
+   $text = $rest ? trim(substr($v, 0, -strlen($rest))) : trim($v);
+   $info = array_merge($this->r, array(
+     'type' => 'query',
+     'query_type' => $query['type'],
+     'query' => $text,
+     'base' => $this->base,
+   ));
+   return array($info, $rest);
+ }
+
+ /**
+  * Reads a "{ ... }" group of script blocks.
+  *
+  * @param string $v Unparsed input.
+  * @return array array(array('type' => 'block_set', 'blocks' => ...), rest),
+  *               or array(0, $v) when either brace is missing.
+  */
+ function xBlockSet($v) {
+   $open = $this->x("\{", $v);
+   if (!$open) return array(0, $v);
+   $blocks = array();
+   $sub_v = $open[1];
+   while (true) {
+     list($block, $sub_v) = $this->xScriptBlock($sub_v);
+     if (!$block) break;
+     $blocks[] = $block;
+   }
+   $close = $this->x("\}", $sub_v);
+   if (!$close) return array(0, $v);
+   return array(array('type' => 'block_set', 'blocks' => $blocks), $close[1]);
+ }
+
+ /* s2 */
+
+ /**
+  * Reads "ENDPOINT <iri>" with an optional trailing ".".
+  *
+  * The IRI is resolved against $this->base via calcURI().
+  *
+  * @param string $v Unparsed input.
+  * @return array array(array('type' => 'endpoint_decl', 'endpoint' => uri), rest),
+  *               or array(0, $v) when no endpoint declaration is found.
+  */
+ function xEndpointDecl($v) {
+   $keyword = $this->x("ENDPOINT\s+", $v);
+   if (!$keyword) return array(0, $v);
+   list($iri, $sub_v) = $this->xIRI_REF($keyword[1]);
+   if (!$iri) return array(0, $v);
+   $endpoint = $this->calcURI($iri, $this->base);
+   /* optional terminating dot */
+   $dot = $this->x('\.', $sub_v);
+   if ($dot) $sub_v = $dot[1];
+   return array(array('type' => 'endpoint_decl', 'endpoint' => $endpoint), $sub_v);
+ }
+
+ /* s3 */
+
+ /**
+  * Reads "Var := value" (or "Var = value").
+  *
+  * The right-hand side is tried as string, variable merge, variable,
+  * function call, placeholder and finally as a query; the kind that
+  * matched is reported in 'sub_type'. Leading ";" and spaces are removed
+  * from the returned rest.
+  *
+  * @param string $v Unparsed input.
+  * @return array array(assignment-info, rest), or array(0, $v) on failure.
+  */
+ function xAssignment($v) {
+   /* Var */
+   list($var, $sub_v) = $this->xVar($v);
+   if (!$var) return array(0, $v);
+   /* := | = */
+   $op = $this->x("\:?\=", $sub_v);
+   if (!$op) return array(0, $v);
+   $sub_v = $op[1];
+   /* keys shared by every result; the matching kind appends its own */
+   $head = array('type' => 'assignment', 'var' => $var);
+   /* try String */
+   list($val, $sub_v) = $this->xString($sub_v);
+   if ($val) {
+     return array($head + array('sub_type' => 'string', 'string' => $val), ltrim($sub_v, '; '));
+   }
+   /* try VarMerge */
+   list($val, $sub_v) = $this->xVarMerge($sub_v);
+   if ($val) {
+     return array($head + array('sub_type' => 'var_merge', 'var2' => $val[0], 'var3' => $val[1]), ltrim($sub_v, '; '));
+   }
+   /* try Var */
+   list($val, $sub_v) = $this->xVar($sub_v);
+   if ($val) {
+     return array($head + array('sub_type' => 'var', 'var2' => $val), ltrim($sub_v, '; '));
+   }
+   /* try function */
+   list($val, $sub_v) = $this->xFunctionCall($sub_v);
+   if ($val) {
+     return array($head + array('sub_type' => 'function_call', 'function_call' => $val), ltrim($sub_v, '; '));
+   }
+   /* try Placeholder */
+   list($val, $sub_v) = $this->xPlaceholder($sub_v);
+   if ($val) {
+     return array($head + array('sub_type' => 'placeholder', 'placeholder' => $val), ltrim($sub_v, '; '));
+   }
+   /* try query: parsed with a fresh result structure, restored on failure */
+   $saved = $this->r;
+   $this->r = array('base' => '', 'vars' => array(), 'prefixes' => $this->prefixes);
+   list($query, $rest) = $this->xQuery($sub_v);
+   if (!$query) {
+     $this->r = $saved;
+     return array(0, $v);
+   }
+   $text = $rest ? trim(substr($sub_v, 0, -strlen($rest))) : trim($sub_v);
+   $query_info = array_merge($this->r, array(
+     'type' => 'query',
+     'query_type' => $query['type'],
+     'query' => $text,
+     'base' => $this->base,
+   ));
+   return array($head + array('sub_type' => 'query', 'query' => $query_info), ltrim($rest, '; '));
+ }
+
+ /**
+  * Parses a return statement: 'return' followed by anything xAssignment() accepts as a right-hand side.
+  *
+  * The statement is rewritten into an assignment to $__return_value__ and the
+  * resulting block is re-typed as 'return'.
+  *
+  * @param string $v remaining input
+  * @return array array(return block, rest) on success, array(0, $v) otherwise
+  */
+ function xReturn($v) {
+   $keyword = $this->x("return\s+", $v);
+   if (!$keyword) {
+     return array(0, $v);
+   }
+   /* fake assignment which accepts same right-hand values */
+   list($assignment, $rest) = $this->xAssignment('$__return_value__ := ' . $keyword[1]);
+   if (!$assignment) {
+     return array(0, $v);
+   }
+   $assignment['type'] = 'return';
+   return array($assignment, $rest);
+ }
+
+ /* s4 'IF' BrackettedExpression '{' Script '}' ( 'ELSE' '{' Script '}')? */
+
+ /**
+  * Parses 'IF' BrackettedExpression '{' Script '}' ( 'ELSE' '{' Script '}' )? .
+  *
+  * An ELSE keyword that is not followed by a valid block set is left unconsumed.
+  *
+  * @param string $v remaining input
+  * @return array array(ifblock, rest) on success, array(0, $v) otherwise
+  */
+ function xIFBlock($v) {
+   $keyword = $this->x("IF\s*", $v);
+   if (!$keyword) {
+     return array(0, $v);
+   }
+   list($cond, $rest) = $this->xBrackettedExpression($keyword[1]);
+   if (!$cond) {
+     return array(0, $v);
+   }
+   list($then_set, $rest) = $this->xBlockSet($rest);
+   if (!$then_set) {
+     return array(0, $v);
+   }
+   /* else */
+   $else_blocks = array();
+   $else_keyword = $this->x("ELSE\s*", $rest);
+   if ($else_keyword) {
+     list($else_set, $after_else) = $this->xBlockSet($else_keyword[1]);
+     if ($else_set) {
+       $else_blocks = $else_set['blocks'];
+       $rest = $after_else;
+     }
+   }
+   return array(
+     array(
+       'type' => 'ifblock',
+       'condition' => $cond,
+       'blocks' => $then_set['blocks'],
+       'else_blocks' => $else_blocks,
+     ),
+     $rest
+   );
+ }
+
+ /* s5 'FOR' '(' Var 'IN' Var ')' '{' Script '}' */
+
+ /**
+  * Parses 'FOR' '(' Var 'IN' Var ')' '{' Script '}'.
+  *
+  * Both variable names are returned without their leading '$' or '?'.
+  *
+  * @param string $v remaining input
+  * @return array array(forblock, rest) on success, array(0, $v) otherwise
+  */
+ function xFORBlock($v) {
+   /* @@todo split into sub-patterns? */
+   $head = $this->x("FOR\s*\(\s*[\$\?]([^\s]+)\s+IN\s+[\$\?]([^\s]+)\s*\)", $v);
+   if (!$head) {
+     return array(0, $v);
+   }
+   /* $head[1] = iterator name, $head[2] = set name, $head[3] = input after the closing parenthesis */
+   list($body, $rest) = $this->xBlockSet($head[3]);
+   if (!$body) {
+     return array(0, $v);
+   }
+   $for_block = array(
+     'type' => 'forblock',
+     'set' => $head[2],
+     'iterator' => $head[1],
+     'blocks' => $body['blocks']
+   );
+   return array($for_block, $rest);
+ }
+
+ /* s6 Var '+' Var */
+
+ /**
+  * Parses Var '+' Var.
+  *
+  * @param string $v remaining input
+  * @return array array(array(var1, var2), rest) on success, array(0, $v) otherwise
+  */
+ function xVarMerge($v) {
+   list($left, $rest) = $this->xVar($v);
+   if (!$left) {
+     return array(0, $v);
+   }
+   $plus = $this->x("\+", $rest);
+   if (!$plus) {
+     return array(0, $v);
+   }
+   list($right, $rest) = $this->xVar($plus[1]);
+   if (!$right) {
+     return array(0, $v);
+   }
+   return array(array($left, $right), $rest);
+ }
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 SPARQLScript Processor
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @package ARC2
+ * @version 2010-07-06
+*/
+
+ARC2::inc('Class');
+
+class ARC2_SPARQLScriptProcessor extends ARC2_Class {
+
+ /**
+  * PHP 5 constructor; passes the configuration and the calling object on to the parent constructor.
+  */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /**
+  * PHP 4 style constructor; forwards to __construct().
+  */
+ function ARC2_SPARQLScriptProcessor ($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /**
+  * Initialises the processor state: configured limits, return flag, script hash and a fresh environment.
+  */
+ function __init() {
+   parent::__init();
+   $conf = $this->a;
+   $this->max_operations = $this->v('sparqlscript_max_operations', 0, $conf);
+   $this->max_queries = $this->v('sparqlscript_max_queries', 0, $conf);
+   $this->return = 0;
+   $this->script_hash = '';
+   $env = array();
+   $env['endpoint'] = '';
+   $env['vars'] = array();
+   $env['output'] = '';
+   $env['operation_count'] = 0;
+   $env['query_count'] = 0;
+   $env['query_log'] = array();
+   $this->env = $env;
+ }
+
+ /**
+  * Discards all script state by running __init() again.
+  */
+ function reset() {
+   $this->__init();
+ }
+
+ /* */
+
+ /**
+  * Parses a script and processes its blocks in order.
+  *
+  * Processing stops at the first parser error, processor error or return statement.
+  *
+  * @param string $s script source
+  * @return int|null 0 when processing stopped early, nothing when every block was processed
+  */
+ function processScript($s) {
+   $this->script_hash = abs(crc32($s));
+   $script_parser = $this->getParser();
+   $script_parser->parse($s);
+   $script_blocks = $script_parser->getScriptBlocks();
+   if ($script_parser->getErrors()) {
+     return 0;
+   }
+   foreach ($script_blocks as $script_block) {
+     $this->processBlock($script_block);
+     if ($this->return || $this->getErrors()) {
+       return 0;
+     }
+   }
+ }
+
+ /**
+  * Returns the script result: the value of __return_value__ after a return statement, otherwise the collected output.
+  */
+ function getResult() {
+   return $this->return ? $this->getVarValue('__return_value__') : $this->env['output'];
+ }
+
+ /* */
+
+ /**
+  * Loads the SPARQLScript parser class and returns a new instance that shares this processor's configuration.
+  */
+ function getParser() {
+ ARC2::inc('SPARQLScriptParser');
+ return new ARC2_SPARQLScriptParser($this->a, $this);
+ }
+
+ /* */
+
+ /**
+  * Stores a script variable in the environment.
+  *
+  * @param string $name variable name
+  * @param mixed $val value
+  * @param string $type one of: literal, var, rows, bool, doc, http_response, undefined, ?
+  * @param mixed $meta meta data; any empty value is stored as an empty array
+  */
+ function setVar($name, $val, $type = 'literal', $meta = '') {
+   if (!$meta) {
+     $meta = array();
+   }
+   $this->env['vars'][$name] = array(
+     'value_type' => $type,
+     'value' => $val,
+     'meta' => $meta
+   );
+ }
+
+ /**
+  * Returns the stored entry for a variable, or '' when the variable is not set.
+  */
+ function getVar($name) {
+   if (!isset($this->env['vars'][$name])) {
+     return '';
+   }
+   return $this->env['vars'][$name];
+ }
+
+ /**
+  * Returns the 'value' of a variable entry, the entry itself when it has no 'value' key, or '' for an empty or unset variable.
+  */
+ function getVarValue($name) {
+   $entry = $this->getVar($name);
+   if (!$entry) {
+     return '';
+   }
+   if (isset($entry['value'])) {
+     return $entry['value'];
+   }
+   return $entry;
+ }
+
+ /* */
+
+ /**
+  * Expands "${name}" placeholders in a string until the text no longer changes.
+  *
+  * Brace groups that are not preceded by '$' are treated as plain containers and
+  * only their content is processed (recursively). Array values are serialised
+  * through getArraySerialization() using $context.
+  *
+  * @param string $val text that may contain placeholders
+  * @param string $context passed on to getArraySerialization()
+  * @param int $return_string passed through to recursive calls
+  * @param int $loop recursion depth of this call
+  * @return string the expanded text
+  */
+ function replacePlaceholders($val, $context = '', $return_string = 1, $loop = 0) {
+   /* $loop is the recursion depth and never changes inside this call, so it cannot bound */
+   /* the do/while on its own: a value that re-introduces its own placeholder would spin forever */
+   $max_passes = 10;
+   $passes = 0;
+   do {
+     $old_val = $val;
+     $passes++;
+     if (preg_match_all('/(\{(?:[^{}]+|(?R))*\})/', $val, $m)) {
+       foreach ($m[1] as $match) {
+         if (strpos($val, '$' . $match) === false) {/* just some container brackets, recurse */
+           $val = str_replace($match, '{' . $this->replacePlaceholders(substr($match, 1, -1), $context, $return_string, $loop + 1) . '}', $val);
+         }
+         else {
+           $ph = substr($match, 1, -1);
+           $sub_val = $this->getPlaceholderValue($ph);
+           if (is_array($sub_val)) {
+             $sub_val = $this->getArraySerialization($sub_val, $context);
+           }
+           $val = str_replace('${' . $ph . '}', $sub_val, $val);
+         }
+       }
+     }
+   } while (($old_val != $val) && ($loop < 10) && ($passes < $max_passes));
+   return $val;
+ }
+
+ /**
+  * Resolves the content of a placeholder (the text between "${" and "}").
+  *
+  * Supported forms, checked in this order:
+  *  - a plain variable name
+  *  - GET.<param> / POST.<param>, optionally followed by a property path
+  *  - NOW, optionally with an offset such as "+ 2d" and a property path
+  *  - <var>.<path>, resolved through getPropertyValue()
+  *
+  * @param string $ph placeholder content
+  * @return mixed the resolved value, or '' when nothing matches
+  */
+ function getPlaceholderValue($ph) {
+   /* simple vars */
+   if (isset($this->env['vars'][$ph])) {
+     return $this->v('value', $this->env['vars'][$ph], $this->env['vars'][$ph]);
+   }
+   /* GET/POST */
+   if (preg_match('/^(GET|POST)\.([^\.]+)(.*)$/', $ph, $m)) {
+     /* the POST branch used to read the undefined variable $POST */
+     $vals = strtoupper($m[1]) == 'GET' ? $_GET : $_POST;
+     $r = isset($vals[$m[2]]) ? $vals[$m[2]] : '';
+     return $m[3] ? $this->getPropertyValue(array('value' => $r, 'value_type' => '?'), ltrim($m[3], '.')) : $r;
+   }
+   /* NOW */
+   if (preg_match('/^NOW(.*)$/', $ph, $m)) {
+     $rest = $m[1];
+     /* may have sub-phs */
+     $rest = $this->replacePlaceholders($rest);
+     $r_struct = array(
+       'y' => date('Y'),
+       'mo' => date('m'),
+       'd' => date('d'),
+       'h' => date('H'),
+       'mi' => date('i'),
+       's' => date('s')
+     );
+     if (preg_match('/(\+|\-)\s*([0-9]+)(y|mo|d|h|mi|s)[a-z]*(.*)/is', trim($rest), $m2)) {
+       /* plain arithmetic instead of eval(); the pattern is case-insensitive, so normalise the unit to the $r_struct keys */
+       $unit = strtolower($m2[3]);
+       $delta = (int)$m2[2];
+       $r_struct[$unit] += ($m2[1] == '-') ? -$delta : $delta;
+       $rest = $m2[4];
+     }
+     /* mktime() normalises out-of-range fields, e.g. month 14 */
+     $uts = mktime($r_struct['h'], $r_struct['mi'], $r_struct['s'], $r_struct['mo'], $r_struct['d'], $r_struct['y']);
+     $uts -= date('Z', $uts); /* timezone offset */
+     $r = date('Y-m-d\TH:i:s\Z', $uts);
+     if (preg_match('/^\.(.+)$/', $rest, $m)) {
+       return $this->getPropertyValue(array('value' => $r), $m[1]);
+     }
+     return $r;
+   }
+   /* property */
+   if (preg_match('/^([^\.]+)\.(.+)$/', $ph, $m)) {
+     list($var, $path) = array($m[1], $m[2]);
+     if (isset($this->env['vars'][$var])) {
+       return $this->getPropertyValue($this->env['vars'][$var], $path);
+     }
+   }
+   return '';
+ }
+
+ /**
+  * Resolves a property path or built-in helper call against a variable.
+  *
+  * Checked in this order: the reserved "size" property, the helper calls
+  * replace(), match(), urlencode(), toDataURI(), fromDataURI(), toPrettyDate()
+  * and render(), then keys of an array value (plain or expanded prefixed name,
+  * recursing on dotted paths), then "_name" lookups in the variable's meta data.
+  *
+  * @param mixed $obj variable entry ('value', optionally 'value_type' and 'meta') or a bare value
+  * @param string $path property path; placeholders in it are expanded first
+  * @return mixed the resolved value, or '' when nothing matches
+  */
+ function getPropertyValue($obj, $path) {
+   $val = isset($obj['value']) ? $obj['value'] : $obj;
+   /* several callers pass a struct without 'value_type' (e.g. array('value' => ...)), so do not index it blindly */
+   $val_type = (is_array($obj) && isset($obj['value_type'])) ? $obj['value_type'] : '';
+   $path = $this->replacePlaceholders($path, 'property_value', 0);
+   /* reserved */
+   if ($path == 'size') {
+     if ($val_type == 'rows') return count($val);
+     if ($val_type == 'literal') return strlen($val);
+   }
+   /* NOTE(review): the pattern below comes from the script and may carry any modifier, including "e" on old PHP versions - confirm scripts are trusted */
+   if (preg_match('/^replace\([\'\"](\/.*\/[a-z]*)[\'\"],\s*[\'\"](.*)[\'\"]\)$/is', $path, $m)) {
+     /* "$1"-style references in the script are rewritten to "\1" */
+     return @preg_replace($m[1], str_replace('$', '\\', $m[2]), $val);
+   }
+   if (preg_match('/^match\([\'\"](\/.*\/[a-z]*)[\'\"]\)$/is', $path, $m)) {
+     return @preg_match($m[1], $val, $m) ? $m : '';
+   }
+   if (preg_match('/^urlencode\([\'\"]?(get|post|.*)[\'\"]?\)$/is', $path, $m)) {
+     return (strtolower($m[1]) == 'post') ? rawurlencode($val) : urlencode($val);
+   }
+   if (preg_match('/^toDataURI\([^\)]*\)$/is', $path, $m)) {
+     return 'data:text/plain;charset=utf-8,' . rawurlencode($val);
+   }
+   if (preg_match('/^fromDataURI\([^\)]*\)$/is', $path, $m)) {
+     return rawurldecode(str_replace('data:text/plain;charset=utf-8,', '', $val));
+   }
+   if (preg_match('/^toPrettyDate\([^\)]*\)$/is', $path, $m)) {
+     $uts = strtotime(preg_replace('/(T|\+00\:00)/', ' ', $val));
+     return date('D j M H:i', $uts);
+   }
+   if (preg_match('/^render\(([^\)]*)\)$/is', $path, $m)) {
+     $src_format = trim($m[1], '"\'');
+     return $this->render($val, $src_format);
+   }
+   /* struct */
+   if (is_array($val)) {
+     if (isset($val[$path])) return $val[$path];
+     $exp_path = $this->expandPName($path);
+     if (isset($val[$exp_path])) return $val[$exp_path];
+     if (preg_match('/^([^\.]+)\.(.+)$/', $path, $m)) {
+       list($var, $path) = array($m[1], $m[2]);
+       if (isset($val[$var])) {
+         return $this->getPropertyValue(array('value' => $val[$var]), $path);
+       }
+       /* qname */
+       $exp_var = $this->expandPName($var);
+       if (isset($val[$exp_var])) {
+         return $this->getPropertyValue(array('value' => $val[$exp_var]), $path);
+       }
+       return '';
+     }
+   }
+   /* meta */
+   if (preg_match('/^\_/', $path) && isset($obj['meta']) && isset($obj['meta'][substr($path, 1)])) {
+     return $obj['meta'][substr($path, 1)];
+   }
+   return '';
+ }
+
+ /**
+  * Renders a value as a string.
+  *
+  * With a source format, the matching render<Format>() method is used when it
+  * exists; without one, the value is passed to getArraySerialization().
+  *
+  * @param mixed $val value to render
+  * @param string $src_format optional format name, e.g. "objects" for renderObjects()
+  * @return string rendered value, or a message when no rendering method exists for the format
+  */
+ function render($val, $src_format = '') {
+   if ($src_format) {
+     $mthd = 'render' . $this->camelCase($src_format);
+     if (method_exists($this, $mthd)) {
+       return $this->$mthd($val);
+     }
+     else {
+       return 'No rendering method found for "' . $src_format. '"';
+     }
+   }
+   /* try RDF */
+   /* getArraySerialization() declares a second parameter; pass an empty context instead of omitting it */
+   return $this->getArraySerialization($val, '');
+ }
+
+ /**
+  * Concatenates the 'value' of each object, inserting ', ' whenever the text collected so far is non-empty.
+  *
+  * @param array $os list of arrays that each carry a 'value' key
+  * @return string
+  */
+ function renderObjects($os) {
+   $out = '';
+   foreach ($os as $obj) {
+     if ($out) {
+       $out .= ', ';
+     }
+     $out .= $obj['value'];
+   }
+   return $out;
+ }
+
+ /* */
+
+ /**
+  * Turns a value into a string, depending on its structure type.
+  *
+  * Strings are returned unchanged, simple arrays are joined with ', ', and RDF
+  * structures (triples or index) are serialised with to<PreferredFormat>() when
+  * that method exists, otherwise as N-Triples in a 'query' context and RDF/XML elsewhere.
+  *
+  * @param mixed $v value to serialise
+  * @param string $context usage context, e.g. 'query'; optional because render() calls this with a value only
+  * @return mixed the serialisation, or '' for a structure type that is not handled here
+  */
+ function getArraySerialization($v, $context = '') {
+   $v_type = ARC2::getStructType($v);/* string|array|triples|index */
+   $pf = ARC2::getPreferredFormat();
+   /* string */
+   if ($v_type == 'string') return $v;
+   /* simple array (e.g. from SELECT) */
+   if ($v_type == 'array') {
+     return join(', ', $v);
+   }
+   /* rdf */
+   if (($v_type == 'triples') || ($v_type == 'index')) {
+     $m = method_exists($this, 'to' . $pf) ? 'to' . $pf : ($context == 'query' ? 'toNTriples' : 'toRDFXML');
+     return $this->$m($v);
+   }
+   /* anything else: no serialiser was selected, so do not call through an undefined method name */
+   return '';
+ }
+
+ /* */
+
+ /**
+  * Dispatches a parsed block to process<Type>Block().
+  *
+  * An error is added when the configured operation limit is reached or the block
+  * type has no handler. Nothing is processed once a return statement has run.
+  *
+  * @param array $block parsed block with a 'type' key
+  * @return mixed the handler's result, 0 after a return, or the result of addError()
+  */
+ function processBlock($block) {
+   $limit = $this->max_operations;
+   if ($limit && ($this->env['operation_count'] >= $limit)) {
+     return $this->addError('Number of ' . $limit . ' allowed operations exceeded.');
+   }
+   if ($this->return) {
+     return 0;
+   }
+   $this->env['operation_count']++;
+   $type = $block['type'];
+   $handler = 'process' . $this->camelCase($type) . 'Block';
+   if (!method_exists($this, $handler)) {
+     return $this->addError('Unsupported block type "' . $type . '"');
+   }
+   return $this->$handler($block);
+ }
+
+ /* */
+
+ /**
+  * Makes the declared endpoint the current one for subsequent queries and returns the environment.
+  */
+ function processEndpointDeclBlock($block) {
+ $this->env['endpoint'] = $block['endpoint'];
+ return $this->env;
+ }
+
+ /* */
+
+ /**
+  * Runs a query block against the current endpoint.
+  *
+  * Placeholders in the endpoint and the query are expanded, a BASE prologue and
+  * the known prefixes are added, and the final query is appended to the query log.
+  * An error containing "socket" is downgraded to a warning and yields an empty result.
+  *
+  * @param array $block query block with 'base', 'query' and 'prefixes'
+  * @return mixed the store's query result, or the result of addError() when the query limit is hit or no store is available
+  */
+ function processQueryBlock($block) {
+   $limit = $this->max_queries;
+   if ($limit && ($this->env['query_count'] >= $limit)) {
+     return $this->addError('Number of ' . $limit . ' allowed queries exceeded.');
+   }
+   $this->env['query_count']++;
+   $ep_uri = $this->replacePlaceholders($this->env['endpoint'], 'endpoint');
+   /* q */
+   $query = $this->replacePlaceholders($block['query'], 'query');
+   /* prefixes */
+   if (isset($this->a['ns'])) {
+     $ns = array_merge($this->a['ns'], $block['prefixes']);
+   }
+   else {
+     $ns = $block['prefixes'];
+   }
+   $query = 'BASE <' . $block['base']. '>' . "\n" . $this->completeQuery($query, $ns);
+   $this->env['query_log'][] = '(' . $ep_uri . ') ' . $query;
+   $store = $this->getStore($ep_uri);
+   if (!$store) {
+     return $this->addError("no store (" . $ep_uri . ")");
+   }
+   if ($this->v('is_remote', '', $store)) {
+     $result = $store->query($query, '', $ep_uri);
+   }
+   else {
+     $result = $store->query($query);
+   }
+   /* ignore socket errors */
+   $errs = $this->getErrors();
+   if ($errs && preg_match('/socket/', $errs[0])) {
+     $this->warnings[] = $errs[0];
+     $this->errors = array();
+     $result = array();
+   }
+   return $result;
+ }
+
+ /**
+  * Returns the store for an endpoint URI.
+  *
+  * The local store is used (and cached) when the URI is empty or equals the
+  * script URI and 'sparqlscript_default_endpoint' is 'local'; any other non-empty
+  * URI gets a new remote store with a reader timeout of 10.
+  *
+  * @param string $ep_uri endpoint URI, may be empty
+  * @return mixed a store object, or 0 when no store applies
+  */
+ function getStore($ep_uri) {
+   /* local store */
+   $is_local_uri = (!$ep_uri || $ep_uri == ARC2::getScriptURI());
+   if ($is_local_uri && ($this->v('sparqlscript_default_endpoint', '', $this->a) == 'local')) {
+     if (!isset($this->local_store)) {
+       $this->local_store = ARC2::getStore($this->a);/* @@todo error checking */
+     }
+     return $this->local_store;
+   }
+   if (!$ep_uri) {
+     return 0;
+   }
+   /* remote store */
+   ARC2::inc('RemoteStore');
+   $overrides = array('remote_store_endpoint' => $ep_uri, 'reader_timeout' => 10);
+   return new ARC2_RemoteStore(array_merge($this->a, $overrides), $this);
+ }
+
+ /* */
+
+ /**
+  * Dispatches an assignment block to process<SubType>AssignmentBlock(); adds an error when no such method exists.
+  */
+ function processAssignmentBlock($block) {
+   $handler = 'process' . $this->camelCase($block['sub_type']) . 'AssignmentBlock';
+   if (method_exists($this, $handler)) {
+     return $this->$handler($block);
+   }
+   return $this->addError('Unknown method "' . $handler . '"');
+ }
+
+ /**
+  * Runs the query of an assignment block and stores its result in the target variable.
+  *
+  * The value type is derived from the query type: ask => bool, select => rows,
+  * describe/construct => doc, anything else => "<type> result". For SELECT only
+  * the rows are stored.
+  *
+  * @param array $block assignment block with 'query' and 'var'
+  * @return int|null 0 when the query failed or returned no query type, nothing otherwise
+  */
+ function processQueryAssignmentBlock($block) {
+   $qr = $this->processQueryBlock($block['query']);
+   if ($this->getErrors() || !isset($qr['query_type'])) return 0;
+   $qt = $qr['query_type'];
+   /* the key was misspelled "desribe", so DESCRIBE results were typed "describe result" instead of "doc" */
+   $vts = array('ask' => 'bool', 'select' => 'rows', 'describe' => 'doc', 'construct' => 'doc');
+   $r = array(
+     'value_type' => isset($vts[$qt]) ? $vts[$qt] : $qt . ' result',
+     'value' => ($qt == 'select') ? $this->v('rows', array(), $qr['result']) : $qr['result'],
+   );
+   $this->env['vars'][$block['var']['value']] = $r;
+ }
+
+ /**
+  * Assigns a string to the target variable as a literal, after expanding its placeholders.
+  */
+ function processStringAssignmentBlock($block) {
+   $target = $block['var']['value'];
+   $text = $this->replacePlaceholders($block['string']['value']);
+   $this->env['vars'][$target] = array('value_type' => 'literal', 'value' => $text);
+ }
+
+ /**
+  * Copies one variable to another; an unset source yields an 'undefined' entry with an empty value.
+  */
+ function processVarAssignmentBlock($block) {
+   $source = $block['var2']['value'];
+   $target = $block['var']['value'];
+   $entry = array('value_type' => 'undefined', 'value' => '');
+   if (isset($this->env['vars'][$source])) {
+     $entry = $this->env['vars'][$source];
+   }
+   $this->env['vars'][$target] = $entry;
+ }
+
+ /**
+  * Assigns the resolved value of a placeholder to the target variable; the value type is recorded as 'undefined'.
+  */
+ function processPlaceholderAssignmentBlock($block) {
+   $target = $block['var']['value'];
+   $resolved = $this->getPlaceholderValue($block['placeholder']['value']);
+   $this->env['vars'][$target] = array('value_type' => 'undefined', 'value' => $resolved);
+ }
+
+ /**
+  * Handles "$target := $a + $b".
+  *
+  * Two array values are merged (the result keeps the value type of the second
+  * operand); two numeric values are added and stored as a literal. Any other
+  * combination leaves the target untouched.
+  *
+  * @param array $block assignment block with 'var', 'var2' and 'var3'
+  */
+ function processVarMergeAssignmentBlock($block) {
+   $undefined = array('value_type' => 'undefined', 'value' => '');
+   $vars = $this->env['vars'];
+   $name1 = $block['var2']['value'];
+   $name2 = $block['var3']['value'];
+   $entry1 = isset($vars[$name1]) ? $vars[$name1] : $undefined;
+   $entry2 = isset($vars[$name2]) ? $vars[$name2] : $undefined;
+   /* the type checks have to look at the wrapped values: the entries themselves are always arrays, */
+   /* which sent string values into array_merge() and made the numeric branch unreachable */
+   $val1 = (is_array($entry1) && array_key_exists('value', $entry1)) ? $entry1['value'] : $entry1;
+   $val2 = (is_array($entry2) && array_key_exists('value', $entry2)) ? $entry2['value'] : $entry2;
+   $target = $block['var']['value'];
+   if (is_array($val1) && is_array($val2)) {
+     $type = (is_array($entry2) && isset($entry2['value_type'])) ? $entry2['value_type'] : 'undefined';
+     $this->env['vars'][$target] = array('value_type' => $type, 'value' => array_merge($val1, $val2));
+   }
+   elseif (is_numeric($val1) && is_numeric($val2)) {
+     /* stored as a regular entry so that readers expecting a 'value' key keep working */
+     $this->env['vars'][$target] = array('value_type' => 'literal', 'value' => $val1 + $val2);
+   }
+ }
+
+ /**
+  * Runs the function call of an assignment block and stores its result in the target variable.
+  *
+  * @return int|null 0 when errors are present after the call, nothing otherwise
+  */
+ function processFunctionCallAssignmentBlock($block) {
+   $call_result = $this->processFunctionCallBlock($block['function_call']);
+   if ($this->getErrors()) {
+     return 0;
+   }
+   $target = $block['var']['value'];
+   $this->env['vars'][$target] = $call_result;
+ }
+
+ /* */
+
+ /**
+  * Processes a return statement: runs the matching assignment handler, then sets the return flag.
+  *
+  * @return mixed the handler's result, or the result of addError() when no handler exists
+  */
+ function processReturnBlock($block) {
+   $handler = 'process' . $this->camelCase($block['sub_type']) . 'AssignmentBlock';
+   if (!method_exists($this, $handler)) {
+     return $this->addError('Unknown method "' . $handler . '"');
+   }
+   $result = $this->$handler($block);
+   $this->return = 1;
+   return $result;
+ }
+
+ /* */
+
+ /**
+  * Processes an IF block: runs 'blocks' when the condition holds, 'else_blocks' otherwise.
+  *
+  * @return int|null 0 as soon as a sub-block leaves errors behind, nothing otherwise
+  */
+ function processIfblockBlock($block) {
+   $branch = $this->testCondition($block['condition']) ? $block['blocks'] : $block['else_blocks'];
+   foreach ($branch as $sub_block) {
+     $this->processBlock($sub_block);
+     if ($this->getErrors()) {
+       return 0;
+     }
+   }
+ }
+
+ /**
+  * Dispatches a condition to test<Type>Condition(); adds an error when no such method exists.
+  */
+ function testCondition($cond) {
+   $tester = 'test' . $this->camelCase($cond['type']) . 'Condition';
+   if (method_exists($this, $tester)) {
+     return $this->$tester($cond);
+   }
+   return $this->addError('Unknown method "' . $tester . '"');
+ }
+
+ /**
+  * Tests a variable for truthiness; an unset variable counts as false and a '!' operator negates the result.
+  *
+  * @return bool
+  */
+ function testVarCondition($cond) {
+   $name = $cond['value'];
+   $truthy = false;
+   if (isset($this->env['vars'][$name])) {
+     $truthy = $this->env['vars'][$name]['value'] ? true : false;
+   }
+   if ($this->v('operator', '', $cond) == '!') {
+     return !$truthy;
+   }
+   return $truthy;
+ }
+
+ /**
+  * Tests the resolved value of a placeholder for truthiness; a '!' operator negates the result.
+  *
+  * @return bool
+  */
+ function testPlaceholderCondition($cond) {
+   $truthy = $this->getPlaceholderValue($cond['value']) ? true : false;
+   $negate = ($this->v('operator', '', $cond) == '!');
+   return $negate ? !$truthy : $truthy;
+ }
+
+ /**
+  * Dispatches an expression condition to test<SubType>ExpressionCondition(); adds an error when no such method exists.
+  */
+ function testExpressionCondition($cond) {
+   $tester = 'test' . $this->camelCase($cond['sub_type']) . 'ExpressionCondition';
+   if (method_exists($this, $tester)) {
+     return $this->$tester($cond);
+   }
+   return $this->addError('Unknown method "' . $tester . '"');
+ }
+
+ /**
+  * Compares the values of the two patterns of a relational expression.
+  *
+  * '=' is treated as '=='. Comparisons use PHP's loose comparison operators.
+  *
+  * @param array $cond condition with 'operator' and two 'patterns'
+  * @return int 1 when the comparison holds, 0 when it does not or the operator is not supported
+  */
+ function testRelationalExpressionCondition($cond) {
+   $op = $cond['operator'];
+   if ($op == '=') $op = '==';
+   $val1 = $this->getPatternValue($cond['patterns'][0]);
+   $val2 = $this->getPatternValue($cond['patterns'][1]);
+   /* explicit whitelist instead of eval(): no script-supplied text is ever executed as PHP */
+   switch ($op) {
+     case '==': return ($val1 == $val2) ? 1 : 0;
+     case '!=': return ($val1 != $val2) ? 1 : 0;
+     case '<': return ($val1 < $val2) ? 1 : 0;
+     case '>': return ($val1 > $val2) ? 1 : 0;
+     case '<=': return ($val1 <= $val2) ? 1 : 0;
+     case '>=': return ($val1 >= $val2) ? 1 : 0;
+   }
+   return 0;
+ }
+
+ /**
+  * Returns true only when every sub-pattern of the condition tests true; stops at the first failure.
+  *
+  * @return bool
+  */
+ function testAndExpressionCondition($cond) {
+   $all_hold = true;
+   foreach ($cond['patterns'] as $sub_cond) {
+     if (!$this->testCondition($sub_cond)) {
+       $all_hold = false;
+       break;
+     }
+   }
+   return $all_hold;
+ }
+
+ /**
+  * Dispatches a pattern to get<Type>PatternValue(); returns '' when no such method exists.
+  */
+ function getPatternValue($pattern) {
+   $getter = 'get' . $this->camelCase($pattern['type']) . 'PatternValue';
+   if (method_exists($this, $getter)) {
+     return $this->$getter($pattern);
+   }
+   return '';
+ }
+
+ /**
+  * A literal pattern evaluates to its own 'value'.
+  */
+ function getLiteralPatternValue($pattern) {
+   $literal = $pattern['value'];
+   return $literal;
+ }
+
+ /**
+  * A placeholder pattern evaluates to the resolved placeholder value.
+  */
+ function getPlaceholderPatternValue($pattern) {
+   $placeholder = $pattern['value'];
+   return $this->getPlaceholderValue($placeholder);
+ }
+
+ /* */
+
+ /**
+  * Processes a FOR block: binds each entry of the set variable to the iterator variable and runs the body.
+  *
+  * The iterator entry carries the meta data 'pos' (entry key) and 'odd_even'.
+  *
+  * @param array $block for block with 'set', 'iterator' and 'blocks'
+  * @return int|null 0 when the set is not an array or a body block leaves errors behind, nothing otherwise
+  */
+ function processForblockBlock($block) {
+   $set = $this->v($block['set'], array('value' => array()), $this->env['vars']);
+   $entries = isset($set['value']) ? $set['value'] : $set;
+   $iterator = $block['iterator'];
+   $blocks = $block['blocks'];
+   if (!is_array($entries)) return 0;
+   /* the entry type depends on the set only, so it is computed once */
+   $val_type = $this->v('value_type', 'set', $set) . ' entry';
+   foreach ($entries as $i => $entry) {
+     $this->env['vars'][$iterator] = array(
+       'value' => $entry,
+       'value_type' => $val_type,
+       'meta' => array(
+         'pos' => $i,
+         'odd_even' => ($i % 2) ? 'even' : 'odd'
+       )
+     );
+     /* separate loop variable, so the $block parameter is not overwritten */
+     foreach ($blocks as $sub_block) {
+       $this->processBlock($sub_block);
+       if ($this->getErrors()) return 0;
+     }
+   }
+ }
+
+ /* */
+
+ /**
+  * Appends a literal block to the script output after expanding its placeholders.
+  */
+ function processLiteralBlock($block) {
+ $this->env['output'] .= $this->replacePlaceholders($block['value'], 'output');
+ }
+
+ /* */
+
+ /**
+  * Processes a function call; only built-ins (URIs starting with the 'sps' namespace) are handled.
+  *
+  * @return mixed the built-in's result, or nothing for any other function URI
+  */
+ function processFunctionCallBlock($block) {
+   $uri = $this->replacePlaceholders($block['uri'], 'function_call');
+   /* built-ins */
+   $is_builtin = (strpos($uri, $this->a['ns']['sps']) === 0);
+   if (!$is_builtin) {
+     /* remote functions */
+     return;
+   }
+   return $this->processBuiltinFunctionCallBlock($block);
+ }
+
+ /**
+  * Runs a built-in function: "get"/"post" (case-insensitive) trigger an HTTP call, "eval" runs a nested script.
+  *
+  * @return mixed the call result, or nothing for an unknown built-in
+  */
+ function processBuiltinFunctionCallBlock($block) {
+   $uri = $this->replacePlaceholders($block['uri'], 'function_call');
+   /* local name = URI without the 'sps' namespace prefix */
+   $local_name = substr($uri, strlen($this->a['ns']['sps']));
+   if (preg_match('/^(get|post)$/i', $local_name, $m)) {
+     $http_method = strtoupper($m[1]);
+     return $this->processHTTPCall($block, $http_method);
+   }
+   if ($local_name == 'eval') {
+     return $this->processEvalCall($block);
+   }
+ }
+
+ /**
+  * Runs the first call argument as a nested script.
+  *
+  * The script text comes from a placeholder, a literal or a variable, depending on the argument type.
+  *
+  * @return int|null 0 when the call has no arguments, nothing otherwise
+  */
+ function processEvalCall($block) {
+   if (!$block['args']) {
+     return 0;
+   }
+   $first_arg = $block['args'][0];
+   $script = '';
+   switch ($first_arg['type']) {
+     case 'placeholder':
+       $script = $this->getPlaceholderValue($first_arg['value']);
+       break;
+     case 'literal':
+       $script = $first_arg['value'];
+       break;
+     case 'var':
+       $script = $this->getVarValue($first_arg['value']);
+       break;
+   }
+   $this->processScript($script);
+ }
+
+ /**
+  * Performs the HTTP request of a built-in get/post call and returns the response body.
+  *
+  * The URL is the first call argument with placeholders expanded. Non-GET
+  * requests are sent with a form-urlencoded content type.
+  *
+  * @param array $block function call block; 'args'[0]['value'] holds the URL
+  * @param string $mthd HTTP method
+  * @return array array('value_type' => 'http_response', 'value' => body)
+  */
+ function processHTTPCall($block, $mthd = 'GET') {
+   ARC2::inc('Reader');
+   /* plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7 */
+   $reader = new ARC2_Reader($this->a, $this);
+   $url = $this->replacePlaceholders($block['args'][0]['value'], 'function_call');
+   if ($mthd != 'GET') {
+     $reader->setHTTPMethod($mthd);
+     $reader->setCustomHeaders("Content-Type: application/x-www-form-urlencoded");
+   }
+   $to = $this->v('remote_call_timeout', 0, $this->a);
+   $reader->activate($url, '', 0, $to);
+   /* result unused here; the call is kept in case format detection advances the stream - TODO confirm */
+   $reader->getFormat();
+   $resp = '';
+   while ($d = $reader->readStream()) {
+     $resp .= $d;
+   }
+   $reader->closeStream();
+   unset($this->reader);
+   return array('value_type' => 'http_response', 'value' => $resp);
+ }
+
+ /* */
+
+ /**
+  * Matches $input against $pattern and stores the captured parts as script variables.
+  *
+  * "${name}" / "?{name}" placeholders in the pattern become "(.+)" capture groups;
+  * parenthesised groups are kept and may themselves contain placeholders. The
+  * resulting expression is matched case-insensitively with the "s" modifier.
+  *
+  * @param string $pattern pattern with placeholders and/or parenthesised groups
+  * @param string $input text to match
+  * @return int 1 on a match (or, without placeholders/groups, when pattern and input are equal), 0 otherwise
+  */
+ function extractVars($pattern, $input = '') {
+ $vars = array();
+ /* replace PHs, track ()s */
+ $regex = $pattern;
+ $vars = array();
+ if (preg_match_all('/([\?\$]\{([^\}]+)\}|\([^\)]+\))/', $regex, $m)) {
+ $matches = $m[1];
+ $pre_vars = $m[2];
+ foreach ($matches as $i => $match) {
+ /* one slot per capture group; the slot name is empty for a parenthesised group */
+ $vars[] = $pre_vars[$i];
+ if ($pre_vars[$i]) {/* placeholder */
+ $regex = str_replace($match, '(.+)', $regex);
+ }
+ else {/* parentheses, but may contain placeholders */
+ $sub_regex = $match;
+ while (preg_match('/([\?\$]\{([^\}]+)\})/', $sub_regex, $m)) {
+ $sub_regex = str_replace($m[1], '(.+)', $sub_regex);
+ $vars[] = $m[2];
+ }
+ $regex = str_replace($match, $sub_regex, $regex);
+ }
+ }
+ /* eval regex */
+ /* NOTE(review): the pattern is inserted between "/" delimiters without escaping - confirm patterns never contain "/" */
+ if (@preg_match('/' . $regex . '/is', $input, $m)) {
+ $vals = $m;
+ }
+ else {
+ return 0;
+ }
+ /* capture group n belongs to slot n - 1; unnamed slots are skipped */
+ for ($i = 0; $i < count($vars); $i++) {
+ if ($vars[$i]) {
+ $this->setVar($vars[$i], isset($vals[$i + 1]) ? $vals[$i + 1] : '');
+ }
+ }
+ return 1;
+ }
+ /* no placeholders */
+ return ($pattern == $input) ? 1 : 0;
+ }
+
+ /* */
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+/**
+ * ARC2 Remote RDF Store
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @package ARC2
+ * @version 2010-05-07
+*/
+
+ARC2::inc('Class');
+
+class ARC2_RemoteStore extends ARC2_Class {
+
+ /**
+  * PHP 5 constructor; passes configuration and caller to the parent and marks the store as remote.
+  */
+ function __construct($a = '', &$caller) {
+   parent::__construct($a, $caller);
+   /* set here as well: when __construct() exists, PHP 5+ never runs the PHP 4 style constructor, */
+   /* so a flag set only there would stay unset */
+   $this->is_remote = 1;
+ }
+
+ /**
+  * PHP 4 style constructor; forwards to __construct() and sets the is_remote flag.
+  */
+ function ARC2_RemoteStore($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ $this->is_remote = 1;
+ }
+
+ /**
+  * Runs the parent initialisation only.
+  */
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ /**
+  * Always reports the store as set up.
+  */
+ function isSetUp() {
+ return 1;
+ }
+
+ /* intentionally empty */
+ function setUp() {}
+
+ /* */
+
+ /* intentionally empty */
+ function reset() {}
+
+ /* intentionally empty */
+ function drop() {}
+
+ /**
+  * Sends the document to the endpoint as an "INSERT INTO <graph> { ... }" query.
+  *
+  * @param mixed $doc data accepted by toNTriples()
+  * @param string $g target graph URI
+  * @param int $keep_bnode_ids not used by this method
+  * @return mixed the result of query()
+  */
+ function insert($doc, $g, $keep_bnode_ids = 0) {
+   $triples = $this->toNTriples($doc, '', 1);
+   $sparql = 'INSERT INTO <' . $g . '> { ' . $triples . ' }';
+   return $this->query($sparql);
+ }
+
+ /**
+  * Sends a "DELETE FROM <graph>" query; with a document, only that document's triples are listed for deletion.
+  *
+  * @param mixed $doc data accepted by toNTriples(), or an empty value to address the whole graph
+  * @param string $g graph URI
+  * @return mixed the result of query()
+  */
+ function delete($doc, $g) {
+   $sparql = 'DELETE FROM <' . $g . '>';
+   if ($doc) {
+     $sparql .= ' { ' . $this->toNTriples($doc, '', 1) . ' }';
+   }
+   return $this->query($sparql);
+ }
+
+ /**
+  * Deletes $doc from graph $g, then inserts $doc_2 into it; returns both results as array(delete result, insert result).
+  */
+ function replace($doc, $g, $doc_2) {
+ return array($this->delete($doc, $g), $this->insert($doc_2, $g));
+ }
+
+ /* */
+
+ /**
+  * Parses a SPARQL+ query locally to determine its type, runs it remotely and returns the result.
+  *
+  * @param string $q query
+  * @param string $result_format '' (full result structure), 'raw', 'rows' or 'row'
+  * @param string $src passed to the parser together with the query
+  * @param int $keep_bnode_ids not used by this method
+  * @param int $log_query when set, the query is passed to logQuery()
+  * @return mixed depending on $result_format: the full structure with 'query_type', 'result' and 'query_time',
+  *               the bare result, the result rows, or the first row (empty array when there is none)
+  */
+ function query($q, $result_format = '', $src = '', $keep_bnode_ids = 0, $log_query = 0) {
+   if ($log_query) $this->logQuery($q);
+   ARC2::inc('SPARQLPlusParser');
+   /* plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7 */
+   $p = new ARC2_SPARQLPlusParser($this->a, $this);
+   $p->parse($q, $src);
+   $infos = $p->getQueryInfos();
+   $t1 = ARC2::mtime();
+   if (!$errs = $p->getErrors()) {
+     $qt = $infos['query']['type'];
+     $r = array('query_type' => $qt, 'result' => $this->runQuery($q, $qt, $infos));
+   }
+   else {
+     /* unparsable query: nothing is sent to the endpoint */
+     $r = array('result' => '');
+   }
+   $t2 = ARC2::mtime();
+   $r['query_time'] = $t2 - $t1;
+   /* query result */
+   if ($result_format == 'raw') {
+     return $r['result'];
+   }
+   if ($result_format == 'rows') {
+     return $this->v('rows', array(), $r['result']);
+   }
+   if ($result_format == 'row') {
+     if (!isset($r['result']['rows'])) return array();
+     return $r['result']['rows'] ? $r['result']['rows'][0] : array();
+   }
+   return $r;
+ }
+
+ /**
+  * Sends a query to the configured remote endpoint and converts the response.
+  *
+  * A run<Type>Query() method takes over when it exists. Otherwise load, insert
+  * and delete queries are POSTed and all others are sent via GET, with the
+  * configured read or write key appended when set. Responses in a known format
+  * (rdfxml, sparqlxml, turtle) are parsed; anything else is returned as text.
+  *
+  * @param string $q query
+  * @param string $qt query type, e.g. 'select', 'ask', 'insert'
+  * @param mixed $infos parsed query infos, handed to custom run<Type>Query() methods
+  * @return mixed false without a configured endpoint, array('errors' => ...) on reader errors, otherwise
+  *               a boolean (ask), the inserted/deleted infos, array('rows', 'variables') (select),
+  *               a simple index, or the raw response text
+  */
+ function runQuery($q, $qt = '', $infos = '') {
+   /* ep */
+   $ep = $this->v('remote_store_endpoint', 0, $this->a);
+   if (!$ep) return false;
+   /* prefixes */
+   $q = $this->completeQuery($q);
+   /* custom handling */
+   $mthd = 'run' . $this->camelCase($qt) . 'Query';
+   if (method_exists($this, $mthd)) {
+     return $this->$mthd($q, $infos);
+   }
+   /* http verb */
+   $mthd = in_array($qt, array('load', 'insert', 'delete')) ? 'POST' : 'GET';
+   /* reader */
+   ARC2::inc('Reader');
+   /* plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7 */
+   $reader = new ARC2_Reader($this->a, $this);
+   $reader->setAcceptHeader('Accept: application/sparql-results+xml; q=0.9, application/rdf+xml; q=0.9, */*; q=0.1');
+   if ($mthd == 'GET') {
+     $url = $ep;
+     $url .= strpos($ep, '?') ? '&' : '?';
+     $url .= 'query=' . urlencode($q);
+     if ($k = $this->v('store_read_key', '', $this->a)) $url .= '&key=' . urlencode($k);
+   }
+   else {
+     $url = $ep;
+     $reader->setHTTPMethod($mthd);
+     $reader->setCustomHeaders("Content-Type: application/x-www-form-urlencoded");
+     $suffix = ($k = $this->v('store_write_key', '', $this->a)) ? '&key=' . rawurlencode($k) : '';
+     $reader->setMessageBody('query=' . rawurlencode($q) . $suffix);
+   }
+   $to = $this->v('remote_store_timeout', 0, $this->a);
+   $reader->activate($url, '', 0, $to);
+   $format = $reader->getFormat();
+   $resp = '';
+   while ($d = $reader->readStream()) {
+     $resp .= $this->toUTF8($d);
+   }
+   $reader->closeStream();
+   $ers = $reader->getErrors();
+   /* keep the reader's auth infos in the configuration */
+   $this->a['reader_auth_infos'] = $reader->getAuthInfos();
+   unset($this->reader);
+   if ($ers) return array('errors' => $ers);
+   $mappings = array('rdfxml' => 'RDFXML', 'sparqlxml' => 'SPARQLXMLResult', 'turtle' => 'Turtle');
+   if (!$format || !isset($mappings[$format])) {
+     return $resp;
+     //return $this->addError('No parser available for "' . $format . '" SPARQL result');
+   }
+   /* format parser */
+   $suffix = $mappings[$format] . 'Parser';
+   ARC2::inc($suffix);
+   $cls = 'ARC2_' . $suffix;
+   $parser = new $cls($this->a, $this);
+   $parser->parse($ep, $resp);
+   /* ask|load|insert|delete */
+   if (in_array($qt, array('ask', 'load', 'insert', 'delete'))) {
+     $bid = $parser->getBooleanInsertedDeleted();
+     if ($qt == 'ask') {
+       $r = $bid['boolean'];
+     }
+     else {
+       $r = $bid;
+     }
+   }
+   /* select */
+   elseif (($qt == 'select') && !method_exists($parser, 'getRows')) {
+     /* the response was not parsed by a result-set parser; hand back the text */
+     $r = $resp;
+   }
+   elseif ($qt == 'select') {
+     $r = array('rows' => $parser->getRows(), 'variables' => $parser->getVariables());
+   }
+   /* any other */
+   else {
+     $r = $parser->getSimpleIndex(0);
+   }
+   unset($parser);
+   return $r;
+ }
+
+ /* */
+
+ /* intentionally empty */
+ function optimizeTables() {}
+
+ /* */
+
+ /**
+  * Returns a human-readable label for a resource and caches it per resource.
+  *
+  * Values that do not look like a URI or bnode id are returned unchanged, bnodes
+  * get $unnamed_label. Otherwise the endpoint is asked for a property whose URI
+  * ends in "label" or "name"; failing that, a label is derived from the last
+  * URI segment ("_" becomes " ", camelCase humps are split and lower-cased).
+  *
+  * @param string $res resource URI, bnode id or literal
+  * @param string $unnamed_label label used for bnodes
+  * @return string
+  */
+ function getResourceLabel($res, $unnamed_label = 'An unnamed resource') {
+   if (!isset($this->resource_labels)) $this->resource_labels = array();
+   if (isset($this->resource_labels[$res])) return $this->resource_labels[$res];
+   if (!preg_match('/^[a-z0-9\_]+\:[^\s]+$/si', $res)) return $res;/* literal */
+   $r = '';
+   if (preg_match('/^\_\:/', $res)) {
+     return $unnamed_label;
+   }
+   $row = $this->query('SELECT ?o WHERE { <' . $res . '> ?p ?o . FILTER(REGEX(str(?p), "(label|name)$", "i"))}', 'row');
+   if ($row) {
+     $r = $row['o'];
+   }
+   else {
+     $r = preg_replace("/^(.*[\/\#])([^\/\#]+)$/", '\\2', str_replace('#self', '', $res));
+     $r = str_replace('_', ' ', $r);
+     /* callback instead of the "e" modifier, which is deprecated since PHP 5.5 and removed in PHP 7 */
+     $r = preg_replace_callback('/([a-z])([A-Z])/', array($this, 'splitCamelCaseHump'), $r);
+   }
+   $this->resource_labels[$res] = $r;
+   return $r;
+ }
+
+ /**
+  * preg_replace_callback() helper for getResourceLabel(): turns "aB" into "a b".
+  */
+ function splitCamelCaseHump($m) {
+   return $m[1] . ' ' . strtolower($m[2]);
+ }
+
+ /**
+  * Returns the distinct types of all subjects that use the given property.
+  *
+  * @param string $p property URI
+  * @return array list of type values
+  */
+ function getDomains($p) {
+   $sparql = 'SELECT DISTINCT ?type WHERE {?s <' . $p . '> ?o ; a ?type . }';
+   $types = array();
+   foreach ($this->query($sparql, 'rows') as $row) {
+     $types[] = $row['type'];
+   }
+   return $types;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Store
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-22
+*/
+
+ARC2::inc('Class');
+
+class ARC2_Store extends ARC2_Class {
+
+ /**
+  * PHP 5 constructor; passes the configuration and the calling object on to the parent constructor.
+  */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /**
+  * PHP 4 style constructor; forwards to __construct().
+  */
+ function ARC2_Store($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /**
+  * Initialises the store state from the configuration (triggers, query queueing, table split settings) and detects Windows.
+  */
+ function __init() {/* db_con */
+   parent::__init();
+   $conf = $this->a;
+   $this->table_lock = 0;
+   $this->triggers = $this->v('store_triggers', array(), $conf);
+   $this->queue_queries = $this->v('store_queue_queries', 0, $conf);
+   $os_prefix = strtolower(substr(PHP_OS, 0, 3));
+   $this->is_win = ($os_prefix == 'win');
+   $this->max_split_tables = $this->v('store_max_split_tables', 10, $conf);
+   $this->split_predicates = $this->v('store_split_predicates', array(), $conf);
+ }
+
+ /* */
+
+ /**
+  * Returns the configured 'store_name', falling back to 'arc'.
+  */
+ function getName() {
+ return $this->v('store_name', 'arc', $this->a);
+ }
+
+ /**
+  * Returns the prefix for the store's table names and caches it.
+  *
+  * The prefix is "<db_table_prefix>_<store name>_", or "<store name>_" when no
+  * 'db_table_prefix' is configured.
+  *
+  * @return string
+  */
+ function getTablePrefix() {
+   if (!isset($this->tbl_prefix)) {
+     $r = $this->v('db_table_prefix', '', $this->a);
+     $r .= $r ? '_' : '';
+     $r .= $this->getName() . '_';
+     $this->tbl_prefix = $r;
+   }
+   /* stray second semicolon (an empty statement) removed */
+   return $this->tbl_prefix;
+ }
+
+ /* */
+
+ /**
+  * Opens a MySQL connection from the db_* configuration values and stores it in $this->a['db_con'].
+  *
+  * Missing db_host / db_user / db_pwd / db_name entries are filled with defaults
+  * first. When the settings table uses a utf8 collation, the connection is
+  * switched to utf8 as well.
+  *
+  * @return mixed true on success, otherwise the result of addError()
+  */
+ function createDBCon() {
+ /* make sure all connection settings exist */
+ foreach (array('db_host' => 'localhost', 'db_user' => '', 'db_pwd' => '', 'db_name' => '') as $k => $v) {
+ $this->a[$k] = $this->v($k, $v, $this->a);
+ }
+ if (!$db_con = mysql_connect($this->a['db_host'], $this->a['db_user'], $this->a['db_pwd'])) {
+ return $this->addError(mysql_error());
+ }
+ $this->a['db_con'] =& $db_con;
+ if (!mysql_select_db($this->a['db_name'], $db_con)) {
+ return $this->addError(mysql_error($db_con));
+ }
+ if (preg_match('/^utf8/', $this->getCollation())) {
+ $this->queryDB("SET NAMES 'utf8'", $db_con);
+ }
+ return true;
+ }
+
+ /**
+  * Returns the database connection, creating it when needed.
+  *
+  * An existing connection that no longer reports a thread id is re-created once.
+  *
+  * @param int $force when set, a new connection is always created
+  * @return mixed the connection, or false when it could not be created
+  */
+ function getDBCon($force = 0) {
+   $needs_connect = ($force || !isset($this->a['db_con']));
+   if ($needs_connect && !$this->createDBCon()) {
+     return false;
+   }
+   if (!$force) {
+     /* stale connection: retry once with a forced reconnect */
+     if (!@mysql_thread_id($this->a['db_con'])) {
+       return $this->getDBCon(1);
+     }
+   }
+   return $this->a['db_con'];
+ }
+
+ /**
+  * Closes the database connection, if there is one, and removes it from the configuration.
+  */
+ function closeDBCon() {
+   $has_con = $this->v('db_con', false, $this->a);
+   if ($has_con) {
+     @mysql_close($this->a['db_con']);
+   }
+   unset($this->a['db_con']);
+ }
+
+ /**
+  * Returns the MySQL server version as a zero-padded "MM-mm-pp" string and caches it; '00-00-00' when it cannot be parsed.
+  */
+ function getDBVersion() {
+   if (!$this->v('db_version')) {
+     $server_info = mysql_get_server_info($this->getDBCon());
+     if (preg_match("/^([0-9]+)\.([0-9]+)\.([0-9]+)/", $server_info, $m)) {
+       $this->db_version = sprintf("%02d-%02d-%02d", $m[1], $m[2], $m[3]);
+     }
+     else {
+       $this->db_version = '00-00-00';
+     }
+   }
+   return $this->db_version;
+ }
+
+ /* */
+
+ /**
+  * Returns the collation of the store's "setting" table, or '' when it cannot be determined.
+  */
+ function getCollation() {
+   $sql = 'SHOW TABLE STATUS LIKE "' . $this->getTablePrefix(). 'setting"';
+   $rs = $this->queryDB($sql, $this->getDBCon());
+   if (!$rs) {
+     return '';
+   }
+   $row = mysql_fetch_array($rs);
+   if (!$row || !isset($row['Collation'])) {
+     return '';
+   }
+   return $row['Collation'];
+ }
+
+ /**
+  * Returns 'mediumint' or 'int', depending on the type of column "t" in the g2t table; the answer is cached.
+  *
+  * 'mediumint' is assumed when the column query fails.
+  */
+ function getColumnType() {
+   if ($this->v('column_type')) {
+     return $this->column_type;
+   }
+   $tbl = $this->getTablePrefix() . 'g2t';
+   $rs = $this->queryDB('SHOW COLUMNS FROM ' . $tbl . ' LIKE "t"', $this->getDBCon());
+   $type = 'mediumint';
+   if ($rs) {
+     $row = mysql_fetch_array($rs);
+     $type = $row['Type'];
+   }
+   $this->column_type = preg_match('/mediumint/', $type) ? 'mediumint' : 'int';
+   return $this->column_type;
+ }
+
+ /* */
+
+ /**
+  * Tells whether the given store table has a "val_hash" column; the answer is cached per table.
+  *
+  * @param string $tbl table name without the store prefix
+  * @return bool
+  */
+ function hasHashColumn($tbl) {
+   $cache_key = 'has_hash_column_' . $tbl;
+   if (isset($this->$cache_key)) {
+     return $this->$cache_key;
+   }
+   $full_tbl = $this->getTablePrefix() . $tbl;
+   $rs = $this->queryDB('SHOW COLUMNS FROM ' . $full_tbl . ' LIKE "val_hash"', $this->getDBCon());
+   $this->$cache_key = ($rs && mysql_fetch_array($rs));
+   return $this->$cache_key;
+ }
+
+ /* */
+
+ /**
+  * Tells whether the o2val table has a FULLTEXT index on column "val"; the answer is cached.
+  *
+  * @return int 1 or 0
+  */
+ function hasFulltextIndex() {
+   if (!isset($this->has_fulltext_index)) {
+     $this->has_fulltext_index = 0;
+     $tbl = $this->getTablePrefix() . 'o2val';
+     $rs = $this->queryDB('SHOW INDEX FROM ' . $tbl, $this->getDBCon());
+     /* a failed query yields no result resource; treat that as "no index" instead of fetching from it */
+     while ($rs && ($row = mysql_fetch_array($rs))) {
+       if ($row['Column_name'] != 'val') continue;
+       if ($row['Index_type'] != 'FULLTEXT') continue;
+       $this->has_fulltext_index = 1;
+       break;
+     }
+   }
+   return $this->has_fulltext_index;
+ }
+
+ /**
+  * Creates the FULLTEXT index "vft" on o2val.val unless one already exists.
+  *
+  * @return int|null 1 when the index already exists, nothing after attempting to create it
+  */
+ function enableFulltextSearch() {
+   if ($this->hasFulltextIndex()) return 1;
+   $tbl = $this->getTablePrefix() . 'o2val';
+   $this->queryDB('CREATE FULLTEXT INDEX vft ON ' . $tbl . '(val(128))', $this->getDBCon(), 1);
+   /* hasFulltextIndex() has just cached "no index"; drop the cache so the next call re-checks the table */
+   unset($this->has_fulltext_index);
+ }
+
+ /**
+  * Drops the FULLTEXT index "vft" from o2val when one exists.
+  *
+  * @return int|null 1 when there is no index, nothing after attempting to drop it
+  */
+ function disableFulltextSearch() {
+   if (!$this->hasFulltextIndex()) return 1;
+   $tbl = $this->getTablePrefix() . 'o2val';
+   $this->queryDB('DROP INDEX vft ON ' . $tbl, $this->getDBCon());
+   /* hasFulltextIndex() has just cached "index present"; drop the cache so the next call re-checks the table */
+   unset($this->has_fulltext_index);
+ }
+
+ /* */
+
+ /**
+  * Returns the number of rows reported by SHOW PROCESSLIST, or 0 when the query fails.
+  */
+ function countDBProcesses() {
+   $rs = $this->queryDB('SHOW PROCESSLIST', $this->getDBCon());
+   if (!$rs) {
+     return 0;
+   }
+   return mysql_num_rows($rs);
+ }
+
+ /* */
+
+ /**
+  * Returns the unprefixed names of the store's tables.
+  */
+ function getTables() {
+ return array('triple', 'g2t', 'id2val', 's2val', 'o2val', 'setting');
+ }
+
+ /* */
+
+ /**
+  * Tells whether the store's tables exist by probing the "setting" table.
+  *
+  * @return int|null 1 or 0, or nothing when no database connection is available
+  */
+ function isSetUp() {
+   $con = $this->getDBCon();
+   if (!$con) {
+     return;
+   }
+   $tbl = $this->getTablePrefix() . 'setting';
+   $probe = $this->queryDB("SELECT 1 FROM " . $tbl . " LIMIT 0", $con);
+   return $probe ? 1 : 0;
+ }
+
+ /**
+  * Creates the store tables when the store is not set up yet (or unconditionally with $force).
+  *
+  * Requires a database connection and MySQL 4.0.4 or higher.
+  *
+  * @param int $force create the tables even when the store already looks set up
+  * @return mixed the result of addError() for an unsupported MySQL version, nothing otherwise
+  */
+ function setUp($force = 0) {
+   if (!$force && $this->isSetUp()) {
+     return;
+   }
+   if (!$this->getDBCon()) {
+     return;
+   }
+   if ($this->getDBVersion() < '04-00-04') {
+     /* UPDATE + JOINs */
+     return $this->addError('MySQL version not supported. ARC requires version 4.0.4 or higher.');
+   }
+   ARC2::inc('StoreTableManager');
+   $table_manager = new ARC2_StoreTableManager($this->a, $this);
+   $table_manager->createTables();
+ }
+
+ /**
+  * Lets the table manager extend the columns and records 'int' as the cached column type.
+  */
+ function extendColumns() {
+ ARC2::inc('StoreTableManager');
+ $mgr = new ARC2_StoreTableManager($this->a, $this);
+ $mgr->extendColumns();
+ $this->column_type = 'int';
+ }
+
+ /**
+  * Delegates to ARC2_StoreTableManager::splitTables().
+  */
+ function splitTables() {
+   ARC2::inc('StoreTableManager');
+   $table_mgr = new ARC2_StoreTableManager($this->a, $this);
+   $table_mgr->splitTables();
+ }
+
+ /* */
+
+ /**
+  * Tells whether a row for the given setting key exists in the setting table.
+  * Keys are stored as md5 hashes.
+  *
+  * @param string $k setting name
+  * @return int 1 if a row exists, 0 otherwise
+  */
+ function hasSetting($k) {
+   $sql = "SELECT val FROM " . $this->getTablePrefix() . 'setting' . " WHERE k = '" . md5($k) . "'";
+   $rs = $this->queryDB($sql, $this->getDBCon());
+   if (!$rs) {
+     return 0;
+   }
+   return mysql_fetch_array($rs) ? 1 : 0;
+ }
+
+ /**
+  * Reads a setting and unserializes its stored value.
+  *
+  * @param string $k setting name (looked up by its md5 hash)
+  * @param mixed $default returned when no row is found
+  * @return mixed the unserialized value, or $default
+  */
+ function getSetting($k, $default = 0) {
+   $sql = "SELECT val FROM " . $this->getTablePrefix() . 'setting' . " WHERE k = '" . md5($k) . "'";
+   $rs = $this->queryDB($sql, $this->getDBCon());
+   $row = $rs ? mysql_fetch_array($rs) : false;
+   if (!$row) {
+     return $default;
+   }
+   return unserialize($row['val']);
+ }
+
+ /**
+  * Stores a setting: updates the row if the key exists, inserts it otherwise.
+  * The value is serialized; the key is stored as its md5 hash.
+  *
+  * @param string $k setting name
+  * @param mixed $v value to store
+  * @return mixed result of the write query
+  */
+ function setSetting($k, $v) {
+   $con = $this->getDBCon();
+   $tbl = $this->getTablePrefix() . 'setting';
+   $exists = $this->hasSetting($k);
+   $key_sql = "'" . md5($k) . "'";
+   $val_sql = "'" . mysql_real_escape_string(serialize($v), $con) . "'";
+   $sql = $exists
+     ? "UPDATE " . $tbl . " SET val = " . $val_sql . " WHERE k = " . $key_sql
+     : "INSERT INTO " . $tbl . " (k, val) VALUES (" . $key_sql . ", " . $val_sql . ")";
+   return $this->queryDB($sql, $con);
+ }
+
+ /**
+  * Deletes the row of the given setting.
+  *
+  * @param string $k setting name (matched by its md5 hash)
+  * @return mixed result of the DELETE query
+  */
+ function removeSetting($k) {
+   $sql = "DELETE FROM " . $this->getTablePrefix() . 'setting' . " WHERE k = '" . md5($k) . "'";
+   return $this->queryDB($sql, $this->getDBCon());
+ }
+
+ /**
+  * Appends a new ticket to the "query_queue" setting and waits until that
+  * ticket is first in the queue.
+  *
+  * Returns 1 immediately when $this->queue_queries is off. Otherwise the queue
+  * is polled until the ticket is at position 0, the queue is empty, or the
+  * wait counter reaches 30.
+  *
+  * @return mixed the ticket string, 1 if queueing is disabled, 0 if the wait limit was reached
+  */
+ function getQueueTicket() {
+ if (!$this->queue_queries) return 1;
+ $t = 'ticket_' . substr(md5(uniqid(rand())), 0, 10);
+ $con = $this->getDBCon();
+ /* lock */
+ $rs = $this->queryDB('LOCK TABLES ' . $this->getTablePrefix() . 'setting WRITE', $con);
+ /* queue */
+ $queue = $this->getSetting('query_queue', array());
+ $queue[] = $t;
+ $this->setSetting('query_queue', $queue);
+ $this->queryDB('UNLOCK TABLES', $con);
+ /* loop */
+ $lc = 0;
+ $queue = $this->getSetting('query_queue', array());
+ while ($queue && ($queue[0] != $t) && ($lc < 30)) {
+ /* is_win: poll once per second; otherwise poll every 0.1 s and count fractions */
+ if ($this->is_win) {
+ sleep(1);
+ $lc++;
+ }
+ else {
+ usleep(100000);
+ $lc += 0.1;
+ }
+ $queue = $this->getSetting('query_queue', array());
+ }
+ /* NOTE(review): on timeout the ticket created above is left in the stored queue - confirm that this is intended */
+ return ($lc < 30) ? $t : 0;
+ }
+
+ /**
+  * Removes a ticket from the "query_queue" setting, together with every
+  * ticket that precedes it in the queue.
+  *
+  * @param mixed $t ticket as returned by getQueueTicket()
+  * @return mixed 1 if queueing is disabled, otherwise nothing
+  */
+ function removeQueueTicket($t) {
+ if (!$this->queue_queries) return 1;
+ $con = $this->getDBCon();
+ /* lock */
+ $this->queryDB('LOCK TABLES ' . $this->getTablePrefix() . 'setting WRITE', $con);
+ /* queue */
+ $vals = $this->getSetting('query_queue', array());
+ /* NOTE(review): array_search() is non-strict and returns false when $t is not queued; false then acts as position 0 below, so the head of the queue is dropped - verify against callers that pass 0 */
+ $pos = array_search($t, $vals);
+ /* keep only the tickets behind $t; an empty queue results if $t was the last entry */
+ $queue = ($pos < (count($vals) - 1)) ? array_slice($vals, $pos + 1) : array();
+ $this->setSetting('query_queue', $queue);
+ $this->queryDB('UNLOCK TABLES', $con);
+ }
+
+ /* */
+
+ /**
+  * Empties the store: drops the per-predicate split tables, removes the
+  * "split_predicates" setting and truncates the standard tables.
+  *
+  * @param mixed $keep_settings truthy to leave the "setting" table untouched
+  */
+ function reset($keep_settings = 0) {
+   $con = $this->getDBCon();
+   $names = $this->getTables();
+   $prefix = $this->getTablePrefix();
+   /* remove split tables */
+   foreach ($this->getSetting('split_predicates', array()) as $split_p) {
+     $this->queryDB('DROP TABLE ' . $prefix . 'triple_' . abs(crc32($split_p)), $con);
+   }
+   $this->removeSetting('split_predicates');
+   /* truncate tables */
+   foreach ($names as $name) {
+     if (!$keep_settings || ($name != 'setting')) {
+       $this->queryDB('TRUNCATE ' . $prefix . $name, $con);
+     }
+   }
+ }
+
+ /**
+  * Drops every standard table of the store.
+  */
+ function drop() {
+   $con = $this->getDBCon();
+   $names = $this->getTables();
+   $prefix = $this->getTablePrefix();
+   $total = count($names);
+   for ($i = 0; $i < $total; $i++) {
+     $this->queryDB('DROP TABLE ' . $prefix . $names[$i], $con);
+   }
+ }
+
+ /**
+  * Loads a document into graph $g through the LOAD query handler and then
+  * runs the 'insert' triggers.
+  *
+  * @param mixed $doc document string, or an array which is converted with toTurtle() first
+  * @param string $g target graph (also used as the load URL)
+  * @param mixed $keep_bnode_ids passed through to the load handler
+  * @return mixed the load handler's result
+  */
+ function insert($doc, $g, $keep_bnode_ids = 0) {
+   $doc = is_array($doc) ? $this->toTurtle($doc) : $doc;
+   $infos = array('query' => array('url' => $g, 'target_graph' => $g));
+   ARC2::inc('StoreLoadQueryHandler');
+   /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+   $h = new ARC2_StoreLoadQueryHandler($this->a, $this);
+   $r = $h->runQuery($infos, $doc, $keep_bnode_ids);
+   $this->processTriggers('insert', $infos);
+   return $r;
+ }
+
+ /**
+  * Deletes the whole graph $g through the DELETE query handler and then runs
+  * the 'delete' triggers. Only the "no document" case is handled: with a
+  * non-empty $doc the method does nothing and returns nothing.
+  *
+  * @param mixed $doc must be empty for anything to happen
+  * @param string $g graph to delete
+  * @return mixed the delete handler's result, or nothing when $doc is non-empty
+  */
+ function delete($doc, $g) {
+   if (!$doc) {
+     $infos = array('query' => array('target_graphs' => array($g)));
+     ARC2::inc('StoreDeleteQueryHandler');
+     /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+     $h = new ARC2_StoreDeleteQueryHandler($this->a, $this);
+     $r = $h->runQuery($infos);
+     $this->processTriggers('delete', $infos);
+     return $r;
+   }
+ }
+
+ /**
+  * Runs delete($doc, $g) followed by insert($doc_2, $g).
+  *
+  * @return array the delete result and the insert result, in that order
+  */
+ function replace($doc, $g, $doc_2) {
+   $deleted = $this->delete($doc, $g);
+   $inserted = $this->insert($doc_2, $g);
+   return array($deleted, $inserted);
+ }
+
+ /**
+  * Outputs the store contents via ARC2_StoreDumper::dumpSPOG().
+  */
+ function dump() {
+   ARC2::inc('StoreDumper');
+   /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+   $d = new ARC2_StoreDumper($this->a, $this);
+   $d->dumpSPOG();
+ }
+
+ /**
+  * Writes the store contents to a file via ARC2_StoreDumper::saveSPOG().
+  *
+  * @param string $path target file
+  * @param string $q optional query passed through to the dumper
+  */
+ function createBackup($path, $q = '') {
+   ARC2::inc('StoreDumper');
+   /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+   $d = new ARC2_StoreDumper($this->a, $this);
+   $d->saveSPOG($path, $q);
+ }
+
+ /**
+  * Renames the store's standard tables so that they carry the prefix of a
+  * store called $name, then switches this object to that store name.
+  *
+  * @param string $name new store name
+  * @return mixed the addError() result if a RENAME fails, otherwise nothing
+  */
+ function renameTo($name) {
+ $con = $this->getDBCon();
+ $tbls = $this->getTables();
+ $old_prefix = $this->getTablePrefix();
+ /* new prefix: [db_table_prefix_]name_ */
+ $new_prefix = $this->v('db_table_prefix', '', $this->a);
+ $new_prefix .= $new_prefix ? '_' : '';
+ $new_prefix .= $name . '_';
+ /* NOTE(review): only the tables listed by getTables() are renamed; the split "triple_*" tables that reset() drops do not appear to be covered - confirm */
+ foreach ($tbls as $tbl) {
+ $rs = $this->queryDB('RENAME TABLE ' . $old_prefix . $tbl .' TO ' . $new_prefix . $tbl, $con);
+ if ($er = mysql_error($con)) {
+ /* stops at the first failure; tables renamed so far keep their new names */
+ return $this->addError($er);
+ }
+ }
+ $this->a['store_name'] = $name;
+ /* drop the stored prefix (presumably re-derived by getTablePrefix() - confirm) */
+ unset($this->tbl_prefix);
+ }
+
+ /**
+  * Copies the contents of the standard tables into a second store called $name.
+  * The target store is set up and reset first, so existing data in it is removed.
+  *
+  * @param string $name name of the target store
+  * @return mixed the addError() result if a copy statement fails, otherwise the row of a triple COUNT query on the new store
+  */
+ function replicateTo($name) {
+ /* same configuration as this store, different store name */
+ $conf = array_merge($this->a, array('store_name' => $name));
+ $new_store = ARC2::getStore($conf);
+ $new_store->setUp();
+ $new_store->reset();
+ $con = $this->getDBCon();
+ $tbls = $this->getTables();
+ $old_prefix = $this->getTablePrefix();
+ $new_prefix = $new_store->getTablePrefix();
+ /* both table sets are addressed through this store's connection */
+ foreach ($tbls as $tbl) {
+ $rs = $this->queryDB('INSERT IGNORE INTO ' . $new_prefix . $tbl .' SELECT * FROM ' . $old_prefix . $tbl, $con);
+ if ($er = mysql_error($con)) {
+ return $this->addError($er);
+ }
+ }
+ return $new_store->query('SELECT COUNT(*) AS t_count WHERE { ?s ?p ?o}', 'row');
+ }
+
+ /* */
+
+ /**
+  * Parses and runs a SPARQL(+) query, or a "dump" command.
+  *
+  * @param string $q query string; anything starting with "dump" bypasses the parser
+  * @param string $result_format '' (full result array), 'infos' (parsed query structure only),
+  *                              'raw' (handler result), 'rows' (result rows) or 'row' (first result row)
+  * @param string $src passed to the parser together with $q
+  * @param mixed $keep_bnode_ids passed through to the query handler
+  * @param mixed $log_query truthy to append the query to the query log
+  * @return mixed depends on $result_format; 0 if the parser reported errors; the addError() result for unsupported query types
+  */
+ function query($q, $result_format = '', $src = '', $keep_bnode_ids = 0, $log_query = 0) {
+   if ($log_query) $this->logQuery($q);
+   $con = $this->getDBCon();
+   if (preg_match('/^dump/i', $q)) {
+     $infos = array('query' => array('type' => 'dump'));
+   }
+   else {
+     ARC2::inc('SPARQLPlusParser');
+     /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+     $p = new ARC2_SPARQLPlusParser($this->a, $this);
+     $p->parse($q, $src);
+     $infos = $p->getQueryInfos();
+   }
+   if ($result_format == 'infos') return $infos;
+   $infos['result_format'] = $result_format;
+   /* $p is only set when the parser ran (i.e. not for dumps) */
+   if (isset($p) && $p->getErrors()) return 0;
+   $qt = $infos['query']['type'];
+   if (!in_array($qt, array('select', 'ask', 'describe', 'construct', 'load', 'insert', 'delete', 'dump'))) {
+     return $this->addError('Unsupported query type "'.$qt.'"');
+   }
+   $t1 = ARC2::mtime();
+   $r = array('query_type' => $qt, 'result' => $this->runQuery($infos, $qt, $keep_bnode_ids, $q));
+   $t2 = ARC2::mtime();
+   $r['query_time'] = $t2 - $t1;
+   /* query result */
+   if ($result_format == 'raw') {
+     return $r['result'];
+   }
+   if (($result_format == 'rows') || ($result_format == 'row')) {
+     /* not every handler returns an array with a "rows" entry; fall back to an empty list then */
+     $rows = (is_array($r['result']) && !empty($r['result']['rows'])) ? $r['result']['rows'] : array();
+     if ($result_format == 'rows') return $rows;
+     return $rows ? $rows[0] : array();
+   }
+   return $r;
+ }
+
+ /**
+  * Instantiates the query handler for the given query type, runs it and fires
+  * the triggers registered for that type.
+  *
+  * SELECT queries that come with their query string first obtain a queue
+  * ticket; without a ticket the handler is not run and an empty array is returned.
+  *
+  * @param array $infos parsed query structure
+  * @param string $type query type, used to derive the handler class name
+  * @param mixed $keep_bnode_ids passed through to the handler
+  * @param string $q original query string
+  * @return mixed the handler's result, or an empty array if no ticket was obtained
+  */
+ function runQuery($infos, $type, $keep_bnode_ids = 0, $q = '') {
+   ARC2::inc('Store' . ucfirst($type) . 'QueryHandler');
+   $cls = 'ARC2_Store' . ucfirst($type) . 'QueryHandler';
+   /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+   $h = new $cls($this->a, $this);
+   $ticket = 1;
+   $r = array();
+   $uses_queue = ($q && ($type == 'select'));
+   if ($uses_queue) $ticket = $this->getQueueTicket($q);
+   if ($ticket) {
+     if ($type == 'load') {/* the LoadQH supports raw data as 2nd parameter */
+       $r = $h->runQuery($infos, '', $keep_bnode_ids);
+     }
+     else {
+       $r = $h->runQuery($infos, $keep_bnode_ids);
+     }
+   }
+   if ($uses_queue) $this->removeQueueTicket($ticket);
+   /* trigger results are not part of the returned value */
+   $this->processTriggers($type, $infos);
+   return $r;
+ }
+
+ /**
+  * Runs the trigger classes registered for a query type.
+  *
+  * @param string $type query type used as key into $this->triggers
+  * @param array $infos query structure, handed to each trigger as 'query_infos' config entry
+  * @return array empty, or array('trigger_results' => list of the triggers' go() return values)
+  */
+ function processTriggers($type, $infos) {
+ $r = array();
+ /* the trigger definitions are emptied while triggers run and restored at the end (presumably so that queries issued by a trigger do not fire triggers again - confirm) */
+ $trigger_defs = $this->triggers;
+ $this->triggers = array();
+ if ($triggers = $this->v($type, array(), $trigger_defs)) {
+ $r['trigger_results'] = array();
+ $triggers = is_array($triggers) ? $triggers : array($triggers);
+ $trigger_inc_path = $this->v('store_triggers_path', '', $this->a);
+ foreach ($triggers as $trigger) {
+ /* trigger names may be given with or without the "Trigger" suffix */
+ $trigger .= !preg_match('/Trigger$/', $trigger) ? 'Trigger' : '';
+ if (ARC2::inc(ucfirst($trigger), $trigger_inc_path)) {
+ $cls = 'ARC2_' . ucfirst($trigger);
+ $config = array_merge($this->a, array('query_infos' => $infos));
+ $trigger_obj = new $cls($config, $this);
+ /* triggers without a go() method are instantiated but otherwise ignored */
+ if (method_exists($trigger_obj, 'go')) {
+ $r['trigger_results'][] = $trigger_obj->go();
+ }
+ }
+ }
+ }
+ $this->triggers = $trigger_defs;
+ return $r;
+ }
+
+ /* */
+
+ /**
+  * Computes the hash used for val_hash lookups: the absolute value of crc32($val).
+  *
+  * @param string $val
+  * @return mixed non-negative checksum
+  */
+ function getValueHash($val) {
+   $checksum = crc32($val);
+   return abs($checksum);
+ }
+
+ /**
+  * Looks up the numeric id of a value in one of the value tables.
+  *
+  * @param string $val value to look up
+  * @param string $term 's' or 'o' selects s2val/o2val; anything else selects id2val
+  * @return mixed the id, or 0 if the value is not stored
+  */
+ function getTermID($val, $term = '') {
+ $tbl = preg_match('/^(s|o)$/', $term) ? $term . '2val' : 'id2val';
+ $con = $this->getDBCon();
+ /* via hash */
+ if (preg_match('/^(s2val|o2val)$/', $tbl) && $this->hasHashColumn($tbl)) {
+ $sql = "SELECT id, val FROM " . $this->getTablePrefix() . $tbl . " WHERE val_hash = '" . $this->getValueHash($val) . "'";
+ if (($rs = $this->queryDB($sql, $con)) && mysql_num_rows($rs)) {
+ /* several values can share a hash, so the candidates are compared against $val in PHP */
+ while ($row = mysql_fetch_array($rs)) {
+ if ($row['val'] == $val) {
+ return $row['id'];
+ }
+ }
+ }
+ }
+ /* exact match */
+ /* (only used when the hash lookup is not available; a failed hash lookup does not fall back to it) */
+ else {
+ $sql = "SELECT id FROM " . $this->getTablePrefix() . $tbl . " WHERE val = BINARY '" . mysql_real_escape_string($val, $con) . "' LIMIT 1";
+ if (($rs = $this->queryDB($sql, $con)) && mysql_num_rows($rs) && ($row = mysql_fetch_array($rs))) {
+ return $row['id'];
+ }
+ }
+ return 0;
+ }
+
+ /**
+  * Looks up the value stored for a numeric id in one of the value tables.
+  *
+  * @param mixed $id id to resolve
+  * @param string $term 's' or 'o' selects s2val/o2val; anything else selects id2val
+  * @return mixed the value, or 0 if the id is unknown
+  */
+ function getIDValue($id, $term = '') {
+   $tbl = preg_match('/^(s|o)$/', $term) ? $term . '2val' : 'id2val';
+   $con = $this->getDBCon();
+   /* the escaped id is quoted: escaping alone offers no protection for an unquoted SQL operand */
+   $sql = "SELECT val FROM " . $this->getTablePrefix() . $tbl . " WHERE id = '" . mysql_real_escape_string($id, $con) . "' LIMIT 1";
+   if (($rs = $this->queryDB($sql, $con)) && mysql_num_rows($rs) && ($row = mysql_fetch_array($rs))) {
+     return $row['val'];
+   }
+   return 0;
+ }
+
+ /* */
+
+ /**
+  * Tries to obtain the store's named MySQL write lock.
+  *
+  * While the lock is reported as taken, the method sleeps one second and calls
+  * itself again with $t_out reduced by one, until $t_out is used up.
+  *
+  * @param int $t_out remaining number of one-second retries
+  * @param mixed $t_out_init timeout handed to GET_LOCK; defaults to the initial $t_out
+  * @return mixed the GET_LOCK "success" value, or 0 if the lock could not be obtained
+  */
+ function getLock($t_out = 10, $t_out_init = '') {
+ if (!$t_out_init) $t_out_init = $t_out;
+ $con = $this->getDBCon();
+ /* lock name: <db name>.<table prefix>.write_lock */
+ $l_name = $this->a['db_name'] . '.' . $this->getTablePrefix() . '.write_lock';
+ if ($rs = $this->queryDB('SELECT IS_FREE_LOCK("' . $l_name. '") AS success', $con)) {
+ $row = mysql_fetch_array($rs);
+ if (!$row['success']) {
+ /* lock is taken: wait and retry while retries are left, otherwise fall through to "return 0" */
+ if ($t_out) {
+ sleep(1);
+ return $this->getLock($t_out - 1, $t_out_init);
+ }
+ }
+ elseif ($rs = $this->queryDB('SELECT GET_LOCK("' . $l_name. '", ' . $t_out_init. ') AS success', $con)) {
+ $row = mysql_fetch_array($rs);
+ return $row['success'];
+ }
+ }
+ return 0;
+ }
+
+ /**
+  * Releases the store's named MySQL write lock.
+  *
+  * @return mixed result of the RELEASE_LOCK query
+  */
+ function releaseLock() {
+   $con = $this->getDBCon();
+   $l_name = $this->a['db_name'] . '.' . $this->getTablePrefix() . '.write_lock';
+   return $this->queryDB('DO RELEASE_LOCK("' . $l_name . '")', $con);
+ }
+
+ /* */
+
+ /**
+  * Runs a table maintenance statement ("<OPERATION> TABLE a, b, ...") on the store tables.
+  *
+  * @param int $level 1: triple + g2t, 2: triple + *2val, 3: all tables
+  * @param string $operation statement keyword, upper-cased before use
+  */
+ function processTables($level = 2, $operation = 'optimize') {
+   $con = $this->getDBCon();
+   $pre = $this->getTablePrefix();
+   $selected = array();
+   foreach ($this->getTables() as $tbl) {
+     $is_aux = preg_match('/(backup|setting)$/', $tbl);
+     $is_val = preg_match('/(val)$/', $tbl);
+     if ((($level < 3) && $is_aux) || (($level < 2) && $is_val)) {
+       continue;
+     }
+     $selected[] = $pre . $tbl;
+   }
+   $sql = $selected ? strtoupper($operation) . ' TABLE ' . implode(', ', $selected) : '';
+   $this->queryDB($sql, $con);
+   $err = mysql_error($con);
+   if ($err) {
+     $this->addError($err . ' in ' . $sql);
+   }
+ }
+
+ /**
+  * Runs OPTIMIZE TABLE on the store tables selected by $level (see processTables()).
+  */
+ function optimizeTables($level = 2) {
+   $outcome = $this->processTables($level, 'optimize');
+   return $outcome;
+ }
+
+ /**
+  * Runs CHECK TABLE on the store tables selected by $level (see processTables()).
+  */
+ function checkTables($level = 2) {
+   $outcome = $this->processTables($level, 'check');
+   return $outcome;
+ }
+
+ /**
+  * Runs REPAIR TABLE on the store tables selected by $level (see processTables()).
+  */
+ function repairTables($level = 2) {
+   $outcome = $this->processTables($level, 'repair');
+   return $outcome;
+ }
+
+ /* */
+
+ /**
+  * Delegates to ARC2_StoreHelper::changeNamespaceURI().
+  *
+  * @return mixed the helper's result
+  */
+ function changeNamespaceURI($old_uri, $new_uri) {
+   ARC2::inc('StoreHelper');
+   $helper = new ARC2_StoreHelper($this->a, $this);
+   $outcome = $helper->changeNamespaceURI($old_uri, $new_uri);
+   return $outcome;
+ }
+
+ /* */
+
+ /**
+  * Returns a human-readable label for a resource, cached per resource.
+  *
+  * Values that do not look like "scheme:rest" are treated as literals and
+  * returned unchanged. Otherwise the longest value among up to three label
+  * property values of the resource is used; without one, bnodes get
+  * $unnamed_label and other resources a label derived from the last segment
+  * of their URI. Underscores and camelCase boundaries are turned into spaces.
+  *
+  * @param string $res resource identifier or literal
+  * @param string $unnamed_label label for bnodes without a label value
+  * @return string
+  */
+ function getResourceLabel($res, $unnamed_label = 'An unnamed resource') {
+   if (!isset($this->resource_labels)) $this->resource_labels = array();
+   if (isset($this->resource_labels[$res])) return $this->resource_labels[$res];
+   if (!preg_match('/^[a-z0-9\_]+\:[^\s]+$/si', $res)) return $res;/* literal */
+   /* (re-)declare the label properties in the store when their list has changed */
+   $ps = $this->getLabelProps();
+   if ($this->getSetting('store_label_properties', '-') != md5(serialize($ps))) {
+     $this->inferLabelProps($ps);
+   }
+   $q = 'SELECT ?label WHERE { <' . $res . '> ?p ?label . ?p a <http://semsol.org/ns/arc#LabelProperty> } LIMIT 3';
+   $r = '';
+   if ($rows = $this->query($q, 'rows')) {
+     foreach ($rows as $row) {
+       $r = strlen($row['label']) > strlen($r) ? $row['label'] : $r;
+     }
+   }
+   if (!$r && preg_match('/^\_\:/', $res)) {
+     return $unnamed_label;
+   }
+   $r = $r ? $r : preg_replace("/^(.*[\/\#])([^\/\#]+)$/", '\\2', str_replace('#self', '', $res));
+   $r = str_replace('_', ' ', $r);
+   /* callback instead of the /e modifier, which is deprecated (PHP 5.5) and removed in PHP 7 */
+   $r = preg_replace_callback('/([a-z])([A-Z])/', array($this, 'splitCamelCaseMatch'), $r);
+   $this->resource_labels[$res] = $r;
+   return $r;
+ }
+
+ /* preg_replace_callback helper for getResourceLabel(): turns "aB" into "a b" */
+ function splitCamelCaseMatch($m) {
+   return $m[1] . ' ' . strtolower($m[2]);
+ }
+
+ /**
+  * Lists the label property URIs: the configured 'rdf_label_properties'
+  * followed by a built-in default set.
+  *
+  * @return array property URIs
+  */
+ function getLabelProps() {
+   $configured = $this->v('rdf_label_properties' , array(), $this->a);
+   $built_in = array(
+     'http://www.w3.org/2000/01/rdf-schema#label',
+     'http://xmlns.com/foaf/0.1/name',
+     'http://purl.org/dc/elements/1.1/title',
+     'http://purl.org/rss/1.0/title',
+     'http://www.w3.org/2004/02/skos/core#prefLabel',
+     'http://xmlns.com/foaf/0.1/nick',
+   );
+   return array_merge($configured, $built_in);
+ }
+
+ /**
+  * Rebuilds the <label-properties> graph so that every given property is typed
+  * as arc:LabelProperty, and remembers a hash of the list in the
+  * 'store_label_properties' setting.
+  *
+  * @param array $ps property URIs
+  */
+ function inferLabelProps($ps) {
+   $this->query('DELETE FROM <label-properties>');
+   $statements = array();
+   foreach ($ps as $prop) {
+     $statements[] = ' <' . $prop . '> a <http://semsol.org/ns/arc#LabelProperty> . ';
+   }
+   $this->query('INSERT INTO <label-properties> { ' . implode('', $statements) . ' }');
+   $this->setSetting('store_label_properties', md5(serialize($ps)));
+ }
+
+ /* */
+
+ /**
+  * Collects the distinct predicates used with the given subject.
+  *
+  * @param string $res subject URI
+  * @return array predicate URI => empty array
+  */
+ function getResourcePredicates($res) {
+   $preds = array();
+   $rows = $this->query('SELECT DISTINCT ?p WHERE { <' . $res . '> ?p ?o . }', 'rows');
+   if (!$rows) {
+     return $preds;
+   }
+   foreach ($rows as $row) {
+     $preds[$row['p']] = array();
+   }
+   return $preds;
+ }
+
+ /**
+  * Collects the distinct types of all subjects that use the given predicate.
+  *
+  * @param string $p predicate URI
+  * @return array type URIs
+  */
+ function getDomains($p) {
+   $r = array();
+   $rows = $this->query('SELECT DISTINCT ?type WHERE {?s <' . $p . '> ?o ; a ?type . }', 'rows');
+   /* query() does not always return an array (e.g. 0 after parse errors) */
+   if (!is_array($rows)) return $r;
+   foreach ($rows as $row) {
+     $r[] = $row['type'];
+   }
+   return $r;
+ }
+
+ /**
+  * Returns one rdfs:range value stored for the given predicate.
+  *
+  * @param string $p predicate URI
+  * @return string the range, or '' if none was found
+  */
+ function getPredicateRange($p) {
+   $row = $this->query('SELECT ?val WHERE {<' . $p . '> rdfs:range ?val . } LIMIT 1', 'row');
+   if (!$row) {
+     return '';
+   }
+   return $row['val'];
+ }
+
+ /* */
+
+ /**
+  * Appends a timestamped copy of the query to "arc_query_log.txt" (relative to
+  * the current working directory). Logging is best-effort: failures are ignored.
+  *
+  * @param string $q query string
+  */
+ function logQuery($q) {
+   $fp = @fopen("arc_query_log.txt", "a");
+   /* nothing to write to or close when the log file could not be opened */
+   if (!$fp) return;
+   /* NOTE(review): the timestamp carries a literal "Z" but is produced by date(), i.e. in the local timezone - confirm */
+   @fwrite($fp, date('Y-m-d\TH:i:s\Z', time()) . ' : ' . $q . '' . "\n\n");
+   @fclose($fp);
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 SPARQL ASK query handler
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2009-12-15
+*/
+
+ARC2::inc('StoreSelectQueryHandler');
+
+/**
+ * Handler for ASK queries: runs the query as a SELECT limited to one row with
+ * an extra constant "success" column and reduces the outcome to a boolean.
+ */
+class ARC2_StoreAskQueryHandler extends ARC2_StoreSelectQueryHandler {
+
+  function __construct($a = '', &$caller) {/* caller has to be a store */
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor */
+  function ARC2_StoreAskQueryHandler($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* db_con */
+    parent::__init();
+    $this->store =& $this->caller;
+  }
+
+  /* */
+
+  /**
+   * Runs the query with LIMIT 1 and the "success" result variable added.
+   *
+   * @param array $infos parsed query structure
+   * @return mixed result of the parent (SELECT) handler
+   */
+  function runQuery($infos) {
+    $infos['query']['limit'] = 1;
+    $this->infos = $infos;
+    $this->buildResultVars();
+    return parent::runQuery($this->infos);
+  }
+
+  /* */
+
+  /* adds the constant "1 AS success" column to the result variables */
+  function buildResultVars() {
+    $this->infos['query']['result_vars'][] = array('var' => '1', 'aggregate' => '', 'alias' => 'success');
+  }
+
+  /* */
+
+  /**
+   * Reads the "success" column from the temporary result table.
+   *
+   * @param string $q_sql not used here
+   * @param string $tmp_tbl name of the table holding the result
+   * @return bool true if a row with a truthy "success" value exists
+   */
+  function getFinalQueryResult($q_sql, $tmp_tbl) {
+    $con = $this->store->getDBCon();
+    $rs = mysql_query('SELECT success FROM ' . $tmp_tbl, $con);
+    /* a failed query yields false; answer "false" instead of fetching from it */
+    $row = $rs ? mysql_fetch_array($rs) : false;
+    return ($row && $row['success']) ? true : false;
+  }
+
+  /* */
+
+}
+
+
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store Atom(2) Loader
+author: Benjamin Nowack
+version: 2008-09-26
+*/
+
+ARC2::inc('AtomParser');
+
+/**
+ * Atom parser variant that forwards every parsed triple to its caller.
+ */
+class ARC2_StoreAtomLoader extends ARC2_AtomParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor */
+  function ARC2_StoreAtomLoader($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+  }
+
+  /* */
+
+  /**
+   * Passes the parts of a triple array on to the caller's addT() and counts the triple.
+   *
+   * @param array $t triple with the keys s, p, o, s_type, o_type, o_datatype, o_lang
+   */
+  function addT($t) {
+    $this->caller->addT(
+      $t['s'],
+      $t['p'],
+      $t['o'],
+      $t['s_type'],
+      $t['o_type'],
+      $t['o_datatype'],
+      $t['o_lang']
+    );
+    $this->t_count++;
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 Store CrunchBase API JSON Loader
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-07
+*/
+
+ARC2::inc('CBJSONParser');
+
+/**
+ * CBJSON parser variant that forwards every extracted triple to its caller.
+ */
+class ARC2_StoreCBJSONLoader extends ARC2_CBJSONParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor */
+  function ARC2_StoreCBJSONLoader($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+  }
+
+  /* */
+
+  /* starts the RDF extraction */
+  function done() {
+    $this->extractRDF();
+  }
+
+  /**
+   * Passes a triple on to the caller's addT() and counts it; the object value
+   * is run through toUTF8() first.
+   */
+  function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
+    $utf8_o = $this->toUTF8($o);
+    $this->caller->addT($s, $p, $utf8_o, $s_type, $o_type, $o_dt, $o_lang);
+    $this->t_count++;
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 RDF Store CONSTRUCT Query Handler
+author: Benjamin Nowack
+version: 2008-02-11 (Fix: auto-adding DISTINCT to avoid unnecessary duplicates)
+*/
+
+ARC2::inc('StoreSelectQueryHandler');
+
+/**
+ * Handler for CONSTRUCT queries: runs the WHERE part as a DISTINCT SELECT over
+ * the template's variables and instantiates the template once per result row.
+ */
+class ARC2_StoreConstructQueryHandler extends ARC2_StoreSelectQueryHandler {
+
+  function __construct($a = '', &$caller) {/* caller has to be a store */
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor */
+  function ARC2_StoreConstructQueryHandler($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* db_con */
+    parent::__init();
+    $this->store =& $this->caller;
+  }
+
+  /* */
+
+  /**
+   * Runs the query and builds the resulting triple index.
+   *
+   * @param array $infos parsed query structure
+   * @return mixed the raw SELECT result for the 'sql', 'structure' and 'index'
+   *               result formats, otherwise an index s => p => list of object arrays
+   */
+  function runQuery($infos) {
+    $this->infos = $infos;
+    $this->buildResultVars();
+    $this->infos['query']['distinct'] = 1;
+    $sub_r = parent::runQuery($this->infos);
+    $rf = $this->v('result_format', '', $infos);
+    if (in_array($rf, array('sql', 'structure', 'index'))) {
+      return $sub_r;
+    }
+    return $this->getResultIndex($sub_r);
+  }
+
+  /* */
+
+  /**
+   * Derives the SELECT result variables from the variables of the template triples.
+   * Each variable is listed once.
+   */
+  function buildResultVars() {
+    $r = array();
+    /* names already added; $r holds arrays, so an in_array() test on the name would never match */
+    $seen = array();
+    foreach ($this->infos['query']['construct_triples'] as $t) {
+      foreach (array('s', 'p', 'o') as $term) {
+        if ($t[$term . '_type'] != 'var') continue;
+        $var = $t[$term];
+        if (isset($seen[$var])) continue;
+        $seen[$var] = 1;
+        $r[] = array('var' => $var, 'aggregate' => '', 'alias' => '');
+      }
+    }
+    $this->infos['query']['result_vars'] = $r;
+  }
+
+  /* */
+
+  /**
+   * Instantiates the template triples for every result row.
+   *
+   * Template bnodes get the row number appended so that each row produces its
+   * own bnodes. A template triple is skipped for a row that lacks a value for
+   * one of its variables. Duplicate triples are added once.
+   *
+   * @param array $qr SELECT result with a 'rows' entry
+   * @return array index s => p => list of array('value' => ..., ['lang'], ['type'], ['datatype'])
+   */
+  function getResultIndex($qr) {
+    $r = array();
+    $added = array();
+    $rows = $this->v('rows', array(), $qr);
+    $cts = $this->infos['query']['construct_triples'];
+    $bnc = 0;
+    foreach ($rows as $row) {
+      $bnc++;
+      foreach ($cts as $ct) {
+        $skip_t = 0;
+        $t = array();
+        foreach (array('s', 'p', 'o') as $term) {
+          $val = $ct[$term];
+          $type = $ct[$term . '_type'];
+          $val = ($type == 'bnode') ? $val . $bnc : $val;
+          if ($type == 'var') {
+            $var = $val;
+            $skip_t = !isset($row[$var]) ? 1 : $skip_t;
+            $type = !$skip_t ? $row[$var . ' type'] : '';
+            $val = (!$skip_t) ? $row[$var] : '';
+            /* row entries are keyed by variable name ("<var> lang"), not by term position */
+            if (!$skip_t && isset($row[$var . ' lang'])) {
+              $t[$term . '_lang'] = $row[$var . ' lang'];
+            }
+            if (!$skip_t && isset($row[$var . ' datatype'])) {
+              $t[$term . '_datatype'] = $row[$var . ' datatype'];
+            }
+          }
+          $t[$term] = $val;
+          $t[$term . '_type'] = $type;
+        }
+        if ($skip_t) continue;
+        $s = $t['s'];
+        $p = $t['p'];
+        if (!isset($r[$s])) {
+          $r[$s] = array();
+        }
+        if (!isset($r[$s][$p])) {
+          $r[$s][$p] = array();
+        }
+        $o = array('value' => $t['o']);
+        foreach (array('lang', 'type', 'datatype') as $suffix) {
+          if (isset($t['o_' . $suffix]) && $t['o_' . $suffix]) {
+            $o[$suffix] = $t['o_' . $suffix];
+          }
+        }
+        $key = md5($s . ' ' . $p . ' ' . serialize($o));
+        if (!isset($added[$key])) {
+          $r[$s][$p][] = $o;
+          $added[$key] = 1;
+        }
+      }
+    }
+    return $r;
+  }
+
+  /* */
+
+}
+
+
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Store DELETE Query Handler
+ *
+ * @author Benjamin Nowack <bnowack@semsol.com>
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-24
+*/
+
+ARC2::inc('StoreQueryHandler');
+
+/**
+ * Handler for DELETE queries: removes whole graphs, explicitly listed triples
+ * or triples produced by a CONSTRUCT-style pattern, and occasionally cleans up
+ * rows that are no longer referenced.
+ */
+class ARC2_StoreDeleteQueryHandler extends ARC2_StoreQueryHandler {
+
+  function __construct($a = '', &$caller) {/* caller has to be a store */
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor */
+  function ARC2_StoreDeleteQueryHandler($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* db_con */
+    parent::__init();
+    $this->store =& $this->caller;
+    $this->handler_type = 'delete';
+  }
+
+  /* */
+
+  /**
+   * Runs the delete operation described by $infos.
+   *
+   * @param array $infos parsed query structure
+   * @return array t_count (affected rows), delete_time and index_update_time (seconds)
+   */
+  function runQuery($infos) {
+    $this->infos = $infos;
+    $con = $this->store->getDBCon();
+    $t1 = ARC2::mtime();
+    /* delete */
+    $this->refs_deleted = false;
+    /* graph(s) only */
+    if (!$this->v('construct_triples', array(), $this->infos['query'])) {
+      $tc = $this->deleteTargetGraphs();
+    }
+    /* graph(s) + explicit triples */
+    elseif (!$this->v('pattern', array(), $this->infos['query'])) {
+      $tc = $this->deleteTriples();
+    }
+    /* graph(s) + constructed triples */
+    else {
+      $tc = $this->deleteConstructedGraph();
+    }
+    $t2 = ARC2::mtime();
+    /* clean up (randomised, so that only some requests pay for it) */
+    if ($tc && ($this->refs_deleted || (rand(1, 100) == 1))) $this->cleanTableReferences();
+    if ($tc && (rand(1, 100) == 1)) $this->store->optimizeTables();
+    if ($tc && (rand(1, 500) == 1)) $this->cleanValueTables();
+    $t3 = ARC2::mtime();
+    $index_dur = round($t3 - $t2, 4);
+    $dur = round($t3 - $t1, 4);
+    return array(
+      't_count' => $tc,
+      'delete_time' => $dur,
+      'index_update_time' => $index_dur,
+    );
+  }
+
+  /* */
+
+  /**
+   * Removes the graph-to-triple references of all target graphs.
+   *
+   * @return int number of deleted g2t rows
+   */
+  function deleteTargetGraphs() {
+    $tbl_prefix = $this->store->getTablePrefix();
+    $r = 0;
+    $con = $this->store->getDBCon();
+    foreach ($this->infos['query']['target_graphs'] as $g) {
+      if ($g_id = $this->getTermID($g, 'g')) {
+        $rs = mysql_query('DELETE FROM ' . $tbl_prefix . 'g2t WHERE g = ' .$g_id, $con);
+        $r += mysql_affected_rows($con);
+      }
+    }
+    $this->refs_deleted = $r ? 1 : 0;
+    return $r;
+  }
+
+  /* */
+
+  /**
+   * Deletes the triples listed in the query's construct_triples. With target
+   * graphs only the graph references (g2t rows) are removed, without target
+   * graphs the triple rows themselves. Variable terms act as wildcards.
+   *
+   * @return int number of affected rows
+   */
+  function deleteTriples() {
+    $r = 0;
+    $dbv = $this->store->getDBVersion();
+    $tbl_prefix = $this->store->getTablePrefix();
+    $con = $this->store->getDBCon();
+    /* graph restriction */
+    $tgs = $this->infos['query']['target_graphs'];
+    $gq = '';
+    foreach ($tgs as $g) {
+      if ($g_id = $this->getTermID($g, 'g')) {
+        $gq .= $gq ? ', ' . $g_id : $g_id;
+      }
+    }
+    $gq = $gq ? ' AND G.g IN (' . $gq . ')' : '';
+    /* triples */
+    foreach ($this->infos['query']['construct_triples'] as $t) {
+      $q = '';
+      foreach (array('s', 'p', 'o') as $term) {
+        /* variables do not restrict the match */
+        if (isset($t[$term . '_type']) && preg_match('/(var)/', $t[$term . '_type'])) continue;
+        $term_id = $this->getTermID($t[$term], $term);
+        $q .= ($q ? ' AND ' : '') . 'T.' . $term . '=' . $term_id;
+        /* explicit lang/dt restricts the matching */
+        if ($term == 'o') {
+          $o_lang = $this->v1('o_lang', '', $t);
+          $o_lang_dt = $this->v1('o_datatype', $o_lang, $t);
+          if ($o_lang_dt) {
+            $q .= ($q ? ' AND ' : '') . 'T.o_lang_dt=' . $this->getTermID($o_lang_dt, 'lang_dt');
+          }
+        }
+      }
+      if ($gq) {
+        $sql = ($dbv < '04-01') ? 'DELETE ' . $tbl_prefix . 'g2t' : 'DELETE G';
+        $sql .= '
+ FROM ' . $tbl_prefix . 'g2t G
+ JOIN ' . $this->getTripleTable() . ' T ON (T.t = G.t' . $gq . ')
+ WHERE ' . $q . '
+ ';
+        $this->refs_deleted = 1;
+      }
+      else {/* triples only */
+        $sql = ($dbv < '04-01') ? 'DELETE ' . $this->getTripleTable() : 'DELETE T';
+        $sql .= ' FROM ' . $this->getTripleTable() . ' T WHERE ' . $q;
+      }
+      $rs = mysql_query($sql, $con);
+      if ($er = mysql_error($con)) {
+        $this->addError($er .' in ' . $sql);
+        /* the affected-rows value of a failed statement is -1 and must not reduce the count */
+        continue;
+      }
+      $r += mysql_affected_rows($con);
+    }
+    return $r;
+  }
+
+  /* */
+
+  /**
+   * Resolves the query's pattern through the CONSTRUCT handler and deletes the
+   * resulting concrete triples from the target graphs.
+   *
+   * @return int number of affected rows
+   */
+  function deleteConstructedGraph() {
+    ARC2::inc('StoreConstructQueryHandler');
+    /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+    $h = new ARC2_StoreConstructQueryHandler($this->a, $this->store);
+    $sub_r = $h->runQuery($this->infos);
+    $triples = ARC2::getTriplesFromIndex($sub_r);
+    $tgs = $this->infos['query']['target_graphs'];
+    $this->infos = array('query' => array('construct_triples' => $triples, 'target_graphs' => $tgs));
+    return $this->deleteTriples();
+  }
+
+  /* */
+
+  /**
+   * Deletes triples that no graph refers to and, in about one of ten calls,
+   * graph references whose triple is gone. Runs under the store's write lock.
+   */
+  function cleanTableReferences() {
+    /* lock */
+    if (!$this->store->getLock()) return $this->addError('Could not get lock in "cleanTableReferences"');
+    $con = $this->store->getDBCon();
+    $tbl_prefix = $this->store->getTablePrefix();
+    $dbv = $this->store->getDBVersion();
+    /* check for unconnected triples */
+    $sql = '
+ SELECT T.t FROM '. $tbl_prefix . 'triple T LEFT JOIN '. $tbl_prefix . 'g2t G ON ( G.t = T.t )
+ WHERE G.t IS NULL LIMIT 1
+ ';
+    if (($rs = mysql_query($sql, $con)) && mysql_num_rows($rs)) {
+      /* delete unconnected triples */
+      $sql = ($dbv < '04-01') ? 'DELETE ' . $tbl_prefix . 'triple' : 'DELETE T';
+      $sql .= '
+ FROM ' . $tbl_prefix . 'triple T
+ LEFT JOIN ' . $tbl_prefix . 'g2t G ON (G.t = T.t)
+ WHERE G.t IS NULL
+ ';
+      mysql_query($sql, $con);
+    }
+    /* check for unconnected graph refs */
+    if ((rand(1, 10) == 1)) {
+      $sql = '
+ SELECT G.g FROM '. $tbl_prefix . 'g2t G LEFT JOIN '. $tbl_prefix . 'triple T ON ( T.t = G.t )
+ WHERE T.t IS NULL LIMIT 1
+ ';
+      if (($rs = mysql_query($sql, $con)) && mysql_num_rows($rs)) {
+        /* delete unconnected graph refs */
+        $sql = ($dbv < '04-01') ? 'DELETE ' . $tbl_prefix . 'g2t' : 'DELETE G';
+        $sql .= '
+ FROM ' . $tbl_prefix . 'g2t G
+ LEFT JOIN ' . $tbl_prefix . 'triple T ON (T.t = G.t)
+ WHERE T.t IS NULL
+ ';
+        mysql_query($sql, $con);
+      }
+    }
+    /* release lock */
+    $this->store->releaseLock();
+  }
+
+  /* */
+
+  /**
+   * Deletes o2val and s2val rows that no triple refers to. Runs under the
+   * store's write lock. The id2val statement is prepared but not executed.
+   */
+  function cleanValueTables() {
+    /* lock */
+    if (!$this->store->getLock()) return $this->addError('Could not get lock in "cleanValueTables"');
+    $con = $this->store->getDBCon();
+    $tbl_prefix = $this->store->getTablePrefix();
+    $dbv = $this->store->getDBVersion();
+    /* o2val */
+    $sql = ($dbv < '04-01') ? 'DELETE ' . $tbl_prefix . 'o2val' : 'DELETE V';
+    $sql .= '
+ FROM ' . $tbl_prefix . 'o2val V
+ LEFT JOIN ' . $tbl_prefix . 'triple T ON (T.o = V.id)
+ WHERE T.t IS NULL
+ ';
+    mysql_query($sql, $con);
+    /* s2val */
+    $sql = ($dbv < '04-01') ? 'DELETE ' . $tbl_prefix . 's2val' : 'DELETE V';
+    $sql .= '
+ FROM ' . $tbl_prefix . 's2val V
+ LEFT JOIN ' . $tbl_prefix . 'triple T ON (T.s = V.id)
+ WHERE T.t IS NULL
+ ';
+    mysql_query($sql, $con);
+    /* id2val (disabled: the statement is built but its execution is commented out) */
+    $sql = ($dbv < '04-01') ? 'DELETE ' . $tbl_prefix . 'id2val' : 'DELETE V';
+    $sql .= '
+ FROM ' . $tbl_prefix . 'id2val V
+ LEFT JOIN ' . $tbl_prefix . 'g2t G ON (G.g = V.id)
+ LEFT JOIN ' . $tbl_prefix . 'triple T1 ON (T1.p = V.id)
+ LEFT JOIN ' . $tbl_prefix . 'triple T2 ON (T2.o_lang_dt = V.id)
+ WHERE G.g IS NULL AND T1.t IS NULL AND T2.t IS NULL
+ ';
+    //mysql_query($sql, $con);
+    /* release lock (it was acquired above and would otherwise stay held by this connection) */
+    $this->store->releaseLock();
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store DESCRIBE Query Handler
+author: Benjamin Nowack
+version: 2008-01-09 (Tweak: label auto-detection is now optional)
+*/
+
+ARC2::inc('StoreSelectQueryHandler');
+
+/**
+ * Handler for DESCRIBE queries: collects the resources to describe and merges
+ * the results of one CONSTRUCT query per resource, following bnode objects.
+ */
+class ARC2_StoreDescribeQueryHandler extends ARC2_StoreSelectQueryHandler {
+
+  function __construct($a = '', &$caller) {/* caller has to be a store */
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor */
+  function ARC2_StoreDescribeQueryHandler($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* db_con */
+    parent::__init();
+    $this->store =& $this->caller;
+    $this->detect_labels = $this->v('detect_describe_query_labels', 0, $this->a);
+  }
+
+  /* */
+
+  /**
+   * Describes the query's result URIs plus every non-literal value bound to
+   * one of its result variables.
+   *
+   * @param array $infos parsed query structure
+   * @return mixed the raw SELECT result for the 'sql', 'structure' and 'index'
+   *               result formats, otherwise the merged index s => p => objects
+   */
+  function runQuery($infos) {
+    $ids = $infos['query']['result_uris'];
+    $vars = $infos['query']['result_vars'];
+    if ($vars) {
+      $sub_r = parent::runQuery($infos);
+      $rf = $this->v('result_format', '', $infos);
+      if (in_array($rf, array('sql', 'structure', 'index'))) {
+        return $sub_r;
+      }
+      /* every non-literal variable binding becomes a resource to describe */
+      foreach ($this->v('rows', array(), $sub_r) as $row) {
+        foreach ($vars as $info) {
+          $name = $info['var'];
+          $val = isset($row[$name]) ? $row[$name] : '';
+          if (!$val) continue;
+          if ($row[$name . ' type'] == 'literal') continue;
+          if (in_array($val, $ids)) continue;
+          $ids[] = $val;
+        }
+      }
+    }
+    $this->r = array();
+    $this->described_ids = array();
+    $this->ids = $ids;
+    $this->added_triples = array();
+    $is_sub_describe = 0;
+    /* mergeSubResults() removes described ids from the list and appends newly found bnodes */
+    while ($this->ids) {
+      $id = $this->ids[0];
+      $this->described_ids[] = $id;
+      if ($this->detect_labels) {
+        $q = '
+ CONSTRUCT {
+ <' . $id . '> ?p ?o .
+ ?o ?label_p ?o_label .
+ ?o <http://arc.semsol.org/ns/arc#label> ?o_label .
+ } WHERE {
+ <' . $id . '> ?p ?o .
+ OPTIONAL {
+ ?o ?label_p ?o_label .
+ FILTER REGEX(str(?label_p), "(name|label|title|summary|nick|fn)$", "i")
+ }
+ }
+ ';
+      }
+      else {
+        $q = '
+ CONSTRUCT {
+ <' . $id . '> ?p ?o .
+ } WHERE {
+ <' . $id . '> ?p ?o .
+ }
+ ';
+      }
+      $sub_r = $this->store->query($q);
+      $sub_index = is_array($sub_r['result']) ? $sub_r['result'] : array();
+      $this->mergeSubResults($sub_index, $is_sub_describe);
+      $is_sub_describe = 1;
+    }
+    return $this->r;
+  }
+
+  /* */
+
+  /**
+   * Merges an index into the overall result, skipping triples that were added
+   * before, and queues bnode objects for description.
+   *
+   * @param array $index s => p => list of objects
+   * @param mixed $is_sub_describe has no effect on the outcome
+   */
+  function mergeSubResults($index, $is_sub_describe = 1) {
+    foreach ($index as $s => $ps) {
+      if (!isset($this->r[$s])) $this->r[$s] = array();
+      foreach ($ps as $p => $os) {
+        if (!isset($this->r[$s][$p])) $this->r[$s][$p] = array();
+        foreach ($os as $o) {
+          $key = md5($s . ' ' . $p . ' ' . serialize($o));
+          if (isset($this->added_triples[$key])) continue;
+          $this->r[$s][$p][] = $o;
+          $is_bnode = is_array($o) && ($o['type'] == 'bnode');
+          if ($is_bnode && !in_array($o['value'], $this->ids)) {
+            $this->ids[] = $o['value'];
+          }
+          $this->added_triples[$key] = 1;
+        }
+      }
+    }
+    /* adjust ids */
+    $pending = array();
+    foreach ($this->ids as $candidate) {
+      if (!in_array($candidate, $this->described_ids)) $pending[] = $candidate;
+    }
+    $this->ids = $pending;
+  }
+
+  /* */
+
+}
+
+
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 RDF Store DUMP Query Handler
+author: Benjamin Nowack
+version: 2008-07-02
+*/
+
+ARC2::inc('StoreQueryHandler');
+
+/**
+ * Handler for "dump" queries: outputs the store contents through ARC2_StoreDumper.
+ */
+class ARC2_StoreDumpQueryHandler extends ARC2_StoreQueryHandler {
+
+  function __construct($a = '', &$caller) {/* caller has to be a store */
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP 4 style constructor */
+  function ARC2_StoreDumpQueryHandler($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* db_con */
+    parent::__init();
+    $this->store =& $this->caller;
+  }
+
+  /* */
+
+  /**
+   * Dumps the store via ARC2_StoreDumper::dumpSPOG().
+   *
+   * @param array $infos parsed query structure (stored, not evaluated here)
+   * @param mixed $keep_bnode_ids not used
+   * @return int always 1
+   */
+  function runQuery($infos, $keep_bnode_ids = 0) {
+    $this->infos = $infos;
+    /* the connection itself is not needed here; the call is kept as in the other handlers */
+    $this->store->getDBCon();
+    ARC2::inc('StoreDumper');
+    /* plain "new": assigning the result of new by reference is deprecated (PHP 5.3) and invalid in PHP 7 */
+    $d = new ARC2_StoreDumper($this->a, $this->store);
+    $d->dumpSPOG();
+    return 1;
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 Store Dumper
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-09
+*/
+
+ARC2::inc('Class');
+
+class ARC2_StoreDumper extends ARC2_Class {
+
+ /* forwards the configuration array and the caller to the parent constructor; __init() below uses the caller as the store */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4 style constructor, delegates to __construct() */
+ function ARC2_StoreDumper($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* initialises the dumper: the caller is used as store, rows are fetched in chunks of $this->limit */
+ function __init() {
+ parent::__init();
+ $this->store =& $this->caller;
+ $this->keep_time_limit = $this->v('keep_time_limit', 0, $this->a);
+ /* number of rows per getRecordset() call */
+ $this->limit = 100000;
+ }
+
+ /* */
+
+ /**
+  * Sends the store contents to the client as SPARQL XML results.
+  *
+  * With 'store_use_dump_dir' enabled, a dump file (one per month by default)
+  * is created if missing and streamed, and the script exits. Otherwise the
+  * rows are fetched chunk by chunk and echoed directly.
+  */
+ function dumpSPOG() {
+   header('Content-Type: application/sparql-results+xml');
+   if ($this->v('store_use_dump_dir', 0, $this->a)) {
+     $dir = $this->v('store_dump_dir', 'dumps', $this->a);
+     /* default: monthly dumps */
+     $suffix = $this->v('store_dump_suffix', date('Y_m'), $this->a);
+     $path = $dir . '/dump_' . $suffix . '.spog';
+     if (!file_exists($path)) {
+       $this->saveSPOG($path);
+     }
+     readfile($path);
+     exit;
+   }
+   echo $this->getHeader();
+   $offset = 0;
+   while (true) {
+     $rs = $this->getRecordset($offset);
+     if (!$rs) break;
+     $got_rows = 0;
+     while ($row = mysql_fetch_array($rs)) {
+       echo $this->getEntry($row);
+       $got_rows = 1;
+     }
+     $offset += $this->limit;
+     /* an empty chunk means the end of the data */
+     if (!$got_rows) break;
+   }
+   echo $this->getFooter();
+ }
+
+ /* */
+
+ /**
+  * Writes the store contents to a file as SPARQL XML results.
+  *
+  * @param string $path target file
+  * @param string $q optional query; if given, its result rows are saved instead (see saveCustomSPOG())
+  * @return mixed 1 on success, the addError() result if the file could not be opened,
+  *               or the result of saveCustomSPOG() when $q is given
+  */
+ function saveSPOG($path, $q = '') {
+   if ($q) return $this->saveCustomSPOG($path, $q);
+   if (!$fp = @fopen($path, 'w')) {
+     /* realpath() yields false for a file that does not exist; fall back to the path as given */
+     $shown_path = realpath($path) ? realpath($path) : $path;
+     return $this->addError('Could not create backup file at ' . $shown_path);
+   }
+   fwrite($fp, $this->getHeader());
+   $offset = 0;
+   do {
+     $proceed = 0;
+     $rs = $this->getRecordset($offset);
+     if (!$rs) break;
+     while ($row = mysql_fetch_array($rs)) {
+       fwrite($fp, $this->getEntry($row));
+       $proceed = 1;
+     }
+     $offset += $this->limit;
+   } while ($proceed);
+   fwrite($fp, $this->getFooter());
+   @fclose($fp);
+   return 1;
+ }
+
+ /* */
+
+ function saveCustomSPOG($path, $q) {
+ if (!$fp = @fopen($path, 'w')) return $this->addError('Could not create backup file at ' . realpath($path));
+ fwrite($fp, $this->getHeader());
+ $rows = $this->store->query($q, 'rows');
+ foreach ($rows as $row) {
+ fwrite($fp, $this->getEntry($row));
+ }
+ fwrite($fp, $this->getFooter());
+ @fclose($fp);
+ }
+
+ /* */
+
+ function getRecordset($offset) {
+ $prefix = $this->store->getTablePrefix();
+ $con = $this->store->getDBCon();
+ $sql = '
+ SELECT
+ VS.val AS s,
+ T.s_type AS `s type`,
+ VP.val AS p,
+ 0 AS `p type`,
+ VO.val AS o,
+ T.o_type AS `o type`,
+ VLDT.val as `o lang_dt`,
+ VG.val as g,
+ 0 AS `g type`
+ FROM
+ ' . $prefix . 'triple T
+ JOIN ' . $prefix . 's2val VS ON (T.s = VS.id)
+ JOIN ' . $prefix . 'id2val VP ON (T.p = VP.id)
+ JOIN ' . $prefix . 'o2val VO ON (T.o = VO.id)
+ JOIN ' . $prefix . 'id2val VLDT ON (T.o_lang_dt = VLDT.id)
+ JOIN ' . $prefix . 'g2t G2T ON (T.t = G2T.t)
+ JOIN ' . $prefix . 'id2val VG ON (G2T.g = VG.id)
+ ';
+ if ($this->limit) $sql .= ' LIMIT ' . $this->limit;
+ if ($offset) $sql .= ' OFFSET ' . $offset;
+ $rs = mysql_unbuffered_query($sql, $con);
+ if (($err = mysql_error($con))) {
+ return $this->addError($err);
+ }
+ return $rs;
+ }
+
+ /* */
+
+ function getHeader() {
+ $n = "\n";
+ return '' .
+ '<?xml version="1.0"?>' .
+ $n . '<sparql xmlns="http://www.w3.org/2005/sparql-results#">' .
+ $n . ' <head>' .
+ $n . ' <variable name="s"/>' .
+ $n . ' <variable name="p"/>' .
+ $n . ' <variable name="o"/>' .
+ $n . ' <variable name="g"/>' .
+ $n . ' </head>' .
+ $n . ' <results>' .
+ '';
+ }
+
+ function getEntry($row) {
+ if (!$this->keep_time_limit) @set_time_limit($this->v('time_limit', 1200, $this->a));
+ $n = "\n";
+ $r = '';
+ $r .= $n . ' <result>';
+ foreach (array('s', 'p', 'o', 'g') as $var) {
+ if (isset($row[$var])) {
+ $type = (string) $row[$var . ' type'];
+ $r .= $n . ' <binding name="' . $var . '">';
+ $val = $this->toUTF8($row[$var]);
+ if (($type == '0') || ($type == 'uri')) {
+ $r .= $n . ' <uri>' . $this->getSafeValue($val) . '</uri>';
+ }
+ elseif (($type == '1') || ($type == 'bnode')) {
+ $r .= $n . ' <bnode>' . substr($val, 2) . '</bnode>';
+ }
+ else {
+ $lang_dt = '';
+ foreach (array('o lang_dt', 'o lang', 'o datatype') as $k) {
+ if (($var == 'o') && isset($row[$k]) && $row[$k]) $lang_dt = $row[$k];
+ }
+ $is_lang = preg_match('/^([a-z]+(\-[a-z0-9]+)*)$/i', $lang_dt);
+ list($lang, $dt) = $is_lang ? array($lang_dt, '') : array('', $lang_dt);
+ $lang = $lang ? ' xml:lang="' . $lang . '"' : '';
+ $dt = $dt ? ' datatype="' . htmlspecialchars($dt) . '"' : '';
+ $r .= $n . ' <literal' . $dt . $lang . '>' . $this->getSafeValue($val) . '</literal>';
+ }
+ $r .= $n . ' </binding>';
+ }
+ }
+ $r .= $n . ' </result>';
+ return $r;
+ }
+
+ function getSafeValue($val) {/* mainly for fixing json_decode bugs */
+ $mappings = array(
+ '%00' => '',
+ '%01' => '',
+ '%02' => '',
+ '%03' => '',
+ '%04' => '',
+ '%05' => '',
+ '%06' => '',
+ '%07' => '',
+ '%08' => '',
+ '%09' => '',
+ '%0B' => '',
+ '%0C' => '',
+ '%0E' => '',
+ '%0F' => '',
+ '%15' => '',
+ '%17' => 'ė',
+ '%1A' => ',',
+ '%1F' => '',
+ );
+ $froms = array_keys($mappings);
+ $tos = array_values($mappings);
+ foreach ($froms as $i => $from) $froms[$i] = urldecode($from);
+ $val = str_replace($froms, $tos, $val);
+ if (strpos($val, '</') !== false) {
+ $val = "\n<![CDATA[\n" . $val . "\n]]>\n";
+ }
+ else {
+ $val = htmlspecialchars($val);
+ }
+ return $val;
+ }
+
+ function getFooter() {
+ $n = "\n";
+ return '' .
+ $n . ' </results>' .
+ $n . '</sparql>' .
+ $n .
+ '';
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 SPARQL Endpoint
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-04
+*/
+
+ARC2::inc('Store');
+
+class ARC2_StoreEndpoint extends ARC2_Store {
+
+ /* Constructor: hands the config array and the caller over to the parent constructor. */
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP4-style constructor, forwards to __construct(). */
+ function ARC2_StoreEndpoint($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* Sets the default response headers and copies the endpoint settings from the config array. */
+ function __init() {
+   parent::__init();
+   $this->result = '';
+   /* default status line and Vary header, later replaced via setHeader() */
+   $this->headers = array();
+   $this->headers['http'] = 'HTTP/1.1 200 OK';
+   $this->headers['vary'] = 'Vary: Accept';
+   /* keys for read/write operations (empty: no key check) */
+   $this->read_key = $this->v('endpoint_read_key', '', $this->a);
+   $this->write_key = $this->v('endpoint_write_key', '', $this->a);
+   /* extension functions are off unless configured */
+   $this->a['store_allow_extension_functions'] = $this->v('store_allow_extension_functions', 0, $this->a);
+   $this->allow_sql = $this->v('endpoint_enable_sql_output', 0, $this->a);
+ }
+
+ /* */
+
+ /**
+  * Returns the raw request body (when $mthd is empty or "post" and the body is non-empty),
+  * otherwise the QUERY_STRING entry of $_SERVER.
+  */
+ function getQueryString($mthd = '') {
+   $body = '';
+   if (!$mthd || ($mthd == 'post')) {
+     $body = @file_get_contents('php://input');
+   }
+   if ($body) {
+     return $body;
+   }
+   return $this->v1('QUERY_STRING', '', $_SERVER);
+ }
+
+ /**
+  * Returns a request parameter.
+  *
+  * With $multi set, all occurrences of $name are extracted from the raw query string/body
+  * (see getQueryString()) and returned as a list of url-decoded values; $default (or an empty
+  * array) is returned when there is no match.
+  * Without $multi, the value is looked up in the merged $_GET/$_POST arrays (POST wins);
+  * arrays are returned as is, scalars are passed through stripslashes().
+  */
+ function p($name='', $mthd = '', $multi = '', $default = '') {
+   $mthd = strtolower($mthd);
+   if ($multi) {
+     $qs = $this->getQueryString($mthd);
+     /* match at the start of the string as well, the first parameter has no leading "&" */
+     $pattern = '/(?:^|&)' . preg_quote($name, '/') . '=([^&]+)/';
+     if (preg_match_all($pattern, $qs, $m)) {
+       $vals = array();
+       foreach ($m[1] as $val) {
+         /* the raw query string is still url-encoded */
+         $vals[] = urldecode(stripslashes($val));
+       }
+       return $vals;
+     }
+     return $default ? $default : array();
+   }
+   $args = array_merge($_GET, $_POST);
+   $r = isset($args[$name]) ? $args[$name] : $default;
+   return is_array($r) ? $r : stripslashes($r);
+ }
+
+ /* */
+
+ /* Returns the "endpoint_features" config entry (default: empty array), i.e. the query types this endpoint accepts. */
+ function getFeatures() {
+ return $this->v1('endpoint_features', array(), $this->a);
+ }
+
+ /* Registers (or replaces) the complete header line $v under the key $k; it is sent by sendHeaders(). */
+ function setHeader($k, $v) {
+ $this->headers[$k] = $v;
+ }
+
+ /* Sends all registered headers plus a Content-Length for the current result; does nothing for dumps. */
+ function sendHeaders() {
+   if (isset($this->is_dump) && $this->is_dump) {
+     return;
+   }
+   $this->setHeader('content-length', 'Content-Length: ' . strlen($this->getResult()));
+   foreach ($this->headers as $header_line) {
+     header($header_line);
+   }
+ }
+
+ /* Returns the response body built so far. */
+ function getResult() {
+ return $this->result;
+ }
+
+ /* */
+
+ /**
+  * Request dispatcher: builds the response (headers and $this->result) without sending it.
+  * - store not set up: sets it up and retries once if $auto_setup is set, otherwise 400 response
+  * - "img" parameter: image request (handleImgRequest() sends the response and exits)
+  * - "query" parameter: query request, optionally embedded in the HTML form ("show_inline")
+  * - otherwise: HTML form or service description
+  */
+ function handleRequest($auto_setup = 0) {
+ if (!$this->isSetUp()) {
+ if ($auto_setup) {
+ $this->setUp();
+ /* retry with auto setup disabled */
+ return $this->handleRequest(0);
+ }
+ else {
+ $this->setHeader('http', 'HTTP/1.1 400 Bad Request');
+ $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+ $this->result = 'Missing configuration or the endpoint store was not set up yet.';
+ }
+ }
+ elseif (($img = $this->p('img'))) {
+ $this->handleImgRequest($img);
+ }
+ elseif (($q = $this->p('query'))) {
+ $this->checkProcesses();
+ $this->handleQueryRequest($q);
+ if ($this->p('show_inline')) {
+ /* keep the query result (escaped, unless it is an HTML table) and render the form page around it */
+ $this->query_result = '
+ <div class="results">
+ ' . ($this->p('output') != 'htmltab' ? '<pre>' . htmlspecialchars($this->getResult()) . '</pre>' : $this->getResult()) . '
+ </div>
+ ';
+ $this->handleEmptyRequest();
+ }
+ }
+ else {
+ $this->handleEmptyRequest();
+ }
+ }
+
+ /* Entry point: handles the request, then sends the headers followed by the response body. */
+ function go($auto_setup = 0) {
+   $this->handleRequest($auto_setup);
+   $this->sendHeaders();
+   print $this->getResult();
+ }
+
+ /* */
+
+ /**
+  * Serves one of the embedded GIF images (currently only "bg_body") and terminates the script.
+  * Unknown image names produce an empty body.
+  */
+ function handleImgRequest($img) {
+   $this->setHeader('content-type', 'Content-type: image/gif');
+   $imgs = array();
+   $imgs['bg_body'] = base64_decode('R0lGODlhAQBkAMQAAPf39/Hx8erq6vPz8/Ly8u/v7+np6fT09Ovr6/b29u3t7ejo6Pz8/Pv7+/39/fr6+vj4+P7+/vn5+f///wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAAAAAAALAAAAAABAGQAAAUp4GIIiFIExHAkAAC9cAxJdG3TT67vTe//jKBQ6Cgaj5GkcpmcOJ/QZwgAOw==');
+   if (isset($imgs[$img])) {
+     $this->result = $imgs[$img];
+   }
+   else {
+     $this->result = '';
+   }
+   $this->sendHeaders();
+   echo $this->getResult();
+   exit;
+ }
+
+ /* */
+
+ /* Request without query: HTML form page by default, service description if an RDF/XML format is preferred. */
+ function handleEmptyRequest() {
+   $formats = array('rdfxml' => 'RDFXML', 'rdf+xml' => 'RDFXML', 'html' => 'HTML');
+   if ($this->getResultFormat($formats, 'html') == 'HTML') {
+     $this->setHeader('content-type', 'Content-type: text/html; charset=utf-8');
+     $this->result = $this->getHTMLFormDoc();
+     return;
+   }
+   /* service description */
+   $this->handleServiceDescriptionRequest();
+ }
+
+ /* */
+
+ /**
+  * Answers with a minimal service description by running a fixed CONSTRUCT query
+  * (a single void:sparqlEndpoint statement about the endpoint URL itself) through handleQueryRequest().
+  */
+ function handleServiceDescriptionRequest() {
+ $q = '
+ PREFIX void: <http://rdfs.org/ns/void#>
+ CONSTRUCT {
+ <> void:sparqlEndpoint <> .
+ }
+ WHERE {
+ ?s ?p ?o .
+ } LIMIT 1
+ ';
+ $this->handleQueryRequest($q);
+ }
+
+ /* */
+
+ /* Hook: calls checkSPARQLEndpointProcesses() on the caller if it provides that method; the return value is ignored. */
+ function checkProcesses() {
+   if (!method_exists($this->caller, 'checkSPARQLEndpointProcesses')) {
+     return;
+   }
+   $this->caller->checkSPARQLEndpointProcesses();
+ }
+
+ /* */
+
+ /**
+  * Handles a query request: parses $q (queries starting with "dump" bypass the parser),
+  * applies the key/feature/method checks, runs the query and stores the serialized
+  * response in $this->result (headers are set via setHeader()).
+  * Returns true when an error response was prepared, nothing otherwise.
+  */
+ function handleQueryRequest($q) {
+   if (preg_match('/^dump/i', $q)) {
+     $infos = array('query' => array('type' => 'dump'));
+     $this->is_dump = 1;
+   }
+   else {
+     ARC2::inc('SPARQLPlusParser');
+     /* plain assignment: "=& new" is deprecated since PHP 5.3 and a parse error in PHP 7+ */
+     $p = new ARC2_SPARQLPlusParser($this->a, $this);
+     $p->parse($q);
+     $infos = $p->getQueryInfos();
+   }
+   /* errors? */
+   if ($errors = $this->getErrors()) {
+     $this->setHeader('http', 'HTTP/1.1 400 Bad Request');
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = join("\n", $errors);
+     return true;
+   }
+   $qt = $infos['query']['type'];
+   /* request key as string; array values (key[]=..) never match.
+      The keys are compared strictly, a loose comparison would treat numeric-looking
+      strings such as "1e1" and "10" as equal */
+   $key = $this->p('key');
+   $key = is_array($key) ? null : (string) $key;
+   /* wrong read key? */
+   if ($this->read_key && ($key !== (string) $this->read_key) && preg_match('/^(select|ask|construct|describe|dump)$/', $qt)) {
+     $this->setHeader('http', 'HTTP/1.1 401 Access denied');
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = 'Access denied. Missing or wrong "key" parameter.';
+     return true;
+   }
+   /* wrong write key? */
+   if ($this->write_key && ($key !== (string) $this->write_key) && preg_match('/^(load|insert|delete|update)$/', $qt)) {
+     $this->setHeader('http', 'HTTP/1.1 401 Access denied');
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = 'Access denied. Missing or wrong "key" parameter.';
+     return true;
+   }
+   /* non-allowed query type? */
+   if (!in_array($qt, $this->getFeatures())) {
+     $this->setHeader('http', 'HTTP/1.1 401 Access denied');
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = 'Access denied for "' .$qt. '" query';
+     return true;
+   }
+   /* load/insert/delete via GET */
+   if (in_array($qt, array('load', 'insert', 'delete')) && isset($_GET['query'])) {
+     $this->setHeader('http', 'HTTP/1.1 501 Not Implemented');
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = 'Query type "' .$qt. '" not supported via GET';
+     return true;
+   }
+   /* unsupported query type */
+   if (!in_array($qt, array('select', 'ask', 'describe', 'construct', 'load', 'insert', 'delete', 'dump'))) {
+     $this->setHeader('http', 'HTTP/1.1 501 Not Implemented');
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = 'Unsupported query type "' .$qt. '"';
+     return true;
+   }
+   /* adjust infos */
+   $infos = $this->adjustQueryInfos($infos);
+   $t1 = ARC2::mtime();
+   $r = array('result' => $this->runQuery($infos, $qt));
+   $t2 = ARC2::mtime();
+   $r['query_time'] = $t2 - $t1;
+   /* query errors? */
+   if ($errors = $this->getErrors()) {
+     $this->setHeader('http', 'HTTP/1.1 400 Bad Request');
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = 'Error: ' . join("\n", $errors);
+     return true;
+   }
+   /* result: dispatch to get{Type}ResultDoc(), e.g. getSelectResultDoc() */
+   $m = 'get' . ucfirst($qt) . 'ResultDoc';
+   if (method_exists($this, $m)) {
+     $this->result = $this->$m($r);
+   }
+   else {
+     $this->setHeader('content-type', 'Content-type: text/plain; charset=utf-8');
+     $this->result = 'Result serializer not available, dumping raw data:' . "\n" . print_r($r, 1);
+   }
+ }
+
+ /* */
+
+ /**
+  * Applies endpoint settings and protocol parameters to the parsed query infos:
+  * caps the limit at "endpoint_max_limit", replaces the dataset by the
+  * default-graph-uri/named-graph-uri parameters (if any) and sets the
+  * "structure"/"sql" result formats requested via format/output.
+  */
+ function adjustQueryInfos($infos) {
+   /* limit: a query without limit counts as exceeding the maximum */
+   $max_l = $this->v('endpoint_max_limit', 0, $this->a);
+   if ($max_l && ($this->v('limit', $max_l + 1, $infos['query']) > $max_l)) {
+     $infos['query']['limit'] = $max_l;
+   }
+   /* default-graph-uri / named-graph-uri */
+   $dgs = $this->p('default-graph-uri', '', 1);
+   $ngs = $this->p('named-graph-uri', '', 1);
+   if (count(array_merge($dgs, $ngs))) {
+     $ds = array();
+     /* index 0: default graphs, index 1: named graphs */
+     foreach (array(0 => $dgs, 1 => $ngs) as $named => $graphs) {
+       foreach ($graphs as $g) {
+         $ds[] = array('graph' => $this->calcURI($g), 'named' => $named);
+       }
+     }
+     $infos['query']['dataset'] = $ds;
+   }
+   /* infos / sql result formats ("sql" wins if both are given) */
+   foreach (array('infos' => 'structure', 'sql' => 'sql') as $arg => $result_format) {
+     if (($this->p('format') == $arg) || ($this->p('output') == $arg)) {
+       $infos['result_format'] = $result_format;
+     }
+   }
+   return $infos;
+ }
+
+ /* */
+
+ /**
+  * Content negotiation. Returns the entry of $formats (format key => serializer name) for the
+  * first matching preference: the "format"/"output" request parameter, then the recognized
+  * media types of the Accept header ordered by q-value, then $default.
+  * Returns nothing (null) if no preference is a key of $formats.
+  */
+ function getResultFormat($formats, $default) {
+   $prefs = array();
+   /* arg */
+   if (($v = $this->p('format')) || ($v = $this->p('output'))) {
+     $prefs[] = $v;
+   }
+   /* accept header (not sent by every client) */
+   $accept = isset($_SERVER['HTTP_ACCEPT']) ? $_SERVER['HTTP_ACCEPT'] : '';
+   $vals = explode(',', $accept);
+   if ($vals) {
+     $o_vals = array();
+     foreach ($vals as $val) {
+       if (preg_match('/(rdf\+n3|x\-turtle|rdf\+xml|sparql\-results\+xml|sparql\-results\+json|json)/', $val, $m)) {
+         /* default priority 1, replaced by an explicit q-value */
+         $o_vals[$m[1]] = 1;
+         if (preg_match('/\;q\=([0-9\.]+)/', $val, $sub_m)) {
+           $o_vals[$m[1]] = 1 * $sub_m[1];
+         }
+       }
+     }
+     arsort($o_vals);
+     foreach ($o_vals as $val => $prio) {
+       $prefs[] = $val;
+     }
+   }
+   /* default */
+   $prefs[] = $default;
+   foreach ($prefs as $pref) {
+     if (isset($formats[$pref])) {
+       return $formats[$pref];
+     }
+   }
+ }
+
+ /* SELECT */
+
+ /**
+  * Serializes a SELECT result in the negotiated format (default: SPARQL XML) by calling
+  * get{Format}SelectResultDoc(). Returns 'not implemented' if that method does not exist
+  * (e.g. "sql" output while SQL output is disabled) and '' if no format was found.
+  */
+ function getSelectResultDoc($r) {
+   $sql_format = $this->allow_sql ? 'Plain' : 'xSQL';
+   $formats = array(
+     'xml' => 'SPARQLXML', 'sparql-results+xml' => 'SPARQLXML',
+     'json' => 'SPARQLJSON', 'sparql-results+json' => 'SPARQLJSON',
+     'php_ser' => 'PHPSER', 'plain' => 'Plain',
+     'sql' => $sql_format,
+     'infos' => 'Plain',
+     'htmltab' => 'HTMLTable',
+     'tsv' => 'TSV',
+   );
+   $f = $this->getResultFormat($formats, 'xml');
+   if (!$f) {
+     return '';
+   }
+   $m = 'get' . $f . 'SelectResultDoc';
+   if (!method_exists($this, $m)) {
+     return 'not implemented';
+   }
+   return $this->$m($r);
+ }
+
+ /**
+  * Returns the SELECT result as SPARQL Query Results XML document.
+  * $r['result'] provides 'variables' and 'rows', $r['query_time'] is added as XML comment.
+  * Each bound variable becomes a uri, bnode or literal binding, based on the row's "<var> type" entry.
+  */
+ function getSPARQLXMLSelectResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+xml');
+   $vars = $r['result']['variables'];
+   $rows = $r['result']['rows'];
+   $dur = $r['query_time'];
+   $nl = "\n";
+   /* doc */
+   $doc = '<?xml version="1.0"?>';
+   $doc .= $nl . '<sparql xmlns="http://www.w3.org/2005/sparql-results#">';
+   /* head */
+   $doc .= $nl . ' <head>';
+   $doc .= $nl . ' <!-- query time: '. round($dur, 4) .' sec -->';
+   if (is_array($vars)) {
+     foreach ($vars as $var) {
+       $doc .= $nl . ' <variable name="' .$var. '"/>';
+     }
+   }
+   $doc .= $nl . ' </head>';
+   /* results */
+   $doc .= $nl . ' <results>';
+   if (is_array($rows)) {
+     foreach ($rows as $row) {
+       $doc .= $nl . ' <result>';
+       foreach ($vars as $var) {
+         /* unbound variables are left out */
+         if (!isset($row[$var])) continue;
+         $doc .= $nl . ' <binding name="' .$var. '">';
+         switch ($row[$var . ' type']) {
+           case 'uri':
+             $doc .= $nl . ' <uri>' .htmlspecialchars($row[$var]). '</uri>';
+             break;
+           case 'bnode':
+             /* the first two characters of the bnode id are dropped */
+             $doc .= $nl . ' <bnode>' .substr($row[$var], 2). '</bnode>';
+             break;
+           default:
+             $dt = '';
+             if (isset($row[$var . ' datatype'])) {
+               $dt = ' datatype="' .htmlspecialchars($row[$var . ' datatype']). '"';
+             }
+             $lang = '';
+             if (isset($row[$var . ' lang'])) {
+               $lang = ' xml:lang="' .htmlspecialchars($row[$var . ' lang']). '"';
+             }
+             $doc .= $nl . ' <literal' . $dt . $lang . '>' .htmlspecialchars($row[$var]). '</literal>';
+         }
+         $doc .= $nl . ' </binding>';
+       }
+       $doc .= $nl . ' </result>';
+     }
+   }
+   $doc .= $nl . ' </results>';
+   /* /doc */
+   $doc .= $nl . '</sparql>';
+   return $doc;
+ }
+
+ /**
+  * Returns the SELECT result as SPARQL Query Results JSON document, wrapped in a function
+  * call if a "jsonp" or "callback" request parameter is given.
+  * All values are escaped with jsonEscape(); SQL escaping is not valid for JSON (it produces
+  * e.g. \' which JSON parsers reject) and would need a database connection.
+  */
+ function getSPARQLJSONSelectResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+json');
+   /* tolerate missing variables/rows, like the XML serializer does */
+   $vars = is_array($r['result']['variables']) ? $r['result']['variables'] : array();
+   $rows = is_array($r['result']['rows']) ? $r['result']['rows'] : array();
+   $nl = "\n";
+   /* doc */
+   $r = '{';
+   /* head */
+   $r .= $nl . ' "head": {';
+   $r .= $nl . ' "vars": [';
+   $first_var = 1;
+   foreach ($vars as $var) {
+     $r .= $first_var ? $nl : ',' . $nl;
+     $r .= ' "' .$var. '"';
+     $first_var = 0;
+   }
+   $r .= $nl . ' ]';
+   $r .= $nl . ' },';
+   /* results */
+   $r .= $nl . ' "results": {';
+   $r .= $nl . ' "bindings": [';
+   $first_row = 1;
+   foreach ($rows as $row) {
+     $r .= $first_row ? $nl : ',' . $nl;
+     $r .= ' {';
+     $first_var = 1;
+     foreach ($vars as $var) {
+       /* unbound variables are left out */
+       if (isset($row[$var])) {
+         $r .= $first_var ? $nl : ',' . $nl . $nl;
+         $r .= ' "' .$var. '": {';
+         if ($row[$var . ' type'] == 'uri') {
+           $r .= $nl . ' "type": "uri",';
+           $r .= $nl . ' "value": "' . $this->jsonEscape($row[$var]) . '"';
+         }
+         elseif ($row[$var . ' type'] == 'bnode') {
+           $r .= $nl . ' "type": "bnode",';
+           /* the first two characters of the bnode id are dropped */
+           $r .= $nl . ' "value": "' . substr($row[$var], 2) . '"';
+         }
+         else {
+           $dt = isset($row[$var . ' datatype']) ? ',' . $nl .' "datatype": "' . $this->jsonEscape($row[$var . ' datatype']) . '"' : '';
+           $lang = isset($row[$var . ' lang']) ? ',' . $nl .' "xml:lang": "' . $this->jsonEscape($row[$var . ' lang']) . '"' : '';
+           $type = $dt ? 'typed-literal' : 'literal';
+           $r .= $nl . ' "type": "' . $type . '",';
+           $r .= $nl . ' "value": "' . $this->jsonEscape($row[$var]) . '"';
+           $r .= $dt . $lang;
+         }
+         $r .= $nl . ' }';
+         $first_var = 0;
+       }
+     }
+     $r .= $nl . ' }';
+     $first_row = 0;
+   }
+   $r .= $nl . ' ]';
+   $r .= $nl . ' }';
+   /* /doc */
+   $r .= $nl . '}';
+   /* NOTE(review): the callback name is taken from the request without validation */
+   if (($v = $this->p('jsonp')) || ($v = $this->p('callback'))) {
+     $r = $v . '(' . $r . ')';
+   }
+   return $r;
+ }
+
+ /* Returns the complete $r array (result and query time) as PHP serialize() string, sent as text/plain. */
+ function getPHPSERSelectResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return serialize($r);
+ }
+
+ /* Returns a print_r() dump of $r['result'], sent as text/plain. */
+ function getPlainSelectResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return print_r($r['result'], 1);
+ }
+
+ /**
+  * Returns the SELECT result as HTML table: only the table when "show_inline" is set
+  * (the result is then embedded in the form page), a complete XHTML document otherwise.
+  */
+ function getHTMLTableSelectResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/html; charset=utf-8');
+ $vars = $r['result']['variables'];
+ $rows = $r['result']['rows'];
+ /* query time, not used in this output */
+ $dur = $r['query_time'];
+ if ($this->p('show_inline')) return '<table>' . $this->getHTMLTableRows($rows, $vars) . '</table>';
+ return '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+ ' .$this->getHTMLDocHead() . '
+ <body>
+ <table>
+ ' . $this->getHTMLTableRows($rows, $vars) . '
+ </table>
+ </body>
+ </html>
+ ';
+ }
+
+ /**
+  * Returns the table rows for $rows: a header row with the variable names (emitted together
+  * with the first data row) followed by one row of escaped cell values per result row.
+  * Returns a "No results found" note if there are no rows.
+  */
+ function getHTMLTableRows($rows, $vars) {
+   $html = '';
+   foreach ($rows as $row) {
+     /* the header cells are only built while nothing has been written yet */
+     $with_header = !$html;
+     $head_cells = '';
+     $cells = '';
+     foreach ($vars as $var) {
+       if ($with_header) {
+         $head_cells .= '<th>' . htmlspecialchars($var) . '</th>';
+       }
+       /* unbound variables result in an empty cell */
+       $cells .= '<td>' . @htmlspecialchars($row[$var]) . '</td>';
+     }
+     if ($head_cells) {
+       $html .= '<tr>' . $head_cells . '</tr>';
+     }
+     $html .= '<tr>' . $cells . '</tr>';
+   }
+   if ($html) {
+     return $html;
+   }
+   return '<em>No results found</em>';
+ }
+
+ /* Returns the SELECT result as tab-separated values (see getTSVRows()), sent as UTF-8 text/plain. */
+ function getTSVSelectResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: text/plain; charset=utf-8');
+   $result = $r['result'];
+   return $this->getTSVRows($result['rows'], $result['variables']);
+ }
+
+ /**
+  * Returns $rows as tab-separated lines: a header line with the variable names, then one
+  * line per row. Tabs inside values are replaced by a literal "\t", unbound variables
+  * become empty cells. Returns 'No results found' if there are no rows.
+  */
+ function getTSVRows($rows, $vars) {
+   $delim = "\t";
+   $esc_delim = "\\t";
+   $lines = array();
+   foreach ($rows as $row) {
+     if (!$lines) {
+       /* header line, only when there is at least one row */
+       $lines[] = join($delim, $vars);
+     }
+     $cells = array();
+     foreach ($vars as $var) {
+       $cells[] = isset($row[$var]) ? str_replace($delim, $esc_delim, $row[$var]) : '';
+     }
+     /* join() keeps the delimiter after empty or "0" cells, so the columns stay aligned */
+     $lines[] = join($delim, $cells);
+   }
+   return $lines ? join("\n", $lines) : 'No results found';
+ }
+
+ /* ASK */
+
+ /**
+  * Serializes an ASK result in the negotiated format (default: SPARQL XML) by calling
+  * get{Format}AskResultDoc(). Returns 'not implemented' if that method does not exist
+  * and '' if no format was found.
+  */
+ function getAskResultDoc($r) {
+   $sql_format = $this->allow_sql ? 'Plain' : 'xSQL';
+   $formats = array(
+     'xml' => 'SPARQLXML', 'sparql-results+xml' => 'SPARQLXML',
+     'json' => 'SPARQLJSON', 'sparql-results+json' => 'SPARQLJSON',
+     'plain' => 'Plain',
+     'php_ser' => 'PHPSER',
+     'sql' => $sql_format,
+     'infos' => 'Plain',
+   );
+   $f = $this->getResultFormat($formats, 'xml');
+   if (!$f) {
+     return '';
+   }
+   $m = 'get' . $f . 'AskResultDoc';
+   if (!method_exists($this, $m)) {
+     return 'not implemented';
+   }
+   return $this->$m($r);
+ }
+
+ /* Returns the ASK result as SPARQL XML document with a <boolean> element; the query time is added as comment. */
+ function getSPARQLXMLAskResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+xml');
+   $r_val = 'false';
+   if ($r['result']) {
+     $r_val = 'true';
+   }
+   $dur = $r['query_time'];
+   $lines = array(
+     '<?xml version="1.0"?>',
+     '<sparql xmlns="http://www.w3.org/2005/sparql-results#">',
+     ' <head>',
+     ' <!-- query time: '. round($dur, 4) .' sec -->',
+     ' </head>',
+     ' <boolean>' .$r_val. '</boolean>',
+     '</sparql>',
+   );
+   return join("\n", $lines);
+ }
+
+ /* Returns the ASK result as SPARQL JSON document ("boolean" member), wrapped in a function call if a "jsonp"/"callback" parameter is given. */
+ function getSPARQLJSONAskResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+json');
+   $r_val = $r['result'] ? 'true' : 'false';
+   $lines = array(
+     '{',
+     ' "head": {',
+     ' },',
+     ' "boolean" : ' . $r_val,
+     '}',
+   );
+   /* the document starts with a line break */
+   $doc = "\n" . join("\n", $lines);
+   if (($v = $this->p('jsonp')) || ($v = $this->p('callback'))) {
+     return $v . '(' . $doc . ')';
+   }
+   return $doc;
+ }
+
+ /* Returns the complete $r array (result and query time) as PHP serialize() string, sent as text/plain. */
+ function getPHPSERAskResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return serialize($r);
+ }
+
+ /* Returns the string 'true' or 'false', depending on the truthiness of $r['result'], sent as text/plain. */
+ function getPlainAskResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return $r['result'] ? 'true' : 'false';
+ }
+
+ /* CONSTRUCT */
+
+ /**
+  * Serializes a CONSTRUCT result in the negotiated format (default: RDF/XML) by calling
+  * get{Format}ConstructResultDoc(). Returns 'not implemented' if that method does not exist
+  * and '' if no format was found.
+  */
+ function getConstructResultDoc($r) {
+   $sql_format = $this->allow_sql ? 'Plain' : 'xSQL';
+   $formats = array(
+     'rdfxml' => 'RDFXML', 'rdf+xml' => 'RDFXML',
+     'json' => 'RDFJSON', 'rdf+json' => 'RDFJSON',
+     'turtle' => 'Turtle', 'x-turtle' => 'Turtle', 'rdf+n3' => 'Turtle',
+     'php_ser' => 'PHPSER',
+     'sql' => $sql_format,
+     'infos' => 'Plain',
+   );
+   $f = $this->getResultFormat($formats, 'rdfxml');
+   if (!$f) {
+     return '';
+   }
+   $m = 'get' . $f . 'ConstructResultDoc';
+   if (!method_exists($this, $m)) {
+     return 'not implemented';
+   }
+   return $this->$m($r);
+ }
+
+ /* Returns the constructed index as RDF/XML, followed by an XML comment with the query time. */
+ function getRDFXMLConstructResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/rdf+xml');
+   $ser = ARC2::getRDFXMLSerializer($this->a);
+   $doc = $ser->getSerializedIndex($r['result']);
+   return $doc . "\n" . '<!-- query time: ' . $r['query_time'] . ' -->';
+ }
+
+ /* Returns the constructed index as Turtle, preceded by a comment line with the query time. */
+ function getTurtleConstructResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/x-turtle');
+   $ser = ARC2::getTurtleSerializer($this->a);
+   $comment = '# query time: ' . $r['query_time'];
+   return $comment . "\n" . $ser->getSerializedIndex($r['result']);
+ }
+
+ /* Returns the constructed index as RDF/JSON, wrapped in a function call if a "jsonp"/"callback" parameter is given. */
+ function getRDFJSONConstructResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/json');
+   $ser = ARC2::getRDFJSONSerializer($this->a);
+   $doc = $ser->getSerializedIndex($r['result']);
+   if (($v = $this->p('jsonp')) || ($v = $this->p('callback'))) {
+     return $v . '(' . $doc . ')';
+   }
+   return $doc;
+ }
+
+ /* Returns the complete $r array (result and query time) as PHP serialize() string, sent as text/plain. */
+ function getPHPSERConstructResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return serialize($r);
+ }
+
+ /* Returns a print_r() dump of $r['result'], sent as text/plain. */
+ function getPlainConstructResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return print_r($r['result'], 1);
+ }
+
+ /* DESCRIBE */
+
+ /**
+  * Serializes a DESCRIBE result in the negotiated format (default: RDF/XML) by calling
+  * get{Format}DescribeResultDoc(). Returns 'not implemented' if that method does not exist
+  * and '' if no format was found.
+  */
+ function getDescribeResultDoc($r) {
+   $sql_format = $this->allow_sql ? 'Plain' : 'xSQL';
+   $formats = array(
+     'rdfxml' => 'RDFXML', 'rdf+xml' => 'RDFXML',
+     'json' => 'RDFJSON', 'rdf+json' => 'RDFJSON',
+     'turtle' => 'Turtle', 'x-turtle' => 'Turtle', 'rdf+n3' => 'Turtle',
+     'php_ser' => 'PHPSER',
+     'sql' => $sql_format,
+     'infos' => 'Plain'
+   );
+   $f = $this->getResultFormat($formats, 'rdfxml');
+   if (!$f) {
+     return '';
+   }
+   $m = 'get' . $f . 'DescribeResultDoc';
+   if (!method_exists($this, $m)) {
+     return 'not implemented';
+   }
+   return $this->$m($r);
+ }
+
+ /* Returns the description index as RDF/XML, followed by an XML comment with the query time. */
+ function getRDFXMLDescribeResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/rdf+xml');
+   $ser = ARC2::getRDFXMLSerializer($this->a);
+   $doc = $ser->getSerializedIndex($r['result']);
+   return $doc . "\n" . '<!-- query time: ' . $r['query_time'] . ' -->';
+ }
+
+ /* Returns the description index as Turtle, preceded by a comment line with the query time. */
+ function getTurtleDescribeResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/x-turtle');
+   $ser = ARC2::getTurtleSerializer($this->a);
+   $comment = '# query time: ' . $r['query_time'];
+   return $comment . "\n" . $ser->getSerializedIndex($r['result']);
+ }
+
+ /* Returns the description index as RDF/JSON, wrapped in a function call if a "jsonp"/"callback" parameter is given. */
+ function getRDFJSONDescribeResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/json');
+   $ser = ARC2::getRDFJSONSerializer($this->a);
+   $doc = $ser->getSerializedIndex($r['result']);
+   if (($v = $this->p('jsonp')) || ($v = $this->p('callback'))) {
+     return $v . '(' . $doc . ')';
+   }
+   return $doc;
+ }
+
+ /* Returns the complete $r array (result and query time) as PHP serialize() string, sent as text/plain. */
+ function getPHPSERDescribeResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return serialize($r);
+ }
+
+ /* Returns a print_r() dump of $r['result'], sent as text/plain. */
+ function getPlainDescribeResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return print_r($r['result'], 1);
+ }
+
+ /* DUMP */
+
+ /* Result doc for dump queries: clears the registered headers and returns an empty body (sendHeaders() sends nothing for dumps either). */
+ function getDumpResultDoc() {
+ $this->headers = array();
+ return '';
+ }
+
+ /* LOAD */
+
+ /**
+  * Serializes a LOAD result in the negotiated format (default: SPARQL XML) by calling
+  * get{Format}LoadResultDoc(). Returns 'not implemented' if that method does not exist
+  * and '' if no format was found.
+  */
+ function getLoadResultDoc($r) {
+   $sql_format = $this->allow_sql ? 'Plain' : 'xSQL';
+   $formats = array(
+     'xml' => 'SPARQLXML', 'sparql-results+xml' => 'SPARQLXML',
+     'json' => 'SPARQLJSON', 'sparql-results+json' => 'SPARQLJSON',
+     'plain' => 'Plain',
+     'php_ser' => 'PHPSER',
+     'sql' => $sql_format,
+     'infos' => 'Plain',
+   );
+   $f = $this->getResultFormat($formats, 'xml');
+   if (!$f) {
+     return '';
+   }
+   $m = 'get' . $f . 'LoadResultDoc';
+   if (!method_exists($this, $m)) {
+     return 'not implemented';
+   }
+   return $this->$m($r);
+ }
+
+ /* Returns the LOAD result as XML document with the 't_count' value in an <inserted> element; the query time is added as comment. */
+ function getSPARQLXMLLoadResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+xml');
+   $r_val = $r['result']['t_count'];
+   $dur = $r['query_time'];
+   $lines = array(
+     '<?xml version="1.0"?>',
+     '<sparql xmlns="http://www.w3.org/2005/sparql-results#">',
+     ' <head>',
+     ' <!-- query time: '. round($dur, 4) .' sec -->',
+     ' </head>',
+     ' <inserted>' .$r_val. '</inserted>',
+     '</sparql>',
+   );
+   return join("\n", $lines);
+ }
+
+ /* Returns the LOAD result as JSON document ("inserted": t_count), wrapped in a function call if a "jsonp"/"callback" parameter is given. */
+ function getSPARQLJSONLoadResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+json');
+   $r_val = $r['result']['t_count'];
+   $lines = array(
+     '{',
+     ' "head": {',
+     ' },',
+     ' "inserted" : ' . $r_val,
+     '}',
+   );
+   /* the document starts with a line break */
+   $doc = "\n" . join("\n", $lines);
+   if (($v = $this->p('jsonp')) || ($v = $this->p('callback'))) {
+     return $v . '(' . $doc . ')';
+   }
+   return $doc;
+ }
+
+ /* Returns the complete $r array (result and query time) as PHP serialize() string, sent as text/plain. */
+ function getPHPSERLoadResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return serialize($r);
+ }
+
+ /* Returns a print_r() dump of $r['result'], sent as text/plain. */
+ function getPlainLoadResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return print_r($r['result'], 1);
+ }
+
+ /* DELETE */
+
+ /**
+  * Serializes a DELETE result in the negotiated format (default: SPARQL XML) by calling
+  * get{Format}DeleteResultDoc(). Returns 'not implemented' if that method does not exist
+  * and '' if no format was found.
+  */
+ function getDeleteResultDoc($r) {
+   $formats = array(
+     'xml' => 'SPARQLXML', 'sparql-results+xml' => 'SPARQLXML',
+     'json' => 'SPARQLJSON', 'sparql-results+json' => 'SPARQLJSON',
+     'plain' => 'Plain',
+     'php_ser' => 'PHPSER'
+   );
+   $f = $this->getResultFormat($formats, 'xml');
+   if (!$f) {
+     return '';
+   }
+   $m = 'get' . $f . 'DeleteResultDoc';
+   if (!method_exists($this, $m)) {
+     return 'not implemented';
+   }
+   return $this->$m($r);
+ }
+
+ /* Returns the DELETE result as XML document with the 't_count' value in a <deleted> element; the query time is added as comment. */
+ function getSPARQLXMLDeleteResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+xml');
+   $r_val = $r['result']['t_count'];
+   $dur = $r['query_time'];
+   $lines = array(
+     '<?xml version="1.0"?>',
+     '<sparql xmlns="http://www.w3.org/2005/sparql-results#">',
+     ' <head>',
+     ' <!-- query time: '. round($dur, 4) .' sec -->',
+     ' </head>',
+     ' <deleted>' .$r_val. '</deleted>',
+     '</sparql>',
+   );
+   return join("\n", $lines);
+ }
+
+ /* Returns the DELETE result as JSON document ("deleted": t_count), wrapped in a function call if a "jsonp"/"callback" parameter is given. */
+ function getSPARQLJSONDeleteResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+json');
+   $r_val = $r['result']['t_count'];
+   $lines = array(
+     '{',
+     ' "head": {',
+     ' },',
+     ' "deleted" : ' . $r_val,
+     '}',
+   );
+   /* the document starts with a line break */
+   $doc = "\n" . join("\n", $lines);
+   if (($v = $this->p('jsonp')) || ($v = $this->p('callback'))) {
+     return $v . '(' . $doc . ')';
+   }
+   return $doc;
+ }
+
+ /* Returns the complete $r array (result and query time) as PHP serialize() string, sent as text/plain. */
+ function getPHPSERDeleteResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return serialize($r);
+ }
+
+ /* Returns a print_r() dump of $r['result'], sent as text/plain. */
+ function getPlainDeleteResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return print_r($r['result'], 1);
+ }
+
+ /* INSERT */
+
+ /**
+  * Serializes an INSERT result in the negotiated format (default: SPARQL XML) by calling
+  * get{Format}InsertResultDoc(). Returns 'not implemented' if that method does not exist
+  * and '' if no format was found.
+  */
+ function getInsertResultDoc($r) {
+   $formats = array(
+     'xml' => 'SPARQLXML', 'sparql-results+xml' => 'SPARQLXML',
+     'json' => 'SPARQLJSON', 'sparql-results+json' => 'SPARQLJSON',
+     'plain' => 'Plain',
+     'php_ser' => 'PHPSER'
+   );
+   $f = $this->getResultFormat($formats, 'xml');
+   if (!$f) {
+     return '';
+   }
+   $m = 'get' . $f . 'InsertResultDoc';
+   if (!method_exists($this, $m)) {
+     return 'not implemented';
+   }
+   return $this->$m($r);
+ }
+
+ /* Returns the INSERT result as XML document with the 't_count' value in an <inserted> element; the query time is added as comment. */
+ function getSPARQLXMLInsertResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+xml');
+   $r_val = $r['result']['t_count'];
+   $dur = $r['query_time'];
+   $lines = array(
+     '<?xml version="1.0"?>',
+     '<sparql xmlns="http://www.w3.org/2005/sparql-results#">',
+     ' <head>',
+     ' <!-- query time: '. round($dur, 4) .' sec -->',
+     ' </head>',
+     ' <inserted>' .$r_val. '</inserted>',
+     '</sparql>',
+   );
+   return join("\n", $lines);
+ }
+
+ /* Returns the INSERT result as JSON document ("inserted": t_count), wrapped in a function call if a "jsonp"/"callback" parameter is given. */
+ function getSPARQLJSONInsertResultDoc($r) {
+   $this->setHeader('content-type', 'Content-Type: application/sparql-results+json');
+   $r_val = $r['result']['t_count'];
+   $lines = array(
+     '{',
+     ' "head": {',
+     ' },',
+     ' "inserted" : ' . $r_val,
+     '}',
+   );
+   /* the document starts with a line break */
+   $doc = "\n" . join("\n", $lines);
+   if (($v = $this->p('jsonp')) || ($v = $this->p('callback'))) {
+     return $v . '(' . $doc . ')';
+   }
+   return $doc;
+ }
+
+ /* Returns the complete $r array (result and query time) as PHP serialize() string, sent as text/plain. */
+ function getPHPSERInsertResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return serialize($r);
+ }
+
+ /* Returns a print_r() dump of $r['result'], sent as text/plain. */
+ function getPlainInsertResultDoc($r) {
+ $this->setHeader('content-type', 'Content-Type: text/plain');
+ return print_r($r['result'], 1);
+ }
+
+ /* */
+
+ /**
+  * Returns $v escaped for use inside a double-quoted JSON string (without the enclosing quotes).
+  * Uses json_encode() where available and falls back to manual replacements otherwise
+  * or when json_encode() fails (e.g. on invalid UTF-8).
+  */
+ function jsonEscape($v) {
+   if (function_exists('json_encode')) {
+     $enc = json_encode((string) $v);
+     if (is_string($enc) && (strlen($enc) >= 2)) {
+       /* remove exactly the two enclosing quotes; trim() would also eat an escaped
+          quote at the end of the value and leave a dangling backslash */
+       return (string) substr($enc, 1, -1);
+     }
+   }
+   /* chr(8)/chr(12): backspace and form feed, PHP has no "\b" escape sequence */
+   $from = array("\\", "\r", "\t", "\n", '"', chr(8), chr(12), "/");
+   $to = array('\\\\', '\r', '\t', '\n', '\"', '\b', '\f', '\/');
+   return str_replace($from, $to, $v);
+ }
+
+ /* */
+
+ /* Returns the complete XHTML page of the endpoint (doctype, head and body with the query form). */
+ function getHTMLFormDoc() {
+ return '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+ ' . $this->getHTMLDocHead() . '
+ ' . $this->getHTMLDocBody() . '
+ </html>
+ ';
+ }
+
+ /* Returns the <head> element with the page title and the inline style sheet. */
+ function getHTMLDocHead() {
+ return '
+ <head>
+ <title>' . $this->getHTMLDocTitle() . '</title>
+ <style type="text/css">
+ ' . $this->getHTMLDocCSS() . '
+ </style>
+ </head>
+ ';
+ }
+
+ /* Returns the page title: the "endpoint_title" config entry, or a default title. */
+ function getHTMLDocTitle() {
+ return $this->v('endpoint_title', 'ARC SPARQL+ Endpoint', $this->a);
+ }
+
+ /* Returns the page heading: the "endpoint_heading" config entry, or a default heading that includes the ARC2 version. */
+ function getHTMLDocHeading() {
+ return $this->v('endpoint_heading', 'ARC SPARQL+ Endpoint (v' . ARC2::getVersion() . ')', $this->a);
+ }
+
+ /**
+  * Returns the style sheet of the HTML pages: the "endpoint_css" config entry,
+  * or the built-in default rules below. The body background image is requested
+  * from the endpoint itself via "?img=bg_body".
+  */
+ function getHTMLDocCSS() {
+ $default = '
+ body {
+ font-size: 14px;
+ font-family: Trebuchet MS, Verdana, Geneva, sans-serif;
+ background: #fff url(?img=bg_body) top center repeat-x;
+ padding: 5px 20px 20px 20px;
+ color: #666;
+ }
+ h1 { font-size: 1.6em; font-weight: normal; }
+ a { color: #c00000; }
+ th, td {
+ border: 1px dotted #eee;
+ padding: 2px 4px;
+ }
+ #sparql-form {
+ margin-bottom: 30px;
+ }
+ #query {
+ float: left;
+ width: 60%;
+ display: block;
+ height: 265px;
+ margin-bottom: 10px;
+ }
+ .options {
+ float: right;
+ font-size: 0.9em;
+ width: 35%;
+ border-top: 1px solid #ccc;
+ }
+ .options h3 {
+ margin: 5px;
+ }
+ .options dl{
+ margin: 0px;
+ padding: 0px 10px 5px 20px;
+ }
+ .options dl dt {
+ border-top: 1px dotted #ddd;
+ padding-top: 10px;
+ }
+ .options dl dt.first {
+ border: none;
+ }
+ .options dl dd {
+ padding: 5px 0px 7px 0px;
+ }
+ .options-2 {
+ clear: both;
+ margin: 10px 0px;
+ }
+ .form-buttons {
+ }
+ .results {
+ border: 1px solid #eee;
+ padding: 5px;
+ background-color: #fcfcfc;
+ }
+ ';
+ return $this->v('endpoint_css', $default, $this->a);
+ }
+
+ /**
+  * Builds the <body> section: heading, intro text (enabled operations and
+  * result limit), the query form and, when "show_inline" is set in the
+  * request, the current query result.
+  *
+  * @return string
+  */
+ function getHTMLDocBody() {
+   $heading = $this->getHTMLDocHeading();
+   $features = join(', ', $this->getFeatures());
+   $max_limit = $this->v('endpoint_max_limit', '<em>unrestricted</em>', $this->a);
+   $form = $this->getHTMLDocForm();
+   $inline_result = $this->p('show_inline') ? $this->query_result : '';
+   return '
+ <body>
+ <h1>' . $heading . '</h1>
+ <div class="intro">
+ <p>
+ <a href="?">This interface</a> implements
+ <a href="http://www.w3.org/TR/rdf-sparql-query/">SPARQL</a> and
+ <a href="http://arc.semsol.org/docs/v2/sparql+">SPARQL+</a> via <a href="http://www.w3.org/TR/rdf-sparql-protocol/#query-bindings-http">HTTP Bindings</a>.
+ </p>
+ <p>
+ Enabled operations: ' . $features . '
+ </p>
+ <p>
+ Max. number of results : ' . $max_limit . '
+ </p>
+ </div>
+ ' . $form . '
+ ' . $inline_result . '
+ </body>
+ ';
+ }
+
+ /**
+  * Builds the SPARQL query form. The textarea is pre-filled with the
+  * HTML-escaped "query" request parameter, or with a sample query when none
+  * was sent; the form re-uses the HTTP method of the current request.
+  *
+  * @return string
+  */
+ function getHTMLDocForm() {
+   if ($this->p('query')) {
+     $q = htmlspecialchars($this->p('query'));
+   }
+   else {
+     $q = "SELECT * WHERE {\n GRAPH ?g { ?s ?p ?o . }\n}\nLIMIT 10";
+   }
+   $method = ($_SERVER['REQUEST_METHOD'] == 'GET') ? 'get' : 'post';
+   $options = $this->getHTMLDocOptions();
+   return '
+ <form id="sparql-form" action="?" enctype="application/x-www-form-urlencoded" method="' . $method . '">
+ <textarea id="query" name="query" rows="20" cols="80">' . $q . '</textarea>
+ ' . $options . '
+ <div class="form-buttons">
+ <input type="submit" value="Send Query" />
+ <input type="reset" value="Reset" />
+ </div>
+ </form>
+ ';
+ }
+
+ /**
+  * Builds the options panel of the query form: output format selector,
+  * jsonp callback, API key, "show inline" checkbox and the GET/POST
+  * switcher links. Current request values are echoed back (HTML-escaped for
+  * the text inputs); the SQL output option is only offered when
+  * $this->allow_sql is set.
+  *
+  * @return string
+  */
+ function getHTMLDocOptions() {
+   $chosen = $this->p('output');
+   $mark = ' selected="selected"';
+   return '
+ <div class="options">
+ <h3>Options</h3>
+ <dl>
+ <dt class="first">Output format (if supported by query type):</dt>
+ <dd>
+ <select id="output" name="output">
+ <option value="" ' . (!$chosen ? $mark : '') . '>default</option>
+ <option value="xml" ' . ($chosen == 'xml' ? $mark : '') . '>XML</option>
+ <option value="json" ' . ($chosen == 'json' ? $mark : '') . '>JSON</option>
+ <option value="plain" ' . ($chosen == 'plain' ? $mark : '') . '>Plain</option>
+ <option value="php_ser" ' . ($chosen == 'php_ser' ? $mark : '') . '>Serialized PHP</option>
+ <option value="turtle" ' . ($chosen == 'turtle' ? $mark : '') . '>Turtle</option>
+ <option value="rdfxml" ' . ($chosen == 'rdfxml' ? $mark : '') . '>RDF/XML</option>
+ <option value="infos" ' . ($chosen == 'infos' ? $mark : '') . '>Query Structure</option>
+ ' . ($this->allow_sql ? '<option value="sql" ' . ($chosen == 'sql' ? $mark : '') . '>SQL</option>' : '') . '
+ <option value="htmltab" ' . ($chosen == 'htmltab' ? $mark : '') . '>HTML Table</option>
+ <option value="tsv" ' . ($chosen == 'tsv' ? $mark : '') . '>TSV</option>
+ </select>
+ </dd>
+
+ <dt>jsonp/callback (for JSON results)</dt>
+ <dd>
+ <input type="text" id="jsonp" name="jsonp" value="' . htmlspecialchars($this->p('jsonp')) . '" />
+ </dd>
+
+ <dt>API key (if required)</dt>
+ <dd>
+ <input type="text" id="key" name="key" value="' . htmlspecialchars($this->p('key')) . '" />
+ </dd>
+
+ <dt>Show results inline: </dt>
+ <dd>
+ <input type="checkbox" name="show_inline" value="1" ' . ($this->p('show_inline') ? ' checked="checked"' : '') . ' />
+ </dd>
+
+ </dl>
+ </div>
+ <div class="options-2">
+ Change HTTP method:
+ <a href="javascript:;" onclick="javascript:document.getElementById(\'sparql-form\').method=\'get\'">GET</a>
+ <a href="javascript:;" onclick="javascript:document.getElementById(\'sparql-form\').method=\'post\'">POST</a>
+ </div>
+ ';
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 RDF Store Helper
+author: Benjamin Nowack
+version: 2008-04-03 (Tweak: Changed locking approach from "LOCK TABLE" to "GET LOCK")
+*/
+
+ARC2::inc('Class');
+
+/**
+ * Maintenance helper for an ARC2 store. The caller passed to the
+ * constructor has to be the store the helper operates on.
+ */
+class ARC2_StoreHelper extends ARC2_Class {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct */
+  function ARC2_StoreHelper($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* db_con */
+    parent::__init();
+    $this->store =& $this->caller;
+  }
+
+  /* */
+
+  /**
+   * Rewrites every stored term that starts with $old_uri so that it starts
+   * with $new_uri instead.
+   *
+   * Terms whose rewritten value does not exist yet are renamed in place in
+   * their *2val table. Terms whose rewritten value already has an ID are
+   * merged: all triple tables are re-pointed from the old ID to the
+   * existing one.
+   *
+   * Nothing is changed (and zero counts are returned) when $old_uri is
+   * empty or the store lock cannot be obtained.
+   *
+   * @param string $old_uri namespace prefix to replace
+   * @param string $new_uri replacement prefix
+   * @return array 'id_replacements' => renamed terms, 'triple_updates' => re-pointed triple columns
+   */
+  function changeNamespaceURI($old_uri, $new_uri) {
+    $id_changes = 0;
+    $t_changes = 0;
+    $old_uri = (string) $old_uri;
+    $old_len = strlen($old_uri);
+    /* an empty prefix would match (and pointlessly scan) every term */
+    if ($old_len && $this->store->getLock()) {/* table lock */
+      $con = $this->store->getDBCon();
+      $prefix = $this->store->getTablePrefix();
+      /* escape LIKE wildcards ("%", "_") and the LIKE escape char first, then the SQL literal itself */
+      $like = mysql_real_escape_string(addcslashes($old_uri, '%_\\'), $con);
+      foreach (array('id', 's', 'o') as $id_col) {
+        $tbl = $prefix . $id_col . '2val';
+        $sql = 'SELECT id, val FROM ' . $tbl . ' WHERE val LIKE "' . $like . '%"';
+        if ($rs = mysql_query($sql, $con)) {
+          while ($row = mysql_fetch_array($rs)) {
+            /* LIKE may match case-insensitively; only rewrite exact prefix matches */
+            if (strncmp($row['val'], $old_uri, $old_len) !== 0) continue;
+            /* replace the leading namespace only, not later occurrences inside the value */
+            $new_val = $new_uri . substr($row['val'], $old_len);
+            $new_id = $this->store->getTermID($new_val, $id_col , 'id');
+            if (!$new_id) {/* new ns uri */
+              $sub_sql = "UPDATE " . $tbl . " SET val = '" . mysql_real_escape_string($new_val, $con) . "' WHERE id = " . $row['id'];
+              mysql_query($sub_sql, $con);
+              $id_changes++;
+            }
+            else {/* target term exists already: re-point the triples */
+              $t_tbls = $this->store->getTables();
+              foreach ($t_tbls as $t_tbl) {
+                if (preg_match('/^triple_/', $t_tbl)) {
+                  foreach (array('s', 'p', 'o', 'o_lang_dt') as $t_col) {
+                    $sub_sql = "UPDATE " . $prefix . $t_tbl . " SET " . $t_col . " = " . $new_id . " WHERE " . $t_col . " = " . $row['id'];
+                    mysql_query($sub_sql, $con);
+                    $t_changes += mysql_affected_rows($con);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      $this->store->releaseLock();
+    }
+    return array('id_replacements' => $id_changes, 'triple_updates' => $t_changes);
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 RDF Store INSERT Query Handler
+author: Benjamin Nowack
+version: 2007-09-11 (Fix: empty CONSTRUCT results were not caught, which led to a GET in LOAD
+ Tweak: INSERT CONSTRUCT will keep bnode ids unchanged)
+*/
+
+ARC2::inc('StoreQueryHandler');
+
+/**
+ * Handles SPARQL+ INSERT queries against an ARC2 store. The caller passed
+ * to the constructor has to be a store.
+ */
+class ARC2_StoreInsertQueryHandler extends ARC2_StoreQueryHandler {
+
+  function __construct($a = '', &$caller) {/* caller has to be a store */
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct */
+  function ARC2_StoreInsertQueryHandler($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {/* db_con */
+    parent::__init();
+    $this->store =& $this->caller;
+  }
+
+  /* */
+
+  /**
+   * Runs a parsed INSERT query.
+   *
+   * Without a WHERE pattern the query's construct triples are inserted
+   * directly. With a pattern, the query is first evaluated by the CONSTRUCT
+   * handler and its result is inserted with bnode IDs kept unchanged.
+   *
+   * @param array $infos parsed query structure (reads $infos['query'])
+   * @param int $keep_bnode_ids passed on to the store for pattern-less inserts
+   * @return mixed result of the store's insert(), or zero counts when the CONSTRUCT step yields nothing
+   */
+  function runQuery($infos, $keep_bnode_ids = 0) {
+    $this->infos = $infos;
+    $target_graph = $this->infos['query']['target_graph'];
+    /* insert */
+    if (!$this->v('pattern', array(), $this->infos['query'])) {
+      return $this->store->insert($this->infos['query']['construct_triples'], $target_graph, $keep_bnode_ids);
+    }
+    /* insert construct: bnode ids always stay unchanged */
+    $keep_bnode_ids = 1;
+    ARC2::inc('StoreConstructQueryHandler');
+    /* plain assignment: "=& new" is deprecated since PHP 5.3 */
+    $h = new ARC2_StoreConstructQueryHandler($this->a, $this->store);
+    if ($sub_r = $h->runQuery($this->infos)) {
+      return $this->store->insert($sub_r, $target_graph, $keep_bnode_ids);
+    }
+    /* empty CONSTRUCT result: nothing to insert */
+    return array('t_count' => 0, 'load_time' => 0);
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Store LOAD Query Handler
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-24
+*/
+
+ARC2::inc('StoreQueryHandler');
+
+class ARC2_StoreLoadQueryHandler extends ARC2_StoreQueryHandler {
+
+ /* PHP5 constructor; forwards config and caller to the parent class */
+ function __construct($a = '', &$caller) {/* caller has to be a store */
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP4-style constructor, forwards to __construct */
+ function ARC2_StoreLoadQueryHandler($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /**
+  * Binds the store (the caller) and reads the loader settings from the
+  * config array: write buffer size, split threshold and the flag for
+  * stripping multi-byte chars from comparison strings. Also probes whether
+  * PCRE supports unicode properties.
+  */
+ function __init() {/* db_con, store_log_inserts */
+   parent::__init();
+   $this->store =& $this->caller;
+   /* property => (config key, default) */
+   $settings = array(
+     'write_buffer_size' => array('store_write_buffer', 2500),
+     'split_threshold' => array('store_split_threshold', 0),
+     'strip_mb_comp_str' => array('store_strip_mb_comp_str', 0)
+   );
+   foreach ($settings as $prop => $setting) {
+     $this->$prop = $this->v($setting[0], $setting[1], $this->a);
+   }
+   $this->has_pcre_unicode = @preg_match('/\pL/u', 'test');
+ }
+
+ /* */
+
+ /**
+  * Runs a LOAD query: reads the document at the query's URL (or the given
+  * $data), detects its format, parses it with the matching Store*Loader
+  * and writes the resulting triples to the store under a write lock.
+  *
+  * @param array $infos parsed query structure (reads 'url' and 'target_graph' from $infos['query'])
+  * @param string $data optional document content handed to the reader/loader
+  * @param int $keep_bnode_ids when set, bnode IDs are stored unchanged
+  * @return mixed array with 't_count' and 'load_time' (plus 'inserts' and
+  *               'insert_times' when insert logging is on), or the result
+  *               of addError() when no loader or no lock is available
+  */
+ function runQuery($infos, $data = '', $keep_bnode_ids = 0) {
+   $url = $infos['query']['url'];
+   $graph = $infos['query']['target_graph'];
+   $this->target_graph = $graph ? $this->calcURI($graph) : $this->calcURI($url);
+   $this->fixed_target_graph = $graph ? $this->target_graph : '';
+   $this->keep_bnode_ids = $keep_bnode_ids;
+   /* reader ("=& new" is deprecated since PHP 5.3) */
+   ARC2::inc('Reader');
+   $reader = new ARC2_Reader($this->a, $this);
+   $reader->activate($url, $data);
+   /* format detection */
+   $mappings = array(
+     'rdfxml' => 'RDFXML',
+     'sparqlxml' => 'SPOG',
+     'turtle' => 'Turtle',
+     'ntriples' => 'Turtle',
+     'rss' => 'RSS',
+     'atom' => 'Atom',
+     'n3' => 'Turtle',
+     'html' => 'SemHTML',
+     'sgajson' => 'SGAJSON',
+     'cbjson' => 'CBJSON'
+   );
+   $format = $reader->getFormat();
+   if (!$format || !isset($mappings[$format])) {
+     return $this->addError('No loader available for "' .$url. '": ' . $format);
+   }
+   /* format loader */
+   $suffix = 'Store' . $mappings[$format] . 'Loader';
+   ARC2::inc($suffix);
+   $cls = 'ARC2_' . $suffix;
+   $loader = new $cls($this->a, $this);
+   $loader->setReader($reader);
+   /* lock */
+   if (!$this->store->getLock()) {
+     $l_name = $this->a['db_name'] . '.' . $this->store->getTablePrefix() . '.write_lock';
+     return $this->addError('Could not get lock in "runQuery" (' . $l_name . ')');
+   }
+   $this->has_lock = 1;
+   /* logging */
+   $this->t_count = 0;
+   $this->t_start = ARC2::mtime();
+   $this->log_inserts = $this->v('store_log_inserts', 0, $this->a);
+   if ($this->log_inserts) {
+     @unlink("arc_insert_log.txt");
+     $this->inserts = array();
+     $this->insert_times = array();
+     $this->t_prev = $this->t_start;
+     $this->t_count_prev = 0 ;
+   }
+   /* load and parse (the loader calls back into addT() for each triple) */
+   $this->max_term_id = $this->getMaxTermID();
+   $this->max_triple_id = $this->getMaxTripleID();
+   $this->column_type = $this->store->getColumnType();
+   //$this->createMergeTable();
+   $this->term_ids = array();
+   $this->triple_ids = array();
+   $this->sql_buffers = array();
+   $loader->parse($url, $data);
+   /* done: flush whatever is still buffered */
+   $this->checkSQLBuffers(1);
+   if ($this->log_inserts) {
+     $this->logInserts();
+   }
+   $this->store->releaseLock();
+   /* keep the flag in sync so that a stray addT() call cannot write without a lock */
+   $this->has_lock = 0;
+   //$this->dropMergeTable();
+   if ((rand(1, 100) == 1)) $this->store->optimizeTables();
+   $t2 = ARC2::mtime();
+   $dur = round($t2 - $this->t_start, 4);
+   $r = array(
+     't_count' => $this->t_count,
+     'load_time' => $dur,
+   );
+   if ($this->log_inserts) {
+     $r['inserts'] = $this->inserts;
+     $r['insert_times'] = $this->insert_times;
+   }
+   return $r;
+ }
+
+ /* */
+
+ /**
+  * Buffers one triple and its graph link for insertion (called by the
+  * format loaders for every parsed triple).
+  *
+  * Returns 0 without doing anything when no write lock is held; otherwise
+  * nothing is returned.
+  *
+  * @param string $s subject value
+  * @param string $p predicate value
+  * @param string $o object value
+  * @param string $s_type 'uri' or 'bnode' (key into the type ID map below)
+  * @param string $o_type 'uri', 'bnode' or 'literal'
+  * @param string $o_dt object datatype, if any
+  * @param string $o_lang object language, if any
+  */
+ function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
+ if (!$this->has_lock) return 0;
+ $type_ids = array ('uri' => '0', 'bnode' => '1' , 'literal' => '2');
+ $g = $this->getTermID($this->target_graph, '0', 'id');
+ /* unless bnode IDs are kept, bnodes are renamed to "_:b" + crc32 of (graph ID + original ID) + "_" + up to 10 trailing chars of the original ID */
+ $s = (($s_type == 'bnode') && !$this->keep_bnode_ids) ? '_:b' . abs(crc32($g . $s)) . '_' . (strlen($s) > 12 ? substr(substr($s, 2) , -10) : substr($s, 2)) : $s;
+ $o = (($o_type == 'bnode') && !$this->keep_bnode_ids) ? '_:b' . abs(crc32($g . $o)) . '_' . (strlen($o) > 12 ? substr(substr($o, 2), -10) : substr($o, 2)) : $o;
+ /* triple */
+ $t = array(
+ 's' => $this->getTermID($s, $type_ids[$s_type], 's'),
+ 'p' => $this->getTermID($p, '0', 'id'),
+ 'o' => $this->getTermID($o, $type_ids[$o_type], 'o'),
+ 'o_lang_dt' => $this->getTermID($o_dt . $o_lang, $o_dt ? '0' : '2', 'id'),
+ 'o_comp' => $this->getOComp($o),
+ 's_type' => $type_ids[$s_type],
+ 'o_type' => $type_ids[$o_type],
+ );
+ /* getTripleID() returns array(id) for a known triple and a plain id for a new one */
+ $t['t'] = $this->getTripleID($t);
+ if (is_array($t['t'])) {/* t exists already */
+ $t['t'] = $t['t'][0];
+ }
+ else {
+ $this->bufferTripleSQL($t);
+ }
+ /* g2t */
+ $g2t = array('g' => $g, 't' => $t['t']);
+ $this->bufferGraphSQL($g2t);
+ $this->t_count++;
+ /* check buffers */
+ /* flush every write_buffer_size triples; clear the ID caches on every 2nd flush; refresh the lock every 25000 triples */
+ if (($this->t_count % $this->write_buffer_size) == 0) {
+ $force_write = 1;
+ $reset_buffers = (($this->t_count % ($this->write_buffer_size * 2)) == 0);
+ $refresh_lock = (($this->t_count % 25000) == 0);
+ /* NOTE(review): $split_tables is passed on, but checkSQLBuffers() in this file never reads it */
+ $split_tables = (($this->t_count % ($this->write_buffer_size * 10)) == 0);
+ if ($this->log_inserts) $this->logInserts();
+ $this->checkSQLBuffers($force_write, $reset_buffers, $refresh_lock, $split_tables);
+ }
+ }
+
+ /* */
+
+ /**
+  * Determines the next free term ID: one more than the highest ID found in
+  * the id2val, s2val and o2val tables (1 if none is found).
+  *
+  * @return int
+  */
+ function getMaxTermID() {
+   $con = $this->store->getDBCon();
+   $selects = array();
+   foreach (array('id2val', 's2val', 'o2val') as $tbl) {
+     $selects[] = "(SELECT MAX(id) as `id` FROM " . $this->store->getTablePrefix() . $tbl . ')';
+   }
+   $sql = join(' UNION ', $selects);
+   $max = 0;
+   $rs = $this->queryDB($sql, $con);
+   if ($rs && mysql_num_rows($rs)) {
+     while ($row = mysql_fetch_array($rs)) {
+       if ($max < $row['id']) {
+         $max = $row['id'];
+       }
+     }
+   }
+   return $max + 1;
+ }
+
+ /**
+  * Determines the next free triple ID: one more than the highest "t" value
+  * in the triple table, or 1 when the query yields no row.
+  *
+  * @return int
+  */
+ function getMaxTripleID() {
+   $con = $this->store->getDBCon();
+   $sql = "SELECT MAX(t) AS `id` FROM " . $this->store->getTablePrefix() . "triple";
+   $rs = $this->queryDB($sql, $con);
+   if (!$rs || !mysql_num_rows($rs)) {
+     return 1;
+   }
+   $row = mysql_fetch_array($rs);
+   if (!$row) {
+     return 1;
+   }
+   return $row['id'] + 1;
+ }
+
+ /**
+  * Returns the numeric ID for a term value, creating (buffering) a new ID
+  * when the value is unknown.
+  *
+  * Lookup order: the in-memory cache ($this->term_ids), then the three
+  * *2val tables starting with the one that belongs to $tbl. When the value
+  * is only known for another table, an insert for the requested table is
+  * buffered with the same ID.
+  *
+  * NOTE(review): the parent class declares getTermID($val, $term = '');
+  * this override takes three arguments.
+  *
+  * @param string $val term value
+  * @param string $type_id value type ('0', '1' or '2' as passed by addT())
+  * @param string $tbl 'id', 's' or 'o' (prefix of the *2val table)
+  * @return mixed the term ID
+  */
+ function getTermID($val, $type_id, $tbl) {
+ $con = $this->store->getDBCon();
+ /* buffered */
+ if (isset($this->term_ids[$val])) {
+ if (!isset($this->term_ids[$val][$tbl])) {
+ /* known under another table: reuse that ID and buffer a row for this table */
+ foreach (array('id', 's', 'o') as $other_tbl) {
+ if (isset($this->term_ids[$val][$other_tbl])) {
+ $this->term_ids[$val][$tbl] = $this->term_ids[$val][$other_tbl];
+ $this->bufferIDSQL($tbl, $this->term_ids[$val][$tbl], $val, $type_id);
+ break;
+ }
+ }
+ }
+ return $this->term_ids[$val][$tbl];
+ }
+ /* db */
+ $tbl_prefix = $this->store->getTablePrefix();
+ /* the requested table is checked first, then the other two */
+ $sub_tbls = ($tbl == 'id') ? array('id2val', 's2val', 'o2val') : ($tbl == 's' ? array('s2val', 'id2val', 'o2val') : array('o2val', 'id2val', 's2val'));
+ foreach ($sub_tbls as $sub_tbl) {
+ $id = 0;
+ //$sql = "SELECT id AS `id`, '" . $sub_tbl . "' AS `tbl` FROM " . $tbl_prefix . $sub_tbl . " WHERE val = BINARY '" . mysql_real_escape_string($val, $con) . "'";
+ /* via hash */
+ if (preg_match('/^(s2val|o2val)$/', $sub_tbl) && $this->hasHashColumn($sub_tbl)) {
+ $sql = "SELECT id AS `id`, val AS `val` FROM " . $tbl_prefix . $sub_tbl . " WHERE val_hash = BINARY '" . $this->getValueHash($val) . "'";
+ if (($rs = $this->queryDB($sql, $con)) && mysql_num_rows($rs)) {
+ /* several values may share a hash: compare the actual value */
+ while ($row = mysql_fetch_array($rs)) {
+ if ($row['val'] == $val) {
+ $id = $row['id'];
+ break;
+ }
+ }
+ }
+ }
+ else {
+ $sql = "SELECT id AS `id` FROM " . $tbl_prefix . $sub_tbl . " WHERE val = BINARY '" . mysql_real_escape_string($val, $con) . "'";
+ if (($rs = $this->queryDB($sql . ' LIMIT 1', $con)) && mysql_num_rows($rs)) {
+ $row = mysql_fetch_array($rs);
+ $id = $row['id'];
+ }
+ }
+ if ($id) {
+ $this->term_ids[$val] = array($tbl => $id);
+ /* found in one of the other tables: buffer a row for the requested table */
+ if ($sub_tbl != $tbl . '2val') {
+ $this->bufferIDSQL($tbl, $id, $val, $type_id);
+ }
+ break;
+ }
+ }
+ /* new */
+ if (!isset($this->term_ids[$val])) {
+ $this->term_ids[$val] = array($tbl => $this->max_term_id);
+ $this->bufferIDSQL($tbl, $this->max_term_id, $val, $type_id);
+ $this->max_term_id++;
+ /* upgrade tables ? */
+ if (($this->column_type == 'mediumint') && ($this->max_term_id >= 16750000)) {
+ $this->store->extendColumns();
+ $this->column_type = 'int';
+ }
+ }
+ return $this->term_ids[$val][$tbl];
+ }
+
+ /**
+  * Returns the ID for a triple.
+  *
+  * The return type signals whether the triple still has to be inserted:
+  * array(id) means "known already" (cached or found in the triple table),
+  * a plain id means "new", and the caller is expected to buffer the insert.
+  *
+  * NOTE(review): the cache key is serialize($t), which includes o_comp,
+  * while the database lookup does not compare o_comp.
+  *
+  * @param array $t triple with the keys s, p, o, o_lang_dt, s_type, o_type (as built by addT())
+  * @return mixed array(id) for an existing triple, id for a new one
+  */
+ function getTripleID($t) {
+ $con = $this->store->getDBCon();
+ $val = serialize($t);
+ /* buffered */
+ if (isset($this->triple_ids[$val])) {
+ return array($this->triple_ids[$val]);/* hack for "don't insert this triple" */
+ }
+ /* db */
+ $sql = "SELECT t FROM " . $this->store->getTablePrefix() . "triple WHERE
+ s = " . $t['s'] . " AND p = " . $t['p'] . " AND o = " . $t['o'] . " AND o_lang_dt = " . $t['o_lang_dt'] . " AND s_type = " . $t['s_type'] . " AND o_type = " . $t['o_type'] . "
+ LIMIT 1
+ ";
+ if (($rs = $this->queryDB($sql, $con)) && mysql_num_rows($rs) && ($row = mysql_fetch_array($rs))) {
+ $this->triple_ids[$val] = $row['t'];/* hack for "don't insert this triple" */
+ return array($row['t']);/* hack for "don't insert this triple" */
+ }
+ /* new */
+ else {
+ $this->triple_ids[$val] = $this->max_triple_id;
+ $this->max_triple_id++;
+ /* split tables ? */
+ /* only active when a split threshold is configured (default in __init() is 0) */
+ if ($this->split_threshold && !($this->max_triple_id % $this->split_threshold)) {
+ $this->store->splitTables();
+ $this->dropMergeTable();
+ $this->createMergeTable();
+ }
+ return $this->triple_ids[$val];
+ }
+ }
+
+ /**
+  * Derives the normalized comparison string ("o_comp") that is stored
+  * next to an object value for ORDER BY operations.
+  *
+  * - "21 August 2007"-style dates become "Y-m-d\TH:i:s"
+  * - xsd dates/dateTimes are reduced to date + time; a numeric timezone
+  *   offset is applied and the result is marked with "Z"
+  * - numbers are zero-padded to a fixed width with an explicit sign
+  * - any other string has tags stripped and separators collapsed to "-",
+  *   and is shortened to its first and last 17 chars when longer than 35
+  *
+  * @param string $val object value
+  * @return string
+  */
+ function getOComp($val) {
+   /* try date (e.g. 21 August 2007) */
+   if (preg_match('/^[0-9]{1,2}\s+[a-z]+\s+[0-9]{4}/i', $val) && ($uts = strtotime($val)) && ($uts !== -1)) {
+     return date("Y-m-d\TH:i:s", $uts);
+   }
+   /* xsd date (e.g. 2009-05-28T18:03:38+09:00 2009-05-28T18:03:38GMT) */
+   if (preg_match('/^([0-9]{4}\-[0-9]{2}\-[0-9]{2}\T)([0-9\:]+)?([0-9\+\-\:\Z]+)?(\s*[a-z]{2,3})?$/si', $val, $m)) {
+     /* yyyy-mm-dd */
+     $val = $m[1];
+     /* hh:ss (trailing unmatched groups are not set by preg_match, hence !empty) */
+     if (!empty($m[2])) {
+       $val .= $m[2];
+       /* timezone offset */
+       if (isset($m[3]) && ($m[3] != 'Z')) {
+         $uts = strtotime(str_replace('T', ' ', $val));
+         if (preg_match('/([\+\-])([0-9]{2})\:?([0-9]{2})$/', $m[3], $sub_m)) {
+           /* offset in seconds: hours and minutes both have to be converted */
+           $diff_secs = (3600 * (int) $sub_m[2]) + (60 * (int) $sub_m[3]);
+           $uts = ($sub_m[1] == '-') ? $uts + $diff_secs : $uts - $diff_secs;
+           $val = date('Y-m-d\TH:i:s\Z', $uts);
+         }
+       }
+       else {
+         $val .= 'Z';
+       }
+     }
+     return $val;
+   }
+   /* fallback & backup w/o UTC calculation, to be removed in later revision */
+   if (preg_match('/^[0-9]{4}[0-9\-\:\T\Z\+]+([a-z]{2,3})?$/i', $val)) {
+     return $val;
+   }
+   if (is_numeric($val)) {
+     /* sign + 18 integer digits + "." + 15 fraction digits, so that string order equals numeric order */
+     $val = sprintf("%f", $val);
+     if (preg_match("/([\-\+])([0-9]*)\.([0-9]*)/", $val, $m)) {
+       return $m[1] . sprintf("%018s", $m[2]) . "." . sprintf("%-015s", $m[3]);
+     }
+     if (preg_match("/([0-9]*)\.([0-9]*)/", $val, $m)) {
+       return "+" . sprintf("%018s", $m[1]) . "." . sprintf("%-015s", $m[2]);
+     }
+     return $val;
+   }
+   /* any other string: remove tags, linebreaks etc., but keep MB-chars */
+   //$val = substr(trim(preg_replace('/[\W\s]+/is', '-', strip_tags($val))), 0, 35);
+   $re = $this->has_pcre_unicode ? '/[\PL\s]+/isu' : '/[\s\'\"\´\`]+/is';
+   $val = trim(preg_replace($re, '-', strip_tags($val)));
+   if (strlen($val) > 35) {
+     $fnc = function_exists("mb_substr") ? 'mb_substr' : 'substr';
+     $val = $fnc($val, 0, 17) . '-' . $fnc($val, -17);
+   }
+   if ($this->strip_mb_comp_str) {
+     $val = urldecode(preg_replace('/\%[0-9A-F]{2}/', '', urlencode($val)));
+   }
+   return $this->toUTF8($val);
+ }
+
+ /* */
+
+ /**
+  * Appends one value tuple to the buffered multi-row INSERT for the triple
+  * table, starting the statement if no buffer exists yet.
+  *
+  * @param array $t triple with the keys t, s, p, o, o_lang_dt, o_comp, s_type, o_type
+  */
+ function bufferTripleSQL($t) {
+   $con = $this->store->getDBCon();
+   $tbl = 'triple';
+   $cells = array(
+     $t['t'],
+     $t['s'],
+     $t['p'],
+     $t['o'],
+     $t['o_lang_dt'],
+     "'" . mysql_real_escape_string($t['o_comp'], $con) . "'",
+     $t['s_type'],
+     $t['o_type']
+   );
+   $tuple = "(" . join(", ", $cells) . ")";
+   if (isset($this->sql_buffers[$tbl])) {
+     $this->sql_buffers[$tbl] .= ", " . $tuple;
+   }
+   else {
+     $head = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $tbl . " (t, s, p, o, o_lang_dt, o_comp, s_type, o_type) VALUES";
+     $this->sql_buffers[$tbl] = $head . " " . $tuple;
+   }
+ }
+
+ /**
+  * Appends one (graph, triple) pair to the buffered multi-row INSERT for
+  * the g2t table, starting the statement if no buffer exists yet.
+  *
+  * @param array $g2t array with the keys g (graph term ID) and t (triple ID)
+  */
+ function bufferGraphSQL($g2t) {
+   $tbl = 'g2t';
+   $tuple = "(" . $g2t['g'] . ", " . $g2t['t'] . ")";
+   if (isset($this->sql_buffers[$tbl])) {
+     $this->sql_buffers[$tbl] .= ", " . $tuple;
+   }
+   else {
+     $head = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $tbl . " (g, t) VALUES";
+     $this->sql_buffers[$tbl] = $head . " " . $tuple;
+   }
+ }
+
+ /**
+  * Appends one term row to the buffered multi-row INSERT for a *2val
+  * table. id2val rows carry the value type, s2val/o2val rows carry the
+  * value hash when the table has a hash column.
+  *
+  * @param string $tbl 'id', 's' or 'o' (prefix of the *2val table)
+  * @param mixed $id term ID
+  * @param string $val term value
+  * @param mixed $val_type value type (only written to id2val)
+  */
+ function bufferIDSQL($tbl, $id, $val, $val_type) {
+   $con = $this->store->getDBCon();
+   $tbl = $tbl . '2val';
+   $quoted_val = "'" . mysql_real_escape_string($val, $con) . "'";
+   if ($tbl == 'id2val') {
+     $cols = "id, val, val_type";
+     $tuple = "(" . $id . ", " . $quoted_val . ", " . $val_type . ")";
+   }
+   elseif (preg_match('/^(s2val|o2val)$/', $tbl) && $this->hasHashColumn($tbl)) {
+     $cols = "id, val_hash, val";
+     $tuple = "(" . $id . ", '" . $this->getValueHash($val). "', " . $quoted_val . ")";
+   }
+   else {
+     $cols = "id, val";
+     $tuple = "(" . $id . ", " . $quoted_val . ")";
+   }
+   if (isset($this->sql_buffers[$tbl])) {
+     $this->sql_buffers[$tbl] .= ", " . $tuple;
+   }
+   else {
+     $this->sql_buffers[$tbl] = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $tbl . "(" . $cols . ") VALUES " . $tuple;
+   }
+ }
+
+ /* */
+
+ /**
+  * Writes the buffered INSERT statements to the database.
+  *
+  * All pending table buffers are flushed first; the ID caches are reset
+  * and the write lock is refreshed only afterwards, so the lock is never
+  * given up while some of the tables still have unwritten rows (and the
+  * one-second pause happens at most once per call).
+  *
+  * @param int $force_write buffers are only written when this is set
+  * @param int $reset_id_buffers clear the term/triple ID caches after writing
+  * @param int $refresh_lock release and re-obtain the store lock after writing
+  * @param int $split_tables not used by this method
+  * @return mixed 1, or the result of addError() when the lock could not be re-obtained
+  */
+ function checkSQLBuffers($force_write = 0, $reset_id_buffers = 0, $refresh_lock = 0, $split_tables = 0) {
+   $con = $this->store->getDBCon();
+   if (!$this->keep_time_limit) @set_time_limit($this->v('time_limit', 60, $this->a));
+   $flushed = 0;
+   foreach (array('triple', 'g2t', 'id2val', 's2val', 'o2val') as $tbl) {
+     if (!$force_write || !isset($this->sql_buffers[$tbl])) continue;
+     $flushed = 1;
+     $t1 = ARC2::mtime();
+     $this->queryDB($this->sql_buffers[$tbl], $con);
+     /* table error */
+     if ($er = mysql_error($con)) {
+       $this->autoRepairTable($er, $con, $this->sql_buffers[$tbl]);
+     }
+     unset($this->sql_buffers[$tbl]);
+     if ($this->log_inserts) {
+       $t2 = ARC2::mtime();
+       $this->inserts[$tbl] = $this->v($tbl, 0, $this->inserts) + max(0, mysql_affected_rows($con));
+       $dur = round($t2 - $t1, 4);
+       $this->insert_times[$tbl] = isset($this->insert_times[$tbl]) ? $this->insert_times[$tbl] : array('min' => $dur, 'max' => $dur, 'sum' => $dur);
+       $this->insert_times[$tbl] = array('min' => min($dur, $this->insert_times[$tbl]['min']), 'max' => max($dur, $this->insert_times[$tbl]['max']), 'sum' => $dur + $this->insert_times[$tbl]['sum']);
+     }
+   }
+   /* as before, the follow-up steps only run when something was written */
+   if ($flushed) {
+     /* reset term id buffers */
+     if ($reset_id_buffers) {
+       $this->term_ids = array();
+       $this->triple_ids = array();
+     }
+     /* refresh lock (gives other writers a chance during long loads) */
+     if ($refresh_lock) {
+       $this->store->releaseLock();
+       $this->has_lock = 0;
+       sleep(1);
+       if (!$this->store->getLock(5)) return $this->addError('Could not re-obtain lock in "checkSQLBuffers"');
+       $this->has_lock = 1;
+     }
+   }
+   return 1;
+ }
+
+ /**
+  * Records a MySQL error and, when the message points to a crashed table,
+  * tries REPAIR TABLE on it. If the repair reports an error, the store is
+  * dropped and set up again when "store_reset_on_table_crash" is enabled;
+  * otherwise a second error is recorded.
+  *
+  * @param string $er MySQL error message
+  * @param resource $con database connection
+  * @param string $sql statement that triggered the error (for the error log)
+  */
+ function autoRepairTable($er, $con, $sql = '') {
+   $this->addError('MySQL error: ' . $er . ' (' . $sql . ')');
+   if (!preg_match('/Table \'[^\']+\/([a-z0-9\_\-]+)\' .*(crashed|repair)/i', $er, $m)) {
+     return;
+   }
+   $rs = $this->queryDB('REPAIR TABLE ' . rawurlencode($m[1]), $con);
+   $msg = $rs ? mysql_fetch_array($rs) : array();
+   if ($this->v('Msg_type', 'error', $msg) != 'error') {
+     return;
+   }
+   /* auto-reset */
+   if ($this->v('store_reset_on_table_crash', 0, $this->a)) {
+     $this->store->drop();
+     $this->store->setUp();
+     return;
+   }
+   $er = $this->v('Msg_text', 'unknown error', $msg);
+   $this->addError('Auto-repair failed on ' . rawurlencode($m[1]) . ': ' . $er);
+   //die("Fatal errors: \n" . print_r($this->getErrors(), 1));
+ }
+
+ /* speed log */
+
+ /**
+  * Appends one line with the current and the overall insert speed
+  * (triples per second) to "arc_insert_log.txt" and remembers the current
+  * time and triple count for the next call.
+  *
+  * Speeds are reported as 0 when no measurable time has passed, and the
+  * log line is skipped when the file cannot be opened.
+  */
+ function logInserts() {
+   $t_start = $this->t_start;
+   $t_prev = $this->t_prev;
+   $t_now = ARC2::mtime();
+   $tc_prev = $this->t_count_prev;
+   $tc_now = $this->t_count;
+   $tc_diff = $tc_now - $tc_prev;
+
+   $dur_full = $t_now - $t_start;
+   $dur_diff = $t_now - $t_prev;
+
+   /* guard against division by zero on very fast consecutive calls */
+   $speed_full = ($dur_full > 0) ? round($tc_now / $dur_full) : 0;
+   $speed_now = ($dur_diff > 0) ? round($tc_diff / $dur_diff) : 0;
+
+   $r = $tc_diff . ' in ' . round($dur_diff, 5) . ' = ' . $speed_now . ' t/s (' .$tc_now. ' in ' . round($dur_full, 5). ' = ' . $speed_full . ' t/s )';
+   /* logging is best-effort: only write when the file could be opened */
+   if ($fp = @fopen("arc_insert_log.txt", "a")) {
+     @fwrite($fp, $r . "\r\n");
+     @fclose($fp);
+   }
+
+   $this->t_prev = $t_now;
+   $this->t_count_prev = $tc_now;
+ }
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Store Query Handler
+ *
+ * @author Benjamin Nowack
+ * @license <http://arc.semsol.org/license>
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-04-11
+*/
+
+ARC2::inc('Class');
+
+/**
+ * Base class of the store query handlers: shared settings, pass-through
+ * helpers to the store, and the (optional) MERGE table handling for split
+ * triple tables. Subclasses are expected to set $this->store.
+ */
+class ARC2_StoreQueryHandler extends ARC2_Class {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct */
+  function ARC2_StoreQueryHandler($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  /* reads the handler settings from the config array */
+  function __init() {/* db_con */
+    parent::__init();
+    $this->handler_type = '';
+    $this->xsd = 'http://www.w3.org/2001/XMLSchema#';
+    $this->keep_time_limit = $this->v('keep_time_limit', 0, $this->a);
+    $this->allow_extension_functions = $this->v('store_allow_extension_functions', 1, $this->a);
+  }
+
+  /* pass-through helpers to the store */
+
+  function getTermID($val, $term = '') {
+    return $this->store->getTermID($val, $term);
+  }
+
+  function hasHashColumn($tbl) {
+    return $this->store->hasHashColumn($tbl);
+  }
+
+  function getValueHash($val) {
+    return $this->store->getValueHash($val);
+  }
+
+  /* */
+
+  /* name of the main triple table, including the store's table prefix */
+  function getTripleTable() {
+    return $this->store->getTablePrefix() . 'triple';
+  }
+
+  /* */
+
+  /**
+   * Creates a temporary MERGE table "triple_all" that unions the main
+   * triple table with the per-predicate split tables. Does nothing (and
+   * returns 1) when no split predicates are configured.
+   *
+   * @return mixed 1, or the result of the CREATE query
+   */
+  function createMergeTable() {
+    $split_ps = $this->store->getSetting('split_predicates', array());
+    if (!$split_ps) {
+      return 1;
+    }
+    $this->mrg_table_id = 'MRG_' . $this->store->getTablePrefix() . crc32(uniqid(rand()));
+    $con = $this->store->getDBCon();
+    $this->queryDB("FLUSH TABLES", $con);
+    $indexes = $this->v('store_indexes', array('sp (s,p)', 'os (o,s)', 'po (p,o)'), $this->a);
+    $index_code = $indexes ? 'KEY ' . join(', KEY ', $indexes) . ', ' : '';
+    $prefix = $this->store->getTablePrefix();
+    $sql = "
+ CREATE TEMPORARY TABLE IF NOT EXISTS " . $prefix . "triple_all (
+ t mediumint UNSIGNED NOT NULL,
+ s mediumint UNSIGNED NOT NULL,
+ p mediumint UNSIGNED NOT NULL,
+ o mediumint UNSIGNED NOT NULL,
+ o_lang_dt mediumint UNSIGNED NOT NULL,
+ o_comp char(35) NOT NULL, /* normalized value for ORDER BY operations */
+ s_type tinyint(1) NOT NULL default 0, /* uri/bnode => 0/1 */
+ o_type tinyint(1) NOT NULL default 0, /* uri/bnode/literal => 0/1/2 */
+ misc tinyint(1) NOT NULL default 0, /* temporary flags */
+ UNIQUE KEY (t), " . $index_code . " KEY (misc)
+ )
+ ";
+    /* the table option keyword (ENGINE vs. TYPE) depends on the DB version */
+    $v = $this->store->getDBVersion();
+    $uses_engine = (($v < '04-01-00') && ($v >= '04-00-18')) || ($v >= '04-01-02');
+    $sql .= $uses_engine ? 'ENGINE' : 'TYPE';
+    /* main triple table first, then one table per split predicate */
+    $union_tbls = array($prefix . 'triple');
+    foreach ($split_ps as $p) {
+      $union_tbls[] = $prefix . 'triple_' . abs(crc32($p));
+    }
+    $sql .= "=MERGE UNION=(" . join(',', $union_tbls) . ")";
+    //$sql .= ($v >= '04-00-00') ? " CHARACTER SET utf8" : "";
+    //$sql .= ($v >= '04-01-00') ? " COLLATE utf8_unicode_ci" : "";
+    //echo $sql;
+    return $this->queryDB($sql, $con);
+  }
+
+  /**
+   * Currently a no-op that returns 1; the statements after the early
+   * return (dropping "triple_all") are never reached.
+   */
+  function dropMergeTable() {
+    return 1;
+    $sql = "DROP TABLE IF EXISTS " . $this->store->getTablePrefix() . "triple_all";
+    //echo $sql;
+    return $this->queryDB($sql, $this->store->getDBCon());
+  }
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store RDF/XML Loader
+author: Benjamin Nowack
+version: 2007-08-21
+*/
+
+ARC2::inc('RDFXMLParser');
+
+/**
+ * RDF/XML parser variant used for loading: every parsed triple is handed
+ * to the caller's addT() instead of being collected by the parser.
+ */
+class ARC2_StoreRDFXMLLoader extends ARC2_RDFXMLParser {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP4-style constructor, forwards to __construct */
+ function ARC2_StoreRDFXMLLoader($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ /* forwards the triple to the caller and counts it */
+ function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
+ $this->caller->addT($s, $p, $o, $s_type, $o_type, $o_dt, $o_lang);
+ $this->t_count++;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store RSS(2) Loader
+author: Benjamin Nowack
+version: 2008-06-28 (Tweak: adjusted to normalized "literal" type)
+*/
+
+ARC2::inc('RSSParser');
+
+/**
+ * RSS parser variant used for loading: every parsed triple is handed to
+ * the caller's addT() instead of being collected by the parser.
+ */
+class ARC2_StoreRSSLoader extends ARC2_RSSParser {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP4-style constructor, forwards to __construct */
+ function ARC2_StoreRSSLoader($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ /* unpacks the triple array (keys s, p, o, s_type, o_type, o_datatype, o_lang), forwards it to the caller and counts it */
+ function addT($t) {
+ $this->caller->addT($t['s'], $t['p'], $t['o'], $t['s_type'], $t['o_type'], $t['o_datatype'], $t['o_lang']);
+ $this->t_count++;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store SG API JSON Loader
+author: Benjamin Nowack
+version: 2008-07-15
+*/
+
+ARC2::inc('SGAJSONParser');
+
+/**
+ * SG API JSON parser variant used for loading: every extracted triple is
+ * handed to the caller's addT() instead of being collected by the parser.
+ */
+class ARC2_StoreSGAJSONLoader extends ARC2_SGAJSONParser {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP4-style constructor, forwards to __construct */
+ function ARC2_StoreSGAJSONLoader($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ /* runs the RDF extraction (presumably invoked by the parent parser once parsing is complete; TODO confirm) */
+ function done() {
+ $this->extractRDF();
+ }
+
+ /* forwards the triple to the caller and counts it */
+ function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
+ $this->caller->addT($s, $p, $o, $s_type, $o_type, $o_dt, $o_lang);
+ $this->t_count++;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store SPOG Loader
+author: Morten Høybye Frederiksen / Benjamin Nowack
+version: 2008-07-02
+*/
+
+ARC2::inc('SPOGParser');
+
+/**
+ * SPOG parser variant used for loading: every parsed quad is handed to
+ * the caller's addT(), with the caller's target graph switched to the
+ * quad's graph for the duration of the call.
+ */
+class ARC2_StoreSPOGLoader extends ARC2_SPOGParser {
+
+  function __construct($a = '', &$caller) {
+    parent::__construct($a, $caller);
+  }
+
+  /* PHP4-style constructor, forwards to __construct */
+  function ARC2_StoreSPOGLoader($a = '', &$caller) {
+    $this->__construct($a, $caller);
+  }
+
+  function __init() {
+    parent::__init();
+  }
+
+  /* */
+
+  /**
+   * Forwards one quad to the caller and counts it.
+   *
+   * Quads with an empty subject, predicate or object are skipped (0 is
+   * returned). The graph used is, in order of precedence: the caller's
+   * fixed target graph, the quad's own graph $g, the caller's current
+   * target graph.
+   *
+   * $g has a default so that the required parameter no longer follows
+   * optional ones; an empty $g was already treated as "not given".
+   */
+  function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '', $g = '') {
+    if (!($s && $p && $o)) return 0;
+    if (!$g) $g = $this->caller->target_graph;
+    if ($this->caller->fixed_target_graph) $g = $this->caller->fixed_target_graph;
+    /* switch the caller's graph for this one triple only */
+    $prev_g = $this->caller->target_graph;
+    $this->caller->target_graph = $g;
+    $this->caller->addT($s, $p, $o, $s_type, $o_type, $o_dt, $o_lang);
+    $this->caller->target_graph = $prev_g;
+    $this->t_count++;
+  }
+
+  /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Store SELECT Query Handler
+ *
+ * @author Benjamin Nowack
+ * @license http://arc.semsol.org/license
+ * @homepage <http://arc.semsol.org/>
+ * @package ARC2
+ * @version 2010-06-22
+ *
+*/
+
+ARC2::inc('StoreQueryHandler');
+
+class ARC2_StoreSelectQueryHandler extends ARC2_StoreQueryHandler {
+
+ /* PHP5 constructor; forwards config and caller to the parent class */
+ function __construct($a = '', &$caller) {/* caller has to be a store */
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP4-style constructor, forwards to __construct */
+ function ARC2_StoreSelectQueryHandler($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /**
+  * Binds the store (the caller) and reads the SELECT handler settings:
+  * the table engine for intermediate result tables ("store_engine_type",
+  * default MyISAM) and whether those tables are kept as a cache
+  * ("store_cache_results", default off).
+  */
+ function __init() {/* db_con */
+ parent::__init();
+ $this->store =& $this->caller;
+ /* NOTE(review): $con is not used in this method */
+ $con = $this->store->getDBCon();
+ $this->handler_type = 'select';
+ $this->engine_type = $this->v('store_engine_type', 'MyISAM', $this->a);
+ $this->cache_results = $this->v('store_cache_results', 0, $this->a);
+ }
+
+ /* */
+
+ /**
+  * Runs a parsed SELECT query: builds the SQL, materializes the ID-based
+  * rows in an intermediate table, joins the values and drops the
+  * intermediate table again unless result caching is enabled.
+  *
+  * The debug result formats 'sql', 'structure' and 'index' return the
+  * generated SQL, the query structure or the index data instead.
+  *
+  * @param array $infos parsed query structure (optional key 'result_format')
+  * @return mixed query result, or debug data for the debug formats
+  */
+ function runQuery($infos) {
+   $con = $this->store->getDBCon();
+   $rf = $this->v('result_format', '', $infos);
+   $this->infos = $infos;
+   $this->infos['null_vars'] = array();
+   $this->indexes = array();
+   $this->pattern_order_offset = 0;
+   $q_sql = $this->getSQL();
+
+   /* debug result formats */
+   switch ($rf) {
+     case 'sql':
+       return $q_sql;
+     case 'structure':
+       return $this->infos;
+     case 'index':
+       return $this->indexes;
+   }
+   /* create intermediate results (ID-based) */
+   $tmp_tbl = $this->createTempTable($q_sql);
+   /* join values */
+   $result = $this->getFinalQueryResult($q_sql, $tmp_tbl);
+   /* remove intermediate results */
+   if (!$this->cache_results) {
+     $this->queryDB('DROP TABLE IF EXISTS ' . $tmp_tbl, $con);
+   }
+   return $result;
+ }
+
+ /**
+  * Builds the SQL for the current query ($this->infos).
+  *
+  * One sub-query is generated per index (one per UNION branch for union
+  * queries); sub-queries are combined with UNION and, for union queries,
+  * wrapped in parentheses and followed by the LIMIT clause.
+  *
+  * When problematicDependencies() reports a non-zero count, the method
+  * calls itself again with an incremented pattern order offset and keeps
+  * the best candidate seen so far (fewest problematic dependencies, then
+  * shortest SQL). Once the offset exceeds 5 that candidate is returned.
+  *
+  * @return string SQL
+  */
+ function getSQL() {
+ $r = '';
+ $nl = "\n";
+ $this->buildInitialIndexes();
+ foreach ($this->indexes as $i => $index) {
+ $this->index = array_merge($this->getEmptyIndex(), $index);
+ $this->analyzeIndex($this->getPattern('0'));
+ $sub_r = $this->getQuerySQL();
+ $r .= $r ? $nl . 'UNION' . $this->getDistinctSQL() . $nl : '';
+ $r .= $this->is_union_query ? '(' . $sub_r . ')' : $sub_r;
+ $this->indexes[$i] = $this->index;
+ }
+ $r .= $this->is_union_query ? $this->getLIMITSQL() : '';
+ /* ordered results: add a NULL `_pos_` column to every SELECT */
+ if ($this->v('order_infos', 0, $this->infos['query'])) {
+ $r = preg_replace('/SELECT(\s+DISTINCT)?\s*/', 'SELECT\\1 NULL AS `_pos_`, ', $r);
+ }
+ if ($pd_count = $this->problematicDependencies()) {
+ /* re-arranging the patterns sometimes reduces the LEFT JOIN dependencies */
+ /* the first attempt (offset 0) always becomes the initial candidate */
+ $set_sql = 0;
+ if (!$this->pattern_order_offset) $set_sql = 1;
+ if (!$set_sql && ($pd_count < $this->opt_sql_pd_count)) $set_sql = 1;
+ if (!$set_sql && ($pd_count == $this->opt_sql_pd_count) && (strlen($r) < strlen($this->opt_sql))) $set_sql = 1;
+ if ($set_sql) {
+ $this->opt_sql = $r;
+ $this->opt_sql_pd_count = $pd_count;
+ }
+ $this->pattern_order_offset++;
+ if ($this->pattern_order_offset > 5) {
+ return $this->opt_sql;
+ }
+ return $this->getSQL();
+ }
+ return $r;
+ }
+
+ /**
+  * Builds the index data for the query pattern and stores it in
+  * $this->indexes: one entry per UNION branch for union queries, a single
+  * entry otherwise. An analyzed copy of the index is kept in
+  * $this->initial_index; $this->index itself is restored to its
+  * pre-analysis state.
+  */
+ function buildInitialIndexes() {
+ $this->dependency_log = array();
+ $this->index = $this->getEmptyIndex();
+ $this->buildIndex($this->infos['query']['pattern'], 0);
+ /* analyze on a scratch basis: keep the analyzed version separately and restore the raw index */
+ $tmp = $this->index;
+ $this->analyzeIndex($this->getPattern('0'));
+ $this->initial_index = $this->index;
+ $this->index = $tmp;
+ $this->is_union_query = $this->index['union_branches'] ? 1 : 0;
+ $this->indexes = $this->is_union_query ? $this->getUnionIndexes($this->index) : array($this->index);
+ }
+
+ /**
+ * Creates the table that holds the ID-based intermediate result and fills it
+ * with the rows selected by $q_sql.
+ *
+ * With result caching enabled the table name is derived from the query only;
+ * otherwise time and a random unique id are mixed in. If the TEMPORARY table
+ * cannot be created, a regular table is tried before giving up.
+ *
+ * @param string $q_sql SELECT statement producing the intermediate rows
+ * @return mixed table name, or the result of addError() if the table could not be created
+ */
+ function createTempTable($q_sql) {
+   $con = $this->store->getDBCon();
+   $db_version = $this->store->getDBVersion();
+   /* table name */
+   $name_seed = $this->cache_results ? $q_sql : $q_sql . time() . uniqid(rand());
+   $tbl = $this->store->getTablePrefix() . 'Q' . md5($name_seed);
+   if (strlen($tbl) > 64) {
+     $tbl = 'Q' . md5($tbl);
+   }
+   /* storage engine keyword depends on the server version string */
+   $in_early_engine_range = ($db_version < '04-01-00') && ($db_version >= '04-00-18');
+   $engine_keyword = ($in_early_engine_range || ($db_version >= '04-01-02')) ? 'ENGINE' : 'TYPE';
+   /* HEAP doesn't support AUTO_INCREMENT, and MySQL breaks on MEMORY sometimes */
+   $create_sql = 'CREATE TEMPORARY TABLE ' . $tbl . ' ( ' . $this->getTempTableDef($tbl, $q_sql) . ') ' . $engine_keyword . '=' . $this->engine_type;
+   if (!$this->queryDB($create_sql, $con)) {
+     /* second attempt without TEMPORARY */
+     $plain_sql = str_replace('CREATE TEMPORARY', 'CREATE', $create_sql);
+     if (!$this->queryDB($plain_sql, $con)) {
+       return $this->addError(mysql_error($con));
+     }
+   }
+   /* fill the table */
+   mysql_unbuffered_query('INSERT INTO ' . $tbl . ' ' . "\n" . $q_sql, $con);
+   $er = mysql_error($con);
+   if ($er) {
+     $this->addError($er);
+   }
+   return $tbl;
+ }
+
+ /**
+ * Returns a fresh index skeleton: every known index section mapped to an
+ * empty array.
+ *
+ * @return array
+ */
+ function getEmptyIndex() {
+   $sections = array(
+     'from', 'join', 'left_join',
+     'vars', 'graph_vars', 'graph_uris',
+     'bnodes',
+     'triple_patterns',
+     'sub_joins',
+     'constraints',
+     'union_branches',
+     'patterns',
+     'havings'
+   );
+   $r = array();
+   foreach ($sections as $section) {
+     $r[$section] = array();
+   }
+   return $r;
+ }
+
+ /**
+ * Derives the column definitions of the intermediate table from the column
+ * list of $q_sql: one unsigned int column per distinct "AS `alias`", plus an
+ * auto-increment `_pos_` key when the query has order infos.
+ *
+ * @param string $tmp_tbl table name (not used by this method)
+ * @param string $q_sql SELECT statement whose aliases define the columns
+ * @return string column definition SQL, or '' if there is none
+ */
+ function getTempTableDef($tmp_tbl, $q_sql) {
+   /* everything between SELECT [DISTINCT] and the last FROM */
+   $select_list = preg_replace('/^SELECT\s*(DISTINCT)?(.*)FROM.*$/s', '\\2', $q_sql);
+   $has_order_infos = $this->v('order_infos', 0, $this->infos['query']);
+   $col_defs = array();
+   $seen = array();
+   foreach (explode(',', $select_list) as $expr) {
+     if (!preg_match('/\.?(.+)\s+AS\s+`(.+)`/U', trim($expr), $m)) {
+       continue;
+     }
+     $alias = $m[2];
+     if (isset($seen[$alias]) || ($alias == '_pos_')) {
+       continue;
+     }
+     $col_defs[] = "\n `" . $alias . "` int UNSIGNED";
+     $seen[$alias] = 1;
+   }
+   $r = implode(',', $col_defs);
+   if ($has_order_infos) {
+     $r = "\n" . '`_pos_` mediumint NOT NULL AUTO_INCREMENT PRIMARY KEY, ' . $r;
+   }
+   if (!$r) {
+     return '';
+   }
+   return $r . "\n";
+ }
+
+ /**
+ * Runs the value query built by getValueSQL() and converts the result set
+ * into the handler's result structure.
+ *
+ * @param string $q_sql SQL of the intermediate (ID-based) query, passed on to getValueSQL()
+ * @param string $tmp_tbl name of the intermediate table, passed on to getValueSQL()
+ * @return array 'variables' => list of result var names/aliases, 'rows' => list of row arrays
+ */
+ function getFinalQueryResult($q_sql, $tmp_tbl) {
+ /* var names */
+ $vars = array();
+ $aggregate_vars = array();
+ foreach ($this->infos['query']['result_vars'] as $entry) {
+ /* aggregates are reported under their alias, plain vars under their name */
+ if ($entry['aggregate']) {
+ $vars[] = $entry['alias'];
+ $aggregate_vars[] = $entry['alias'];
+ }
+ else {
+ $vars[] = $entry['var'];
+ }
+ }
+ /* result */
+ $r = array('variables' => $vars);
+ $v_sql = $this->getValueSQL($tmp_tbl, $q_sql);
+ //echo "\n\n" . $v_sql;
+ /* $t1/$t2 are taken around the query but not used further in this method */
+ $t1 = ARC2::mtime();
+ $con = $this->store->getDBCon();
+ $rs = mysql_unbuffered_query($v_sql, $con);
+ if ($er = mysql_error($con)) {
+ $this->addError($er);
+ }
+ $t2 = ARC2::mtime();
+ $rows = array();
+ /* numeric type codes as delivered in the "<var> type" columns */
+ $types = array(0 => 'uri', 1 => 'bnode', 2 => 'literal');
+ if ($rs) {
+ while ($pre_row = mysql_fetch_array($rs)) {
+ $row = array();
+ foreach ($vars as $var) {
+ /* columns that are not set (isset() is false for NULL) are left out of the row */
+ if (isset($pre_row[$var])) {
+ $row[$var] = $pre_row[$var];
+ /* without a type column, aggregates default to 'literal', everything else to 'uri' */
+ $row[$var . ' type'] = isset($pre_row[$var . ' type']) ? $types[$pre_row[$var . ' type']] : (in_array($var, $aggregate_vars) ? 'literal' : 'uri');
+ if (isset($pre_row[$var . ' lang_dt']) && ($lang_dt = $pre_row[$var . ' lang_dt'])) {
+ /* values shaped like a language tag are stored as lang, anything else as datatype */
+ if (preg_match('/^([a-z]+(\-[a-z0-9]+)*)$/i', $lang_dt)) {
+ $row[$var . ' lang'] = $lang_dt;
+ }
+ else {
+ $row[$var . ' datatype'] = $lang_dt;
+ }
+ }
+ }
+ }
+ /* empty rows are only kept when the query has no result vars at all */
+ if ($row || !$vars) {
+ $rows[] = $row;
+ }
+ }
+ }
+ $r['rows'] = $rows;
+ return $r;
+ }
+
+ /* */
+
+ /**
+ * Recursively registers a pattern and its nested patterns in
+ * $this->index['patterns'], replacing nested patterns by their generated IDs
+ * ("<parent id>_<key>"). Branches of union patterns are additionally collected
+ * in $this->index['union_branches'].
+ *
+ * @param array $pattern pattern structure (sub-patterns still nested)
+ * @param mixed $id ID to register the pattern under
+ */
+ function buildIndex($pattern, $id) {
+   $pattern['id'] = $id;
+   $type = $this->v('type', '', $pattern);
+   $is_filter = ($type == 'filter') && $this->v('constraint', 0, $pattern);
+   if ($is_filter) {
+     /* the constraint of a filter is indexed as its only child, "<id>_0" */
+     $constraint_id = $id . '_0';
+     $constraint = $pattern['constraint'];
+     $constraint['parent_id'] = $id;
+     $this->buildIndex($constraint, $constraint_id);
+     $pattern['constraint'] = $constraint_id;
+   }
+   else {
+     $children = $this->v('patterns', array(), $pattern);
+     $child_count = count($children);
+     $order = array_keys($children);
+     if (($child_count > 4) && $this->pattern_order_offset) {
+       /* rotate the processing order by the current pattern order offset */
+       $order = array();
+       for ($pos = 0; $pos < $child_count; $pos++) {
+         $order[$pos] = ($pos + $this->pattern_order_offset) % $child_count;
+       }
+     }
+     foreach ($order as $pos => $key) {
+       $child = $children[$key];
+       $child['parent_id'] = $id;
+       $child_id = $id . '_' . $key;
+       $this->buildIndex($child, $child_id);
+       $pattern['patterns'][$pos] = $child_id;
+       if ($type == 'union') {
+         $this->index['union_branches'][] = $child_id;
+       }
+     }
+   }
+   $this->index['patterns'][$id] = $pattern;
+ }
+
+ /* */
+
+ /**
+ * Walks the indexed pattern tree and fills the derived index sections.
+ *
+ * For each triple pattern it records var and bnode occurrences, assigns the
+ * pattern to 'from', 'join' or 'left_join', and stores its graph infos plus
+ * graph var / graph URI occurrences. Sub-patterns are processed recursively.
+ *
+ * @param array $pattern indexed pattern (sub-patterns referenced by ID)
+ */
+ function analyzeIndex($pattern) {
+ $type = $pattern['type'];
+ $id = $pattern['id'];
+ /* triple */
+ if ($type == 'triple') {
+ foreach (array('s', 'p', 'o') as $term) {
+ /* append this occurrence (table = pattern id, col = term) to the term's list */
+ if ($pattern[$term . '_type'] == 'var') {
+ $val = $pattern[$term];
+ $this->index['vars'][$val] = array_merge($this->v($val, array(), $this->index['vars']), array(array('table' => $pattern['id'], 'col' =>$term)));
+ }
+ if ($pattern[$term . '_type'] == 'bnode') {
+ $val = $pattern[$term];
+ $this->index['bnodes'][$val] = array_merge($this->v($val, array(), $this->index['bnodes']), array(array('table' => $pattern['id'], 'col' =>$term)));
+ }
+ }
+ $this->index['triple_patterns'][] = $pattern['id'];
+ /* joins */
+ /* optional patterns are LEFT JOINed; the first non-optional pattern and any */
+ /* pattern without join infos go to FROM; all others become regular JOINs */
+ if ($this->isOptionalPattern($id)) {
+ $this->index['left_join'][] = $id;
+ }
+ elseif (!$this->index['from']) {
+ $this->index['from'][] = $id;
+ }
+ elseif (!$this->getJoinInfos($id)) {
+ $this->index['from'][] = $id;
+ }
+ else {
+ $this->index['join'][] = $id;
+ }
+ /* graph infos, graph vars */
+ $this->index['patterns'][$id]['graph_infos'] = $this->getGraphInfos($id);
+ foreach ($this->index['patterns'][$id]['graph_infos'] as $info) {
+ if ($info['type'] == 'graph') {
+ if ($info['var']) {
+ $val = $info['var']['value'];
+ $this->index['graph_vars'][$val] = array_merge($this->v($val, array(), $this->index['graph_vars']), array(array('table' => $id)));
+ }
+ elseif ($info['uri']) {
+ $val = $info['uri'];
+ $this->index['graph_uris'][$val] = array_merge($this->v($val, array(), $this->index['graph_uris']), array(array('table' => $id)));
+ }
+ }
+ }
+ }
+ /* descend into sub-patterns (referenced by ID) */
+ $sub_ids = $this->v('patterns', array(), $pattern);
+ foreach ($sub_ids as $sub_id) {
+ $this->analyzeIndex($this->getPattern($sub_id));
+ }
+ }
+
+ /* */
+
+ /**
+ * Collects the graph-related infos that apply to a pattern: the dataset
+ * entries of the query (at the root), followed by the GRAPH patterns found on
+ * the path from the root down to the given pattern.
+ *
+ * @param mixed $id pattern ID; a falsy ID stands for the query root
+ * @return array list of info arrays ('type' is 'dataset' or 'graph')
+ */
+ function getGraphInfos($id) {
+   $r = array();
+   if (!$id) {
+     /* FROM / FROM NAMED */
+     if (isset($this->infos['query']['dataset'])) {
+       foreach ($this->infos['query']['dataset'] as $set) {
+         $r[] = array_merge(array('type' => 'dataset'), $set);
+       }
+     }
+     return $r;
+   }
+   $pattern = $this->index['patterns'][$id];
+   /* graph */
+   if ($pattern['type'] == 'graph') {
+     $r[] = array('type' => 'graph', 'var' => $pattern['var'], 'uri' => $pattern['uri']);
+   }
+   /* prepend what the parent contributes, re-using its stored infos if available */
+   $parent_id = $pattern['parent_id'];
+   $parent = $this->index['patterns'][$parent_id];
+   $inherited = isset($parent['graph_infos']) ? $parent['graph_infos'] : $this->getGraphInfos($parent_id);
+   return array_merge($inherited, $r);
+ }
+
+ /* */
+
+ /**
+ * Resolves a pattern reference against the current index.
+ *
+ * @param mixed $id pattern ID, or an already resolved pattern array (returned unchanged)
+ * @return array pattern looked up via $this->v() with an empty array as default
+ */
+ function getPattern($id) {
+   return is_array($id) ? $id : $this->v($id, array(), $this->index['patterns']);
+ }
+
+ /**
+ * Looks up a pattern in the initial index snapshot instead of the current index.
+ *
+ * @param mixed $id pattern ID
+ * @return array pattern looked up via $this->v() with an empty array as default
+ */
+ function getInitialPattern($id) {
+   $pattern = $this->v($id, array(), $this->initial_index['patterns']);
+   return $pattern;
+ }
+
+ /* */
+
+ /**
+ * Expands an index containing UNION patterns into a list of union-free
+ * indexes, one per combination of branches.
+ *
+ * Only the union branches with the smallest ID depth are resolved in one
+ * call; indexes that still contain union branches are expanded recursively.
+ *
+ * @param array $pre_index index with 'union_branches' and 'patterns'
+ * @return array list of indexes ('keeping', 'union_branches', 'patterns')
+ */
+ function getUnionIndexes($pre_index) {
+ $r = array();
+ $branches = array();
+ $min_depth = 1000;
+ /* only process branches with minimum depth */
+ /* depth = number of "_"-separated segments in the branch ID */
+ foreach ($pre_index['union_branches'] as $id) {
+ $branches[$id] = count(preg_split('/\_/', $id));
+ $min_depth = min($min_depth, $branches[$id]);
+ }
+ foreach ($branches as $branch_id => $depth) {
+ if ($depth == $min_depth) {
+ /* the union's own ID is the branch ID without its last segment */
+ $union_id = preg_replace('/\_[0-9]+$/', '', $branch_id);
+ $index = array('keeping' => $branch_id, 'union_branches' => array(), 'patterns' => $pre_index['patterns']);
+ $old_branches = $index['patterns'][$union_id]['patterns'];
+ /* NOTE(review): only the first two entries of the branch list are considered here - confirm unions never carry more than two branches */
+ $skip_id = ($old_branches[0] == $branch_id) ? $old_branches[1] : $old_branches[0];
+ /* turn the union into a plain group that only contains the kept branch */
+ $index['patterns'][$union_id]['type'] = 'group';
+ $index['patterns'][$union_id]['patterns'] = array($branch_id);
+ /* $has_sub_unions is set but not read in this method */
+ $has_sub_unions = 0;
+ foreach ($index['patterns'] as $pattern_id => $pattern) {
+ /* drop the skipped branch and everything whose ID starts with its ID */
+ /* NOTE(review): plain prefix match, an ID like "0_1" also matches "0_10" - confirm this cannot occur */
+ if (preg_match('/^' .$skip_id. '/', $pattern_id)) {
+ unset($index['patterns'][$pattern_id]);
+ }
+ elseif ($pattern['type'] == 'union') {
+ /* remaining unions still have to be resolved */
+ foreach ($pattern['patterns'] as $sub_union_branch_id) {
+ $index['union_branches'][] = $sub_union_branch_id;
+ }
+ }
+ }
+ if ($index['union_branches']) {
+ $r = array_merge($r, $this->getUnionIndexes($index));
+ }
+ else {
+ $r[] = $index;
+ }
+ }
+ }
+ return $r;
+ }
+
+ /* */
+
+ /**
+ * Tells whether the given pattern is an OPTIONAL pattern or is nested inside
+ * one, by following the parent_id chain upwards.
+ *
+ * @param mixed $id pattern ID (or pattern array)
+ * @return int 1 if an 'optional' pattern is found on the way up, 0 otherwise
+ */
+ function isOptionalPattern($id) {
+   $cur = $id;
+   while (true) {
+     $pattern = $this->getPattern($cur);
+     if ($this->v('type', '', $pattern) == 'optional') {
+       return 1;
+     }
+     /* a missing parent or parent '0' ends the walk */
+     if ($this->v('parent_id', '0', $pattern) == '0') {
+       return 0;
+     }
+     $cur = $pattern['parent_id'];
+   }
+ }
+
+ /**
+ * Walks up from the given pattern (starting with its parent) and returns the
+ * ID of the first ancestor that is either of type 'optional' or has no
+ * parent_id.
+ *
+ * @param mixed $id pattern ID
+ * @return mixed ID of the ancestor the walk stopped at
+ */
+ function getOptionalPattern($id) {
+   $cur = $this->getPattern($id);
+   while (true) {
+     $cur = $this->getPattern($cur['parent_id']);
+     if (!$cur['parent_id'] || ($cur['type'] == 'optional')) {
+       break;
+     }
+   }
+   return $cur['id'];
+ }
+
+ /**
+ * Tells whether two patterns resolve to the same ancestor via getOptionalPattern().
+ *
+ * @param mixed $id first pattern ID
+ * @param mixed $id2 second pattern ID
+ * @return bool
+ */
+ function sameOptional($id, $id2) {
+   $first = $this->getOptionalPattern($id);
+   $second = $this->getOptionalPattern($id2);
+   return $first == $second;
+ }
+
+ /* */
+
+ /**
+ * Tells whether the given pattern is a UNION pattern or is nested inside one,
+ * by following the parent_id chain upwards.
+ *
+ * @param mixed $id pattern ID (or pattern array)
+ * @return int 1 if a 'union' pattern is found on the way up, 0 otherwise
+ */
+ function isUnionPattern($id) {
+   $cur = $id;
+   while (true) {
+     $pattern = $this->getPattern($cur);
+     if ($this->v('type', '', $pattern) == 'union') {
+       return 1;
+     }
+     /* a missing parent or parent '0' ends the walk */
+     if ($this->v('parent_id', '0', $pattern) == '0') {
+       return 0;
+     }
+     $cur = $pattern['parent_id'];
+   }
+ }
+
+ /* */
+
+ /**
+ * Returns the name of the value table for a column: "<prefix>s2val" or
+ * "<prefix>o2val" for the s and o columns, "<prefix>id2val" for anything else.
+ *
+ * @param string $col column name
+ * @return string prefixed table name
+ */
+ function getValueTable($col) {
+   $prefix = $this->store->getTablePrefix();
+   if (preg_match('/^(s|o)$/', $col)) {
+     return $prefix . $col . '2val';
+   }
+   return $prefix . 'id2val';
+ }
+
+ /**
+ * Returns the prefixed name of the g2t table.
+ *
+ * @return string
+ */
+ function getGraphTable() {
+   $prefix = $this->store->getTablePrefix();
+   return $prefix . 'g2t';
+ }
+
+ /* */
+
+ /**
+ * Assembles one SELECT statement for the current index from its clauses.
+ *
+ * WHERE and ORDER are generated once up front because doing so pre-fills
+ * $index['sub_joins'] and $index['constraints'], which the clauses generated
+ * afterwards rely on; their SQL is then generated again in place.
+ *
+ * @return string SELECT statement followed by a newline
+ */
+ function getQuerySQL() {
+   $nl = "\n";
+   /* pre-fill passes, the returned SQL is discarded */
+   $this->getWHERESQL();
+   $this->getORDERSQL();
+   /* in UNION queries DISTINCT/ALL is attached to the UNION keyword instead */
+   $sql = 'SELECT';
+   if (!$this->is_union_query) {
+     $sql .= $this->getDistinctSQL();
+   }
+   $sql .= $nl;
+   $sql .= $this->getResultVarsSQL() . $nl; /* fills $index['sub_joins'] */
+   $sql .= $this->getFROMSQL();
+   $sql .= $this->getAllJoinsSQL();
+   $sql .= $this->getWHERESQL();
+   $sql .= $this->getGROUPSQL();
+   $sql .= $this->getORDERSQL();
+   /* in UNION queries the LIMIT is not part of the sub-query */
+   if (!$this->is_union_query) {
+     $sql .= $this->getLIMITSQL();
+   }
+   return $sql . $nl;
+ }
+
+ /* */
+
+ /**
+ * Returns the duplicate-handling keyword for the current query.
+ *
+ * DISTINCT and REDUCED queries yield ' DISTINCT' (plain queries) or '' (UNION
+ * queries); all other queries yield '' (plain) or ' ALL' (UNION).
+ *
+ * @return string keyword with a leading space, or ''
+ */
+ function getDistinctSQL() {
+   $wants_unique = $this->v('distinct', 0, $this->infos['query']) || $this->v('reduced', 0, $this->infos['query']);
+   if ($this->is_union_query) {
+     return $wants_unique ? '' : ' ALL';
+   }
+   return $wants_unique ? ' DISTINCT' : '';
+ }
+
+ /* */
+
+ /**
+ * Builds the column list of the SELECT statement from the query's result vars.
+ *
+ * Each var is selected under its own name (aggregates under their alias),
+ * optionally followed by "<var> type" and "<var> lang_dt" columns. Every
+ * table alias used is registered in $index['sub_joins'].
+ *
+ * @return string column list, or '1 AS `success`' if no column was generated
+ */
+ function getResultVarsSQL() {
+ $r = '';
+ $vars = $this->infos['query']['result_vars'];
+ $nl = "\n";
+ $added = array();
+ foreach ($vars as $var) {
+ $var_name = $var['var'];
+ $tbl_alias = '';
+ /* second argument 0: the initial index may be consulted (see getVarTableInfos) */
+ if ($tbl_infos = $this->getVarTableInfos($var_name, 0)) {
+ $tbl = $tbl_infos['table'];
+ $col = $tbl_infos['col'];
+ $tbl_alias = $tbl_infos['table_alias'];
+ }
+ elseif ($var_name == 1) {/* ASK query */
+ $r .= '1 AS `success`';
+ }
+ else {
+ $this->addError('Result variable "' .$var_name. '" not used in query.');
+ }
+ if ($tbl_alias) {
+ /* aggregate */
+ if ($var['aggregate']) {
+ $conv_code = '';
+ /* everything but COUNT works on the value table column, forced to a number via "0 + " */
+ if (strtolower($var['aggregate']) != 'count') {
+ $tbl_alias = 'V_' . $tbl . '_' . $col . '.val';
+ $conv_code = '0 + ';
+ }
+ if (!isset($added[$var['alias']])) {
+ $r .= $r ? ',' . $nl . ' ' : ' ';
+ $distinct_code = (strtolower($var['aggregate']) == 'count') && $this->v('distinct', 0, $this->infos['query']) ? 'DISTINCT ' : '';
+ $r .= $var['aggregate'] . '(' . $conv_code . $distinct_code . $tbl_alias. ') AS `' . $var['alias'] . '`';
+ $added[$var['alias']] = 1;
+ }
+ }
+ /* normal var */
+ else {
+ if (!isset($added[$var_name])) {
+ $r .= $r ? ',' . $nl . ' ' : ' ';
+ $r .= $tbl_alias . ' AS `' . $var_name . '`';
+ $is_s = ($col == 's');
+ $is_p = ($col == 'p');
+ $is_o = ($col == 'o');
+ /* alias 'NULL': the var has no table in the current index (see getVarTableInfos) */
+ if ($tbl_alias == 'NULL') {
+ /* type / add in UNION queries? */
+ if ($is_s || $is_o) {
+ $r .= ', ' . $nl . ' NULL AS `' . $var_name . ' type`';
+ }
+ /* lang_dt / always add it in UNION queries, the var may be used as s/p/o */
+ if ($is_o || $this->is_union_query) {
+ $r .= ', ' . $nl . ' NULL AS `' . $var_name . ' lang_dt`';
+ }
+ }
+ else {
+ /* type */
+ if ($is_s || $is_o) {
+ $r .= ', ' . $nl . ' ' .$tbl_alias . '_type AS `' . $var_name . ' type`';
+ }
+ /* lang_dt / always add it in UNION queries, the var may be used as s/p/o */
+ if ($is_o) {
+ $r .= ', ' . $nl . ' ' .$tbl_alias . '_lang_dt AS `' . $var_name . ' lang_dt`';
+ }
+ elseif ($this->is_union_query) {
+ $r .= ', ' . $nl . ' NULL AS `' . $var_name . ' lang_dt`';
+ }
+ }
+ $added[$var_name] = 1;
+ }
+ }
+ /* remember the alias so that the matching joins are generated later */
+ if (!in_array($tbl_alias, $this->index['sub_joins'])) {
+ $this->index['sub_joins'][] = $tbl_alias;
+ }
+ }
+ }
+ return $r ? $r : '1 AS `success`';
+ }
+
+ /**
+ * Finds the table and column through which a variable can be selected.
+ *
+ * Lookup order: '*' wildcard, vars of the current index (T_ alias), graph
+ * vars of the current index (G_ alias) and, in UNION queries unless
+ * $ignore_initial_index is set, vars / graph vars of the initial index. A var
+ * found only in the initial index gets the alias 'NULL' and is recorded in
+ * $this->infos['null_vars'].
+ *
+ * @param string $var variable name
+ * @param int $ignore_initial_index set to 0 to allow the initial-index fallback
+ * @return mixed array with 'table', 'col', 'table_alias', or 0 if the var is unknown
+ */
+ function getVarTableInfos($var, $ignore_initial_index = 1) {
+   if ($var == '*') {
+     return array('table' => '', 'col' => '', 'table_alias' => '*');
+   }
+   /* var bound by a triple pattern of the current index */
+   $occurrences = $this->v($var, 0, $this->index['vars']);
+   if ($occurrences) {
+     $first = $occurrences[0];
+     $first['table_alias'] = 'T_' . $first['table'] . '.' . $first['col'];
+     return $first;
+   }
+   /* graph var of the current index */
+   $occurrences = $this->v($var, 0, $this->index['graph_vars']);
+   if ($occurrences) {
+     $first = $occurrences[0];
+     $first['col'] = 'g';
+     $first['table_alias'] = 'G_' . $first['table'] . '.' . $first['col'];
+     return $first;
+   }
+   /* fallback: var that only exists in the initial index */
+   if (!$this->is_union_query || $ignore_initial_index) {
+     return 0;
+   }
+   $occurrences = $this->v($var, 0, $this->initial_index['vars']);
+   if (!$occurrences) {
+     $occurrences = $this->v($var, 0, $this->initial_index['graph_vars']);
+   }
+   if (!$occurrences) {
+     return 0;
+   }
+   if (!in_array($var, $this->infos['null_vars'])) {
+     $this->infos['null_vars'][] = $var;
+   }
+   $first = $occurrences[0];
+   $first['table_alias'] = 'NULL';
+   $first['col'] = isset($first['col']) ? $first['col'] : '';
+   return $first;
+ }
+
+ /* */
+
+ /**
+ * Builds the FROM clause: all 'from' patterns as comma-separated triple
+ * tables, each aliased "T_<pattern id>", wrapped in parentheses.
+ *
+ * @return string FROM clause, or '' if there is no 'from' pattern
+ */
+ function getFROMSQL() {
+   $tables = array();
+   foreach ($this->index['from'] as $id) {
+     $tables[] = $this->getTripleTable($id) . ' T_' . $id;
+   }
+   if (!$tables) {
+     return '';
+   }
+   return 'FROM (' . implode(', ', $tables) . ')';
+ }
+
+ /* */
+
+ /**
+ * Returns all joined pattern IDs in join order: FROM tables first, then
+ * JOINs, then LEFT JOINs.
+ *
+ * @return array list of pattern IDs
+ */
+ function getOrderedJoinIDs() {
+   $idx = $this->index;
+   return array_merge($idx['from'], $idx['join'], $idx['left_join']);
+ }
+
+ /**
+ * Lists the terms a triple pattern shares with the other joined patterns.
+ *
+ * Two terms match when both are of the same type (var, bnode or uri) and
+ * carry the same value.
+ *
+ * @param mixed $id ID of the triple pattern to find join partners for
+ * @return array list of arrays with 'term' (s/p/o of $id), 'join_tbl' and 'join_term'
+ */
+ function getJoinInfos($id) {
+   $r = array();
+   $spo = array('s', 'p', 'o');
+   $joinable_types = array('var', 'bnode', 'uri');
+   $pattern = $this->getPattern($id);
+   foreach ($this->getOrderedJoinIDs() as $tbl_id) {
+     /* a pattern is never joined with itself */
+     if ($tbl_id == $id) {
+       continue;
+     }
+     $tbl_pattern = $this->getPattern($tbl_id);
+     foreach ($spo as $tbl_term) {
+       $tbl_term_type = $tbl_pattern[$tbl_term . '_type'];
+       foreach ($joinable_types as $term_type) {
+         if ($tbl_term_type != $term_type) {
+           continue;
+         }
+         foreach ($spo as $term) {
+           $same_type = ($pattern[$term . '_type'] == $term_type);
+           if ($same_type && ($tbl_pattern[$tbl_term] == $pattern[$term])) {
+             $r[] = array('term' => $term, 'join_tbl' => $tbl_id, 'join_term' => $tbl_term);
+           }
+         }
+       }
+     }
+   }
+   return $r;
+ }
+
+ /**
+ * Returns the SQL of all JOIN and LEFT JOIN clauses, ordered so that a join
+ * is only emitted after the joins whose alias it mentions.
+ *
+ * An error is added if some joins cannot be ordered this way.
+ *
+ * @return string join clauses, each preceded by a newline
+ */
+ function getAllJoinsSQL() {
+ $js = $this->getJoins();
+ $ljs = $this->getLeftJoins();
+ $entries = array_merge($js, $ljs);
+ /* map each join to its table alias (the token right before " ON ") */
+ $id2code = array();
+ foreach ($entries as $entry) {
+ if (preg_match('/([^\s]+) ON (.*)/s', $entry, $m)) {
+ $id2code[$m[1]] = $entry;
+ }
+ }
+ /* dependency matrix: a join depends on another one if its code mentions the other alias; rank = number of such dependencies */
+ /* NOTE(review): the alias is matched as a plain substring, so "T_0_1" is also found inside "T_0_10" - confirm this is acceptable */
+ $deps = array();
+ foreach ($id2code as $id => $code) {
+ $deps[$id]['rank'] = 0;
+ foreach ($id2code as $other_id => $other_code) {
+ $deps[$id]['rank'] += ($id != $other_id) && preg_match('/' . $other_id . '/', $code) ? 1 : 0;
+ $deps[$id][$other_id] = ($id != $other_id) && preg_match('/' . $other_id . '/', $code) ? 1 : 0;
+ }
+ }
+ $r = '';
+ do {
+ /* get next 0-rank */
+ $next_id = 0;
+ foreach ($deps as $id => $infos) {
+ if ($infos['rank'] == 0) {
+ $next_id = $id;
+ break;
+ }
+ }
+ if ($next_id) {
+ $r .= "\n" . $id2code[$next_id];
+ unset($deps[$next_id]);
+ /* recompute the ranks of the remaining joins without the one just emitted */
+ foreach ($deps as $id => $infos) {
+ $deps[$id]['rank'] = 0;
+ unset($deps[$id][$next_id]);
+ foreach ($infos as $k => $v) {
+ if (!in_array($k, array('rank', $next_id))) {
+ $deps[$id]['rank'] += $v;
+ $deps[$id][$k] = $v;
+ }
+ }
+ }
+ }
+ }
+ while ($next_id);
+ /* whatever is left could not be ordered */
+ if ($deps) {
+ $this->addError('Not all patterns could be rewritten to SQL JOINs');
+ }
+ return $r;
+ }
+
+ /**
+ * Builds the JOIN clauses: one per 'join' pattern, followed by the required
+ * sub joins of all 'from' and 'join' patterns.
+ *
+ * @return array list of JOIN clause strings
+ */
+ function getJoins() {
+   $nl = "\n";
+   $clauses = array();
+   foreach ($this->index['join'] as $id) {
+     $condition = $this->getJoinConditionSQL($id);
+     $clauses[] = 'JOIN ' . $this->getTripleTable($id) . ' T_' . $id . ' ON (' . $condition . $nl . ')';
+   }
+   $sub_join_ids = array_merge($this->index['from'], $this->index['join']);
+   foreach ($sub_join_ids as $id) {
+     $sub_join = $this->getRequiredSubJoinSQL($id);
+     if ($sub_join) {
+       $clauses[] = $sub_join;
+     }
+   }
+   return $clauses;
+ }
+
+ /**
+ * Builds the LEFT JOIN clauses: one per 'left_join' pattern, followed by the
+ * required (LEFT) sub joins of these patterns.
+ *
+ * @return array list of LEFT JOIN clause strings
+ */
+ function getLeftJoins() {
+   $nl = "\n";
+   $clauses = array();
+   $ids = $this->index['left_join'];
+   foreach ($ids as $id) {
+     $condition = $this->getJoinConditionSQL($id);
+     $clauses[] = 'LEFT JOIN ' . $this->getTripleTable($id) . ' T_' . $id . ' ON (' . $condition . $nl . ')';
+   }
+   foreach ($ids as $id) {
+     $sub_join = $this->getRequiredSubJoinSQL($id, 'LEFT');
+     if ($sub_join) {
+       $clauses[] = $sub_join;
+     }
+   }
+   return $clauses;
+ }
+
+ /**
+ * Builds the ON condition for the triple table of the given pattern.
+ *
+ * The condition combines NOT NULL checks on dependent tables, equality checks
+ * for terms shared with previously joined tables, and the SQL generated for
+ * the pattern itself in "join" context. Found dependencies are logged via
+ * logDependency().
+ *
+ * @param mixed $id triple pattern ID
+ * @return string condition SQL (without the surrounding "ON (...)")
+ */
+ function getJoinConditionSQL($id) {
+ $r = '';
+ $nl = "\n";
+ $infos = $this->getJoinInfos($id);
+ $pattern = $this->getPattern($id);
+
+ $tbl = 'T_' . $id;
+ /* core dependency */
+ $d_tbls = $this->getDependentJoins($id);
+ foreach ($d_tbls as $d_tbl) {
+ /* only triple table aliases of other patterns are of interest here */
+ if (preg_match('/^T_([0-9\_]+)\.[spo]+/', $d_tbl, $m) && ($m[1] != $id)) {
+ /* NOT NULL check only for tables joined earlier that are neither FROM nor JOIN tables */
+ if ($this->isJoinedBefore($m[1], $id) && !in_array($m[1], array_merge($this->index['from'], $this->index['join']))) {
+ $r .= $r ? $nl . ' AND ' : $nl . ' ';
+ $r .= '(' . $d_tbl . ' IS NOT NULL)';
+ }
+ $this->logDependency($id, $d_tbl);
+ }
+ }
+ /* triple-based join info */
+ foreach ($infos as $info) {
+ if ($this->isJoinedBefore($info['join_tbl'], $id) && $this->joinDependsOn($id, $info['join_tbl'])) {
+ $r .= $r ? $nl . ' AND ' : $nl . ' ';
+ $r .= '(' . $tbl . '.' . $info['term'] . ' = T_' . $info['join_tbl'] . '.' . $info['join_term'] . ')';
+ }
+ }
+ /* filters etc */
+ if ($sub_r = $this->getPatternSQL($pattern, 'join__T_' . $id)) {
+ $r .= $r ? $nl . ' AND ' . $sub_r : $nl . ' ' . '(' . $sub_r . ')';
+ }
+ return $r;
+ }
+
+ /**
+ * A log of identified table join dependencies in getJoinConditionSQL
+ *
+ */
+
+ function logDependency($id, $tbl) {
+   /* one list of dependent table aliases per pattern id, without duplicates */
+   if (!isset($this->dependency_log[$id])) {
+     $this->dependency_log[$id] = array();
+   }
+   if (in_array($tbl, $this->dependency_log[$id])) {
+     return;
+   }
+   $this->dependency_log[$id][] = $tbl;
+ }
+
+ /**
+ * Checks whether entries in the dependency log could perhaps be optimized
+ * (a non-zero result triggers a re-ordering of the patterns).
+ */
+
+ function problematicDependencies() {
+   /* the first log entry with more than one dependent table decides the result */
+   foreach ($this->dependency_log as $tbls) {
+     $tbl_count = count($tbls);
+     if ($tbl_count > 1) {
+       return $tbl_count;
+     }
+   }
+   return 0;
+ }
+
+ /**
+ * Tells whether $tbl_1 appears before $tbl_2 in the ordered join list
+ * (FROM tables, then JOINs, then LEFT JOINs).
+ *
+ * @param mixed $tbl_1 pattern ID expected to come first
+ * @param mixed $tbl_2 pattern ID it is compared against
+ * @return int 1 if $tbl_1 is reached first (also when both IDs are equal), 0 otherwise
+ */
+ function isJoinedBefore($tbl_1, $tbl_2) {
+   foreach ($this->getOrderedJoinIDs() as $id) {
+     if ($id == $tbl_1) {
+       return 1;
+     }
+     if ($id == $tbl_2) {
+       return 0;
+     }
+   }
+   /* neither ID is part of the join list: answer "no" explicitly instead of falling through with NULL */
+   return 0;
+ }
+
+ /**
+ * Tells whether the join of pattern $id may refer to the table of pattern $id2.
+ *
+ * This is always the case for FROM and JOIN tables; for other tables it
+ * requires a "T_<id>." alias among the dependent joins of $id2.
+ *
+ * @param mixed $id ID of the pattern being joined
+ * @param mixed $id2 ID of the pattern it wants to refer to
+ * @return int 1 or 0
+ */
+ function joinDependsOn($id, $id2) {
+   $inner_ids = array_merge($this->index['from'], $this->index['join']);
+   if (in_array($id2, $inner_ids)) {
+     return 1;
+   }
+   $own_alias_regex = '/^T_' .$id. '\./';
+   foreach ($this->getDependentJoins($id2) as $d_tbl) {
+     if (preg_match($own_alias_regex, $d_tbl)) {
+       return 1;
+     }
+   }
+   return 0;
+ }
+
+ /**
+ * Collects the table aliases the join of the given pattern depends on.
+ *
+ * These are the registered sub joins of the pattern itself, the sub joins
+ * below the pattern returned by getOptionalPattern(), and the ".s" column of
+ * every LEFT JOINed triple table below that pattern.
+ *
+ * @param mixed $id pattern ID
+ * @return array list of aliases such as "T_0_1.s", "V_0_1_o.val" or "G_0_1.g"
+ */
+ function getDependentJoins($id) {
+   $r = array();
+   /* sub joins */
+   foreach ($this->index['sub_joins'] as $alias) {
+     if (preg_match('/^(T|V|G)_' . $id . '/', $alias)) {
+       $r[] = $alias;
+     }
+   }
+   /* siblings in shared optional */
+   $o_id = $this->getOptionalPattern($id);
+   foreach ($this->index['sub_joins'] as $alias) {
+     if (preg_match('/^(T|V|G)_' . $o_id . '/', $alias) && !in_array($alias, $r)) {
+       $r[] = $alias;
+     }
+   }
+   foreach ($this->index['left_join'] as $lj_id) {
+     /* compare the alias that is actually stored in $r; checking the bare pattern id never matched and let duplicates through */
+     $lj_alias = 'T_' . $lj_id . '.s';
+     if (preg_match('/^' . $o_id . '/', $lj_id) && !in_array($lj_alias, $r)) {
+       $r[] = $lj_alias;
+     }
+   }
+   return $r;
+ }
+
+ /* */
+
+ /**
+ * Builds the value table (V_) and graph table (G_) joins that were registered
+ * in $index['sub_joins'] for the given triple pattern.
+ *
+ * @param mixed $id triple pattern ID
+ * @param string $prefix join type prefix, e.g. 'LEFT'; empty for a plain JOIN
+ * @return string concatenated JOIN clauses, '' if none is required
+ */
+ function getRequiredSubJoinSQL($id, $prefix = '') {/* id is a triple pattern id. Optional FILTERS and GRAPHs are getting added to the join directly */
+ $nl = "\n";
+ $r = '';
+ foreach ($this->index['sub_joins'] as $alias) {
+ /* value table join: alias "V_<id>_<col>.val" */
+ if (preg_match('/^V_' . $id . '_([a-z\_]+)\.val$/', $alias, $m)) {
+ $col = $m[1];
+ $sub_r = '';
+ if ($this->isOptionalPattern($id)) {
+ /* climb to the nearest 'optional' ancestor (or the topmost pattern) and add its SQL */
+ $pattern = $this->getPattern($id);
+ do {
+ $pattern = $this->getPattern($pattern['parent_id']);
+ } while ($pattern['parent_id'] && ($pattern['type'] != 'optional'));
+ $sub_r = $this->getPatternSQL($pattern, 'sub_join__V_' . $id);
+ }
+ $sub_r = $sub_r ? $nl . ' AND (' . $sub_r . ')' : '';
+ /* lang dt only on literals */
+ if ($col == 'o_lang_dt') {
+ $sub_sub_r = 'T_' . $id . '.o_type = 2';
+ $sub_r .= $nl . ' AND (' . $sub_sub_r . ')';
+ }
+ //$cur_prefix = $prefix ? $prefix . ' ' : 'STRAIGHT_';
+ $cur_prefix = $prefix ? $prefix . ' ' : '';
+ /* graph values are joined via the G_ table, all other columns via the T_ table */
+ if ($col == 'g') {
+ $r .= trim($cur_prefix . 'JOIN '. $this->getValueTable($col) . ' V_' .$id . '_' . $col. ' ON (' .$nl. ' (G_' . $id . '.' . $col. ' = V_' . $id. '_' . $col. '.id) ' . $sub_r . $nl . ')');
+ }
+ else {
+ $r .= trim($cur_prefix . 'JOIN '. $this->getValueTable($col) . ' V_' .$id . '_' . $col. ' ON (' .$nl. ' (T_' . $id . '.' . $col. ' = V_' . $id. '_' . $col. '.id) ' . $sub_r . $nl . ')');
+ }
+ }
+ /* graph table join: alias "G_<id>.g" */
+ elseif (preg_match('/^G_' . $id . '\.g$/', $alias, $m)) {
+ $pattern = $this->getPattern($id);
+ $sub_r = $this->getPatternSQL($pattern, 'graph_sub_join__G_' . $id);
+ $sub_r = $sub_r ? $nl . ' AND ' . $sub_r : '';
+ /* dataset restrictions */
+ $gi = $this->getGraphInfos($id);
+ $sub_sub_r = '';
+ $added_gts = array();
+ foreach ($gi as $set) {
+ /* each graph mentioned in the graph infos is added once to the IN list */
+ if (isset($set['graph']) && !in_array($set['graph'], $added_gts)) {
+ $sub_sub_r .= $sub_sub_r !== '' ? ',' : '';
+ $sub_sub_r .= $this->getTermID($set['graph'], 'g');
+ $added_gts[] = $set['graph'];
+ }
+ }
+ $sub_r .= ($sub_sub_r !== '') ? $nl . ' AND (G_' . $id . '.g IN (' . $sub_sub_r . '))' : ''; // /* ' . str_replace('#' , '::', $set['graph']) . ' */';
+ /* other graph join conditions */
+ foreach ($this->index['graph_vars'] as $var => $occurs) {
+ /* collect the tables of this graph var up to and including the current one */
+ $occur_tbls = array();
+ foreach ($occurs as $occur) {
+ $occur_tbls[] = $occur['table'];
+ if ($occur['table'] == $id) break;
+ }
+ /* tie this graph table to the earlier joined tables that use the same graph var */
+ foreach($occur_tbls as $tbl) {
+ if (($tbl != $id) && in_array($id, $occur_tbls) && $this->isJoinedBefore($tbl, $id)) {
+ $sub_r .= $nl . ' AND (G_' .$id. '.g = G_' .$tbl. '.g)';
+ }
+ }
+ }
+ //$cur_prefix = $prefix ? $prefix . ' ' : 'STRAIGHT_';
+ $cur_prefix = $prefix ? $prefix . ' ' : '';
+ $r .= trim($cur_prefix . 'JOIN '. $this->getGraphTable() . ' G_' .$id . ' ON (' .$nl. ' (T_' . $id . '.t = G_' .$id. '.t)' . $sub_r . $nl . ')');
+ }
+ }
+ return $r;
+ }
+
+ /* */
+
+ /**
+ * Builds the WHERE clause from the SQL of the root pattern, the constraints
+ * collected for the FROM tables, and (in some cases, see below) conditions on
+ * LEFT JOIN dependencies.
+ *
+ * @return string WHERE clause preceded by a newline, or '' if there are no conditions
+ */
+ function getWHERESQL() {
+ $r = '';
+ $nl = "\n";
+ /* standard constraints */
+ $sub_r = $this->getPatternSQL($this->getPattern('0'), 'where');
+ /* additional constraints */
+ foreach ($this->index['from'] as $id) {
+ if ($sub_sub_r = $this->getConstraintSQL($id)) {
+ $sub_r .= $sub_r ? $nl . ' AND ' . $sub_sub_r : $sub_sub_r;
+ }
+ }
+ $r .= $sub_r ? $sub_r : '';
+ /* left join dependencies */
+ foreach ($this->index['left_join'] as $id) {
+ $d_joins = $this->getDependentJoins($id);
+ $added = array();
+ $d_aliases = array();
+ //echo $id . ' =>' . print_r($d_joins, 1);
+ $id_alias = 'T_' . $id . '.s';
+ foreach ($d_joins as $alias) {
+ /* split the alias into table type, pattern id and optional column suffix */
+ if (preg_match('/^(T|V|G)_([0-9\_]+)(_[spo])?\.([a-z\_]+)/', $alias, $m)) {
+ $tbl_type = $m[1];
+ $tbl_pattern_id = $m[2];
+ $suffix = $m[3];
+ if (($tbl_pattern_id >= $id) && $this->sameOptional($tbl_pattern_id, $id)) {/* get rid of dependency permutations and nested optionals */
+ if (!in_array($tbl_type . '_' . $tbl_pattern_id . $suffix, $added)) {
+ /* NOTE(review): $sub_r was already copied into $r above; what is appended to it here is not added to $r anywhere in this method */
+ $sub_r .= $sub_r ? ' AND ' : '';
+ $sub_r .= $alias . ' IS NULL';
+ $d_aliases[] = $alias;
+ $added[] = $tbl_type . '_' . $tbl_pattern_id . $suffix;
+ $id_alias = ($tbl_pattern_id == $id) ? $alias : $id_alias;
+ }
+ }
+ }
+ }
+ /* only with more than two dependent aliases: either the pattern's own alias is NULL or all dependent aliases are non-NULL (CONCAT yields NULL if any argument is NULL) */
+ if (count($d_aliases) > 2) {/* @@todo fix this! */
+ $sub_r1 = ' /* '.$id_alias.' dependencies */';
+ $sub_r2 = '((' . $id_alias . ' IS NULL) OR (CONCAT(' . join(', ', $d_aliases) . ') IS NOT NULL))';
+ $r .= $r ? $nl . $sub_r1 . $nl . ' AND ' .$sub_r2 : $sub_r1 . $nl . $sub_r2;
+ }
+ }
+ return $r ? $nl . 'WHERE ' . $r : '';
+ }
+
+ /* */
+
+ /**
+ * Registers an SQL constraint for a pattern in $index['constraints'],
+ * skipping constraints that are already registered for that pattern.
+ *
+ * @param mixed $id pattern ID
+ * @param string $sql constraint SQL
+ */
+ function addConstraintSQLEntry($id, $sql) {
+   if (!isset($this->index['constraints'][$id])) {
+     $this->index['constraints'][$id] = array();
+   }
+   if (in_array($sql, $this->index['constraints'][$id])) {
+     return;
+   }
+   $this->index['constraints'][$id][] = $sql;
+ }
+
+ /**
+ * Returns the constraints registered for a pattern, combined with AND.
+ *
+ * @param mixed $id pattern ID
+ * @return string combined constraint SQL, '' if there is none
+ */
+ function getConstraintSQL($id) {
+   $nl = "\n";
+   $sql = '';
+   foreach ($this->v($id, array(), $this->index['constraints']) as $constraint) {
+     if ($sql) {
+       $sql .= $nl . ' AND ' . $constraint;
+     }
+     else {
+       $sql .= $constraint;
+     }
+   }
+   return $sql;
+ }
+
+ /* */
+
+ /**
+ * Dispatches SQL generation to the type-specific method
+ * "get<Type>PatternSQL" if this object has one, and to
+ * getDefaultPatternSQL() otherwise.
+ *
+ * @param array $pattern indexed pattern
+ * @param string $context generation context, handed through to the handler
+ * @return string SQL fragment
+ */
+ function getPatternSQL($pattern, $context) {
+   $handler = 'get' . ucfirst($pattern['type']) . 'PatternSQL';
+   if (method_exists($this, $handler)) {
+     return $this->$handler($pattern, $context);
+   }
+   return $this->getDefaultPatternSQL($pattern, $context);
+ }
+
+ /**
+ * Generates the SQL of a pattern without a type-specific handler by
+ * combining the SQL of its sub-patterns with AND.
+ *
+ * @param array $pattern indexed pattern
+ * @param string $context generation context, handed through to the sub-patterns
+ * @return string SQL fragment, '' if no sub-pattern produced SQL
+ */
+ function getDefaultPatternSQL($pattern, $context) {
+   $nl = "\n";
+   $sql = '';
+   foreach ($this->v('patterns', array(), $pattern) as $sub_id) {
+     $part = $this->getPatternSQL($this->getPattern($sub_id), $context);
+     if (!$part) {
+       continue;
+     }
+     /* the first fragment is taken as is, later ones are parenthesized */
+     $sql .= $sql ? $nl . ' AND (' . $part . ')' : $part;
+   }
+   return $sql ? $sql : '';
+ }
+
+ /**
+ * Generates the conditions for a single triple pattern.
+ *
+ * Depending on $context ("where", "join__...", "sub_join__...",
+ * "graph_sub_join__...") the result contains term conditions, graph
+ * conditions and, for optional patterns, the filters and constraints of the
+ * enclosing optional.
+ *
+ * @param array $pattern indexed triple pattern
+ * @param string $context generation context
+ * @return string SQL fragment; always '' for optional patterns in "where" context
+ */
+ function getTriplePatternSQL($pattern, $context) {
+ $r = '';
+ $nl = "\n";
+ $id = $pattern['id'];
+ /* s p o */
+ $vars = array();
+ foreach (array('s', 'p', 'o') as $term) {
+ $sub_r = '';
+ $type = $pattern[$term . '_type'];
+ /* constant terms are compared by their term ID; the original value is kept as an SQL comment */
+ if ($type == 'uri') {
+ $term_id = $this->getTermID($pattern[$term], $term);
+ $sub_r = '(T_' . $id . '.' . $term . ' = ' . $term_id . ') /* ' . str_replace('#' , '::', $pattern[$term]) . ' */';
+ }
+ elseif ($type == 'literal') {
+ $term_id = $this->getTermID($pattern[$term], $term);
+ $sub_r = '(T_' . $id . '.' . $term . ' = ' . $term_id . ') /* ' . preg_replace('/[\#\n]/' , ' ', $pattern[$term]) . ' */';
+ if (($lang_dt = $this->v1($term . '_lang', '', $pattern)) || ($lang_dt = $this->v1($term . '_datatype', '', $pattern))) {
+ $lang_dt_id = $this->getTermID($lang_dt);
+ $sub_r .= $nl . ' AND (T_' . $id . '.' .$term. '_lang_dt = ' . $lang_dt_id . ') /* ' . str_replace('#' , '::', $lang_dt) . ' */';
+ }
+ }
+ elseif ($type == 'var') {
+ $val = $pattern[$term];
+ if (isset($vars[$val])) {/* repeated var in pattern */
+ $sub_r = '(T_' . $id . '.' . $term . '=' . 'T_' . $id . '.' . $vars[$val] . ')';
+ }
+ $vars[$val] = $term;
+ if ($infos = $this->v($val, 0, $this->index['graph_vars'])) {/* graph var in triple pattern */
+ $sub_r .= $sub_r ? $nl . ' AND ' : '';
+ $tbl = $infos[0]['table'];
+ $sub_r .= 'G_' . $tbl . '.g = T_' . $id . '.' . $term;
+ }
+ }
+ /* term conditions are only emitted in join context, or in where context for FROM tables */
+ if ($sub_r) {
+ if (preg_match('/^(join)/', $context) || (preg_match('/^where/', $context) && in_array($id, $this->index['from']))) {
+ $r .= $r ? $nl . ' AND ' . $sub_r : $sub_r;
+ }
+ }
+ }
+ /* g */
+ if ($infos = $pattern['graph_infos']) {
+ /* any graph info makes the graph table join necessary */
+ $tbl_alias = 'G_' . $id . '.g';
+ if (!in_array($tbl_alias, $this->index['sub_joins'])) {
+ $this->index['sub_joins'][] = $tbl_alias;
+ }
+ $sub_r = array('graph_var' => '', 'graph_uri' => '', 'from' => '', 'from_named' => '');
+ foreach ($infos as $info) {
+ $type = $info['type'];
+ if ($type == 'graph') {
+ if ($info['uri']) {
+ $term_id = $this->getTermID($info['uri'], 'g');
+ $sub_r['graph_uri'] .= $sub_r['graph_uri'] ? $nl . ' AND ' : '';
+ $sub_r['graph_uri'] .= '(' .$tbl_alias. ' = ' . $term_id . ') /* ' . str_replace('#' , '::', $info['uri']) . ' */';
+ }
+ }
+ }
+ /* note: only 'graph_uri' is filled by the loop above; 'from' and 'from_named' stay empty in this method */
+ if ($sub_r['from'] && $sub_r['from_named']) {
+ $sub_r['from_named'] = '';
+ }
+ if (!$sub_r['from'] && !$sub_r['from_named']) {
+ $sub_r['graph_var'] = '';
+ }
+ /* graph conditions are only emitted for the graph table join */
+ if (preg_match('/^(graph_sub_join)/', $context)) {
+ foreach ($sub_r as $g_type => $g_sql) {
+ if ($g_sql) {
+ $r .= $r ? $nl . ' AND ' . $g_sql : $g_sql;
+ }
+ }
+ }
+ }
+ /* optional sibling filters? */
+ if (preg_match('/^(join|sub_join)/', $context) && $this->isOptionalPattern($id)) {
+ /* climb to the nearest 'optional' ancestor (or the topmost pattern) */
+ $o_pattern = $pattern;
+ do {
+ $o_pattern = $this->getPattern($o_pattern['parent_id']);
+ } while ($o_pattern['parent_id'] && ($o_pattern['type'] != 'optional'));
+ /* re-run generation in "optional_filter" context, keeping the "__<alias>" part of the current context */
+ if ($sub_r = $this->getPatternSQL($o_pattern, 'optional_filter' . preg_replace('/^(.*)(__.*)$/', '\\2', $context))) {
+ $r .= $r ? $nl . ' AND ' . $sub_r : $sub_r;
+ }
+ /* created constraints */
+ if ($sub_r = $this->getConstraintSQL($id)) {
+ $r .= $r ? $nl . ' AND ' . $sub_r : $sub_r;
+ }
+ }
+ /* result */
+ if (preg_match('/^(where)/', $context) && $this->isOptionalPattern($id)) {
+ return '';
+ }
+ return $r;
+ }
+
+ /* */
+
+ /**
+ * Generates the SQL condition for a FILTER pattern.
+ *
+ * The constraint is translated by the matching built-in-call / expression
+ * method. Filters inside optionals only produce SQL in "join" and
+ * "optional_filter" contexts. Filters using variables that are not connected
+ * to a triple pattern are moved to $index['havings'] (result aliases),
+ * rewritten (graph filters) or replaced by FALSE.
+ *
+ * @param array $pattern indexed filter pattern
+ * @param string $context generation context
+ * @return string SQL fragment, possibly ''
+ */
+ function getFilterPatternSQL($pattern, $context) {
+ $r = '';
+ $id = $pattern['id'];
+ $constraint_id = $this->v1('constraint', '', $pattern);
+ $constraint = $this->getPattern($constraint_id);
+ $constraint_type = $constraint['type'];
+ if ($constraint_type == 'built_in_call') {
+ $r = $this->getBuiltInCallSQL($constraint, $context);
+ }
+ elseif ($constraint_type == 'expression') {
+ $r = $this->getExpressionSQL($constraint, $context, '', 'filter');
+ }
+ else {
+ /* other constraint types are handled by "get<Type>ExpressionSQL" if such a method exists */
+ $m = 'get' . ucfirst($constraint_type) . 'ExpressionSQL';
+ if (method_exists($this, $m)) {
+ $r = $this->$m($constraint, $context, '', 'filter');
+ }
+ }
+ if ($this->isOptionalPattern($id) && !preg_match('/^(join|optional_filter)/', $context)) {
+ return '';
+ }
+ /* unconnected vars in FILTERs eval to false */
+ if ($sub_r = $this->hasUnconnectedFilterVars($id)) {
+ /* the var is a result alias: keep the condition for the havings list instead */
+ if ($sub_r == 'alias') {
+ if (!in_array($r, $this->index['havings'])) $this->index['havings'][] = $r;
+ return '';
+ }
+ elseif (preg_match('/^T([^\s]+\.)g (.*)$/s', $r, $m)) {/* graph filter */
+ return 'G' . $m[1] . 't ' . $m[2];
+ }
+ elseif (preg_match('/^\(*V[^\s]+_g\.val .*$/s', $r, $m)) {/* graph value filter, @@improveMe */
+ //return $r;
+ }
+ else {
+ return 'FALSE';
+ }
+ }
+ /* some really ugly tweaks */
+ /* empty language filter: FILTER ( lang(?v) = '' ) */
+ $r = preg_replace('/\(\/\* language call \*\/ ([^\s]+) = ""\)/s', '((\\1 = "") OR (\\1 LIKE "%:%"))', $r);
+ return $r;
+ }
+
+ /* */
+
+ /**
+ * Checks whether a filter uses variables that are not bound by a triple
+ * pattern in the scope of the filter. The check works on the initial index.
+ *
+ * @param mixed $filter_id ID of the filter pattern
+ * @return mixed 0 if all vars are connected, 'alias' if an unconnected var is a result var alias, 1 otherwise
+ */
+ function hasUnconnectedFilterVars($filter_id) {
+ $pattern = $this->getInitialPattern($filter_id);
+ /* $gp is looked up but not used further in this method */
+ $gp = $this->getInitialPattern($pattern['parent_id']);
+ /* var name => list of places it was seen in ('filter' and/or 'triple') */
+ $vars = array();
+ foreach ($this->initial_index['patterns'] as $id => $p) {
+ /* vars in given filter */
+ if (preg_match('/^' .$filter_id. '.+/', $id)) {
+ if ($p['type'] == 'var') {
+ $vars[$p['value']][] = 'filter';
+ }
+ if (($p['type'] == 'built_in_call') && ($p['call'] == 'bound')) {
+ $vars[$p['args'][0]['value']][] = 'filter';
+ }
+ }
+ /* triple patterns if their scope is in the parent path of the filter */
+ if ($p['type'] == 'triple') {
+ /* climb from the triple to its scope: the first 'group' ancestor that is not a direct child of a union */
+ $tp = $p;
+ do {
+ $proceed = 1;
+ $tp = $this->getInitialPattern($tp['parent_id']);
+ if ($tp['type'] == 'group') {
+ $proceed = 0;
+ if (isset($tp['parent_id']) && ($p_tp = $this->getInitialPattern($tp['parent_id'])) && ($p_tp['type'] == 'union')) {
+ $proceed = 1;
+ }
+ }
+ } while ($proceed);
+ $tp_id = $tp['id'];
+ $fp_id = $filter_id;
+ $ok = 0;
+ /* climb from the filter until a 'group' is reached; the triple counts if one of the non-group patterns on the way has the triple's scope as parent */
+ do {
+ $fp = $this->getInitialPattern($fp_id);
+ $fp_id = $fp['parent_id'];
+ if (($fp['type'] != 'group') && ($fp_id === $tp_id)) {
+ $ok = 1;
+ break;
+ }
+ } while (($fp['parent_id'] != $fp['id']) && ($fp['type'] != 'group'));
+ if ($ok) {
+ foreach (array('s', 'p', 'o') as $term) {
+ if ($p[$term . '_type'] == 'var') {
+ $vars[$p[$term]][] = 'triple';
+ }
+ }
+ }
+ }
+ }
+ /* the first var without a 'triple' entry decides the result */
+ foreach ($vars as $var => $types) {
+ if (!in_array('triple', $types)) {
+ /* might be an alias */
+ $r = 1;
+ foreach ($this->infos['query']['result_vars'] as $r_var) {
+ if ($r_var['alias'] == $var) {
+ $r = 'alias';
+ break;
+ }
+ //if ($r_var['alias'] == $var) $r = 0;
+ }
+ /* filter */
+ //if (in_array('filter', $types)) $r = 0;
+ if ($r) return $r;
+ }
+ }
+ return 0;
+ }
+
+ /* */
+
+ /**
+ * Translates an expression pattern into SQL.
+ *
+ * and/or expressions are built from their sub-patterns; built-in calls,
+ * literals and other sub types are delegated to their own methods. In a
+ * table-specific context ("...__T_<id>" etc.) the result is dropped if it
+ * references a table that is not joined before the context table.
+ *
+ * @param array $pattern indexed expression pattern
+ * @param string $context generation context
+ * @param string $val_type value type hint, handed on to built-in call and literal handlers
+ * @param string $parent_type type of the enclosing construct, e.g. 'filter'
+ * @return string SQL wrapped in parentheses, or an empty result
+ */
+ function getExpressionSQL($pattern, $context, $val_type = '', $parent_type = '') {
+ $r = '';
+ $nl = "\n";
+ $type = $this->v1('type', '', $pattern);
+ $sub_type = $this->v1('sub_type', $type, $pattern);
+ if (preg_match('/^(and|or)$/', $sub_type)) {
+ /* combine the SQL of all sub-patterns with AND / OR, skipping empty ones */
+ foreach ($pattern['patterns'] as $sub_id) {
+ $sub_pattern = $this->getPattern($sub_id);
+ $sub_pattern_type = $sub_pattern['type'];
+ if ($sub_pattern_type == 'built_in_call') {
+ $sub_r = $this->getBuiltInCallSQL($sub_pattern, $context, '', $parent_type);
+ }
+ else {
+ $sub_r = $this->getExpressionSQL($sub_pattern, $context, '', $parent_type);
+ }
+ if ($sub_r) {
+ $r .= $r ? ' ' . strtoupper($sub_type). ' (' .$sub_r. ')' : '(' . $sub_r . ')';
+ }
+ }
+ }
+ elseif ($sub_type == 'built_in_call') {
+ $r = $this->getBuiltInCallSQL($pattern, $context, $val_type, $parent_type);
+ }
+ elseif (preg_match('/literal/', $sub_type)) {
+ $r = $this->getLiteralExpressionSQL($pattern, $context, $val_type, $parent_type);
+ }
+ elseif ($sub_type) {
+ /* any other sub type is handled by "get<SubType>ExpressionSQL" if such a method exists */
+ $m = 'get' . ucfirst($sub_type) . 'ExpressionSQL';
+ if (method_exists($this, $m)) {
+ $r = $this->$m($pattern, $context, '', $parent_type);
+ }
+ }
+ /* skip expressions that reference non-yet-joined tables */
+ if (preg_match('/__(T|V|G)_(.+)$/', $context, $m)) {
+ $context_pattern_id = $m[2];
+ $context_table_type = $m[1];
+ /* NOTE(review): this regex takes one digit per "_" segment, so "T_0_12" is read as "T_0_1" - confirm multi-digit segments cannot occur here */
+ if (preg_match_all('/((T|V|G)(\_[0-9])+)/', $r, $m)) {
+ $aliases = $m[1];
+ $keep = 1;
+ foreach ($aliases as $alias) {
+ if (preg_match('/(T|V|G)_(.*)$/', $alias, $m)) {
+ $tbl_type = $m[1];
+ $tbl = $m[2];
+ if (!$this->isJoinedBefore($tbl, $context_pattern_id)) {
+ $keep = 0;
+ }
+ /* a T_ context must not reference the V_ table of the same pattern */
+ elseif (($context_pattern_id == $tbl) && preg_match('/(TV)/', $context_table_type . $tbl_type)) {
+ $keep = 0;
+ }
+ }
+ }
+ $r = $keep ? $r : '';
+ }
+ }
+ return $r ? '(' . $r . ')' : $r;
+ }
+
+ /**
+  * Reports whether any of the given patterns is a literal typed as xsd integer, float or double.
+  *
+  * @param array $pattern_ids ids resolved through getPattern()
+  * @return string 'numeric' on the first such literal, '' otherwise
+  */
+ function detectExpressionValueType($pattern_ids) {
+   $numeric_datatypes = array($this->xsd . 'integer', $this->xsd . 'float', $this->xsd . 'double');
+   foreach ($pattern_ids as $pattern_id) {
+     $candidate = $this->getPattern($pattern_id);
+     if ($this->v('type', '', $candidate) != 'literal') {
+       continue;
+     }
+     if (!isset($candidate['datatype'])) {
+       continue;
+     }
+     if (in_array($candidate['datatype'], $numeric_datatypes)) {
+       return 'numeric';
+     }
+   }
+   return '';
+ }
+
+ /* */
+
+ /**
+  * Builds SQL for a relational expression by joining the SQL of its operands with the pattern's operator.
+  *
+  * The incoming $val_type is replaced by the type detected from the operands. Each
+  * operand receives the operator as "parent_op" and is rendered with parent type 'relational'.
+  *
+  * @return string the parenthesised SQL, or an empty value if no operand produced SQL
+  */
+ function getRelationalExpressionSQL($pattern, $context, $val_type = '', $parent_type = '') {
+   $sql = '';
+   $val_type = $this->detectExpressionValueType($pattern['patterns']);
+   $op = $pattern['operator'];
+   foreach ($pattern['patterns'] as $operand_id) {
+     $operand = $this->getPattern($operand_id);
+     $operand['parent_op'] = $op;
+     $operand_type = $operand['type'];
+     if ($operand_type == 'built_in_call') {
+       $handler = 'getBuiltInCallSQL';
+     }
+     else {
+       $handler = 'get' . ucfirst($operand_type) . 'ExpressionSQL';
+     }
+     /* types that already end in "Expression" must not get the suffix twice */
+     $handler = str_replace('ExpressionExpression', 'Expression', $handler);
+     $operand_sql = '';
+     if (method_exists($this, $handler)) {
+       $operand_sql = $this->$handler($operand, $context, $val_type, 'relational');
+     }
+     $sql .= ($sql ? ' ' . $op . ' ' : '') . $operand_sql;
+   }
+   if ($sql) {
+     return '(' . $sql . ')';
+   }
+   return $sql;
+ }
+
+ /**
+  * Builds SQL for an additive expression: the SQL of every operand, separated by single spaces.
+  *
+  * The incoming $val_type is replaced by the type detected from the operands; operands
+  * are rendered with parent type 'additive'.
+  *
+  * @return string the concatenated operand SQL (not parenthesised)
+  */
+ function getAdditiveExpressionSQL($pattern, $context, $val_type = '', $parent_type = '') {
+   $sql = '';
+   $val_type = $this->detectExpressionValueType($pattern['patterns']);
+   foreach ($pattern['patterns'] as $operand_id) {
+     $operand = $this->getPattern($operand_id);
+     $operand_type = $this->v('type', '', $operand);
+     if ($operand_type == 'built_in_call') {
+       $handler = 'getBuiltInCallSQL';
+     }
+     else {
+       $handler = 'get' . ucfirst($operand_type) . 'ExpressionSQL';
+     }
+     $handler = str_replace('ExpressionExpression', 'Expression', $handler);
+     $operand_sql = '';
+     if (method_exists($this, $handler)) {
+       $operand_sql = $this->$handler($operand, $context, $val_type, 'additive');
+     }
+     $sql .= ($sql ? ' ' : '') . $operand_sql;
+   }
+   return $sql;
+ }
+
+ /**
+  * Builds SQL for a multiplicative expression: the SQL of every operand, separated by single spaces.
+  *
+  * The incoming $val_type is replaced by the type detected from the operands; operands
+  * are rendered with parent type 'multiplicative'.
+  *
+  * @return string the concatenated operand SQL (not parenthesised)
+  */
+ function getMultiplicativeExpressionSQL($pattern, $context, $val_type = '', $parent_type = '') {
+   $sql = '';
+   $val_type = $this->detectExpressionValueType($pattern['patterns']);
+   foreach ($pattern['patterns'] as $operand_id) {
+     $operand = $this->getPattern($operand_id);
+     $operand_type = $operand['type'];
+     if ($operand_type == 'built_in_call') {
+       $handler = 'getBuiltInCallSQL';
+     }
+     else {
+       $handler = 'get' . ucfirst($operand_type) . 'ExpressionSQL';
+     }
+     $handler = str_replace('ExpressionExpression', 'Expression', $handler);
+     $operand_sql = '';
+     if (method_exists($this, $handler)) {
+       $operand_sql = $this->$handler($operand, $context, $val_type, 'multiplicative');
+     }
+     $sql .= ($sql ? ' ' : '') . $operand_sql;
+   }
+   return $sql;
+ }
+
+ /* */
+
+ /**
+  * Builds the SQL reference (or truthiness test) for a variable used in an expression.
+  *
+  * The variable is resolved via getVarTableInfos(). Without a table, the variable is
+  * looked up among the result var aliases and returned back-ticked, or '' if unknown.
+  * Otherwise a column reference is chosen from $context / $parent_type, and either a
+  * boolean test (parent type starting with "filter" or "and") or the operator-prefixed
+  * reference is returned.
+  *
+  * @param array  $pattern     var pattern (reads "value", "operator", "parent_op", "id")
+  * @param string $context     SQL context ('order', 'sameterm', or a name ending in "__<T|V|G>_<id>")
+  * @param string $val_type    'numeric' triggers an extra constraint entry (see below)
+  * @param string $parent_type type of the enclosing expression
+  * @return string SQL fragment, or '' if the variable cannot be resolved
+  */
+ function getVarExpressionSQL($pattern, $context, $val_type = '', $parent_type = '') {
+ $var = $pattern['value'];
+ $info = $this->getVarTableInfos($var);
+ if (!$tbl = $info['table']) {
+ /* might be an aggregate var */
+ $vars = $this->infos['query']['result_vars'];
+ foreach ($vars as $test_var) {
+ if ($test_var['alias'] == $pattern['value']) {
+ return '`' . $pattern['value'] . '`';
+ }
+ }
+ return '';
+ }
+ $col = $info['col'];
+ /* ordering on an object column uses the triple table's o_comp column */
+ if (($context == 'order') && ($col == 'o')) {
+ $tbl_alias = 'T_' . $tbl . '.o_comp';
+ }
+ /* sameTerm compares the raw triple table column */
+ elseif ($context == 'sameterm') {
+ $tbl_alias = 'T_' . $tbl . '.' . $col;
+ }
+ /* "<" / ">" comparisons on an object column also use o_comp */
+ elseif (($parent_type == 'relational') && ($col == 'o') && (preg_match('/[\<\>]/', $this->v('parent_op', '', $pattern)))) {
+ $tbl_alias = 'T_' . $tbl . '.o_comp';
+ }
+ /* everything else reads the value table alias and registers it as a required sub join */
+ else {
+ $tbl_alias = 'V_' . $tbl . '_' . $col . '.val';
+ if (!in_array($tbl_alias, $this->index['sub_joins'])) {
+ $this->index['sub_joins'][] = $tbl_alias;
+ }
+ }
+ $op = $this->v('operator', '', $pattern);
+ /* variable used directly as a boolean: emit a truthiness test */
+ if (preg_match('/^(filter|and)/', $parent_type)) {
+ if ($op == '!') {
+ $r = '(((' . $tbl_alias . ' = 0) AND (CONCAT("1", ' . $tbl_alias . ') != 1))'; /* 0 and no string */
+ $r .= ' OR (' . $tbl_alias . ' IN ("", "false")))'; /* or "", or "false" */
+ }
+ else {
+ $r = '((' . $tbl_alias . ' != 0)'; /* not null */
+ $r .= ' OR ((CONCAT("1", ' . $tbl_alias . ') = 1) AND (' . $tbl_alias . ' NOT IN ("", "false"))))'; /* string, and not "" or "false" */
+ }
+ }
+ else {
+ $r = trim($op . ' ' . $tbl_alias);
+ if ($val_type == 'numeric') {
+ /* derive the context pattern from the context suffix, falling back to this pattern's own id */
+ if (preg_match('/__(T|V|G)_(.+)$/', $context, $m)) {
+ $context_pattern_id = $m[2];
+ $context_table_type = $m[1];
+ }
+ else {
+ $context_pattern_id = $pattern['id'];
+ $context_table_type = 'T';
+ }
+ if ($this->isJoinedBefore($tbl, $context_pattern_id)) {
+ /* NOTE(review): the second assignment overwrites the first; net effect is that $add is 1 only when $tbl equals the context pattern id and the context table type is 'V' - confirm this is intended */
+ $add = ($tbl != $context_pattern_id) ? 1 : 0;
+ $add = (!$add && ($context_table_type == 'V')) ? 1 : 0;
+ if ($add) {
+ $this->addConstraintSQLEntry($context_pattern_id, '(' .$r. ' = "0" OR ' . $r . '*1.0 != 0)');
+ }
+ }
+ }
+ }
+ return $r;
+ }
+
+ /* */
+
+ /**
+  * Renders a URI operand: the pattern's operator followed by the URI, either bare
+  * (when numeric) or escaped and wrapped in double quotes.
+  *
+  * @return string operator, a space, and the rendered value
+  */
+ function getUriExpressionSQL($pattern, $context, $val_type = '') {
+   $uri = $pattern['uri'];
+   $prefix = $pattern['operator'];
+   if (is_numeric($uri)) {
+     return $prefix . ' ' . $uri;
+   }
+   return $prefix . ' "' . mysql_real_escape_string($uri, $this->store->getDBCon()) . '"';
+ }
+
+ /* */
+
+ /**
+  * Renders a literal operand and, where possible, registers a language/datatype constraint.
+  *
+  * Rendering: numeric values with a datatype are emitted bare, xsd:boolean true/false
+  * are emitted upper-cased, everything else is escaped and double-quoted (with a
+  * backslash adjustment when the parent is a regex call).
+  * If the literal has a language or datatype and a sibling variable bound to an object
+  * column is found, an "o_lang_dt" constraint may be added via addConstraintSQLEntry().
+  *
+  * @param array  $pattern     literal pattern (reads "value", "operator", "datatype", "lang", "id")
+  * @param string $context     SQL context ('where' or a name ending in "__<T|V|G>_<id>")
+  * @param string $val_type    not read in this method
+  * @param string $parent_type 'regex' switches the escaping of the value
+  * @return string the trimmed operator + value SQL
+  */
+ function getLiteralExpressionSQL($pattern, $context, $val_type = '', $parent_type = '') {
+ $val = $pattern['value'];
+ $r = $pattern['operator'];
+ if (is_numeric($val) && $this->v('datatype', 0, $pattern)) {
+ $r .= ' ' . $val;
+ }
+ elseif (preg_match('/^(true|false)$/i', $val) && ($this->v1('datatype', '', $pattern) == 'http://www.w3.org/2001/XMLSchema#boolean')) {
+ $r .= ' ' . strtoupper($val);
+ }
+ elseif ($parent_type == 'regex') {
+ $sub_r = mysql_real_escape_string($val, $this->store->getDBCon());
+ /* collapse the doubled backslashes produced by the escaping back to single ones */
+ $r .= ' "' . preg_replace('/\x5c\x5c/', '\\', $sub_r) . '"';
+ }
+ else {
+ $r .= ' "' . mysql_real_escape_string($val, $this->store->getDBCon()) . '"';
+ }
+ if (($lang_dt = $this->v1('lang', '', $pattern)) || ($lang_dt = $this->v1('datatype', '', $pattern))) {
+ /* try table/alias via var in siblings */
+ if ($var = $this->findSiblingVarExpression($pattern['id'])) {
+ if (isset($this->index['vars'][$var])) {
+ $infos = $this->index['vars'][$var];
+ /* only the first object-column binding of the sibling var is considered (see the break below) */
+ foreach ($infos as $info) {
+ if ($info['col'] == 'o') {
+ $tbl = $info['table'];
+ $term_id = $this->getTermID($lang_dt);
+ if ($pattern['operator'] != '!=') {
+ if (preg_match('/__(T|V|G)_(.+)$/', $context, $m)) {
+ $context_pattern_id = $m[2];
+ $context_table_type = $m[1];
+ }
+ elseif ($context == 'where') {
+ $context_pattern_id = $tbl;
+ }
+ else {
+ $context_pattern_id = $pattern['id'];
+ }
+ if ($tbl == $context_pattern_id) {/* @todo better dependency check */
+ if ($term_id || ($lang_dt != 'http://www.w3.org/2001/XMLSchema#integer')) {/* skip if simple int, but no id */
+ $this->addConstraintSQLEntry($context_pattern_id, 'T_' . $tbl . '.o_lang_dt = ' . $term_id . ' /* ' . str_replace('#' , '::', $lang_dt) . ' */');
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+ return trim($r);
+ }
+
+ /**
+  * Finds a variable among the direct sub-patterns of the nearest enclosing 'expression' pattern.
+  *
+  * Starting from the parent of pattern $id, the parent chain is followed until a
+  * pattern of type 'expression' is reached or a pattern has no parent id.
+  *
+  * @param string $id id of the pattern to start from
+  * @return string the value (name) of the first 'var' sub-pattern, or '' if there is none
+  */
+ function findSiblingVarExpression($id) {
+   $ancestor = $this->getPattern($id);
+   while (true) {
+     $ancestor = $this->getPattern($ancestor['parent_id']);
+     if (!$ancestor['parent_id'] || ($ancestor['type'] == 'expression')) {
+       break;
+     }
+   }
+   foreach ($this->v('patterns', array(), $ancestor) as $child_id) {
+     $child = $this->getPattern($child_id);
+     if ($child['type'] == 'var') {
+       return $child['value'];
+     }
+   }
+   return '';
+ }
+
+ /* */
+
+ /**
+  * Builds SQL for a function call expression.
+  *
+  * With extension functions enabled, URIs below http://web-semantics.org/ns/mysql/
+  * are mapped to the MySQL function of that name, applied to the rendered arguments.
+  * XML Schema datatype URIs are treated as casts and render their first argument only.
+  * Any other function yields ''.
+  *
+  * @param array  $pattern     function pattern (reads "uri", "operator", "args")
+  * @param string $context     SQL context, passed on to getExpressionSQL()
+  * @param string $val_type    passed on to getExpressionSQL()
+  * @param string $parent_type passed on to getExpressionSQL()
+  * @return string SQL fragment or ''
+  */
+ function getFunctionExpressionSQL($pattern, $context, $val_type = '', $parent_type = '') {
+   $fnc_uri = $pattern['uri'];
+   $op = $this->v('operator', '', $pattern);
+   if ($op) $op .= ' ';
+   if ($this->allow_extension_functions) {
+     /* mysql functions */
+     if (preg_match('/^http\:\/\/web\-semantics\.org\/ns\/mysql\/(.*)$/', $fnc_uri, $m)) {
+       /* the URI tail ends up verbatim in the SQL, so only accept a plain identifier as function name */
+       if (preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $m[1])) {
+         $fnc_name = strtoupper($m[1]);
+         $sub_r = '';
+         foreach ($pattern['args'] as $arg) {
+           $sub_r .= $sub_r ? ', ' : '';
+           $sub_r .= $this->getExpressionSQL($arg, $context, $val_type, $parent_type);
+         }
+         return $op . $fnc_name . '(' . $sub_r . ')';
+       }
+     }
+     /* any other: ignore */
+   }
+   /* simple type conversions */
+   if (strpos($fnc_uri, 'http://www.w3.org/2001/XMLSchema#') === 0) {
+     return $op . $this->getExpressionSQL($pattern['args'][0], $context, $val_type, $parent_type);
+   }
+   return '';
+ }
+
+ /* */
+
+ /**
+  * Dispatches a built-in call pattern to its get<Call>CallSQL method.
+  *
+  * @param array  $pattern built-in call pattern (reads "call")
+  * @param string $context SQL context, passed on to the handler
+  * @return string the handler's SQL; '' (plus a registered error) if no handler exists
+  */
+ function getBuiltInCallSQL($pattern, $context) {
+   $call = $pattern['call'];
+   $handler = 'get' . ucfirst($call) . 'CallSQL';
+   if (!method_exists($this, $handler)) {
+     $this->addError('Unknown built-in call "' . $call . '"');
+     return '';
+   }
+   return $this->$handler($pattern, $context);
+ }
+
+ /**
+  * Builds the NULL test for bound(?var) / !bound(?var) on the variable's triple table column.
+  *
+  * @param array  $pattern call pattern (reads "args"[0]["value"] and "operator")
+  * @param string $context not read in this method
+  * @return string "<column> IS NOT NULL", "<column> IS NULL" when negated, or '' if the variable has no table
+  */
+ function getBoundCallSQL($pattern, $context) {
+   $info = $this->getVarTableInfos($pattern['args'][0]['value']);
+   $tbl = $info['table'];
+   if (!$tbl) {
+     return '';
+   }
+   $column_ref = 'T_' . $tbl . '.' . $info['col'];
+   $negated = ($pattern['operator'] == '!');
+   return $column_ref . ($negated ? ' IS NULL' : ' IS NOT NULL');
+ }
+
+ /**
+  * Builds a comparison of the variable's "<col>_type" column against a term type code.
+  *
+  * @param array  $pattern call pattern (reads "args"[0]["value"] and "operator")
+  * @param string $context not read in this method
+  * @param int    $type    type code to compare with
+  * @return string "<column>_type <operator>= <type>", or '' if the variable has no table
+  */
+ function getHasTypeCallSQL($pattern, $context, $type) {
+   $info = $this->getVarTableInfos($pattern['args'][0]['value']);
+   $tbl = $info['table'];
+   if (!$tbl) {
+     return '';
+   }
+   $type_column = 'T_' . $tbl . '.' . $info['col'] . '_type';
+   $comparison = $this->v('operator', '', $pattern) . '= ';
+   return $type_column . ' ' . $comparison . $type;
+ }
+
+ /* isLiteral(): delegates to getHasTypeCallSQL() with type code 2 */
+ function getIsliteralCallSQL($pattern, $context) {
+ return $this->getHasTypeCallSQL($pattern, $context, 2);
+ }
+
+ /* isBlank(): delegates to getHasTypeCallSQL() with type code 1 */
+ function getIsblankCallSQL($pattern, $context) {
+ return $this->getHasTypeCallSQL($pattern, $context, 1);
+ }
+
+ /* isIRI(): delegates to getHasTypeCallSQL() with type code 0 */
+ function getIsiriCallSQL($pattern, $context) {
+ return $this->getHasTypeCallSQL($pattern, $context, 0);
+ }
+
+ /* isURI(): same check as isIRI(), type code 0 */
+ function getIsuriCallSQL($pattern, $context) {
+ return $this->getHasTypeCallSQL($pattern, $context, 0);
+ }
+
+ /**
+  * Renders str(arg) by rendering the argument itself via its get<Type>ExpressionSQL method.
+  *
+  * @param array  $pattern call pattern (reads "args"[0])
+  * @param string $context SQL context, passed on to the handler
+  * @return string|null the argument's SQL; nothing is returned if no handler exists for its type
+  */
+ function getStrCallSQL($pattern, $context) {
+   $arg = $pattern['args'][0];
+   $handler = 'get' . ucfirst($arg['type']) . 'ExpressionSQL';
+   if (!method_exists($this, $handler)) {
+     return;
+   }
+   return $this->$handler($arg, $context);
+ }
+
+ /**
+  * Renders a function call whose URI ends in integer, double, float or string by
+  * rendering its first argument only (the conversion itself is skipped).
+  *
+  * @param array  $pattern call pattern (reads "uri" and "args"[0])
+  * @param string $context SQL context, passed on to the handler
+  * @return string|null the argument's SQL; nothing is returned for other URIs or unknown argument types
+  */
+ function getFunctionCallSQL($pattern, $context) {
+   /* skip conversions */
+   if (!preg_match('/(integer|double|float|string)$/', $pattern['uri'])) {
+     return;
+   }
+   $arg = $pattern['args'][0];
+   $handler = 'get' . ucfirst($arg['type']) . 'ExpressionSQL';
+   if (!method_exists($this, $handler)) {
+     return;
+   }
+   return $this->$handler($arg, $context);
+ }
+
+ /**
+  * Shared implementation of lang() and datatype(): resolves the call's argument to the
+  * "o_lang_dt" value table alias of a variable and registers that alias as a sub join.
+  *
+  * Recurses into nested patterns until a 'var' argument is found.
+  *
+  * @param array  $pattern call or nested pattern (reads "patterns", "args", "operator")
+  * @param string $context passed through on recursion, otherwise unused
+  * @return string operator-prefixed alias, 'FALSE' if the pattern has no "args", or '' if the variable has no table
+  */
+ function getLangDatatypeCallSQL($pattern, $context) {
+ $r = '';
+ if (isset($pattern['patterns'])) { /* proceed with first argument only (assumed as base type for type promotion) */
+ $sub_pattern = array('args' => array($pattern['patterns'][0]));
+ return $this->getLangDatatypeCallSQL($sub_pattern, $context);
+ }
+ if (!isset($pattern['args'])) {
+ return 'FALSE';
+ }
+ $sub_type = $pattern['args'][0]['type'];
+ /* non-variable argument: descend into it and try again */
+ if ($sub_type != 'var') {
+ return $this->getLangDatatypeCallSQL($pattern['args'][0], $context);
+ }
+ $var = $pattern['args'][0]['value'];
+ $info = $this->getVarTableInfos($var);
+ if (!$tbl = $info['table']) {
+ return '';
+ }
+ $col = 'o_lang_dt';
+ $tbl_alias = 'V_' . $tbl . '_' . $col . '.val';
+ if (!in_array($tbl_alias, $this->index['sub_joins'])) {
+ $this->index['sub_joins'][] = $tbl_alias;
+ }
+ $op = $this->v('operator', '', $pattern);
+ $r = trim($op . ' ' . $tbl_alias);
+ return $r;
+ }
+
+ /* datatype(): getLangDatatypeCallSQL() result, prefixed with an SQL comment marking the call */
+ function getDatatypeCallSQL($pattern, $context) {
+ return '/* datatype call */ ' . $this->getLangDatatypeCallSQL($pattern, $context);
+ }
+
+ /* lang(): getLangDatatypeCallSQL() result, prefixed with an SQL comment marking the call */
+ function getLangCallSQL($pattern, $context) {
+ return '/* language call */ ' . $this->getLangDatatypeCallSQL($pattern, $context);
+ }
+
+ /**
+  * Builds SQL for langMatches(arg1, arg2).
+  *
+  * A quoted constant range is turned into a prefix LIKE; the range "*" becomes a
+  * REGEXP test for a non-empty language tag. Any other second argument is compared
+  * via LIKE CONCAT(arg2, "%").
+  *
+  * @param array  $pattern call pattern (reads "args" and "operator"); exactly two args are required
+  * @param string $context SQL context, passed on to the argument renderers
+  * @return string SQL fragment, or '' if the call does not have two arguments
+  */
+ function getLangmatchesCallSQL($pattern, $context) {
+   if (count($pattern['args']) == 2) {
+     $arg_1 = $pattern['args'][0];
+     $arg_2 = $pattern['args'][1];
+     $sub_r_1 = $this->getBuiltInCallSQL($arg_1, $context);/* adds value join */
+     $sub_r_2 = $this->getExpressionSQL($arg_2, $context);
+     $op = $this->v('operator', '', $pattern);
+     /* getExpressionSQL() wraps its result in parentheses, so skip any leading "(" before the opening quote */
+     if (preg_match('/^\(*([\"\'])([^\'\"]+)/', $sub_r_2, $m)) {
+       if ($m[2] == '*') {
+         $r = ($op == '!') ? 'NOT (' . $sub_r_1 . ' REGEXP "^[a-zA-Z\-]+$"' . ')' : $sub_r_1 . ' REGEXP "^[a-zA-Z\-]+$"';
+       }
+       else {
+         $r = ($op == '!') ? $sub_r_1 . ' NOT LIKE ' . $m[1] . $m[2] . '%' . $m[1] : $sub_r_1 . ' LIKE ' . $m[1] . $m[2] . '%' . $m[1];
+       }
+     }
+     else {
+       $r = ($op == '!') ? $sub_r_1 . ' NOT LIKE CONCAT(' . $sub_r_2 . ', "%")' : $sub_r_1 . ' LIKE CONCAT(' . $sub_r_2 . ', "%")';
+     }
+     return $r;
+   }
+   return '';
+ }
+
+ /**
+  * Builds SQL for sameTerm(a, b): both arguments rendered in the 'sameterm' context
+  * and compared with "=" (prefixed by the pattern's operator, if any).
+  *
+  * @param array  $pattern call pattern (reads "args" and "operator")
+  * @param string $context not read in this method; arguments always use the 'sameterm' context
+  * @return string the comparison, or '' unless exactly two arguments are given
+  */
+ function getSametermCallSQL($pattern, $context) {
+   if (count($pattern['args']) != 2) {
+     return '';
+   }
+   $left = $this->getExpressionSQL($pattern['args'][0], 'sameterm');
+   $right = $this->getExpressionSQL($pattern['args'][1], 'sameterm');
+   $comparator = $this->v('operator', '', $pattern) . '= ';
+   return $left . ' ' . $comparator . $right;
+ }
+
+ /**
+  * Builds SQL for regex(text, pattern [, flags]).
+  *
+  * Depending on the pattern, one of three forms is produced:
+  *  - MATCH ... AGAINST, for a plain search term longer than 4 characters on an object
+  *    value column, when the call is not negated and the store has a fulltext index;
+  *  - LIKE, for a plain search term (optionally anchored with ^ / $) with the "i" flag;
+  *  - REGEXP otherwise (BINARY unless the "i" flag is set).
+  *
+  * @param array  $pattern call pattern (reads "args" and "operator"); at least two args are required
+  * @param string $context SQL context, passed on to getExpressionSQL()
+  * @return string SQL fragment, or '' if an argument is missing or could not be rendered
+  */
+ function getRegexCallSQL($pattern, $context) {
+   $ac = count($pattern['args']);
+   if ($ac < 2) {
+     return '';
+   }
+   $sub_rs = array();
+   foreach ($pattern['args'] as $i => $arg) {
+     $sub_rs[$i + 1] = $this->getExpressionSQL($arg, $context, '', 'regex');
+   }
+   $sub_r_1 = isset($sub_rs[1]) ? $sub_rs[1] : '';
+   $sub_r_2 = isset($sub_rs[2]) ? $sub_rs[2] : '';
+   /* flags: content of the quoted third argument, lower-cased */
+   $sub_r_3 = (isset($sub_rs[3]) && preg_match('/[\"\'](.+)[\"\']/', $sub_rs[3], $m)) ? strtolower($m[1]) : '';
+   $op = ($this->v('operator', '', $pattern) == '!') ? ' NOT' : '';
+   if (!$sub_r_1 || !$sub_r_2) return '';
+   /* a "simple" search has no regex metacharacters apart from an optional leading ^ and trailing $;
+      |, ^ and $ are excluded from the term itself so that alternations stay on the REGEXP path
+      and a trailing $ is captured as an anchor instead of being swallowed by the term */
+   $is_simple_search = preg_match('/^[\(\"]+(\^)?([^\\\*\[\]\}\{\(\)\"\'\?\+\.\|\^\$]+)(\$)?[\)\"]+$/is', $sub_r_2, $m);
+   $is_o_search = preg_match('/o\.val\)*$/', $sub_r_1);
+   /* fulltext search */
+   if ($is_simple_search && $is_o_search && !$op && (strlen($m[2]) > 4) && $this->store->hasFulltextIndex()) {
+     return 'MATCH(' . trim($sub_r_1, '()') . ') AGAINST("' . $m[2] . '")';
+   }
+   /* LIKE */
+   if ($is_simple_search && ($sub_r_3 == 'i')) {
+     /* % and _ are literal characters in the regex but wildcards in LIKE, so escape them (the term contains no backslashes) */
+     $term = addcslashes($m[2], '%_');
+     $like = $m[1] ? $term : '%' . $term;
+     $like .= isset($m[3]) && $m[3] ? '' : '%';
+     return $sub_r_1 . $op . ' LIKE "' . $like . '"';
+   }
+   /* REGEXP */
+   $opt = ($sub_r_3 == 'i') ? '' : 'BINARY ';
+   return $sub_r_1 . $op . ' REGEXP ' . $opt . $sub_r_2;
+ }
+
+ /* */
+
+ /**
+  * Builds the GROUP BY clause from the query's group infos and appends a HAVING
+  * clause built from the collected having conditions.
+  *
+  * @return string the clause(s) preceded by a newline, or '' if there is nothing to emit
+  */
+ function getGROUPSQL() {
+   $group_cols = array();
+   foreach ($this->v('group_infos', array(), $this->infos['query']) as $info) {
+     $tbl_infos = $this->getVarTableInfos($info['value'], 0);
+     if ($tbl_infos) {
+       $group_cols[] = $tbl_infos['table_alias'];
+     }
+   }
+   $sql = $group_cols ? 'GROUP BY ' . implode(', ', $group_cols) : '';
+   $having_parts = array();
+   foreach ($this->index['havings'] as $having) {
+     $having_parts[] = '(' . $having . ')';
+   }
+   if ($having_parts) {
+     $sql .= ' HAVING' . implode(' AND', $having_parts);
+   }
+   return $sql ? "\n" . $sql : $sql;
+ }
+
+ /* */
+
+ /**
+  * Builds the ORDER BY clause from the query's order infos.
+  *
+  * Each entry is rendered in the 'order' context by the handler matching its type;
+  * entries without a handler are skipped. A 'desc' direction appends DESC.
+  *
+  * @return string the clause preceded by a newline, or '' if no entry was rendered
+  */
+ function getORDERSQL() {
+   $nl = "\n";
+   $handlers = array('expression' => 'getExpressionSQL', 'built_in_call' => 'getBuiltInCallSQL', 'function_call' => 'getFunctionCallSQL');
+   $terms = array();
+   foreach ($this->v('order_infos', array(), $this->infos['query']) as $info) {
+     $type = $info['type'];
+     $handler = isset($handlers[$type]) ? $handlers[$type] : 'get' . ucfirst($type) . 'ExpressionSQL';
+     if (!method_exists($this, $handler)) {
+       continue;
+     }
+     $term = '(' . $this->$handler($info, 'order') . ')';
+     if ($this->v('direction', '', $info) == 'desc') {
+       $term .= ' DESC';
+     }
+     $terms[] = $term;
+   }
+   if (!$terms) {
+     return '';
+   }
+   return $nl . 'ORDER BY ' . implode(',' . $nl, $terms);
+ }
+
+ /* */
+
+ /**
+  * Builds the LIMIT clause from the query's "limit" and "offset" settings (-1 meaning "not set").
+  *
+  * Both values are emitted unquoted, so they are cast to integers rather than
+  * string-escaped: escaping does not protect a value outside of quotes.
+  *
+  * @return string "\nLIMIT <offset>,<limit>", or '' if neither limit nor offset is set
+  */
+ function getLIMITSQL() {
+   $nl = "\n";
+   $limit = $this->v('limit', -1, $this->infos['query']);
+   $offset = $this->v('offset', -1, $this->infos['query']);
+   if ($limit != -1) {
+     $offset = ($offset == -1) ? 0 : intval($offset);
+     return $nl . 'LIMIT ' . $offset . ',' . intval($limit);
+   }
+   if ($offset != -1) {
+     return $nl . 'LIMIT ' . intval($offset) . ',999999999999'; /* mysql doesn't support stand-alone offsets .. */
+   }
+   return '';
+ }
+
+ /* */
+
+ /**
+  * Builds the outer SELECT that turns the ids of an intermediate result into values.
+  *
+  * For every result var a "V<n>.val AS `<var>`" column is selected and a join from the
+  * intermediate table (aliased TMP) to the matching value table is added; aggregate
+  * vars are read directly from TMP. Subject/object vars additionally get a
+  * "`<var> type`" column, object vars a "`<var> lang_dt`" column; both are NULL when
+  * $q_sql does not contain the respective column name.
+  *
+  * @param string $q_tbl name of the intermediate table, used in the FROM clause
+  * @param string $q_sql SQL that produced the intermediate table; only searched for column names
+  * @return string the complete SELECT statement
+  */
+ function getValueSQL($q_tbl, $q_sql) {
+ $r = '';
+ /* result vars */
+ $vars = $this->infos['query']['result_vars'];
+ $nl = "\n";
+ $v_tbls = array('JOIN' => array(), 'LEFT JOIN' => array());
+ /* running number of the value table alias (V1, V2, ...) */
+ $vc = 1;
+ foreach ($vars as $var) {
+ $var_name = $var['var'];
+ $r .= $r ? ',' . $nl . ' ' : ' ';
+ $col = '';
+ $tbl = '';
+ if ($var_name != '*') {
+ /* vars listed in null_vars are resolved against the initial index, all others against the current index */
+ if (in_array($var_name, $this->infos['null_vars'])) {
+ if (isset($this->initial_index['vars'][$var_name])) {
+ $col = $this->initial_index['vars'][$var_name][0]['col'];
+ $tbl = $this->initial_index['vars'][$var_name][0]['table'];
+ }
+ if (isset($this->initial_index['graph_vars'][$var_name])) {
+ $col = 'g';
+ $tbl = $this->initial_index['graph_vars'][$var_name][0]['table'];
+ }
+ }
+ elseif (isset($this->index['vars'][$var_name])) {
+ $col = $this->index['vars'][$var_name][0]['col'];
+ $tbl = $this->index['vars'][$var_name][0]['table'];
+ }
+ }
+ if ($var['aggregate']) {
+ $r .= 'TMP.`' . $var['alias'] . '`';
+ }
+ else {
+ $join_type = in_array($tbl, array_merge($this->index['from'], $this->index['join'])) ? 'JOIN' : 'LEFT JOIN';/* val may be NULL */
+ $v_tbls[$join_type][] = array('t_col' => $col, 'q_col' => $var_name, 'vc' => $vc);
+ $r .= 'V' . $vc . '.val AS `' . $var_name . '`';
+ if (in_array($col, array('s', 'o'))) {
+ if (strpos($q_sql, '`' . $var_name . ' type`')) {
+ $r .= ', ' . $nl . ' TMP.`' . $var_name . ' type` AS `' . $var_name . ' type`';
+ //$r .= ', ' . $nl . ' CASE TMP.`' . $var_name . ' type` WHEN 2 THEN "literal" WHEN 1 THEN "bnode" ELSE "uri" END AS `' . $var_name . ' type`';
+ }
+ else {
+ $r .= ', ' . $nl . ' NULL AS `' . $var_name . ' type`';
+ }
+ }
+ $vc++;
+ if ($col == 'o') {
+ /* the lang_dt join is registered unconditionally; it is skipped further down if $q_sql lacks the column */
+ $v_tbls[$join_type][] = array('t_col' => 'id', 'q_col' => $var_name . ' lang_dt', 'vc' => $vc);
+ if (strpos($q_sql, '`' . $var_name . ' lang_dt`')) {
+ $r .= ', ' .$nl. ' V' . $vc . '.val AS `' . $var_name . ' lang_dt`';
+ $vc++;
+ }
+ else {
+ $r .= ', ' .$nl. ' NULL AS `' . $var_name . ' lang_dt`';
+ }
+ }
+ }
+ }
+ if (!$r) $r = '*';
+ /* from */
+ $r .= $nl . 'FROM (' . $q_tbl . ' TMP)';
+ foreach (array('JOIN', 'LEFT JOIN') as $join_type) {
+ foreach ($v_tbls[$join_type] as $v_tbl) {
+ $tbl = $this->getValueTable($v_tbl['t_col']);
+ /* strip a " type" / " lang_dt" suffix to get back the plain var name */
+ $var_name = preg_replace('/^([^\s]+)(.*)$/', '\\1', $v_tbl['q_col']);
+ $cur_join_type = in_array($var_name, $this->infos['null_vars']) ? 'LEFT JOIN' : $join_type;
+ /* no join for columns the intermediate query does not provide */
+ if (!strpos($q_sql, '`' . $v_tbl['q_col'].'`')) continue;
+ $r .= $nl . ' ' . $cur_join_type . ' ' . $tbl . ' V' . $v_tbl['vc'] . ' ON (
+ (V' . $v_tbl['vc'] . '.id = TMP.`' . $v_tbl['q_col'].'`)
+ )';
+ }
+ }
+ /* create pos columns, id needed */
+ if ($this->v('order_infos', array(), $this->infos['query'])) {
+ $r .= $nl . ' ORDER BY _pos_';
+ }
+ return 'SELECT' . $nl . $r;
+ }
+
+ /* */
+
+}
+
+
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store SemHTML Loader
+author: Benjamin Nowack
+version: 2008-06-28 (Tweak: adjusted to normalized "literal" type)
+*/
+
+ARC2::inc('SemHTMLParser');
+
+/* SemHTML parser variant that hands every extracted triple to its caller instead of keeping it */
+class ARC2_StoreSemHTMLLoader extends ARC2_SemHTMLParser {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4 style constructor, forwards to __construct() */
+ function ARC2_StoreSemHTMLLoader($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ /* runs the RDF extraction */
+ function done() {
+ $this->extractRDF();
+ }
+
+ /* forwards the triple's terms to the caller's addT() and counts it in t_count */
+ function addT($t) {
+ $this->caller->addT($t['s'], $t['p'], $t['o'], $t['s_type'], $t['o_type'], $t['o_datatype'], $t['o_lang']);
+ $this->t_count++;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/**
+ * ARC2 RDF Store Table Manager
+ *
+ * @license http://arc.semsol.org/license
+ * @author Benjamin Nowack
+ * @version 2010-06-21
+ *
+*/
+
+ARC2::inc('Store');
+
+class ARC2_StoreTableManager extends ARC2_Store {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4 style constructor, forwards to __construct() */
+ function ARC2_StoreTableManager($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ /* reads the table engine from the "store_engine_type" config option, defaulting to MyISAM */
+ function __init() {/* db_con */
+ parent::__init();
+ $this->engine_type = $this->v('store_engine_type', 'MyISAM', $this->a);
+ }
+
+ /* */
+
+ /**
+  * Builds the table options appended to every CREATE TABLE statement.
+  *
+  * The engine keyword (ENGINE vs. TYPE) and the charset/collation options depend on
+  * the version string returned by getDBVersion().
+  *
+  * @return string e.g. "ENGINE=MyISAM CHARACTER SET utf8 COLLATE utf8_unicode_ci DELAY_KEY_WRITE = 1"
+  */
+ function getTableOptionsCode() {
+   $version = $this->getDBVersion();
+   $has_engine_keyword = (($version >= '04-00-18') && ($version < '04-01-00')) || ($version >= '04-01-02');
+   $options = ($has_engine_keyword ? 'ENGINE' : 'TYPE') . "=" . $this->engine_type;
+   if ($version >= '04-00-00') {
+     $options .= " CHARACTER SET utf8";
+   }
+   if ($version >= '04-01-00') {
+     $options .= " COLLATE utf8_unicode_ci";
+   }
+   return $options . " DELAY_KEY_WRITE = 1";
+ }
+
+ /* */
+
+ /**
+  * Creates all store tables in a fixed order, stopping at the first failure.
+  *
+  * @return mixed 1 on success, otherwise the result of addError() for the table that failed
+  */
+ function createTables() {
+   $con = $this->getDBCon();
+   $steps = array(
+     'createTripleTable' => 'triple',
+     'createG2TTable' => 'g2t',
+     'createID2ValTable' => 'id2val',
+     'createS2ValTable' => 's2val',
+     'createO2ValTable' => 'o2val',
+     'createSettingTable' => 'setting'
+   );
+   foreach ($steps as $method => $label) {
+     if (!$this->$method()) {
+       return $this->addError('Could not create "' . $label . '" table (' . mysql_error($con) . ').');
+     }
+   }
+   return 1;
+ }
+
+ /* */
+
+ /**
+  * Creates a triple table (if it does not exist yet) under the given suffix.
+  *
+  * Secondary indexes come from the "store_indexes" config option; an empty value
+  * creates the table without them.
+  *
+  * @param string $suffix table name suffix, appended to the table prefix
+  * @return mixed result of mysql_query()
+  */
+ function createTripleTable($suffix = 'triple') {
+ /* keep in sync with merge def in StoreQueryHandler ! */
+ $indexes = $this->v('store_indexes', array('sp (s,p)', 'os (o,s)', 'po (p,o)'), $this->a);
+ $index_code = $indexes ? 'KEY ' . join(', KEY ', $indexes) . ', ' : '';
+ $sql = "
+ CREATE TABLE IF NOT EXISTS " . $this->getTablePrefix() . $suffix . " (
+ t mediumint UNSIGNED NOT NULL,
+ s mediumint UNSIGNED NOT NULL,
+ p mediumint UNSIGNED NOT NULL,
+ o mediumint UNSIGNED NOT NULL,
+ o_lang_dt mediumint UNSIGNED NOT NULL,
+ o_comp char(35) NOT NULL, /* normalized value for ORDER BY operations */
+ s_type tinyint(1) NOT NULL default 0, /* uri/bnode => 0/1 */
+ o_type tinyint(1) NOT NULL default 0, /* uri/bnode/literal => 0/1/2 */
+ misc tinyint(1) NOT NULL default 0, /* temporary flags */
+ UNIQUE KEY (t), " . $index_code . " KEY (misc)
+ ) ". $this->getTableOptionsCode() . "
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /**
+  * Widens the id columns of a triple table from mediumint to int(10).
+  *
+  * @param string $suffix table name suffix, appended to the table prefix
+  * @return mixed result of mysql_query()
+  */
+ function extendTripleTableColumns($suffix = 'triple') {
+ $sql = "
+ ALTER TABLE " . $this->getTablePrefix() . $suffix . "
+ MODIFY t int(10) UNSIGNED NOT NULL,
+ MODIFY s int(10) UNSIGNED NOT NULL,
+ MODIFY p int(10) UNSIGNED NOT NULL,
+ MODIFY o int(10) UNSIGNED NOT NULL,
+ MODIFY o_lang_dt int(10) UNSIGNED NOT NULL
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /* */
+
+ /**
+  * Creates the "g2t" table (columns g and t, indexed in both directions) if it does not exist yet.
+  *
+  * @return mixed result of mysql_query()
+  */
+ function createG2TTable() {
+ $sql = "
+ CREATE TABLE IF NOT EXISTS " . $this->getTablePrefix() . "g2t (
+ g mediumint UNSIGNED NOT NULL,
+ t mediumint UNSIGNED NOT NULL,
+ UNIQUE KEY gt (g,t), KEY tg (t,g)
+ ) ". $this->getTableOptionsCode() . "
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /**
+  * Widens the g and t columns of a g2t table from mediumint to int(10).
+  *
+  * @param string $suffix table name suffix, appended to the table prefix
+  * @return mixed result of mysql_query()
+  */
+ function extendG2tTableColumns($suffix = 'g2t') {
+ $sql = "
+ ALTER TABLE " . $this->getTablePrefix() . $suffix . "
+ MODIFY g int(10) UNSIGNED NOT NULL,
+ MODIFY t int(10) UNSIGNED NOT NULL
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /* */
+
+ /**
+  * Creates the "id2val" table (id, value and value type) if it does not exist yet.
+  *
+  * @return mixed result of mysql_query()
+  */
+ function createID2ValTable() {
+ $sql = "
+ CREATE TABLE IF NOT EXISTS " . $this->getTablePrefix() . "id2val (
+ id mediumint UNSIGNED NOT NULL,
+ misc tinyint(1) NOT NULL default 0,
+ val text NOT NULL,
+ val_type tinyint(1) NOT NULL default 0, /* uri/bnode/literal => 0/1/2 */
+ UNIQUE KEY (id,val_type), KEY v (val(64))
+ ) ". $this->getTableOptionsCode() . "
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /**
+  * Widens the id column of an id2val table from mediumint to int(10).
+  *
+  * @param string $suffix table name suffix, appended to the table prefix
+  * @return mixed result of mysql_query()
+  */
+ function extendId2valTableColumns($suffix = 'id2val') {
+ $sql = "
+ ALTER TABLE " . $this->getTablePrefix() . $suffix . "
+ MODIFY id int(10) UNSIGNED NOT NULL
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /* */
+
+ /**
+  * Creates the "s2val" table (id, value hash and value) if it does not exist yet.
+  * The table is indexed on id and val_hash only; the value prefix index is disabled.
+  *
+  * @return mixed result of mysql_query()
+  */
+ function createS2ValTable() {
+ //$indexes = 'UNIQUE KEY (id), KEY vh (val_hash), KEY v (val(64))';
+ $indexes = 'UNIQUE KEY (id), KEY vh (val_hash)';
+ $sql = "
+ CREATE TABLE IF NOT EXISTS " . $this->getTablePrefix() . "s2val (
+ id mediumint UNSIGNED NOT NULL,
+ misc tinyint(1) NOT NULL default 0,
+ val_hash char(32) NOT NULL,
+ val text NOT NULL,
+ " . $indexes . "
+ ) " . $this->getTableOptionsCode() . "
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /**
+  * Widens the id column of an s2val table from mediumint to int(10).
+  *
+  * @param string $suffix table name suffix, appended to the table prefix
+  * @return mixed result of mysql_query()
+  */
+ function extendS2valTableColumns($suffix = 's2val') {
+ $sql = "
+ ALTER TABLE " . $this->getTablePrefix() . $suffix . "
+ MODIFY id int(10) UNSIGNED NOT NULL
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /* */
+
+ /**
+  * Creates the "o2val" table (id, value hash and value) if it does not exist yet.
+  * The index on the value column is taken from the "store_object_index" config
+  * option; an empty value creates the table without it.
+  *
+  * @return mixed result of mysql_query()
+  */
+ function createO2ValTable() {
+ /* object value index, e.g. "KEY v (val(64))" and/or "FULLTEXT KEY vft (val)" */
+ $val_index = $this->v('store_object_index', 'KEY v (val(64))', $this->a);
+ /* normalise to exactly one leading comma so the option can be appended to the key list */
+ if ($val_index) $val_index = ', ' . ltrim($val_index, ',');
+ $sql = "
+ CREATE TABLE IF NOT EXISTS " . $this->getTablePrefix() . "o2val (
+ id mediumint UNSIGNED NOT NULL,
+ misc tinyint(1) NOT NULL default 0,
+ val_hash char(32) NOT NULL,
+ val text NOT NULL,
+ UNIQUE KEY (id), KEY vh (val_hash)" . $val_index . "
+ ) ". $this->getTableOptionsCode() . "
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /**
+  * Widens the id column of an o2val table from mediumint to int(10).
+  *
+  * @param string $suffix table name suffix, appended to the table prefix
+  * @return mixed result of mysql_query()
+  */
+ function extendO2valTableColumns($suffix = 'o2val') {
+ $sql = "
+ ALTER TABLE " . $this->getTablePrefix() . $suffix . "
+ MODIFY id int(10) UNSIGNED NOT NULL
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /* */
+
+ /**
+  * Creates the "setting" table (unique key k, text value) if it does not exist yet.
+  *
+  * @return mixed result of mysql_query()
+  */
+ function createSettingTable() {
+ $sql = "
+ CREATE TABLE IF NOT EXISTS " . $this->getTablePrefix() . "setting (
+ k char(32) NOT NULL,
+ val text NOT NULL,
+ UNIQUE KEY (k)
+ ) ". $this->getTableOptionsCode() . "
+ ";
+ return mysql_query($sql, $this->getDBCon());
+ }
+
+ /* */
+
+ /**
+  * Runs the matching extend<Type>TableColumns() method on every table returned by
+  * getTables() whose name starts with triple, g2t, id2val, s2val or o2val.
+  * The results of the individual ALTER statements are not checked.
+  */
+ function extendColumns() {
+ /* NOTE(review): $con and $tbl_prefix are not read below - confirm whether the calls are needed for side effects */
+ $con = $this->getDBCon();
+ $tbl_prefix = $this->getTablePrefix();
+ $tbls = $this->getTables();
+ foreach ($tbls as $suffix) {
+ if (preg_match('/^(triple|g2t|id2val|s2val|o2val)/', $suffix, $m)) {
+ $mthd = 'extend' . ucfirst($m[1]) . 'TableColumns';
+ $this->$mthd($suffix);
+ }
+ }
+ }
+
+ /* */
+
+ /**
+  * Brings the per-predicate triple tables in line with the predicates returned by
+  * retrieveSplitPredicates() and records the resulting state in the
+  * "split_predicates" setting.
+  *
+  * Predicates no longer wanted are merged back, new ones are split off. The setting
+  * stores the predicates that are actually split afterwards: those kept, those whose
+  * merge failed, and those whose split succeeded. Storing the requested list instead
+  * would list predicates whose table was never created or drop ones whose table still
+  * holds data.
+  */
+ function splitTables() {
+   $old_ps = $this->getSetting('split_predicates', array());
+   $new_ps = $this->retrieveSplitPredicates();
+   $add_ps = array_diff($new_ps, $old_ps);
+   $del_ps = array_diff($old_ps, $new_ps);
+   /* predicates that were split before and stay split */
+   $final_ps = array_values(array_intersect($old_ps, $new_ps));
+   foreach ($del_ps as $p) {
+     /* a failed merge leaves the split table in place */
+     if (!$this->unsplitPredicate($p)) $final_ps[] = $p;
+   }
+   foreach ($add_ps as $p) {
+     if ($this->splitPredicate($p)) $final_ps[] = $p;
+   }
+   $this->setSetting('split_predicates', $final_ps);
+ }
+
+ /**
+  * Merges the per-predicate table of $p back into the main triple table and drops it.
+  *
+  * @param string $p predicate; its table suffix is "triple_" plus the absolute crc32 of the value
+  * @return int 1 if the rows were copied (the old table is then dropped), 0 if the copy failed
+  */
+ function unsplitPredicate($p) {
+ $suffix = 'triple_' . abs(crc32($p));
+ $old_tbl = $this->getTablePrefix() . $suffix;
+ $new_tbl = $this->getTablePrefix() . 'triple';
+ $p_id = $this->getTermID($p, 'p');
+ $con = $this->getDBCon();
+ $sql = '
+ INSERT IGNORE INTO ' . $new_tbl .'
+ SELECT * FROM ' . $old_tbl . ' WHERE ' . $old_tbl . '.p = ' . $p_id . '
+ ';
+ if ($rs = mysql_query($sql, $con)) {
+ /* the result of the DROP itself is not checked */
+ mysql_query('DROP TABLE ' . $old_tbl, $con);
+ return 1;
+ }
+ else {
+ return 0;
+ }
+ }
+
+ /**
+  * Moves all triples with predicate $p from the main triple table into a table of their own.
+  *
+  * @param string $p predicate; its table suffix is "triple_" plus the absolute crc32 of the value
+  * @return int 1 if the rows were copied (they are then deleted from the main table),
+  *             0 if the copy failed (the new table is then dropped again)
+  */
+ function splitPredicate($p) {
+ $suffix = 'triple_' . abs(crc32($p));
+ /* the result of the table creation is not checked; a failure shows up in the INSERT below */
+ $this->createTripleTable($suffix);
+ $old_tbl = $this->getTablePrefix() . 'triple';
+ $new_tbl = $this->getTablePrefix() . $suffix;
+ $p_id = $this->getTermID($p, 'p');
+ $con = $this->getDBCon();
+ $sql = '
+ INSERT IGNORE INTO ' . $new_tbl .'
+ SELECT * FROM ' . $old_tbl . ' WHERE ' . $old_tbl . '.p = ' . $p_id . '
+ ';
+ if ($rs = mysql_query($sql, $con)) {
+ mysql_query('DELETE FROM ' . $old_tbl . ' WHERE ' . $old_tbl . '.p = ' . $p_id, $con);
+ return 1;
+ }
+ else {
+ mysql_query('DROP TABLE ' . $new_tbl, $con);
+ return 0;
+ }
+ }
+
+ /**
+  * Determines the predicates that should get a triple table of their own.
+  *
+  * Starts with the configured split predicates and fills the remaining slots (up to
+  * max_split_tables) with the most frequently used predicates of the store.
+  *
+  * @return array list of predicates
+  */
+ function retrieveSplitPredicates() {
+   $r = $this->split_predicates;
+   $limit = $this->max_split_tables - count($r);
+   /* no free slots: skip the query rather than sending "LIMIT 0" or a negative LIMIT */
+   if ($limit <= 0) {
+     return $r;
+   }
+   $q = 'SELECT ?p COUNT(?p) AS ?pc WHERE { ?s ?p ?o } GROUP BY ?p ORDER BY DESC(?pc) LIMIT ' . $limit;
+   $rows = $this->query($q, 'rows');
+   /* a failed query may not yield an array */
+   if (!is_array($rows)) {
+     return $r;
+   }
+   foreach ($rows as $row) {
+     /* a configured predicate may show up among the most frequent ones again */
+     if (!in_array($row['p'], $r)) {
+       $r[] = $row['p'];
+     }
+   }
+   return $r;
+ }
+
+ /* */
+
+}
--- /dev/null
+<?php
+/*
+homepage: http://arc.semsol.org/
+license: http://arc.semsol.org/license
+
+class: ARC2 Store Turtle Loader
+author: Benjamin Nowack
+version: 2008-06-28 (Tweak: adjusted to normalized "literal" type)
+*/
+
+ARC2::inc('TurtleParser');
+
+/* Turtle parser variant that hands every parsed triple to its caller instead of keeping it */
+class ARC2_StoreTurtleLoader extends ARC2_TurtleParser {
+
+ function __construct($a = '', &$caller) {
+ parent::__construct($a, $caller);
+ }
+
+ /* PHP 4 style constructor, forwards to __construct() */
+ function ARC2_StoreTurtleLoader($a = '', &$caller) {
+ $this->__construct($a, $caller);
+ }
+
+ function __init() {
+ parent::__init();
+ }
+
+ /* */
+
+ /* forwards the triple's terms to the caller's addT() and counts it in t_count */
+ function addT($t) {
+ $this->caller->addT($t['s'], $t['p'], $t['o'], $t['s_type'], $t['o_type'], $t['o_datatype'], $t['o_lang']);
+ $this->t_count++;
+ }
+
+ /* */
+
+}