3 * ARC2 N-Triples Serializer
5 * @author Benjamin Nowack
6 * @license <http://arc.semsol.org/license>
7 * @homepage <http://arc.semsol.org/>
12 ARC2::inc('RDFSerializer');
14 class ARC2_NTriplesSerializer extends ARC2_RDFSerializer {
/* Constructor: forwards both arguments unchanged to the parent (ARC2_RDFSerializer) constructor. $caller is received by reference. */
16 function __construct($a = '', &$caller) {
17 parent::__construct($a, $caller);
/* PHP 4-style constructor (method named after the class): does nothing but delegate to __construct() with the same arguments. */
20 function ARC2_NTriplesSerializer($a = '', &$caller) {
21 $this->__construct($a, $caller);
/* Start with an empty per-character cache; the escaping loop looks entries up as $this->esc_chars[$c] and fills them on first use. */
/* NOTE(review): the enclosing function header is not visible in this excerpt. */
26 $this->esc_chars = array();
/*
 * Serializes one term (subject, predicate, object or datatype) for N-Triples output.
 *
 * $v is either a plain string or an array with at least 'type' and 'value' keys
 * (optionally 'lang' and 'datatype'). Returns the serialized term as a string.
 *
 * NOTE(review): several lines of this method (branch bodies, closing braces and
 * every assignment to $quot) are not visible in this excerpt; the comments below
 * describe only the lines that are shown.
 */
32 function getTerm($v) {
/* String starting with "_:" (blank node label). The body of this branch is not shown here. */
34 if (preg_match('/^\_\:/', $v)) {
/* Looks like an IRI: alphanumeric scheme, a colon, then no whitespace or double quote up to the end of the string. */
37 if (preg_match('/^[a-z0-9]+\:[^\s\"]*$/is', $v)) {
38 return '<' . $this->escape($v) . '>';
/* Any other string is re-dispatched as a literal. */
40 return $this->getTerm(array('type' => 'literal', 'value' => $v));
/* Array terms that are not literals are serialized from their 'value' string via the branches above. */
42 if ($v['type'] != 'literal') {
43 return $this->getTerm($v['value']);
/* Raw mode only: the literal contains a double quote, so a different delimiter is selected by the nested checks below. */
47 if ($this->raw && preg_match('/\"/', $v['value'])) {
/* ...and it also contains an apostrophe. */
49 if (preg_match('/\'/', $v['value'])) {
/* ...and it contains three double quotes in a row, or starts or ends with a double quote. */
51 if (preg_match('/\"\"\"/', $v['value']) || preg_match('/\"$/', $v['value']) || preg_match('/^\"/', $v['value'])) {
/* Add a space after a trailing apostrophe and before a leading one, and turn every ''' into \'\'\'. */
/* Presumably this keeps the value from colliding with a ''' delimiter; the $quot assignment is not visible, so verify. */
53 $v['value'] = preg_replace("/'$/", "' ", $v['value']);
54 $v['value'] = preg_replace("/^'/", " '", $v['value']);
55 $v['value'] = str_replace("'''", '\\\'\\\'\\\'', $v['value']);
/* Raw mode only: a one-character delimiter is tripled when the value contains a CR or LF. */
59 if ($this->raw && (strlen($quot) == 1) && preg_match('/[\x0d\x0a]/', $v['value'])) {
60 $quot = $quot . $quot . $quot;
/* Language tag suffix; a non-empty datatype overwrites it, so datatype wins when both are set. */
62 $suffix = isset($v['lang']) && $v['lang'] ? '@' . $v['lang'] : '';
63 $suffix = isset($v['datatype']) && $v['datatype'] ? '^^' . $this->getTerm($v['datatype']) : $suffix;
64 //return $quot . "object" . utf8_encode($v['value']) . $quot . $suffix;
/* Delimiter + escaped value + delimiter + optional @lang / ^^datatype suffix. */
65 return $quot . $this->escape($v['value']) . $quot . $suffix;
/*
 * Serializes a nested index of the form $index[subject][predicate] => objects
 * into "s p o ." statements, one per object.
 *
 * $index: array keyed by subject, then by predicate.
 * $raw:   optional flag, default 0. NOTE(review): its use is not visible in this
 *         excerpt; presumably it is stored as $this->raw, which getTerm() and the
 *         escaping code read - confirm.
 *
 * NOTE(review): initialisation of $r, the loop over $os and the return statement
 * are not visible in this excerpt.
 */
68 function getSerializedIndex($index, $raw = 0) {
72 foreach ($index as $s => $ps) {
/* The subject key is serialized once and reused for all of its predicates. */
73 $s = $this->getTerm($s);
74 foreach ($ps as $p => $os) {
75 $p = $this->getTerm($p);
/* A non-array object is wrapped into a one-element list holding a literal, so the code below always sees a list. */
76 if (!is_array($os)) {/* single literal o */
77 $os = array(array('value' => $os, 'type' => 'literal'));
/* $o is presumably the current element of $os; the loop header is not shown. */
80 $o = $this->getTerm($o);
/* Append one statement: subject, predicate, object, terminating dot. */
82 $r .= $s . ' ' . $p . ' ' . $o . ' .';
/* NOTE(review): the header of this function is not visible in this excerpt; other methods call it as $this->escape($v). */
/* Decode UTF-8 to ISO-8859-1 only when that is lossless: utf8_decode() substitutes '?' for characters it cannot represent, */
/* so literal '?' are removed first and any '?' left after decoding means the original UTF-8 bytes must be kept. */
93 $v = (strpos(utf8_decode(str_replace('?', '', $v)), '?') === false) ? utf8_decode($v) : $v;
/* Raw mode: return the (possibly decoded) string without any escaping. */
94 if ($this->raw) return $v;
/* Walk the string by byte offset (strlen() counts bytes). */
95 for ($i = 0, $i_max = strlen($v); $i < $i_max; $i++) {
/* $c is assigned on a line not shown here; presumably $v[$i]. Each distinct $c is escaped once and then served from the cache. */
97 if (!isset($this->esc_chars[$c])) {
98 $this->esc_chars[$c] = $this->getEscapedChar($c, $this->getCharNo($c));
100 $r .= $this->esc_chars[$c];
/*
 * Computes a character number for $c by re-encoding it as UTF-8 and decoding
 * the resulting byte sequence by hand.
 *
 * NOTE(review): the switch header, the body of case 1, the break statements and
 * the return are not visible in this excerpt; the value is presumably collected
 * in $r and returned.
 */
107 function getCharNo($c) {
/* utf8_encode() treats its input as ISO-8859-1 and converts it to UTF-8. */
108 $c_utf = utf8_encode($c);
109 $bl = strlen($c_utf);/* binary length */
/* One byte: plain ASCII (body not shown). */
112 case 1:/* 0####### (0-127) */
/* Two bytes: 5 payload bits from the lead byte, 6 from the continuation byte. */
115 case 2:/* 110##### 10###### = 192+x 128+x */
116 $r = ((ord($c_utf[0]) - 192) * 64) + (ord($c_utf[1]) - 128);
/* Three bytes: 4 + 6 + 6 payload bits. */
118 case 3:/* 1110#### 10###### 10###### = 224+x 128+x 128+x */
119 $r = ((ord($c_utf[0]) - 224) * 4096) + ((ord($c_utf[1]) - 128) * 64) + (ord($c_utf[2]) - 128);
/* Four bytes: 3 + 6 + 6 + 6 payload bits. */
121 case 4:/* 1111#### 10###### 10###### 10###### = 240+x 128+x 128+x 128+x */
122 $r = ((ord($c_utf[0]) - 240) * 262144) + ((ord($c_utf[1]) - 128) * 4096) + ((ord($c_utf[2]) - 128) * 64) + (ord($c_utf[3]) - 128);
/**
 * Returns the N-Triples representation of a single character.
 *
 * Printable ASCII is passed through unchanged; tab, line feed, carriage
 * return, double quote and backslash get their two-character escapes; all
 * other characters are written as \uXXXX or \UXXXXXXXX.
 *
 * @see http://www.w3.org/TR/rdf-testcases/#ntrip_strings
 *
 * @param string $c  the character exactly as it should be echoed when no escaping is needed
 * @param int    $no the character's code point
 * @return string the escaped form, or '' when $no lies beyond #x10FFFF
 */
function getEscapedChar($c, $no) {
  /* characters with a dedicated two-character escape */
  if ($no == 9) return '\t';    /* #x9 */
  if ($no == 10) return '\n';   /* #xA */
  if ($no == 13) return '\r';   /* #xD */
  if ($no == 34) return '\"';   /* #x22 */
  /* In a single-quoted PHP string '\\' is ONE backslash; the spec requires the
     two-character sequence \\ so four backslashes are needed in the source. */
  if ($no == 92) return '\\\\'; /* #x5C */
  /* remaining control characters: #x0-#x8, #xB-#xC, #xE-#x1F */
  if ($no < 32) return "\\u" . sprintf('%04X', $no);
  /* printable ASCII: #x20-#x7E (minus the two handled above) */
  if ($no < 127) return $c;
  /* #x7F-#xFFFF */
  if ($no < 65536) return "\\u" . sprintf('%04X', $no);
  /* #x10000-#x10FFFF */
  if ($no < 1114112) return "\\U" . sprintf('%08X', $no);
  /* not defined => ignore */
  return '';
}