3 * FusionForge text sanitisation
5 * Copyright (C) 2005, Daniel Perez
6 * Copyright (C) 2008-2009 Alcatel-Lucent
8 * This file is part of FusionForge.
10 * FusionForge is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published
12 * by the Free Software Foundation; either version 2 of the License,
13 * or (at your option) any later version.
15 * FusionForge is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with FusionForge; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
27 * Standard Alcatel-Lucent disclaimer for contributing to open source
29 * "The Style Sheet ("Contribution") has not been tested and/or
30 * validated for release as or in products, combinations with products or
31 * other commercial use. Any use of the Contribution is entirely made at
32 * the user's own responsibility and the user can not rely on any features,
33 * functionalities or performances Alcatel-Lucent has attributed to the
36 * THE CONTRIBUTION BY ALCATEL-LUCENT IS PROVIDED AS IS, WITHOUT WARRANTY
37 * OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
38 * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, COMPLIANCE,
39 * NON-INTERFERENCE AND/OR INTERWORKING WITH THE SOFTWARE TO WHICH THE
40 * CONTRIBUTION HAS BEEN MADE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL
41 * ALCATEL-LUCENT BE LIABLE FOR ANY DAMAGES OR OTHER LIABLITY, WHETHER IN
42 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
43 * CONTRIBUTION OR THE USE OR OTHER DEALINGS IN THE CONTRIBUTION, WHETHER
44 * TOGETHER WITH THE SOFTWARE TO WHICH THE CONTRIBUTION RELATES OR ON A STAND
48 require_once('HTMLPurifier.auto.php');
50 Class TextSanitizer extends Error {
/**
 * convertExtendedCharsForEmail - Replace a fixed set of HTML entities (and
 * <br> tags) with the plain characters they stand for, so the text reads
 * naturally in a plain-text e-mail.
 *
 * The replacements are applied one after another in the order listed, so the
 * output of an earlier step can be matched by a later one: "&lt;br&gt;" first
 * becomes "<br>" and is then turned into a newline.
 *
 * NOTE(review): the entity names on the search side were rebuilt from the
 * characters they decode to; confirm them against version control.
 *
 * @param	string	$text	The input string
 * @return	string	The output string
 */
function convertExtendedCharsForEmail($text) {
	// Ordered map: search string => replacement. Order matters (see above).
	$map = array(
		// it's better to see that char in the email than the html entity
		'&acute;'  => "'",
		'&amp;'    => '&',
		'&quot;'   => '"',
		'&nbsp;'   => ' ',
		'&lt;'     => '<',
		'&gt;'     => '>',
		'&deg;'    => '°',
		'<br>'     => "\n",
		'<br />'   => "\n",

		// Accented letters
		'&eacute;' => 'é',
		'&egrave;' => 'è',
		'&ecirc;'  => 'ê',
		'&euml;'   => 'ë',
		'&agrave;' => 'à',
		'&acirc;'  => 'â',
		'&ccedil;' => 'ç',
		'&ugrave;' => 'ù',
		'&ucirc;'  => 'û',
		'&uuml;'   => 'ü',
		'&ocirc;'  => 'ô',
		'&iuml;'   => 'ï',
	);

	// With array arguments str_replace() processes the pairs sequentially,
	// which is equivalent to one str_replace() call per pair.
	return str_replace(array_keys($map), array_values($map), $text);
}
/**
 * convertNeededTagsForEmail - Turn the few HTML tags that matter for layout
 * (<br>, <p>, <li>, <ul>) into their plain-text equivalents.
 *
 * Line breaks and paragraphs become newlines, list items become "\n - "
 * bullets, and a UTF-8 non-breaking space becomes a regular space. Tags are
 * matched literally, so variants with attributes or different case are left
 * untouched.
 *
 * @param	string	$text	The input string
 * @return	string	The output string
 */
function convertNeededTagsForEmail($text) {
	// Ordered map: search string => replacement.
	$map = array(
		'<br>'     => "\n",
		'<br />'   => "\n",
		'<br/>'    => "\n",
		'<p>'      => "\n",
		'</p>'     => "\n",
		'<li>'     => "\n - ",
		'</li>'    => '',
		'<ul>'     => '',
		'</ul>'    => "\n",
		"\xc2\xa0" => ' ',	// UTF-8 encoded non-breaking space
	);

	return str_replace(array_keys($map), array_values($map), $text);
}
/**
 * unhtmlentities - Reverse htmlspecialchars()-style escaping.
 *
 * @param	string	$string	Text containing escaped special characters
 * @return	string	The text with those entities decoded
 */
function unhtmlentities ($string) {
	// The translation table maps character => entity; flip it to decode.
	$decode_tbl = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
	$res = strtr($string, $decode_tbl);

	// strtr() never re-scans its own output, so a doubly escaped quote
	// ("&amp;quot;") is still "&quot;" at this point; decode it as well.
	return str_replace("&quot;", '"', $res);
}
/**
 * SanitizeHtml - Escape all markup in the input, then re-enable a fixed
 * allow-list of tags by un-escaping them again.
 *
 * The replacements run in the order listed. Order matters: "&amp;" and
 * "&quot;" are restored first, the exact form of a tag ("&lt;hr&gt;") must be
 * handled before its open-ended prefix form ("&lt;hr"), and so on.
 *
 * SECURITY NOTE: tags restored by prefix ("<a ", "<img ", "<input ",
 * "<object ", "<embed ", ...) keep whatever attributes the caller supplied;
 * nothing here inspects attribute names or values. Prefer purify() for
 * untrusted input.
 *
 * NOTE(review): the escaped search strings below were rebuilt from their
 * un-escaped counterparts; confirm them against version control.
 *
 * @param	string	$input	The HTML Code
 * @return	string	The HTML output
 */
function SanitizeHtml($input) {
	$input = htmlspecialchars($input); // first escape all special chars

	// Ordered map: escaped form => permitted raw form.
	$map = array(
		'&amp;'               => '&',
		'&quot;'              => '"',
		'/&gt;'               => '/>',
		'"&gt;'               => '">',
		'&lt;/a&gt;'          => '</a>',
		'&lt;strike&gt;'      => '<strike>',
		'&lt;/strike&gt;'     => '</strike>',
		'&lt;sub&gt;'         => '<sub>',
		'&lt;/sub&gt;'        => '</sub>',
		'&lt;span'            => '<span',
		'&lt;/span&gt;'       => '</span>',
		'&lt;font'            => '<font',
		'&lt;/font&gt;'       => '</font>',
		'&lt;hr&gt;'          => '<hr>',
		'&lt;hr'              => '<hr',
		'&lt;br&gt;'          => '<br>',
		// "/&gt;" has already been restored above, so a self-closing
		// <br /> reaches this point as "&lt;br />".
		'&lt;br />'           => '<br />',
		'&lt;tbody&gt;'       => '<tbody>',
		'&lt;/tbody&gt;'      => '</tbody>',
		'&lt;tr&gt;'          => '<tr>',
		'&lt;/tr&gt;'         => '</tr>',
		'&lt;td&gt;'          => '<td>',
		'&lt;/td&gt;'         => '</td>',
		'&lt;td'              => '<td',
		'&lt;table&gt;'       => '<table>',
		'&lt;table'           => '<table',
		'&lt;/table&gt;'      => '</table>',
		'&lt;div&gt;'         => '<div>',
		'&lt;div'             => '<div',
		'&lt;/div&gt;'        => '</div>',
		'&lt;u&gt;'           => '<u>',
		'&lt;u '              => '<u ', // rg
		'&lt;/u&gt;'          => '</u>',
		'&lt;p&gt;'           => '<p>',
		'&lt;/p&gt;'          => '</p>',
		'&lt;p '              => '<p ',
		'&lt;li&gt;'          => '<li>',
		'&lt;/li&gt;'         => '</li>',
		'&lt;ul&gt;'          => '<ul>',
		'&lt;/ul&gt;'         => '</ul>',
		'&lt;ol&gt;'          => '<ol>',
		'&lt;/ol&gt;'         => '</ol>',
		'&lt;blockquote&gt;'  => '<blockquote>',
		'&lt;blockquote'      => '<blockquote',
		'&lt;/blockquote&gt;' => '</blockquote>',
		'&lt;em&gt;'          => '<em>',
		'&lt;/em&gt;'         => '</em>',
		'&lt;strong&gt;'      => '<strong>',
		'&lt;/strong&gt;'     => '</strong>',
		'&lt;sup&gt;'         => '<sup>',
		'&lt;/sup&gt;'        => '</sup>',
		'&lt;input '          => '<input ',
		'&lt;img '            => '<img ',
		'&lt;textarea '       => '<textarea ',
		'&lt;/textarea&gt;'   => '</textarea>',
		'&lt;a '              => '<a ',
		'&lt;h1&gt;'          => '<h1>',
		'&lt;/h1&gt;'         => '</h1>',
		'&lt;h2&gt;'          => '<h2>',
		'&lt;/h2&gt;'         => '</h2>',
		'&lt;h3&gt;'          => '<h3>',
		'&lt;/h3&gt;'         => '</h3>',
		'&lt;h4&gt;'          => '<h4>',
		'&lt;/h4&gt;'         => '</h4>',
		'&lt;h5&gt;'          => '<h5>',
		'&lt;/h5&gt;'         => '</h5>',
		'&lt;h6&gt;'          => '<h6>',
		'&lt;/h6&gt;'         => '</h6>',
		'&lt;pre&gt;'         => '<pre>',
		'&lt;/pre&gt;'        => '</pre>',
		'&lt;address&gt;'     => '<address>',
		'&lt;/address&gt;'    => '</address>',
		'&lt;h1 '             => '<h1 ',
		'&lt;h2 '             => '<h2 ',
		'&lt;h3 '             => '<h3 ',
		'&lt;h4 '             => '<h4 ',
		'&lt;h5 '             => '<h5 ',
		'&lt;h6 '             => '<h6 ',
		// NOTE(review): these two search strings may originally have been
		// the entities &rsquo; and &bull; rather than the literal characters.
		'’'                   => '\\\'',
		'•'                   => '-',

		// Allow embedding video like youtube ones.
		'&lt;object '         => '<object ',
		'&lt;/object&gt;'     => '</object>',
		'&lt;param '          => '<param ',
		'&lt;/param&gt;'      => '</param>',
		'&lt;embed '          => '<embed ',
		'&lt;/embed&gt;'      => '</embed>',
	);

	// With array arguments str_replace() processes the pairs sequentially,
	// which is equivalent to one str_replace() call per pair.
	return str_replace(array_keys($map), array_values($map), $input);
}
/**
 * stripTags - Run the text through HTML Purifier with a restricted
 * element allow-list.
 *
 * @param	string	$text		The HTML to clean
 * @param	string	$allowed	Value for HTML Purifier's 'HTML.Allowed'
 *					setting (comma separated element list)
 * @return	string	The purified text
 */
function stripTags ($text, $allowed='br,p,li,ul') {
	$config = HTMLPurifier_Config::createDefault();
	// A null implementation turns off HTML Purifier's definition cache.
	$config->set('Cache.DefinitionImpl', null);
	$config->set('HTML.Allowed', $allowed);

	$purifier = new HTMLPurifier($config);
	return $purifier->purify($text);
}
/**
 * purify - Clean HTML with HTML Purifier using its default configuration.
 *
 * @param	string	$text	The HTML to clean
 * @return	string	The purified HTML
 */
function purify ($text) {
	// Remove string like "<![if !supportLists]>" or "<![endif]>"
	$text = preg_replace('/<!\[.+?\]>/', '', $text);

	$config = HTMLPurifier_Config::createDefault();
	// Former explicit allow-list, kept for reference:
	//$config->set('HTML.Allowed','a[href|title],strike,sub,span,font,hr,br,tbody,tr,td,table,div,u,p,ul,li,ol,blockquote,em,strong,sup,input,img,textarea,h1,h2,h3,h4,h5,h6,pre,address');
	// A null implementation turns off HTML Purifier's definition cache.
	$config->set('Cache.DefinitionImpl', null);

	$purifier = new HTMLPurifier($config);
	return $purifier->purify($text);
}
// summarize - Build a short HTML excerpt from the first lines of $text.
// NOTE(review): this view of the function is incomplete (the end of the last
// expression and the return are not visible), so only the visible steps are
// described below.
236 function summarize ($text, $nb_line=4, $truncate=true, $nb_char=145) {
// Purify with stripTags() and its default allow-list, then turn the
// remaining layout tags into newlines.
237 $text = $this->stripTags($text);
238 $text = $this->convertNeededTagsForEmail($text);
239 // Remove MS Windows extra char for CR
240 $text = preg_replace('/\r/', '', $text);
// Collapse a newline followed by any run of newlines/whitespace into one newline.
242 $text = preg_replace('/\n[\n\s]*/', "\n", $text);
// Work line by line, looking at no more than $nb_line lines.
244 $arr = explode("\n", $text);
245 $nb_max = count($arr);
246 if ($nb_max > $nb_line) $nb_max = $nb_line;
248 for ($l = 0; $l < $nb_max; $l++) {
// Every emitted line is preceded by an HTML line break.
249 $summary .= '<br />';
// When the text has fewer lines than $nb_line, the last line gets a larger
// character budget: $nb_char times (number of unused lines + 1).
250 if ($truncate == true && $nb_max < $nb_line && $l == $nb_max - 1) {
251 $nb_char = $nb_char * ($nb_line - $nb_max + 1);
// A line longer than $nb_char bytes (strlen/substr count bytes) is cut at
// $nb_char and its trailing partial word replaced by ' <b>...</b>'. The
// result is passed to util_make_links(), which is defined elsewhere.
253 $summary .= util_make_links((($truncate == true && strlen($arr[$l]) > $nb_char) ?
254 preg_replace('/[^\s]*$/', ' <b>...</b>', substr($arr[$l], 0, $nb_char), 1) :
264 // c-file-style: "bsd"