3 * Copyright (c) STMicroelectronics, 2007. All Rights Reserved.
5 * Originally written by Manuel VACELET, 2007.
7 * This file is a part of Fusionforge.
9 * Fusionforge is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Fusionforge is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Fusionforge. If not, see <http://www.gnu.org/licenses/>.
24 * Clean-up HTML code for user output.
26 * This class aims to purify the HTML code provided by a user so that it can be
27 * displayed safely (removes XSS vectors and makes the HTML standards-compliant).
30 * require_once('pre.php');
31 * require_once('common/include/Codendi_HTMLPurifier.class.php');
32 * $crapy = '<a href="" onmouseover="alert(1);">testé</a>';
33 * $hp =& Codendi_HTMLPurifier::instance();
34 * $clean = $hp->purify($crapy);
// Purification levels understood by Codendi_HTMLPurifier::purify().

// Convert every HTML markup into entities (default level).
const CODENDI_PURIFIER_CONVERT_HTML = 0;
// Remove all HTML markups through HTML Purifier.
const CODENDI_PURIFIER_STRIP_HTML = 1;
// Entities + clickable links + line breaks turned into <br />.
const CODENDI_PURIFIER_BASIC = 5;
// Same as BASIC, without the line break conversion.
const CODENDI_PURIFIER_BASIC_NOBR = 6;
// Light set of formatting markups, plus links and line breaks.
const CODENDI_PURIFIER_LIGHT = 10;
// Plain HTML cleaned up with the HTML Purifier rules.
const CODENDI_PURIFIER_FULL = 15;
// Escaping for a single-quoted JavaScript string.
const CODENDI_PURIFIER_JS_QUOTE = 20;
// Escaping for a double-quoted JavaScript string.
const CODENDI_PURIFIER_JS_DQUOTE = 25;
// Purification disabled.
const CODENDI_PURIFIER_DISABLED = 100;
48 class Codendi_HTMLPurifier {
50 * Hold an instance of the class
52 private static $Codendi_HTMLPurifier_instance;
/**
 * Private constructor: the class cannot be instantiated with `new` from
 * outside, callers obtain the object through instance().
 *
 * NOTE(review): the constructor body is not visible in the reviewed excerpt.
 */
57 private function __construct() {
/**
 * Singleton access.
 *
 * Returns the instance stored in the static property, creating it on the
 * first call.
 *
 * @return Codendi_HTMLPurifier
 */
public static function instance()
{
    if (isset(self::$Codendi_HTMLPurifier_instance)) {
        return self::$Codendi_HTMLPurifier_instance;
    }

    // NOTE(review): the line assigning the class name passed to `new` is not
    // visible in the reviewed excerpt; it is assumed to be __CLASS__ - confirm.
    $className = __CLASS__;
    self::$Codendi_HTMLPurifier_instance = new $className;

    return self::$Codendi_HTMLPurifier_instance;
}
/**
 * Base configuration of HTML Purifier for codendi.
 *
 * Starts from the HTML Purifier defaults, sets the encoding to UTF-8 and
 * points the serializer cache to $GLOBALS['codendi_cache_dir'].
 *
 * @return HTMLPurifier_Config
 */
protected function getCodendiConfig()
{
    $baseConfig = HTMLPurifier_Config::createDefault();

    $baseConfig->set('Core', 'Encoding', 'UTF-8');
    // The doctype override below was already commented out in the original:
    // $baseConfig->set('HTML', 'Doctype', 'XHTML 1.0 Strict');
    $baseConfig->set('Cache', 'SerializerPath', $GLOBALS['codendi_cache_dir']);

    // NOTE(review): the return statement is not visible in the reviewed
    // excerpt; callers in this class use the returned configuration object.
    return $baseConfig;
}
/**
 * Configuration allowing basic formatting markups only.
 *
 * Takes the base codendi configuration and restricts 'HTML.Allowed' to the
 * list returned by getLightConfigMarkups().
 *
 * @return HTMLPurifier_Config
 */
public function getLightConfig()
{
    $lightConfig = $this->getCodendiConfig();
    $allowedMarkups = $this->getLightConfigMarkups();
    $lightConfig->set('HTML', 'Allowed', $allowedMarkups);

    // NOTE(review): return statement not visible in the reviewed excerpt;
    // getHPConfig() uses the value returned by this method.
    return $lightConfig;
}
/**
 * Get allowed markups for light config.
 *
 * The light formatting covers, in this order:
 * - paragraphs:         'p', 'br'
 * - hypertext links:    'a[href|title|class]'
 * - lists:              'ul', 'ol', 'li'
 * - content-based text: 'cite', 'code', 'blockquote', 'strong', 'em',
 *                       'pre', 'b', 'i'
 *
 * @return string Comma separated list of markups, in the format given to
 *                the 'HTML.Allowed' directive by getLightConfig().
 */
public function getLightConfigMarkups()
{
    $markupGroups = array(
        'paragraph' => array('p', 'br'),
        'links'     => array('a[href|title|class]'),
        'list'      => array('ul', 'ol', 'li'),
        'content'   => array('cite', 'code', 'blockquote', 'strong', 'em',
                             'pre', 'b', 'i'),
    );

    $markups = array();
    foreach ($markupGroups as $group) {
        foreach ($group as $markup) {
            $markups[] = $markup;
        }
    }

    return implode(',', $markups);
}
/**
 * Configuration that allows no markup at all.
 *
 * Takes the base codendi configuration and sets 'HTML.Allowed' to the
 * empty string.
 *
 * @return HTMLPurifier_Config
 */
public function getStripConfig()
{
    $stripConfig = $this->getCodendiConfig();
    $stripConfig->set('HTML', 'Allowed', '');

    // NOTE(review): return statement not visible in the reviewed excerpt;
    // getHPConfig() uses the value returned by this method.
    return $stripConfig;
}
/**
 * HTML Purifier configuration factory.
 *
 * @param int $level One of the CODENDI_PURIFIER_* constants.
 *
 * @return HTMLPurifier_Config|null Configuration for LIGHT, FULL and
 *                                  STRIP_HTML; null for any other level.
 */
public function getHPConfig($level)
{
    switch ($level) {
        case CODENDI_PURIFIER_LIGHT:
            return $this->getLightConfig();

        case CODENDI_PURIFIER_FULL:
            return $this->getCodendiConfig();

        case CODENDI_PURIFIER_STRIP_HTML:
            return $this->getStripConfig();
    }

    // NOTE(review): the initialisation of $config and the final return are
    // not visible in the reviewed excerpt; null is assumed for the levels
    // that have no dedicated configuration - confirm.
    return null;
}
/**
 * Wrap the call to util_make_links() (for testing purposes).
 *
 * @param string $str     Text handed over to util_make_links().
 * @param int    $groupId Group id handed over to util_make_links().
 *
 * @return mixed Whatever util_make_links() returns.
 */
public function _makeLinks($str, $groupId)
{
    $withLinks = util_make_links($str, $groupId);

    return $withLinks;
}
/**
 * Perform HTML purification depending on the purification level required.
 *
 * Available levels:
 * - CODENDI_PURIFIER_CONVERT_HTML (default)
 *   Transforms HTML markups into entities.
 *
 * - CODENDI_PURIFIER_STRIP_HTML
 *   Removes all HTML markups. Note: as we rely on HTML Purifier to perform
 *   this operation, this option is not considered as secure as
 *   CONVERT_HTML. If you are looking for the most secure option please
 *   consider CONVERT_HTML.
 *
 * - CODENDI_PURIFIER_BASIC (needs $groupId to be set for automagic links)
 *   Converts all user submitted HTML markups into entities, then:
 *   - transforms typed URLs into clickable URLs,
 *   - transforms automagic links,
 *   - transforms carriage returns into HTML br markups.
 *
 * - CODENDI_PURIFIER_BASIC_NOBR
 *   Same as CODENDI_PURIFIER_BASIC without the carriage return to br
 *   transformation.
 *
 * - CODENDI_PURIFIER_LIGHT
 *   First set of HTML formatting (@see getLightConfig() for allowed
 *   markups), with links and carriage returns handled as in
 *   CODENDI_PURIFIER_BASIC.
 *
 * - CODENDI_PURIFIER_FULL
 *   Cleans up plain HTML using HTML Purifier rules (removes forms,
 *   javascript, ...). Warning: there are no codendi facilities any more
 *   (neither automagic links nor carriage return to br transformation).
 *
 * - CODENDI_PURIFIER_JS_QUOTE / CODENDI_PURIFIER_JS_DQUOTE
 *   Escapes the text with addslashes() and splits any closing script tag
 *   so the result can be placed in a single-quoted (JS_QUOTE) or
 *   double-quoted (JS_DQUOTE) JavaScript string.
 *
 * - CODENDI_PURIFIER_DISABLED
 *   Purification disabled.
 *
 * @param string $html    Text to purify.
 * @param int    $level   One of the CODENDI_PURIFIER_* constants.
 * @param int    $groupId Group id given to _makeLinks() for the levels that
 *                        build links (BASIC, BASIC_NOBR, LIGHT).
 *
 * @return string Purified text.
 */
function purify($html, $level=0, $groupId=0)
{
    switch ($level) {
        case CODENDI_PURIFIER_DISABLED:
            // NOTE(review): the body of this case is not visible in the
            // reviewed excerpt; "disabled" is taken to mean that the input
            // is returned untouched - confirm.
            $clean = $html;
            break;

        case CODENDI_PURIFIER_LIGHT:
            $html = nl2br($this->_makeLinks($html, $groupId));
            // Intentional fall-through: LIGHT goes through HTML Purifier too,
            // with the configuration selected by getHPConfig($level).
        case CODENDI_PURIFIER_STRIP_HTML:
        case CODENDI_PURIFIER_FULL:
            require_once 'HTMLPurifier.auto.php';
            // Plain assignment: assigning a function result by reference
            // raises "Only variables should be assigned by reference" when
            // the callee does not return by reference.
            $hp = HTMLPurifier::getInstance();
            $config = $this->getHPConfig($level);
            $clean = $hp->purify($html, $config);
            // Quite big object, it's better to unset it (memory).
            unset($config);
            break;

        case CODENDI_PURIFIER_BASIC:
            $clean = nl2br($this->_makeLinks(htmlentities($html, ENT_QUOTES, 'UTF-8'), $groupId));
            break;

        case CODENDI_PURIFIER_BASIC_NOBR:
            $clean = $this->_makeLinks(htmlentities($html, ENT_QUOTES, 'UTF-8'), $groupId);
            break;

        // NOTE(review): for the two JS levels the end of the statement is
        // not visible in the reviewed excerpt. The inner replacement string
        // starts with a line break; it is assumed to be exactly one "\n"
        // and to be applied to $html - confirm.
        case CODENDI_PURIFIER_JS_QUOTE:
            // The two-character sequence backslash + n becomes a line break,
            // the text is escaped with addslashes(), then closing script
            // tags are split.
            $clean = preg_replace('/\<\/script\>/umsi', "</'+'script>", addslashes(preg_replace('/\\\n/ums', "\n", $html)));
            break;

        case CODENDI_PURIFIER_JS_DQUOTE:
            $clean = preg_replace('/\<\/script\>/umsi', '</"+"script>', addslashes(preg_replace('/\\\n/ums', "\n", $html)));
            break;

        case CODENDI_PURIFIER_CONVERT_HTML:
        default:
            // NOTE(review): the `default` label is not visible in the
            // reviewed excerpt; unknown levels are assumed to get the most
            // restrictive treatment - confirm.
            $clean = htmlentities($html, ENT_QUOTES, 'UTF-8');
            break;
    }

    return $clean;
}
/**
 * Purify every element of an array with the same level and group id.
 *
 * The previous implementation handed array($level) and array($groupId) to
 * array_map(); array_map() pads the shorter arrays with null, so only the
 * first element was purified with the requested level and group id (every
 * other element fell back to the CONVERT_HTML branch of purify()), and an
 * empty input produced one bogus element. Each element now receives the
 * requested $level and $groupId.
 *
 * @param array $array   Texts to purify.
 * @param int   $level   One of the CODENDI_PURIFIER_* constants.
 * @param int   $groupId Group id given to purify().
 *
 * @return array Purified texts, in the input order, indexed from 0 (as
 *               array_map() did when given several arrays).
 */
function purifyMap($array, $level=0, $groupId=0)
{
    $purified = array();
    foreach ($array as $html) {
        $purified[] = $this->purify($html, $level, $groupId);
    }

    return $purified;
}