3 * Copyright (c) STMicroelectronics, 2007. All Rights Reserved.
5 * Originally written by Manuel VACELET, 2007.
7 * This file is a part of Fusionforge.
9 * Fusionforge is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * Fusionforge is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Codendi. If not, see <http://www.gnu.org/licenses/>.
24 * Clean-up HTML code for user output.
26 * This class aims to purify the HTML code provided by a user so that it
27 * can be displayed safely (remove XSS and make the HTML standard compliant).
30 * require_once('pre.php');
31 * require_once('common/include/Codendi_HTMLPurifier.class.php');
32 * $crapy = '<a href="" onmouseover="alert(1);">testé</a>';
33 * $hp =& Codendi_HTMLPurifier::instance();
34 * $clean = $hp->purify($crapy);
// Purification levels accepted as the $level argument of
// Codendi_HTMLPurifier::purify().
// Default level: the input is passed through htmlentities() (ENT_QUOTES, UTF-8).
38 define('CODENDI_PURIFIER_CONVERT_HTML', 0);
// HTML Purifier is run with an empty 'Allowed' list (see getStripConfig()).
39 define('CODENDI_PURIFIER_STRIP_HTML', 1);
// Input is entity-encoded, then passed through _makeLinks() and nl2br().
40 define('CODENDI_PURIFIER_BASIC', 5);
// Same as BASIC but without the nl2br() step.
41 define('CODENDI_PURIFIER_BASIC_NOBR', 6);
// _makeLinks() and nl2br() are applied first, then HTML Purifier runs with
// the light configuration (see getLightConfig()).
42 define('CODENDI_PURIFIER_LIGHT', 10);
// HTML Purifier runs with the base Codendi configuration.
43 define('CODENDI_PURIFIER_FULL', 15);
// Applies addslashes() and rewrites "</script>" as "</'+'script>".
44 define('CODENDI_PURIFIER_JS_QUOTE', 20);
// Applies addslashes() and rewrites "</script>" as '</"+"script>'.
45 define('CODENDI_PURIFIER_JS_DQUOTE', 25);
// Has its own case in purify(); presumably returns the input untouched
// -- TODO confirm against the purify() body.
46 define('CODENDI_PURIFIER_DISABLED', 100);
48 class Codendi_HTMLPurifier {
50 * Hold an instance of the class
52 private static $Codendi_HTMLPurifier_instance;
54 private function __construct() {
62 public static function instance() {
63 if (!isset(self::$Codendi_HTMLPurifier_instance)) {
65 self::$Codendi_HTMLPurifier_instance = new $c;
67 return self::$Codendi_HTMLPurifier_instance;
71 * Base configuration of HTML Purifier for codendi.
73 protected function getCodendiConfig() {
74 $config = HTMLPurifier_Config::createDefault();
75 $config->set('Core', 'Encoding', 'UTF-8');
76 // $config->set('HTML', 'Doctype', 'XHTML 1.0 Strict');
77 $config->set('Cache', 'SerializerPath', $GLOBALS['codendi_cache_dir']);
82 * Allow basic formatting markups.
85 function getLightConfig() {
86 $config = $this->getCodendiConfig();
87 $config->set('HTML', 'Allowed', $this->getLightConfigMarkups());
92 * Get allowed markups for light config
94 * This function defines the markups allowed for a light
95 * formatting. This includes markups for lists, for paragraphs, hypertext
96 * links, and content-based text.
99 * - 'a[href|title|class]'
101 * - 'cite', 'code', 'blockquote', 'strong', 'em', 'pre', 'b', 'i'
103 function getLightConfigMarkups() {
104 $eParagraph = array('p', 'br');
105 $eLinks = array('a[href|title|class]');
106 $eList = array('ul', 'ol', 'li');
107 $eContentBasedTxt = array('cite', 'code', 'blockquote', 'strong', 'em',
110 $aa = array_merge($eParagraph, $eLinks, $eList, $eContentBasedTxt);
111 $allowed = implode(',', $aa);
119 function getStripConfig() {
120 $config = $this->getCodendiConfig();
121 $config->set('HTML', 'Allowed', '');
126 * HTML Purifier configuration factory
128 function getHPConfig($level) {
131 case CODENDI_PURIFIER_LIGHT:
132 $config = $this->getLightConfig();
135 case CODENDI_PURIFIER_FULL:
136 $config = $this->getCodendiConfig();
139 case CODENDI_PURIFIER_STRIP_HTML:
140 $config = $this->getStripConfig();
/**
 * Delegate link creation to the global util_make_links() helper.
 *
 * The call is kept behind its own method so that it can be replaced when
 * the class is exercised in tests.
 *
 * @param string $str     Text handed to util_make_links().
 * @param int    $groupId Group identifier handed to util_make_links().
 *
 * @return mixed Whatever util_make_links() returns for these arguments.
 */
function _makeLinks($str, $groupId) {
    $withLinks = util_make_links($str, $groupId);

    return $withLinks;
}
154 * Perform HTML purification depending on the purification level required.
156 * There are several levels of purification, from the most restrictive to most
158 * - CODENDI_PURIFIER_CONVERT_HTML (default)
159 * Transforms HTML markup into entities.
161 * - CODENDI_PURIFIER_STRIP_HTML
162 * Removes all HTML markups. Note: as we rely on HTML Purifier to
163 * perform this operation this option is not considered as secure as
164 * CONVERT_HTML. If you are looking for the most secure option please
165 * consider CONVERT_HTML.
167 * - CODENDI_PURIFIER_BASIC (need $groupId to be set for automagic links)
168 * Removes all user submitted HTML markups but:
169 * - transform typed URLs into clickable URLs.
170 * - transform automagic links.
171 * - transform carriage returns into HTML br markup.
173 * - CODENDI_PURIFIER_LIGHT
174 * First set of HTML formatting (@see getLightConfig() for allowed
175 * markups) plus all what is allowed by CODENDI_PURIFIER_BASIC.
177 * - CODENDI_PURIFIER_FULL
178 * Clean-up plain HTML using HTML Purifier rules (remove forms,
179 * javascript, ...). Warning: Codendi facilities are no longer applied
180 * (neither automagic links nor carriage return to br transformation).
182 * - CODENDI_PURIFIER_DISABLED
185 function purify($html, $level=0, $groupId=0) {
188 case CODENDI_PURIFIER_DISABLED:
192 case CODENDI_PURIFIER_LIGHT:
193 $html = nl2br($this->_makeLinks($html, $groupId));
194 case CODENDI_PURIFIER_STRIP_HTML:
195 case CODENDI_PURIFIER_FULL:
196 require_once 'HTMLPurifier.auto.php';
197 $hp =& HTMLPurifier::getInstance();
198 $config = $this->getHPConfig($level);
199 $clean = $hp->purify($html, $config);
200 // Quite big object, it's better to unset it (memory).
204 case CODENDI_PURIFIER_BASIC:
205 $clean = nl2br($this->_makeLinks(htmlentities($html, ENT_QUOTES, 'UTF-8'), $groupId));
207 case CODENDI_PURIFIER_BASIC_NOBR:
208 $clean = $this->_makeLinks(htmlentities($html, ENT_QUOTES, 'UTF-8'), $groupId);
211 case CODENDI_PURIFIER_JS_QUOTE:
212 $clean = preg_replace('/\<\/script\>/umsi', "</'+'script>", addslashes(preg_replace('/\\\n/ums', "
215 case CODENDI_PURIFIER_JS_DQUOTE:
216 $clean = preg_replace('/\<\/script\>/umsi', '</"+"script>', addslashes(preg_replace('/\\\n/ums', '
219 case CODENDI_PURIFIER_CONVERT_HTML:
221 $clean = htmlentities($html, ENT_QUOTES, 'UTF-8');
/**
 * Purify every element of an array with the same purification settings.
 *
 * Each element is passed to purify() together with $level and $groupId.
 * An explicit loop is used rather than array_map() with the one-element
 * arrays array($level) and array($groupId): array_map() pads shorter
 * arrays with null, so only the first element would receive the requested
 * level and group id, and an empty input would still produce one result.
 *
 * @param array $array   Values to purify.
 * @param int   $level   One of the CODENDI_PURIFIER_* constants.
 * @param int   $groupId Group identifier forwarded to purify().
 *
 * @return array Purified values in input order, re-indexed from 0.
 */
function purifyMap($array, $level=0, $groupId=0) {
    $purified = array();
    foreach ($array as $html) {
        // Keys are deliberately dropped: array_map() with several arrays
        // also returned a sequentially indexed list.
        $purified[] = $this->purify($html, $level, $groupId);
    }
    return $purified;
}