3 * FusionForge document search engine
5 * Copyright 2005, Fabio Bertagnin
6 * Copyright 2011, Franck Villaume - Capgemini
7 * http://fusionforge.org
9 * This file is part of FusionForge. FusionForge is free software;
10 * you can redistribute it and/or modify it under the terms of the
11 * GNU General Public License as published by the Free Software
12 * Foundation; either version 2 of the Licence, or (at your option)
15 * FusionForge is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with FusionForge; if not, write to the Free Software Foundation, Inc.,
22 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 require_once $gfcommon.'include/config.php';
/**
 * Extract the distinct words of a plain-text file as one space-separated string.
 *
 * The file content is lower-cased, forced to ASCII, passed through
 * suppression_diacritics(), split on spaces and de-duplicated; the
 * remaining words are then joined by print_list().
 *
 * @param	string	$fichin	path of the file to parse
 * @return	string	the word list, or an empty string when the path is not
 *			a regular file, cannot be read, or holds no data
 */
function parser_text($fichin) {
	if (!is_file($fichin)) {
		return "";
	}

	// file_get_contents() opens, reads and closes the file in one call, so
	// no handle can be leaked, and it copes with zero-length files (fread()
	// rejects a length of 0).
	$buff = file_get_contents($fichin);
	if ($buff === false || $buff === "") {
		return "";
	}

	if (function_exists('mb_strtolower')) {
		$buff = mb_strtolower($buff);
	} else {
		$buff = strtolower($buff);
	}

	// get rid of any non-ASCII characters that are still present
	// NOTE(review): this runs before the transliteration done in
	// suppression_diacritics(), so accented letters look like they are
	// substituted here rather than transliterated there - confirm intended.
	if (function_exists('mb_convert_encoding')) {
		$buff = mb_convert_encoding($buff, "ascii");
	}

	// strip accented and special characters
	$buff = suppression_diacritics($buff);

	// split into words and drop the duplicates
	$words = array_unique(explode(" ", $buff));

	// build the resulting word string
	return print_list($words);
}
/**
 * Join the usable words of a list into a single string.
 *
 * Despite its name this function prints nothing: it returns the string.
 * Entries of one byte or less (including the empty strings produced by
 * consecutive separators) are skipped; every kept entry is followed by a
 * single space, so a non-empty result ends with a space.
 *
 * @param	array	$list	candidate words
 * @return	string	kept words, each followed by one space ("" if none)
 */
function print_list($list) {
	// start from an empty string so the append below never touches an
	// undefined variable
	$rep = "";
	foreach ($list as $el) {
		if (strlen($el) > 1) {
			$rep .= "$el ";
		}
	}
	return $rep;
}
/**
 * Reduce a text to plain ASCII and blank out punctuation.
 *
 * The text is transliterated from UTF-8 to US-ASCII with iconv(), then
 * every tab, line break and listed punctuation character is replaced by a
 * space so that the caller can split the result on spaces.
 *
 * @param	string	$text	text to clean
 * @return	string	the cleaned text
 */
function suppression_diacritics($text) {
	$b = iconv('UTF-8', 'US-ASCII//TRANSLIT', $text);
	if ($b === false) {
		// iconv() gives up on invalid input; keep the ASCII part of the
		// text instead of losing the whole document
		$b = preg_replace('/[^\x00-\x7F]/', ' ', $text);
	}

	// strtr() ignores the surplus characters of the longer argument, so the
	// replacement must hold exactly one space per separator
	$separators = "\t\r\n?.*'\":;,#![]()";
	$b = strtr($b, $separators, str_repeat(' ', strlen($separators)));
	return $b;
}
/**
 * Current Unix timestamp with microsecond resolution.
 *
 * @return	float	seconds since the Unix epoch, fractional part included
 */
function microtime_float() {
	// microtime(true) already returns the timestamp as a float, so there is
	// no need to parse the "usec sec" string form
	return microtime(true);
}
/**
 * Write a debug message to the output, followed by an HTML line break.
 *
 * The text is emitted as is (no escaping), then " <br />" and a newline.
 *
 * @param	string	$text	message to display
 */
function print_debug ($text) {
	$suffix = " <br />\n";
	echo $text, $suffix;
}
86 // c-file-style: "bsd"