3 * FusionForge document search engine
5 * Copyright 2005, Fabio Bertagnin
6 * Copyright 2011, Franck Villaume - Capgemini
7 * Copyright (C) 2012 Alain Peyrat - Alcatel-Lucent
8 * http://fusionforge.org
10 * This file is part of FusionForge. FusionForge is free software;
11 * you can redistribute it and/or modify it under the terms of the
12 * GNU General Public License as published by the Free Software
13 * Foundation; either version 2 of the Licence, or (at your option)
16 * FusionForge is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along
22 * with FusionForge; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 require_once $gfcommon.'include/config.php';
/**
 * Build the list of distinct words found in a plain text file.
 *
 * The content is lower-cased, converted to ASCII when the mbstring
 * functions are available, cleaned by suppression_diacritics(), split on
 * single spaces and de-duplicated. The remaining words are assembled by
 * print_list().
 *
 * @param	string	$fichin	path of the file to parse
 * @return	string	the string built by print_list(), or an empty string
 *			when the file is missing, empty or cannot be read
 */
function parser_text($fichin) {
	if (!is_file($fichin)) {
		return "";
	}

	// file_get_contents() opens, reads and closes the file in one call,
	// so no handle can be leaked and a read failure is reported as false.
	$buff = file_get_contents($fichin);
	if ($buff === false || $buff === "") {
		return "";
	}

	if (function_exists('mb_strtolower')) {
		$buff = mb_strtolower($buff);
	} else {
		$buff = strtolower($buff);
	}

	// get rid of any unicode characters that may still be present
	if (function_exists('mb_convert_encoding')) {
		$buff = mb_convert_encoding($buff, "ascii");
	}

	// remove accented and special characters
	$buff = suppression_diacritics($buff);
	// put every word in an array
	$words = explode(" ", $buff);
	// drop duplicates
	$words = array_unique($words);
	// hand the assembled word list back to the caller
	return print_list($words);
}
/**
 * Join the words of a list into one space-terminated string.
 *
 * Entries of at most one byte (empty strings, single characters) are
 * skipped. Every kept word is followed by a single space, so a non-empty
 * result always ends with a space.
 *
 * @param	array	$list	words to assemble
 * @return	string	kept words, each followed by a space; "" when none is kept
 */
function print_list($list) {
	// Start from an empty string so that appending never touches an
	// undefined variable and an empty list yields "".
	$rep = "";
	foreach ($list as $el) {
		if (strlen($el) > 1) {
			$rep .= $el . " ";
		}
	}
	return $rep;
}
/**
 * Reduce a text to plain ASCII and blank out punctuation.
 *
 * The text is transliterated from UTF-8 to US-ASCII with iconv(); tabs,
 * line breaks and the punctuation characters ?.*'":;,#![]() are then each
 * replaced by a single space.
 *
 * @param	string	$text	text to clean
 * @return	string	cleaned text
 */
function suppression_diacritics($text) {
	$converted = false;
	if (function_exists('iconv')) {
		$converted = iconv('UTF-8', 'US-ASCII//TRANSLIT', $text);
	}
	if ($converted === false) {
		// iconv is missing or rejected the input: blank out every
		// non-ASCII byte instead of losing the whole text.
		$converted = preg_replace('/[^\x00-\x7F]/', ' ', $text);
	}

	// strtr() ignores the surplus characters of the longer argument, so the
	// replacement string must be exactly as long as the character list.
	$separators = "\t\r\n?.*'\":;,#![]()";
	return strtr($converted, $separators, str_repeat(' ', strlen($separators)));
}
/**
 * Return the current Unix timestamp with microsecond precision.
 *
 * @return	float	seconds since the Unix epoch, including the fractional part
 */
function microtime_float() {
	// microtime(true) yields the float directly, which replaces splitting
	// and summing the "usec sec" string form.
	return microtime(true);
}
/**
 * Echo a debug message followed by an HTML line break and a newline.
 *
 * @param	string	$text	message to output
 */
function print_debug ($text) {
	$line = $text . " <br />\n";
	echo $line;
}
88 // c-file-style: "bsd"