3 * FusionForge document search engine
5 * Copyright 2005, Fabio Bertagnin
6 * http://fusionforge.org
8 * This file is part of FusionForge.
10 * FusionForge is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published
12 * by the Free Software Foundation; either version 2 of the License,
13 * or (at your option) any later version.
15 * FusionForge is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with FusionForge; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
/**
 * Extract the distinct words of a plain-text file for the document search engine.
 *
 * The file content is lower-cased, converted to ASCII, cleaned by
 * suppression_diacritics(), split on single spaces and de-duplicated.
 * The surviving words are then joined by print_list().
 *
 * @param string $fichin Path of the text file to parse.
 * @return string Word list as built by print_list(); an empty string when
 *                $fichin is not a regular file or cannot be read.
 */
function parser_text($fichin) {
	// Nothing to index when the path does not designate a regular file.
	if (!is_file($fichin)) {
		return "";
	}

	// file_get_contents() opens, reads and closes the file in one call, so no
	// handle can leak, and it copes with an empty file (fread() rejects a
	// zero length).
	$buff = file_get_contents($fichin);
	if ($buff === false) {
		return "";
	}

	$buff = mb_strtolower($buff);
	// drop any unicode characters that may still be present
	// NOTE(review): this runs before suppression_diacritics(), so accented
	// letters may already be replaced by the substitution character before
	// the transliteration step sees them - confirm the intended order.
	$buff = mb_convert_encoding($buff, "ascii");
	// remove accented characters
	// and special characters
	$buff = suppression_diacritics($buff);
	// put all the words in an array
	$a = explode(" ", $buff);
	// remove duplicates
	$a = array_unique($a);
	// build and return the resulting word list
	return print_list($a);
}
/**
 * Join the words of a list into one space-separated string.
 *
 * Entries whose length is 1 byte or less (including empty strings) are
 * skipped. Every kept word is followed by a single space, so a non-empty
 * result ends with a trailing space.
 *
 * @param array $list Words to join.
 * @return string The joined words, or an empty string when nothing is kept.
 */
function print_list($list) {
	// Start from an empty string so that an empty list (or a list holding
	// only short entries) still yields a string.
	$rep = "";
	foreach ($list as $el) {
		if (strlen($el) > 1) {
			$rep .= "$el ";
		}
	}
	return $rep;
}
/**
 * Transliterate a UTF-8 text to ASCII and blank out punctuation.
 *
 * @param string $text Text to clean, expected to be UTF-8 (or plain ASCII).
 * @return string The text with tab, CR, LF and the characters
 *                ? . * ' " : ; , # ! [ ] ( ) each replaced by one space.
 */
function suppression_diacritics($text) {
	$b = iconv('UTF-8', 'US-ASCII//TRANSLIT', $text);
	if ($b === false) {
		// iconv() could not convert the input; keep the text as given so
		// the punctuation is still stripped instead of losing everything.
		$b = $text;
	}

	// strtr() maps characters one-to-one and ignores the surplus of the
	// longer argument, so the replacement must be exactly as long as the
	// list of characters to blank out.
	$separators = "\t\r\n?.*'\":;,#![]()";
	return strtr($b, $separators, str_repeat(" ", strlen($separators)));
}
/**
 * Return the current Unix timestamp with microseconds.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function microtime_float() {
	// microtime(true) yields the float directly; no need to split the
	// "usec sec" string and add the two parts by hand.
	return microtime(true);
}
/**
 * Print a debug message followed by an HTML line break and a newline.
 *
 * The text is written as given, without any escaping.
 *
 * @param string $text Message to output.
 * @return void
 */
function print_debug ($text) {
	echo "$text <br />\n";
}
76 // c-file-style: "bsd"