3 * FusionForge document search engine
5 * Copyright 2005, Fabio Bertagnin
7 * This file is part of FusionForge.
9 * FusionForge is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published
11 * by the Free Software Foundation; either version 2 of the License,
12 * or (at your option) any later version.
14 * FusionForge is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with FusionForge; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
/**
 * Extracts the distinct words of a plain-text file for the document
 * search engine.
 *
 * The content is lower-cased, converted to ASCII, passed through
 * suppression_diacritics(), split on single spaces and de-duplicated.
 *
 * @param string $fichin Path of the file to parse.
 * @return string Word list as formatted by print_list(); "" when $fichin
 *                is not a regular file, cannot be read, or is empty.
 */
function parser_text($fichin)
{
	if (!is_file($fichin)) return "";

	// file_get_contents() opens, reads and closes the file in one call, so
	// no handle is leaked and an empty file never triggers a zero-length
	// fread().
	$buff = file_get_contents($fichin);
	if ($buff === false || $buff === "") return "";

	$buff = mb_strtolower($buff);

	// Remove any non-ASCII characters that are still present.
	// NOTE(review): this runs before suppression_diacritics(), so accented
	// letters may already have been replaced by the mbstring substitute
	// character when the accent mapping is applied - confirm the intended
	// order before changing it.
	$buff = mb_convert_encoding($buff, "ascii");

	// Strip accented letters and special characters.
	$buff = suppression_diacritics($buff);

	// Put every word into an array, then drop duplicates.
	$a = array_unique(explode(" ", $buff));

	// Hand the formatted word list back to the caller.
	return print_list($a);
}
/**
 * Builds a space-terminated word list from an array of strings.
 *
 * Entries of one byte or less (including empty strings) are skipped.
 * Every kept entry is followed by a single space, so a non-empty result
 * always ends with a space. Nothing is printed; the string is returned.
 *
 * @param array $list Candidate words.
 * @return string Kept words, each followed by " "; "" if none is kept.
 */
function print_list ($list)
{
	// Start from an empty string so an empty list yields "" rather than
	// an undefined variable.
	$rep = "";
	foreach ($list as $el)
	{
		if (strlen($el) > 1) $rep .= "$el ";
	}
	return $rep;
}
/**
 * Replaces accented letters with their unaccented equivalent and turns
 * tabs, line breaks and punctuation into spaces.
 *
 * @param string $text Text to normalise.
 * @return string Normalised text.
 */
function suppression_diacritics($text)
{
	// Array-form strtr() replaces each key as a whole string, so the
	// multi-byte accented letters are matched as complete characters
	// instead of byte by byte.
	$accents = array(
		"é" => "e", "ê" => "e", "è" => "e", "ë" => "e",
		"à" => "a", "â" => "a", "ä" => "a",
		"î" => "i", "ï" => "i",
		"ù" => "u", "û" => "u", "ü" => "u",
		"ô" => "o", "ö" => "o",
		"ç" => "c",
	);
	$b = strtr($text, $accents);

	// The three-argument strtr() ignores the surplus characters of the
	// longer argument, so the replacement string must be as long as the
	// separator list; otherwise only the first separator is replaced.
	$separators = "\t\r\n?.*'\":;,#![]()";
	$b = strtr($b, $separators, str_repeat(" ", strlen($separators)));

	return $b;
}
/**
 * Returns the current time as a float by adding the two space-separated
 * numeric fields of the string returned by microtime().
 *
 * @return float Sum of the two fields of microtime().
 */
function microtime_float()
{
	$parts = explode(" ", microtime());
	$fraction = (float)$parts[0];
	$seconds = (float)$parts[1];
	return ($fraction + $seconds);
}
/**
 * Writes a debug message to the output, followed by an HTML line break
 * and a newline.
 *
 * @param string $text Message to output (emitted as-is, without escaping).
 */
function print_debug ($text)
{
	echo $text . " <br />\n";
}
81 // c-file-style: "bsd"