2 // rcs_id('$Id: AccessLog.php 7639 2010-08-11 12:15:16Z vargenau $');
4 * Copyright 2005, 2007 Reini Urban
6 * This file is part of PhpWiki.
8 * PhpWiki is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * PhpWiki is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * Read and write file and SQL accesslog. Write sequentially.
26 * Read from file per pagename: Hits
31 * Create NCSA "combined" log entry for current request.
32 * Also needed for advanced spam prevention.
33 * global object holding global state (sql or file, entries, to dump)
35 class Request_AccessLog {
37 * @param $logfile string Log file name.
39 function Request_AccessLog ($logfile, $do_sql = false) {
40 //global $request; // request not yet initialized!
42 $this->logfile = $logfile;
43 if ($logfile and !is_writeable($logfile)) {
45 (sprintf(_("%s is not writable."), _("The PhpWiki access log file"))
47 . sprintf(_("Please ensure that %s is writable, or redefine %s in config/config.ini."),
48 sprintf(_("the file '%s'"), ACCESS_LOG),
52 //$request->_accesslog =& $this;
53 //if (empty($request->_accesslog->entries))
54 register_shutdown_function("Request_AccessLogEntry_shutdown_function");
57 if (!$request->_dbi->isSQL()) {
58 trigger_error("Unsupported database backend for ACCESS_LOG_SQL.\nNeed DATABASE_TYPE=SQL or ADODB or PDO");
61 //$this->_dbi =& $request->_dbi;
62 $this->logtable = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '')."accesslog";
65 $this->entries = array();
66 $this->entries[] = new Request_AccessLogEntry($this);
/**
 * Call one method on every pending log entry.
 *
 * @param $cmd string Name of the Request_AccessLogEntry method to invoke.
 * @param $arg mixed  The single argument handed to that method. Taken by
 *                    reference so that push() can forward the request object.
 */
function _do($cmd, &$arg) {
    // write_file() and the shutdown callback unset $this->entries, so a late
    // setter call may find nothing left to update.
    if (empty($this->entries))
        return;
    // Walk the keys and call through the array slot, so the stored entry
    // itself is modified and count() is not re-evaluated on every pass.
    foreach (array_keys($this->entries) as $i)
        $this->entries[$i]->$cmd($arg);
}
/** Forward the request object to push() of every pending entry. */
function push(&$request) {
    $this->_do('push', $request);
}

/** Forward the given size to setSize() of every pending entry. */
function setSize($arg) {
    $this->_do('setSize', $arg);
}

/** Forward the given status to setStatus() of every pending entry. */
function setStatus($arg) {
    $this->_do('setStatus', $arg);
}

/** Forward the given duration to setDuration() of every pending entry. */
function setDuration($arg) {
    $this->_do('setDuration', $arg);
}
80 * Read sequentially all previous entries from the beginning.
81 * while ($logentry = Request_AccessLogEntry::read()) ;
82 * For internal log analyzers: RecentReferrers, WikiAccessRestrictions
85 return $this->logtable ? $this->read_sql() : $this->read_file();
89 * Return iterator of referer items reverse sorted (latest first).
/**
 * Return iterator of referer items reverse sorted (latest first).
 *
 * @param $limit integer Maximum number of items; 0 means "no limit".
 * @param $external_only boolean If true, skip referers that begin with
 *                       SERVER_URL (see stdlib.php:isExternalReferrer()).
 * @return object Iterator over the matching log entries.
 */
function get_referer($limit=15, $external_only=false) {
    // $limit ends up inside the SQL text, so force it to an integer.
    $limit = (int)$limit;
    // Defined unconditionally: both branches below read these values.
    $base = SERVER_URL;
    $blen = strlen($base);

    if (!empty($this->_dbi)) {
        $where = "(referer <>'' AND NOT(ISNULL(referer)))";
        if ($external_only) {
            // check same hosts in referer and request and remove them
            $where .= " AND LEFT(referer,$blen) <> ".$this->_dbi->quote($base)
                ." AND LEFT(referer,$blen) <> LEFT(CONCAT(".$this->_dbi->quote(SERVER_URL).",request_uri),$blen)";
        }
        return $this->_read_sql_query($where
                                      ." ORDER BY time_stamp DESC"
                                      .($limit ? " LIMIT $limit" : ""));
    }

    $iter = new WikiDB_Array_generic_iter(0);
    $logs =& $iter->_array;
    // NOTE(review): the iterator is constructed with 0; make sure the backing
    // store really is an array before appending to it.
    $logs = array();
    while ($logentry = $this->read_file()) {
        if (empty($logentry->referer))
            continue;
        if ($external_only and substr($logentry->referer,0,$blen) == $base)
            continue;
        $logs[] = $logentry;
        // Keep only the newest $limit items while reading forward.
        if ($limit and count($logs) > $limit)
            array_shift($logs);
    }
    // The file is read oldest first; callers expect latest first.
    $logs = array_reverse($logs);
    return $iter;
}
123 * Return iterator of matching host items reverse sorted (latest first).
/**
 * Return the log items of one remote host, reverse sorted (latest first).
 *
 * @param $host string Remote host to look for.
 * @param $since_minutes integer Only consider entries younger than this.
 * @return mixed File mode: iterator over the matching entries.
 *               SQL mode: whatever read_sql() returns.
 */
function get_host($host, $since_minutes=20) {
    global $request;
    $since = time() - $since_minutes * 60;

    if ($this->logtable) {
        // $this->_dbi is not assigned anywhere in the visible code, so fall
        // back to the request's database handle as _read_sql_query() does.
        $dbh = !empty($this->_dbi) ? $this->_dbi : $request->_dbi;
        // mysql specific only:
        // The column is remote_host in every other query of this file.
        // NOTE(review): read_sql() yields a single row and caches its
        // iterator, whereas the file branch returns an iterator -- confirm
        // what the callers expect.
        return $this->read_sql("remote_host=".$dbh->quote($host)
                               ." AND time_stamp > ".$since
                               ." ORDER BY time_stamp DESC");
    }

    $iter = new WikiDB_Array_generic_iter();
    $logs =& $iter->_array;
    $logs = array();
    // read_file() lives on the log object, not on the entry.
    while ($logentry = $this->read_file()) {
        if (empty($logentry->host) or $logentry->host != $host)
            continue;
        // Entries parsed from the file carry the NCSA time string; strtotime()
        // understands that layout when the zone offset is signed.
        $stamp = is_numeric($logentry->time)
            ? (int)$logentry->time
            : strtotime($logentry->time);
        // Unparsable times are kept rather than silently dropped.
        if ($stamp !== false and $stamp < $since)
            continue;
        $logs[] = $logentry;
    }
    // The file is read oldest first; callers expect latest first.
    $logs = array_reverse($logs);
    return $iter;
}
150 * Read sequentially all previous entries from the log file, oldest first.
/**
 * Read the next entry from the access log file.
 *
 * The first call opens the file at its beginning; each further call
 * returns the following line. At the end of the file the handle is closed
 * and forgotten, so a later call starts over from the beginning.
 *
 * @return mixed A Request_AccessLogEntry, or false at end of file or when
 *               the log file cannot be opened. Lines that do not match the
 *               combined log layout yield an entry with default values.
 */
function read_file() {
    // support Request_AccessLog::read: only fall back to the configured
    // file when no log file name was given.
    if (empty($this->logfile))
        $this->logfile = ACCESS_LOG;

    if (empty($this->reader)) { // start at the beginning
        $fh = fopen($this->logfile, "r");
        if (!$fh)
            return false;
        $this->reader = $fh;
    }

    $s = fgets($this->reader);
    if ($s === false) { // until the end
        fclose($this->reader);
        // Drop the closed handle so the next call does not read from it.
        unset($this->reader);
        return false;
    }

    $entry = new Request_AccessLogEntry($this);
    $re = '/^(\S+)\s(\S+)\s(\S+)\s\[(.+?)\] "([^"]+)" (\d+) (\d+) "([^"]*)" "([^"]*)"$/';
    if (preg_match($re, $s, $m)) {
        list(,$entry->host, $entry->ident, $entry->user, $entry->time,
             $entry->request, $entry->status, $entry->size,
             $entry->referer, $entry->user_agent) = $m;
    }
    return $entry;
}
/**
 * Return the next row of the SQL access log.
 *
 * The query is issued on the first call only; the resulting iterator is
 * kept in $this->sqliter and reused afterwards, whatever $where says then.
 *
 * @param $where string Optional WHERE condition for the first call.
 */
function read_sql($where='') {
    $needs_query = empty($this->sqliter);
    if ($needs_query) {
        $this->sqliter = $this->_read_sql_query($where);
    }
    return $this->sqliter->next();
}
178 function _read_sql_query($where='') {
180 $dbh =& $request->_dbi;
181 $log_tbl =& $this->logtable;
182 return $dbh->genericSqlIter("SELECT *,request_uri as request,request_time as time,remote_user as user,"
183 ."remote_host as host,agent as user_agent"
185 . ($where ? " WHERE $where" : ""));
188 /* done in request->finish() before the db is closed */
189 function write_sql() {
191 $dbh =& $request->_dbi;
192 if (isset($this->entries) and $dbh and $dbh->isOpen())
193 foreach ($this->entries as $entry) {
197 /* done in the shutdown callback */
/**
 * Write every pending entry to the log file, then drop the pending list.
 * Nothing is written when no log file is configured.
 */
function write_file() {
    $have_entries = isset($this->entries);
    if ($have_entries && $this->logfile) {
        foreach ($this->entries as $pending) {
            $pending->write_file();
        }
    }
    unset($this->entries);
}
205 /* in an ideal world... */
207 if ($this->logfile) $this->write_file();
208 if ($this->logtable) $this->write_sql();
209 unset($this->entries);
213 class Request_AccessLogEntry
218 * The log entry will be automatically appended to the log file or
219 * SQL table when the current request terminates.
221 * If you want to modify a Request_AccessLogEntry before it gets
222 * written (e.g. via the setStatus and setSize methods) you should
223 * use an '&' on the constructor, so that you're working with the
224 * original (rather than a copy) object.
227 * $log_entry = & new Request_AccessLogEntry($accesslog);
228 * $log_entry->setStatus(401);
229 * $log_entry->push($request);
/**
 * Create a log entry bound to the given access log.
 *
 * The entry starts with the current time, status 200 and size 0.
 *
 * @param $accesslog object The Request_AccessLog this entry belongs to.
 */
function Request_AccessLogEntry (&$accesslog) {
    // Where the entry goes.
    $this->_accesslog = $accesslog;
    $this->logfile    = $accesslog->logfile;

    // What it holds until push() and the setters fill in real values.
    $this->time   = time();
    $this->status = 200;  // see setStatus()
    $this->size   = 0;    // see setSize()
}
243 * @param $request object Request object for current request.
245 function push(&$request) {
246 $this->host = $request->get('REMOTE_HOST');
247 $this->ident = $request->get('REMOTE_IDENT');
250 $user = $request->getUser();
251 if ($user->isAuthenticated())
252 $this->user = $user->UserName();
255 $this->request = join(' ', array($request->get('REQUEST_METHOD'),
256 $request->get('REQUEST_URI'),
257 $request->get('SERVER_PROTOCOL')));
258 $this->referer = (string) $request->get('HTTP_REFERER');
259 $this->user_agent = (string) $request->get('HTTP_USER_AGENT');
263 * Set result status code.
265 * @param $status integer HTTP status code.
/**
 * Remember the result status code for this entry.
 */
function setStatus ($status)
{
    $this->status = $status;
}
274 * @param $size integer
/**
 * Remember the response size for this entry; the value is cast to integer.
 */
function setSize ($size=0)
{
    $as_int = (int)$size;
    $this->size = $as_int;
}
/**
 * Remember the request duration for this entry.
 *
 * @param $seconds float Duration in seconds.
 */
function setDuration ($seconds) {
    // Pear DB does not correctly quote , in floats using ?. e.g. in european locales.
    // %F is the locale-independent float conversion, so the decimal
    // separator is always "." whatever the current locale uses.
    $this->duration = sprintf("%F", $seconds);
}
286 * Get time zone offset.
288 * This is a static member function.
290 * @param $time integer Unix timestamp (defaults to current time).
291 * @return string Zone offset, e.g. "-0800" for PST.
/**
 * Time zone offset of the given moment, as used in NCSA log lines.
 *
 * @param $time integer Unix timestamp (defaults to current time).
 * @return string Signed zone offset, e.g. "-0800" or "+0100".
 */
function _zone_offset ($time = false) {
    if (!$time)
        $time = time();
    // "O" is the difference to GMT as sign, two-digit hours and two-digit
    // minutes, which is exactly the layout wanted here.
    return date("O", $time);
}
308 * Format time in NCSA format.
310 * This is a static member function.
312 * @param $time integer Unix timestamp (defaults to current time).
313 * @return string Formatted date & time.
/**
 * Format a moment the way NCSA log lines expect it.
 *
 * @param $time integer Unix timestamp (defaults to current time).
 * @return string Date, time and zone offset, e.g. "11/Aug/2010:12:15:16 +0200".
 */
function _ncsa_time($time = false) {
    if (!$time)
        $time = time();
    // The offset must belong to the same moment as the date: around a
    // daylight-saving switch the current offset differs from the entry's.
    return date("d/M/Y:H:i:s", $time) .
        " " . $this->_zone_offset($time);
}
323 if ($this->_accesslog->logfile) $this->write_file();
324 if ($this->_accesslog->logtable) $this->write_sql();
328 * Write entry to log file.
/**
 * Write entry to log file.
 *
 * The text fields come from the client (request line, referer, user
 * agent, ...). Control characters and double quotes are neutralised first,
 * so one request always yields exactly one line that the reader's pattern
 * can parse again.
 */
function write_file() {
    $clean = array();
    foreach (array('host', 'ident', 'user', 'request', 'referer', 'user_agent') as $field) {
        $value = isset($this->$field) ? (string)$this->$field : '';
        // CR/LF and other control characters would start a forged line.
        $value = (string)preg_replace('/[\x00-\x1f\x7f]+/', ' ', $value);
        // A raw double quote would end the quoted field early.
        $clean[$field] = str_replace('"', '%22', $value);
    }

    $entry = sprintf('%s %s %s [%s] "%s" %d %d "%s" "%s"',
                     $clean['host'], $clean['ident'], $clean['user'],
                     $this->_ncsa_time($this->time),
                     $clean['request'], $this->status, $this->size,
                     $clean['referer'], $clean['user_agent']);

    // A read handle left open by the log object is closed before appending.
    if (!empty($this->_accesslog->reader)) {
        fclose($this->_accesslog->reader);
        unset($this->_accesslog->reader);
    }

    //Error log doesn't provide locking.
    //error_log("$entry\n", 3, $this->logfile);
    if (($fp = fopen($this->logfile, "a"))) {
        // Hold an exclusive lock while appending; fclose() releases it.
        flock($fp, LOCK_EX);
        fputs($fp, "$entry\n");
        fclose($fp);
    }
}
350 /* This is better done by Apache mod_log_sql. */
351 /* If ACCESS_LOG_SQL & 2 is set, we write it on our own. */
352 function write_sql() {
355 $dbh =& $request->_dbi;
356 if ($dbh and $dbh->isOpen() and $this->_accesslog->logtable) {
357 //$log_tbl =& $this->_accesslog->logtable;
358 if ($request->get('REQUEST_METHOD') == "POST") {
359 // strangely HTTP_POST_VARS doesn't contain all posted vars.
360 $args = $_POST; // copy not ref. clone not needed on hashes
362 if (!empty($args['auth']['passwd'])) $args['auth']['passwd'] = '<not displayed>';
363 if (!empty($args['dbadmin']['passwd'])) $args['dbadmin']['passwd'] = '<not displayed>';
364 if (!empty($args['pref']['passwd'])) $args['pref']['passwd'] = '<not displayed>';
365 if (!empty($args['pref']['passwd2'])) $args['pref']['passwd2'] = '<not displayed>';
366 $this->request_args = substr(serialize($args),0,254); // if VARCHAR(255) is used.
368 $this->request_args = $request->get('QUERY_STRING');
370 $this->request_method = $request->get('REQUEST_METHOD');
371 $this->request_uri = $request->get('REQUEST_URI');
372 // duration problem: sprintf "%f" might use comma e.g. "100,201" in european locales
373 $dbh->_backend->write_accesslog($this);
379 * Shutdown callback. Ensures that the file is written.
382 * @see Request_AccessLogEntry
/**
 * Shutdown callback: flush the pending entries of the global request's
 * access log to the log file, then drop the pending list.
 */
function Request_AccessLogEntry_shutdown_function () {
    global $request;

    $have_entries = isset($request->_accesslog->entries);
    if ($have_entries && $request->_accesslog->logfile) {
        foreach ($request->_accesslog->entries as $pending) {
            $pending->write_file();
        }
    }
    unset($request->_accesslog->entries);
}
394 // TODO: SQL access methods....
395 // (c) 2005 Charles Corrigan (the mysql parts)
396 // (c) 2006 Reini Urban (the postgresql parts)
397 // from AnalyseAccessLogSql.php
398 class Request_AccessLog_SQL
402 * Build the query string
404 * FIXME: some or all of these queries may be MySQL specific / non-portable
405 * FIXME: properly quote the string args
407 * The column names displayed are generated from the actual query column
408 * names, so make sure that each column in the query is given a user
409 * friendly name. Note that the column names are passed to _() and so may be
412 * If there are query specific where conditions, then the construction
413 * " if ($where_conditions<>'')
414 * $where_conditions = 'WHERE '.$where_conditions.' ';"
415 * should be changed to
416 * " if ($where_conditions<>'')
417 * $where_conditions = 'AND '.$where_conditions.' ';"
418 * and in the assignment to query have something like
419 * " $query= "SELECT "
421 * ."FROM $accesslog "
422 * ."WHERE referer IS NOT NULL "
425 function _getQueryString(&$args) {
426 // extract any parametrised conditions from the arguments,
427 // in particular, how much history to select
428 $where_conditions = $this->_getWhereConditions($args);
430 // get the correct name for the table
431 //FIXME is there a more correct way to do this?
432 global $DBParams, $request;
433 $accesslog = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '')."accesslog";
436 $backend_type = $request->_dbi->_backend->backendType();
437 switch ($backend_type) {
439 $Referring_URL = "left(referer,length(referer)-instr(reverse(referer),'?'))"; break;
442 $Referring_URL = "substr(referer,0,position('?' in referer))"; break;
444 $Referring_URL = "referer";
446 switch ($args['mode']) {
447 case 'referring_urls':
448 if ($where_conditions<>'')
449 $where_conditions = 'WHERE '.$where_conditions.' ';
451 . "$Referring_URL AS Referring_URL, "
452 . "count(*) AS Referral_Count "
455 . "GROUP BY Referring_URL";
457 case 'external_referers':
458 $args['local_referrers'] = 'false';
459 $where_conditions = $this->_getWhereConditions($args);
460 if ($where_conditions<>'')
461 $where_conditions = 'WHERE '.$where_conditions.' ';
463 . "$Referring_URL AS Referring_URL, "
464 . "count(*) AS Referral_Count "
467 . "GROUP BY Referring_URL";
469 case 'referring_domains':
470 if ($where_conditions<>'')
471 $where_conditions = 'WHERE '.$where_conditions.' ';
472 switch ($backend_type) {
474 $Referring_Domain = "left(referer, if(locate('/', referer, 8) > 0,locate('/', referer, 8) -1, length(referer)))"; break;
477 $Referring_Domain = "substr(referer,0,8) || regexp_replace(substr(referer,8), '/.*', '')"; break;
479 $Referring_Domain = "referer"; break;
482 . "$Referring_Domain AS Referring_Domain, "
483 . "count(*) AS Referral_Count "
486 . "GROUP BY Referring_Domain";
489 if ($where_conditions<>'')
490 $where_conditions = 'WHERE '.$where_conditions.' ';
492 ."remote_host AS Remote_Host, "
493 ."count(*) AS Access_Count "
496 ."GROUP BY Remote_Host";
499 if ($where_conditions<>'')
500 $where_conditions = 'WHERE '.$where_conditions.' ';
502 ."remote_user AS User, "
503 ."count(*) AS Access_Count "
506 ."GROUP BY remote_user";
509 if ($where_conditions<>'')
510 $where_conditions = 'WHERE '.$where_conditions.' ';
512 ."remote_host AS Remote_Host, "
513 ."remote_user AS User, "
514 ."count(*) AS Access_Count "
517 ."GROUP BY remote_host, remote_user";
520 // This queries for all entries in the SQL access log table that
521 // have a dns name that I know to be a web search engine crawler and
522 // categorises the results into time buckets as per the list below
524 // 1 - 1 hour - 3600 = 60 * 60
525 // 2 - 1 day - 86400 = 60 * 60 * 24
526 // 3 - 1 week - 604800 = 60 * 60 * 24 * 7
527 // 4 - 1 month - 2629800 = 60 * 60 * 24 * 365.25 / 12
528 // 5 - 1 year - 31557600 = 60 * 60 * 24 * 365.25
531 ."CASE WHEN $now-time_stamp<60 THEN '"._("0 - last minute")."' ELSE "
532 ."CASE WHEN $now-time_stamp<3600 THEN '"._("1 - 1 minute to 1 hour")."' ELSE "
533 ."CASE WHEN $now-time_stamp<86400 THEN '"._("2 - 1 hour to 1 day")."' ELSE "
534 ."CASE WHEN $now-time_stamp<604800 THEN '"._("3 - 1 day to 1 week")."' ELSE "
535 ."CASE WHEN $now-time_stamp<2629800 THEN '"._("4 - 1 week to 1 month")."' ELSE "
536 ."CASE WHEN $now-time_stamp<31557600 THEN '"._("5 - 1 month to 1 year")."' ELSE "
537 ."'"._("6 - more than 1 year")."' END END END END END END AS Time_Scale, "
538 ."remote_host AS Remote_Host, "
539 ."count(*) AS Access_Count "
541 ."WHERE (remote_host LIKE '%googlebot.com' "
542 ."OR remote_host LIKE '%alexa.com' "
543 ."OR remote_host LIKE '%inktomisearch.com' "
544 ."OR remote_host LIKE '%msnbot.msn.com') "
545 .($where_conditions ? 'AND '.$where_conditions : '')
546 ."GROUP BY Time_Scale, remote_host";
548 case "search_bots_hits":
549 // This queries for all entries in the SQL access log table that
550 // have a dns name that I know to be a web search engine crawler and
551 // displays the URI that was hit.
552 // If PHPSESSID appears in the URI, just display the URI to the left of this
553 $sessname = session_name();
554 switch ($backend_type) {
556 $Request_URI = "IF(instr(request_uri, '$sessname')=0, request_uri,left(request_uri, instr(request_uri, '$sessname')-2))";
560 $Request_URI = "regexp_replace(request_uri, '$sessname.*', '')"; break;
562 $Request_URI = 'request_uri'; break;
566 ."CASE WHEN $now-time_stamp<60 THEN '"._("0 - last minute")."' ELSE "
567 ."CASE WHEN $now-time_stamp<3600 THEN '"._("1 - 1 minute to 1 hour")."' ELSE "
568 ."CASE WHEN $now-time_stamp<86400 THEN '"._("2 - 1 hour to 1 day")."' ELSE "
569 ."CASE WHEN $now-time_stamp<604800 THEN '"._("3 - 1 day to 1 week")."' ELSE "
570 ."CASE WHEN $now-time_stamp<2629800 THEN '"._("4 - 1 week to 1 month")."' ELSE "
571 ."CASE WHEN $now-time_stamp<31557600 THEN '"._("5 - 1 month to 1 year")."' ELSE "
572 ."'"._("6 - more than 1 year")."' END END END END END END AS Time_Scale, "
573 ."remote_host AS Remote_Host, "
574 ."$Request_URI AS Request_URI "
576 ."WHERE (remote_host LIKE '%googlebot.com' "
577 ."OR remote_host LIKE '%alexa.com' "
578 ."OR remote_host LIKE '%inktomisearch.com' "
579 ."OR remote_host LIKE '%msnbot.msn.com') "
580 .($where_conditions ? 'AND '.$where_conditions : '')
581 ."ORDER BY time_stamp";
586 /** Honeypot for xgettext. Those strings are translated dynamically.
588 function _locale_dummy() {
592 _("external_referers"),
593 _("referring_domains"),
598 _("search_bots_hits"),
607 function getDefaultArguments() {
609 'mode' => 'referring_domains',
610 // referring_domains, referring_urls, remote_hosts, users, host_users, search_bots, search_bots_hits
612 // blank means use the mode as the caption/title for the output
613 'local_referrers' => 'true', // only show external referring sites
614 'period' => '', // the type of period to report:
615 // may be weeks, days, hours, minutes, or blank for all
616 'count' => '0' // the number of periods to report
621 function table_output () {
622 $query = $this->_getQueryString($args);
625 return HTML::p(sprintf( _("Unrecognised parameter 'mode=%s'"),
628 // get the data back.
629 // Note that this must be done before the final generation of the table,
630 // otherwise the headers will not be ready
631 $tbody = $this->_getQueryResults($query, $dbi);
633 return HTML::table(array('border' => 1,
636 HTML::caption(HTML::h1(HTML::br(),$this->_getCaption($args))),
637 HTML::thead($this->_theadrow),
/**
 * Run the query and turn its rows into an HTML table body.
 *
 * The header row is filled as a side effect, through _setHeaders(), from
 * the column names of the first row.
 *
 * @param $query string SQL text to execute.
 * @param $dbi object Database handle providing genericSqlIter().
 * @return object HTML tbody element.
 */
function _getQueryResults($query, &$dbi) {
    $queryResult = $dbi->genericSqlIter($query);
    if (!$queryResult) {
        // No result object: nothing to iterate and nothing to free.
        return HTML::tbody(HTML::tr(HTML::td(_("<empty>"))));
    }

    $tbody = HTML::tbody();
    while ($row = $queryResult->next()) {
        $this->_setHeaders($row);
        $tr = HTML::tr();
        foreach ($row as $value) {
            // output a '-' for empty values, otherwise the table looks strange.
            // A count of 0 is a real value and must stay visible.
            $blank = ($value === null or $value === false or $value === '');
            $tr->pushContent(HTML::td( $blank ? '-' : $value ));
        }
        $tbody->pushContent($tr);
    }
    $queryResult->free();
    return $tbody;
}
/**
 * Fill the table header row from the column names of a result row.
 *
 * Only the first call has an effect; the column names are passed through
 * _() so that they can be translated.
 *
 * @param $row array One result row, keyed by column name.
 */
function _setHeaders($row) {
    if (!empty($this->_headerSet))
        return;
    // The header row is not created anywhere in the visible class, so build
    // it on first use instead of calling a method on an unset property.
    if (!isset($this->_theadrow))
        $this->_theadrow = HTML::tr();
    foreach ($row as $key => $value) {
        $this->_theadrow->pushContent(HTML::th(_($key)));
    }
    $this->_headerSet = true;
}
670 function _getWhereConditions(&$args) {
671 $where_conditions = '';
673 if ($args['period']<>'') {
675 if ($args['period']=='minutes') {
677 } elseif ($args['period']=='hours') {
679 } elseif ($args['period']=='days') {
680 $since = 60 * 60 * 24;
681 } elseif ($args['period']=='weeks') {
682 $since = 60 * 60 * 24 * 7;
684 $since = $since * $args['count'];
686 if ($where_conditions<>'')
687 $where_conditions = $where_conditions.' AND ';
688 $since = time() - $since;
689 $where_conditions = $where_conditions."time_stamp > $since";
693 if ($args['local_referrers']<>'true') {
695 if ($where_conditions<>'')
696 $where_conditions = $where_conditions.' AND ';
697 $localhost = SERVER_URL;
698 $len = strlen($localhost);
699 $backend_type = $request->_dbi->_backend->backendType();
700 switch ($backend_type) {
702 $ref_localhost = "left(referer,$len)<>'$localhost'"; break;
705 $ref_localhost = "substr(referer,0,$len)<>'$localhost'"; break;
709 $where_conditions = $where_conditions.$ref_localhost;
712 // The assumed contract is that there is a space at the end of the
713 // conditions string, so that following SQL clauses (such as GROUP BY)
714 // will not cause a syntax error
715 if ($where_conditions<>'')
716 $where_conditions = $where_conditions.' ';
718 return $where_conditions;
/**
 * Build the table caption.
 *
 * A blank 'caption' argument means the translated mode name is used. When
 * both a period and a count are given, " - <count> <period>" is appended.
 *
 * @param $args array Arguments; reads 'caption', 'mode', 'period', 'count'.
 * @return string The caption text.
 */
function _getCaption(&$args) {
    $title = ($args['caption'] == '')
        ? gettext($args['mode'])
        : $args['caption'];

    $has_period = ($args['period'] != '');
    if ($has_period && $args['count']) {
        $title .= " - " . $args['count'] . " " . gettext($args['period']);
    }
    return $title;
}
736 // c-hanging-comment-ender-p: nil
737 // indent-tabs-mode: nil