Bivio::Util::HTTPLog
# Copyright (c) 2001-2009 bivio Software, Inc. All rights reserved.
# $Id$
package Bivio::Util::HTTPLog;
use strict;
use Bivio::Base 'Bivio.ShellUtil';
use Bivio::IO::Config;
use Bivio::IO::Trace;
use Bivio::Type::DateTime;
use Bivio::Type::Integer;
use IO::File ();
# C<Bivio::Util::HTTPLog> manipulates HTTP logs.
our($_TRACE);
my($_IDI) = __PACKAGE__->instance_data_index;
my($_CFG) = {
error_file => (-r '/var/log/bivio-proxy/error_log'
? '/var/log/bivio-proxy/error_log'
: -r '/var/log/httpd/error_log'
? '/var/log/httpd/error_log'
: -r '/var/log/httpd/error.log'
? '/var/log/httpd/error.log'
: '/var/log/apache2/error_log'),
email => 'root',
pager_email => '',
error_count_for_page => 3,
ignore_list => Bivio::IO::Config->REQUIRED,
error_list => Bivio::IO::Config->REQUIRED,
critical_list => Bivio::IO::Config->REQUIRED,
ignore_unless_count => Bivio::Type::Integer->get_max,
ignore_unless_count_list => [],
test_now => undef,
};
Bivio::IO::Config->register($_CFG);
my($_RECORD_PREFIX) = '^(?:\[('
. _clean_regexp(Bivio::Type::DateTime->REGEX_CTIME)
. ')\]|(?:\[\d+\])?('
. _clean_regexp(Bivio::Type::DateTime->REGEX_ALERT)
. '))';
my($_REGEXP);
my($_DT) = b_use('Type.DateTime');
sub USAGE {
# : string
# Returns:
#
# usage: b-http-log [options] command [args...]
# commands:
# parse_errors interval_minutes -- returns errors found in last interval
return <<'EOF';
usage: b-http-log [options] command [args...]
commands:
parse_errors interval_minutes -- returns errors found in last interval
EOF
}
sub error_file {
return $_CFG->{error_file};
}
sub handle_config {
# (proto, hash) : undef
# Make sure regexps (error_list, etc.) are unique, e.g. have a '::' in them.
# This avoids misidentification of messages which contain user data, but are
# critical or errors.
#
#
# critical_list : array_ref (required)
#
# List of regexps which will cause the to be sent to I<pager_email>. The
# matching value will be sent to the I<pager_email>, not the entire line
#
# email : string [root]
#
# Where to send mail to. L<Bivio::ShellUtil|Bivio::ShellUtil> -email flag
# overrides this value if it is defined.
#
# error_count_for_page : int [3]
#
# How many error_list messages in an interval are required before
# the message is critical?
#
# error_file : string [/var/log/httpd/error.log || /var/log/httpd/error_log]
#
# File where errors are writted by httpd.
#
# error_list : array_ref (required)
#
# List of regexps which which will be emailed always. Also see
# I<error_count_for_page>.
#
# ignore_list : array_ref (required)
#
# List of regexps which will be thrown away.
#
# ignore_unless_count : int [9999]
#
# How many times should we ignore matches to I<ignore_unless_count_list>?
#
# ignore_unless_count_list : array_ref []
#
# List of regexps which will be thrown away unless they exceed
# I<ignore_unless_count>.
#
# pager_email : string ['']
#
# Email addresses separated by commas which get pager messages. See
# I<critical_list> and I<error_count_for_page>.
my(undef, $cfg) = @_;
$_CFG = $cfg;
$_REGEXP = {};
foreach my $r (qw(ignore critical error ignore_unless_count)) {
my($x) = $cfg->{"${r}_list"};
$_REGEXP->{$r} = @$x ? qr/(@{[join('|', @$x)]})/ : undef;
}
return;
}
sub parse_errors {
# (self, int) : string_ref
# Check Apache error logs for unknown messages during the last interval.
# You enter this in a crontab as:
#
# 0,15,30,45 * * * * /usr/local/bin/b-http-log parse_errors 15
#
# I<interval_minutes> must match the execute time in cron.
my($self, $interval_minutes) = @_;
return ($self->lock_action(sub {
$self->get_request;
#TODO: dies later unless this is here
return _parse_errors_complete($self)
unless $interval_minutes = _parse_errors_init(
$self, $interval_minutes);
my($fields) = $self->[$_IDI];
my($start) = Bivio::Type::DateTime->add_seconds(
$_CFG->{test_now} || Bivio::Type::DateTime->now,
-$interval_minutes * 60,
);
my($error_countdown) = $_CFG->{error_count_for_page};
my($date, $record, $in_interval);
my($last_error) = Bivio::Type::DateTime->get_min;
my($ignored) = {};
my(%error_times);
RECORD: while (_parse_record($self, \$record, \$date)) {
unless ($in_interval) {
next RECORD
if Bivio::Type::DateTime->compare($start, $date) >= 0;
$in_interval = 1;
}
_trace('record: ', $record) if $_TRACE;
if ($_REGEXP->{ignore} && $record =~ $_REGEXP->{ignore}) {
_trace('ignoring: ', $1) if $_TRACE;
next RECORD;
}
if ($_REGEXP->{ignore_unless_count}
&& $record =~ $_REGEXP->{ignore_unless_count}) {
$ignored->{$1}++;
_trace('ignore_unless_count: ', $1) if $_TRACE;
next RECORD;
}
# Critical already avoids dups, so put before time check after.
if ($_REGEXP->{critical} && $record =~ $_REGEXP->{critical}) {
my($e) = $1;
_trace('critical: ', $e) if $_TRACE;
$e =~ s/^Bivio::DieCode:://g;
$e =~ s/ class=>\S+//g;
$e =~ s/\w+=>//g;
_pager_report($self, $e);
$record =~ s/^/***CRITICAL*** /;
}
if ($_REGEXP->{error} && $record =~ $_REGEXP->{error}) {
_trace('error: ', $1) if $_TRACE;
# Certain error messages don't pass the $_REGEXP->{error} on
# the first output. die message comes out first and it's what
# we want in the email. However, we need to count the error
# regexp on the second message. This code does this correctly.
# We don't recount error REGEXPs output at the same time.
_pager_report($self, $1)
if !$error_times{$date}++ && --$error_countdown == 0;
}
# Avoid duplicate error messages by checking $last_error
if (Bivio::Type::DateTime->compare($last_error, $date) == 0
&& $record !~ /JOB_ERROR/) {
_trace('same time: ', $record) if $_TRACE;
next RECORD;
}
$last_error = $date;
# Never send more than 256 bytes (three lines) in a record via email
_report($self, substr($record, 0, 256));
}
foreach my $k (sort(keys(%$ignored))) {
_report($self, "[repeated $ignored->{$k} times] ", $k)
if $ignored->{$k} >= $_CFG->{ignore_unless_count};
}
return _parse_errors_complete($self);
}, __PACKAGE__ . 'parse_errors' . $_CFG->{error_file}))[0];
}
sub _clean_regexp {
# (string) : string
# Makes sure parethesizes regexes don't match anything
my($value) = @_;
$value =~ s/\(([^?])/\(?:$1/g;
return $value;
}
sub _pager_report {
# (self, arg, ....) : undef
# Reports the error to the pager and puts at top of $fields->{res}.
my($self, @args) = @_;
my($fields) = $self->[$_IDI];
my($msg) = Bivio::IO::Alert->format_args(@args);
$fields->{res} = "CRITICAL ERRORS\n$fields->{res}"
unless $fields->{res} =~ /^CRITICAL ERRORS/;
my($last) = $fields->{pager_res}->[$#{$fields->{pager_res}}];
push(@{$fields->{pager_res}}, $msg)
if !$last || $last ne $msg;
return;
}
sub _parse_errors_complete {
# (self) : string_ref
# Returns $fields->{res}. Sends email to pager if pager_res and pager_email
# are non-null.
my($self) = @_;
my($fields) = $self->[$_IDI];
$fields->{fh}->close
if $fields->{fh} && !$fields->{fh}->eof;
my($pr) = substr(join('', @{$fields->{pager_res}}), 0, 100);
$self->send_mail(
$_CFG->{pager_email},
_subject(),
\$pr,
) if $pr && $_CFG->{pager_email};
return \$fields->{res};
}
sub _parse_errors_init {
# (self, int) : array
# Returns its arguments, but first checks validity. Sets up email
# and result_name. Failure is returned as interval_minutes being 0.
my($self, $interval_minutes) = @_;
# Initializes the request (timezone)
$self->usage('interval_minutes must be supplied')
if $interval_minutes <= 0;
$self->put(email => $_CFG->{email})
unless defined($self->unsafe_get('email'));
$self->put(result_subject => _subject($_CFG->{error_file}));
my($fields) = $self->[$_IDI] = {
res => '',
pager_res => [],
fh => IO::File->new,
};
my($err);
if ($self->req->is_production && ! -s $_CFG->{error_file}) {
if (abs($_DT->diff_seconds(
$_DT->now,
$_DT->set_local_beginning_of_day($_DT->now),
)) < 300) {
# log file may be rotating
return 0;
}
$err = "$_CFG->{error_file}: error file missing or empty";
}
elsif (! $fields->{fh}->open($_CFG->{error_file})) {
$err = "$_CFG->{error_file}: $!";
}
return $interval_minutes unless $err;
_pager_report($self, $err);
_report($self, $err);
return 0;
}
sub _parse_line {
# (hash_ref) : boolean
# Returns 0 at eof. Fills in $fields->{line}.
my($fields) = @_;
return 1 if defined($fields->{line});
$fields->{line} = $fields->{fh}->getline;
return defined($fields->{line}) ? 1 : 0;
}
sub _parse_record {
# (self, string_ref, string_ref) : boolean
# Parses a record (the entire text) from the file. There's a lookahead
# buffer.
my($self, $record, $date) = @_;
my($fields) = $self->[$_IDI];
$$record = undef;
while (_parse_line($fields)) {
last if $$record && $fields->{line} =~ /$_RECORD_PREFIX/o;
$$record .= $fields->{line};
$fields->{line} = undef;
}
return 0 unless defined($$record);
my($err);
my($d1, $d2) = $$record =~ /$_RECORD_PREFIX/o;
my($m) = $_CFG->{test_now} ? 'from_literal' : 'from_local_literal';
($$date, $err) = Bivio::Type::DateTime->$m($d1 || $d2);
unless ($$date) {
_report($self, "can't parse date: ", $err, ": ", $$record);
$$record = '';
return 1;
}
return 1;
}
sub _report {
# (self, arg, ...) : undef
# Adds errors (safely) to $fields->{res}
my($self, @args) = @_;
my($fields) = $self->[$_IDI];
$fields->{res} .= Bivio::IO::Alert->format_args(@args);
return;
}
sub _subject {
my($subject) = @_;
return (b_use('Bivio.BConf')->bconf_host_name =~ /^([^\.]+)/)[0]
. ($subject ? (' ' . $subject) : '')
;
}
1;