#!/usr/bin/perl
# 
# Sawdog - A collection of simple scripts, which informs in case of server outages
# Copyright 1999-2000 by Christian Gloor
# 
# This piece of code is distributed under the terms of the GNU General Public License (GPL)
# You should have received a copy of the GPL (file COPYING) along with those scripts; 
# if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA
#
# Current maintainer: chgloor@digicomp.ch, don't hesitate to contact him if you have any questions. 
#
# Fall 1999: 1.0.0 initial  - chgloor@digicomp.ch: initial release for internal use only 
# May  2000: 1.2.0          - michi@digicomp.ch: changed for sms use only 
# May  2000: 2.0.0 phoenix  - chgloor: additional features (state unknown, logfile, locking)
# June 2000: 2.1.0 generic  - chgloor: compact notification, aliases, generic notification
# June 2000: 2.2.0 speedup  - chgloor: expect scripts speedup, 'required' mode

# release string, shown in the start-up banner
$version       = '2.2.10 speedup/cleanup - chgloor 25.06.00';

# all files live below $basedir.
# use an absolute path here if you want to start this from crontab!
$basedir       = '.';
$configfile    = "$basedir/sawdog.conf";    # servers, services and notification methods
$statusfile    = "$basedir/sawdog.status";  # state of every service as seen by the last run
$lockfile      = "$basedir/sawdog.lock";    # exists while a copy of sawdog is running
$logfile       = "$basedir/sawdog.log";     # state changes and notifications are appended here
$servicedir    = "$basedir/services/";      # directory holding the service check scripts

# don't change anything below this line, unless you really know what you do, this is the end of the config section
# ----------------------------------------------------------------------------------------------------------------

# notification is switched on by the literal first argument 'notify' only;
# the progress output on stdout is shown exactly when notification is off.
$notify  = ($ARGV[0] eq 'notify') ? 1 : 0;
$verbose = ($notify == 1)         ? 0 : 1;

# check for another copy running - if there is a lockfile, exit, create a new lockfile otherwise.
# the lockfile is created with O_EXCL, so testing for it and creating it is one atomic step:
# two copies started at the same moment cannot both get past this point.
use Fcntl qw(O_WRONLY O_CREAT O_EXCL);
unless (sysopen (LCK, $lockfile, O_WRONLY | O_CREAT | O_EXCL)) {
    $lockerror = $!; # remember the reason, the -e test below may overwrite $!
    die ("There's another copy of sawdog running right now.\nRemove $lockfile if you want to continue anyway. \n") if (-e $lockfile);
    # the lockfile does not exist but could not be created either (e.g. directory not writable).
    # keep the old best-effort behaviour and carry on without a lock, but say so.
    print "cannot create the lockfile: $lockfile ($lockerror) - continuing without a lock\n";
}

# remember when we started, the elapsed time is reported at the very end
$start = time;

# without the argument 'notify' this is an interactive run: show the release and a usage hint
if ($notify == 0) {
    print "This is sawdog Release $version\nPlease use the argument 'notify' to enable notification\n";
}

# load the results of the previous run into @status (one line per service).
# a missing or unreadable statusfile is not fatal: @status stays empty and
# every service is then treated as not seen before.
if (!open (ST, "<$statusfile")) {
    print "Can't open the statusfile: $statusfile\nI'll create a new one\n";
} else {
    @status = <ST>;
    close (ST);
}

# try to read the configuration file. if this fails, we cannot do anything, so exit then.
# exiting here matters: the next step truncates the statusfile, so carrying on with an
# empty configuration would throw away the state of every service.
# the lockfile is removed first, otherwise the next run would refuse to start.
unless (open (CF, "<$configfile")) {
    print "cannot open the configfile: $configfile - check the permissions and the filename\n";
    close (LCK);
    unlink $lockfile;
    exit 1;
}
@config = <CF>;
close (CF);

# start a fresh statusfile; the old content is already in memory (@status), so it is simply overwritten.
# if the script crashes after this point the old state is gone and you will just get
# some 'we are up' messages. future enhancement possible here.
unless (open (ST, ">$statusfile")) {
    print "cannot reopen the statusfile: $statusfile - check the permissions\n";
}

# the logfile only ever grows: open it in append mode.
unless (open (LOG, ">>$logfile")) {
    print "cannot append to the logfile: $logfile - check the permissions\n";
}

# split a contact entry into (method, user).
# 'method->user' selects a named notification method, a plain name uses the method 'default'.
sub _split_contact {
    my ($contact) = @_;
    return ('default', $contact) unless ($contact =~ /->/);
    # split at the first '->'; yields an empty list if one of the two sides is empty
    return $contact =~ /^(.+?)->(.+)/;
}

# build the notification command for $method by replacing the %user% and %message%
# tokens, and run it through the shell if sawdog was started with 'notify'.
# the command output is discarded.
sub _run_notification {
    my ($method, $user, $message) = @_;
    my $command = $notification{$method};
    $command =~ s/\%user\%/$user/gi;
    $command =~ s/\%message\%/$message/gi;
    return unless ($notify);
    if ($command eq '') {
        # no '.method (command)' line defined this method, so there is nothing to run
        print LOG "no notification method '$method' defined - cannot notify $user\n";
        return;
    }
    # NOTE: the command comes from the configuration file and is passed to the shell as is
    my $output = `$command`;
    return;
}

# for each line in the config file do this.
# first check if valid, then extract the arguments, then process the services (inner loop)
#
# line formats recognised below:
#   '#...' or a line starting with whitespace (this includes empty lines): skipped
#   '.method (command)': defines a notification method
#   '[!]server [alias] (service, service, ...) contact': a server to check, with up to five
#       services; a leading '!' marks the server as required
#
# state of a service in the statusfile: alive, unknown or dead.
#   check ok:     dead -> alive (UP notification), unknown -> alive, alive -> alive
#   check failed: alive -> unknown, unknown -> dead (DOWN notification), dead -> dead
#   a service not found in the old statusfile is recorded as unknown either way.
foreach (@config) {
    $status = '';
    $required = 0; # 1 if the entry is marked with an '!' as required
    # if the line is a comment '#' or starts with whitespace (empty lines too)
    next if ((/^\#/)||(/^\s/));
    # fill the notification hash with notification methods. if the line starts with a dot only
    if (/^\.(.+?)\s+\((.+?)\)/) {
        $notification{$1} = $2;
        next;
    }

    if (s/^\!//) { # the entry is marked with an '!' and is therefore required to be reachable
        $required = 1;
    }
    # the first word, until a whitespace occurs
    ($server) = /^(.+?)\s/;
    # if an alias is set, else servername. the word before the first open bracket '('
    ($alias)  = /(\S+?)\s+\(/;
    # the ports. words between brackets. brute force.
    # a slot without a match is set to undef, so nothing is left over from the previous line
    ($port[0]) = /\((.+?)\s*[,|\)]/;
    ($port[1]) = /\(.+?,\s*(.+?)\s*[,|\)]/;
    ($port[2]) = /\(.+?,.+?,\s*(.+?)\s*[,|\)]/;
    ($port[3]) = /\(.+?,.+?,.+?,\s*(.+?)\s*[,|\)]/;
    ($port[4]) = /\(.+?,.+?,.+?,.+?,\s*(.+?)\s*[,|\)]/;
    # the person responsible for this server, between last close bracket ')' and line end.
    ($sms) = /\)\s+(.+)$/;

    print "\nprocessing $server ($alias): " if $verbose;
    $startstring = localtime(time);

    # this is the main inner loop: for each service execute the expect script
    foreach $i (0..4) {
        $executable = $port[$i];
        next unless ($executable); # only if there is an executable defined
        print "$executable " if $verbose;

        # look up the state recorded by the previous run; the first matching line wins.
        # the names are quoted and the match is anchored, so 'host' neither matches
        # 'otherhost' nor does a '.' in a hostname match any character.
        $previous = 'new';
        foreach $line (@status) {
            if ($line =~ /^\Q$server\E\s\Q$executable\E\s(alive|unknown|dead)\s*$/) {
                $previous = $1;
                last;
            }
        }

        # call the expect script and if it terminates with a return value of zero, assume the server is up.
        # the output is dropped with the portable '>/dev/null 2>&1'; the bash-only '&>' makes a
        # plain /bin/sh run the script in the background and report success every time.
        if (system("$servicedir$executable $server >/dev/null 2>&1") == 0) {
            if ($previous eq 'dead') {
                # back from the dead: collect the service for the UP notification
                print "up! " if $verbose;
                $hashup{$sms} = "UP: $alias ->" if ($hashup{$sms} eq '');
                $hashup{$sms} = "$hashup{$sms} $executable";
                print ST "$server $executable alive\n";
                print LOG "[$startstring] $server $executable alive\n";
            } elsif ($previous eq 'unknown') {
                print "up? " if $verbose;
                print ST "$server $executable alive\n";
            } elsif ($previous eq 'alive') {
                print "up. " if $verbose;
                print ST "$server $executable alive\n";
            } else {
                print "new " if $verbose;
                print ST "$server $executable unknown\n";
                print LOG "[$startstring] $server $executable new\n";
            }
            $status = 'up';
        } else { # the server must be down, because the expect script terminated with an return value higher than zero
            if ($previous eq 'alive') {
                # first failure: no notification yet, wait for the next run to confirm
                print "unknown " if $verbose;
                print ST "$server $executable unknown\n";
                print LOG "[$startstring] $server $executable unknown\n";
            } elsif ($previous eq 'unknown') {
                # second failure in a row: collect the service for the DOWN notification
                print "down! " if $verbose;
                $hashdown{$sms} = "DOWN: $alias ->" if ($hashdown{$sms} eq '');
                $hashdown{$sms} = "$hashdown{$sms} $executable";
                print ST "$server $executable dead\n";
                print LOG "[$startstring] $server $executable dead\n";
            } elsif ($previous eq 'dead') {
                print "down. " if $verbose;
                print ST "$server $executable dead\n";
            } else {
                print "new " if $verbose;
                print ST "$server $executable unknown\n";
                print LOG "[$startstring] $server $executable new\n";
            }
            $status = 'down';
        }
    }

    # process the entire hash 'hashup', if there's something in, at least one server is up again
    # trigger notification for each name and each server, but all ports at once
    while (($methoduser, $message) = each %hashup) {
        ($method, $user) = _split_contact($methoduser);
        print "\n$user via $method --> $message" if $verbose;
        print LOG "$user via $method --> $message\n";
        _run_notification($method, $user, $message);
        # remove the key from the hash, cleanup (deleting the current key is safe while using each)
        delete $hashup{$methoduser};
    }

    # process the entire hash 'hashdown', if there's something in, at least one server is down
    # trigger notification for each name and each server, but all ports at once
    while (($methoduser, $message) = each %hashdown) {
        ($method, $user) = _split_contact($methoduser);
        $message = "$message !processing aborted!" if ($required==1);
        print "\n$user --> $message" if $verbose;
        print LOG "$user via $method --> $message\n";
        _run_notification($method, $user, $message);
        # remove the key from the hash, cleanup (deleting the current key is safe while using each)
        delete $hashdown{$methoduser};
    }

    # if the server is marked as 'required', stop processing if it's unreachable.
    # $status holds the result of the last service checked for this server.
    # the entries after this one are not checked and get no line in the new statusfile.
    last if (($required==1) && ($status eq 'down'));
}

# how long did the whole run take?
# nice to know if you're planning to run this as a frequent cron job
$duration = time - $start;
if ($verbose) {
    print "\n\nProcessing took $duration seconds.\n";
}

# closing the statusfile flushes the buffer, i.e. the new state is written to the disk
close(ST);

if ($verbose) {
    print "done.\n";
}

# finally release the lock, so the next run may start.
unlink $lockfile;














