Tugger the SLUGger! SLUG Mailing List Archives

Re: [chat] Re: [SLUG] "My uptime is bigger than your uptime"


On Wed, Dec 19, 2001 at 01:41:56PM +1100, PColbourn@xxxxxxxxxxxx wrote:
> 1 roofus.argus                  456 days, 23:52:12 Linux
> 2 mike-blacktown.argus          368 days, 21:34:32 Linux
> 3 mike-gateway.argus            349 days, 6:05:34  Linux
> 4 asargus02.argus               320 days, 3:20:34  Microsoft
> 5 argusimp01.argus              315 days, 3:21:52  Linux
> 6 argusimp02.argus              315 days, 3:44:41  Linux
> 7 mike-chullora.argus           309 days, 4:40:58  Linux
> 8 stardust.argus                248 days, 13:13:56 Linux

roofus is a worry: will it crash when it hits the magical 497
days of uptime? Some older kernels have a bug that causes
a lockup when their uptime gets to 497 days. As far as I know,
all kernels wrap back to 0 when they hit 497 days, but I did
hear talk of a fix. I'm not sure if it was accepted, though.

stardust was well into 400+ days when it crashed 250-odd
days ago. We weren't monitoring uptimes too closely then,
so I don't know if it hit 497 days and crashed, or whether
it was something else.

The Microsoft box is NT. It does nothing as far as I know;
it used to be a very lightly loaded web server.

So the reason we monitor uptimes is a technical one: we need
to know when they get near 497 days. It's not about bragging. :)

Here's the nasty Perl script we use (it's a bit big, but I've seen
bigger quotes). You need a few Perl modules to make it run. It just
needs a file with a list of hosts in it, one per line. It then uses
SNMP to get the uptimes. I removed some SMTP stuff from it...


#!/usr/bin/perl 
require 5;

use strict;
use SNMP_Session;
use BER;
use Getopt::Std;

# Silence the SNMP_Session module's own warnings.
$SNMP_Session::suppress_warnings = 1;

# BER-encoded OIDs, built once and reused for every host:
#   1.3.6.1.2.1.1.3.0     sysUpTime.0      (uptime of the SNMP agent)
#   1.3.6.1.2.1.25.1.1.0  hrSystemUptime.0 (uptime of the host itself)
#   1.3.6.1.2.1.1.1.0     sysDescr.0       (system description)
my $sysupoid   = encode_oid(1,3,6,1,2,1,1,3,0);
my $hostupoid  = encode_oid(1,3,6,1,2,1,25,1,1,0);
my $sysdescoid = encode_oid(1,3,6,1,2,1,1,1,0);

# Command line: -c FILE selects the host list.
my $options_ref = {};
getopts('c:', $options_ref)
	or die "usage: $0 [-c hostsfile]\n";

# The config file is just a list of hosts, one per line.
if (!defined $options_ref->{c}) {
	$options_ref->{c} = "/usr/local/etc/uptimehosts.conf";
}

# Three-argument open (perl 5.6 or later) so a file name given with -c can
# never be interpreted as an open mode or a pipe.  Report the file that was
# actually requested and the reason the open failed.
open (HOSTS, '<', $options_ref->{c})
	or die "error opening $options_ref->{c}: $!";

# SNMP community string and UDP port used for every host.
my $community = "public";
my $port      = 161;

my %sysdesc;     # host => short system description ("Linux", "Microsoft", ...)
my %notrespond;  # host => host, for hosts that did not answer
my %uptime;      # host => pretty-printed uptime string

# Used for formatting: length of the current host name ($lennew) and of the
# longest host name seen so far ($lenold).
my $lennew = 0;
my $lenold = 0;

# Formatting again: the same pair, for the uptime strings.
my $uplennew = 0;
my $uplenold = 0;

# Pass 1: read the host list and ask every host for its system description.
# Hosts that answer end up in %sysdesc; every other host is recorded in
# %notrespond so that it shows up at the end of the report instead of
# silently disappearing.
while (<HOSTS>) {
	# The first whitespace-separated field is the host name.  Blank lines
	# and any line whose first field contains a '#' are skipped.
	my ($host) = split;
	if (!$host || $host =~ /#/) {
		next;
	}

	# Remember the longest host name for column alignment in the report.
	$lennew = length $host;
	if ($lennew > $lenold) {
		$lenold = $lennew;
	}

	my $session = SNMP_Session->open($host, $community, $port);
	if (!$session) {
		# The session could not even be set up; list the host as
		# unreachable rather than dropping it.
		$notrespond{$host} = $host;
		next;
	}

	# Get the system description.  Because the description can be quite
	# big, a Linux box is recorded as "Linux" and a Windows box as
	# "Microsoft"; anything else keeps its full description.
	if ($session->get_request_response($sysdescoid)) {
		my ($bindings) = $session->decode_get_response($session->{pdu_buffer});
		if (defined $bindings && $bindings ne '') {
			my ($binding) = decode_sequence($bindings);

			# Decode into locals: the request OID $sysdescoid must stay
			# untouched because it is reused for the next host.
			my ($oid, $value) = decode_by_template($binding, "%O%@");
			my $description = pretty_print($value);

			if ($description =~ /linux/i) {
				$sysdesc{$host} = "Linux";
			}
			elsif ($description =~ /windows/i) {
				$sysdesc{$host} = "Microsoft";
			}
			else {
				$sysdesc{$host} = $description;
			}
		}
	}

	# No reply, or a reply without any variable bindings.
	if (!exists $sysdesc{$host}) {
		$notrespond{$host} = $host;
	}

	# Release the session's socket before moving on to the next host.
	$session->close();
}

close (HOSTS);

# Pass 2: get the uptime from hosts that responded to the system description.
#
# For each host the host uptime ($hostupoid) is tried first; if that request
# yields no usable value the agent uptime ($sysupoid) is used instead.  The
# result is stored in %uptime as a string that always contains a day count,
# and $uplenold tracks the longest such string for column alignment.
foreach my $host (keys %sysdesc) {

	my $session = SNMP_Session->open($host, $community, $port)
		|| next;

	foreach my $request_oid ($hostupoid, $sysupoid) {
		# No reply for this OID: try the next one.
		next unless $session->get_request_response($request_oid);

		my ($bindings) = $session->decode_get_response($session->{pdu_buffer});
		# A reply without variable bindings is no better than no reply.
		next unless defined $bindings && $bindings ne '';

		my ($binding) = decode_sequence($bindings);
		my ($oid, $value) = decode_by_template($binding, "%O%@");
		my $pretty = pretty_print($value);

		# Uptimes below one day come back without a day count; add one so
		# that every entry has the same shape.
		if ($pretty !~ /day/) {
			$pretty = "0 days, $pretty";
		}
		$uptime{$host} = $pretty;

		# Measure the string that will actually be printed, i.e. after
		# the "0 days, " prefix has been added.
		$uplennew = length $pretty;
		if ($uplennew > $uplenold) {
			$uplenold = $uplennew;
		}

		last;
	}

	# Release the session's socket before moving on to the next host.
	$session->close();
}

# Build and print the report: every host with a known uptime, longest uptime
# first, followed by some Linux statistics and the list of hosts that did not
# answer.

my $ten=0;         # number of ranked hosts counted so far, capped at 10
my $linux=0;       # Linux boxes among the first ten
my $linuxtotal=0;  # Linux boxes overall
my $total=0;       # rank of the current host / number of hosts listed
my $message;
$message = "\n".
	"\tThe Extreme All Time Clasic World Wide Prime Time Top Ten Uptime List\n".
	"\t---------------------------------------------------------------------\n\n";

# Convert every uptime string ("315 days, 3:21:52") into seconds.  Comparing
# the strings numerically would only look at the leading day count, leaving
# hosts with the same number of days in arbitrary order.
my %seconds_up;
foreach my $host (keys %uptime) {
	my ($days, $hours, $minutes, $seconds) =
		$uptime{$host} =~ /(\d+)\s+days?,\s*(\d+):(\d+):(\d+)/;
	if (defined $days) {
		$seconds_up{$host} =
			(($days * 24 + $hours) * 60 + $minutes) * 60 + $seconds;
	}
	else {
		# Unexpected format: rank by the leading number, taken as days.
		my ($leading) = $uptime{$host} =~ /^\s*(\d+)/;
		$seconds_up{$host} = defined $leading ? $leading * 86400 : 0;
	}
}

# Longest uptime first; equal uptimes are ordered by host name so the output
# is stable from run to run.
foreach my $host (sort { $seconds_up{$b} <=> $seconds_up{$a} or $a cmp $b } keys %uptime) {

	$total++;
	$message .= sprintf "% 2s %-*s %-*s %s\n",
		$total, $lenold, $host, $uplenold, $uptime{$host}, $sysdesc{$host};

	my $is_linux = $sysdesc{$host} =~ /linux/i;

	# Only the first ten hosts count towards the "Top Ten" statistic.
	if ($ten < 10) {
		$ten++;
		$linux++ if $is_linux;
	}
	$linuxtotal++ if $is_linux;
}

$message .= "\n$total boxes are being monitored.\n".
	"$linuxtotal of the $total are linux boxes.\n".
	"$linux linux boxes are in the Top Ten.\n\n".
	"\tThe Extreme All Time Clasic World Wide Prime Time Top Ten Uptime List\n".
	"\t---------------------------------------------------------------------\n\n";

# Hosts that could not be queried, padded to the host-name column width.
foreach (keys %notrespond) {
	$message .= sprintf "%-*s is not responding to snmp\n", $lenold, $_;
}

print $message;


-- 

Note: You can skip this section if you want to move on.
	
	chesty