mirror of
https://github.com/ranl/monitor-utils.git
synced 2024-11-22 07:23:42 +01:00
Update check-netapp-ng.pl
Hello Ran, this is Laurent DUFOUR (laurent.dufour@havas.com) from Paris, France. I propose a small change to the handling of timeouts in this check-netapp-ng script, in order to avoid the message "Return code of 142 is out of bounds" from Nagios. In fact we have to deal with two types of timeouts, as I explain below; feel free to contact me if you need more explanations. CLARIFICATION FOR TIMEOUTS: There are multiple timeouts we depend on. Perl plugins timeout (utils.pm) --- > $TIMEOUT --- > originally 15 sec --> recommendation to raise it to 180. Net::SNMP timeout --- > Used in Net::SNMP->session --- > originally 5 sec --> recommendation to raise it to 60. Beware that the max value is 60 seconds. If set above that, you get the error message "Can't create snmp session". Do not forget that in Nagios you need to increase service_check_timeout to a value above $TIMEOUT_PLUGINS. Nagios service check timeout (nagios.cfg) --- > service_check_timeout=240 --- > originally 30 sec.
This commit is contained in:
parent
fc9c1902ab
commit
c9271972a6
@ -30,8 +30,21 @@
|
||||
## AUTOSUPPORTSTATUS|NFSOPS|
|
||||
## CIFSOPS|SHELFINFO|...
|
||||
##
|
||||
#####################################
|
||||
#####################################
|
||||
##
|
||||
##
|
||||
## CLARIFICATION FOR TIMEOUTS
|
||||
## There are multiple timeouts we depend on
|
||||
##
|
||||
## Perl plugins timeout (utils.pm)--- > $TIMEOUT --- > originally 15 sec --> recommendation to raise it to 180
|
||||
##
|
||||
## Net::SNMP timeout --- > Used in Net::SNMP->session --- > originally 5 sec --> recommendation to raise it to 60
|
||||
## Beware that the max value is 60 seconds. If set above that, you get the error message "Can't create snmp session"
|
||||
##
|
||||
## Do not forget that in Nagios you need to increase service_check_timeout to a value above $TIMEOUT_PLUGINS
|
||||
## Nagios service check timeout (nagios.cfg) --- > service_check_timeout=240 --- > originally 30 sec
|
||||
##
|
||||
####################################################################################################################################################
|
||||
####################################################################################################################################################
|
||||
|
||||
|
||||
use strict;
|
||||
@ -47,13 +60,16 @@ use Getopt::Long;
|
||||
use Time::Local;
|
||||
use IPC::Cmd qw(run_forked);
|
||||
|
||||
|
||||
|
||||
Getopt::Long::Configure('bundling');
|
||||
|
||||
my $TIMEOUT_PLUGINS=$TIMEOUT;
|
||||
my $stat = 0;
|
||||
my $msg;
|
||||
my $perf;
|
||||
my $script_name = basename($0);
|
||||
my $script_version = 1.3;
|
||||
# Quoted on purpose: a bare 1.3.1 literal (two dots) is parsed by Perl as a
# v-string, i.e. the characters with ordinals 1, 3 and 1, so the usage text
# ("This is $script_name in version $script_version.") would print control
# characters instead of the version number.
my $script_version = '1.3.1';
|
||||
|
||||
my $counterFilePath="/tmp";
|
||||
my $counterFile;
|
||||
@ -339,9 +355,7 @@ sub FSyntaxError($) {
|
||||
my $err = shift;
|
||||
print <<EOU;
|
||||
$err
|
||||
|
||||
This is $script_name in version $script_version.
|
||||
|
||||
Syntax:
|
||||
-H <IP_or_Hostname> Ip/Dns Name of the Filer
|
||||
-C <community_name> SNMP Community Name for read
|
||||
@ -356,7 +370,6 @@ This is $script_name in version $script_version.
|
||||
-e <vol1[,vol2[,...]]> Exclude volumes from snap check (SNAPSHOT/SNAPSHOTAGE)
|
||||
-I Inform only, return OK every time (ignore -w and -c values)
|
||||
-h This help
|
||||
|
||||
Available check types:
|
||||
TEMP - Temperature
|
||||
FAN - Fan Fail
|
||||
@ -382,20 +395,15 @@ This is $script_name in version $script_version.
|
||||
UPTIME - Only show\'s uptime
|
||||
CACHEAGE - Cache Age (-w -c)
|
||||
FSSTATUS - Overall file system health
|
||||
|
||||
Examples:
|
||||
$script_name -H netapp.mydomain -C public -T UPTIME
|
||||
UPTIME: 2 days, 23:03:21.09 | uptime=255801s
|
||||
|
||||
$script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/data/ -w 90 -c 95 -V 2c
|
||||
OK: DISKUSED 79% | /vol/data/=8104595240k
|
||||
|
||||
$script_name -H netapp.mydomain -C public -T GLOBALSTATUS
|
||||
CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. | globalstatus=4
|
||||
|
||||
$script_name -H netapp.mydomain -C public -T DISKUSED -v wtf
|
||||
WARN: Unknown volume path or aggregate name 'wtf'. Available values: aggr_p1a_sas2_mirror /vol/vol0/ /vol/esx/ /vol/xen_a/
|
||||
|
||||
EOU
|
||||
exit($ERRORS{'UNKNOWN'});
|
||||
}
|
||||
@ -493,6 +501,8 @@ $opt{'crit'} = 500;
|
||||
$opt{'warn'} = 500;
|
||||
$opt{'version'} = 2;
|
||||
$opt{'timeout'} = 60;
|
||||
$TIMEOUT_PLUGINS = 180 ;
|
||||
|
||||
my $result = GetOptions(\%opt,
|
||||
'filer|H=s',
|
||||
'community|C=s',
|
||||
@ -507,6 +517,17 @@ my $result = GetOptions(\%opt,
|
||||
"help|h",
|
||||
);
|
||||
|
||||
if ( $opt{'timeout'} > 60)
|
||||
{
|
||||
# Use the timeout received on the command line as the plugin timeout, but cap the SNMP timeout at its maximum (60 seconds) when the CLI timeout is above 60 seconds
|
||||
$TIMEOUT_PLUGINS=$opt{'timeout'};
|
||||
$opt{'timeout'}=60;
|
||||
} else
|
||||
{
|
||||
$TIMEOUT_PLUGINS=$opt{'timeout'};
|
||||
}
|
||||
|
||||
|
||||
FSyntaxError("") if defined $opt{'help'};
|
||||
FSyntaxError("Missing -H") unless defined $opt{'filer'};
|
||||
FSyntaxError("Missing -C") unless defined $opt{'community'};
|
||||
@ -536,9 +557,16 @@ if (!defined($counterFilePath)) {
|
||||
|
||||
|
||||
|
||||
# Just in case of problems, let's not hang Nagios
|
||||
# with "Return code of 142 is out of bounds", instead we set the message "No response in time"
|
||||
|
||||
$SIG{'ALRM'} = sub {
|
||||
print ("CRITICAL: No response in time\n");
|
||||
exit $ERRORS{"CRITICAL"};
|
||||
};
|
||||
|
||||
# Starting Alarm
|
||||
alarm($TIMEOUT);
|
||||
alarm($TIMEOUT_PLUGINS);
|
||||
|
||||
# Establish SNMP Session
|
||||
our $snmp_session = _create_session($opt{'filer'},$opt{'community'},$opt{'version'},$opt{'timeout'});
|
||||
@ -1149,3 +1177,4 @@ $msg =~ s/\n//g;
|
||||
$perf ? print "$msg | $perf\n" : print "$msg\n";
|
||||
|
||||
exit($stat);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user