1
0
mirror of https://github.com/ranl/monitor-utils.git synced 2024-11-22 15:33:43 +01:00

Add checks for block operations and other changes

* Add ISCSIOPS and FCPOPS check types, similar to CIFSOPS
* New -V option sets the SNMP version (2c is needed for reading 64-bit values)
* New -I option always returns the OK state (for when you need only performance data)
* Redesign the help message and add examples
* nagioscache files are created only if the check needs caching
* Each check has its own *.nagioscache file (when using multiple tests I got
  inexact values of performance data)
* change version to 1.2
This commit is contained in:
Michal Svamberg 2015-05-30 21:10:51 +02:00
parent 8a52be3cfe
commit dc36706c2d

View File

@ -26,7 +26,7 @@
## ##
## DISKSUMMARY|HA|CIFSSESSIONS| ## DISKSUMMARY|HA|CIFSSESSIONS|
## AUTOSUPPORTSTATUS|NFSOPS| ## AUTOSUPPORTSTATUS|NFSOPS|
## CIFSOPS|SHELFINFO ## CIFSOPS|SHELFINFO|...
## ##
##################################### #####################################
##################################### #####################################
@ -47,11 +47,13 @@ my $stat = 0;
my $msg; my $msg;
my $perf; my $perf;
my $script_name = "check-netapp-ng.pl"; my $script_name = "check-netapp-ng.pl";
my $script_version = 1.1; my $script_version = 1.2;
my $counterFilePath="/tmp"; my $counterFilePath="/tmp";
my $counterFile; my $counterFile;
my %opt;
my $elapsedtime = 1;
my %ERRORS = ( my %ERRORS = (
'OK' => '0', 'OK' => '0',
@ -76,6 +78,8 @@ my $fileRuntime;
my $fileHostUptime; my $fileHostUptime;
my $fileNfsOps; my $fileNfsOps;
my $fileCifsOps; my $fileCifsOps;
my $fileIscsiOps;
my $fileFcpOps;
my $snmpHostUptime; my $snmpHostUptime;
@ -161,7 +165,9 @@ my $snmp_netapp_miscLowNfsOps = '.1.3.6.1.4.1.789.1.2.2.6.0';
my $snmp_netapp_miscHighCifsOps = '.1.3.6.1.4.1.789.1.2.2.7.0'; my $snmp_netapp_miscHighCifsOps = '.1.3.6.1.4.1.789.1.2.2.7.0';
my $snmp_netapp_miscLowCifsOps = '.1.3.6.1.4.1.789.1.2.2.8.0'; my $snmp_netapp_miscLowCifsOps = '.1.3.6.1.4.1.789.1.2.2.8.0';
my $snmp_netapp_blocks = '.1.3.6.1.4.1.789.1.17';
my $snmp_netapp_blocks_iscsi64Ops = "$snmp_netapp_blocks.24.0";
my $snmp_netapp_blocks_fcp64Ops = "$snmp_netapp_blocks.25.0";
# SNMP Status Codes # SNMP Status Codes
my %nvramBatteryStatus = ( my %nvramBatteryStatus = (
@ -253,8 +259,7 @@ my %EcnlStatusIndex = (
sub _create_session(@) { sub _create_session(@) {
my ($server, $comm) = @_; my ($server, $comm, $version) = @_;
my $version = 1;
my ($sess, $err) = Net::SNMP->session( -hostname => $server, -version => $version, -community => $comm); my ($sess, $err) = Net::SNMP->session( -hostname => $server, -version => $version, -community => $comm);
if (!defined($sess)) { if (!defined($sess)) {
print "Can't create SNMP session to $server\n"; print "Can't create SNMP session to $server\n";
@ -266,37 +271,58 @@ sub _create_session(@) {
sub FSyntaxError($) { sub FSyntaxError($) {
my $err = shift; my $err = shift;
print <<EOU; print <<EOU;
$err This is $script_name in version $script_version.
Syntax: $err
$script_name
Version : $script_version
-H = Ip/Dns Name of the Filer -w = Warning Value Syntax:
-C = SNMP Community -c = Critical Value -H <IP or Hostname> Ip/Dns Name of the Filer
-T = Check type --vol = Volume Name -C <community name> SNMP Community Name for read
-e = vol exclude from snap check -V <1|2c> SNMP version (default 1)
TEMP - Temperature -T <Check type> Type of check, see bellow
FAN - Fan Fail
PS - Power Supply Fail
CPULOAD - CPU Load (-w -c)
NVRAM - NVram Battery Status
DISKUSED - Vol Usage Precentage (-w -c --vol)
SNAPSHOT - Snapshot Config (-e volname,volname2,volname3)
SHELF - Shelf Health
SHELFINFO - Shelf Model & Temperature Information
NFSOPS - Nfs Ops per seconds (-w -c)
CIFSOPS - Cifs Ops per seconds (-w -c)
NDMPSESSIONS - Number of ndmp sessions (-w -c)
CIFSSESSIONS - Number of cifs sessions (-w -c)
GLOBALSTATUS - Global Status of the filer
AUTOSUPPORTSTATUS - Auto Support Status of the filer
HA - High Availability
DISKSUMMARY - Status of disks
FAILEDDISK - Number of failed disks
UPTIME - Only show\'s uptime
CACHEAGE - Cache Age
-w <number> Warning Value (default 500)
-c <number> Critical Value (default 500)
-v <volume_path> Volume Name in format /vol/volname/
-e <vol1[,vol2[,...]]> Exclude volumes from snap check (SNAPSHOT)
-I Inform only, return OK every time (ignore -w and -c values)
Available check types:
TEMP - Temperature
FAN - Fan Fail
PS - Power Supply Fail
CPULOAD - CPU Load (-w -c)
NVRAM - NVram Battery Status
DISKUSED - Vol Usage Precentage (-w -c -v)
SNAPSHOT - Snapshot Config (-e volname,volname2,volname3)
SHELF - Shelf Health
SHELFINFO - Shelf Model & Temperature Information
NFSOPS - Nfs Ops per seconds (-w -c)
CIFSOPS - Cifs Ops per seconds (-w -c)
ISCSIOPS - iSCSI Ops per seconds, sets -V 2c (-w -c)
FCPOPS - FibreChannel Ops per seconds, sets -V 2c (-w -c)
NDMPSESSIONS - Number of ndmp sessions (-w -c)
CIFSSESSIONS - Number of cifs sessions (-w -c)
GLOBALSTATUS - Global Status of the filer
AUTOSUPPORTSTATUS - Auto Support Status of the filer
HA - High Availability
DISKSUMMARY - Status of disks
FAILEDDISK - Number of failed disks
UPTIME - Only show\'s uptime
CACHEAGE - Cache Age
Examples:
$script_name -H netapp.mydomain -C public -T UPTIME
UPTIME: 2 days, 23:03:21.09 | uptime=255801s
$script_name -H netapp.mydomain -C public -T FCOPS -I
CRIT: FCPOPS 1130 | fcpops=1130
$script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/vol0/ -w 90 -c 95
OK: DISKUSED 5% | /vol/vol0/=8639316k
$script_name -H netapp.mydomain -C public -T GLOBALSTATUS
CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. | globalstatus=4
EOU EOU
exit($ERRORS{'UNKNOWN'}); exit($ERRORS{'UNKNOWN'});
@ -317,7 +343,7 @@ sub _clac_generic_err_stat(@) {
my $scale = shift; my $scale = shift;
my $r_msg; my $r_msg;
my $r_stat; my $r_stat;
if($value <= $tmp_warn) { if($opt{'inform'} or ($value <= $tmp_warn)) {
$r_stat = $ERRORS{'OK'}; $r_stat = $ERRORS{'OK'};
$r_msg = "OK: $value_type $value $scale"; $r_msg = "OK: $value_type $value $scale";
} elsif($value > $tmp_warn and $value < $tmp_crit) { } elsif($value > $tmp_warn and $value < $tmp_crit) {
@ -378,17 +404,19 @@ sub _clac_minutes_err_stat(@) {
### Gather input from user ### Gather input from user
############################# #############################
my %opt;
$opt{'crit'} = 500; $opt{'crit'} = 500;
$opt{'warn'} = 500; $opt{'warn'} = 500;
$opt{'version'} = 1;
my $result = GetOptions(\%opt, my $result = GetOptions(\%opt,
'filer|H=s', 'filer|H=s',
'community|C=s', 'community|C=s',
'version|V=s',
'check_type|T=s', 'check_type|T=s',
'warn|w=i', 'warn|w=i',
'crit|c=i', 'crit|c=i',
'vol|v=s', 'vol|v=s',
'exclude|e=s', 'exclude|e=s',
'inform|I',
); );
FSyntaxError("Missing -H") unless defined $opt{'filer'}; FSyntaxError("Missing -H") unless defined $opt{'filer'};
@ -405,6 +433,9 @@ if($opt{'crit'} and $opt{'warn'}) {
} }
} }
if( ($opt{'check_type'} eq 'ISCSIOPS') or ($opt{'check_type'} eq 'FCPOPS') ) {
$opt{'version'} = '2c';
}
if (!defined($counterFilePath)) { if (!defined($counterFilePath)) {
$state = "UNKNOWN"; $state = "UNKNOWN";
@ -416,82 +447,92 @@ if (!defined($counterFilePath)) {
# Starting Alaram # Starting Alaram
alarm($TIMEOUT); alarm($TIMEOUT);
# Establish SNMP Session # Establish SNMP Session
our $snmp_session = _create_session($opt{'filer'},$opt{'community'}); our $snmp_session = _create_session($opt{'filer'},$opt{'community'},$opt{'version'});
# setup counterFile now that we have host IP and oid # setup counterFile now that we have host IP and check type
$counterFile = $counterFilePath."/".$opt{'filer'}.".check-netapp-ng.ops.nagioscache"; $counterFile = $counterFilePath."/".$opt{'filer'}.".check-netapp-ng.$opt{'check_type'}.nagioscache";
$snmpHostUptime = _get_oid_value($snmp_session,$snmpSysUpTime); $snmpHostUptime = _get_oid_value($snmp_session,$snmpSysUpTime);
# READ CACHE DATA FROM FILE IF IT EXISTS # READ AND UPDATE CACHE FOR SPECIFIC TESTS FROM FILE
if (-e $counterFile) { if (("$opt{'check_type'}" eq "CIFSOPS") or ("$opt{'check_type'}" eq "NFSOPS") or ("$opt{'check_type'}" eq "ISCSIOPS") or ("$opt{'check_type'}" eq "FCPOPS")) {
open(FILE, "$counterFile");
chomp($fileRuntime = <FILE>);
chomp($fileHostUptime = <FILE>);
chomp($fileNfsOps = <FILE>);
chomp($fileCifsOps = <FILE>);
close(FILE);
} # end if file exists
# READ CACHE DATA FROM FILE IF IT EXISTS
if (-e $counterFile) {
open(FILE, "$counterFile");
chomp($fileRuntime = <FILE>);
chomp($fileHostUptime = <FILE>);
chomp($fileNfsOps = <FILE>);
chomp($fileCifsOps = <FILE>);
chomp($fileIscsiOps = <FILE>);
chomp($fileFcpOps = <FILE>);
close(FILE);
} # end if file exists
# POPULATE CACHE DATA TO FILE # POPULATE CACHE DATA TO FILE
if ((-w $counterFile) || (-w dirname($counterFile))) {
open(FILE, ">$counterFile");
print FILE "$runtime\n";
print FILE "$snmpHostUptime\n";
if ((-w $counterFile) || (-w dirname($counterFile))) { my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps);
open(FILE, ">$counterFile"); my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps);
print FILE "$runtime\n";
print FILE "$snmpHostUptime\n";
my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); my $temp_high_ops = $high_nfs_ops << 32;
my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); my $total_nfs_ops = $temp_high_ops | $low_nfs_ops;
my $temp_high_ops = $high_nfs_ops << 32; print FILE "$total_nfs_ops\n";
my $total_nfs_ops = $temp_high_ops | $low_nfs_ops;
print FILE "$total_nfs_ops\n"; my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps);
my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps);
my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); my $temp_high_ops = $high_cifs_ops << 32;
my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); my $total_cifs_ops = $temp_high_ops | $low_cifs_ops;
my $temp_high_ops = $high_cifs_ops << 32; print FILE "$total_cifs_ops\n";
my $total_cifs_ops = $temp_high_ops | $low_cifs_ops;
print FILE "$total_cifs_ops\n"; my $blocks_iscsi_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64Ops);
close(FILE); print FILE "$blocks_iscsi_ops\n";
} else {
$state = "WARNING";
$answer = "file $counterFile is not writable\n";
print ("$state: $answer\n");
exit $ERRORS{$state};
} # end if file is writable
my $blocks_fcp_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64Ops);
print FILE "$blocks_fcp_ops\n";
# check to see if we pulled data from the cache file or not close(FILE);
if ( (!defined($fileRuntime)) && ( ("$opt{'check_type'}" eq "CIFSOPS") or ("$opt{'check_type'}" eq "NFSOPS") )) { } else {
$state = "OK"; $state = "WARNING";
$answer = "never cached - caching\n"; $answer = "file $counterFile is not writable\n";
print "$state: $answer\n"; print ("$state: $answer\n");
exit $ERRORS{$state}; exit $ERRORS{$state};
} # end if cache file didn't exist } # end if file is writable
# check host's uptime to see if it goes backward # check to see if we pulled data from the cache file or not
if ($fileHostUptime > $snmpHostUptime) { if ( (!defined($fileRuntime)) ) {
$state = "WARNING"; $state = "OK";
$answer = "uptime goes backward - recaching data\n"; $answer = "never cached - caching\n";
print "$state: $answer\n"; print "$state: $answer\n";
exit $ERRORS{$state}; exit $ERRORS{$state};
} # end if host uptime goes backward } # end if cache file didn't exist
my $elapsedtime=$runtime-$fileRuntime; # check host's uptime to see if it goes backward
if ($fileHostUptime > $snmpHostUptime) {
$state = "WARNING";
$answer = "uptime goes backward - recaching data\n";
print "$state: $answer\n";
exit $ERRORS{$state};
} # end if host uptime goes backward
if ($elapsedtime<1){ $elapsedtime=1; } $elapsedtime=$runtime-$fileRuntime;
if ($elapsedtime<1){ $elapsedtime=1; }
} # end populate cache only for *OPS tests
#print "fileHostUptime : ".$fileHostUptime."\n"; #print "fileHostUptime : ".$fileHostUptime."\n";
#print "snmpeHostUptime : ".$snmpHostUptime."\n"; #print "snmpeHostUptime : ".$snmpHostUptime."\n";
@ -499,10 +540,6 @@ if ($elapsedtime<1){ $elapsedtime=1; }
### Temperature ### ### Temperature ###
if("$opt{'check_type'}" eq "TEMP") { if("$opt{'check_type'}" eq "TEMP") {
my $check = _get_oid_value($snmp_session,$snmpenvOverTemperature); my $check = _get_oid_value($snmp_session,$snmpenvOverTemperature);
@ -569,6 +606,26 @@ if("$opt{'check_type'}" eq "TEMP") {
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); ($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
$perf = "cifsops=$check"; $perf = "cifsops=$check";
### ISCSIOPS ###
} elsif("$opt{'check_type'}" eq "ISCSIOPS") {
my $total_iscsi_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64Ops);
my $iscsiops_per_seconds=floor ( ($total_iscsi_ops-$fileIscsiOps)/$elapsedtime );
my $check=$iscsiops_per_seconds;
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
$perf = "iscsiops=$check";
### FCPOPS ###
} elsif("$opt{'check_type'}" eq "FCPOPS") {
my $total_fcp_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64Ops);
my $fcpops_per_seconds=floor ( ($total_fcp_ops-$fileFcpOps)/$elapsedtime );
my $check=$fcpops_per_seconds;
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
$perf = "fcpops=$check";
### NVRAM ### ### NVRAM ###
} elsif("$opt{'check_type'}" eq "NVRAM") { } elsif("$opt{'check_type'}" eq "NVRAM") {
my $check = _get_oid_value($snmp_session,$snmpnvramBatteryStatus); my $check = _get_oid_value($snmp_session,$snmpnvramBatteryStatus);
@ -583,7 +640,7 @@ if("$opt{'check_type'}" eq "TEMP") {
### DISKUSED ### ### DISKUSED ###
} elsif("$opt{'check_type'}" eq "DISKUSED") { } elsif("$opt{'check_type'}" eq "DISKUSED") {
FSyntaxError("Missing -vol") unless defined $opt{'vol'}; FSyntaxError("Missing -v") unless defined $opt{'vol'};
my $r_vol_tbl = $snmp_session->get_table($snmp_netapp_volume_id_table_df_name); my $r_vol_tbl = $snmp_session->get_table($snmp_netapp_volume_id_table_df_name);
foreach my $key ( keys %$r_vol_tbl) { foreach my $key ( keys %$r_vol_tbl) {