From dc36706c2df221186d9f727f6e2e5be1c4e3cd73 Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sat, 30 May 2015 21:10:51 +0200 Subject: [PATCH 01/15] Add checks for blocks operations and other changes * Add ISCSIOPS and FCPOPS checks type similar to CIFSOPS * New -V option sets SNMP version (needs 2c for reading 64bit values) * New -I option return every time OK state (if you needs only performance data) * redesign help message and append examples * nagioscache files create only if check needs caching * each check has own *.nagioscache file (when I using multiple tests I get inexact values of performance data) * change version to 1.2 --- nagios/check-netapp-ng.pl | 241 +++++++++++++++++++++++--------------- 1 file changed, 149 insertions(+), 92 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index eb5576b..b5733bc 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -26,7 +26,7 @@ ## ## DISKSUMMARY|HA|CIFSSESSIONS| ## AUTOSUPPORTSTATUS|NFSOPS| -## CIFSOPS|SHELFINFO +## CIFSOPS|SHELFINFO|... 
## ##################################### ##################################### @@ -47,11 +47,13 @@ my $stat = 0; my $msg; my $perf; my $script_name = "check-netapp-ng.pl"; -my $script_version = 1.1; +my $script_version = 1.2; my $counterFilePath="/tmp"; my $counterFile; +my %opt; +my $elapsedtime = 1; my %ERRORS = ( 'OK' => '0', @@ -76,6 +78,8 @@ my $fileRuntime; my $fileHostUptime; my $fileNfsOps; my $fileCifsOps; +my $fileIscsiOps; +my $fileFcpOps; my $snmpHostUptime; @@ -161,7 +165,9 @@ my $snmp_netapp_miscLowNfsOps = '.1.3.6.1.4.1.789.1.2.2.6.0'; my $snmp_netapp_miscHighCifsOps = '.1.3.6.1.4.1.789.1.2.2.7.0'; my $snmp_netapp_miscLowCifsOps = '.1.3.6.1.4.1.789.1.2.2.8.0'; - +my $snmp_netapp_blocks = '.1.3.6.1.4.1.789.1.17'; +my $snmp_netapp_blocks_iscsi64Ops = "$snmp_netapp_blocks.24.0"; +my $snmp_netapp_blocks_fcp64Ops = "$snmp_netapp_blocks.25.0"; # SNMP Status Codes my %nvramBatteryStatus = ( @@ -253,8 +259,7 @@ my %EcnlStatusIndex = ( sub _create_session(@) { - my ($server, $comm) = @_; - my $version = 1; + my ($server, $comm, $version) = @_; my ($sess, $err) = Net::SNMP->session( -hostname => $server, -version => $version, -community => $comm); if (!defined($sess)) { print "Can't create SNMP session to $server\n"; @@ -266,37 +271,58 @@ sub _create_session(@) { sub FSyntaxError($) { my $err = shift; print < Ip/Dns Name of the Filer + -C SNMP Community Name for read + -V <1|2c> SNMP version (default 1) + -T Type of check, see bellow + -w Warning Value (default 500) + -c Critical Value (default 500) + -v Volume Name in format /vol/volname/ + -e Exclude volumes from snap check (SNAPSHOT) + -I Inform only, return OK every time (ignore -w and -c values) + + Available check types: + TEMP - Temperature + FAN - Fan Fail + PS - Power Supply Fail + CPULOAD - CPU Load (-w -c) + NVRAM - NVram Battery Status + DISKUSED - Vol Usage Precentage (-w -c -v) + SNAPSHOT - Snapshot Config (-e volname,volname2,volname3) + SHELF - Shelf Health + SHELFINFO - Shelf Model & 
Temperature Information + NFSOPS - Nfs Ops per seconds (-w -c) + CIFSOPS - Cifs Ops per seconds (-w -c) + ISCSIOPS - iSCSI Ops per seconds, sets -V 2c (-w -c) + FCPOPS - FibreChannel Ops per seconds, sets -V 2c (-w -c) + NDMPSESSIONS - Number of ndmp sessions (-w -c) + CIFSSESSIONS - Number of cifs sessions (-w -c) + GLOBALSTATUS - Global Status of the filer + AUTOSUPPORTSTATUS - Auto Support Status of the filer + HA - High Availability + DISKSUMMARY - Status of disks + FAILEDDISK - Number of failed disks + UPTIME - Only show\'s uptime + CACHEAGE - Cache Age + + Examples: + $script_name -H netapp.mydomain -C public -T UPTIME + UPTIME: 2 days, 23:03:21.09 | uptime=255801s + + $script_name -H netapp.mydomain -C public -T FCOPS -I + CRIT: FCPOPS 1130 | fcpops=1130 + + $script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/vol0/ -w 90 -c 95 + OK: DISKUSED 5% | /vol/vol0/=8639316k + + $script_name -H netapp.mydomain -C public -T GLOBALSTATUS + CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. 
| globalstatus=4 EOU exit($ERRORS{'UNKNOWN'}); @@ -317,7 +343,7 @@ sub _clac_generic_err_stat(@) { my $scale = shift; my $r_msg; my $r_stat; - if($value <= $tmp_warn) { + if($opt{'inform'} or ($value <= $tmp_warn)) { $r_stat = $ERRORS{'OK'}; $r_msg = "OK: $value_type $value $scale"; } elsif($value > $tmp_warn and $value < $tmp_crit) { @@ -378,17 +404,19 @@ sub _clac_minutes_err_stat(@) { ### Gather input from user ############################# -my %opt; $opt{'crit'} = 500; $opt{'warn'} = 500; +$opt{'version'} = 1; my $result = GetOptions(\%opt, 'filer|H=s', 'community|C=s', + 'version|V=s', 'check_type|T=s', 'warn|w=i', 'crit|c=i', 'vol|v=s', 'exclude|e=s', + 'inform|I', ); FSyntaxError("Missing -H") unless defined $opt{'filer'}; @@ -405,6 +433,9 @@ if($opt{'crit'} and $opt{'warn'}) { } } +if( ($opt{'check_type'} eq 'ISCSIOPS') or ($opt{'check_type'} eq 'FCPOPS') ) { + $opt{'version'} = '2c'; +} if (!defined($counterFilePath)) { $state = "UNKNOWN"; @@ -416,82 +447,92 @@ if (!defined($counterFilePath)) { + # Starting Alaram alarm($TIMEOUT); # Establish SNMP Session -our $snmp_session = _create_session($opt{'filer'},$opt{'community'}); +our $snmp_session = _create_session($opt{'filer'},$opt{'community'},$opt{'version'}); -# setup counterFile now that we have host IP and oid -$counterFile = $counterFilePath."/".$opt{'filer'}.".check-netapp-ng.ops.nagioscache"; +# setup counterFile now that we have host IP and check type +$counterFile = $counterFilePath."/".$opt{'filer'}.".check-netapp-ng.$opt{'check_type'}.nagioscache"; $snmpHostUptime = _get_oid_value($snmp_session,$snmpSysUpTime); -# READ CACHE DATA FROM FILE IF IT EXISTS -if (-e $counterFile) { - open(FILE, "$counterFile"); - chomp($fileRuntime = ); - chomp($fileHostUptime = ); - chomp($fileNfsOps = ); - chomp($fileCifsOps = ); - close(FILE); - } # end if file exists +# READ AND UPDATE CACHE FOR SPECIFIC TESTS FROM FILE +if (("$opt{'check_type'}" eq "CIFSOPS") or ("$opt{'check_type'}" eq "NFSOPS") or 
("$opt{'check_type'}" eq "ISCSIOPS") or ("$opt{'check_type'}" eq "FCPOPS")) { + # READ CACHE DATA FROM FILE IF IT EXISTS + if (-e $counterFile) { + open(FILE, "$counterFile"); + chomp($fileRuntime = ); + chomp($fileHostUptime = ); + chomp($fileNfsOps = ); + chomp($fileCifsOps = ); + chomp($fileIscsiOps = ); + chomp($fileFcpOps = ); + close(FILE); + } # end if file exists -# POPULATE CACHE DATA TO FILE + # POPULATE CACHE DATA TO FILE + if ((-w $counterFile) || (-w dirname($counterFile))) { + open(FILE, ">$counterFile"); + print FILE "$runtime\n"; + print FILE "$snmpHostUptime\n"; -if ((-w $counterFile) || (-w dirname($counterFile))) { - open(FILE, ">$counterFile"); - print FILE "$runtime\n"; - print FILE "$snmpHostUptime\n"; + my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); + my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); - my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); - my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); - - my $temp_high_ops = $high_nfs_ops << 32; - my $total_nfs_ops = $temp_high_ops | $low_nfs_ops; + my $temp_high_ops = $high_nfs_ops << 32; + my $total_nfs_ops = $temp_high_ops | $low_nfs_ops; - print FILE "$total_nfs_ops\n"; + print FILE "$total_nfs_ops\n"; - my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); - my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); - - my $temp_high_ops = $high_cifs_ops << 32; - my $total_cifs_ops = $temp_high_ops | $low_cifs_ops; + my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); + my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); - print FILE "$total_cifs_ops\n"; + my $temp_high_ops = $high_cifs_ops << 32; + my $total_cifs_ops = $temp_high_ops | $low_cifs_ops; - close(FILE); - } else { - $state = "WARNING"; - $answer = "file $counterFile is not writable\n"; - print ("$state: $answer\n"); - 
exit $ERRORS{$state}; - } # end if file is writable + print FILE "$total_cifs_ops\n"; + my $blocks_iscsi_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64Ops); -# check to see if we pulled data from the cache file or not -if ( (!defined($fileRuntime)) && ( ("$opt{'check_type'}" eq "CIFSOPS") or ("$opt{'check_type'}" eq "NFSOPS") )) { - $state = "OK"; - $answer = "never cached - caching\n"; - print "$state: $answer\n"; - exit $ERRORS{$state}; -} # end if cache file didn't exist + print FILE "$blocks_iscsi_ops\n"; -# check host's uptime to see if it goes backward -if ($fileHostUptime > $snmpHostUptime) { - $state = "WARNING"; - $answer = "uptime goes backward - recaching data\n"; - print "$state: $answer\n"; - exit $ERRORS{$state}; -} # end if host uptime goes backward + my $blocks_fcp_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64Ops); + print FILE "$blocks_fcp_ops\n"; -my $elapsedtime=$runtime-$fileRuntime; + close(FILE); + } else { + $state = "WARNING"; + $answer = "file $counterFile is not writable\n"; + print ("$state: $answer\n"); + exit $ERRORS{$state}; + } # end if file is writable -if ($elapsedtime<1){ $elapsedtime=1; } + # check to see if we pulled data from the cache file or not + if ( (!defined($fileRuntime)) ) { + $state = "OK"; + $answer = "never cached - caching\n"; + print "$state: $answer\n"; + exit $ERRORS{$state}; + } # end if cache file didn't exist + # check host's uptime to see if it goes backward + if ($fileHostUptime > $snmpHostUptime) { + $state = "WARNING"; + $answer = "uptime goes backward - recaching data\n"; + print "$state: $answer\n"; + exit $ERRORS{$state}; + } # end if host uptime goes backward + $elapsedtime=$runtime-$fileRuntime; + + if ($elapsedtime<1){ $elapsedtime=1; } + +} # end populate cache only for *OPS tests #print "fileHostUptime : ".$fileHostUptime."\n"; #print "snmpeHostUptime : ".$snmpHostUptime."\n"; @@ -499,10 +540,6 @@ if ($elapsedtime<1){ $elapsedtime=1; } - - - - ### Temperature ### 
if("$opt{'check_type'}" eq "TEMP") { my $check = _get_oid_value($snmp_session,$snmpenvOverTemperature); @@ -569,6 +606,26 @@ if("$opt{'check_type'}" eq "TEMP") { ($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); $perf = "cifsops=$check"; +### ISCSIOPS ### +} elsif("$opt{'check_type'}" eq "ISCSIOPS") { + my $total_iscsi_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64Ops); + + my $iscsiops_per_seconds=floor ( ($total_iscsi_ops-$fileIscsiOps)/$elapsedtime ); + + my $check=$iscsiops_per_seconds; + + ($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); + $perf = "iscsiops=$check"; +### FCPOPS ### +} elsif("$opt{'check_type'}" eq "FCPOPS") { + my $total_fcp_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64Ops); + + my $fcpops_per_seconds=floor ( ($total_fcp_ops-$fileFcpOps)/$elapsedtime ); + + my $check=$fcpops_per_seconds; + + ($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); + $perf = "fcpops=$check"; ### NVRAM ### } elsif("$opt{'check_type'}" eq "NVRAM") { my $check = _get_oid_value($snmp_session,$snmpnvramBatteryStatus); @@ -583,7 +640,7 @@ if("$opt{'check_type'}" eq "TEMP") { ### DISKUSED ### } elsif("$opt{'check_type'}" eq "DISKUSED") { - FSyntaxError("Missing -vol") unless defined $opt{'vol'}; + FSyntaxError("Missing -v") unless defined $opt{'vol'}; my $r_vol_tbl = $snmp_session->get_table($snmp_netapp_volume_id_table_df_name); foreach my $key ( keys %$r_vol_tbl) { From 53ac3aa8a421182cad7b51e7dcdc90c7475a02ef Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sat, 30 May 2015 21:36:18 +0200 Subject: [PATCH 02/15] Add -t option as timeout for SNMP connection. 
This fixes the problem reported in the comment at https://exchange.nagios.org/directory/Plugins/Hardware/Storage-Systems/SAN-and-NAS/NetApp/Check-Netapp-NG/details#rev-3387 --- nagios/check-netapp-ng.pl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index b5733bc..a23d11b 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -259,8 +259,8 @@ my %EcnlStatusIndex = ( sub _create_session(@) { - my ($server, $comm, $version) = @_; - my ($sess, $err) = Net::SNMP->session( -hostname => $server, -version => $version, -community => $comm); + my ($server, $comm, $version, $timeout) = @_; + my ($sess, $err) = Net::SNMP->session( -hostname => $server, -version => $version, -community => $comm, -timeout => $timeout); if (!defined($sess)) { print "Can't create SNMP session to $server\n"; exit(1); @@ -279,7 +279,8 @@ $err -H Ip/Dns Name of the Filer -C SNMP Community Name for read -V <1|2c> SNMP version (default 1) - -T Type of check, see bellow + -T Type of check, see bellow + -t Timeout to SNMP session in seconds (default 5) -w Warning Value (default 500) -c Critical Value (default 500) @@ -407,6 +408,7 @@ sub _clac_minutes_err_stat(@) { $opt{'crit'} = 500; $opt{'warn'} = 500; $opt{'version'} = 1; +$opt{'timeout'} = 5; my $result = GetOptions(\%opt, 'filer|H=s', 'community|C=s', @@ -417,6 +419,7 @@ my $result = GetOptions(\%opt, 'vol|v=s', 'exclude|e=s', 'inform|I', + 'timeout|t=i', ); FSyntaxError("Missing -H") unless defined $opt{'filer'}; @@ -452,7 +455,7 @@ if (!defined($counterFilePath)) { alarm($TIMEOUT); # Establish SNMP Session -our $snmp_session = _create_session($opt{'filer'},$opt{'community'},$opt{'version'}); +our $snmp_session = _create_session($opt{'filer'},$opt{'community'},$opt{'version'},$opt{'timeout'}); # setup counterFile now that we have host IP and check type $counterFile = $counterFilePath."/".$opt{'filer'}.".check-netapp-ng.$opt{'check_type'}.nagioscache"; From
f6d0d9deb9ce06f501d5276ef162247eed710159 Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sat, 30 May 2015 23:32:05 +0200 Subject: [PATCH 03/15] DISKUSED used 64bit counter if with -V 2c, fix 32bit On big volumes (bigger than a 32bit counter = 4GB) the value overflows to negative values. SNMP version 2c can transport 64bit values, so it is not necessary to use the Low and High 32bit parts of a 64bit number. Fix DISKUSED on SNMPv1 when more than 4GB of space is used. The new _ulong64() function builds a 64bit number from its high and low 32bit parts. This function computes the correct 64bit number on a 32bit operating system. --- nagios/check-netapp-ng.pl | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index a23d11b..ab64d4b 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -85,6 +85,7 @@ my $snmpHostUptime; ### SNMP OIDs +### You can browse at http://www.oidview.com/mibs/789/NETAPP-MIB.html ############### my $snmpSysUpTime = '.1.3.6.1.2.1.1.3.0'; my $snmpFailedFanCount = '.1.3.6.1.4.1.789.1.2.4.2.0'; @@ -133,7 +134,10 @@ my $snmp_netapp_volume_id_table_df_total = "$snmp_netapp_volume_id_table_df.3"; my $snmp_netapp_volume_id_table_df_used = "$snmp_netapp_volume_id_table_df.4"; my $snmp_netapp_volume_id_table_df_free = "$snmp_netapp_volume_id_table_df.5"; my $snmp_netapp_volume_id_table_df_used_prec = "$snmp_netapp_volume_id_table_df.6"; - +# 64bit values for SNMP v2c +my $snmp_netapp_volume_id_table_df64_total = "$snmp_netapp_volume_id_table_df.29"; +my $snmp_netapp_volume_id_table_df64_used = "$snmp_netapp_volume_id_table_df.30"; +my $snmp_netapp_volume_id_table_df64_free = "$snmp_netapp_volume_id_table_df.31"; my $snmp_netapp_enclNumber = '.1.3.6.1.4.1.789.1.21.1.1'; my $snmpEnclTable = '.1.3.6.1.4.1.789.1.21.1.2.1'; @@ -294,14 +298,14 @@ $err PS - Power Supply Fail CPULOAD - CPU Load (-w -c) NVRAM - NVram Battery Status - DISKUSED - Vol Usage Precentage (-w -c -v) +
DISKUSED - Vol Usage Percentage (-w -c -v), for big volumes (>4GB) use -V 2c SNAPSHOT - Snapshot Config (-e volname,volname2,volname3) SHELF - Shelf Health SHELFINFO - Shelf Model & Temperature Information NFSOPS - Nfs Ops per seconds (-w -c) CIFSOPS - Cifs Ops per seconds (-w -c) - ISCSIOPS - iSCSI Ops per seconds, sets -V 2c (-w -c) - FCPOPS - FibreChannel Ops per seconds, sets -V 2c (-w -c) + ISCSIOPS - iSCSI Ops per seconds, using -V 2c automatic (-w -c) + FCPOPS - FibreChannel Ops per seconds, using -V 2c automatic (-w -c) NDMPSESSIONS - Number of ndmp sessions (-w -c) CIFSSESSIONS - Number of cifs sessions (-w -c) GLOBALSTATUS - Global Status of the filer @@ -310,7 +314,7 @@ $err DISKSUMMARY - Status of disks FAILEDDISK - Number of failed disks UPTIME - Only show\'s uptime - CACHEAGE - Cache Age + CACHEAGE - Cache Age (-w -c) Examples: $script_name -H netapp.mydomain -C public -T UPTIME @@ -319,8 +323,8 @@ $err $script_name -H netapp.mydomain -C public -T FCOPS -I CRIT: FCPOPS 1130 | fcpops=1130 - $script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/vol0/ -w 90 -c 95 - OK: DISKUSED 5% | /vol/vol0/=8639316k + $script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/data/ -w 90 -c 95 -V 2c + OK: DISKUSED 79% | /vol/data/=8104595240k $script_name -H netapp.mydomain -C public -T GLOBALSTATUS CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. 
| globalstatus=4 @@ -650,8 +654,13 @@ if("$opt{'check_type'}" eq "TEMP") { if("$$r_vol_tbl{$key}" eq "$opt{'vol'}") { my @tmp_arr = split(/\./, $key); my $oid = pop(@tmp_arr); - - my $used = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used.$oid"); + my $used = ""; + if ($opt{'version'} eq '2c') { + $used = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df64_used.$oid"); + } + else { + $used = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used.$oid"); + } my $used_prec = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_prec.$oid"); ($msg,$stat) = _clac_err_stat($used_prec,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); From f0dc4fea318fa60e8184701b12d7043a8cca82f8 Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sun, 31 May 2015 00:54:51 +0200 Subject: [PATCH 04/15] Fix example in help message. --- nagios/check-netapp-ng.pl | 40 ++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index ab64d4b..6b5c6f5 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -134,6 +134,8 @@ my $snmp_netapp_volume_id_table_df_total = "$snmp_netapp_volume_id_table_df.3"; my $snmp_netapp_volume_id_table_df_used = "$snmp_netapp_volume_id_table_df.4"; my $snmp_netapp_volume_id_table_df_free = "$snmp_netapp_volume_id_table_df.5"; my $snmp_netapp_volume_id_table_df_used_prec = "$snmp_netapp_volume_id_table_df.6"; +my $snmp_netapp_volume_id_table_df_used_high = "$snmp_netapp_volume_id_table_df.16"; +my $snmp_netapp_volume_id_table_df_used_low = "$snmp_netapp_volume_id_table_df.17"; # 64bit values for SNMP v2c my $snmp_netapp_volume_id_table_df64_total = "$snmp_netapp_volume_id_table_df.29"; my $snmp_netapp_volume_id_table_df64_used = "$snmp_netapp_volume_id_table_df.30"; @@ -298,7 +300,7 @@ $err PS - Power Supply Fail CPULOAD - CPU Load (-w -c) NVRAM - NVram Battery Status - DISKUSED - Vol Usage 
Percentage (-w -c -v), for big volumes (>4GB) use -V 2c + DISKUSED - Vol Usage Percentage (-w -c -v) SNAPSHOT - Snapshot Config (-e volname,volname2,volname3) SHELF - Shelf Health SHELFINFO - Shelf Model & Temperature Information @@ -321,7 +323,7 @@ $err UPTIME: 2 days, 23:03:21.09 | uptime=255801s $script_name -H netapp.mydomain -C public -T FCOPS -I - CRIT: FCPOPS 1130 | fcpops=1130 + OK: FCPOPS 1130 | fcpops=1130 $script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/data/ -w 90 -c 95 -V 2c OK: DISKUSED 79% | /vol/data/=8104595240k @@ -405,6 +407,19 @@ sub _clac_minutes_err_stat(@) { return($r_msg,$r_stat); } +sub _ulong64(@) { + my $high = shift; + my $low = shift; + if ($low < 0) { + $low = $low & 0x00000000FFFFFFFF; + $low = $low | 0x0000000080000000; + } + if ($high < 0) { + $high = $high & 0x00000000FFFFFFFF; + $high = $high | 0x0000000080000000; + } + return ($high<<32)|$low; +} ### Gather input from user @@ -490,17 +505,13 @@ if (("$opt{'check_type'}" eq "CIFSOPS") or ("$opt{'check_type'}" eq "NFSOPS") or my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); - - my $temp_high_ops = $high_nfs_ops << 32; - my $total_nfs_ops = $temp_high_ops | $low_nfs_ops; + my $total_nfs_ops = _ulong64($high_nfs_ops, $low_nfs_ops); print FILE "$total_nfs_ops\n"; my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); - - my $temp_high_ops = $high_cifs_ops << 32; - my $total_cifs_ops = $temp_high_ops | $low_cifs_ops; + my $total_cifs_ops = _ulong64($high_cifs_ops, $low_cifs_ops); print FILE "$total_cifs_ops\n"; @@ -589,10 +600,8 @@ if("$opt{'check_type'}" eq "TEMP") { } elsif("$opt{'check_type'}" eq "NFSOPS") { my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); + my 
$total_nfs_ops = _ulong64($high_nfs_ops,$low_nfs_ops); - my $temp_high_ops = $high_nfs_ops << 32; - my $total_nfs_ops = $temp_high_ops | $low_nfs_ops; - my $nfsops_per_seconds=floor ( ($total_nfs_ops-$fileNfsOps)/$elapsedtime ); my $check=$nfsops_per_seconds; @@ -603,9 +612,7 @@ if("$opt{'check_type'}" eq "TEMP") { } elsif("$opt{'check_type'}" eq "CIFSOPS") { my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); - - my $temp_high_ops = $high_cifs_ops << 32; - my $total_cifs_ops = $temp_high_ops | $low_cifs_ops; + my $total_cifs_ops = _ulong64($high_cifs_ops,$low_cifs_ops); my $cifsops_per_seconds=floor ( ($total_cifs_ops-$fileCifsOps)/$elapsedtime ); @@ -659,7 +666,10 @@ if("$opt{'check_type'}" eq "TEMP") { $used = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df64_used.$oid"); } else { - $used = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used.$oid"); + my $used_high = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_high.$oid"); + my $used_low = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_low.$oid"); + $used = _ulong64($used_high, $used_low); + print "$used_high | $used_low | $used\n"; } my $used_prec = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_prec.$oid"); From c9e7c6839c9311bb8a2acc01c834f7c3b27c551c Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sun, 31 May 2015 01:07:02 +0200 Subject: [PATCH 05/15] Remove forgeted debug output. 
--- nagios/check-netapp-ng.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 6b5c6f5..290a410 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -669,7 +669,6 @@ if("$opt{'check_type'}" eq "TEMP") { my $used_high = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_high.$oid"); my $used_low = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_low.$oid"); $used = _ulong64($used_high, $used_low); - print "$used_high | $used_low | $used\n"; } my $used_prec = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_prec.$oid"); From 55ccc3a08121c3f181c8fce8fe696023f8cd2f3f Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sun, 31 May 2015 02:01:25 +0200 Subject: [PATCH 06/15] Fix non-existent items in SNMP v2c for SHELF test SNMPv1 and SNMPv2c report non-existent items differently: SNMPv1 returns an empty string, whereas SNMPv2c returns the string 'noSuchInstance'. --- nagios/check-netapp-ng.pl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 290a410..7d6b5b8 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -878,8 +878,7 @@ if("$opt{'check_type'}" eq "TEMP") { foreach my $subkey ( keys %shelf) { - if ( $shelf{$subkey} ne "" ) { - print "$subkey->$shelf{$subkey} "; + if ( ($shelf{$subkey} ne "") and ($shelf{$subkey} ne "noSuchInstance") ) { if ( "$subkey" eq "CurrentTemp" ) { $shelf{$subkey} =~ m/^([0-9]+)C.*$/; $perf_temp = "$perf_temp, temp_$shelf{'ShelfNumber'}=$1"; @@ -889,7 +888,7 @@ if("$opt{'check_type'}" eq "TEMP") { { print "$subkey->"; print "None "; } if ("$opt{'check_type'}" eq "SHELF") { - if($shelf{$subkey}) { push(@shelf_err,"$addr $subkey,") } + if(($shelf{$subkey} ne "") and ($shelf{$subkey} ne "noSuchInstance")) { push(@shelf_err,"$addr $subkey,") } } } From d5a2688d576015698f669ffde0a6058b63d381e6 Mon Sep 17 00:00:00 2001
From: Michal Svamberg Date: Sun, 31 May 2015 02:47:41 +0200 Subject: [PATCH 07/15] DISKUSED accept aggregate name in -v option --- nagios/check-netapp-ng.pl | 94 +++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 7d6b5b8..9452198 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -281,55 +281,53 @@ This is $script_name in version $script_version. $err - Syntax: - -H Ip/Dns Name of the Filer - -C SNMP Community Name for read - -V <1|2c> SNMP version (default 1) - -T Type of check, see bellow - -t Timeout to SNMP session in seconds (default 5) + Syntax: + -H Ip/Dns Name of the Filer + -C SNMP Community Name for read + -V <1|2c> SNMP version (default 1), some checks run only 2c + -T Type of check, see bellow + -t Timeout to SNMP session in seconds (default 5) - -w Warning Value (default 500) - -c Critical Value (default 500) - -v Volume Name in format /vol/volname/ - -e Exclude volumes from snap check (SNAPSHOT) - -I Inform only, return OK every time (ignore -w and -c values) + -w Warning Value (default 500) + -c Critical Value (default 500) + -v Volume Name in format /vol/volname/ or aggregate name + -e Exclude volumes from snap check (SNAPSHOT) + -I Inform only, return OK every time (ignore -w and -c values) + -h This help - Available check types: - TEMP - Temperature - FAN - Fan Fail - PS - Power Supply Fail - CPULOAD - CPU Load (-w -c) - NVRAM - NVram Battery Status - DISKUSED - Vol Usage Percentage (-w -c -v) - SNAPSHOT - Snapshot Config (-e volname,volname2,volname3) - SHELF - Shelf Health - SHELFINFO - Shelf Model & Temperature Information - NFSOPS - Nfs Ops per seconds (-w -c) - CIFSOPS - Cifs Ops per seconds (-w -c) - ISCSIOPS - iSCSI Ops per seconds, using -V 2c automatic (-w -c) - FCPOPS - FibreChannel Ops per seconds, using -V 2c automatic (-w -c) - NDMPSESSIONS - Number of ndmp sessions (-w -c) - CIFSSESSIONS - Number of 
cifs sessions (-w -c) - GLOBALSTATUS - Global Status of the filer - AUTOSUPPORTSTATUS - Auto Support Status of the filer - HA - High Availability - DISKSUMMARY - Status of disks - FAILEDDISK - Number of failed disks - UPTIME - Only show\'s uptime - CACHEAGE - Cache Age (-w -c) + Available check types: + TEMP - Temperature + FAN - Fan Fail + PS - Power Supply Fail + CPULOAD - CPU Load (-w -c) + NVRAM - NVram Battery Status + DISKUSED - Usage Percentage of volume or aggregate (-w -c -v) + SNAPSHOT - Snapshot Config (-e volname,volname2,volname3) + SHELF - Shelf Health + SHELFINFO - Shelf Model & Temperature Information + NFSOPS - Nfs Ops per seconds (-w -c) + CIFSOPS - Cifs Ops per seconds (-w -c) + ISCSIOPS - iSCSI Ops per seconds, using SNMP version 2c (-w -c) + FCPOPS - FibreChannel Ops per seconds, using SNMP version 2c (-w -c) + NDMPSESSIONS - Number of ndmp sessions (-w -c) + CIFSSESSIONS - Number of cifs sessions (-w -c) + GLOBALSTATUS - Global Status of the filer + AUTOSUPPORTSTATUS - Auto Support Status of the filer + HA - High Availability + DISKSUMMARY - Status of disks + FAILEDDISK - Number of failed disks + UPTIME - Only show\'s uptime + CACHEAGE - Cache Age (-w -c) - Examples: - $script_name -H netapp.mydomain -C public -T UPTIME - UPTIME: 2 days, 23:03:21.09 | uptime=255801s + Examples: + $script_name -H netapp.mydomain -C public -T UPTIME + UPTIME: 2 days, 23:03:21.09 | uptime=255801s - $script_name -H netapp.mydomain -C public -T FCOPS -I - OK: FCPOPS 1130 | fcpops=1130 + $script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/data/ -w 90 -c 95 -V 2c + OK: DISKUSED 79% | /vol/data/=8104595240k - $script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/data/ -w 90 -c 95 -V 2c - OK: DISKUSED 79% | /vol/data/=8104595240k - - $script_name -H netapp.mydomain -C public -T GLOBALSTATUS - CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. 
| globalstatus=4 + $script_name -H netapp.mydomain -C public -T GLOBALSTATUS + CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. | globalstatus=4 EOU exit($ERRORS{'UNKNOWN'}); @@ -439,14 +437,16 @@ my $result = GetOptions(\%opt, 'exclude|e=s', 'inform|I', 'timeout|t=i', + "help|h", ); +FSyntaxError("") if defined $opt{'help'}; FSyntaxError("Missing -H") unless defined $opt{'filer'}; FSyntaxError("Missing -C") unless defined $opt{'community'}; FSyntaxError("Missing -T") unless defined $opt{'check_type'}; if($opt{'vol'}) { - if($opt{'vol'} !~ /^\/.*\/$/) { - FSyntaxError("$opt{'vol'} format is /vol/volname/ !"); + if ( !( ($opt{'vol'} =~ m#^/vol/.*/$#) or ($opt{'vol'} =~ m#^[^/]*$#) ) ) { + FSyntaxError("$opt{'vol'} format is /vol/volname/ or 'aggregate_name' !"); } } if($opt{'crit'} and $opt{'warn'}) { @@ -679,7 +679,7 @@ if("$opt{'check_type'}" eq "TEMP") { } if ($msg =~ /^$/) { $stat = $ERRORS{'WARNING'}; - $msg = "WARN: Missing volume $opt{'vol'} !"; + $msg = "WARN: Missing volume path or aggregate name '$opt{'vol'}' !"; } ### SNAPSHOT ### } elsif("$opt{'check_type'}" eq "SNAPSHOT") { From fc14520f7dbbfe5af07cd5cc4a63109af8778de6 Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sun, 31 May 2015 03:24:45 +0200 Subject: [PATCH 08/15] Fix SHELFINFO output and complete help. 
--- nagios/check-netapp-ng.pl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 9452198..c1cb289 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -287,10 +287,10 @@ $err -V <1|2c> SNMP version (default 1), some checks run only 2c -T Type of check, see bellow -t Timeout to SNMP session in seconds (default 5) - -w Warning Value (default 500) -c Critical Value (default 500) - -v Volume Name in format /vol/volname/ or aggregate name + -v Volume Name in format /vol/volname/ + or aggregate name (not available in 7.x ONTAP) -e Exclude volumes from snap check (SNAPSHOT) -I Inform only, return OK every time (ignore -w and -c values) -h This help @@ -884,16 +884,15 @@ if("$opt{'check_type'}" eq "TEMP") { $perf_temp = "$perf_temp, temp_$shelf{'ShelfNumber'}=$1"; } } - else - { print "$subkey->"; print "None "; } + #else { print "$subkey->"; print "None "; } if ("$opt{'check_type'}" eq "SHELF") { if(($shelf{$subkey} ne "") and ($shelf{$subkey} ne "noSuchInstance")) { push(@shelf_err,"$addr $subkey,") } } } - { print "\n"; } - ##if ("$opt{'check_type'}" eq "SHELFINFO") { print "\n"; } + #{ print "\n"; } + #if ("$opt{'check_type'}" eq "SHELF") { print "\n"; } if($#shelf_err != -1) { push(@errs,@shelf_err) From 62e9ec4bb779fe7117e8a9e86432235c94b1008c Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sun, 31 May 2015 03:32:04 +0200 Subject: [PATCH 09/15] Move error to first line in help output --- nagios/check-netapp-ng.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index c1cb289..b2a1e73 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -277,10 +277,10 @@ sub _create_session(@) { sub FSyntaxError($) { my $err = shift; print < Ip/Dns Name of the Filer -C SNMP Community Name for read From e81a22128942a5302ff6aaf97a97d0b02b34d5c3 Mon Sep 17 00:00:00 2001 From: 
Michal Svamberg Date: Sun, 31 May 2015 11:58:50 +0200 Subject: [PATCH 10/15] Add performance data and optimize SNMP queries Add read/write byte performance data for disks and for FCP or iSCSI to the FCPOPS and ISCSIOPS checks. The nagioscache files now contain only the necessary values (for FCPOPS only FCP values, no NFS, CIFS or ISCSI values). For all *OPS checks, each value is stored in a global variable when it is written to the nagioscache file. Each value is now fetched only once, so the output is more precise and fewer SNMP queries are used. --- nagios/check-netapp-ng.pl | 137 +++++++++++++++++++++++++------------- 1 file changed, 91 insertions(+), 46 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index b2a1e73..488f099 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -79,7 +79,25 @@ my $fileHostUptime; my $fileNfsOps; my $fileCifsOps; my $fileIscsiOps; +my $fileIscsi64ReadBytes; +my $fileIscsi64WriteBytes; my $fileFcpOps; +my $fileFcp64ReadBytes; +my $fileFcp64WriteBytes; +my $fileDisk64ReadBytes; +my $fileDisk64WriteBytes; + +# performance variables from SNMP +my $total_nfs_ops; +my $total_cifs_ops; +my $total_disk_read; +my $total_disk_write; +my $blocks_iscsi_ops; +my $blocks_iscsi_read; +my $blocks_iscsi_write; +my $blocks_fcp_ops; +my $blocks_fcp_read; +my $blocks_fcp_write; my $snmpHostUptime; @@ -165,15 +183,21 @@ my $snmpEnclTableVoltUnderFail = "$snmpEnclTable.38"; my $snmpEnclTableVoltUnderWarn = "$snmpEnclTable.39"; -my $snmp_netapp_miscHighNfsOps = '.1.3.6.1.4.1.789.1.2.2.5.0'; -my $snmp_netapp_miscLowNfsOps = '.1.3.6.1.4.1.789.1.2.2.6.0'; - -my $snmp_netapp_miscHighCifsOps = '.1.3.6.1.4.1.789.1.2.2.7.0'; -my $snmp_netapp_miscLowCifsOps = '.1.3.6.1.4.1.789.1.2.2.8.0'; +my $snmp_netapp_misc = '1.3.6.1.4.1.789.1.2.2'; +my $snmp_netapp_miscHighNfsOps = "$snmp_netapp_misc.5.0"; +my $snmp_netapp_miscLowNfsOps = "$snmp_netapp_misc.6.0"; +my $snmp_netapp_miscHighCifsOps = "$snmp_netapp_misc.7.0"; +my $snmp_netapp_miscLowCifsOps = "$snmp_netapp_misc.8.0"; +my
$snmp_netapp_misc64DiskReadBytes = "$snmp_netapp_misc.32.0"; +my $snmp_netapp_misc64DiskWriteBytes = "$snmp_netapp_misc.33.0"; my $snmp_netapp_blocks = '.1.3.6.1.4.1.789.1.17'; my $snmp_netapp_blocks_iscsi64Ops = "$snmp_netapp_blocks.24.0"; +my $snmp_netapp_blocks_iscsi64ReadBytes = "$snmp_netapp_blocks.22.0"; +my $snmp_netapp_blocks_iscsi64WriteBytes = "$snmp_netapp_blocks.23.0"; my $snmp_netapp_blocks_fcp64Ops = "$snmp_netapp_blocks.25.0"; +my $snmp_netapp_blocks_fcp64ReadBytes = "$snmp_netapp_blocks.20.0"; +my $snmp_netapp_blocks_fcp64WriteBytes = "$snmp_netapp_blocks.21.0"; # SNMP Status Codes my %nvramBatteryStatus = ( @@ -307,8 +331,8 @@ This is $script_name in version $script_version. SHELFINFO - Shelf Model & Temperature Information NFSOPS - Nfs Ops per seconds (-w -c) CIFSOPS - Cifs Ops per seconds (-w -c) - ISCSIOPS - iSCSI Ops per seconds, using SNMP version 2c (-w -c) - FCPOPS - FibreChannel Ops per seconds, using SNMP version 2c (-w -c) + ISCSIOPS - iSCSI Ops per seconds, collect read/write performance data, using SNMPv2c (-w -c) + FCPOPS - FibreChannel Ops per seconds, collect read/write performance data, using SNMPv2c (-w -c) NDMPSESSIONS - Number of ndmp sessions (-w -c) CIFSSESSIONS - Number of cifs sessions (-w -c) GLOBALSTATUS - Global Status of the filer @@ -479,21 +503,32 @@ our $snmp_session = _create_session($opt{'filer'},$opt{'community'},$opt{'versio # setup counterFile now that we have host IP and check type $counterFile = $counterFilePath."/".$opt{'filer'}.".check-netapp-ng.$opt{'check_type'}.nagioscache"; -$snmpHostUptime = _get_oid_value($snmp_session,$snmpSysUpTime); - # READ AND UPDATE CACHE FOR SPECIFIC TESTS FROM FILE if (("$opt{'check_type'}" eq "CIFSOPS") or ("$opt{'check_type'}" eq "NFSOPS") or ("$opt{'check_type'}" eq "ISCSIOPS") or ("$opt{'check_type'}" eq "FCPOPS")) { + $snmpHostUptime = _get_oid_value($snmp_session,$snmpSysUpTime); # READ CACHE DATA FROM FILE IF IT EXISTS if (-e $counterFile) { open(FILE, "$counterFile"); 
chomp($fileRuntime = ); chomp($fileHostUptime = ); - chomp($fileNfsOps = ); - chomp($fileCifsOps = ); - chomp($fileIscsiOps = ); - chomp($fileFcpOps = ); + chomp($fileNfsOps = ) if $opt{'check_type'} eq 'NFSOPS'; + chomp($fileCifsOps = ) if $opt{'check_type'} eq 'CIFSOPS'; + if ($opt{'check_type'} eq 'ISCSIOPS') { + chomp($fileIscsiOps = ); + chomp($fileIscsi64ReadBytes = ); + chomp($fileIscsi64WriteBytes = ); + } + if ($opt{'check_type'} eq 'FCPOPS') { + chomp($fileFcpOps = ); + chomp($fileFcp64ReadBytes = ); + chomp($fileFcp64WriteBytes = ); + } + if ( ($opt{'check_type'} eq 'ISCSIOPS') or ($opt{'check_type'} eq 'FCPOPS') ) { + chomp($fileDisk64ReadBytes = ); + chomp($fileDisk64WriteBytes = ); + } close(FILE); } # end if file exists @@ -503,25 +538,39 @@ if (("$opt{'check_type'}" eq "CIFSOPS") or ("$opt{'check_type'}" eq "NFSOPS") or print FILE "$runtime\n"; print FILE "$snmpHostUptime\n"; - my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); - my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); - my $total_nfs_ops = _ulong64($high_nfs_ops, $low_nfs_ops); + if ($opt{'check_type'} eq 'NFSOPS') { + my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); + my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); + $total_nfs_ops = _ulong64($high_nfs_ops, $low_nfs_ops); + print FILE "$total_nfs_ops\n"; + } - print FILE "$total_nfs_ops\n"; + if ($opt{'check_type'} eq 'CIFSOPS') { + my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); + my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); + $total_cifs_ops = _ulong64($high_cifs_ops, $low_cifs_ops); + print FILE "$total_cifs_ops\n"; + } - my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); - my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); - my $total_cifs_ops = _ulong64($high_cifs_ops, $low_cifs_ops); + if 
($opt{'check_type'} eq 'ISCSIOPS') { + $blocks_iscsi_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64Ops); + $blocks_iscsi_read = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64ReadBytes); + $blocks_iscsi_write = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64WriteBytes); + print FILE "$blocks_iscsi_ops\n$blocks_iscsi_read\n$blocks_iscsi_write\n"; + } - print FILE "$total_cifs_ops\n"; - - my $blocks_iscsi_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64Ops); - - print FILE "$blocks_iscsi_ops\n"; - - my $blocks_fcp_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64Ops); - print FILE "$blocks_fcp_ops\n"; + if ($opt{'check_type'} eq 'FCPOPS') { + $blocks_fcp_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64Ops); + $blocks_fcp_read = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64ReadBytes); + $blocks_fcp_write = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64WriteBytes); + print FILE "$blocks_fcp_ops\n$blocks_fcp_read\n$blocks_fcp_write\n"; + } + if ( ($opt{'check_type'} eq 'ISCSIOPS') or ($opt{'check_type'} eq 'FCPOPS') ) { + $total_disk_read = _get_oid_value($snmp_session,$snmp_netapp_misc64DiskReadBytes); + $total_disk_write = _get_oid_value($snmp_session,$snmp_netapp_misc64DiskWriteBytes); + print FILE "$total_disk_read\n$total_disk_write\n"; + } close(FILE); } else { $state = "WARNING"; @@ -598,10 +647,6 @@ if("$opt{'check_type'}" eq "TEMP") { $perf = "cpuload=$check\%"; ### NFSOPS ### } elsif("$opt{'check_type'}" eq "NFSOPS") { - my $low_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowNfsOps); - my $high_nfs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighNfsOps); - my $total_nfs_ops = _ulong64($high_nfs_ops,$low_nfs_ops); - my $nfsops_per_seconds=floor ( ($total_nfs_ops-$fileNfsOps)/$elapsedtime ); my $check=$nfsops_per_seconds; @@ -610,10 +655,6 @@ if("$opt{'check_type'}" eq "TEMP") { $perf = "nfsops=$check"; ### CIFSOPS ### } elsif("$opt{'check_type'}" eq 
"CIFSOPS") { - my $low_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscLowCifsOps); - my $high_cifs_ops = _get_oid_value($snmp_session,$snmp_netapp_miscHighCifsOps); - my $total_cifs_ops = _ulong64($high_cifs_ops,$low_cifs_ops); - my $cifsops_per_seconds=floor ( ($total_cifs_ops-$fileCifsOps)/$elapsedtime ); my $check=$cifsops_per_seconds; @@ -622,24 +663,29 @@ if("$opt{'check_type'}" eq "TEMP") { $perf = "cifsops=$check"; ### ISCSIOPS ### } elsif("$opt{'check_type'}" eq "ISCSIOPS") { - my $total_iscsi_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_iscsi64Ops); - - my $iscsiops_per_seconds=floor ( ($total_iscsi_ops-$fileIscsiOps)/$elapsedtime ); - + my $iscsiops_per_seconds=floor ( ($blocks_iscsi_ops-$fileIscsiOps)/$elapsedtime ); + my $iscsiread_per_seconds=floor ( ($blocks_iscsi_read-$fileIscsi64ReadBytes)/$elapsedtime ); + my $iscsiwrite_per_seconds=floor ( ($blocks_iscsi_write-$fileIscsi64WriteBytes)/$elapsedtime ); + my $diskread_per_seconds=floor ( ($total_disk_read-$fileDisk64ReadBytes)/$elapsedtime ); + my $diskwrite_per_seconds=floor ( ($total_disk_write-$fileDisk64WriteBytes)/$elapsedtime ); my $check=$iscsiops_per_seconds; ($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); - $perf = "iscsiops=$check"; + $msg = "$msg ops/s (iscsi read=$iscsiread_per_seconds B/s, iscsi write=$iscsiwrite_per_seconds B/s, disk read=$diskread_per_seconds B/s, disk write=$diskwrite_per_seconds B/s)"; + $perf = "iscsiops=$check iscsiread=$iscsiread_per_seconds iscsiwrite=$iscsiwrite_per_seconds diskread=$diskread_per_seconds diskwrite=$diskwrite_per_seconds"; ### FCPOPS ### } elsif("$opt{'check_type'}" eq "FCPOPS") { - my $total_fcp_ops = _get_oid_value($snmp_session,$snmp_netapp_blocks_fcp64Ops); - - my $fcpops_per_seconds=floor ( ($total_fcp_ops-$fileFcpOps)/$elapsedtime ); + my $fcpops_per_seconds=floor ( ($blocks_fcp_ops-$fileFcpOps)/$elapsedtime ); + my $fcpread_per_seconds=floor ( 
($blocks_fcp_read-$fileFcp64ReadBytes)/$elapsedtime ); + my $fcpwrite_per_seconds=floor ( ($blocks_fcp_write-$fileFcp64WriteBytes)/$elapsedtime ); + my $diskread_per_seconds=floor ( ($total_disk_read-$fileDisk64ReadBytes)/$elapsedtime ); + my $diskwrite_per_seconds=floor ( ($total_disk_write-$fileDisk64WriteBytes)/$elapsedtime ); my $check=$fcpops_per_seconds; ($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); - $perf = "fcpops=$check"; + $msg = "$msg ops/s (fcp read=$fcpread_per_seconds B/s, fcp write=$fcpwrite_per_seconds B/s, disk read=$diskread_per_seconds B/s, disk write=$diskwrite_per_seconds B/s))"; + $perf = "fcpops=$check fcpread=$fcpread_per_seconds fcpwrite=$fcpwrite_per_seconds diskread=$diskread_per_seconds diskwrite=$diskwrite_per_seconds"; ### NVRAM ### } elsif("$opt{'check_type'}" eq "NVRAM") { my $check = _get_oid_value($snmp_session,$snmpnvramBatteryStatus); @@ -922,6 +968,5 @@ if("$opt{'check_type'}" eq "TEMP") { FSyntaxError("$opt{'check_type'} invalid parameter !"); } - print "$msg | $perf\n"; exit($stat); From 2e0becac9334cdb48a79860ddfc165f918f51169 Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sun, 31 May 2015 12:42:37 +0200 Subject: [PATCH 11/15] Add listing of available volumes and aggregates --- nagios/check-netapp-ng.pl | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 488f099..30df651 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -306,15 +306,16 @@ $err This is $script_name in version $script_version. 
Syntax: - -H Ip/Dns Name of the Filer - -C SNMP Community Name for read + -H Ip/Dns Name of the Filer + -C SNMP Community Name for read -V <1|2c> SNMP version (default 1), some checks run only 2c - -T Type of check, see bellow + -T Type of check, see bellow -t Timeout to SNMP session in seconds (default 5) -w Warning Value (default 500) -c Critical Value (default 500) -v Volume Name in format /vol/volname/ or aggregate name (not available in 7.x ONTAP) + For available values use any word, such as \'-v whatever\' -e Exclude volumes from snap check (SNAPSHOT) -I Inform only, return OK every time (ignore -w and -c values) -h This help @@ -352,6 +353,9 @@ This is $script_name in version $script_version. $script_name -H netapp.mydomain -C public -T GLOBALSTATUS CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. | globalstatus=4 + + $script_name -H netapp.mydomain -C public -T DISKUSED -v wtf + WARN: Unknown volume path or aggregate name 'wtf'. Available values: aggr_p1a_sas2_mirror /vol/vol0/ /vol/esx/ /vol/xen_a/ EOU exit($ERRORS{'UNKNOWN'}); @@ -470,7 +474,7 @@ FSyntaxError("Missing -C") unless defined $opt{'community'}; FSyntaxError("Missing -T") unless defined $opt{'check_type'}; if($opt{'vol'}) { if ( !( ($opt{'vol'} =~ m#^/vol/.*/$#) or ($opt{'vol'} =~ m#^[^/]*$#) ) ) { - FSyntaxError("$opt{'vol'} format is /vol/volname/ or 'aggregate_name' !"); + FSyntaxError("$opt{'vol'} format is '/vol/volname/' or 'aggregate_name'! For listing available names use any text such as '-v whatever'."); } } if($opt{'crit'} and $opt{'warn'}) { @@ -725,7 +729,11 @@ if("$opt{'check_type'}" eq "TEMP") { } if ($msg =~ /^$/) { $stat = $ERRORS{'WARNING'}; - $msg = "WARN: Missing volume path or aggregate name '$opt{'vol'}' !"; + $msg = "WARN: Unknown volume path or aggregate name '$opt{'vol'}'. 
Available values:"; + foreach my $key (sort keys %$r_vol_tbl) { + next if $$r_vol_tbl{$key} =~ m#.*/\.snapshot$#; + $msg .= " $$r_vol_tbl{$key}" + } } ### SNAPSHOT ### } elsif("$opt{'check_type'}" eq "SNAPSHOT") { @@ -968,5 +976,7 @@ if("$opt{'check_type'}" eq "TEMP") { FSyntaxError("$opt{'check_type'} invalid parameter !"); } -print "$msg | $perf\n"; +$perf ? print "$msg | $perf\n" : print "$msg\n"; + exit($stat); + From 3c7ca480371591265fe39f8262896137a9d07232 Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Sun, 31 May 2015 12:48:27 +0200 Subject: [PATCH 12/15] Better filter for listing available volumes. --- nagios/check-netapp-ng.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 30df651..ec38118 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -731,7 +731,7 @@ if("$opt{'check_type'}" eq "TEMP") { $stat = $ERRORS{'WARNING'}; $msg = "WARN: Unknown volume path or aggregate name '$opt{'vol'}'. 
Available values:"; foreach my $key (sort keys %$r_vol_tbl) { - next if $$r_vol_tbl{$key} =~ m#.*/\.snapshot$#; + next if ( !( ($$r_vol_tbl{$key} =~ m#^/vol/.*/$#) or ($$r_vol_tbl{$key} =~ m#^[^/]*$#) ) ); $msg .= " $$r_vol_tbl{$key}" } } From 4890add44f83a5695386bd6751474c0ae9e0ad88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C5=A0vamberg?= Date: Sun, 31 May 2015 20:43:21 +0200 Subject: [PATCH 13/15] typo mistake --- nagios/check-netapp-ng.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index ec38118..90e1d20 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -688,7 +688,7 @@ if("$opt{'check_type'}" eq "TEMP") { my $check=$fcpops_per_seconds; ($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); - $msg = "$msg ops/s (fcp read=$fcpread_per_seconds B/s, fcp write=$fcpwrite_per_seconds B/s, disk read=$diskread_per_seconds B/s, disk write=$diskwrite_per_seconds B/s))"; + $msg = "$msg ops/s (fcp read=$fcpread_per_seconds B/s, fcp write=$fcpwrite_per_seconds B/s, disk read=$diskread_per_seconds B/s, disk write=$diskwrite_per_seconds B/s)"; $perf = "fcpops=$check fcpread=$fcpread_per_seconds fcpwrite=$fcpwrite_per_seconds diskread=$diskread_per_seconds diskwrite=$diskwrite_per_seconds"; ### NVRAM ### } elsif("$opt{'check_type'}" eq "NVRAM") { From 96c348f6143b52d662f6be68d850abaac0b1c9ff Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Mon, 1 Jun 2015 11:34:13 +0200 Subject: [PATCH 14/15] Set unit of measurements on DISKUSED from k to KB. Nagiosgraph needs unit name as KB, not only 'k'. 
--- nagios/check-netapp-ng.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 90e1d20..7b2197f 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -724,7 +724,7 @@ if("$opt{'check_type'}" eq "TEMP") { ($msg,$stat) = _clac_err_stat($used_prec,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); - $perf = "$$r_vol_tbl{$key}=$used\k"; + $perf = "$$r_vol_tbl{$key}=$used\KB"; } } if ($msg =~ /^$/) { From a03dda24866b2e08d9f5c7be4c03e19e5405d308 Mon Sep 17 00:00:00 2001 From: Michal Svamberg Date: Mon, 1 Jun 2015 13:53:59 +0200 Subject: [PATCH 15/15] Add percentual perf. data and limits to DISKUSED --- nagios/check-netapp-ng.pl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nagios/check-netapp-ng.pl b/nagios/check-netapp-ng.pl index 7b2197f..627abbb 100755 --- a/nagios/check-netapp-ng.pl +++ b/nagios/check-netapp-ng.pl @@ -712,8 +712,10 @@ if("$opt{'check_type'}" eq "TEMP") { my @tmp_arr = split(/\./, $key); my $oid = pop(@tmp_arr); my $used = ""; + my $capacity = ""; if ($opt{'version'} eq '2c') { $used = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df64_used.$oid"); + $capacity = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df64_total.$oid"); } else { my $used_high = _get_oid_value($snmp_session,"$snmp_netapp_volume_id_table_df_used_high.$oid"); @@ -724,7 +726,10 @@ if("$opt{'check_type'}" eq "TEMP") { ($msg,$stat) = _clac_err_stat($used_prec,$opt{'check_type'},$opt{'warn'},$opt{'crit'}); - $perf = "$$r_vol_tbl{$key}=$used\KB"; + # https://nagios-plugins.org/doc/guidelines.html + # 'label'=value[UOM];[warn];[crit];[min];[max] + $perf = "$$r_vol_tbl{$key}=$used\KB;" .floor($capacity*$opt{'warn'}/100) .";" .floor($capacity*$opt{'crit'}/100) .";;$capacity"; + $perf .= " $$r_vol_tbl{$key}:perc=$used_prec\%;" .floor($opt{'warn'}) .";" .floor($opt{'crit'}) .";;100"; } } if ($msg =~ /^$/) {