mirror of
https://github.com/ranl/monitor-utils.git
synced 2025-04-12 02:03:39 +02:00
Merge c9271972a6
into fc9c1902ab
This commit is contained in:
commit
f296f7465f
@ -30,8 +30,21 @@
|
||||
## AUTOSUPPORTSTATUS|NFSOPS|
|
||||
## CIFSOPS|SHELFINFO|...
|
||||
##
|
||||
#####################################
|
||||
#####################################
|
||||
##
|
||||
##
|
||||
## CLARIFICATION FOR TIMEOUTS
|
||||
## There are multiple timeouts we depend on
|
||||
##
|
||||
## Perl plugins timeout (utils.pm)--- > $TIMEOUT --- > originally 15 sec --> recommendation to raise it to 180
|
||||
##
|
||||
## Net::SNMP timeout --- > Used in Net::SNMP->session --- > originally 5 sec --> recommendation to raise it to 60
|
||||
## Beware that the max value is 60 seconds. If set higher, you get the error message "Can't create snmp session"
|
||||
##
|
||||
## Do not forget that in nagios you need to increase service_check_timeout to a value above $TIMEOUT_PLUGINS
|
||||
## Nagios service check timeout (nagios.cfg) --- > service_check_timeout=240 --- > originally 30 sec
|
||||
##
|
||||
####################################################################################################################################################
|
||||
####################################################################################################################################################
|
||||
|
||||
|
||||
use strict;
|
||||
@ -47,13 +60,16 @@ use Getopt::Long;
|
||||
use Time::Local;
|
||||
use IPC::Cmd qw(run_forked);
|
||||
|
||||
|
||||
|
||||
Getopt::Long::Configure('bundling');
|
||||
|
||||
my $TIMEOUT_PLUGINS=$TIMEOUT;
|
||||
my $stat = 0;
|
||||
my $msg;
|
||||
my $perf;
|
||||
my $script_name = basename($0);
|
||||
my $script_version = 1.3;
|
||||
# Plugin version, interpolated into the usage/help text.
# Quoted on purpose: a bare 1.3.1 (a numeric literal with two dots) is parsed
# by Perl as a v-string, i.e. the characters chr(1).chr(3).chr(1), which would
# print control characters instead of "1.3.1".
my $script_version = '1.3.1';
|
||||
|
||||
my $counterFilePath="/tmp";
|
||||
my $counterFile;
|
||||
@ -227,7 +243,7 @@ my %nvramBatteryStatus = (
|
||||
5 => 'near end of life',
|
||||
6 => 'at end of life',
|
||||
7 => 'unknown',
|
||||
);
|
||||
);
|
||||
my %GlobalStatusIndex = (
|
||||
1 => 'other',
|
||||
2 => 'unknown',
|
||||
@ -235,7 +251,7 @@ my %GlobalStatusIndex = (
|
||||
4 => 'nonCritical',
|
||||
5 => 'critical',
|
||||
6 => 'nonRecoverable',
|
||||
);
|
||||
);
|
||||
|
||||
my %AutoSupportStatusIndex = (
|
||||
1 => 'ok',
|
||||
@ -243,7 +259,7 @@ my %AutoSupportStatusIndex = (
|
||||
3 => 'postFailure',
|
||||
4 => 'smtpPostFailure',
|
||||
5 => 'unknown',
|
||||
);
|
||||
);
|
||||
|
||||
my %cfSettingsIndex = (
|
||||
1 => 'notConfigured',
|
||||
@ -251,7 +267,7 @@ my %cfSettingsIndex = (
|
||||
3 => 'disabled',
|
||||
4 => 'takeoverByPartnerDisabled',
|
||||
5 => 'thisNodeDead',
|
||||
);
|
||||
);
|
||||
|
||||
|
||||
my %cfStateIndex = (
|
||||
@ -259,7 +275,7 @@ my %cfStateIndex = (
|
||||
2 => 'canTakeover',
|
||||
3 => 'cannotTakeover',
|
||||
4 => 'takeover',
|
||||
);
|
||||
);
|
||||
|
||||
my %cfCannotTakeoverCauseIndex = (
|
||||
1 => 'ok',
|
||||
@ -280,20 +296,20 @@ my %cfCannotTakeoverCauseIndex = (
|
||||
16 => 'alreadyInTakenoverMode',
|
||||
17 => 'nvramLogUnsynchronized',
|
||||
18 => 'backupMailboxProblems',
|
||||
);
|
||||
);
|
||||
|
||||
my %cfPartnerStatusIndex = (
|
||||
1 => 'maybeDown',
|
||||
2 => 'ok',
|
||||
3 => 'dead',
|
||||
);
|
||||
);
|
||||
|
||||
my %cfInterconnectStatusIndex = (
|
||||
1 => 'notPresent',
|
||||
2 => 'down',
|
||||
3 => 'partialFailure',
|
||||
4 => 'up',
|
||||
);
|
||||
);
|
||||
|
||||
my %EcnlStatusIndex = (
|
||||
1 => 'initializing',
|
||||
@ -302,13 +318,13 @@ my %EcnlStatusIndex = (
|
||||
4 => 'inactive',
|
||||
5 => 'reconfiguring',
|
||||
6 => 'nonexistent',
|
||||
);
|
||||
);
|
||||
|
||||
my %fsOverallStatusIndex = (
|
||||
1 => 'ok',
|
||||
2 => 'Nearly Full',
|
||||
3 => 'Full',
|
||||
);
|
||||
);
|
||||
|
||||
### Functions
|
||||
###############
|
||||
@ -338,10 +354,8 @@ sub _create_session(@) {
|
||||
sub FSyntaxError($) {
|
||||
my $err = shift;
|
||||
print <<EOU;
|
||||
$err
|
||||
|
||||
This is $script_name in version $script_version.
|
||||
|
||||
$err
|
||||
This is $script_name in version $script_version.
|
||||
Syntax:
|
||||
-H <IP_or_Hostname> Ip/Dns Name of the Filer
|
||||
-C <community_name> SNMP Community Name for read
|
||||
@ -356,7 +370,6 @@ This is $script_name in version $script_version.
|
||||
-e <vol1[,vol2[,...]]> Exclude volumes from snap check (SNAPSHOT/SNAPSHOTAGE)
|
||||
-I Inform only, return OK every time (ignore -w and -c values)
|
||||
-h This help
|
||||
|
||||
Available check types:
|
||||
TEMP - Temperature
|
||||
FAN - Fan Fail
|
||||
@ -382,20 +395,15 @@ This is $script_name in version $script_version.
|
||||
UPTIME - Only show\'s uptime
|
||||
CACHEAGE - Cache Age (-w -c)
|
||||
FSSTATUS - Overall file system health
|
||||
|
||||
Examples:
|
||||
$script_name -H netapp.mydomain -C public -T UPTIME
|
||||
UPTIME: 2 days, 23:03:21.09 | uptime=255801s
|
||||
|
||||
$script_name -H netapp.mydomain -C public -T DISKUSED -v /vol/data/ -w 90 -c 95 -V 2c
|
||||
OK: DISKUSED 79% | /vol/data/=8104595240k
|
||||
|
||||
$script_name -H netapp.mydomain -C public -T GLOBALSTATUS
|
||||
CRIT: GLOBALSTATUS nonCritical 4 Disk on adapter 1a, shelf 1, bay 9, failed. | globalstatus=4
|
||||
|
||||
$script_name -H netapp.mydomain -C public -T DISKUSED -v wtf
|
||||
WARN: Unknown volume path or aggregate name 'wtf'. Available values: aggr_p1a_sas2_mirror /vol/vol0/ /vol/esx/ /vol/xen_a/
|
||||
|
||||
EOU
|
||||
exit($ERRORS{'UNKNOWN'});
|
||||
}
|
||||
@ -493,6 +501,8 @@ $opt{'crit'} = 500;
|
||||
$opt{'warn'} = 500;
|
||||
$opt{'version'} = 2;
|
||||
$opt{'timeout'} = 60;
|
||||
$TIMEOUT_PLUGINS = 180 ;
|
||||
|
||||
my $result = GetOptions(\%opt,
|
||||
'filer|H=s',
|
||||
'community|C=s',
|
||||
@ -507,6 +517,17 @@ my $result = GetOptions(\%opt,
|
||||
"help|h",
|
||||
);
|
||||
|
||||
if ( $opt{'timeout'} > 60)
|
||||
{
|
||||
#Set timeout for plugin to the parameter received via command line, but set snmp timeout to the max (60 seconds) if CLI timeout is above 60 seconds
|
||||
$TIMEOUT_PLUGINS=$opt{'timeout'};
|
||||
$opt{'timeout'}=60;
|
||||
} else
|
||||
{
|
||||
$TIMEOUT_PLUGINS=$opt{'timeout'};
|
||||
}
|
||||
|
||||
|
||||
FSyntaxError("") if defined $opt{'help'};
|
||||
FSyntaxError("Missing -H") unless defined $opt{'filer'};
|
||||
FSyntaxError("Missing -C") unless defined $opt{'community'};
|
||||
@ -536,9 +557,16 @@ if (!defined($counterFilePath)) {
|
||||
|
||||
|
||||
|
||||
# Just in case of problems, let's not hang Nagios
|
||||
# with "Return code of 142 is out of bounds", instead we set the message "No response in time"
|
||||
|
||||
$SIG{'ALRM'} = sub {
|
||||
print ("CRITICAL: No response in time\n");
|
||||
exit $ERRORS{"CRITICAL"};
|
||||
};
|
||||
|
||||
# Starting Alarm
|
||||
alarm($TIMEOUT);
|
||||
alarm($TIMEOUT_PLUGINS);
|
||||
|
||||
# Establish SNMP Session
|
||||
our $snmp_session = _create_session($opt{'filer'},$opt{'community'},$opt{'version'},$opt{'timeout'});
|
||||
@ -661,7 +689,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
$msg = "CRIT: Over $opt{'check_type'} !";
|
||||
}
|
||||
$perf = "overtemperature=$check";
|
||||
### Fan ###
|
||||
### Fan ###
|
||||
} elsif("$opt{'check_type'}" eq "FAN") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpFailedFanCount);
|
||||
if($check == 0) {
|
||||
@ -672,7 +700,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
$msg = "CRIT: $opt{'check_type'} $check !";
|
||||
}
|
||||
$perf = "failedfans=$check";
|
||||
### PS ###
|
||||
### PS ###
|
||||
} elsif("$opt{'check_type'}" eq "PS") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpFailPowerSupplyCount);
|
||||
if($check == 0) {
|
||||
@ -683,12 +711,12 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
$msg = "CRIT: $opt{'check_type'} Fail $check !";
|
||||
}
|
||||
$perf = "failedpowersupplies=$check";
|
||||
### CPULOAD ###
|
||||
### CPULOAD ###
|
||||
} elsif("$opt{'check_type'}" eq "CPULOAD") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpcpuBusyTimePerCent);
|
||||
($msg,$stat) = _clac_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$perf = "cpuload=$check\%;$opt{'warn'};$opt{'crit'};;";
|
||||
### NFSOPS ###
|
||||
### NFSOPS ###
|
||||
} elsif("$opt{'check_type'}" eq "NFSOPS") {
|
||||
my $nfsops_per_seconds=floor ( ($total_nfs_ops-$fileNfsOps)/$elapsedtime );
|
||||
|
||||
@ -696,7 +724,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
|
||||
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$perf = "nfsops=$check";
|
||||
### CIFSOPS ###
|
||||
### CIFSOPS ###
|
||||
} elsif("$opt{'check_type'}" eq "CIFSOPS") {
|
||||
my $cifsops_per_seconds=floor ( ($total_cifs_ops-$fileCifsOps)/$elapsedtime );
|
||||
|
||||
@ -704,7 +732,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
|
||||
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$perf = "cifsops=$check";
|
||||
### ISCSIOPS ###
|
||||
### ISCSIOPS ###
|
||||
} elsif("$opt{'check_type'}" eq "ISCSIOPS") {
|
||||
my $iscsiops_per_seconds=floor ( ($blocks_iscsi_ops-$fileIscsiOps)/$elapsedtime );
|
||||
my $iscsiread_per_seconds=floor ( ($blocks_iscsi_read-$fileIscsi64ReadBytes)/$elapsedtime );
|
||||
@ -716,7 +744,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$msg = "$msg ops/s (iscsi read=$iscsiread_per_seconds B/s, iscsi write=$iscsiwrite_per_seconds B/s, disk read=$diskread_per_seconds B/s, disk write=$diskwrite_per_seconds B/s)";
|
||||
$perf = "iscsiops=$check iscsiread=$iscsiread_per_seconds iscsiwrite=$iscsiwrite_per_seconds diskread=$diskread_per_seconds diskwrite=$diskwrite_per_seconds";
|
||||
### FCPOPS ###
|
||||
### FCPOPS ###
|
||||
} elsif("$opt{'check_type'}" eq "FCPOPS") {
|
||||
my $fcpops_per_seconds=floor ( ($blocks_fcp_ops-$fileFcpOps)/$elapsedtime );
|
||||
my $fcpread_per_seconds=floor ( ($blocks_fcp_read-$fileFcp64ReadBytes)/$elapsedtime );
|
||||
@ -729,7 +757,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$msg = "$msg ops/s (fcp read=$fcpread_per_seconds B/s, fcp write=$fcpwrite_per_seconds B/s, disk read=$diskread_per_seconds B/s, disk write=$diskwrite_per_seconds B/s)";
|
||||
$perf = "fcpops=$check fcpread=$fcpread_per_seconds fcpwrite=$fcpwrite_per_seconds diskread=$diskread_per_seconds diskwrite=$diskwrite_per_seconds";
|
||||
### NVRAM ###
|
||||
### NVRAM ###
|
||||
} elsif("$opt{'check_type'}" eq "NVRAM") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpnvramBatteryStatus);
|
||||
if($check == 1) {
|
||||
@ -740,7 +768,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
$msg = "CRIT: $opt{'check_type'} $nvramBatteryStatus{$check}";
|
||||
}
|
||||
$perf = "nvrambatterystatus=$check";
|
||||
### DISKUSED ###
|
||||
### DISKUSED ###
|
||||
} elsif("$opt{'check_type'}" eq "DISKUSED") {
|
||||
|
||||
FSyntaxError("Missing -v") unless defined $opt{'vol'};
|
||||
@ -779,7 +807,7 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
$msg .= " $$r_vol_tbl{$key}"
|
||||
}
|
||||
}
|
||||
### SNAPSHOTAGE ###
|
||||
### SNAPSHOTAGE ###
|
||||
} elsif("$opt{'check_type'}" eq "SNAPSHOTAGE") {
|
||||
|
||||
my @exc_list = split(',',$opt{'exclude'});
|
||||
@ -885,9 +913,9 @@ if("$opt{'check_type'}" eq "TEMP") {
|
||||
$stat = $ERRORS{'UNKNOWN'};
|
||||
$msg = "UNKNOW Errors";
|
||||
}
|
||||
$perf = "outdated_snapshots=$badcount";
|
||||
$perf = "outdated_snapshots=$badcount";
|
||||
|
||||
### SNAPSHOT ###
|
||||
### SNAPSHOT ###
|
||||
} elsif("$opt{'check_type'}" eq "SNAPSHOT") {
|
||||
my @exc_list = split(',',$opt{'exclude'});
|
||||
my @vol_err;
|
||||
@ -923,7 +951,7 @@ $perf = "outdated_snapshots=$badcount";
|
||||
}
|
||||
$perf = "snapoff=$err_count";
|
||||
|
||||
### FAILEDDISK ###
|
||||
### FAILEDDISK ###
|
||||
} elsif("$opt{'check_type'}" eq "FAILEDDISK") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpFailedDiskCount);
|
||||
if($check == 0) {
|
||||
@ -935,7 +963,7 @@ $perf = "outdated_snapshots=$badcount";
|
||||
}
|
||||
$perf = "faileddisks=$check";
|
||||
|
||||
### DISKSUMMARY ###
|
||||
### DISKSUMMARY ###
|
||||
} elsif("$opt{'check_type'}" eq "DISKSUMMARY") {
|
||||
my $diskTotal = _get_oid_value($snmp_session,$snmp_netapp_disksummary_diskTotalCount);
|
||||
my $diskActive = _get_oid_value($snmp_session,$snmp_netapp_disksummary_diskActiveCount);
|
||||
@ -958,7 +986,7 @@ $perf = "outdated_snapshots=$badcount";
|
||||
}
|
||||
$perf = "faileddisks=$check total=$diskTotal active=$diskActive spare=$diskSpare reconstructing=$diskReconstructing";
|
||||
|
||||
### HA ###
|
||||
### HA ###
|
||||
} elsif("$opt{'check_type'}" eq "HA") {
|
||||
|
||||
my $cfSettings = _get_oid_value($snmp_session,$snmp_netapp_cfSettings);
|
||||
@ -991,18 +1019,18 @@ $perf = "outdated_snapshots=$badcount";
|
||||
$perf = "hasettings=$check";
|
||||
|
||||
|
||||
### UPTIME ###
|
||||
### UPTIME ###
|
||||
} elsif("$opt{'check_type'}" eq "UPTIME") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpUpTime);
|
||||
$msg = "$opt{'check_type'}: $check";
|
||||
$check =~ m/^\s*(\d+)\s+days,\s+(\d+):(\d+):(\d+).*$/;
|
||||
$perf = "uptime=" . ($1*86400 + $2*3600 + $3*60 + $4) . "s";
|
||||
### CACHEAGE ###
|
||||
### CACHEAGE ###
|
||||
} elsif("$opt{'check_type'}" eq "CACHEAGE") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpCacheAge);
|
||||
($msg,$stat) = _clac_minutes_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$perf = "cache_age=$check";
|
||||
### GLOBALSTATUS ###
|
||||
### GLOBALSTATUS ###
|
||||
} elsif("$opt{'check_type'}" eq "GLOBALSTATUS") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpGlobalStatus);
|
||||
my $global_stat_txt = _get_oid_value($snmp_session,$snmpGlobalStatus_text);
|
||||
@ -1014,7 +1042,7 @@ $perf = "outdated_snapshots=$badcount";
|
||||
$msg = "CRIT: $opt{'check_type'} $GlobalStatusIndex{$check} $check $global_stat_txt";
|
||||
}
|
||||
$perf = "globalstatus=$check";
|
||||
### AUTOSUPPORTSTATUS ###
|
||||
### AUTOSUPPORTSTATUS ###
|
||||
} elsif("$opt{'check_type'}" eq "AUTOSUPPORTSTATUS") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpAutoSupportStatus);
|
||||
my $autosupport_stat_txt = _get_oid_value($snmp_session,$snmpAutoSupportStatus_text);
|
||||
@ -1026,17 +1054,17 @@ $perf = "outdated_snapshots=$badcount";
|
||||
$msg = "CRIT: $opt{'check_type'} $AutoSupportStatusIndex{$check} $check $autosupport_stat_txt";
|
||||
}
|
||||
$perf = "autosupportstatus=$check";
|
||||
### NDMPSESSIONS ###
|
||||
### NDMPSESSIONS ###
|
||||
} elsif("$opt{'check_type'}" eq "NDMPSESSIONS") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpNdmpSessions);
|
||||
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$perf = "ndmpsess=$check";
|
||||
### CIFSSESSIONS ###
|
||||
### CIFSSESSIONS ###
|
||||
} elsif("$opt{'check_type'}" eq "CIFSSESSIONS") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpCifsSessions);
|
||||
($msg,$stat) = _clac_absolute_err_stat($check,$opt{'check_type'},$opt{'warn'},$opt{'crit'});
|
||||
$perf = "cifssess=$check";
|
||||
### SHELF ###
|
||||
### SHELF ###
|
||||
} elsif ( ("$opt{'check_type'}" eq "SHELF") or ("$opt{'check_type'}" eq "SHELFINFO") ) {
|
||||
my @errs;
|
||||
my $r_shelf = $snmp_session->get_table($snmpEnclTableIndex);
|
||||
@ -1124,7 +1152,7 @@ $perf = "outdated_snapshots=$badcount";
|
||||
else
|
||||
{ $perf = "shelf=1"; }
|
||||
}
|
||||
### FSSTATUS ###
|
||||
### FSSTATUS ###
|
||||
} elsif("$opt{'check_type'}" eq "FSSTATUS") {
|
||||
my $check = _get_oid_value($snmp_session,$snmpfsOverallStatus);
|
||||
my $global_stat_txt = _get_oid_value($snmp_session,$snmpfsOverallStatus_text);
|
||||
@ -1140,7 +1168,7 @@ $perf = "outdated_snapshots=$badcount";
|
||||
}
|
||||
$perf = "fsstatus=$check";
|
||||
|
||||
### Syntax Error ###
|
||||
### Syntax Error ###
|
||||
} else {
|
||||
FSyntaxError("$opt{'check_type'} invalid parameter !");
|
||||
}
|
||||
@ -1149,3 +1177,4 @@ $msg =~ s/\n//g;
|
||||
$perf ? print "$msg | $perf\n" : print "$msg\n";
|
||||
|
||||
exit($stat);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user