update smart check, merge new code from original fork

This commit is contained in:
jvivona 2023-05-02 07:04:22 -04:00
parent 33a81ee7ee
commit c3fe84d9c9

View File

@ -55,15 +55,18 @@
# Dec 16, 2021: Lorenz Kaestle - Bugfix when interface parameter was missing in combination with -g (6.12.2) # Dec 16, 2021: Lorenz Kaestle - Bugfix when interface parameter was missing in combination with -g (6.12.2)
# Apr 27, 2022: Claudio Kuenzler - Allow skip temperature check (--skip-temp-check) (6.13.0) # Apr 27, 2022: Claudio Kuenzler - Allow skip temperature check (--skip-temp-check) (6.13.0)
# Apr 27, 2022: Peter Newman - Better handling of missing or non-executable smartctl command (6.13.0) # Apr 27, 2022: Peter Newman - Better handling of missing or non-executable smartctl command (6.13.0)
# Aug 2, 2022: Joe Vivona - Modified to run on pfSense boxes, which are FreeBSD Based but install things like PERL in difference locations # Apr 29, 2023: Nick Bertrand - Show drive(s) causing UNKNOWN status using -g/--global check (6.14.0)
# Aug 2, 2022: Joe Vivona - Removed inline sudo for pfSense - since overall script is run in sudo mode and therefor impersonating # Apr 29, 2023: Claudio Kuenzler - Add possibility to hide serial number (--hide-sn) (6.14.0)
# Apr 29, 2023: Claudio Kuenzler - Add default check on Load Cycle Count (ignore using --skip-load-cycles) (6.14.0)
# Aug 2, 2022: Joe Vivona - Modified to run on pfSense boxes, which are FreeBSD-based but install things like Perl in different locations. See line 1 above - pfSense path is /usr/local/bin/perl
use strict; use strict;
use Getopt::Long; use Getopt::Long;
use File::Basename qw(basename); use File::Basename qw(basename);
my $basename = basename($0); my $basename = basename($0);
my $revision = '6.13.0'; my $revision = '6.14.0';
# Standard Nagios return codes # Standard Nagios return codes
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4); my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);
@ -73,7 +76,7 @@ my @sys_path = qw(/usr/bin /bin /usr/sbin /sbin /usr/local/bin /usr/local/sbin);
$ENV{'BASH_ENV'}=''; $ENV{'BASH_ENV'}='';
$ENV{'ENV'}=''; $ENV{'ENV'}='';
use vars qw($opt_b $opt_d $opt_g $opt_debug $opt_h $opt_i $opt_e $opt_E $opt_r $opt_s $opt_v $opt_w $opt_q $opt_l $opt_skip_sa $opt_skip_temp); use vars qw($opt_b $opt_d $opt_g $opt_debug $opt_h $opt_i $opt_e $opt_E $opt_r $opt_s $opt_v $opt_w $opt_q $opt_l $opt_skip_sa $opt_skip_temp $opt_skip_load_cycles $opt_hide_sn);
Getopt::Long::Configure('bundling'); Getopt::Long::Configure('bundling');
GetOptions( GetOptions(
"debug" => \$opt_debug, "debug" => \$opt_debug,
@ -92,6 +95,8 @@ GetOptions(
"l" => \$opt_l, "ssd-lifetime" => \$opt_l, "l" => \$opt_l, "ssd-lifetime" => \$opt_l,
"skip-self-assessment" => \$opt_skip_sa, "skip-self-assessment" => \$opt_skip_sa,
"skip-temp-check" => \$opt_skip_temp, "skip-temp-check" => \$opt_skip_temp,
"skip-load-cycles" => \$opt_skip_load_cycles,
"hide-sn" => \$opt_hide_sn,
); );
if ($opt_v) { if ($opt_v) {
@ -244,6 +249,7 @@ my @drives_status_okay;
my @drives_status_not_okay; my @drives_status_not_okay;
my @drives_status_warning; my @drives_status_warning;
my @drives_status_critical; my @drives_status_critical;
my @drives_status_unknown;
my $drive_details; my $drive_details;
foreach $device ( split("\\|",$device) ){ foreach $device ( split("\\|",$device) ){
@ -357,8 +363,13 @@ foreach $device ( split("\\|",$device) ){
} }
if($line =~ /$line_serial_ata(.+)/){ if($line =~ /$line_serial_ata(.+)/){
warn "(debug) parsing line:\n$line\n\n" if $opt_debug; warn "(debug) parsing line:\n$line\n\n" if $opt_debug;
if($opt_hide_sn) {
$serial = "<HIDDEN>";
warn "(debug) Hiding serial number\n\n" if $opt_debug;
} else {
$serial = $1; $serial = $1;
$serial =~ s/^\s+|\s+$//g; $serial =~ s/^\s+|\s+$//g;
}
warn "(debug) found serial number $serial\n\n" if $opt_debug; warn "(debug) found serial number $serial\n\n" if $opt_debug;
} }
if($line =~ /$line_serial_scsi(.+)/){ if($line =~ /$line_serial_scsi(.+)/){
@ -500,6 +511,19 @@ foreach $device ( split("\\|",$device) ){
warn "(debug) SMART Attribute $attribute_name was set to be ignored\n\n" if $opt_debug; warn "(debug) SMART Attribute $attribute_name was set to be ignored\n\n" if $opt_debug;
next; next;
} else { } else {
# Alert on a high load cycle count (SMART attribute number 193) unless the
# user disabled this check via $opt_skip_load_cycles (--skip-load-cycles).
# Generally up to 600K cycles are considered safe on HDDs.
unless ($opt_skip_load_cycles) {
    if ($attribute_number == 193) {
        if ($raw_value > 600000) {
            # Beyond the 600K threshold: report as an error and raise the status.
            warn "(debug) $attribute_name is above value considered safe (600K)\n\n" if $opt_debug;
            push(@error_messages, "$attribute_name is above 600K load cycles ($raw_value) causing possible performance and durability impact");
            escalate_status('CRITICAL');
        } elsif ($raw_value > 550000) {
            # Reached only when 550000 < raw value <= 600000. The upper bound is
            # inclusive on purpose: the earlier `< 600000` test left a raw value
            # of exactly 600000 matching neither branch, so it went unreported.
            warn "(debug) $attribute_name is nearing 600K load cycles\n\n" if $opt_debug;
            push(@warning_messages, "$attribute_name is soon reaching 600K load cycles ($raw_value) causing possible performance and durability impact soon");
            # NOTE(review): unlike the CRITICAL branch, no escalate_status() call
            # is made here - confirm the warning is surfaced elsewhere.
        }
    }
}
# manual checks on raw values for certain attributes deemed significant # manual checks on raw values for certain attributes deemed significant
if (grep {$_ eq $attribute_name} @raw_check_list) { if (grep {$_ eq $attribute_name} @raw_check_list) {
if ($raw_value > 0) { if ($raw_value > 0) {
@ -735,6 +759,8 @@ foreach $device ( split("\\|",$device) ){
push @drives_status_warning, $status_string; push @drives_status_warning, $status_string;
} elsif ($exit_status_local eq 'CRITICAL') { } elsif ($exit_status_local eq 'CRITICAL') {
push @drives_status_critical, $status_string; push @drives_status_critical, $status_string;
} elsif ($exit_status_local eq 'UNKNOWN') {
push @drives_status_unknown, $status_string;
} }
} }
else { else {
@ -773,6 +799,10 @@ if (scalar(@drives_status_warning) > 0) {
push @drives_status_not_okay, @drives_status_warning; push @drives_status_not_okay, @drives_status_warning;
} }
# Fold drives whose status was UNKNOWN into the combined "not okay" list,
# but only when at least one such drive was recorded.
push(@drives_status_not_okay, @drives_status_unknown)
    if scalar(@drives_status_unknown) > 0;
if (@drives_status_not_okay) { if (@drives_status_not_okay) {
push @msg_list, grep { $_ } @drives_status_not_okay; push @msg_list, grep { $_ } @drives_status_not_okay;
} }
@ -830,6 +860,8 @@ sub print_help {
print " -l/--ssd-lifetime: Check attribute 'Percent_Lifetime_Remain' available on some SSD drives\n"; print " -l/--ssd-lifetime: Check attribute 'Percent_Lifetime_Remain' available on some SSD drives\n";
print " --skip-self-assessment: Skip SMART self-assessment health status check\n"; print " --skip-self-assessment: Skip SMART self-assessment health status check\n";
print " --skip-temp-check: Skip temperature comparison current vs. drive max temperature\n"; print " --skip-temp-check: Skip temperature comparison current vs. drive max temperature\n";
print " --skip-load-cycles: Do not alert on high load/unload cycle count (600K considered safe on hard drives)\n";
print " --hide-sn: Do not show drive serial number in output\n";
print " -h/--help: this help\n"; print " -h/--help: this help\n";
print " -q/--quiet: When faults detected, only show faulted drive(s) (only affects output when used with -g parameter)\n"; print " -q/--quiet: When faults detected, only show faulted drive(s) (only affects output when used with -g parameter)\n";
print " --debug: show debugging information\n"; print " --debug: show debugging information\n";