icinga-plugins/checks/check_hddtemp.sh

273 lines
5.5 KiB
Bash
Executable File

#!/bin/bash
################################################################################
#
# This plugin checks the temperature via hddtemp
#
# I wrote this check-Script, because all other tools didn't do what I wanted.
# The goal was:
# - hddtemp is to be used (small, fast)
# - a better output (with manufacturer and model)
# - deliver performance data
# - options in linux style
# - it should do its job perfectly
# - switchable from celsius to fahrenheit
#
# The warning and critical values are from a 2007 Google study (Failure Trends in a Large Disk Drive Population)
#
#
# USAGE:
# ./check_hddtemp.sh -d <device> -w <warn> -c <crit>
# Nagios script to get the temperature of an HDD from hddtemp
#
# You may have to let nagios run this script as root.
# This is how the sudoers file looks in my debian system:
# nagios ALL=(root) NOPASSWD:/<path>/<to>/check_hddtemp.sh
################################################################################
# Absolute path of the hddtemp binary; empty when hddtemp is not in PATH
# (type -P prints nothing in that case).
HDDTEMP="$(type -P hddtemp)"
# All command-line words joined into one string; only used to detect an
# invocation without any argument.
PARAMETER="$*"
# Default thresholds and unit; the command-line options may override them.
WARNING="40"
CRITICAL="45"
UNIT="c"
# Start from a known-empty state so that identically named variables inherited
# from the environment can neither inject a device nor switch perfdata on.
DEVICES=()
PERFDATA=""
# Print the usage summary (one line per option) on stdout.
# The defaults shown here have to match the initial WARNING/CRITICAL values
# assigned at the top of the script (40 and 45).
function hilfetext(){
cat <<-EOF
$(basename "$0") [OPTION...]
Options:
-d | --device
--fahrenheit
--celsius (default)
-w | --warning (default: 40)
-c | --critical (default: 45)
-p | --perfdata
-h | --help
EOF
}
# Parse the command line and store the result in the script's globals.
#
# Arguments: the command-line words of the plugin. Words that are not options
#            (the caller also hands over "$0" as first word) are ignored.
# Globals written:
#   DEVICES  - one element appended per -d/--device
#   UNIT     - "c" for --celsius, "f" for --fahrenheit
#   WARNING  - argument of -w/--warning
#   CRITICAL - argument of -c/--critical
#   PERFDATA - "1" when -p/--perfdata is given
# Exits:
#   0 after printing the help text for -h/--help
#   3 (UNKNOWN) when getopt rejects the command line
function ParameterEvaluation(){
  # Declared separately so that the status tested below is getopt's own.
  local options
  if ! options=$(getopt -n "${0##*/}" -o c:d:hpw: \
      --long fahrenheit,celsius,critical:,device:,help,perfdata,warning: \
      -- "$@"); then
    # Invalid arguments are an UNKNOWN (3) condition for a monitoring plugin;
    # exit code 1 would be displayed as WARNING.
    echo "Incorrect option provided"
    exit 3
  fi
  # getopt prints a normalised, shell-quoted argument list that always
  # contains "--"; eval is needed to honour that quoting.
  eval set -- "$options"
  # The $# guard keeps the loop finite even if "--" were ever missing.
  while (( $# > 0 )); do
    case "$1" in
      "-d"|"--device")
        DEVICES+=("$2")
        shift 2
        ;;
      "--celsius")
        UNIT="c"
        shift
        ;;
      "--fahrenheit")
        UNIT="f"
        shift
        ;;
      "-w"|"--warning")
        WARNING="$2"
        shift 2
        ;;
      "-c"|"--critical")
        CRITICAL="$2"
        shift 2
        ;;
      "-p"|"--perfdata")
        PERFDATA="1"
        shift
        ;;
      "-h"|"--help")
        hilfetext
        exit 0
        ;;
      "--")
        # End of options; everything after it is a non-option word.
        shift
        break
        ;;
      *)
        shift
        ;;
    esac
  done
}
# Make sure hddtemp is installed (HDDTEMP is empty when it is not in PATH).
if [[ ! -x "$HDDTEMP" ]]; then
  echo "hddtemp nicht gefunden"
  # A plugin that cannot run is UNKNOWN (3); exit code 1 would be shown as WARNING.
  exit 3
fi
# Without any option or argument only the help text is printed.
if [[ -z "$PARAMETER" ]]; then
  hilfetext
  exit 0
fi
ParameterEvaluation "$0" "$@"
# Debug
#echo -e "WARNING:\t\"$WARNING\""
#echo -e "CRITICAL:\t\"$CRITICAL\""
#for device in "${DEVICES[@]}"
#do
# echo -e "device: \"$device\""
#done
#echo -e "HELP\t\t\"$HELP\""
#echo -e "PERFDATA\t\t\"$PERFDATA\""
#echo -e "All args:\t\"$@\""
# Both thresholds must be plain non-negative integers; anything else would only
# make the numeric comparisons fail with an error message and be skipped.
for threshold in "$WARNING" "$CRITICAL"; do
  if [[ ! "$threshold" =~ ^[0-9]+$ ]]; then
    echo "WARNING and CRITICAL must be integers (WARNING=$WARNING;CRITICAL=$CRITICAL)"
    exit 3
  fi
done
# WARNING has to be below CRITICAL.
# The single-bracket test is used on purpose: it reads "08" as decimal 8.
if [ "$WARNING" -ge "$CRITICAL" ]; then
  echo "WARNING must be lower then CRITICAL (WARNING=$WARNING;CRITICAL=$CRITICAL)"
  exit 3
fi
# Every requested device must exist and be a block device. All devices are
# examined before leaving so that each offending one is reported.
# The flag is initialised explicitly so a value inherited from the environment
# cannot abort the check.
exitvar=0
for device in "${DEVICES[@]}"; do
  if [[ ! -b "$device" ]]; then
    echo "Device $device does not exist or is not a block-Device"
    exitvar=1
  fi
done
if [[ "$exitvar" == "1" ]]; then
  # Unusable arguments are UNKNOWN (3); exit code 1 would be shown as WARNING.
  exit 3
fi
# Map one temperature reading to a plugin status code, printed on stdout.
# Arguments:
#   $1 - digits of the temperature as extracted from the hddtemp output
#        (empty when the output contained no digits)
#   $2 - warning threshold
#   $3 - critical threshold
# Outputs: 0 (OK), 1 (WARNING), 2 (CRITICAL; also used when there is no
#          reading) or 3 (UNKNOWN; one of the values is not a plain integer)
function temperature_status(){
  local temp="$1" warn="$2" crit="$3"
  local integer='^[0-9]+$'
  if [[ -z "$temp" ]]; then
    echo 2
  elif [[ ! "$temp" =~ $integer || ! "$warn" =~ $integer || ! "$crit" =~ $integer ]]; then
    echo 3
  # Single-bracket tests on purpose: they read values like "08" as decimal.
  elif [ "$temp" -lt "$warn" ]; then
    echo 0
  elif [ "$temp" -lt "$crit" ]; then
    echo 1
  else
    echo 2
  fi
}

# Query hddtemp for every device given as argument and collect the results.
# Arguments: the devices to check
# Globals read:    HDDTEMP, UNIT, WARNING, CRITICAL, PERFDATA
# Globals written:
#   ausgabe     - one text fragment per device, each starting with a literal
#                 "\n" that the final "echo -e" turns into a line break
#   PERFRESULT  - "|" plus one perfdata entry per device (only with PERFDATA)
#   exitausgabe - highest status code of all devices; stays empty when no
#                 device was given, which the final evaluation reports as UNKNOWN
function check_devices(){
  local device status unit_letter="C"
  local RESULT OUTDEV OUTMODEL OUTTEMP OUTTEMPCLEAN
  if [[ "$UNIT" == "f" ]]; then
    unit_letter="F"
  fi
  ausgabe=""
  exitausgabe=""
  # Prepare the perfdata part of the plugin output.
  PERFRESULT=""
  if [[ -n "$PERFDATA" ]]; then
    PERFRESULT="|"
  fi
  for device in "$@"; do
    RESULT=$("$HDDTEMP" "--unit=$unit_letter" "$device")
    # The output is split at ':' into device, model and temperature.
    OUTDEV=$(printf '%s\n' "$RESULT" | awk -F: '{print $1}')
    OUTMODEL=$(printf '%s\n' "$RESULT" | awk -F: '{print $2}' | tr -cd '[:print:]' | tr -s ' ' ' ' | sed -e 's/^ //')
    OUTTEMP=$(printf '%s\n' "$RESULT" | awk -F: '{print $3}')
    OUTTEMPCLEAN=$(printf '%s' "$OUTTEMP" | tr -dc '0-9')
    # Keep the message meaningful when hddtemp printed nothing usable.
    if [[ -z "$OUTDEV" ]]; then
      OUTDEV="$device"
    fi
    # Debug
    #echo -e "OUTDEV:\t$\"$OUTDEV\""
    #echo -e "OUTMODEL:\t\"$OUTMODEL\""
    #echo -e "OUTTEMP:\t\"$OUTTEMP\""
    #echo -e "OUTTEMPCLEAN:\t\"$OUTTEMPCLEAN\""
    status=$(temperature_status "$OUTTEMPCLEAN" "$WARNING" "$CRITICAL")
    if [[ -z "$OUTTEMPCLEAN" ]]; then
      # No temperature in the output (e.g. no S.M.A.R.T. data): the device is
      # reported as CRITICAL and 999 is written to the perfdata.
      OUTTEMPCLEAN="999"
      ausgabe+="\n$OUTMODEL on $OUTDEV - no S.M.A.R.T.-Status "
    else
      ausgabe+="\n$OUTMODEL on $OUTDEV is $OUTTEMP "
    fi
    # perfdata: <device name>=<temperature>;<warn>;<crit>
    if [[ -n "$PERFDATA" ]]; then
      PERFRESULT+=" ${OUTDEV##*/}=$OUTTEMPCLEAN;$WARNING;$CRITICAL"
    fi
    # Keep the worst status over all devices instead of the last one.
    if [[ -z "$exitausgabe" ]] || [ "$status" -gt "$exitausgabe" ]; then
      exitausgabe="$status"
    fi
  done
}

check_devices "${DEVICES[@]}"
# Debug
#echo -e "Ausgabe:\t$ausgabe"
#echo -e "PERFRESULT:\t$PERFRESULT"
#echo -e "exitausgbe:\t$exitausgabe"
# Print the plugin result and leave with the matching status code.
# echo -e turns the "\n" sequences collected in $ausgabe into line breaks.
# Any value of $exitausgabe other than 0-3 (including an empty one) ends in
# the generic UNKNOWN message.
if [[ "$exitausgabe" == "0" ]]; then
  echo -e "OK $ausgabe $PERFRESULT"
  exit 0
elif [[ "$exitausgabe" == "1" ]]; then
  echo -e "WARNING $ausgabe $PERFRESULT"
  exit 1
elif [[ "$exitausgabe" == "2" ]]; then
  echo -e "CRITICAL $ausgabe $PERFRESULT"
  exit 2
elif [[ "$exitausgabe" == "3" ]]; then
  echo -e "UNKNOWN $ausgabe $PERFRESULT"
  exit 3
else
  echo -e "UNKNOWN - Error"
  exit 3
fi