mirror of
https://github.com/opinkerfi/nagios-plugins.git
synced 2024-11-05 01:53:44 +01:00
check_ibm_bladecenter service checks added
This commit is contained in:
parent
a55e1cef84
commit
2d2b20b120
128
check_ibm_bladecenter/README
Normal file
128
check_ibm_bladecenter/README
Normal file
@ -0,0 +1,128 @@
|
||||
check_ibm_bladecenter.py
|
||||
|
||||
# About this script
|
||||
#
|
||||
# This script will check the status of a remote IBM Bladecenter via SNMP.
|
||||
# Among other things the following are monitored:
|
||||
# * General Health
|
||||
# * Powermodule status
|
||||
# * Temperature
|
||||
# * Blade health
|
||||
# * Switchmodule Health
|
||||
# * Management Module health
|
||||
# * Blowers
|
||||
# * Chassis Sensors
|
||||
|
||||
|
||||
|
||||
# Usage
|
||||
|
||||
#------------------------------------------------------------------
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --help
|
||||
usage: check_ibm_bladecenter.py [options]
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
-m MODE, --mode=MODE Which check mode is in use (powermodules,system-
|
||||
health,temperature,chassis-
|
||||
status,bladehealth,blowers,switchmodules)
|
||||
-H HOST, --host=HOST Hostname or IP address of the host to check
|
||||
-w WARNING_THRESHOLD, --warning=WARNING_THRESHOLD
|
||||
Warning threshold
|
||||
-c CRITICAL_THRESHOLD, --critical=CRITICAL_THRESHOLD
|
||||
Critical threshold
|
||||
-e EXCLUDE, --exclude=EXCLUDE
|
||||
Exclude specific object
|
||||
-v SNMP_VERSION, --snmp_version=SNMP_VERSION
|
||||
SNMP Version to use (1, 2c or 3)
|
||||
-u SNMP_USERNAME, --snmp_username=SNMP_USERNAME
|
||||
SNMP username (only with SNMP v3)
|
||||
-C SNMP_COMMUNITY, --snmp_community=SNMP_COMMUNITY
|
||||
SNMP Community (only with SNMP v1|v2c)
|
||||
-p SNMP_PASSWORD, --snmp_password=SNMP_PASSWORD
|
||||
SNMP password (only with SNMP v3)
|
||||
-l SNMP_SECLEVEL, --snmp_security_level=SNMP_SECLEVEL
|
||||
SNMP security level (only with SNMP v3)
|
||||
(noAuthNoPriv|authNoPriv|authPriv)
|
||||
-d, --debug Enable debugging (for troubleshooting
|
||||
#------------------------------------------------------------------
|
||||
|
||||
# Example Usage:
|
||||
|
||||
|
||||
# Chassis-status
|
||||
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --host rek-blade-p01 --snmp_community public --snmp_version 1 --mode chassis-status
|
||||
OK - Blades OK. PowerModules OK. Switchmodules OK. Blowers OK. Media Trays OK. Other Sensors: OK. |
|
||||
|
||||
Other Sensors:
|
||||
bistRs485Port1 status: 0 (ok)
|
||||
bistRs485Port2 status: 0 (ok)
|
||||
bistLocalI2CBus status: 0 (ok)
|
||||
bistPrimaryMainAppFlashImage status: 0 (ok)
|
||||
bistSecondaryMainAppFlashImage status: 0 (ok)
|
||||
bistBootRomFlashImage status: 0 (ok)
|
||||
bistEthernetPort1 status: 0 (ok)
|
||||
bistExternalI2CDevices status: 0 (ok)
|
||||
bistInternalEthernetSwitch status: 0 (ok)
|
||||
|
||||
|
||||
|
||||
# System Health
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --host rek-blade-p01 --snmp_community public --snmp_version 1 --mode system-health
|
||||
OK - Bladecenter health: OK. Good: No critical or warning events |
|
||||
|
||||
|
||||
# Ambient Temperature
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --host rek-blade-p01 --snmp_community public --snmp_version 1 --mode temperature --warning 20 --critical 30
|
||||
Warning - ambient temperature (21.00 Centigrade) is over warning thresholds (20). | 'ambient_temp'=21.0;20;30
|
||||
|
||||
|
||||
# Blowers
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --host rek-blade-p01 --snmp_community public --snmp_version 1 --mode blowers
|
||||
OK - Blower1 OK. Blower1 OK. | blower1=55% blower2=55%
|
||||
|
||||
Blower 1 state=1 speed=55% of maximum
|
||||
Blower 2 state=1 speed=55% of maximum
|
||||
|
||||
# Powermodules
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --host rek-blade-p01 --snmp_community public --snmp_version 1 --mode powermodules
|
||||
OK - 4 out of 4 powermodules are healthy | 'Number of powermodules'=4
|
||||
|
||||
Powersupply "1" status "1". Power module status OK.
|
||||
Powersupply "2" status "1". Power module status OK.
|
||||
Powersupply "3" status "1". Power module status OK.
|
||||
Powersupply "4" status "1". Power module status OK.
|
||||
|
||||
# Switchmodules
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --host rek-blade-p01 --snmp_community public --snmp_version 1 --mode switchmodules
|
||||
OK - All switchmodules healthy |
|
||||
|
||||
Module1 health good.
|
||||
post=POST results available: Module completed POST successfully.
|
||||
type=CSCO ip=10.101.13.212
|
||||
Module2 health good.
|
||||
post=POST results available: Module completed POST successfully.
|
||||
type=CSCO ip=10.101.13.213
|
||||
Module3 health good.
|
||||
post=POST results available: Module completed POST successfully.
|
||||
type=BRCD ip=10.101.13.237
|
||||
Module4 health good.
|
||||
post=POST results available: Module completed POST successfully.
|
||||
type=BRCD ip=10.101.13.238
|
||||
|
||||
# Blades
|
||||
-bash-3.2$ python /nagios/usr/lib/nagios/plugins/check_ibm_bladecenter.py --host rek-blade-p01 --snmp_community public --snmp_version 1 --mode bladehealth
|
||||
OK - 8 out of 8 blades in Good health. |
|
||||
|
||||
blade1 (REK-SQLDB-P01): Good No critical or warning events
|
||||
blade2 (REK-FOREFR-P01): Good No critical or warning events
|
||||
blade3 (REK-CL-P06N1): Good No critical or warning events
|
||||
blade6 (REK-CL-P01N1): Good No critical or warning events
|
||||
blade8 (rek-sawm-p01): Good No critical or warning events
|
||||
blade10 (REK-FIX-P01): Good No critical or warning events
|
||||
blade11 (REK-SAPBW-D1): Good No critical or warning events
|
||||
blade14 (rek-oradb-t02): Good No critical or warning events
|
||||
|
||||
|
||||
|
527
check_ibm_bladecenter/check_ibm_bladecenter.py
Executable file
527
check_ibm_bladecenter/check_ibm_bladecenter.py
Executable file
@ -0,0 +1,527 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# Copyright 2010, Pall Sigurdsson <palli@opensource.is>
|
||||
#
|
||||
# This script is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This script is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# About this script
|
||||
#
|
||||
# This script will check the status of a remote IBM Bladecenter via SNMP.
|
||||
# Among other things the following are monitored:
|
||||
# * General Health
|
||||
# * Powermodule status
|
||||
# * Temperature
|
||||
# * Blade health
|
||||
# * Switchmodule Health
|
||||
# * Management Module health
|
||||
# * Blowers
|
||||
# * Chassis Sensors
|
||||
|
||||
|
||||
# No real need to change anything below here
|
||||
# Plugin version.
version = "1.0"

# Nagios exit codes, plus an internal "nothing recorded yet" marker.
ok, warning, critical, unknown = 0, 1, 2, 3
not_present = -1

# Worst status recorded so far.  It starts below every real status code, so
# the first call to nagios_status() always replaces it.
exit_status = -1

# Label printed in front of the summary for each status code.
state = {
    not_present: "Not Present",
    ok: "OK",
    warning: "Warning",
    critical: "Critical",
    unknown: "Unknown",
}

# Output buffers filled in by add_long(), add_perfdata() and add_summary().
longserviceoutput = "\n"
perfdata = ""
summary = ""
sudo = False
|
||||
|
||||
|
||||
from sys import exit
|
||||
from sys import argv
|
||||
from os import getenv,putenv,environ
|
||||
import subprocess
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Parse some Arguments
from optparse import OptionParser

parser = OptionParser()

# One entry per command line option, in the order they appear in --help:
# (short flag, long flag, keyword arguments for parser.add_option).
_option_table = (
    ("-m", "--mode", dict(
        dest="mode",
        help="Which check mode is in use (powermodules,system-health,temperature,chassis-status,bladehealth,blowers,switchmodules)")),
    ("-H", "--host", dict(
        dest="host",
        help="Hostname or IP address of the host to check")),
    ("-w", "--warning", dict(
        dest="warning_threshold",
        help="Warning threshold", type="int", default=None)),
    ("-c", "--critical", dict(
        type="int", dest="critical_threshold",
        help="Critical threshold", default=None)),
    ("-e", "--exclude", dict(
        dest="exclude",
        help="Exclude specific object", default=None)),
    ("-v", "--snmp_version", dict(
        dest="snmp_version",
        help="SNMP Version to use (1, 2c or 3)", default="1")),
    ("-u", "--snmp_username", dict(
        dest="snmp_username",
        help="SNMP username (only with SNMP v3)", default=None)),
    ("-C", "--snmp_community", dict(
        dest="snmp_community",
        help="SNMP Community (only with SNMP v1|v2c)", default=None)),
    ("-p", "--snmp_password", dict(
        dest="snmp_password",
        help="SNMP password (only with SNMP v3)", default=None)),
    ("-l", "--snmp_security_level", dict(
        dest="snmp_seclevel",
        help="SNMP security level (only with SNMP v3) (noAuthNoPriv|authNoPriv|authPriv)", default=None)),
    ("-d", "--debug", dict(
        dest="debug",
        help="Enable debugging (for troubleshooting", action="store_true", default=False)),
)
for _short_flag, _long_flag, _settings in _option_table:
    parser.add_option(_short_flag, _long_flag, **_settings)

(opts, args) = parser.parse_args()

# Host and mode have no usable default; parser.error() prints the usage
# text together with the message and terminates the script.
if opts.host is None:
    parser.error("Hostname (-H) is required.")
if opts.mode is None:
    parser.error("Mode (--mode) is required.")
|
||||
|
||||
# Command line switches shared by every snmpget/snmpwalk invocation.
snmp_options = ""


def set_snmp_options():
    """Build the global ``snmp_options`` string from the parsed options.

    Appends the version switch when a version is set.  For version "3" the
    username, security level and password are all mandatory and are added
    as ``-u``, ``-l`` and ``-A``; for any other version the community is
    mandatory and is added as ``-c``.  A missing mandatory value is reported
    through ``parser.error()``.

    The option values are inserted into the string exactly as given on the
    command line.
    """
    global snmp_options
    collected = snmp_options
    if opts.snmp_version is not None:
        collected += " -v%s" % opts.snmp_version

    if opts.snmp_version != "3":
        # SNMP v1 / v2c: community based access
        if opts.snmp_community is None:
            parser.error("--snmp_community is required with --snmp_version=1|2c")
        snmp_options = collected + " -c %s " % opts.snmp_community
        return

    # SNMP v3: user based access
    if opts.snmp_username is None:
        parser.error("--snmp_username required with --snmp_version=3")
    if opts.snmp_seclevel is None:
        parser.error("--snmp_security_level required with --snmp_version=3")
    if opts.snmp_password is None:
        parser.error("--snmp_password required with --snmp_version=3")
    credentials = (opts.snmp_username, opts.snmp_seclevel, opts.snmp_password)
    snmp_options = collected + " -u %s -l %s -A %s " % credentials
|
||||
|
||||
def error(errortext):
    """Print *errortext* as an error line and exit with the unknown status."""
    message = "* Error: %s" % errortext
    print(message)
    exit(unknown)
|
||||
|
||||
def debug(debugtext):
    """Print *debugtext*, but only when --debug was given."""
    if not opts.debug:
        return
    print(debugtext)
|
||||
|
||||
def nagios_status(newStatus):
    """Record *newStatus* and return the worst status seen so far.

    The global ``exit_status`` only ever moves upwards: it is replaced when
    *newStatus* is numerically larger than the value already stored.
    """
    global exit_status
    if newStatus > exit_status:
        exit_status = newStatus
    return exit_status
|
||||
|
||||
def runCommand(command):
    """Run *command* through the shell and return its standard output.

    Exits Nagios style if unsuccessful: when the command ends with any
    non-zero return code (including the negative codes reported for a
    process killed by a signal) the error is printed, a few hints for
    common causes are added, and the script exits with the unknown status.

    command -- complete shell command line as a single string
    """
    debug("Executing: %s" % command)
    # universal_newlines makes the pipes return text, so the string checks
    # on stderr below behave the same on every Python version.
    proc = subprocess.Popen(command, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            universal_newlines=True)
    # No stdin pipe is opened, so there is nothing to feed to the command.
    stdout, stderr = proc.communicate()
    if proc.returncode == 0:
        return stdout

    print("Error %s: %s\n command was: '%s'" % (proc.returncode, stderr.strip(), command))
    debug("results: %s" % (stdout.strip()))
    if proc.returncode == 127:  # File not found, lets print path
        path = getenv("PATH")
        print("Check if your path is correct %s" % (path))
    uses_sudo = command.startswith('sudo')
    if uses_sudo and stderr.startswith('Password:'):
        print("Check if user is in the sudoers file")
    if uses_sudo and stderr.startswith('sorry, you must have a tty to run sudo'):
        print("Please remove 'requiretty' from /etc/sudoers")
    exit(unknown)
|
||||
|
||||
def end():
    """Print the collected plugin output and terminate the script.

    The first line is "<status label> - <summary> | <perfdata>", followed by
    the long service output.  The label is looked up before a negative
    (never set) status is replaced by unknown, so in that case the label
    printed is the one stored for the negative code while the exit code is
    unknown.
    """
    global exit_status
    status_line = "%s - %s | %s" % (state[exit_status], summary, perfdata)
    print(status_line)
    print(longserviceoutput)
    if exit_status < 0:
        exit_status = unknown
    exit(exit_status)
|
||||
|
||||
def add_perfdata(text):
    """Append *text*, stripped and padded with one space on each side, to the
    global performance data string."""
    global perfdata
    entry = " %s " % (text.strip())
    perfdata = perfdata + entry
|
||||
|
||||
def add_long(text):
    """Append *text* as one more line of the global long service output."""
    global longserviceoutput
    longserviceoutput += text + '\n'
|
||||
|
||||
def add_summary(text):
    """Append *text* to the global summary; no separator is inserted."""
    global summary
    summary += text
|
||||
|
||||
def set_path(path):
    """Append *path* to the PATH environment variable.

    A PATH that contains "C:\\" is taken to mean a Windows system and ";" is
    used as separator; otherwise ":" is used.  When *path* is empty,
    "/usr/sbin" is appended on non-Windows systems and nothing is appended
    on Windows.

    path -- directory (or separator-joined directories) to append
    """
    # PATH may be missing from the environment altogether; treat that like
    # an empty PATH instead of failing on None.
    current_path = getenv('PATH') or ''
    if 'C:\\' in current_path:  # We are on this platform
        separator = ';'
    else:  # Unix/Linux, etc
        separator = ':'
        if path == '':
            path = "/usr/sbin"

    if path != '':
        if current_path:
            current_path = "%s%s%s" % (current_path, separator, path)
        else:
            # Do not start PATH with a separator when it was empty.
            current_path = path
    environ['PATH'] = current_path
|
||||
|
||||
|
||||
|
||||
def snmpget(oid):
    """Fetch a single OID from the host with ``snmpget`` and return its value.

    The reply is expected to look like "<oid> = <TYPE>: <value>".  The value
    is returned as a string; for STRING values the first and last character
    (the surrounding quotes) are removed.  A reply without a "<TYPE>: "
    prefix is returned as-is, the same way getTable() treats such lines.
    A reply without " = " is reported through error().

    oid -- the OID to query
    """
    snmpgetcommand = "snmpget %s %s %s" % (snmp_options, opts.host, oid)
    output = runCommand(snmpgetcommand).strip()

    reply = output.split(' = ', 1)
    if len(reply) != 2:
        error("Unexpected snmpget output for %s: %s" % (oid, output))
    result = reply[1]

    typed = result.split(': ', 1)
    if len(typed) != 2:
        # No type prefix, hand back whatever was returned.
        return result
    resultType, resultValue = typed
    if resultType == 'STRING':  # strip quotes of the string
        resultValue = resultValue[1:-1]
    return resultValue
|
||||
|
||||
# Example: snmpwalk -v3 -u <username> -l authNoPriv -A <password> <host> 1.3.6.1.4.1.15497
def snmpwalk(base_oid):
    """Walk the subtree below *base_oid* with ``snmpwalk``.

    Returns the raw, unparsed output of the command.  The OID is passed to
    the command exactly once.

    base_oid -- OID of the subtree to walk
    """
    snmpwalkcommand = "snmpwalk %s %s %s" % (snmp_options, opts.host, base_oid)
    return runCommand(snmpwalkcommand)
|
||||
|
||||
def getTable(base_oid):
    """Walk *base_oid* and return the values as a two-level dictionary.

    Every output line of the form "<oid> = <TYPE>: <value>" is stored as
    ``table[last][second_to_last] = value`` where *last* and
    *second_to_last* are the final two dot-separated components of the OID,
    converted to integers.  Lines without " = " are ignored.  STRING values
    lose their first and last character (the surrounding quotes); results
    without a "<TYPE>: " prefix are stored unchanged.

    base_oid -- OID of the subtree to walk
    """
    table = {}
    for raw_line in snmpwalk(base_oid).split('\n'):
        oid, found, result = raw_line.strip().partition(' = ')
        if not found:
            continue

        value_type, found, value = result.partition(': ')
        if not found:
            # no type prefix, keep the whole result as the value
            value_type, value = None, result
        if value_type == 'STRING':  # strip quotes of the string
            value = value[1:-1]

        components = oid.strip().split('.')
        last = int(components[-1])
        second_to_last = int(components[-2])
        table.setdefault(last, {})[second_to_last] = value
    return table
|
||||
|
||||
def check_powermodules():
    """Check the status of every power module.

    A module whose status value is not "1" raises a warning and is named in
    the summary.  Every module that is not excluded with --exclude gets a
    line in the long output.  The number of entries returned by the walk is
    reported as performance data.
    """
    # Keys of the values used from each table entry.
    col_index, col_exists, col_status, col_details = 1, 2, 3, 4

    modules = getTable('1.3.6.1.4.1.2.3.51.2.2.4')
    healthy = 0
    for entry in modules.values():
        module_id = entry[col_index]
        module_status = entry[col_status]
        module_details = entry[col_details]
        module_exists = entry[col_exists]  # fetched, but not evaluated below
        if module_id == opts.exclude:
            continue

        report = (module_id, module_status, module_details)
        if module_status == "1":
            healthy = healthy + 1
        else:
            nagios_status(warning)
            add_summary('Powermodule "%s" status "%s". %s. ' % report)
        add_long('Powersupply "%s" status "%s". %s. ' % report)

    total = len(modules)
    add_summary("%s out of %s powermodules are healthy" % (healthy, total))
    add_perfdata("'Number of powermodules'=%s" % (total))

    nagios_status(ok)
|
||||
|
||||
def check_switchmodules():
    """Check the health of every installed switch module.

    A module is treated as installed when value 3 of its table entry is "1".
    An installed module whose health state (value 15) is not "1" raises a
    warning and is named in the summary.  When the optional extra-info
    table has an entry for the module, its type and IP address are added to
    the long output.
    """
    switchmodules = getTable("1.3.6.1.4.1.2.3.51.2.22.3.1.1")
    # The following oid is undocumented, but contains some useful extra info
    try:
        extrainfo = list(getTable("1.3.6.1.4.1.2.3.51.2.22.3.1.7").values())
    except (Exception, SystemExit):
        # runCommand() reports a failed walk by calling exit(), which raises
        # SystemExit; the extra info is optional, so carry on without it.
        extrainfo = []

    for module in switchmodules.values():
        myIndex = module[1]
        healthstate = module[15]
        resultavailable = module[3]
        resultvalue = module[4]
        if resultavailable != "1":
            # this module is not installed
            continue

        if healthstate == "1":
            nagios_status(ok)
            add_long("Module%s health good.\n post=%s" % (myIndex, resultvalue))
        else:
            nagios_status(warning)
            add_long("Module%s health bad.\n post=%s" % (myIndex, resultvalue))
            add_summary("Problem with Module %s. " % (myIndex))

        # extrainfo is addressed by position: module N is entry N-1, so the
        # last module is still valid when len(extrainfo) == N.
        position = int(myIndex)
        if 1 <= position <= len(extrainfo):
            myExtraInfo = extrainfo[position - 1]
            module_type = myExtraInfo[22]
            module_ip = myExtraInfo[6]
            add_long(" type=%s ip=%s" % (module_type, module_ip))

    if exit_status == ok:
        add_summary("All switchmodules healthy")
|
||||
|
||||
|
||||
def check_blowers():
    """Check blower status.

    Reads speed and state of blower 1 and blower 2.  A blower whose state is
    not "1" raises a warning; when both blowers are in that condition the
    status is raised to critical.  The first word of each speed value is
    reported as performance data.
    """
    # This mib only seems to support 2 blowers.
    # (blower number, speed OID, state OID)
    blower_oids = (
        (1, "1.3.6.1.4.1.2.3.51.2.2.3.1.0", "1.3.6.1.4.1.2.3.51.2.2.3.10.0"),
        (2, "1.3.6.1.4.1.2.3.51.2.2.3.2.0", "1.3.6.1.4.1.2.3.51.2.2.3.11.0"),
    )
    readings = []
    for number, speed_oid, state_oid in blower_oids:
        speed = snmpget(speed_oid)
        blower_state = snmpget(state_oid)
        readings.append((number, speed, blower_state))

    for number, speed, blower_state in readings:
        add_long("Blower %s state=%s speed=%s" % (number, blower_state, speed))
    for number, speed, blower_state in readings:
        add_perfdata("blower%s=%s" % (number, speed.split(None, 1)[0]))

    failed = 0
    for number, speed, blower_state in readings:
        if blower_state == "1":
            nagios_status(ok)
            # Each blower is reported under its own number.
            add_summary("Blower%s OK. " % number)
        else:
            add_summary("Blower%s NOT OK. " % number)
            nagios_status(warning)
            failed += 1

    # Losing every blower is worse than losing one of them.
    if failed == len(readings):
        nagios_status(critical)
|
||||
|
||||
|
||||
def _check_bist_pair(oids, first, second, label_na, label_ok, label_bad,
                     absent_is_ok=False):
    """Compare two chassis self-test values and record the outcome.

    oids         -- dictionary of self-test values keyed by number
    first/second -- the two keys whose values are expected to be equal
    label_na     -- summary text used when either key is missing
    label_ok     -- summary text used when both values are equal
    label_bad    -- summary text used when the values differ (warning)
    absent_is_ok -- when true, a missing key also registers an OK status
    """
    if first not in oids or second not in oids:
        if absent_is_ok:
            nagios_status(ok)
        add_summary(label_na)
    elif oids[first] == oids[second]:
        nagios_status(ok)
        add_summary(label_ok)
    else:
        nagios_status(warning)
        add_summary(label_bad)


def check_chassis_status():
    """Check the chassis self-test (bist*) values.

    For blades, power modules, switch modules, blowers and media trays the
    "installed" value is compared with the matching "communicating" or
    "functional" value; a difference raises a warning.  In addition, every
    sensor listed in ``status_oids`` that has a name in ``chassis_oid`` and
    was returned by the walk must have the value "0", otherwise a warning is
    raised and the sensor is named in the summary.  If the walk returns
    nothing at all the status is set to unknown.
    """
    chassis = getTable('1.3.6.1.4.1.2.3.51.2.2.5.2')
    if not chassis:
        nagios_status(unknown)
        add_summary("No chassis self-test data returned. ")
        return
    oids = list(chassis.values())[0]

    # Names of the self-test values, keyed by number.
    chassis_oid = {
        1: "bistSdram",
        2: "bistRs485Port1",
        3: "bistRs485Port2",
        4: "bistNvram",
        6: "bistRtc",
        7: "bistLocalI2CBus",
        8: "bistPrimaryMainAppFlashImage",
        9: "bistSecondaryMainAppFlashImage",
        10: "bistBootRomFlashImage",
        11: "bistEthernetPort1",
        12: "bistEthernetPort2",
        13: "bistInternalPCIBus",
        14: "bistExternalI2CDevices",
        15: "bistUSBController",
        16: "bistVideoCompressorBoard",
        17: "bistPrimaryBus",
        18: "bistInternalEthernetSwitch",
        33: "bistBladesInstalled",
        49: "bistBladesCommunicating",
        65: "bistBlowersInstalled",
        73: "bistBlowersFunctional",
        74: "bistMediaTrayInstalled",
        75: "bistMediaTrayCommunicating",
        81: "bistPowerModulesInstalled",
        89: "bistPowerModulesFunctional",
        97: "bistSwitchModulesInstalled",
        113: "bistSwitchModulesCommunicating",
    }

    bistBladesInstalled = 33
    bistBlowersInstalled = 65
    bistMediaTrayInstalled = 74
    bistPowerModulesInstalled = 81
    bistSwitchModulesInstalled = 97

    bistSwitchModulesCommunicating = 113
    bistBladesCommunicating = 49
    bistMediaTrayCommunicating = 75
    bistBlowersFunctional = 73
    bistPowerModulesFunctional = 89

    # Check Blade Communications
    _check_bist_pair(oids, bistBladesInstalled, bistBladesCommunicating,
                     "Blades N/A. ", "Blades OK. ", "Blades NOT OK. ")
    # Check PowerModule Status
    _check_bist_pair(oids, bistPowerModulesFunctional, bistPowerModulesInstalled,
                     "Powermodules N/A. ", "PowerModules OK. ", "PowerModules NOT OK. ")
    # Check SwitchModule Communications.  Missing values are reported as N/A
    # and are not looked up afterwards.
    _check_bist_pair(oids, bistSwitchModulesCommunicating, bistSwitchModulesInstalled,
                     "SwitchModules N/A. ", "Switchmodules OK. ", "Switchmodules NOT OK. ")
    # Check blower status
    _check_bist_pair(oids, bistBlowersInstalled, bistBlowersFunctional,
                     "Blowers N/A. ", "Blowers OK. ", "Blowers NOT OK. ")
    # Check Media Tray Status; missing media tray values count as OK.
    _check_bist_pair(oids, bistMediaTrayCommunicating, bistMediaTrayInstalled,
                     "Media Trays N/A. ", "Media Trays OK. ", "Media Trays NOT OK. ",
                     absent_is_ok=True)

    # status_oids: values for which "0" means ok
    status_oids = (2, 3, 5, 7, 8, 9, 10, 11, 14, 18, 19, 20, 21, 22, 23, 24,
                   25, 26, 27, 28, 29, 30,)

    add_long("Other Sensors: ")
    sensor_status = ok
    for oid in status_oids:
        # Skip numbers without a known name and values the walk did not
        # return.
        if oid not in chassis_oid or oid not in oids:
            continue
        oidValue = oids[oid]
        oidName = chassis_oid[oid]
        if oidValue == "0":
            friendly_status = "%s (ok)" % oidValue
        else:
            friendly_status = "%s (not ok)" % oidValue
            nagios_status(warning)
            sensor_status = warning
            add_summary("%s is %s. " % (oidName, friendly_status))
        add_long(" %s status: %s" % (oidName, friendly_status))
    if sensor_status == ok:
        add_summary("Other Sensors: OK. ")
|
||||
|
||||
|
||||
def check_bladehealth():
    """Check the health summary of every blade.

    Entries whose severity is "(No severity)" are skipped.  Every other
    blade is listed in the long output; a severity other than "Good" raises
    a warning and the blade is named in the summary.  The blade name is
    taken from value 6 of the entry at the same position in a second table.
    """
    # Keys of the values used from each health entry.
    col_index, col_bladeid, col_severity, col_description = (1, 2, 3, 4)

    health_rows = getTable('1.3.6.1.4.1.2.3.51.2.22.1.5.2.1')
    state_rows = list(getTable('1.3.6.1.4.1.2.3.51.2.22.1.5.1.1').values())

    healthy = 0
    counted = 0
    for position, entry in enumerate(health_rows.values()):
        blade_index = entry[col_index]
        blade_id = entry[col_bladeid]
        blade_severity = entry[col_severity]
        blade_description = entry[col_description]
        blade_name = state_rows[position][6]
        if blade_severity == "(No severity)":
            continue

        details = (blade_id, blade_name, blade_severity, blade_description)
        add_long("blade%s (%s): %s %s" % details)
        counted += 1
        if blade_severity != 'Good':
            nagios_status(warning)
            add_summary("blade%s (%s): %s %s. " % details)
            continue
        nagios_status(ok)
        healthy += 1

    if healthy != counted:
        nagios_status(warning)
    else:
        add_summary("%s out of %s blades in Good health. " % (healthy, counted))
        nagios_status(ok)
|
||||
def check_systemhealth():
    """Check the overall system health and the health summary entries.

    The overall health value is mapped as follows: "255" is OK, "2" is a
    warning, "4" and "0" are critical, and anything else is unknown.  Each
    entry of the health summary table is then added to the summary; its
    severity raises the status to OK ("Good"), warning ("Warning") or
    critical (anything else).
    """
    systemhealthstat = snmpget('1.3.6.1.4.1.2.3.51.2.2.7.1.0')
    events = getTable('1.3.6.1.4.1.2.3.51.2.2.7.2.1')
    # Keys of the values used from each summary entry.
    severity, description = (2, 3)

    # Check overall health
    known_states = {
        '255': (ok, "Bladecenter health: OK. "),
        '2': (warning, "Non-Critical Error. "),
        '4': (critical, "System-Level Error. "),
        '0': (critical, "Critical. "),
    }
    if systemhealthstat in known_states:
        status, text = known_states[systemhealthstat]
        nagios_status(status)
        add_summary(text)
    else:
        nagios_status(unknown)
        add_summary("Bladecenter health unkown (oid 1.3.6.1.4.1.2.3.51.2.2.7.1.0 returns %s). " % systemhealthstat)

    for row in events.values():
        if row[severity] == 'Good':
            nagios_status(ok)
        elif row[severity] == 'Warning':
            nagios_status(warning)
        else:
            nagios_status(critical)
        add_summary("%s: %s" % (row[severity], row[description]))
|
||||
|
||||
def check_temperature():
    """Compare the ambient temperature with the warning/critical thresholds.

    Thresholds not given on the command line default to 28 (warning) and 35
    (critical).  The reading is expected to consist of a number followed by
    a unit; the number is compared with the thresholds and reported as
    performance data together with both thresholds.
    """
    # set some sensible defaults
    if opts.warning_threshold is None:
        opts.warning_threshold = 28
    if opts.critical_threshold is None:
        opts.critical_threshold = 35
    warn_limit = opts.warning_threshold
    crit_limit = opts.critical_threshold

    reading = snmpget('1.3.6.1.4.1.2.3.51.2.2.1.5.1.0')
    number_text, measurement = reading.split(None, 1)
    degrees = float(number_text)

    if degrees > crit_limit:
        nagios_status(critical)
        add_summary("ambient temperature (%s) is over critical thresholds (%s). " % (reading, crit_limit))
    elif degrees > warn_limit:
        nagios_status(warning)
        add_summary("ambient temperature (%s) is over warning thresholds (%s). " % (reading, warn_limit))
    else:
        add_summary("Ambient temperature = %s. " % (reading))

    add_perfdata("'ambient_temp'=%s;%s;%s " % (degrees, warn_limit, crit_limit))
    nagios_status(ok)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    set_snmp_options()
    # Every accepted --mode value and the function that implements it.
    mode_handlers = {
        'powermodules': check_powermodules,
        'system-health': check_systemhealth,
        'temperature': check_temperature,
        'chassis-status': check_chassis_status,
        'bladehealth': check_bladehealth,
        'blowers': check_blowers,
        'switchmodules': check_switchmodules,
    }
    handler = mode_handlers.get(opts.mode)
    if handler is None:
        # parser.error() terminates the script
        parser.error("%s is not a valid option for --mode" % opts.mode)
    handler()
    end()
|
Loading…
Reference in New Issue
Block a user