mirror of
https://github.com/opinkerfi/nagios-plugins.git
synced 2024-11-24 11:23:47 +01:00
Various improvements. See comments in script
This commit is contained in:
parent
7dc3b57582
commit
24a1b980a1
150
check_eva/check_eva.py
Normal file → Executable file
150
check_eva/check_eva.py
Normal file → Executable file
@ -21,6 +21,35 @@
|
|||||||
# You will need the sssu binary in path (/usr/bin/sssu is a good place)
|
# You will need the sssu binary in path (/usr/bin/sssu is a good place)
|
||||||
# If you do not have sssu, check your commandview CD, it should have both
|
# If you do not have sssu, check your commandview CD, it should have both
|
||||||
# binaries for Windows and Linux
|
# binaries for Windows and Linux
|
||||||
|
#
|
||||||
|
# UPDATE HISTORY:
|
||||||
|
# 22 Jul 2015: Alastair Munro:
|
||||||
|
# Disk failures need an Enclosure and Bay location so we can get failed disks easily replaced. Thus
|
||||||
|
# changed objectname to this for disk checks.
|
||||||
|
# Disk checks: include the comments field for the eva, so we can easily log a ticket with HP (we
|
||||||
|
# System check: included comments
|
||||||
|
# include eva serial number and DC cabinet location in here).
|
||||||
|
# If check_system and system specified; drop system name from perf data fields and add Gb.
|
||||||
|
# Turn off perfdata for disk shelves; we don't need to graph how many fc ports it has, etc; these rarely change!
|
||||||
|
#
|
||||||
|
# 17 Mar 2016: Alastair Munro:
|
||||||
|
# No --system in the help; I wanted to add this and only discovered it by looking at the code!
|
||||||
|
# Bring back reporting number of disks checked.
|
||||||
|
# Cleaned up error reporting on failed disks.
|
||||||
|
# Added --option and then noemptybays. All disk shelves should be fully populated with disks and all
|
||||||
|
# shelves have the same number of disks. If a disk fails, it may get evicted and this will catch this.
|
||||||
|
# This is part of the check_disks mode. Report warning if bays not full.
|
||||||
|
#
|
||||||
|
# 04 Apr 2016: Alastair Munro:
|
||||||
|
# notinstalled is not a valid state for fans; especially for disk shelves. Thus alert on this.
|
||||||
|
# check operationalstatedetail is not _ok. Sometimes objects report good but the detail is not _ok (eg _attention).
|
||||||
|
# for disk enclosure, advise enclosure name and state before printing number of sensors, fans, etc.
|
||||||
|
#
|
||||||
|
# 10 May 2016: Alastair Munro:
|
||||||
|
# check_controllers: powersources searching for key status rather than state. Now identifies failed/missing power supplies.
|
||||||
|
#
|
||||||
|
# 20 May 2016: Alastair Munro:
|
||||||
|
# noemptybays not working as expected; tweaked to count disks rather than highest disk.
|
||||||
|
|
||||||
|
|
||||||
# Some Defaults
|
# Some Defaults
|
||||||
@ -42,6 +71,7 @@ do_phone_home = False
|
|||||||
escape_newlines = False
|
escape_newlines = False
|
||||||
check_system = None # By default check all systems
|
check_system = None # By default check all systems
|
||||||
proxyserver = None
|
proxyserver = None
|
||||||
|
options = None
|
||||||
timeout = 0 # 0 means no timeout
|
timeout = 0 # 0 means no timeout
|
||||||
|
|
||||||
|
|
||||||
@ -94,12 +124,16 @@ def print_help():
|
|||||||
print " [--password <password]"
|
print " [--password <password]"
|
||||||
print " [--path </path/to/sssu>]"
|
print " [--path </path/to/sssu>]"
|
||||||
print " [--mode <mode>] "
|
print " [--mode <mode>] "
|
||||||
|
print " [--system <eva>] "
|
||||||
print " [--test]"
|
print " [--test]"
|
||||||
print " [--timeout <timeout>]"
|
print " [--timeout <timeout>]"
|
||||||
|
print " [--options <noemptybays>]"
|
||||||
print " [--debug]"
|
print " [--debug]"
|
||||||
print " [--help]"
|
print " [--help]"
|
||||||
print ""
|
print ""
|
||||||
print " Valid modes are: %s" % ', '.join(valid_modes)
|
print " Valid modes are: %s" % ', '.join(valid_modes)
|
||||||
|
print " --options are dependant on --mode:"
|
||||||
|
print " noemptybays (check_disks): don't ignore empty bays as a disk may have been removed. Assumes all bays are populated."
|
||||||
print ""
|
print ""
|
||||||
print "Example: %s --host commandview.example.net --username eva --password myPassword --mode check_systems" % (argv[0])
|
print "Example: %s --host commandview.example.net --username eva --password myPassword --mode check_systems" % (argv[0])
|
||||||
|
|
||||||
@ -163,6 +197,8 @@ while len(arguments) > 0:
|
|||||||
proxyserver = arguments.pop(0)
|
proxyserver = arguments.pop(0)
|
||||||
elif arg == '--escape-newlines':
|
elif arg == '--escape-newlines':
|
||||||
escape_newlines = True
|
escape_newlines = True
|
||||||
|
elif arg == '--options':
|
||||||
|
options = arguments.pop(0)
|
||||||
elif arg == '-h' or arg == '--help':
|
elif arg == '-h' or arg == '--help':
|
||||||
print_help()
|
print_help()
|
||||||
exit(ok)
|
exit(ok)
|
||||||
@ -473,6 +509,43 @@ def check_operationalstate(my_object, print_failed_objects=False, namefield='obj
|
|||||||
(my_object[namefield], my_object['operationalstate'], my_object[detailfield]))
|
(my_object[namefield], my_object['operationalstate'], my_object[detailfield]))
|
||||||
return ok
|
return ok
|
||||||
|
|
||||||
|
# Count no. disks per shelf:
|
||||||
|
# Count the number of disks per shelf; the highest count is the number to expect on every shelf.
|
||||||
|
# Report any shelves not equal to highest value.
|
||||||
|
# An oddity is that there may be a gap in the numbering!
|
||||||
|
#
|
||||||
|
def check_numdisks_pershelf(disk,systemname):
    """Check that every disk shelf of one system holds the same number of disks.

    The disks are tallied per shelf and the highest tally is taken as the
    number of disks each shelf is expected to hold.  Any shelf holding fewer
    disks than that is reported.  Note that if every shelf is missing the
    same number of disks, nothing is reported, because the expected count is
    derived from the data itself.

    Parameters:
        disk:       iterable of dicts, one per disk; each must carry a
                    'shelfnumber' key whose value is convertible with int()
                    (it is used to sort the shelves numerically).
        systemname: name of the system the disks belong to; used in the
                    message text and echoed back in the result.

    Returns:
        dict with the keys:
            'systemname' - the systemname argument, unchanged
            'state'      - 0 when all shelves hold the same number of disks,
                           1 when at least one shelf is short of disks or
                           when no disks were supplied at all
            'text'       - message starting and ending with a newline

    Raises:
        KeyError:   if a disk entry has no 'shelfnumber'.
        ValueError: if a shelf number cannot be converted with int().
    """
    rtn = {}
    rtn['systemname'] = systemname
    rtn['state'] = 0
    rtn['text'] = None

    # Tally disks per shelf.  We count disks rather than look at the highest
    # bay number, because there may be gaps in the bay numbering.
    bay = {}
    for x in disk:
        shelf = x['shelfnumber']
        bay[shelf] = bay.get(shelf, 0) + 1

    # Without any disks there is nothing to compare (and max() below would
    # raise ValueError).  This check assumes fully populated shelves, so an
    # empty disk list is reported as a problem instead of crashing.
    if not bay:
        rtn['state'] = 1
        rtn['text'] = "\n%s: No disks found; unable to check disk shelves for empty bays.\n" % systemname
        return rtn

    maxdisk = max(bay.values())
    ns = len(bay)

    # Shelves holding fewer disks than the fullest shelf, in numeric order.
    short_shelves = ["shelf%s=%d" % (k, bay[k])
                     for k in sorted(bay, key=int)
                     if bay[k] < maxdisk]

    if short_shelves:
        rtn['state'] = 1
        rtn['text'] = "\n%s: Failed disk/s? Some of the %d shelves have < %d disks: %s." % (
            systemname, ns, maxdisk, ", ".join(short_shelves))
    else:
        rtn['text'] = "\n%s: All %d disk shelves have %d disks each." % (systemname, ns, maxdisk)

    rtn['text'] += "\n"
    return rtn
|
||||||
|
|
||||||
|
|
||||||
def check_generic(command="ls disk full", namefield="objectname", perfdata_fields=None, longserviceoutputfields=None, detailedsummary=False):
|
def check_generic(command="ls disk full", namefield="objectname", perfdata_fields=None, longserviceoutputfields=None, detailedsummary=False):
|
||||||
if not perfdata_fields:
|
if not perfdata_fields:
|
||||||
@ -480,6 +553,7 @@ def check_generic(command="ls disk full", namefield="objectname", perfdata_field
|
|||||||
if not longserviceoutputfields:
|
if not longserviceoutputfields:
|
||||||
longserviceoutputfields = []
|
longserviceoutputfields = []
|
||||||
global perfdata
|
global perfdata
|
||||||
|
global options
|
||||||
nagios_state = ok
|
nagios_state = ok
|
||||||
systems = run_sssu()
|
systems = run_sssu()
|
||||||
objects = []
|
objects = []
|
||||||
@ -490,10 +564,20 @@ def check_generic(command="ls disk full", namefield="objectname", perfdata_field
|
|||||||
else:
|
else:
|
||||||
for i in systems:
|
for i in systems:
|
||||||
result = run_sssu(system=i['objectname'], command=command)
|
result = run_sssu(system=i['objectname'], command=command)
|
||||||
|
if options == "noemptybays":
|
||||||
|
shelves=check_numdisks_pershelf(result,i['objectname'])
|
||||||
|
nagios_state = max(shelves['state'], nagios_state)
|
||||||
|
longoutput(shelves['text'])
|
||||||
|
|
||||||
for x in result:
|
for x in result:
|
||||||
x['systemname'] = i['objectname']
|
x['systemname'] = i['objectname']
|
||||||
|
x['comments'] = i['comments']
|
||||||
objects.append(x)
|
objects.append(x)
|
||||||
summary = "%s objects found " % len(objects)
|
|
||||||
|
|
||||||
|
summary = "%s objects " % len(objects)
|
||||||
|
#print objects # debug
|
||||||
|
|
||||||
usedstoragespacegb = 0
|
usedstoragespacegb = 0
|
||||||
occupancyalarmlvel = 0
|
occupancyalarmlvel = 0
|
||||||
warninggb = 0
|
warninggb = 0
|
||||||
@ -504,6 +588,9 @@ def check_generic(command="ls disk full", namefield="objectname", perfdata_field
|
|||||||
objectname = i[namefield]
|
objectname = i[namefield]
|
||||||
else:
|
else:
|
||||||
objectname = i['objectname']
|
objectname = i['objectname']
|
||||||
|
|
||||||
|
if command == "ls disk full":
|
||||||
|
encbay = "Enc%s_Bay%s" % (i['shelfnumber'], i['diskbaynumber'] )
|
||||||
# Some versions of CV also return garbage objects, luckily it is easy
|
# Some versions of CV also return garbage objects, luckily it is easy
|
||||||
# to find these
|
# to find these
|
||||||
if i.has_key('objecttype') and i['objecttype'] == 'typenotset':
|
if i.has_key('objecttype') and i['objecttype'] == 'typenotset':
|
||||||
@ -513,18 +600,42 @@ def check_generic(command="ls disk full", namefield="objectname", perfdata_field
|
|||||||
# Lets see if this object is working
|
# Lets see if this object is working
|
||||||
nagios_state = max(check_operationalstate(i), nagios_state)
|
nagios_state = max(check_operationalstate(i), nagios_state)
|
||||||
|
|
||||||
|
if command == "ls diskshelf full":
|
||||||
|
longoutput("%s/%s=%s (%s)\n" %
|
||||||
|
(systemname, objectname, i['operationalstate'], i['operationalstatedetail']))
|
||||||
|
|
||||||
# Lets add to the summary
|
# Lets add to the summary
|
||||||
if i['operationalstate'] != 'good' or detailedsummary == True:
|
#if i['operationalstate'] != 'good' or detailedsummary == True:
|
||||||
|
if i['operationalstate'] != 'good' or detailedsummary == True or not '_ok' in i['operationalstatedetail']:
|
||||||
|
if command == "ls disk full":
|
||||||
|
summary += " %s/%s (eva_comment=%s)=%s (%s)" % (
|
||||||
|
systemname, encbay, i['comments'], i['operationalstate'], i['operationalstatedetail'])
|
||||||
|
else:
|
||||||
|
if i['operationalstate'] == "good":
|
||||||
summary += " %s/%s=%s" % (
|
summary += " %s/%s=%s" % (
|
||||||
systemname, objectname, i['operationalstate'])
|
systemname, objectname, i['operationalstatedetail'])
|
||||||
|
else:
|
||||||
|
summary += " %s/%s=%s (%s)" % (
|
||||||
|
systemname, objectname, i['operationalstate'],i['operationalstatedetail'])
|
||||||
|
|
||||||
|
if not '_ok' in i['operationalstatedetail']:
|
||||||
|
nagios_state = max(warning, nagios_state)
|
||||||
|
|
||||||
# Lets get some perfdata
|
# Lets get some perfdata
|
||||||
|
if check_system is not None:
|
||||||
|
identifier = "%s_" % objectname
|
||||||
|
else:
|
||||||
identifier = "%s/%s_" % (systemname, objectname)
|
identifier = "%s/%s_" % (systemname, objectname)
|
||||||
|
|
||||||
i['identifier'] = identifier
|
i['identifier'] = identifier
|
||||||
|
|
||||||
for field in perfdata_fields:
|
for field in perfdata_fields:
|
||||||
if field == '':
|
if field == '':
|
||||||
continue
|
continue
|
||||||
|
if command == 'ls system full' and check_system != None:
|
||||||
|
add_perfdata("'%s'=%sGb " %
|
||||||
|
(field, i.get(field, None)))
|
||||||
|
else:
|
||||||
add_perfdata("'%s%s'=%s " %
|
add_perfdata("'%s%s'=%s " %
|
||||||
(identifier, field, i.get(field, None)))
|
(identifier, field, i.get(field, None)))
|
||||||
|
|
||||||
@ -538,11 +649,10 @@ def check_generic(command="ls disk full", namefield="objectname", perfdata_field
|
|||||||
(identifier, usedstoragespacegb, warninggb, totalstoragespacegb))
|
(identifier, usedstoragespacegb, warninggb, totalstoragespacegb))
|
||||||
|
|
||||||
# Long Serviceoutput
|
# Long Serviceoutput
|
||||||
|
if command == "ls disk full":
|
||||||
# There are usually to many disks for nagios to display. Skip.
|
longoutput("\n%s/%s (%s)=%s (%s)\n" %
|
||||||
if command != "ls disk full":
|
(systemname, objectname, encbay, i['operationalstate'], i['operationalstatedetail']))
|
||||||
longoutput("\n%s/%s = %s (%s)\n" %
|
#(systemname, objectname, i['operationalstate'], i['operationalstatedetail']))
|
||||||
(systemname, objectname, i['operationalstate'], i['operationalstatedetail']))
|
|
||||||
|
|
||||||
# If diskgroup has a problem because it is over allocated. Lets inform
|
# If diskgroup has a problem because it is over allocated. Lets inform
|
||||||
# about that
|
# about that
|
||||||
@ -552,9 +662,11 @@ def check_generic(command="ls disk full", namefield="objectname", perfdata_field
|
|||||||
(state[warning], occupancyalarmlvel))
|
(state[warning], occupancyalarmlvel))
|
||||||
# If a disk has a problem, lets display some extra info on it
|
# If a disk has a problem, lets display some extra info on it
|
||||||
elif command == "ls disk full" and i['operationalstate'] != 'good':
|
elif command == "ls disk full" and i['operationalstate'] != 'good':
|
||||||
longoutput("Warning - %s=%s (%s)\n" %
|
longoutput("Issues on this drive. Further details:\n")
|
||||||
(i['diskname'], i['operationalstate'], i['operationalstatedetail']))
|
#longoutput("Warning - %s/%s=%s (%s)\n" %
|
||||||
fields = "modelnumber firmwareversion serialnumber failurepredicted diskdrivetype".split(
|
#(systemname, encbay, i['operationalstate'], i['operationalstatedetail']))
|
||||||
|
#fields = "objectname modelnumber firmwareversion serialnumber failurepredicted diskdrivetype shelfnumber diskbaynumber comments".split(
|
||||||
|
fields = "modelnumber firmwareversion serialnumber failurepredicted diskdrivetype shelfnumber diskbaynumber comments".split(
|
||||||
)
|
)
|
||||||
for field in fields:
|
for field in fields:
|
||||||
longoutput("- %s = %s\n" % (field, i[field]))
|
longoutput("- %s = %s\n" % (field, i[field]))
|
||||||
@ -583,9 +695,13 @@ def check_multiple_objects(my_object, name):
|
|||||||
namefield = "name"
|
namefield = "name"
|
||||||
detailfield = 'operationalstatedetail'
|
detailfield = 'operationalstatedetail'
|
||||||
|
|
||||||
if name == 'fans' or name == 'sensors':
|
#if name == 'fans' or name == 'sensors':
|
||||||
|
if name == 'sensors':
|
||||||
valid_states = [
|
valid_states = [
|
||||||
'good', 'notavailable', 'unsupported', 'notinstalled']
|
'good', 'notavailable', 'unsupported', 'notinstalled']
|
||||||
|
elif name == 'fans':
|
||||||
|
valid_states = [
|
||||||
|
'good', 'notavailable', 'unsupported']
|
||||||
elif name == 'fibrechannelports':
|
elif name == 'fibrechannelports':
|
||||||
valid_states.append('notinstalled')
|
valid_states.append('notinstalled')
|
||||||
num_items = len(my_object[name])
|
num_items = len(my_object[name])
|
||||||
@ -611,7 +727,7 @@ def check_controllers():
|
|||||||
for controller in result:
|
for controller in result:
|
||||||
controller['systemname'] = i['objectname']
|
controller['systemname'] = i['objectname']
|
||||||
controllers.append(controller)
|
controllers.append(controller)
|
||||||
summary = "%s objects found " % len(controllers)
|
summary = "%s objects " % len(controllers)
|
||||||
for i in controllers:
|
for i in controllers:
|
||||||
systemname = i['systemname']
|
systemname = i['systemname']
|
||||||
if i.has_key('controllername'):
|
if i.has_key('controllername'):
|
||||||
@ -688,11 +804,12 @@ def check_controllers():
|
|||||||
if i.has_key('powersources'):
|
if i.has_key('powersources'):
|
||||||
for source in i['powersources']:
|
for source in i['powersources']:
|
||||||
source_state = max(source_state, ok)
|
source_state = max(source_state, ok)
|
||||||
if not source.has_key('status'):
|
#if not source.has_key('status'): # Should be state not status
|
||||||
|
if not source.has_key('state'):
|
||||||
continue
|
continue
|
||||||
if source['state'] != 'good':
|
if source['state'] != 'good':
|
||||||
source_state = max(warning, source_state)
|
source_state = max(warning, source_state)
|
||||||
longoutput("Powersource %s status = %s\n" %
|
longoutput("Powersource %s state = %s\n" %
|
||||||
(source['type'], source['state']))
|
(source['type'], source['state']))
|
||||||
if i.has_key('modules'):
|
if i.has_key('modules'):
|
||||||
for module in i['modules']:
|
for module in i['modules']:
|
||||||
@ -739,7 +856,7 @@ signal.alarm(timeout)
|
|||||||
if mode == 'check_systems':
|
if mode == 'check_systems':
|
||||||
perfdata_fields = 'totalstoragespace usedstoragespace availablestoragespace'.split(
|
perfdata_fields = 'totalstoragespace usedstoragespace availablestoragespace'.split(
|
||||||
)
|
)
|
||||||
longserviceoutputfields = 'licensestate systemtype firmwareversion nscfwversion totalstoragespace usedstoragespace availablestoragespace'.split(
|
longserviceoutputfields = 'comments licensestate systemtype firmwareversion nscfwversion totalstoragespace usedstoragespace availablestoragespace'.split(
|
||||||
)
|
)
|
||||||
command = "ls system full"
|
command = "ls system full"
|
||||||
namefield = "objectname"
|
namefield = "objectname"
|
||||||
@ -758,6 +875,7 @@ elif mode == 'check_diskgroups':
|
|||||||
elif mode == 'check_disks':
|
elif mode == 'check_disks':
|
||||||
check_generic(command="ls disk full", namefield="objectname")
|
check_generic(command="ls disk full", namefield="objectname")
|
||||||
elif mode == 'check_diskshelfs' or mode == 'check_diskshelves':
|
elif mode == 'check_diskshelfs' or mode == 'check_diskshelves':
|
||||||
|
show_perfdata = False # Ideally should fixed the code; but this does the trick!
|
||||||
check_generic(command="ls diskshelf full", namefield="diskshelfname",
|
check_generic(command="ls diskshelf full", namefield="diskshelfname",
|
||||||
longserviceoutputfields=[], perfdata_fields=[])
|
longserviceoutputfields=[], perfdata_fields=[])
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user