diff --git a/check_eva/check_eva.py b/check_eva/check_eva.py index e2abdb7..7bf9482 100644 --- a/check_eva/check_eva.py +++ b/check_eva/check_eva.py @@ -82,86 +82,86 @@ socket.setdefaulttimeout(5) def print_help(): - print "check_eva version %s" % version - print "This plugin checks HP EVA Array with the sssu command" - print "" - print "Usage: %s [OPTIONS]" % argv[0] - print "OPTIONS:" - print " [--host ]" - print " [--username ]" - print " [--password ]" - print " [--mode ] " - print " [--test]" - print " [--debug]" - print " [--help]" - print "" - print " Valid modes are: %s" % ', '.join(valid_modes) - print "" - print "Example: %s --host commandview.example.net --username eva --password myPassword --mode check_systems" % (argv[0]) + print "check_eva version %s" % version + print "This plugin checks HP EVA Array with the sssu command" + print "" + print "Usage: %s [OPTIONS]" % argv[0] + print "OPTIONS:" + print " [--host ]" + print " [--username ]" + print " [--password ]" + print " [--mode ] " + print " [--test]" + print " [--debug]" + print " [--help]" + print "" + print " Valid modes are: %s" % ', '.join(valid_modes) + print "" + print "Example: %s --host commandview.example.net --username eva --password myPassword --mode check_systems" % (argv[0]) def error(errortext): - print "* Error: %s" % errortext - print_help() - print "* Error: %s" % errortext - exit(unknown) + print "* Error: %s" % errortext + print_help() + print "* Error: %s" % errortext + exit(unknown) def debug( debugtext ): - global debugging - if debugging: - print debugtext + global debugging + if debugging: + print debugtext # parse arguments arguments=argv[1:] while len(arguments) > 0: - arg=arguments.pop(0) - if arg == 'invalid': - pass - elif arg == '-H' or arg == '--host': - hostname=arguments.pop(0) - elif arg == '-U' or arg == '--username': - username=arguments.pop(0) - elif arg == '-P' or arg == '--password': - password = arguments.pop(0) - elif arg == '-T' or arg == '--test': - testmode=1 - elif arg == '--path': - path = arguments.pop(0) + '/' - elif arg == '-M' or arg == '--mode': - mode=arguments.pop(0) - if mode not in valid_modes: - error("Invalid --mode %s" % arg) - elif arg == '-d' or arg == '--debug': - debugging=True - elif arg == '--longserviceoutput': - show_longserviceoutput = True - elif arg == '--no-longserviceoutput': - show_longserviceoutput = False - elif arg == '--perfdata': - show_perfdata = True - elif arg == '--no-perfdata': - show_perfdata = False - elif arg == '--nagios_myhostname': - nagios_myhostname = arguments.pop(0) - elif arg == '--nagios_server': - nagios_server = arguments.pop(0) - elif arg == '--nagios_port': - nagios_port = arguments.pop(0) - elif arg == '--system': - check_system = arguments.pop(0) - elif arg == '--phone-home': - do_phone_home = True - elif arg == '--proxy': - proxyserver = arguments.pop(0) - elif arg == '--escape-newlines': - escape_newlines = True - elif arg == '-h' or arg == '--help': - print_help() - exit(ok) - else: - error( "Invalid argument %s"% arg) + arg=arguments.pop(0) + if arg == 'invalid': + pass + elif arg == '-H' or arg == '--host': + hostname=arguments.pop(0) + elif arg == '-U' or arg == '--username': + username=arguments.pop(0) + elif arg == '-P' or arg == '--password': + password = arguments.pop(0) + elif arg == '-T' or arg == '--test': + testmode=1 + elif arg == '--path': + path = arguments.pop(0) + '/' + elif arg == '-M' or arg == '--mode': + mode=arguments.pop(0) + if mode not in valid_modes: + error("Invalid --mode %s" % arg) + elif arg == '-d' or arg == '--debug': + debugging=True + elif arg == '--longserviceoutput': + show_longserviceoutput = True + elif arg == '--no-longserviceoutput': + show_longserviceoutput = False + elif arg == '--perfdata': + show_perfdata = True + elif arg == '--no-perfdata': + show_perfdata = False + elif arg == '--nagios_myhostname': + nagios_myhostname = arguments.pop(0) + elif arg == '--nagios_server': + nagios_server = arguments.pop(0) + elif arg == '--nagios_port': + nagios_port = arguments.pop(0) + elif arg == '--system': + check_system = arguments.pop(0) + elif arg == '--phone-home': + do_phone_home = True + elif arg == '--proxy': + proxyserver = arguments.pop(0) + elif arg == '--escape-newlines': + escape_newlines = True + elif arg == '-h' or arg == '--help': + print_help() + exit(ok) + else: + error( "Invalid argument %s"% arg) @@ -179,513 +179,512 @@ subitems['port'] = 'fibrechannelports' '''runCommand: Runs command from the shell prompt. Exit Nagios style if unsuccessful''' def runCommand(command): - proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,stderr=subprocess.PIPE,) - stdout, stderr = proc.communicate('through stdin to stdout') - if proc.returncode > 0: - print "Error %s: %s\n command was: '%s'" % (proc.returncode,stderr.strip(),command) - if proc.returncode == 127 or proc.returncode == 1: # File not found, lets print path - path=getenv("PATH") - print "Current Path: %s" % (path) - exit(unknown) - else: - return stdout + proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,stderr=subprocess.PIPE,) + stdout, stderr = proc.communicate('through stdin to stdout') + if proc.returncode > 0: + print "Error %s: %s\n command was: '%s'" % (proc.returncode,stderr.strip(),command) + if proc.returncode == 127 or proc.returncode == 1: # File not found, lets print path + path=getenv("PATH") + print "Current Path: %s" % (path) + exit(unknown) + else: + return stdout '''Runs the sssu command. This one is responsible for error checking from sssu''' def run_sssu(system=None, command="ls system full"): - commands = [] + commands = [] - continue_on_error="set option on_error=continue" - login="select manager %s USERNAME=%s PASSWORD=%s"%(hostname,username,password) + continue_on_error="set option on_error=continue" + login="select manager %s USERNAME=%s PASSWORD=%s"%(hostname,username,password) - commands.append(continue_on_error) - commands.append(login) - if system != None: - commands.append('select SYSTEM "%s"' % system) - commands.append(command) + commands.append(continue_on_error) + commands.append(login) + if system != None: + commands.append('select SYSTEM "%s"' % system) + commands.append(command) - commandstring = "sssu " - for i in commands: commandstring = commandstring + '"%s" ' % i - global server_side_troubleshooting - if server_side_troubleshooting == True: - commandstring = 'cat "debug/%s"' % command - - #print mystring - #if command == "ls system full": - # output = runCommand("cat sssu.out") - #elif command == "ls disk_groups full": - # output = runCommand("cat ls_disk*") - #elif command == "ls controller full": - # output = runCommand("cat ls_controller") - #else: - # print "What command is this?", command - # exit(unknown) - output = runCommand(commandstring) - debug( commandstring ) + commandstring = "sssu " + for i in commands: commandstring = commandstring + '"%s" ' % i + global server_side_troubleshooting + if server_side_troubleshooting == True: + commandstring = 'cat "debug/%s"' % command - output = output.split('\n') + #print mystring + #if command == "ls system full": + # output = runCommand("cat sssu.out") + #elif command == "ls disk_groups full": + # output = runCommand("cat ls_disk*") + #elif command == "ls controller full": + # output = runCommand("cat ls_controller") + #else: + # print "What command is this?", command + # exit(unknown) + output = runCommand(commandstring) + debug( commandstring ) - # Lets process the top few results from the sssu command. Make sure the results make sense - error = 0 - if output.pop(0).strip() != '': error = 1 - if output.pop(0).strip() != '': error = 1 - if output.pop(0).strip() != 'SSSU for HP StorageWorks Command View EVA': error = 1 - if output.pop(0).strip().find('Version:') != 0: error=1 - if output.pop(0).strip().find('Build:') != 0: error=1 - if output.pop(0).strip().find('NoSystemSelected> ') != 0: error=1 - #if output.pop(0).strip() != '': error = 1 - #if output.pop(0).strip().find('NoSystemSelected> ') != 0: error=1 - #if output.pop(0).strip() != '': error = 1 - buffer = "" - for i in output: - buffer = buffer + i + "\n" - if i.find('Error') > -1: - print "This is the command i was trying to execute: %s" % i - error = 1 - if i.find('information:') > 0: break - if error > 0: - print "Error running the sssu command" - print commandstring - print buffer - exit(unknown) - - objects = [] - object = None - parent_object = None - for line in output: - if len(line) == 0: - continue - line = line.strip() - tmp = line.split() - if len(tmp) == 0: - if object: - if not object['master'] in objects: objects.append( object['master'] ) - object = None - continue - key = tmp[0].strip() - if object and not object['master'] in objects: objects.append( object['master'] ) - if key == 'object': - object = {} - object['master'] = object - if key == 'controllertemperaturestatus': - object = object['master'] - if key == 'iomodules': - key = 'modules' - #if key in subitems.values(): - # object['master'][key] = [] - if key in subitems.keys(): - mastergroup = subitems[key] - master = object['master'] - object = {} - object['object_type'] = key - object['master'] = master - if not object['master'].has_key(mastergroup): - object['master'][mastergroup] = [] - object['master'][mastergroup].append(object) - - + output = output.split('\n') - if line.find('.:') > 0: - # We work on first come, first serve basis, so if - # we accidentally see same key again, we will ignore - if not object.has_key(key): - value = ' '.join( tmp[2:] ).strip() - object[key] = value - # Check if we were instructed to check only one eva system - global check_system - if command == "ls system full" and check_system != None: - tmp_objects = [] - for i in objects: - if i['objectname'] == check_system: - tmp_objects.append( i ) - objects = tmp_objects - return objects + # Lets process the top few results from the sssu command. Make sure the results make sense + error = 0 + if output.pop(0).strip() != '': error = 1 + if output.pop(0).strip() != '': error = 1 + if output.pop(0).strip() != 'SSSU for HP StorageWorks Command View EVA': error = 1 + if output.pop(0).strip().find('Version:') != 0: error=1 + if output.pop(0).strip().find('Build:') != 0: error=1 + if output.pop(0).strip().find('NoSystemSelected> ') != 0: error=1 + #if output.pop(0).strip() != '': error = 1 + #if output.pop(0).strip().find('NoSystemSelected> ') != 0: error=1 + #if output.pop(0).strip() != '': error = 1 + buffer = "" + for i in output: + buffer = buffer + i + "\n" + if i.find('Error') > -1: + print "This is the command i was trying to execute: %s" % i + error = 1 + if i.find('information:') > 0: break + if error > 0: + print "Error running the sssu command" + print commandstring + print buffer + exit(unknown) + objects = [] + object = None + parent_object = None + for line in output: + if len(line) == 0: + continue + line = line.strip() + tmp = line.split() + if len(tmp) == 0: + if object: + if not object['master'] in objects: objects.append( object['master'] ) + object = None + continue + key = tmp[0].strip() + if object and not object['master'] in objects: objects.append( object['master'] ) + if key == 'object': + object = {} + object['master'] = object + if key == 'controllertemperaturestatus': + object = object['master'] + if key == 'iomodules': + key = 'modules' + #if key in subitems.values(): + # object['master'][key] = [] + if key in subitems.keys(): + mastergroup = subitems[key] + master = object['master'] + object = {} + object['object_type'] = key + object['master'] = master + if not object['master'].has_key(mastergroup): + object['master'][mastergroup] = [] + object['master'][mastergroup].append(object) + + + + if line.find('.:') > 0: + # We work on first come, first serve basis, so if + # we accidentally see same key again, we will ignore + if not object.has_key(key): + value = ' '.join( tmp[2:] ).strip() + object[key] = value + # Check if we were instructed to check only one eva system + global check_system + if command == "ls system full" and check_system != None: + tmp_objects = [] + for i in objects: + if i['objectname'] == check_system: + tmp_objects.append( i ) + objects = tmp_objects + return objects def end(summary,perfdata,longserviceoutput,nagios_state): - global show_longserviceoutput - global show_perfdata - global nagios_server - global do_phone_home - global nagios_port - global nagios_myhostname - global hostname - global mode - global escape_newlines - global check_system + global show_longserviceoutput + global show_perfdata + global nagios_server + global do_phone_home + global nagios_port + global nagios_myhostname + global hostname + global mode + global escape_newlines + global check_system - message = "%s - %s" % ( state[nagios_state], summary) - if show_perfdata: - message = "%s | %s" % ( message, perfdata) - if show_longserviceoutput: - message = "%s\n%s" % ( message, longserviceoutput.strip()) - if escape_newlines == True: - lines = message.split('\n') - message = '\\n'.join(lines) - debug( "do_phone_home = %s" %(do_phone_home) ) - if do_phone_home == True: - try: - if nagios_myhostname is None: - if environ.has_key( 'HOSTNAME' ): - nagios_myhostname = environ['HOSTNAME'] - elif environ.has_key( 'COMPUTERNAME' ): - nagios_myhostname = environ['COMPUTERNAME'] - else: - nagios_myhostname = hostname - try: phone_home(nagios_server,nagios_port, status=nagios_state, message=message, hostname=nagios_myhostname, servicename=mode,system=check_system) - except:pass + message = "%s - %s" % ( state[nagios_state], summary) + if show_perfdata: + message = "%s | %s" % ( message, perfdata) + if show_longserviceoutput: + message = "%s\n%s" % ( message, longserviceoutput.strip()) + if escape_newlines == True: + lines = message.split('\n') + message = '\\n'.join(lines) + debug( "do_phone_home = %s" %(do_phone_home) ) + if do_phone_home == True: + try: + if nagios_myhostname is None: + if environ.has_key( 'HOSTNAME' ): + nagios_myhostname = environ['HOSTNAME'] + elif environ.has_key( 'COMPUTERNAME' ): + nagios_myhostname = environ['COMPUTERNAME'] + else: + nagios_myhostname = hostname + try: phone_home(nagios_server,nagios_port, status=nagios_state, message=message, hostname=nagios_myhostname, servicename=mode,system=check_system) + except:pass - except: - raise - print message - exit(nagios_state) + except: + raise + print message + exit(nagios_state) class ProxiedTransport(xmlrpclib.Transport): - def set_proxy(self, proxy): - self.proxy = proxy - def make_connection(self, host): - self.realhost = host - h = httplib.HTTP(self.proxy) - return h - def send_request(self, connection, handler, request_body): - connection.putrequest("POST", 'http://%s%s' % (self.realhost, handler)) - def send_host(self, connection, host): - connection.putheader('Host', self.realhost) + def set_proxy(self, proxy): + self.proxy = proxy + def make_connection(self, host): + self.realhost = host + h = httplib.HTTP(self.proxy) + return h + def send_request(self, connection, handler, request_body): + connection.putrequest("POST", 'http://%s%s' % (self.realhost, handler)) + def send_host(self, connection, host): + connection.putheader('Host', self.realhost) ''' phone_home: Sends results to remote nagios server via python xml-rpc ''' def phone_home(nagios_server,nagios_port, status, message, hostname=None, servicename=None,system=None): - debug("phoning home: %s" % (servicename) ) - if system is not None: - servicename = str(servicename) + str(system) - uri = "http://%s:%s" % (nagios_server,nagios_port) - - global proxyserver - if proxyserver != None: - p = ProxiedTransport() - p.set_proxy(proxyserver) - s = xmlrpclib.Server( uri, transport=p ) - else: - s = xmlrpclib.ServerProxy( uri ) - s.nagiosupdate(hostname, servicename, status, message) - return 0 + debug("phoning home: %s" % (servicename) ) + if system is not None: + servicename = str(servicename) + str(system) + uri = "http://%s:%s" % (nagios_server,nagios_port) + + global proxyserver + if proxyserver != None: + p = ProxiedTransport() + p.set_proxy(proxyserver) + s = xmlrpclib.Server( uri, transport=p ) + else: + s = xmlrpclib.ServerProxy( uri ) + s.nagiosupdate(hostname, servicename, status, message) + return 0 def check_systems(): - summary="" - perfdata="" - #longserviceoutput="\n" - nagios_state = ok - objects = run_sssu() - for i in objects: - name = i['objectname'] - operationalstate = i['operationalstate'] - # Lets see if this array is working - if operationalstate != 'good': - nagios_state = max(nagios_state, warning) - # Lets add to the summary - summary = summary + " %s=%s " %(name, operationalstate) - # Collect the performance data - interesting_perfdata = 'totalstoragespace|usedstoragespace|availablestoragespace' - perfdata = perfdata + get_perfdata(i,interesting_perfdata.split('|'), identifier="%s_"% name) - # Collect extra info for longserviceoutput - long("%s = %s (%s)\n" % ( i['objectname'], i['operationalstate'], i['operationalstatedetail']) ) - interesting_fields = 'licensestate|systemtype|firmwareversion|nscfwversion|totalstoragespace|usedstoragespace|availablestoragespace' - for x in interesting_fields.split('|'): - long( "- %s = %s \n" %(x, i[x]) ) - long("\n") - end(summary,perfdata,longserviceoutput,nagios_state) + summary="" + perfdata="" + #longserviceoutput="\n" + nagios_state = ok + objects = run_sssu() + for i in objects: + name = i['objectname'] + operationalstate = i['operationalstate'] + # Lets see if this array is working + if operationalstate != 'good': + nagios_state = max(nagios_state, warning) + # Lets add to the summary + summary = summary + " %s=%s " %(name, operationalstate) + # Collect the performance data + interesting_perfdata = 'totalstoragespace|usedstoragespace|availablestoragespace' + perfdata = perfdata + get_perfdata(i,interesting_perfdata.split('|'), identifier="%s_"% name) + # Collect extra info for longserviceoutput + long("%s = %s (%s)\n" % ( i['objectname'], i['operationalstate'], i['operationalstatedetail']) ) + interesting_fields = 'licensestate|systemtype|firmwareversion|nscfwversion|totalstoragespace|usedstoragespace|availablestoragespace' + for x in interesting_fields.split('|'): + long( "- %s = %s \n" %(x, i[x]) ) + long("\n") + end(summary,perfdata,longserviceoutput,nagios_state) def get_perfdata(object, interesting_fields, identifier=""): - perfdata = "" - for i in interesting_fields: - if i == '': continue - perfdata = perfdata + "'%s%s'=%s " % (identifier, i, object[i]) - return perfdata + perfdata = "" + for i in interesting_fields: + if i == '': continue + perfdata = perfdata + "'%s%s'=%s " % (identifier, i, object[i]) + return perfdata def add_perfdata(text): - global perfdata - text = text.strip() - perfdata = perfdata + " %s " % (text) + global perfdata + text = text.strip() + perfdata = perfdata + " %s " % (text) def long(text): - global longserviceoutput - longserviceoutput = longserviceoutput + text + global longserviceoutput + longserviceoutput = longserviceoutput + text def get_longserviceoutput(object, interesting_fields): - longserviceoutput = "" - for i in interesting_fields: - longserviceoutput = longserviceoutput + "%s = %s \n" %(i, object[i]) - return longserviceoutput + longserviceoutput = "" + for i in interesting_fields: + longserviceoutput = longserviceoutput + "%s = %s \n" %(i, object[i]) + return longserviceoutput def check_operationalstate(object, print_failed_objects=False,namefield='objectname',detailfield='operationalstatedetail',statefield='operationalstate',valid_states=['good']): - if not object.has_key(detailfield): detailfield = statefield - if not object.has_key(statefield): - if print_failed_objects: - long("- Warning, %s does not have any '%s'" % ( object[namefield], statefield ) ) - return warning - if object[statefield] not in valid_states: - if print_failed_objects: - long("- Warning, %s=%s (%s)\n" % ( object[namefield], object['operationalstate'], object[detailfield] )) - return warning - debug( "OK, %s=%s (%s)\n" % ( object[namefield], object['operationalstate'], object[detailfield] ) ) - return ok + if not object.has_key(detailfield): detailfield = statefield + if not object.has_key(statefield): + if print_failed_objects: + long("- Warning, %s does not have any '%s'" % ( object[namefield], statefield ) ) + return warning + if object[statefield] not in valid_states: + if print_failed_objects: + long("- Warning, %s=%s (%s)\n" % ( object[namefield], object['operationalstate'], object[detailfield] )) + return warning + debug( "OK, %s=%s (%s)\n" % ( object[namefield], object['operationalstate'], object[detailfield] ) ) + return ok def check_generic(command="ls disk full",namefield="objectname", perfdata_fields=[], longserviceoutputfields=[], detailedsummary=False): - summary="" - global perfdata - nagios_state = ok - systems = run_sssu() - objects = [] - if command == 'ls system full': - objects = systems - for i in systems: i['systemname'] = '' #i['objectname'] - else: - for i in systems: - result = run_sssu(system=i['objectname'], command=command) - for x in result: - x['systemname'] = i['objectname'] - objects.append( x ) - summary = "%s objects found " % len(objects) - for i in objects: - systemname = i['systemname'] - # Some versions of commandview use "objectname" instead of namefield - if i.has_key( namefield ): - objectname = i[namefield] - else: - objectname = i['objectname'] - # Some versions of CV also return garbage objects, luckily it is easy to find these - if i.has_key('objecttype') and i['objecttype'] == 'typenotset': - long("Object %s was skipped because objecttype == typenotset\n" % objectname ) - continue - # Lets see if this object is working - nagios_state = max( check_operationalstate(i), nagios_state ) - - - # Lets add to the summary - if i['operationalstate'] != 'good' or detailedsummary == True: - summary = summary + " %s/%s=%s " %(systemname,objectname, i['operationalstate']) - - # Lets get some perfdata - identifier = "%s/%s_" % (systemname,objectname) - i['identifier'] = identifier + summary="" + global perfdata + nagios_state = ok + systems = run_sssu() + objects = [] + if command == 'ls system full': + objects = systems + for i in systems: i['systemname'] = '' #i['objectname'] + else: + for i in systems: + result = run_sssu(system=i['objectname'], command=command) + for x in result: + x['systemname'] = i['objectname'] + objects.append( x ) + summary = "%s objects found " % len(objects) + for i in objects: + systemname = i['systemname'] + # Some versions of commandview use "objectname" instead of namefield + if i.has_key( namefield ): + objectname = i[namefield] + else: + objectname = i['objectname'] + # Some versions of CV also return garbage objects, luckily it is easy to find these + if i.has_key('objecttype') and i['objecttype'] == 'typenotset': + long("Object %s was skipped because objecttype == typenotset\n" % objectname ) + continue + # Lets see if this object is working + nagios_state = max( check_operationalstate(i), nagios_state ) - for field in perfdata_fields: - if field == '': continue - add_perfdata( "'%s%s'=%s " % (identifier, field, i[field]) ) - - # Disk group gets a special perfdata treatment - if command == "ls disk_group full": - totalstoragespacegb= float( i['totalstoragespacegb'] ) - usedstoragespacegb= float ( i['usedstoragespacegb'] ) - occupancyalarmlvel = float( i['occupancyalarmlevel'] ) - warninggb= totalstoragespacegb * occupancyalarmlvel / 100 - add_perfdata( " '%sdiskusage'=%s;%s;%s "% (identifier, usedstoragespacegb,warninggb,totalstoragespacegb) ) - - # Long Serviceoutput - - # There are usually to many disks for nagios to display. Skip. - if command != "ls disk full": - long( "\n%s/%s = %s (%s)\n"%(systemname,objectname,i['operationalstate'], i['operationalstatedetail']) ) - - # If diskgroup has a problem because it is over allocated. Lets inform about that - if command == "ls disk_group full" and usedstoragespacegb > warninggb: - long("- %s - diskgroup usage is over %s%% threshold !\n" % (state[warning], occupancyalarmlvel) ) - # If a disk has a problem, lets display some extra info on it - elif command == "ls disk full" and i['operationalstate'] != 'good': - long( "Warning - %s=%s (%s)\n" % (i['diskname'], i['operationalstate'], i['operationalstatedetail'] )) - fields="modelnumber firmwareversion serialnumber failurepredicted diskdrivetype".split() - for field in fields: - long( "- %s = %s\n" % (field, i[field]) ) + # Lets add to the summary + if i['operationalstate'] != 'good' or detailedsummary == True: + summary = summary + " %s/%s=%s " %(systemname,objectname, i['operationalstate']) + + # Lets get some perfdata + identifier = "%s/%s_" % (systemname,objectname) + i['identifier'] = identifier - nagios_state = max(nagios_state, check_multiple_objects(i, 'sensors')) - nagios_state = max(nagios_state, check_multiple_objects(i, 'fans')) - nagios_state = max(nagios_state, check_multiple_objects(i, 'powersupplies')) - nagios_state = max(nagios_state, check_multiple_objects(i, 'communicationbuses')) - nagios_state = max(nagios_state, check_multiple_objects(i, 'fibrechannelports')) - nagios_state = max(nagios_state, check_multiple_objects(i, 'modules')) - for x in longserviceoutputfields: - if i.has_key( x ): - long( "- %s = %s\n" % (x, i[x])) + for field in perfdata_fields: + if field == '': continue + add_perfdata( "'%s%s'=%s " % (identifier, field, i[field]) ) - end(summary,perfdata,longserviceoutput,nagios_state) + # Disk group gets a special perfdata treatment + if command == "ls disk_group full": + totalstoragespacegb= float( i['totalstoragespacegb'] ) + usedstoragespacegb= float ( i['usedstoragespacegb'] ) + occupancyalarmlvel = float( i['occupancyalarmlevel'] ) + warninggb= totalstoragespacegb * occupancyalarmlvel / 100 + add_perfdata( " '%sdiskusage'=%s;%s;%s "% (identifier, usedstoragespacegb,warninggb,totalstoragespacegb) ) + + # Long Serviceoutput + + # There are usually to many disks for nagios to display. Skip. + if command != "ls disk full": + long( "\n%s/%s = %s (%s)\n"%(systemname,objectname,i['operationalstate'], i['operationalstatedetail']) ) + + # If diskgroup has a problem because it is over allocated. Lets inform about that + if command == "ls disk_group full" and usedstoragespacegb > warninggb: + long("- %s - diskgroup usage is over %s%% threshold !\n" % (state[warning], occupancyalarmlvel) ) + # If a disk has a problem, lets display some extra info on it + elif command == "ls disk full" and i['operationalstate'] != 'good': + long( "Warning - %s=%s (%s)\n" % (i['diskname'], i['operationalstate'], i['operationalstatedetail'] )) + fields="modelnumber firmwareversion serialnumber failurepredicted diskdrivetype".split() + for field in fields: + long( "- %s = %s\n" % (field, i[field]) ) + + + nagios_state = max(nagios_state, check_multiple_objects(i, 'sensors')) + nagios_state = max(nagios_state, check_multiple_objects(i, 'fans')) + nagios_state = max(nagios_state, check_multiple_objects(i, 'powersupplies')) + nagios_state = max(nagios_state, check_multiple_objects(i, 'communicationbuses')) + nagios_state = max(nagios_state, check_multiple_objects(i, 'fibrechannelports')) + nagios_state = max(nagios_state, check_multiple_objects(i, 'modules')) + for x in longserviceoutputfields: + if i.has_key( x ): + long( "- %s = %s\n" % (x, i[x])) + + end(summary,perfdata,longserviceoutput,nagios_state) def check_multiple_objects(object, name): - item_status = not_present - if object.has_key(name): - item_status = not_present - valid_states=['good'] - namefield="name" - detailfield = 'operationalstatedetail' + item_status = not_present + if object.has_key(name): + item_status = not_present + valid_states=['good'] + namefield="name" + detailfield = 'operationalstatedetail' - if name == 'fans' or name == 'sensors': - valid_states = ['good','notavailable','unsupported','notinstalled'] - elif name == 'fibrechannelports': - valid_states.append( 'notinstalled' ) - num_items = len(object[name]) - for item in object[name]: - stat = check_operationalstate( item,print_failed_objects=True, namefield=namefield, valid_states=valid_states,detailfield=detailfield) - item_status = max( stat, item_status ) - long('- %s on %s (%s detected)\n'% (state[item_status], name, num_items) ) - add_perfdata( " '%s%s'=%s" % (object['identifier'],name, num_items) ) - return item_status - + if name == 'fans' or name == 'sensors': + valid_states = ['good','notavailable','unsupported','notinstalled'] + elif name == 'fibrechannelports': + valid_states.append( 'notinstalled' ) + num_items = len(object[name]) + for item in object[name]: + stat = check_operationalstate( item,print_failed_objects=True, namefield=namefield, valid_states=valid_states,detailfield=detailfield) + item_status = max( stat, item_status ) + long('- %s on %s (%s detected)\n'% (state[item_status], name, num_items) ) + add_perfdata( " '%s%s'=%s" % (object['identifier'],name, num_items) ) + return item_status + def check_controllers(): - summary="" - perfdata="" - #longserviceoutput="\n" - nagios_state = ok - systems = run_sssu() - controllers =[] - for i in systems: - result = run_sssu(system=i['objectname'], command="ls controller full") - for controller in result: - controller['systemname'] = i['objectname'] - controllers.append( controller ) - summary = "%s objects found " % len(controllers) - for i in controllers: - systemname = i['systemname'] - if i.has_key('controllername'): - controllername = i['controllername'] - else: - controllername = i['objectname'] - # Lets see if this controller is working - nagios_state = max( check_operationalstate(i), nagios_state ) + summary="" + perfdata="" + #longserviceoutput="\n" + nagios_state = ok + systems = run_sssu() + controllers =[] + for i in systems: + result = run_sssu(system=i['objectname'], command="ls controller full") + for controller in result: + controller['systemname'] = i['objectname'] + controllers.append( controller ) + summary = "%s objects found " % len(controllers) + for i in controllers: + systemname = i['systemname'] + if i.has_key('controllername'): + controllername = i['controllername'] + else: + controllername = i['objectname'] + # Lets see if this controller is working + nagios_state = max( check_operationalstate(i), nagios_state ) - # Lets add to the summary - if not i.has_key('operationalstate'): - summary = summary + " %s does not have any operationalstate " % controllername - nagios_state = max( unknown, nagios_state ) - continue - elif i['operationalstate'] != 'good': - summary = summary + " %s/%s=%s " %(systemname,controllername, i['operationalstate']) - - # Lets get some perfdata - interesting_fields = "controllermainmemory" - identifier = "%s/%s_" % (systemname,controllername) - perfdata = perfdata + get_perfdata(i, interesting_fields.split('|'), identifier=identifier) + # Lets add to the summary + if not i.has_key('operationalstate'): + summary = summary + " %s does not have any operationalstate " % controllername + nagios_state = max( unknown, nagios_state ) + continue + elif i['operationalstate'] != 'good': + summary = summary + " %s/%s=%s " %(systemname,controllername, i['operationalstate']) - # Long Serviceoutput - interesting_fields = "operationalstate|operationalstatedetail|firmwareversion|serialnumber" - #longserviceoutput = longserviceoutput + get_longserviceoutput(i, interesting_fields.split('|') ) - #longserviceoutput = longserviceoutput + "\n%s/%s\n"%(systemname,controllername) - long( "\n%s/%s = %s (%s)\n"%(systemname,controllername,i['operationalstate'], i['operationalstatedetail']) ) - long( "- firmwareversion = %s \n" %(i['firmwareversion'])) - long( "- serialnumber = %s \n" %(i['serialnumber'])) + # Lets get some perfdata + interesting_fields = "controllermainmemory" + identifier = "%s/%s_" % (systemname,controllername) + perfdata = perfdata + get_perfdata(i, interesting_fields.split('|'), identifier=identifier) + + # Long Serviceoutput + interesting_fields = "operationalstate|operationalstatedetail|firmwareversion|serialnumber" + #longserviceoutput = longserviceoutput + get_longserviceoutput(i, interesting_fields.split('|') ) + #longserviceoutput = longserviceoutput + "\n%s/%s\n"%(systemname,controllername) + long( "\n%s/%s = %s (%s)\n"%(systemname,controllername,i['operationalstate'], i['operationalstatedetail']) ) + long( "- firmwareversion = %s \n" %(i['firmwareversion'])) + long( "- serialnumber = %s \n" %(i['serialnumber'])) - controllertemperaturestatus = not_present - cache_state = not_present - fanstate = not_present - hostportstate = not_present - sensorstate = ok - source_state = not_present - module_state = not_present + controllertemperaturestatus = not_present + cache_state = not_present + fanstate = not_present + hostportstate = not_present + sensorstate = ok + source_state = not_present + module_state = not_present - # Check the cache status - if i['cachecondition'] == 'good': - cache_state = ok - else: - cache_state = warning - - # Check Temperature - if i.has_key("controllertemperaturestatus"): - if i['controllertemperaturestatus'] == 'normal': - controllertemperaturestatus = ok - else: - controllertemperaturestatus = warning - - - # Process the subsensors - for hostport in i['hostports']: - #long(" %s = %s\n" % (hostport['portname'], hostport['operationalstate'])) - hostportstate = max(hostportstate,ok) - if hostport['operationalstate'] != 'good': - hostportstate = max(warning,hostport_state) - long("Hostport %s state = %s\n" % hostport['portname'], hostport['operationalstate']) - if i.has_key('fans'): - for fan in i['fans']: - fanstate = max(fanstate,ok) - #long(" %s = %s\n" % (fan['fanname'], fan['status'])) - if fan.has_key('status'): status = fan['status'] - elif fan.has_key('installstatus'): status = fan['installstatus'] - if status != 'normal' and status != 'yes': - fanstate = max(warning,fanstate) - long("Fan %s status = %s\n" % (fan['fanname'],status)) - if i.has_key('powersources'): - for source in i['powersources']: - source_state = max(source_state,ok) - if not source.has_key('status'): continue - if source['state'] != 'good': - source_state = max(warning,source_state) - long("Powersource %s status = %s\n" % (source['type'],source['state'])) - if i.has_key('modules'): - for module in i['modules']: - module_state = max(module_state,ok) - if module['operationalstate'] not in ('good','not_present'): - module_state = max(warning,module_state) - long("Battery Module %s status = %s\n" % (module['name'],module['operationalstate'])) - + # Check the cache status + if i['cachecondition'] == 'good': + cache_state = ok + else: + cache_state = warning - for i in (fanstate,hostportstate,sensorstate,source_state,module_state,cache_state,controllertemperaturestatus): - nagios_state = max(nagios_state, i) - - long("- %s on fans\n"%( state[fanstate] ) ) - long("- %s on cachememory\n"%( state[cache_state] ) ) - long("- %s on temperature\n"%( state[controllertemperaturestatus] ) ) - long("- %s on hostports\n"%( state[hostportstate] ) ) - long("- %s on sensors\n"%( state[sensorstate] ) ) - long("- %s on powersupplies\n"%( state[source_state] ) ) - long("- %s on batterymodules\n"%( state[module_state] ) ) - - - long('\n') - end(summary,perfdata,longserviceoutput,nagios_state) + # Check Temperature + if i.has_key("controllertemperaturestatus"): + if i['controllertemperaturestatus'] == 'normal': + controllertemperaturestatus = ok + else: + controllertemperaturestatus = warning + + + # Process the subsensors + for hostport in i['hostports']: + #long(" %s = %s\n" % (hostport['portname'], hostport['operationalstate'])) + hostportstate = max(hostportstate,ok) + if hostport['operationalstate'] != 'good': + hostportstate = max(warning,hostport_state) + long("Hostport %s state = %s\n" % hostport['portname'], hostport['operationalstate']) + if i.has_key('fans'): + for fan in i['fans']: + fanstate = max(fanstate,ok) + #long(" %s = %s\n" % (fan['fanname'], fan['status'])) + if fan.has_key('status'): status = fan['status'] + elif fan.has_key('installstatus'): status = fan['installstatus'] + if status != 'normal' and status != 'yes': + fanstate = max(warning,fanstate) + long("Fan %s status = %s\n" % (fan['fanname'],status)) + if i.has_key('powersources'): + for source in i['powersources']: + source_state = max(source_state,ok) + if not source.has_key('status'): continue + if source['state'] != 'good': + source_state = max(warning,source_state) + long("Powersource %s status = %s\n" % (source['type'],source['state'])) + if i.has_key('modules'): + for module in i['modules']: + module_state = max(module_state,ok) + if module['operationalstate'] not in ('good','not_present'): + module_state = max(warning,module_state) + long("Battery Module %s status = %s\n" % (module['name'],module['operationalstate'])) + + + for i in (fanstate,hostportstate,sensorstate,source_state,module_state,cache_state,controllertemperaturestatus): + nagios_state = max(nagios_state, i) + + long("- %s on fans\n"%( state[fanstate] ) ) + long("- %s on cachememory\n"%( state[cache_state] ) ) + long("- %s on temperature\n"%( state[controllertemperaturestatus] ) ) + long("- %s on hostports\n"%( state[hostportstate] ) ) + long("- %s on sensors\n"%( state[sensorstate] ) ) + long("- %s on powersupplies\n"%( state[source_state] ) ) + long("- %s on batterymodules\n"%( state[module_state] ) ) + + + long('\n') + end(summary,perfdata,longserviceoutput,nagios_state) def set_path(): - global path - current_path = getenv('PATH') - if path == '': - if current_path.find('C:\\') > -1: # We are on this platform - path = ";C:\\Program Files\\Hewlett-Packard\\Sanworks\\Element Manager for StorageWorks HSV" - else: - path = ":/usr/local/bin" - current_path = "%s%s" % (current_path,path) - environ['PATH'] = current_path + global path + current_path = getenv('PATH') + if path == '': + if current_path.find('C:\\') > -1: # We are on this platform + path = ";C:\\Program Files\\Hewlett-Packard\\Sanworks\\Element Manager for StorageWorks HSV" + else: + path = ":/usr/local/bin" + current_path = "%s%s" % (current_path,path) + environ['PATH'] = current_path set_path() if mode == 'check_systems': - perfdata_fields = 'totalstoragespace usedstoragespace availablestoragespace'.split() - longserviceoutputfields = 'licensestate systemtype firmwareversion nscfwversion totalstoragespace usedstoragespace availablestoragespace'.split() - command = "ls system full" - namefield="objectname" - check_generic(command=command,namefield=namefield,longserviceoutputfields=longserviceoutputfields, perfdata_fields=perfdata_fields) + perfdata_fields = 'totalstoragespace usedstoragespace availablestoragespace'.split() + longserviceoutputfields = 'licensestate systemtype firmwareversion nscfwversion totalstoragespace usedstoragespace availablestoragespace'.split() + command = "ls system full" + namefield="objectname" + check_generic(command=command,namefield=namefield,longserviceoutputfields=longserviceoutputfields, perfdata_fields=perfdata_fields) elif mode == 'check_controllers': - check_controllers() + check_controllers() elif mode == 'check_diskgroups': - command = "ls disk_group full" - namefield='diskgroupname' - longserviceoutputfields = "totaldisks levelingstate levelingprogress totalstoragespacegb usedstoragespacegb occupancyalarmlevel".split() - perfdata_fields="totaldisks".split() - check_generic(command=command,namefield=namefield,longserviceoutputfields=longserviceoutputfields, perfdata_fields=perfdata_fields) + command = "ls disk_group full" + namefield='diskgroupname' + longserviceoutputfields = "totaldisks levelingstate levelingprogress totalstoragespacegb usedstoragespacegb occupancyalarmlevel".split() + perfdata_fields="totaldisks".split() + check_generic(command=command,namefield=namefield,longserviceoutputfields=longserviceoutputfields, perfdata_fields=perfdata_fields) elif mode == 'check_disks': - check_generic(command="ls disk full",namefield="objectname") + check_generic(command="ls disk full",namefield="objectname") elif mode == 'check_diskshelfs' or mode == 'check_diskshelves': - check_generic(command="ls diskshelf full",namefield="diskshelfname",longserviceoutputfields=[], perfdata_fields=[]) + check_generic(command="ls diskshelf full",namefield="diskshelfname",longserviceoutputfields=[], perfdata_fields=[]) else: - print "* Error: Mode %s not found" % mode - print_help() - print "* Error: Mode %s not found" % mode - exit(unknown) + print "* Error: Mode %s not found" % mode + print_help() + print "* Error: Mode %s not found" % mode + exit(unknown)