nagios tabs

This commit is contained in:
ranl 2013-12-29 10:28:36 +02:00
parent eb44ca6a10
commit 6b08f8accc
2 changed files with 381 additions and 382 deletions

View File

@@ -11,83 +11,82 @@ requires
from optparse import OptionParser from optparse import OptionParser
import urllib2 import urllib2
import re import re
from twisted.plugins.twisted_reactors import default
# Standard nagios plugin exit statuses.
EXIT_CODE = dict(
    OK=0,
    WARN=1,
    CRIT=2,
    UNKNOWN=3,
)
def prepareOpts():
    '''
    Parse and validate command line options for the modjk check.

    Returns the parsed options object.  On any validation error a usage
    message is printed and the process exits with status 1.
    '''
    # 'usage'/'fail' instead of the original 'help'/'err': 'help'
    # shadowed the builtin.
    def usage():
        print('How many workers are in a non-OK state')
        print('')
        parser.print_help()

    def fail(string):
        # Report the problem, show usage and abort with a non-zero status.
        print('Error: {0}'.format(string))
        usage()
        exit(1)

    parser = OptionParser()
    parser.add_option('-u', '--url', dest='url', type='string',
                      help='modjk status worker url')
    # BUG FIX: the -c/-w help texts were swapped in the original
    # (-c described the warning threshold and vice versa).
    parser.add_option('-c', '--critical', dest='critical', type='int',
                      help='critical threshold', default=-1)
    parser.add_option('-w', '--warning', dest='warning', type='int',
                      help='warning threshold', default=-1)
    parser.add_option('-t', '--timeout', dest='timeout', type='float',
                      help='how many seconds to wait for each http request',
                      default=5)
    (opts, args) = parser.parse_args()

    # Input Validation
    if not opts.url:
        fail('missing Modjk Status http url')
    if opts.warning > opts.critical:
        fail('-w can not be greater than -c')
    if opts.warning < 0 or opts.critical < 0:
        fail('-w and -c must be a positive number')
    return opts
def getErrorWorkers(url, timeout):
    '''
    Query the Modjk status worker and return the names of all workers
    whose state line does not end with OK.
    '''
    response = urllib2.urlopen(
        url + '?command=list&mime=prop', timeout=timeout
    ).read()
    bad = []
    for line in re.findall(r'worker\..*\.state=.*', response, re.M):
        if line.endswith('OK'):
            continue
        # Line format: worker.<name>.state=<STATE>; keep only <name>.
        bad.append(line.split('.', 1)[1].split('.', 1)[0])
    return bad
if __name__ == '__main__':
    opts = prepareOpts()
    try:
        bad_workers = getErrorWorkers(opts.url, opts.timeout)
    except urllib2.URLError:
        print('UNKNOWN: Cant query jkstatus worker for data')
        exit(EXIT_CODE['UNKNOWN'])

    # Map the number of failed workers onto a nagios state.
    failed = len(bad_workers)
    if failed < opts.warning:
        state = 'OK'
    elif opts.warning <= failed < opts.critical:
        state = 'WARN'
    else:
        state = 'CRIT'

    print('{0}: {1} workers are in Err state {2}'.format(
        state, failed, ','.join(bad_workers)
    ))
    exit(EXIT_CODE[state])

View File

@@ -13,326 +13,326 @@ import tempfile
import os.path import os.path
# Nagios exit statuses, keyed by state name.
EXIT_CODE = dict(
    OK=0,
    WARN=1,
    CRIT=2,
    UNKNOWN=3,
)
def prepareOpts():
    '''
    Parse and validate command line options for the solr check.

    Returns the parsed options object.  On any validation error a usage
    message is printed and the process exits with status 1.
    '''
    cmds = {
        'ping': 'create a ping to solr API',
        'dataimportDocumentsProcessed': 'check that the dataimport handler is not processing the same document for too long',
        'indexBehindMaster': 'check the difference between the slave index and the master',
    }
    # Build the "Type of Checks" help trailer ('epilog'; the original
    # local was misspelled 'epliog').
    epilog = 'Type of Checks:'
    for k in cmds:
        epilog += '\n {0}:\t{1}'.format(k, cmds[k])

    def usage():
        parser.print_help()
        print('')
        print(epilog)

    def fail(string):
        print('Error: {0}'.format(string))
        usage()
        exit(1)

    parser = OptionParser()
    parser.add_option('-u', '--url', dest='url', type='string', help='solr url', default=None)
    parser.add_option('-U', '--user', dest='user', type='string', help='username', default=None)
    parser.add_option('-P', '--passwd', dest='passwd', type='string', help='password', default=None)
    parser.add_option('-t', '--timeout', dest='timeout', type='float', help='how many seconds to wait for each http request', default=5)
    # list() keeps this working on Python 3, where dict.keys() is a view
    # and optparse requires choices to be a real list; on Python 2 the
    # behaviour is unchanged.
    parser.add_option('-T', '--type', dest='type', type='choice', choices=list(cmds.keys()), help='what to check: {0}'.format(', '.join(cmds.keys())))
    parser.add_option('-C', '--core', dest='core', type='string', help='core id', default=None)
    parser.add_option('-d', '--handler', dest='handler', type='string', help='dataimport handler name', default=None)
    # BUG FIX: the -c/-w help texts were swapped in the original.
    parser.add_option('-c', '--critical', dest='critical', type='int', help='critical threshold (implies -T indexBehindMaster)', default=None)
    parser.add_option('-w', '--warning', dest='warning', type='int', help='warning threshold (implies -T indexBehindMaster)', default=None)
    parser.add_option('-m', '--tmpdir', dest='tmpdir', type='string', help='absolute path to a writeable directory on the server', default=tempfile.gettempdir())
    (opts, args) = parser.parse_args()

    # Input Validation.  (The original also had 'if not opts: ...', but
    # optparse always returns a truthy Values object, so that branch was
    # dead code and has been dropped.)
    if not opts.type:
        fail('missing -T')
    if (opts.user and not opts.passwd) or (not opts.user and opts.passwd):
        fail('missing username or password')
    if not opts.url:
        fail('missing solr http url')
    if opts.type == 'dataimportDocumentsProcessed':
        if opts.core is None:
            fail('missing core id !')
        if opts.handler is None:
            fail('missing handler name !')
    if opts.type == 'indexBehindMaster':
        if opts.critical is None or opts.warning is None:
            fail('missing -w or -c')
        if opts.warning > opts.critical:
            fail('-w can not be greater than -c')
    return opts
class SolrMonitor(object):
    '''
    Monitor Apache Solr via its http API.

    Each public check method (ping, dataimportDocumentsProcessed,
    indexBehindMaster) returns a raw result; the matching _eval_* method
    turns that result into a {'exit': <code>, 'msg': <text>} dict.
    '''

    def __init__(self, url, timeout=5, username=None, passwd=None):
        '''
        url             - base solr url (a trailing slash is stripped)
        timeout         - seconds to wait for each http request
        username/passwd - optional credentials for basic/digest auth
        '''
        self.url = url
        self.timeout = timeout
        self.username = username
        self.passwd = passwd
        # File name (created under opts.tmpdir) used to remember the
        # previous dataimport counter between nagios runs.
        self.memfile = 'check_solr_data'
        if self.url.endswith('/'):
            self.url = self.url[:-1]
        self._get_auth()

    def _get_auth(self):
        '''
        Install a urllib2 opener that can answer both basic and digest
        auth challenges; no-op when no credentials were given.
        '''
        if not self.username or not self.passwd:
            return
        basic = urllib2.HTTPBasicAuthHandler()
        basic.add_password(
            realm='Solr',
            uri=self.url,
            user=self.username,
            passwd=self.passwd
        )
        digest = urllib2.HTTPDigestAuthHandler()
        digest.add_password(
            realm='Solr',
            uri=self.url,
            user=self.username,
            passwd=self.passwd
        )
        urllib2.install_opener(
            urllib2.build_opener(basic, digest))

    def _getXmlData(self, url):
        '''
        Fetch url and parse the response body as XML.

        Returns the root Element, or None when the http request fails.
        '''
        try:
            return ET.fromstring(
                urllib2.urlopen(
                    url,
                    timeout=self.timeout
                ).read()
            )
        except urllib2.URLError:
            return None

    def _eval_ping(self, res, opts):
        '''
        Evaluate the ping test; res is the boolean returned by ping().
        '''
        if res:
            return {
                'exit': EXIT_CODE['OK'],
                'msg': 'OK: Solr Ping is up'
            }
        # BUG FIX: the original printed an 'OK:' prefix here even though
        # the exit code is CRIT.
        return {
            'exit': EXIT_CODE['CRIT'],
            'msg': 'CRIT: Solr Ping is down'
        }

    def _eval_dataimportDocumentsProcessed(self, res, opts):
        '''
        Evaluate the dataimportDocumentsProcessed test.

        res is the current 'Total Documents Processed' counter (negative
        when solr could not be queried).  The previous counter is cached
        in a file under opts.tmpdir; the check is CRIT when the counter
        has not moved since the last run.
        '''
        firstTimeResponse = {
            'exit': EXIT_CODE['UNKNOWN'],
            'msg': 'UNKNOWN: looks like the first time we are using this check, creating local cache'
        }
        memFile = os.path.join(opts.tmpdir, self.memfile)
        if not os.path.isfile(memFile):
            # First run on this host: seed the cache and report UNKNOWN.
            with open(memFile, 'w') as f:
                f.write(str(res))
            return firstTimeResponse
        if res < 0:
            return {
                'exit': EXIT_CODE['UNKNOWN'],
                'msg': 'UNKNOWN: could not query solr for index status'
            }
        # Read the previous value and overwrite it with the current one.
        fh = open(memFile, 'r+')
        prev = fh.read()
        fh.seek(0)
        fh.write(str(res))
        # BUG FIX: without truncate() a shorter new value left stale
        # trailing digits from the old one in the cache file.
        fh.truncate()
        fh.close()
        if not prev:
            return firstTimeResponse
        prev = int(prev)
        if prev != res or res == 0:
            return {
                'exit': EXIT_CODE['OK'],
                'msg': 'OK: Solr is indexing {0} docs now and before {1}'.format(
                    res, prev
                )
            }
        return {
            'exit': EXIT_CODE['CRIT'],
            'msg': 'CRIT: Solr is still indexing {0} docs since the last check'.format(res)
        }

    def _eval_indexBehindMaster(self, res, opts):
        '''
        Evaluate the indexBehindMaster test; res is the version gap
        between the master replicable index and the slave index.
        '''
        # NOTE(review): indexBehindMaster() returns -1 on query errors,
        # which this maps to OK (-1 < warning) -- confirm intended.
        if res < opts.warning:
            msg = 'OK'
        elif opts.warning <= res <= opts.critical:
            msg = 'WARN'
        else:
            msg = 'CRIT'
        return {
            'exit': EXIT_CODE[msg],
            'msg': '{0}: Solr Slave is {1} behind then master'.format(
                msg, res
            )
        }

    def ping(self):
        '''
        Return True when solr answers the admin ping with OK.
        '''
        root = self._getXmlData(self.url + '/admin/ping')
        if root is None:
            return False
        return root.find('str').text == 'OK'

    def dataimportDocumentsProcessed(self, core, handler):
        '''
        Return the number of processed documents from the dataimport
        handler, or -1 when it cannot be determined.
        url: http://solr:port/solr/core0/dataimportName?command=status
        '''
        url = '{0}/{1}/{2}?command=status'.format(
            self.url,
            core,
            handler
        )
        root = self._getXmlData(url)
        if root is None:
            return -1
        for lst in root.findall('lst'):
            if lst.attrib['name'] == 'statusMessages':
                # 'node' instead of the original loop variable 'str',
                # which shadowed the builtin.
                for node in lst.findall('str'):
                    if node.attrib['name'] == 'Total Documents Processed':
                        return int(node.text)
        return -1

    def indexBehindMaster(self):
        '''
        Return the difference between the master replicable index
        version and the slave index version, or -1 when the replication
        details could not be fetched or parsed.
        '''
        slave = None
        master = None
        root = self._getXmlData(
            self.url + '/replication?command=details'
        )
        if root is None:
            return -1
        for lst in root.findall('lst'):
            if lst.attrib['name'] == 'details':
                # Slave index version.
                for lng in lst.findall('long'):
                    if lng.attrib['name'] == 'indexVersion':
                        slave = long(lng.text)
                        break
                # Master replicable version, nested under
                # details/slave/masterDetails/master.
                for lstm in lst.findall('lst'):
                    if lstm.attrib['name'] == 'slave':
                        for lstms in lstm.findall('lst'):
                            if lstms.attrib['name'] == 'masterDetails':
                                for lstMaster in lstms.findall('lst'):
                                    if lstMaster.attrib['name'] == 'master':
                                        for rep in lstMaster.findall('long'):
                                            if rep.attrib['name'] == 'replicableVersion':
                                                master = long(rep.text)
                                                break
            if master and slave:
                break
        # BUG FIX: the original raised a TypeError when either version
        # was missing from the response; report the documented -1.
        if master is None or slave is None:
            return -1
        return long(master - slave)

    @staticmethod
    def main():
        '''
        Entry point: parse options, run the requested check and exit
        with the matching nagios status code.
        '''
        opts = prepareOpts()
        solr = SolrMonitor(opts.url, opts.timeout, opts.user, opts.passwd)
        # Dispatch to the check named by -T, passing only the keyword
        # arguments that were actually supplied.
        method = getattr(solr, opts.type)
        kwargs = {}
        if opts.core:
            kwargs['core'] = opts.core
        if opts.handler:
            kwargs['handler'] = opts.handler
        res = method(**kwargs)
        eval_method = getattr(
            solr, '_eval_{0}'.format(opts.type)
        )
        ret = eval_method(res, opts)
        print(ret['msg'])
        exit(ret['exit'])
# Run the check only when executed as a script, not on import.
if __name__ == '__main__':
    SolrMonitor.main()