From a6dc1900c7ff1e6bb0fe97e3e74f8582df1752a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B3mas=20Edwardsson?= Date: Mon, 20 Dec 2010 15:43:57 +0000 Subject: [PATCH] --- check_rhcs/trunk/check_rhcs | 326 ++++++++++++++++++------------------ 1 file changed, 163 insertions(+), 163 deletions(-) diff --git a/check_rhcs/trunk/check_rhcs b/check_rhcs/trunk/check_rhcs index 504e3b6..71e6498 100644 --- a/check_rhcs/trunk/check_rhcs +++ b/check_rhcs/trunk/check_rhcs @@ -1,163 +1,163 @@ -#!/bin/env python - -# -# Gather the cluster state and the current node state -# -# Output example: -# -# -# -# -# -# -# -# -# -# -# -# -# -# Frank Clements - -import xml.dom.minidom -import os -import sys, socket -import getopt - -def usage(): - """ - Display usage information - """ - print """ -Usage: """ + sys.argv[0] + """ ([-s serviceName] | [-c]) - --c, --cluster - Gathers the overall cluster status for the local node --s, --service - Gets the stats of the named service --h, --help - Display this -""" - -def getQuorumState(dom): - """ - Get the quorum state. This is a single inline element which only - has attributes and no children elements. - """ - quorumList = dom.getElementsByTagName('quorum') - quorumElement = quorumList[0] - - return quorumElement.attributes['quorate'].value - - -def getClusterName(dom): - """ - Get the name of the cluster from the clustat output. - This assumes only a single cluster is running for the moment. - """ - clusterList = dom.getElementsByTagName('cluster') - clusterElement = clusterList[0] - - return clusterElement.attributes['name'].value - - -def getLocalNodeState(dom): - """ - Get the state of the local node - """ - hostname = socket.gethostname() - nodesList = dom.getElementsByTagName('node') - nodeState = {} - - for node in nodesList: - if node.attributes['name'].value == hostname: - nodeState['name'] = node.attributes['name'].value - nodeState['state'] = node.attributes['state'].value - nodeState['rgmanager'] = node.attributes['rgmanager'].value - - elif node.attributes['qdisk'].value == "1": - if node.attributes['state'].value != "1": - print "CRITICAL: Quorum disk " + node.attributes['name'].value + " is unavailable!" - sys.exit(2) - - return nodeState - - -def getServiceState(dom, service): - """ - Get the state of the named service - """ - groupList = dom.getElementsByTagName('group') - hostname = socket.gethostname() - serviceState = {} - - for group in groupList: - if group.attributes['name'].value == "service:"+service: - serviceState['owner'] = group.attributes['owner'].value - serviceState['state'] = group.attributes['state_str'].value - - return serviceState - - -def main(): - try: - opts, args = getopt.getopt(sys.argv[1:], 's:ch', ['service=', 'cluster', 'help']) - except getopt.GetoptError: - usage() - sys.exit(2) - - for o, a in opts: - if o in ('-c', '--cluster'): - typeCheck = 'cluster' - if o in ('-s', '--service'): - typeCheck = 'service' - serviceName = a - if o in ('-h', '--help'): - usage() - sys.exit() - - clustatOutput = os.popen('clustat -fx') - dom = xml.dom.minidom.parse(clustatOutput) - - if typeCheck == 'cluster': - - # First we query for the state of the cluster itself. - # Should it be found tha the cluste ris not quorate we alert and exit immediately - cluster = getClusterName(dom) - qState = getQuorumState(dom) - - # There are some serious problems if the cluster is inquorate so we simply alert immediately! - if qState != "1": - print "CRITICAL: Cluster " + cluster + " is inquorate!" - sys.exit(2) - - # Now we find the status of the local node from clustat. - # We only care about the local state since this way we can tie the alert to the host. - nodeStates = getLocalNodeState(dom) - if nodeStates['state'] != "1": - print "WARNING: Local node state is offline!" - sys.exit(1) - elif nodeStates['rgmanager'] != "1": - print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!" - sys.exit(1) - else: - print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate." - sys.exit(0) - - elif typeCheck == 'service': - serviceState = getServiceState(dom, serviceName) - if serviceState['state'] != 'started': - print "CRITICAL: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state" - sys.exit(2) - else: - print "OK: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state" - sys.exit(0) - - -if __name__ == "__main__": - main() +#!/bin/env python + +# +# Gather the cluster state and the current node state +# +# Output example: +# +# +# +# +# +# +# +# +# +# +# +# +# +# Frank Clements + +import xml.dom.minidom +import os +import sys, socket +import getopt + +def usage(): + """ + Display usage information + """ + print """ +Usage: """ + sys.argv[0] + """ ([-s serviceName] | [-c]) + +-c, --cluster + Gathers the overall cluster status for the local node +-s, --service + Gets the stats of the named service +-h, --help + Display this +""" + +def getQuorumState(dom): + """ + Get the quorum state. This is a single inline element which only + has attributes and no children elements. + """ + quorumList = dom.getElementsByTagName('quorum') + quorumElement = quorumList[0] + + return quorumElement.attributes['quorate'].value + + +def getClusterName(dom): + """ + Get the name of the cluster from the clustat output. + This assumes only a single cluster is running for the moment. + """ + clusterList = dom.getElementsByTagName('cluster') + clusterElement = clusterList[0] + + return clusterElement.attributes['name'].value + + +def getLocalNodeState(dom): + """ + Get the state of the local node + """ + hostname = socket.gethostname() + nodesList = dom.getElementsByTagName('node') + nodeState = {} + + for node in nodesList: + if node.attributes['name'].value == hostname: + nodeState['name'] = node.attributes['name'].value + nodeState['state'] = node.attributes['state'].value + nodeState['rgmanager'] = node.attributes['rgmanager'].value + + elif node.attributes['qdisk'].value == "1": + if node.attributes['state'].value != "1": + print "CRITICAL: Quorum disk " + node.attributes['name'].value + " is unavailable!" + sys.exit(2) + + return nodeState + + +def getServiceState(dom, service): + """ + Get the state of the named service + """ + groupList = dom.getElementsByTagName('group') + hostname = socket.gethostname() + serviceState = {} + + for group in groupList: + if group.attributes['name'].value == "service:"+service: + serviceState['owner'] = group.attributes['owner'].value + serviceState['state'] = group.attributes['state_str'].value + + return serviceState + + +def main(): + try: + opts, args = getopt.getopt(sys.argv[1:], 's:ch', ['service=', 'cluster', 'help']) + except getopt.GetoptError: + usage() + sys.exit(2) + + for o, a in opts: + if o in ('-c', '--cluster'): + typeCheck = 'cluster' + if o in ('-s', '--service'): + typeCheck = 'service' + serviceName = a + if o in ('-h', '--help'): + usage() + sys.exit() + + clustatOutput = os.popen('/usr/sbin/clustat -fx') + dom = xml.dom.minidom.parse(clustatOutput) + + if typeCheck == 'cluster': + + # First we query for the state of the cluster itself. + # Should it be found tha the cluste ris not quorate we alert and exit immediately + cluster = getClusterName(dom) + qState = getQuorumState(dom) + + # There are some serious problems if the cluster is inquorate so we simply alert immediately! + if qState != "1": + print "CRITICAL: Cluster " + cluster + " is inquorate!" + sys.exit(2) + + # Now we find the status of the local node from clustat. + # We only care about the local state since this way we can tie the alert to the host. + nodeStates = getLocalNodeState(dom) + if nodeStates['state'] != "1": + print "WARNING: Local node state is offline!" + sys.exit(1) + elif nodeStates['rgmanager'] != "1": + print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!" + sys.exit(1) + else: + print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate." + sys.exit(0) + + elif typeCheck == 'service': + serviceState = getServiceState(dom, serviceName) + if serviceState['state'] != 'started': + print "CRITICAL: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state" + sys.exit(2) + else: + print "OK: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state" + sys.exit(0) + + +if __name__ == "__main__": + main()