diff --git a/misc/check_rhcs b/misc/check_rhcs
new file mode 100644
index 0000000..504e3b6
--- /dev/null
+++ b/misc/check_rhcs
@@ -0,0 +1,163 @@
+#!/bin/env python
+# Gather the cluster state and the current node state
+# Output example:
+# Frank Clements
+import xml.dom.minidom
+import os
+import sys, socket
+import getopt
+def usage():
+    """
+    Print the command-line synopsis and option summary to stdout.
+
+    The program name shown in the synopsis is taken from sys.argv[0].
+    """
+    # The help text is a triple-quoted literal split around sys.argv[0].
+    # The trailing literal must be terminated, otherwise every definition
+    # that follows is swallowed into the string and the script cannot parse.
+    print("""
+Usage: """ + sys.argv[0] + """ ([-s serviceName] | [-c])
+-c, --cluster
+ Gathers the overall cluster status for the local node
+-s, --service
+ Gets the stats of the named service
+-h, --help
+ Display this
+""")
+def getQuorumState(dom):
+    """
+    Return the value of the 'quorate' attribute of the first <quorum>
+    element found in dom.
+
+    The attribute is returned as the raw string stored in the document.
+    Raises IndexError when dom has no <quorum> element and KeyError when
+    that element carries no 'quorate' attribute.
+    """
+    firstQuorum = dom.getElementsByTagName('quorum')[0]
+    quorateAttr = firstQuorum.attributes['quorate']
+    return quorateAttr.value
+def getClusterName(dom):
+    """
+    Return the 'name' attribute of the first <cluster> element in dom.
+
+    Only the first <cluster> element is consulted, so a document that
+    describes several clusters yields the name of the first one.
+    Raises IndexError when dom has no <cluster> element and KeyError when
+    that element carries no 'name' attribute.
+    """
+    firstCluster = dom.getElementsByTagName('cluster')[0]
+    nameAttr = firstCluster.attributes['name']
+    return nameAttr.value
+def _nodeSummary(node):
+    """
+    Collect the name, state and rgmanager attributes of a <node> element.
+    """
+    # getAttribute() returns "" for an absent attribute instead of raising.
+    return {
+        'name': node.getAttribute('name'),
+        'state': node.getAttribute('state'),
+        'rgmanager': node.getAttribute('rgmanager'),
+    }
+
+
+def getLocalNodeState(dom):
+    """
+    Get the state of the local node.
+
+    Every <node> element in dom is examined.  The local node is the one
+    whose 'name' attribute equals socket.gethostname().  When no name
+    matches (for example short host name versus FQDN), the first node
+    carrying local="1" is used instead.
+
+    Returns a dict with the keys 'name', 'state' and 'rgmanager' (raw
+    attribute strings), or an empty dict when no local node was found.
+
+    Side effect: when a node other than the hostname match has qdisk="1"
+    and a state other than "1", a CRITICAL line is printed and the
+    process exits with status 2.
+    """
+    hostname = socket.gethostname()
+    nodeState = {}
+    flaggedLocal = None
+    for node in dom.getElementsByTagName('node'):
+        if node.getAttribute('name') == hostname:
+            nodeState = _nodeSummary(node)
+        elif node.getAttribute('qdisk') == "1":
+            if node.getAttribute('state') != "1":
+                print("CRITICAL: Quorum disk " + node.getAttribute('name') + " is unavailable!")
+                sys.exit(2)
+        elif flaggedLocal is None and node.getAttribute('local') == "1":
+            # NOTE(review): relies on clustat -x marking the local member
+            # with local="1" -- confirm for the deployed rgmanager version.
+            flaggedLocal = node
+    # A host name match always wins; the local flag is only a fallback.
+    if not nodeState and flaggedLocal is not None:
+        nodeState = _nodeSummary(flaggedLocal)
+    return nodeState
+def getServiceState(dom, service):
+    """
+    Get the state of the named service.
+
+    Looks through the <group> elements of dom for one whose 'name'
+    attribute is "service:" followed by the given service name.
+
+    Returns a dict with the keys 'owner' and 'state' (the latter taken
+    from the 'state_str' attribute).  The dict is empty when no group
+    matches; if several match, the last one wins.
+    Raises KeyError when a <group> lacks 'name', or when a matching
+    group lacks 'owner' or 'state_str'.
+    """
+    wantedName = "service:" + service
+    serviceState = {}
+    for group in dom.getElementsByTagName('group'):
+        if group.attributes['name'].value == wantedName:
+            serviceState['owner'] = group.attributes['owner'].value
+            serviceState['state'] = group.attributes['state_str'].value
+    return serviceState
+def _pluginExit(status, message):
+    """
+    Print a single status line and terminate with the given exit status.
+    """
+    print(message)
+    sys.exit(status)
+
+
+def main():
+    """
+    Parse the command line, run 'clustat -fx' and report cluster or
+    service health as one status line plus an exit status.
+
+    Options:
+      -c / --cluster        check quorum and the local node
+      -s / --service NAME   check the named service
+      -h / --help           show usage and exit
+      -H HOST               accepted and ignored
+
+    Exit status: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
+    """
+    try:
+        # 'H:' is accepted because NRPE command definitions commonly pass
+        # -H <host>; the value is not used, only the local node is checked.
+        opts, args = getopt.getopt(sys.argv[1:], 's:chH:', ['service=', 'cluster', 'help'])
+    except getopt.GetoptError:
+        usage()
+        # Exit status for bad options is kept as in the original.
+        sys.exit(2)
+    # Initialise both so a run without -c/-s fails cleanly, not on an unbound name.
+    typeCheck = None
+    serviceName = None
+    for o, a in opts:
+        if o in ('-c', '--cluster'):
+            typeCheck = 'cluster'
+        if o in ('-s', '--service'):
+            typeCheck = 'service'
+            serviceName = a
+        if o in ('-h', '--help'):
+            usage()
+            sys.exit()
+    if typeCheck is None:
+        usage()
+        sys.exit(3)
+    clustatOutput = os.popen('clustat -fx')
+    try:
+        try:
+            dom = xml.dom.minidom.parse(clustatOutput)
+        except Exception:
+            # Empty or non-XML output: clustat is missing or could not
+            # query the cluster.  Report it rather than dump a traceback.
+            _pluginExit(3, "UNKNOWN: Unable to parse the output of 'clustat -fx'")
+    finally:
+        # Always release the pipe to the child process.
+        clustatOutput.close()
+    if typeCheck == 'cluster':
+        # First we query for the state of the cluster itself.
+        # Should the cluster not be quorate we alert and exit immediately.
+        try:
+            cluster = getClusterName(dom)
+            qState = getQuorumState(dom)
+        except (IndexError, KeyError):
+            _pluginExit(3, "UNKNOWN: clustat output has no cluster or quorum information")
+        if qState != "1":
+            _pluginExit(2, "CRITICAL: Cluster " + cluster + " is inquorate!")
+        # Now we find the status of the local node from clustat.
+        # Only the local state matters, so the alert is tied to this host.
+        try:
+            nodeStates = getLocalNodeState(dom)
+        except KeyError:
+            _pluginExit(3, "UNKNOWN: clustat output has an incomplete node entry")
+        if not nodeStates:
+            _pluginExit(3, "UNKNOWN: Local node not found in clustat output")
+        if nodeStates['state'] != "1":
+            _pluginExit(1, "WARNING: Local node state is offline!")
+        if nodeStates['rgmanager'] != "1":
+            # The message says CRITICAL, so exit with 2 (was 1 = WARNING).
+            _pluginExit(2, "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!")
+        _pluginExit(0, "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate.")
+    elif typeCheck == 'service':
+        try:
+            serviceState = getServiceState(dom, serviceName)
+        except KeyError:
+            _pluginExit(3, "UNKNOWN: clustat output has an incomplete service entry")
+        if not serviceState:
+            _pluginExit(3, "UNKNOWN: Service " + serviceName + " not found in clustat output")
+        statusText = "Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state"
+        if serviceState['state'] != 'started':
+            _pluginExit(2, "CRITICAL: " + statusText)
+        _pluginExit(0, "OK: " + statusText)
+# Run the check only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+ main()
diff --git a/misc/nrpe.d/check_rhcs.cfg b/misc/nrpe.d/check_rhcs.cfg
new file mode 100644
index 0000000..ceef14e
--- /dev/null
+++ b/misc/nrpe.d/check_rhcs.cfg
@@ -0,0 +1,3 @@
+command[check_rhcs]=/usr/lib64/nagios/plugins/check_rhcs -H rek-oraheart-p04 -c
+command[check_rhcs_service]=/usr/lib64/nagios/plugins/check_rhcs -s '$ARG1$'