From ca07c1413249a5f427926c848e3942eb3e42d493 Mon Sep 17 00:00:00 2001 From: Denis GERMAIN Date: Sun, 4 Dec 2016 12:18:51 +0100 Subject: [PATCH 1/4] Corrected a CRITICAL state returned as 1 (WARNING state from nagios POV) Added a safeguard when information about the local node isn't found --- check_rhcs/{check_rhcs => check_rhcs.py} | 25 ++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) rename check_rhcs/{check_rhcs => check_rhcs.py} (90%) diff --git a/check_rhcs/check_rhcs b/check_rhcs/check_rhcs.py similarity index 90% rename from check_rhcs/check_rhcs rename to check_rhcs/check_rhcs.py index a235498..5a8db20 100644 --- a/check_rhcs/check_rhcs +++ b/check_rhcs/check_rhcs.py @@ -1,6 +1,4 @@ #!/bin/env python - -# # Gather the cluster state and the current node state # # Output example: @@ -133,7 +131,7 @@ def main(): if typeCheck == 'cluster': # First we query for the state of the cluster itself. - # Should it be found tha the cluste ris not quorate we alert and exit immediately + # Should it be found that the cluster is not quorate we alert and exit immediately cluster = getClusterName(dom) qState = getQuorumState(dom) @@ -145,15 +143,18 @@ def main(): # Now we find the status of the local node from clustat. # We only care about the local state since this way we can tie the alert to the host. nodeStates = getLocalNodeState(dom) - if nodeStates['state'] != "1": - print "WARNING: Local node state is offline!" - sys.exit(1) - elif nodeStates['rgmanager'] != "1": - print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!" - sys.exit(1) - else: - print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate." - sys.exit(0) + if nodeStates == {}: + print "UNKNOWN: Local node informations couldn't be found!" + sys.exit(3) + if nodeStates['state'] != "1": + print "WARNING: Local node state is offline!" 
+ sys.exit(1) + elif nodeStates['rgmanager'] != "1": + print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!" + sys.exit(2) + else: + print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate." + sys.exit(0) elif typeCheck == 'service': serviceState = getServiceState(dom, serviceName) From 77be9cb42dacff76f43cbfae771f1557a81833d1 Mon Sep 17 00:00:00 2001 From: Denis GERMAIN Date: Sun, 4 Dec 2016 12:20:06 +0100 Subject: [PATCH 2/4] Added a safeguard and print usage when no argument given --- check_rhcs/check_rhcs.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/check_rhcs/check_rhcs.py b/check_rhcs/check_rhcs.py index 5a8db20..6f4142d 100644 --- a/check_rhcs/check_rhcs.py +++ b/check_rhcs/check_rhcs.py @@ -110,6 +110,7 @@ def main(): sys.exit(2) check_suspend = False + typeCheck = None for o, a in opts: if o in ('-c', '--cluster'): typeCheck = 'cluster' @@ -122,6 +123,10 @@ def main(): usage() sys.exit() + if typeCheck == None: + usage() + sys.exit() + try: clustatOutput = os.popen('/usr/sbin/clustat -fx') dom = xml.dom.minidom.parse(clustatOutput) From fbd1392ca7e5ed33bed54f0f7bcbe44e32d9703e Mon Sep 17 00:00:00 2001 From: Denis GERMAIN Date: Sun, 4 Dec 2016 12:21:56 +0100 Subject: [PATCH 3/4] Added info about bug in RHCS for RHEL 5 --- check_rhcs/check_rhcs.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/check_rhcs/check_rhcs.py b/check_rhcs/check_rhcs.py index 6f4142d..0c6f661 100644 --- a/check_rhcs/check_rhcs.py +++ b/check_rhcs/check_rhcs.py @@ -21,6 +21,14 @@ # # # Frank Clements +# +# INFO : In RHEL 5, there is a bug in clustat preventing non-root users from using +# clustat. 
See https://bugzilla.redhat.com/show_bug.cgi?id=531273 +# You might need to use setuid on clustat to change this if rgmanager cannot be +# upgraded to 3.0.7+ +# $chown root:nagios /usr/sbin/clustat +# $chmod u+s /usr/sbin/clustat + import xml.dom.minidom import os From d9e89bc3588e57d48d52677592b4f18314467989 Mon Sep 17 00:00:00 2001 From: Denis GERMAIN Date: Sun, 4 Dec 2016 12:25:53 +0100 Subject: [PATCH 4/4] Small tab vs space issue. All spaces now --- check_rhcs/check_rhcs.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/check_rhcs/check_rhcs.py b/check_rhcs/check_rhcs.py index 0c6f661..66fd565 100644 --- a/check_rhcs/check_rhcs.py +++ b/check_rhcs/check_rhcs.py @@ -156,18 +156,18 @@ def main(): # Now we find the status of the local node from clustat. # We only care about the local state since this way we can tie the alert to the host. nodeStates = getLocalNodeState(dom) - if nodeStates == {}: + if nodeStates == {}: print "UNKNOWN: Local node informations couldn't be found!" - sys.exit(3) - if nodeStates['state'] != "1": - print "WARNING: Local node state is offline!" - sys.exit(1) - elif nodeStates['rgmanager'] != "1": - print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!" - sys.exit(2) - else: - print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate." - sys.exit(0) + sys.exit(3) + if nodeStates['state'] != "1": + print "WARNING: Local node state is offline!" + sys.exit(1) + elif nodeStates['rgmanager'] != "1": + print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!" + sys.exit(2) + else: + print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate." + sys.exit(0) elif typeCheck == 'service': serviceState = getServiceState(dom, serviceName)