From ca07c1413249a5f427926c848e3942eb3e42d493 Mon Sep 17 00:00:00 2001
From: Denis GERMAIN
Date: Sun, 4 Dec 2016 12:18:51 +0100
Subject: [PATCH 1/4] Corrected a CRITICAL state returned as 1 (WARNING state
from Nagios POV). Added a safeguard for when information about the local node
isn't found
---
check_rhcs/{check_rhcs => check_rhcs.py} | 25 ++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
rename check_rhcs/{check_rhcs => check_rhcs.py} (90%)
diff --git a/check_rhcs/check_rhcs b/check_rhcs/check_rhcs.py
similarity index 90%
rename from check_rhcs/check_rhcs
rename to check_rhcs/check_rhcs.py
index a235498..5a8db20 100644
--- a/check_rhcs/check_rhcs
+++ b/check_rhcs/check_rhcs.py
@@ -1,6 +1,4 @@
#!/bin/env python
-
-#
# Gather the cluster state and the current node state
#
# Output example:
@@ -133,7 +131,7 @@ def main():
if typeCheck == 'cluster':
# First we query for the state of the cluster itself.
- # Should it be found tha the cluste ris not quorate we alert and exit immediately
+ # Should it be found that the cluster is not quorate we alert and exit immediately
cluster = getClusterName(dom)
qState = getQuorumState(dom)
@@ -145,15 +143,18 @@ def main():
# Now we find the status of the local node from clustat.
# We only care about the local state since this way we can tie the alert to the host.
nodeStates = getLocalNodeState(dom)
- if nodeStates['state'] != "1":
- print "WARNING: Local node state is offline!"
- sys.exit(1)
- elif nodeStates['rgmanager'] != "1":
- print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!"
- sys.exit(1)
- else:
- print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate."
- sys.exit(0)
+ if nodeStates == {}:
+ print "UNKNOWN: Local node informations couldn't be found!"
+ sys.exit(3)
+ if nodeStates['state'] != "1":
+ print "WARNING: Local node state is offline!"
+ sys.exit(1)
+ elif nodeStates['rgmanager'] != "1":
+ print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!"
+ sys.exit(2)
+ else:
+ print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate."
+ sys.exit(0)
elif typeCheck == 'service':
serviceState = getServiceState(dom, serviceName)
From 77be9cb42dacff76f43cbfae771f1557a81833d1 Mon Sep 17 00:00:00 2001
From: Denis GERMAIN
Date: Sun, 4 Dec 2016 12:20:06 +0100
Subject: [PATCH 2/4] Added a safeguard that prints usage when no argument is given
---
check_rhcs/check_rhcs.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/check_rhcs/check_rhcs.py b/check_rhcs/check_rhcs.py
index 5a8db20..6f4142d 100644
--- a/check_rhcs/check_rhcs.py
+++ b/check_rhcs/check_rhcs.py
@@ -110,6 +110,7 @@ def main():
sys.exit(2)
check_suspend = False
+ typeCheck = None
for o, a in opts:
if o in ('-c', '--cluster'):
typeCheck = 'cluster'
@@ -122,6 +123,10 @@ def main():
usage()
sys.exit()
+ if typeCheck == None:
+ usage()
+ sys.exit()
+
try:
clustatOutput = os.popen('/usr/sbin/clustat -fx')
dom = xml.dom.minidom.parse(clustatOutput)
From fbd1392ca7e5ed33bed54f0f7bcbe44e32d9703e Mon Sep 17 00:00:00 2001
From: Denis GERMAIN
Date: Sun, 4 Dec 2016 12:21:56 +0100
Subject: [PATCH 3/4] Added info about bug in RHCS for RHEL 5
---
check_rhcs/check_rhcs.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/check_rhcs/check_rhcs.py b/check_rhcs/check_rhcs.py
index 6f4142d..0c6f661 100644
--- a/check_rhcs/check_rhcs.py
+++ b/check_rhcs/check_rhcs.py
@@ -21,6 +21,14 @@
#
#
# Frank Clements
+#
+# INFO: In RHEL 5, a bug in clustat prevents non-root users from running
+# clustat. See https://bugzilla.redhat.com/show_bug.cgi?id=531273
+# If rgmanager cannot be upgraded to 3.0.7+, you might need to set the setuid
+# bit on clustat as a workaround:
+# $chown root:nagios /usr/sbin/clustat
+# $chmod u+s /usr/sbin/clustat
+
import xml.dom.minidom
import os
From d9e89bc3588e57d48d52677592b4f18314467989 Mon Sep 17 00:00:00 2001
From: Denis GERMAIN
Date: Sun, 4 Dec 2016 12:25:53 +0100
Subject: [PATCH 4/4] Small tab vs space issue. All spaces now
---
check_rhcs/check_rhcs.py | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/check_rhcs/check_rhcs.py b/check_rhcs/check_rhcs.py
index 0c6f661..66fd565 100644
--- a/check_rhcs/check_rhcs.py
+++ b/check_rhcs/check_rhcs.py
@@ -156,18 +156,18 @@ def main():
# Now we find the status of the local node from clustat.
# We only care about the local state since this way we can tie the alert to the host.
nodeStates = getLocalNodeState(dom)
- if nodeStates == {}:
+ if nodeStates == {}:
print "UNKNOWN: Local node informations couldn't be found!"
- sys.exit(3)
- if nodeStates['state'] != "1":
- print "WARNING: Local node state is offline!"
- sys.exit(1)
- elif nodeStates['rgmanager'] != "1":
- print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!"
- sys.exit(2)
- else:
- print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate."
- sys.exit(0)
+ sys.exit(3)
+ if nodeStates['state'] != "1":
+ print "WARNING: Local node state is offline!"
+ sys.exit(1)
+ elif nodeStates['rgmanager'] != "1":
+ print "CRITICAL: RGManager service not running on " + nodeStates['name'] + "!"
+ sys.exit(2)
+ else:
+ print "OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate."
+ sys.exit(0)
elif typeCheck == 'service':
serviceState = getServiceState(dom, serviceName)