mirror of
https://github.com/opinkerfi/nagios-plugins.git
synced 2026-02-05 22:55:17 +01:00
This commit is contained in:
163
misc/check_rhcs
163
misc/check_rhcs
@@ -1,163 +0,0 @@
|
||||
#!/bin/env python
|
||||
|
||||
#
|
||||
# Gather the cluster state and the current node state
|
||||
#
|
||||
# Output example:
|
||||
#<clustat version="4.1.1">
|
||||
# <cluster name="LabCluster" id="22068" generation="172"/>
|
||||
# <quorum quorate="1" groupmember="1"/>
|
||||
# <nodes>
|
||||
# <node name="clusternode1.lab.inetu.net" state="1" local="0" \
|
||||
# estranged="0" rgmanager="1" rgmanager_master="0" qdisk="0" nodeid="0x00000001"/>
|
||||
# <node name="clusternode2.lab.inetu.net" state="1" local="1" \
|
||||
# estranged="0" rgmanager="1" rgmanager_master="0" qdisk="0" nodeid="0x00000002"/>
|
||||
# <node name="/dev/disk/by-id/scsi-36002219000b9642b000027124a3b61f1-part1" state="1" \
|
||||
# local="0" estranged="0" rgmanager="0" rgmanager_master="0" qdisk="1" nodeid="0x00000000"/>
|
||||
# </nodes>
|
||||
# <groups>
|
||||
# <group name="service:MySQL" state="112" state_str="started" flags="0" flags_str="" \
|
||||
# owner="clusternode2.lab.inetu.net" last_owner="clusternode1.lab.inetu.net" restarts="0" \
|
||||
# last_transition="1245765274" last_transition_str="Tue Jun 23 09:54:34 2009"/>
|
||||
# </groups>
|
||||
#</clustat>
|
||||
#
|
||||
# Frank Clements <frank @ sixthtoe.net>
|
||||
|
||||
import xml.dom.minidom
|
||||
import os
|
||||
import sys, socket
|
||||
import getopt
|
||||
|
||||
def usage():
    """
    Display usage information.

    Writes a short help text to standard output. The text embeds
    sys.argv[0], so it shows the name the plugin was invoked under.
    Nothing is returned and the process is not terminated here; the
    caller chooses the exit code.
    """
    # print() with one parenthesised string behaves identically as a
    # Python 2 print statement and as the Python 3 print function, so the
    # plugin keeps working whichever interpreter "python" resolves to.
    print("""
Usage: """ + sys.argv[0] + """ ([-s serviceName] | [-c])

-c, --cluster
    Gathers the overall cluster status for the local node
-s, --service
    Gets the stats of the named service
-h, --help
    Display this
""")
|
||||
|
||||
def getQuorumState(dom):
    """
    Return the 'quorate' attribute of the first <quorum> element in dom.

    dom is a parsed clustat XML document. <quorum> is an inline element
    that carries only attributes, so the attribute text is returned as-is.
    """
    firstQuorum = dom.getElementsByTagName('quorum')[0]
    quorate = firstQuorum.attributes['quorate']
    return quorate.value
|
||||
|
||||
|
||||
def getClusterName(dom):
    """
    Return the 'name' attribute of the first <cluster> element in dom.

    dom is a parsed clustat XML document. Only the first <cluster>
    element is consulted, i.e. a single running cluster is assumed.
    """
    firstCluster = dom.getElementsByTagName('cluster')[0]
    nameAttr = firstCluster.attributes['name']
    return nameAttr.value
|
||||
|
||||
|
||||
def getLocalNodeState(dom):
    """
    Get the state of the local node.

    Walks every <node> element of the parsed clustat document dom.
    The local node is the one whose 'name' equals socket.gethostname().
    If no name matches (for example a short host name versus a fully
    qualified node name), the first non-qdisk node carrying local="1" is
    used instead.

    Returns a dict with the keys 'name', 'state' and 'rgmanager' holding
    the attribute strings from the XML, or an empty dict when no local
    node could be identified.

    Side effect: when a quorum disk node (qdisk="1") other than the
    host-name match has a state different from "1", a CRITICAL line is
    printed and the process exits with status 2.
    """
    hostname = socket.gethostname()
    matchedNode = None   # node whose name equals the host name
    flaggedNode = None   # first node marked local="1" in the XML

    for node in dom.getElementsByTagName('node'):
        nodeName = node.attributes['name'].value
        if nodeName == hostname:
            # As before, the last node with a matching name wins.
            matchedNode = node
        elif node.attributes['qdisk'].value == "1":
            if node.attributes['state'].value != "1":
                print("CRITICAL: Quorum disk " + nodeName + " is unavailable!")
                sys.exit(2)
        elif flaggedNode is None and node.getAttribute('local') == "1":
            # getAttribute() yields "" when the attribute is absent, so
            # documents without a 'local' attribute are still accepted.
            flaggedNode = node

    localNode = matchedNode
    if localNode is None:
        localNode = flaggedNode
    if localNode is None:
        return {}

    return {
        'name': localNode.attributes['name'].value,
        'state': localNode.attributes['state'].value,
        'rgmanager': localNode.attributes['rgmanager'].value,
    }
|
||||
|
||||
|
||||
def getServiceState(dom, service):
    """
    Get the state of the named service.

    Looks through every <group> element of the parsed clustat document
    dom for one whose 'name' attribute is "service:" followed by service.

    Returns a dict with the keys 'owner' (the 'owner' attribute) and
    'state' (the 'state_str' attribute). If several groups match, the
    last one wins; if none matches, an empty dict is returned.
    """
    # clustat prefixes service group names with "service:"; build the
    # full name once instead of on every loop iteration.
    wantedName = "service:" + service
    serviceState = {}

    for group in dom.getElementsByTagName('group'):
        if group.attributes['name'].value == wantedName:
            serviceState['owner'] = group.attributes['owner'].value
            serviceState['state'] = group.attributes['state_str'].value

    return serviceState
|
||||
|
||||
|
||||
def main():
    """
    Entry point of the plugin.

    Parses the command line, runs 'clustat -fx', and reports either the
    cluster/local-node status (-c) or the status of one service (-s).
    One status line is printed and the process exits with a Nagios
    status code:

      0  OK
      1  WARNING  (local node state is not "1")
      2  CRITICAL (cluster inquorate, rgmanager not running on the local
                   node, or service not in the 'started' state)
      3  UNKNOWN  (bad command line, no check selected, unparsable
                   clustat output, local node or service not found)

    -h/--help prints the usage text and exits with status 0.
    """
    # Imported here so the block does not depend on a new file-level import.
    from xml.parsers.expat import ExpatError

    try:
        opts, args = getopt.getopt(sys.argv[1:], 's:ch', ['service=', 'cluster', 'help'])
    except getopt.GetoptError:
        usage()
        # Invalid arguments are an UNKNOWN condition for a Nagios plugin.
        sys.exit(3)

    # Pre-set so that running without -c or -s is detected below instead
    # of failing on an unbound name.
    typeCheck = None
    serviceName = None

    for o, a in opts:
        if o in ('-c', '--cluster'):
            typeCheck = 'cluster'
        elif o in ('-s', '--service'):
            typeCheck = 'service'
            serviceName = a
        elif o in ('-h', '--help'):
            usage()
            sys.exit()

    if typeCheck is None:
        usage()
        sys.exit(3)

    clustatOutput = os.popen('clustat -fx')
    try:
        try:
            dom = xml.dom.minidom.parse(clustatOutput)
        except ExpatError:
            # Empty or non-XML output, e.g. clustat missing or failing.
            print("UNKNOWN: Unable to parse the output of 'clustat -fx'")
            sys.exit(3)
    finally:
        clustatOutput.close()

    if typeCheck == 'cluster':

        # First we query for the state of the cluster itself.
        # Should it be found that the cluster is not quorate we alert and exit immediately
        cluster = getClusterName(dom)
        qState = getQuorumState(dom)

        # There are some serious problems if the cluster is inquorate so we simply alert immediately!
        if qState != "1":
            print("CRITICAL: Cluster " + cluster + " is inquorate!")
            sys.exit(2)

        # Now we find the status of the local node from clustat.
        # We only care about the local state since this way we can tie the alert to the host.
        nodeStates = getLocalNodeState(dom)
        if not nodeStates:
            print("UNKNOWN: Local node " + socket.gethostname() + " was not found in the clustat output")
            sys.exit(3)

        if nodeStates['state'] != "1":
            print("WARNING: Local node state is offline!")
            sys.exit(1)
        elif nodeStates['rgmanager'] != "1":
            print("CRITICAL: RGManager service not running on " + nodeStates['name'] + "!")
            # Exit status must agree with the CRITICAL label in the message.
            sys.exit(2)
        else:
            print("OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate.")
            sys.exit(0)

    elif typeCheck == 'service':
        serviceState = getServiceState(dom, serviceName)
        if not serviceState:
            print("UNKNOWN: Service " + serviceName + " was not found in the clustat output")
            sys.exit(3)

        if serviceState['state'] != 'started':
            print("CRITICAL: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state")
            sys.exit(2)
        else:
            print("OK: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state")
            sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
@@ -1,84 +0,0 @@
|
||||
#!/bin/bash
#
# Copyright 2010, Pall Sigurdsson <palli@opensource.is>
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# About this script
#
# Checks whether a manual fencing acknowledgement is pending for RHCS on a
# specified host, using NRPE if the host is remote. Pending manual fencing
# is detected by the presence of the named pipe /tmp/fence_manual.fifo.

# Nagios plugin exit codes used below
OK=0
WARNING=1
UNKNOWN=3

VERSION=${VERSION:-unknown} # No release number is recorded in this script
HOSTN="localhost" # By default check localhost
CHECK_COMMAND="test ! -p /tmp/fence_manual.fifo" # Succeeds (0) when the manual fencing FIFO is absent

print_help() {
    echo "check_rhcs_fencing version $VERSION"
    echo "This plugin checks if there is Manual ACK is required for RHCS fencing"
    echo ""
    echo "Usage: $0 [-H <host>]"
    echo ""
    echo "Example: Check if fencing is required on localhost"
    echo "# check_rhcs_fencing.sh"
}

# Running without arguments is valid: it checks localhost.

# Parse arguments
while [ $# -gt 0 ]
do
    case "$1" in
        -H)
            # Without a value "shift 2" would fail and this loop would never end
            if [ $# -lt 2 ]; then
                print_help
                exit $UNKNOWN
            fi
            HOSTN="$2"
            shift 2
            ;;

        *)
            print_help
            exit $UNKNOWN
            ;;
    esac
done

# If we are not checking localhost, run the check remotely via NRPE
if [ "$HOSTN" != "localhost" ]; then
    export PATH=$PATH:/usr/lib/nagios/plugins:/usr/lib64/nagios/plugins:/nagios/usr/lib/nagios/plugins
    CHECK_COMMAND="check_nrpe -H $HOSTN -c check_rhcs_fencing"
fi

# Run the check. CHECK_COMMAND is deliberately left unquoted so that it is
# split into the command and its arguments.
OUTPUT=$($CHECK_COMMAND)
RESULT=$?

# Only 0 (FIFO absent) and 1 (FIFO present) are meaningful results. Anything
# else (test error, NRPE failure, command not found) means the check itself
# could not be carried out.
if [ $RESULT -ge 2 ]; then
    echo "Error, could not run command $CHECK_COMMAND"
    echo "output:"
    echo "$OUTPUT"
    exit $UNKNOWN
fi

if [ $RESULT -gt 0 ]; then
    echo "Warning, /tmp/fence_manual.fifo exists on host $HOSTN. Manual fencing is required"
    exit $WARNING
else
    echo "Ok, No fencing required on host $HOSTN"
    exit $OK
fi
|
||||
@@ -1,3 +0,0 @@
|
||||
# check_rhcs accepts only -c, -s <service> and -h; any other option (such as -H) makes it print its usage text and fail.
command[check_rhcs]=/usr/lib64/nagios/plugins/check_rhcs -c
command[check_rhcs_service]=/usr/lib64/nagios/plugins/check_rhcs -s '$ARG1$'
|
||||
|
||||
Reference in New Issue
Block a user