1
0
mirror of https://github.com/opinkerfi/nagios-plugins.git synced 2026-02-05 22:55:17 +01:00
This commit is contained in:
Páll Guðjón Sigurðsson
2010-12-02 11:19:21 +00:00
parent 432227fdb0
commit 6d8daac6fb
3 changed files with 0 additions and 0 deletions

163
check_rhcs/trunk/check_rhcs Normal file
View File

@@ -0,0 +1,163 @@
#!/bin/env python
#
# Gather the cluster state and the current node state
#
# Output example:
#<clustat version="4.1.1">
# <cluster name="LabCluster" id="22068" generation="172"/>
# <quorum quorate="1" groupmember="1"/>
# <nodes>
# <node name="clusternode1.lab.inetu.net" state="1" local="0" \
# estranged="0" rgmanager="1" rgmanager_master="0" qdisk="0" nodeid="0x00000001"/>
# <node name="clusternode2.lab.inetu.net" state="1" local="1" \
# estranged="0" rgmanager="1" rgmanager_master="0" qdisk="0" nodeid="0x00000002"/>
# <node name="/dev/disk/by-id/scsi-36002219000b9642b000027124a3b61f1-part1" state="1" \
# local="0" estranged="0" rgmanager="0" rgmanager_master="0" qdisk="1" nodeid="0x00000000"/>
# </nodes>
# <groups>
# <group name="service:MySQL" state="112" state_str="started" flags="0" flags_str="" \
# owner="clusternode2.lab.inetu.net" last_owner="clusternode1.lab.inetu.net" restarts="0" \
# last_transition="1245765274" last_transition_str="Tue Jun 23 09:54:34 2009"/>
# </groups>
#</clustat>
#
# Frank Clements <frank @ sixthtoe.net>
import xml.dom.minidom
import os
import sys, socket
import getopt
def usage():
    """
    Display usage information on standard output.

    The program name shown is taken from sys.argv[0].
    """
    # A single parenthesised argument makes this line valid both as a
    # Python 2 print statement and as a Python 3 print() call.
    print("""
Usage: """ + sys.argv[0] + """ ([-s serviceName] | [-c])
-c, --cluster
Gathers the overall cluster status for the local node
-s, --service
Gets the stats of the named service
-h, --help
Display this
""")
def getQuorumState(dom):
    """
    Return the value of the 'quorate' attribute of the first <quorum>
    element found in the parsed clustat document.

    <quorum> is a single inline element that carries only attributes and
    has no child elements. Raises IndexError when the document contains no
    <quorum> element and KeyError when the attribute is absent.
    """
    firstQuorum = dom.getElementsByTagName('quorum')[0]
    quorateAttr = firstQuorum.attributes['quorate']
    return quorateAttr.value
def getClusterName(dom):
    """
    Return the 'name' attribute of the first <cluster> element in the
    parsed clustat document.

    Only the first <cluster> element is consulted, i.e. a single running
    cluster is assumed. Raises IndexError when the document contains no
    <cluster> element and KeyError when the attribute is absent.
    """
    firstCluster = dom.getElementsByTagName('cluster')[0]
    nameAttr = firstCluster.attributes['name']
    return nameAttr.value
def _isLocalNode(nodeName, hostname):
    """
    Tell whether a clustat node name refers to the host named hostname.

    An exact match always counts. In addition, when exactly one of the two
    names is unqualified (contains no dot), it is compared with the first
    label of the other, so "node1" matches "node1.example.com".
    """
    if nodeName == hostname:
        return True
    nodeIsShort = '.' not in nodeName
    hostIsShort = '.' not in hostname
    if nodeIsShort and not hostIsShort:
        return hostname.split('.')[0] == nodeName
    if hostIsShort and not nodeIsShort:
        return nodeName.split('.')[0] == hostname
    return False


def getLocalNodeState(dom):
    """
    Get the state of the local node.

    Walks every <node> element of the parsed clustat document. The node
    whose name matches socket.gethostname() (exactly, or short name versus
    fully qualified name) is reported as a dict with the keys 'name',
    'state' and 'rgmanager', all holding the raw attribute strings.

    Any other node flagged qdisk="1" whose state is not "1" is treated as
    an unavailable quorum disk: a CRITICAL line is printed and the process
    exits with status 2.

    Returns an empty dict when no node matches the local host name.
    """
    hostname = socket.gethostname()
    nodeState = {}
    for node in dom.getElementsByTagName('node'):
        attrs = node.attributes
        nodeName = attrs['name'].value
        if _isLocalNode(nodeName, hostname):
            nodeState['name'] = nodeName
            nodeState['state'] = attrs['state'].value
            nodeState['rgmanager'] = attrs['rgmanager'].value
        elif attrs['qdisk'].value == "1" and attrs['state'].value != "1":
            # Single-argument print() works on both Python 2 and Python 3.
            print("CRITICAL: Quorum disk " + nodeName + " is unavailable!")
            sys.exit(2)
    return nodeState
def getServiceState(dom, service):
    """
    Get the state of the named service.

    Looks for a <group> element named "service:<service>" in the parsed
    clustat document and returns a dict with its 'owner' attribute and its
    'state_str' attribute (stored under the key 'state').

    Returns an empty dict when no such group exists. If several groups
    carry the same name, the last one in document order wins.
    """
    wantedName = "service:" + service
    serviceState = {}
    for group in dom.getElementsByTagName('group'):
        attrs = group.attributes
        if attrs['name'].value == wantedName:
            serviceState['owner'] = attrs['owner'].value
            serviceState['state'] = attrs['state_str'].value
    return serviceState
def main():
    """
    Entry point of the plugin: parse the command line, run 'clustat -fx'
    and report either the cluster/local-node status (-c) or the status of
    one named service (-s NAME).

    The process always terminates through sys.exit() with a Nagios status:
    0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN. UNKNOWN is used for
    usage errors, for clustat output that cannot be parsed and for a local
    node or service that does not appear in the clustat output.
    """
    # Imported locally so this function does not rely on a change to the
    # module-level import block.
    from xml.parsers.expat import ExpatError

    try:
        opts, args = getopt.getopt(sys.argv[1:], 's:ch', ['service=', 'cluster', 'help'])
    except getopt.GetoptError:
        # Invalid arguments are a plugin usage problem, not a cluster fault.
        usage()
        sys.exit(3)

    typeCheck = None
    serviceName = None
    for o, a in opts:
        if o in ('-c', '--cluster'):
            typeCheck = 'cluster'
        if o in ('-s', '--service'):
            typeCheck = 'service'
            serviceName = a
        if o in ('-h', '--help'):
            usage()
            sys.exit()

    # Neither -c nor -s was given: there is nothing to check.
    if typeCheck is None:
        usage()
        sys.exit(3)

    clustatOutput = os.popen('clustat -fx')
    try:
        try:
            dom = xml.dom.minidom.parse(clustatOutput)
        except (ExpatError, IOError, OSError):
            # sys.exc_info() instead of "except ... as" keeps the syntax
            # valid on old Python 2 interpreters as well as Python 3.
            print("UNKNOWN: Unable to parse output of 'clustat -fx': " + str(sys.exc_info()[1]))
            sys.exit(3)
    finally:
        clustatOutput.close()

    if typeCheck == 'cluster':
        # First we query for the state of the cluster itself.
        # Should it be found that the cluster is not quorate we alert and exit immediately.
        cluster = getClusterName(dom)
        qState = getQuorumState(dom)

        # There are some serious problems if the cluster is inquorate so we simply alert immediately!
        if qState != "1":
            print("CRITICAL: Cluster " + cluster + " is inquorate!")
            sys.exit(2)

        # Now we find the status of the local node from clustat.
        # We only care about the local state since this way we can tie the alert to the host.
        nodeStates = getLocalNodeState(dom)
        if not nodeStates:
            print("UNKNOWN: Local node " + socket.gethostname() + " not found in clustat output")
            sys.exit(3)

        if nodeStates['state'] != "1":
            print("WARNING: Local node state is offline!")
            sys.exit(1)
        elif nodeStates['rgmanager'] != "1":
            # The exit status has to match the CRITICAL label of the message.
            print("CRITICAL: RGManager service not running on " + nodeStates['name'] + "!")
            sys.exit(2)
        else:
            print("OK: Cluster node " + nodeStates['name'] + " is online and cluster is quorate.")
            sys.exit(0)
    elif typeCheck == 'service':
        serviceState = getServiceState(dom, serviceName)
        if not serviceState:
            print("UNKNOWN: Service " + serviceName + " not found in clustat output")
            sys.exit(3)

        if serviceState['state'] != 'started':
            print("CRITICAL: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state")
            sys.exit(2)
        else:
            print("OK: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state")
            sys.exit(0)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,3 @@
# check_rhcs only understands -c, -s <service> and -h, and always inspects the
# node it runs on, so no -H option is passed (an unknown option makes it print
# its usage text and exit with an error status).
command[check_rhcs]=/usr/lib64/nagios/plugins/check_rhcs -c
command[check_rhcs_service]=/usr/lib64/nagios/plugins/check_rhcs -s '$ARG1$'

View File

@@ -0,0 +1,84 @@
#!/bin/bash
#
# Copyright 2010, Pall Sigurdsson <palli@opensource.is>
#
# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# About this script
#
# Checks whether RHCS manual fencing is pending on a specified host (i.e. whether /tmp/fence_manual.fifo exists), using NRPE if the host is remote
HOSTN="localhost" # By default check localhost
CHECK_COMMAND="test ! -p /tmp/fence_manual.fifo" # Default (local) check: succeeds only when /tmp/fence_manual.fifo is not a named pipe
# Print a short description of the plugin and its usage on standard output.
print_help() {
    # Mention a version only when VERSION is actually set, so the banner
    # never ends in a dangling "version ".
    echo "check_rhcs_fencing${VERSION:+ version $VERSION}"
    echo "This plugin checks whether a manual ACK is required for RHCS fencing"
    echo ""
    echo "Usage: $0 [-H <host>]"
    echo ""
    echo "Example: Check if fencing is required on localhost"
    echo "# check_rhcs_fencing.sh"
}
# Nagios exit status for "UNKNOWN" (usage errors, check could not be run).
UNKNOWN=3

# Running the plugin without any argument is valid: it checks localhost.

# Parse arguments
while [ $# -gt 0 ]
do
    case "$1" in
        -H)
            # -H needs a value. Without this guard "shift 2" fails, leaves
            # the arguments untouched and the loop never terminates.
            if [ $# -lt 2 ]; then
                print_help
                exit $UNKNOWN
            fi
            HOSTN=$2
            shift 2
            ;;
        *)
            print_help
            exit $UNKNOWN
            ;;
    esac
done

# If we are not checking localhost, run the check on the remote host via NRPE
if [ "$HOSTN" != "localhost" ]; then
    export PATH=$PATH:/usr/lib/nagios/plugins:/usr/lib64/nagios/plugins:/nagios/usr/lib/nagios/plugins
    CHECK_COMMAND="check_nrpe -H $HOSTN -c check_rhcs_fencing"
fi

# Run the check. CHECK_COMMAND is deliberately left unquoted so the shell
# splits it into the command name and its arguments.
OUTPUT=$($CHECK_COMMAND)
RESULT=$?

case $RESULT in
    0)
        echo "Ok, No fencing required on host $HOSTN"
        exit 0
        ;;
    1)
        echo "Warning, /tmp/fence_manual.fifo exists on host $HOSTN. Manual fencing is required"
        exit 1
        ;;
    *)
        # Any other status (2, NRPE's UNKNOWN, 127 "command not found", ...)
        # means the check itself failed; it says nothing about the FIFO.
        echo "Error, could not run command $CHECK_COMMAND"
        echo "output:"
        echo "$OUTPUT"
        exit $UNKNOWN
        ;;
esac