#!/bin/sh
#
# description:  starts and stops the cluster daemons
# chkconfig: 2345 99 01
#

# Use a fixed command search path for everything this script runs.
PATH="/usr/bin:/sbin:/bin:/usr/sbin"
export PATH

# Init-script helper library (presumably the source of the `action` helper
# called by watchdoginit, which is not defined in this file -- confirm).
. /etc/rc.d/init.d/functions

# Location of the cluster configuration file.
CFG_DIR=/etc
CFG_FILE="${CFG_DIR}/cluster.conf"

# Daemon names, helper programs and their command-line options.
#

# Value passed to the watchdog module as its "nowayout=" parameter.
WATCHDOG_NOWAYOUT=0

# Cluster daemons handled by the stop/reload/status actions below.
CLUHBD='cluhbd'
CLUSVCMGRD='clusvcmgrd'
CLUQUORUMD='cluquorumd'
CLUPOWERD='clupowerd'
CLUMIBD='clumibd'
CLURMTABD='clurmtabd'
CLUSCAND='cluscand'

# Helper programs: stop-message sender and logger.
CLUSTOP='clustop'
CLULOG='clulog'

# Options: the quorum daemon is started with the START options; the STOP
# options are passed to $CLUSTOP when the stop message goes to the quorum
# daemon; CLUSTOP_OPTIONS is used when the message goes to the service manager.
CLUQUORUMD_START_OPTIONS=
CLUQUORUMD_STOP_OPTIONS='-p'
CLUSTOP_OPTIONS=

# daemon functions
# getpid NAME
#
# Print the process id(s) of the daemon called NAME on stdout; prints an
# empty line when nothing is found.  Sources are tried in this order:
#   1. the first line of /var/run/NAME.pid
#   2. the output of `pidof NAME`
#   3. the pid of the first `ps ax` entry whose fifth column is NAME,
#      "(NAME)", "[NAME]" or "NAME:"
#
# The pid file is located from the argument itself, so the result does not
# depend on whatever the caller happens to have stored in $base.
# Callers run this inside a command substitution, so the scratch variable
# $pid assigned here does not reach them.
getpid()
{
    pid=''

    # 1. pid file under /var/run
    if [ -f "/var/run/$1.pid" ]
    then
        pid=`head -n 1 "/var/run/$1.pid"`
    fi

    # 2. pidof
    if [ -z "$pid" ]
    then
        pid=`pidof "$1"`
    fi

    # 3. process table scan; the name is handed to awk through ARGV[1] and
    #    ARGC is reset so awk does not try to open it as an input file.
    if [ -z "$pid" ]
    then
        pid=`ps ax | awk 'BEGIN { prog=ARGV[1]; ARGC=1 }
                   { if ((prog == $5) || (("(" prog ")") == $5) ||
                     (("[" prog "]") == $5) ||
                   ((prog ":") == $5)) { print $1 ; exit 0 } }' "$1"`
    fi

    # Deliberately unquoted: word splitting squeezes stray whitespace so that
    # several pids come out as one space-separated line.
    echo $pid
}

# startdaemon COMMAND [ARG...]
#
# Start COMMAND with the given arguments unless a process for it is already
# alive, and finish the caller's progress line with " already running.",
# " done." or " failed.".
#
# Sets the globals $base (basename of COMMAND, also consulted by getpid) and
# $pid.  Core dumps are disabled for the current shell before COMMAND runs.
#
# Returns 0 when the daemon was already running or COMMAND succeeded,
# otherwise the exit status of COMMAND.
startdaemon()
{
    base=`basename "$1"`

    # check if it is already running
    # ($pid stays unquoted for ps: getpid may report several pids)
    pid=`getpid "$base"`
    if [ -n "$pid" ] && ps h $pid >/dev/null 2>&1
    then
        echo " already running."
        return 0
    fi

    # don't dump core
    ulimit -c 0

    # "$@" keeps every argument exactly as the caller passed it; an explicit
    # if/else reports the command's own status instead of echo's.
    if "$@"
    then
        echo " done."
        return 0
    else
        RC=$?
        echo " failed."
        return $RC
    fi
}

# stopdaemon COMMAND
#
# Terminate the daemon identified by the basename of COMMAND and finish the
# caller's progress line with " done.", " failed." or " not running.".
#
# A live process gets SIGTERM first.  It is then re-checked immediately,
# after a further 1 second and after a further 3 seconds; if it is still
# alive at the last check it gets SIGKILL followed by a 2 second pause.
# When the process is gone afterwards, /var/run/<basename>.pid is removed.
#
# Sets the globals $base, $pid and $RC.
stopdaemon()
{
    base=`basename $1`
    pid=`getpid $base`

    if [ -z "$pid" ]
    then
        echo " not running."
        return
    fi

    if ps h $pid >/dev/null 2>&1
    then
        kill -TERM $pid
        # Each step first checks whether the process is still there; the
        # numeric steps are grace periods, the last step is the hard kill.
        for step in 1 3 KILL
        do
            ps h $pid >/dev/null 2>&1 || break
            if [ "$step" = KILL ]
            then
                kill -KILL $pid
                sleep 2
            else
                sleep $step
            fi
        done
    fi

    # Final verdict: status 0 from ps means the process survived.
    ps h $pid >/dev/null 2>&1
    RC=$?
    if [ $RC -eq 0 ]
    then
        echo " failed."
    else
        echo " done."
        rm -f /var/run/$base.pid >/dev/null 2>&1
    fi
}

# daemonstatus COMMAND
#
# Print a one-line status for the daemon identified by the basename of
# COMMAND.  Sets the globals $base and $pid.
#
# Returns:
#   0  a pid was found and ps confirms the process exists
#   1  the process is gone but /var/run/<basename>.pid still exists
#   2  a pid was found, the process is gone and there is no pid file
#   3  no pid could be found at all
daemonstatus()
{
    base=`basename $1`
    pid=`getpid $base`

    # Nothing known about the daemon.
    if [ -z "$pid" ]
    then
        echo "$base is stopped."
        return 3
    fi

    # A pid is known and the process is alive.
    if ps h $pid >/dev/null 2>&1
    then
        echo "$base (pid $pid) is running."
        return 0
    fi

    # A pid is known but the process is gone: is a pid file left behind?
    if [ -f /var/run/${base}.pid ]
    then
        echo "$base dead but pid file exists."
        return 1
    fi

    echo "$base is stopped."
    return 2
}


#
# watchdoginit
#
# Check the cluster configuration file and load the watchdog timer driver
# if necessary.
#
# Reads:   $CFG_FILE, $WATCHDOG_NOWAYOUT, /etc/modules.conf
# Uses:    the `action` helper (not defined in this file), lsmod, modprobe
# Returns: always 0
#
watchdoginit()
{
    # Check the cluster configuration for watchdog support.  Any non-zero
    # grep status -- no match, or the file could not be read -- means there
    # is nothing to set up.
    if ! grep -q -i sw_watchdog "$CFG_FILE"; then
        return 0
    fi

    # Check to ensure we have /dev/watchdog
    if ! [ -c /dev/watchdog ]; then
        action "Creating /dev/watchdog:" /dev/MAKEDEV watchdog
    fi

    # Check /etc/modules.conf for an "alias watchdog xxxxxx" line; xxxxxx is
    # the specific driver we're dealing with.  Only a real alias line counts
    # (not comments or other lines that merely mention "watchdog"), and only
    # the first one is used.  If there is no alias, assume softdog.
    _WDT=''
    if [ -r /etc/modules.conf ]; then
        _WDT=`awk '$1 == "alias" && $2 == "watchdog" { print $3; exit }' \
            /etc/modules.conf`
    fi
    if [ -z "$_WDT" ]; then
        _PROBE=softdog
        _WDT=softdog
    else
        # Let modprobe resolve the alias to the configured driver.
        _PROBE=watchdog
    fi

    # Don't try to load the module a second time.  The module column of
    # lsmod is compared exactly so that a different module whose name merely
    # contains $_WDT is not mistaken for it.
    if ! lsmod | awk -v mod="$_WDT" '$1 == mod { hit = 1 } END { exit !hit }'
    then
        action "Loading Watchdog Timer ($_WDT): " modprobe $_PROBE \
            nowayout=$WATCHDOG_NOWAYOUT
    fi

    unset _WDT _PROBE
    return 0
}


# This script runs whenever the clumanager rpm is installed, even on
# systems where no cluster has been set up.  A missing cluster.conf means
# the cluster is not configured; in that case exit quietly for every
# action except "status".
if [ ! -f $CFG_FILE ] && [ "$1" != "status" ]; then
    exit 0
fi

# Dispatch on the requested action ($1): start, stop, reload, status, probe.
# Anything else prints the usage line and exits with status 1.
# RETVAL is the exit status used by the actions that fall through to the
# end of the script (start and stop); nothing below changes it.
RETVAL=0
case "$1" in
start)
    # Check-and-load the Watchdog driver, if we want one
    watchdoginit

    echo -n "Starting cluster management agent: "
    mkdir -p /var/lib/xm > /dev/null
    startdaemon "xmproxyd"

    # Samba pid files need to be cleaned up to prevent indefinite
    # accumulation.  Only do this if there are actually samba services
    # in the configuration (to prevent messing up non-clustered samba).
    if [ -f $CFG_FILE ]; then
        # Output is discarded with the portable ">/dev/null 2>&1" form.
        # The bash-only "&>" would, under a strict POSIX /bin/sh, put the
        # second grep in the background and make this test always succeed.
        if grep sharename $CFG_FILE | grep -v None >/dev/null 2>&1; then
            rm -f /tmp/sambapids.*
        fi
    fi

    echo -n "Starting cluster manager services: "
    $CLULOG -s 5 -l 5 -n cluster "Starting cluster manager services "
    startdaemon $CLUQUORUMD $CLUQUORUMD_START_OPTIONS

    # if this lock file doesn't exist, init won't even try to run
    # the shutdown script for this service on RedHat systems!
    # on non-RedHat systems, /var/lock/subsys may not exist.
    touch /var/lock/subsys/cluster >/dev/null 2>&1

    ;;

# The stop sequence primarily consists of sending a message to the highest
# level cluster service, which does its own shutdown and then sends a message
# to the layer it depends on.  This is necessary as you can't just send
# kill signals to the quorumd specifically, because in that case it wouldn't
# shutdown cleanly by marking its state as DOWN on the disk partition - this
# would result in the other node shooting this node when the timestamp
# stops updating.
# Here's the order that stop messages get sent:
# SM -> cluquorumd -> clupowerd
# On the off chance that any of the daemons in the chain of shutdown
# messaging are not running, skip ahead to the next daemon or direct kill.
stop)

    echo -n "Shutting down cluster management agent: "
    stopdaemon "/sbin/xmproxyd"
    echo "Shutting down Cluster Manager services "
    $CLULOG -s 5 -l 5 -n cluster "Shutting down Cluster Manager services "
    pidof $CLUSVCMGRD >/dev/null 2>&1; SMRUNNING="$?"
    if [ $SMRUNNING = 0 ]; then
        echo -n "Sending stop message to $CLUSVCMGRD: "
        $CLUSTOP $CLUSTOP_OPTIONS

        # Handle oddball case of SM running but quorumd isn't.
        pidof $CLUQUORUMD >/dev/null 2>&1; CLUQUORUMD_RUNNING="$?"
        if [ $CLUQUORUMD_RUNNING != 0 ]; then
            echo -n "Shutting down $CLUPOWERD: "
            stopdaemon $CLUPOWERD
        fi
    else
        # XXX - if SM isn't running it's not safe to just tell quorumd to
        # stop as this could result in services running on both members.
        # In this case call SM with a new (not yet implemented) option which
        # tells it to stop all services and return status when done.  If that
        # is successful, it will send a termination message to quorumd.  If
        # the new invocation of SM fails to stop services then a hard reboot
        # is warranted.
        pidof $CLUQUORUMD >/dev/null 2>&1; CLUQUORUMD_RUNNING="$?"
        if [ $CLUQUORUMD_RUNNING = 0 ]; then
            echo -n "Sending stop message to $CLUQUORUMD: "
            $CLUSTOP $CLUQUORUMD_STOP_OPTIONS
        else
            echo -n "Shutting down $CLUPOWERD: "
            stopdaemon $CLUPOWERD
        fi
    fi

    # Wait around for Quorum daemon to go away.  Then you know that the
    # on-disk state has been marked as cleanly down.
    # NOTE: there is no timeout; this polls every 10 seconds for as long as
    # pidof still finds the quorum daemon.
    while : ; do
        pidof $CLUQUORUMD >/dev/null 2>&1; CLUQUORUMD_RUNNING="$?"
        if [ $CLUQUORUMD_RUNNING != 0 ]; then
            break
        fi
        echo "Waiting for services to stop and cluquorumd to exit."
        sleep 10
    done

    # make sure it really is dead
    for process in $CLUQUORUMD $CLUHBD $CLUSVCMGRD $CLUPOWERD $CLUMIBD $CLUSCAND $CLURMTABD
    do
        echo -n "Stopping $process: "
        stopdaemon $process
    done

    # remove the lock file, so init will allow the start script to run
    rm -f /var/lock/subsys/cluster >/dev/null 2>&1

    $CLULOG -s 5 -l 5 -n cluster "Completed shutdown of Cluster Manager"
    ;;

# Send SIGHUP to every cluster daemon by name.
reload)
    for process in $CLUHBD $CLUSVCMGRD $CLUQUORUMD $CLUPOWERD $CLUMIBD $CLUSCAND $CLURMTABD
    do
        killall -HUP $process
    done
    exit 0
    ;;

# Don't allow restart, because the stop merely initiates the shutdown
# sequence.  So this command would end up attempting to start while the
# daemons may not have been stopped.
#restart)
#    $0 stop
#    $0 start
#    ;;

# Print one status line per daemon.  The exit status is always 0; the
# per-daemon return codes of daemonstatus are not propagated.
status)
    for process in $CLUHBD $CLUSVCMGRD $CLUQUORUMD $CLUPOWERD $CLUMIBD $CLUSCAND $CLURMTABD
    do
        daemonstatus $process
    done
    exit 0
    ;;

# Accepted but does nothing.
probe)
    exit 0;
    ;;

*)
    echo "Usage: $0 {start|stop|status|reload}"
    exit 1
    ;;
esac

exit $RETVAL
