#!/bin/bash
#
# Bring up/down openibd
#
# chkconfig: 2345 05 95
# description: Activates/Deactivates InfiniBand Network Interfaces
#
### BEGIN INIT INFO
# Provides:       openibd
### END INIT INFO
#
# Copyright (c) 2006 Mellanox Technologies. All rights reserved.
#
# This Software is licensed under one of the following licenses:
#
# 1) under the terms of the "Common Public License 1.0" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/cpl.php.
#
# 2) under the terms of the "The BSD License" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/bsd-license.php.
#
# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
#    copy of which is available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/gpl-license.php.
#
# Licensee has the right to choose one of the above licenses.
#
# Redistributions of source code must retain the above copyright
# notice and one of the license notices.
#
# Redistributions in binary form must reproduce both the above copyright
# notice, one of the license notices in the documentation
# and/or other materials provided with the distribution.
#
#
#  $Id: openib-1.0-openibd.init,v 1.13 2008/05/21 16:55:06 dledford Exp $
#

# config: /etc/ofed/openib.conf
# CONFIG is the optional settings file sourced further down;
# NETWORK_CONF_DIR is the directory holding the ifcfg-* interface files.
CONFIG=/etc/ofed/openib.conf
NETWORK_CONF_DIR="/etc/sysconfig/network-scripts"

# Distribution init-script helpers (presumably the source of the
# echo_success/echo_failure functions used below -- they are not defined
# in this file).
source /etc/rc.d/init.d/functions

# Check whether module $1 appears in the lsmod listing.
# Arguments: $1 - module name
# Returns:   the exit status of grep: 0 when $1 matches as a whole word
#            anywhere on a line of /sbin/lsmod output, non-zero otherwise.
is_module()
{
    local wanted=$1
    local status

    /sbin/lsmod | grep -w "$wanted" > /dev/null 2>&1
    status=$?
    return $status
}

# Module groups handed to load_modules by start().
LOAD_ULP_MODULES=""
LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
LOAD_CORE_MODULES="ib_core ib_mad ib_sa ib_addr"

# Choose the upper layer protocol (ULP) modules.  When the config file
# exists, its SDP_LOAD / IPOIB_LOAD / SRP_LOAD switches decide; when it
# does not, IPoIB and SDP are loaded by default.
# $CONFIG is quoted so that an empty or whitespace-containing path is
# treated as "no config file" instead of confusing the test.
if [ -f "$CONFIG" ]; then
    . "$CONFIG"

    # Setting OpenIB start parameters (SDP always brings ib_ipoib along)
    if [ "${SDP_LOAD}" == "yes" ]; then
        LOAD_ULP_MODULES="ib_ipoib ib_sdp"
    elif [ "${IPOIB_LOAD}" == "yes" ]; then
        LOAD_ULP_MODULES="ib_ipoib"
    fi

    if [ "${SRP_LOAD}" == "yes" ]; then
        LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srp"
    fi
else
    LOAD_ULP_MODULES="ib_ipoib ib_sdp"
fi

# Module groups walked by stop() and status().
UNLOAD_ULP_MODULES="ib_madeye ib_iser ib_srp ib_sdp ib_ipoib"
UNLOAD_HW_MODULES="iw_c2 iw_cxgb3 iw_nes ib_ehca ib_ipath ib_mthca mlx4_ib"
UNLOAD_CORE_USER_MODULES="rdma_ucm ib_ucm ib_uverbs ib_umad"
UNLOAD_CORE_CM_MODULES="rdma_cm ib_cm iw_cm"
UNLOAD_CORE_MODULES="ib_addr ib_sa ib_mad ib_core"

# Names of the currently active ib<N> interfaces, taken from the first
# column of the ifconfig listing.  The pattern requires a digit after "ib"
# (the former "ib[0-9]*" matched any line starting with "ib"), and a
# trailing ':' is stripped because newer ifconfig output formats print the
# name as "ib0:".
interfaces=$(/sbin/ifconfig | grep "^ib[0-9]" | cut -f 1 -d ' ' | sed -e 'y/\r/ /' -e 's/:$//')

# Load every module named in the arguments that is not already loaded.
# Arguments: module names (word-split, so a single "a b" argument also works)
# Outputs:   for each failed modprobe, a newline followed by
#            "Failed to load module <name>" (no trailing newline)
# Returns:   sum of the exit codes of the failed modprobe calls, 0 if none failed
load_modules()
{
    local RC=0
    local module res

    for module in $*; do
        # Already present: nothing to do for this one.
        is_module "$module" && continue

        /sbin/modprobe "$module"
        res=$?
        RC=$(( RC + res ))
        if [ $res -ne 0 ]; then
            echo
            # Report the module that actually failed (the message used to
            # expand an unrelated, unset variable).
            echo -n "Failed to load module $module"
        fi
    done
    return $RC
}

# Workaround for BIOSes that set up the MTRRs badly.  On machines with 4GB
# or more of RAM, some BIOSes map the entire 4GB address space as
# write-back cacheable and then carve the reserved PCI I/O addresses out of
# it with an overlapping uncacheable mapping.  Once that is done, no part
# of the PCI I/O address space can be made write-combining any more, which
# is fatal for certain IB hardware.  The fix is to unroll the layout: the
# base 4GB mapping is redone as a series of discrete mappings that together
# cover the same range minus the hole, and the uncacheable mappings that
# punched the hole are deleted.  The PCI I/O address space is then left
# unregistered (so it defaults to uncacheable) but can take write-combining
# mappings where needed.
#
# Returns: non-zero when the prerequisites are missing or the awk script
#          reports false; otherwise the status of the optional rmmod step.
check_mtrr_registers()
{
    # Both the kernel's MTRR table and the fixup script must be present.
    [ -f /proc/mtrr -a -f /etc/ofed/fixup-mtrr.awk ] || return

    # The awk script returns true only when it really changed the MTRR
    # registers; in every other case there is nothing more to do.
    awk -f /etc/ofed/fixup-mtrr.awk /proc/mtrr 2>/dev/null || return

    # The registers changed, so an already loaded ib_ipath has to go.  The
    # caller, load_hardware_modules, loads the hardware drivers again right
    # after this function, so removing it here should not be a problem.
    if is_module ib_ipath; then
        /sbin/rmmod ib_ipath
    fi
}

# Load the low-level hardware drivers listed in /etc/sysconfig/hwconf,
# plus their RDMA companion modules.
# Globals:  FIXUP_MTRR_REGS (read) - "yes" runs check_mtrr_registers first
# Returns:  sum of the exit codes of all modprobe calls (0 when none failed)
load_hardware_modules()
{
    local -i RC=0
    local driver

    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers
    # WARNING!!  If you are using this script to take down and bring up
    # your IB interfaces on a machine that uses more than one low level
    # Infiniband hardware driver, then there is no guarantee that the
    # ordering of rdma interfaces after you take down and bring up the
    # stack will be the same as the ordering of the interfaces on a
    # clean boot.

    # In RHEL5 and later we use udevtrigger/udevsettle to reload the
    # hardware modules, but that doesn't work on RHEL4.
    #udevtrigger
    #udevsettle
    for driver in $(grep "cxgb3\|ib_ehca\|ib_ipath\|mlx4_core\|ib_mthca\|iw_nes\|iw_c2" /etc/sysconfig/hwconf | awk '{ print $2 }'); do
        if ! is_module "$driver"; then
            /sbin/modprobe "$driver"
            RC=$(( RC + $? ))
        fi
        # cxgb3 and mlx4_core need a companion module, which is loaded only
        # when the base driver is present and the companion is not yet
        # loaded.  The two checks must be joined with &&: written as
        # "is_module a -a ! is_module b", everything after the first module
        # name is just ignored arguments to is_module.
        if [ "$driver" = "cxgb3" ]; then
            if is_module cxgb3 && ! is_module iw_cxgb3; then
                /sbin/modprobe iw_cxgb3
                RC=$(( RC + $? ))
            fi
        fi
        if [ "$driver" = "mlx4_core" ]; then
            if is_module mlx4_core && ! is_module mlx4_ib; then
                /sbin/modprobe mlx4_ib
                RC=$(( RC + $? ))
            fi
        fi
    done
    # An "lhca" entry in the device tree triggers loading ib_ehca.
    if [ -r /proc/device-tree ]; then
        if ls /proc/device-tree | grep -q lhca; then
            if ! is_module ib_ehca; then
                /sbin/modprobe ib_ehca
                RC=$(( RC + $? ))
            fi
        fi
    fi
    return $RC
}

# Apply the AMD chipset Errata #58 workaround where it is needed.
# Sets register 69 of device 1022:1100 to c0 when it holds another value.
# Globals: CURVAL, val (written)
# Returns: 0
errata_58()
{
    # Check AMD chipset issue Errata #58
    # Both PCI tools are needed to detect and patch the chipset.
    if ! test -x /sbin/lspci || ! test -x /sbin/setpci; then
        return 0
    fi

    # Only act when all three PCI IDs are present (presumably the AMD-8131
    # chipset plus a Mellanox Tavor HCA -- TODO confirm the ID meanings).
    /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null || return 0
    /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null || return 0
    /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null || return 0

    CURVAL=$(/sbin/setpci -d 1022:1100 69)
    for val in $CURVAL; do
        # A value of c0 is already what the workaround writes.
        [ "${val}" != "c0" ] || continue

        # The write addresses every 1022:1100 device, so one success is
        # enough and ends the loop.
        if /sbin/setpci -d 1022:1100 69=c0; then
            break
        fi
        echo "Failed to apply AMD-8131 Errata #58 workaround"
    done
    return 0
}

# Apply the AMD chipset Errata #56 workaround where it is needed.
# For each 1022:7450 device whose revision register compares below 13 and
# that has a 15b3:5a46 device on its bus, register 72 of 15b3:5a44 is set
# to 14.
# Globals: bus, rev, num, dev, device (written)
# Returns: 0
errata_56()
{
    # Check AMD chipset issue Errata #56
    if ! test -x /sbin/lspci || ! test -x /sbin/setpci; then
        return 0
    fi

    # Same hardware precondition as for Errata #58: all three IDs present.
    /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null || return 0
    /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null || return 0
    /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null || return 0

    bus=""
    # Look for AMD-8131 devices
    for dev in $(/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2); do
        bus=$(/sbin/setpci -s $dev 19)
        rev=$(/sbin/setpci -s $dev 8)
        # Look for a Tavor attached to the secondary bus of this device
        for device in $(/sbin/setpci -f -s $bus: -d 15b3:5a46 19); do
            # NOTE(review): setpci normally prints hexadecimal, but the
            # value is compared as a decimal integer here; a revision with
            # a hex letter makes the test fail and the device is skipped --
            # confirm whether 0x13 was meant.
            [ $rev -lt 13 ] || continue

            if /sbin/setpci -d 15b3:5a44 72=14; then
                break
            fi
            echo
            echo "Failed to apply AMD-8131 Errata #56 workaround"

            # If more than one device is on the bus then issue a warning.
            # NOTE(review): this point is only reached after a failed
            # write (success breaks out above) -- confirm that is intended.
            num=$(/sbin/setpci -f -s $bus: 0 | wc -l |  sed 's/\ *//g')
            if [ $num -gt 1 ]; then
                echo "Warning: your current PCI-X configuration might be incorrect."
                echo "see AMD-8131 Errata 56 for more details."
            fi
        done
    done
    return 0
}

# Bring the OpenIB stack up: load hardware, core, CM, user-space and ULP
# modules, write a node description for each HCA, apply the AMD errata
# workarounds and create the subsystem lock file.
# Globals:  LOAD_CORE_MODULES, LOAD_CORE_CM_MODULES, LOAD_CORE_USER_MODULES,
#           LOAD_ULP_MODULES (read)
# Outputs:  progress line ending in the echo_success/echo_failure marker
# Returns:  sum of the statuses of all load steps (0 on full success)
start()
{
    local RC=0
    local IBSYSDIR="/sys/class/infiniband"
    local hca
    local -i hca_id=1

    echo -n "Loading OpenIB kernel modules:"

    load_hardware_modules
    RC=$(( RC + $? ))
    load_modules $LOAD_CORE_MODULES
    RC=$(( RC + $? ))
    load_modules $LOAD_CORE_CM_MODULES
    RC=$(( RC + $? ))
    load_modules $LOAD_CORE_USER_MODULES
    RC=$(( RC + $? ))
    load_modules $LOAD_ULP_MODULES
    RC=$(( RC + $? ))

    # Add node description to sysfs: "<short hostname> HCA-<n>".  The
    # counter advances for every entry, writable or not.
    if [ -d "${IBSYSDIR}" ]; then
        for hca in "${IBSYSDIR}"/*; do
            if [ -w "${hca}/node_desc" ]; then
                echo -n "$(hostname | cut -f 1 -d .) HCA-${hca_id}" >> "${hca}/node_desc" 2> /dev/null
            fi
            hca_id=$(( hca_id + 1 ))
        done
    fi

    errata_58
    errata_56
    sleep 1

    # The lock file is created even when some module failed to load.
    touch /var/lock/subsys/openibd
    # Explicit if/else: with "a && b || c" the failure marker would also be
    # printed whenever echo_success itself returned non-zero.
    if [ $RC -eq 0 ]; then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}

# Remove module $1 if it is currently loaded.
# Arguments: $1 - module name
# Outputs:   an empty line and "Failed to unload <name>" when rmmod fails
# Returns:   0 when the module is absent or was removed, 1 when rmmod failed
unload()
{
    local name=$1

    # Not loaded: nothing to remove.
    is_module "$name" || return 0

    if ! /sbin/rmmod "$name" > /dev/null 2>&1; then
        echo
        echo "Failed to unload $name"
        return 1
    fi
    return 0
}

# Take the OpenIB stack down: refuse while known InfiniBand applications or
# qlgc_vnic are active, kill srp_daemon, bring the IPoIB interfaces down
# and unload the modules (ULP/user/CM first, hardware/core last).
# Globals:  UNLOAD_* module lists, interfaces, ACTION (read)
# Outputs:  progress line ending in the echo_success/echo_failure marker
# Returns:  1 when refusing to stop, otherwise the number of modules that
#           failed to unload (0 on success)
stop()
{
    # Check if applications which use infiniband are running
    local apps="opensm osmtest srp_daemon"
    local app pid pids mod iface modules
    local RC=0

    echo -n "Unloading OpenIB kernel modules:"

    for app in $apps; do
        if ps -ef | grep "$app" | grep -v grep > /dev/null 2>&1; then
            echo
            echo "Please stop $app and all applications running over InfiniBand"
            # ACTION may be unset; fall back to the action being performed
            # so the hint is always a complete command line.
            echo "Then run \"$0 ${ACTION:-stop}\""
            echo_failure
            echo
            return 1
        fi
    done

    if is_module qlgc_vnic; then
        echo
        echo "The qlgc_vnic service is still running."
        echo_failure
        echo
        return 1
    fi

    if ! is_module ib_core; then
        # Nothing to do, make sure lock file is gone and return
        rm -f /var/lock/subsys/openibd
        echo_success
        echo
        return 0
    fi

    # Remove srp_daemon instances before removing the ib_srp module:
    # SIGTERM first, SIGKILL for whatever is still alive two seconds later.
    # NOTE(review): srp_daemon is also in the refusal list above, so this
    # only acts on instances that show up after that check -- confirm intent.
    pids=$(ps ax | grep srp_daemon | grep -v grep | awk '{print $1}')
    if [ -n "$pids" ]; then
        for pid in $pids; do
            kill "$pid"
        done
        sleep 2
        pids=$(ps ax | grep srp_daemon | grep -v grep | awk '{print $1}')
        for pid in $pids; do
            kill -9 "$pid"
        done
    fi

    # Down all IPoIB interfaces
    if is_module ib_ipoib; then
        for iface in $interfaces; do
            ifdown "$iface" > /dev/null 2>&1
        done
        # sleep after downing interfaces to avoid a kernel timer warning
        sleep 4
    fi

    # Unload OpenIB modules
    modules="$UNLOAD_ULP_MODULES $UNLOAD_CORE_USER_MODULES"
    modules="$modules $UNLOAD_CORE_CM_MODULES"
    for mod in $modules; do
        unload "$mod"
        RC=$(( RC + $? ))
    done
    # Insert a sleep here for all the ULP modules to have been fully removed
    # before proceeding to unload the driver modules
    sleep 3
    modules="$UNLOAD_HW_MODULES $UNLOAD_CORE_MODULES"
    for mod in $modules; do
        unload "$mod"
        RC=$(( RC + $? ))
    done

    rm -f /var/lock/subsys/openibd
    if [ $RC -eq 0 ]; then
        echo_success
    else
        echo_failure
    fi
    echo
    return $RC
}

# Print which OpenIB modules are loaded, grouped by category, and, when
# ib_ipoib is loaded, the configured and currently active IPoIB interfaces.
# Globals:  UNLOAD_HW_MODULES, UNLOAD_ULP_MODULES, UNLOAD_CORE_USER_MODULES,
#           UNLOAD_CORE_CM_MODULES, UNLOAD_CORE_MODULES, interfaces (read);
#           NETWORK_CONF_DIR (read, defaults to
#           /etc/sysconfig/network-scripts when unset)
# Returns:  0 when at least one of the modules is loaded; otherwise 2 when
#           the lock file /var/lock/subsys/openibd exists and 3 when not
status()
{
    local -i cnt=0
    local -i modules=0
    local -i idx
    local module="" cfg iface
    local conf_dir=${NETWORK_CONF_DIR:-/etc/sysconfig/network-scripts}
    # Report sections: heading and the module list belonging to it.
    local -a titles=(
        "Low level hardware support loaded"
        "Upper layer protocol modules"
        "User space access modules"
        "Connection management modules"
    )
    local -a lists=(
        "$UNLOAD_HW_MODULES"
        "$UNLOAD_ULP_MODULES"
        "$UNLOAD_CORE_USER_MODULES"
        "$UNLOAD_CORE_CM_MODULES"
    )

    for idx in 0 1 2 3; do
        echo -ne "${titles[idx]}:\n\t"
        cnt=0
        for module in ${lists[idx]}; do
            if is_module "$module"; then
                echo -n "$module "
                cnt=$(( cnt + 1 ))
            fi
        done
        [ $cnt -eq 0 ] && echo -n "none found"
        modules=$(( modules + cnt ))
        echo
        echo
    done

    # Core modules count towards the result but are not listed.
    for module in $UNLOAD_CORE_MODULES; do
        if is_module "$module"; then
            modules=$(( modules + 1 ))
        fi
    done

    if is_module ib_ipoib; then
        echo -n "Configured IPoIB interfaces: "
        cnt=0
        for cfg in "$conf_dir"/ifcfg-ib*; do
            [ -f "$cfg" ] || continue
            # Source each ifcfg file in a subshell so its settings cannot
            # overwrite this script's variables, and so a file without
            # DEVICE does not print the name left over from another file.
            ( unset DEVICE; . "$cfg"; echo -n "$DEVICE " )
            cnt=$(( cnt + 1 ))
        done
        [ $cnt -eq 0 ] && echo -n "none"
        echo
        echo -n "Currently active IPoIB interfaces: "
        cnt=0
        for iface in $interfaces; do
            if ifconfig "$iface" | grep UP > /dev/null 2>&1; then
                echo -n "$iface "
                cnt=$(( cnt + 1 ))
            fi
        done
        [ $cnt -eq 0 ] && echo -n "none"
        echo
    fi

    if [ $modules -eq 0 ]; then
        if [ -f /var/lock/subsys/openibd ]; then
            return 2
        else
            return 3
        fi
    else
        return 0
    fi
}


# Exit status of the script; set by the action dispatcher at the end.
RC=0
# Requested action (first command line argument).  stop() and
# trap_handler() use it in their messages, so it has to be assigned.
ACTION=$1
# Script start time in seconds since the epoch; trap_handler measures the
# elapsed run time against it.
start_time=$(date +%s | tr -d '[:space:]')

# Signal handler: instead of aborting, tell the user to wait.
# Globals:  start_time, ACTION (read); run_time (written)
# Outputs:  an explanatory message when a "stop" has been running for more
#           than 5 seconds, otherwise a plain "Please wait" message
# Returns:  0
trap_handler()
{
    local now
    local msg="\nPlease wait ...\n"

    now=$(date +%s | tr -d '[:space:]')
    let run_time=${now}-${start_time}

    # A stop that is taking longer than 5 seconds gets the longer hint
    if [ $run_time -gt 5 ] && [ "$ACTION" == "stop" ]; then
        msg="\nProbably some application are still using InfiniBand modules...\n"
    fi
    printf "$msg"
    return 0
}

# Route SIGINT and SIGTERM to trap_handler so an interrupted run prints a
# hint and carries on.  SIGKILL (9) is not listed: it cannot be caught.
trap 'trap_handler' INT TERM

# Stop the stack, then start it again.
# Returns: the status of start; the status of stop is not taken into account
restart ()
{
    stop
    start
    return $?
}

# Restart only when the lock file /var/lock/subsys/openibd exists.
# Returns: the status of restart when one was attempted, 0 when no lock
#          file exists.  (With "test && restart || return 0" a failed
#          restart was reported as success.)
condrestart ()
{
    if [ -e /var/lock/subsys/openibd ]; then
        restart
        return $?
    fi
    return 0
}

# Print the list of supported actions.
# Outputs: usage text, framed by empty lines, on stdout
# Returns: 2
usage ()
{
    local prog

    # Quote $0 so a script path containing whitespace still yields one name.
    prog=$(basename "$0")
    echo
    echo "Usage: ${prog} {start|stop|restart|condrestart|try-restart|force-reload|status}"
    echo
    return 2
}

# Dispatch on the requested action; the script exits with the status of
# the chosen handler.
case "$1" in
    start)
        start
        RC=$?
        ;;
    stop)
        stop
        RC=$?
        ;;
    restart)
        restart
        RC=$?
        ;;
    reload)
        # No reload support; 3 is reported (presumably the LSB
        # "unimplemented feature" code -- confirm).
        RC=3
        ;;
    condrestart|try-restart|force-reload)
        # All three restart only when the lock file exists.
        condrestart
        RC=$?
        ;;
    status)
        status
        RC=$?
        ;;
    *)
        usage
        RC=$?
        ;;
esac

exit $RC
