#!/bin/bash
# BEGIN_ICS_COPYRIGHT8 ****************************************
# 
# Copyright (c) 2015, Intel Corporation
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of Intel Corporation nor the names of its contributors
#       may be used to endorse or promote products derived from this software
#       without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# 
# END_ICS_COPYRIGHT8   ****************************************

# [ICS VERSION STRING: unknown]

### BEGIN INIT INFO
# Provides:	arptbl_tuneup
# Default-Start: 2 3 4 5
# Default-Stop: 
# Required-Start: $network
# Required-Stop:
# Should-Start:
# Should-Stop:
# Short-Description:  Adjust arp/neighbor table size
# Description:  Adjust arp/neighbor table size based on configured 
#	ipv4 and/or ipv6 network interface netmasks.
### END INIT INFO

# Snapshot of the kernel settings taken before the first adjustment, and the
# sysctl.d file that receives the adjusted values.
SYSCTL_ORIG="/var/cache/sysctl_arptbl_orig"
SYSCTL_FILE="/etc/sysctl.d/98-opa.conf"

# Widest subnet taken into account: at most (1 << max_maskwidth) addresses.
max_maskwidth=14
# Upper bound for the table size.
max_entries=65536

# Directory holding the ifcfg-<iface> files: the first candidate that exists.
# NETWORK_CONFIG is left untouched when none of them does.
for _cfg_dir in /etc/sysconfig/network-scripts /etc/sysconfig/network; do
	if [[ -d $_cfg_dir ]]; then
		NETWORK_CONFIG="$_cfg_dir"
		break
	fi
done
unset _cfg_dir

###########################################################
# Obtain the netmask of a configured interface.
# input:
#	$1 - interface name: 'ib0',...
#	$2 - address family: 'ipv4' or 'ipv6'
# output (exit status, nothing is written to stdout):
#	> 1 - netmask in CIDR format;
#	0   - interface is not configured for ipv4 or ipv6;
#	1   - config file is not found.
# The ifcfg file is read from $NETWORK_CONFIG/ifcfg-<interface>.
###########################################################
get_netmask_cidr()
{
	local iface=$1
	local inet=$2

	local ifcfg_file=$NETWORK_CONFIG/ifcfg-$iface
	local prefix=""
	local ret=0
	# Local (and empty) so that values left over from a previously sourced
	# ifcfg file are not seen here, and the ones read below do not leak out.
	local NETMASK="" IPV6INIT="" IPV6ADDR="" BOOTPROTO=""

	if [[ ! -e $ifcfg_file ]]; then
		return 1
	fi
	. "$ifcfg_file"

	# The helpers below report their result through the exit status, so the
	# status is captured explicitly.  They run in a subshell with stdout
	# discarded: whatever they print or assign cannot disturb this function.
	if [[ $inet == ipv4 ]]; then
		# BOOTPROTO is compared case-insensitively.
		if [[ $BOOTPROTO == [Dd][Hh][Cc][Pp] ]]; then
			# address comes from dhcp: ask the running interface.
			( get_iface_netmask_cidr "$iface" "$inet" ) >/dev/null
			ret=$?
		elif [[ -n $NETMASK ]]; then
			( netmask_dot_to_cidr "$NETMASK" ) >/dev/null
			ret=$?
		fi
	elif [[ $inet == ipv6 ]]; then
		if [[ $IPV6INIT == yes && -n $IPV6ADDR ]]; then
			# IPV6ADDR is '<address>/<prefix length>'.
			if [[ $IPV6ADDR == */* ]]; then
				prefix=${IPV6ADDR#*/}
				prefix=${prefix%%/*}
			fi
			if [[ $prefix =~ ^[0-9]+$ ]] && ((10#$prefix >= 1 && 10#$prefix <= 128)); then
				ret=$((10#$prefix))
			else
				# prefix length is missing or unusable:
				# fall back to the widest mask this script supports.
				((ret = 128 - max_maskwidth))
			fi
		else
			( get_iface_netmask_cidr "$iface" "$inet" ) >/dev/null
			ret=$?
		fi
	fi

	return $ret
}

#################################################
# Get CIDR netmask value from running interface
# input:
#	$1 - interface name
#	$2 - address family: 'ipv4' or 'ipv6'
# output (exit status):
#	smallest prefix length (i.e. the widest subnet) among the
#	addresses of that family reported by 'ip addr show';
#	0 when the family is unknown or no address is reported.
#################################################
get_iface_netmask_cidr()
{
	local dev=$1
	local prot=""
	local line=""
	local plen=""
	local ret=0

	case $2 in
		ipv6) prot="inet6" ;;
		ipv4) prot="inet" ;;
		*) return 0 ;;
	esac

	# 'ip addr show' prints one '<family> <address>/<prefix> ...' line per
	# address.  An interface may carry several of them (for ipv6 typically a
	# global and a link-local one), so every line is examined and a single
	# number is kept; 'return' cannot take more than one value.
	while read -r line; do
		[[ $line == "$prot "* && $line == */* ]] || continue
		plen=${line#*/}
		plen=${plen%% *}
		[[ $plen =~ ^[0-9]+$ ]] || continue
		plen=$((10#$plen))
		if ((plen > 0 && (ret == 0 || plen < ret))); then
			ret=$plen
		fi
	done <<< "$(ip -f "$prot" addr show "$dev")"

	return $ret
}

#################################################
# convert 'dot' format netmask to CIDR value
# input:
#	$1 - netmask in dotted-quad form, e.g. 255.255.252.0
# output (exit status):
#	prefix length 1..32, derived from the lowest bit set in the mask;
#	0 for an all-zero mask or when $1 is not four octets in 0..255.
#################################################
netmask_dot_to_cidr()
{
	local _netmask="$1"
	local -a _octets
	local _octet=""
	local _bits=0
	local _pos=0
	local _cidr=0

	IFS='.' read -r -a _octets <<< "$_netmask"
	if ((${#_octets[@]} != 4)); then
		return 0
	fi

	# Assemble the 32-bit mask.  '10#' forces decimal, so an octet written
	# with leading zeros is not taken for an octal number.
	for _octet in "${_octets[@]}"; do
		if ! [[ $_octet =~ ^[0-9]+$ ]] || ((10#$_octet > 255)); then
			return 0
		fi
		((_bits = (_bits << 8) | 10#$_octet))
	done

	# The prefix length is 32 minus the position of the lowest set bit.
	# _cidr stays 0 when no bit is set at all.
	for ((_pos = 0; _pos < 32; _pos++)); do
		if (((_bits >> _pos) & 1)); then
			((_cidr = 32 - _pos))
			break
		fi
	done
	return $_cidr
}

#################################################
# check if current arp table sz is different from
# previous (saved) one.
# Compares the gc_thresh3 values recorded in $SYSCTL_ORIG with what
# sysctl reports now for every net/ipv* protocol.
# output (exit status):
#	0 - a recorded value differs from the current one;
#	1 - nothing differs, or $SYSCTL_ORIG does not exist.
#################################################
is_changed()
{
	local prot_list=""
	local thresh_key="gc_thresh3"
	local entry=""
	local proto=""
	local curr=""
	local saved=""

	if [[ ! -e $SYSCTL_ORIG ]]; then
		return 1
	fi
	prot_list=$(ls /proc/sys/net/ | grep ipv)

	# Saved lines look like '<key> = <value>': the value is the 3rd field.
	while read -r entry; do
		[[ $entry == *"$thresh_key"* ]] || continue
		for proto in $prot_list; do
			[[ $entry == *"$proto"* ]] || continue
			# sysctl is queried only for the protocol this line belongs to.
			curr=$(sysctl net.$proto.neigh.default.$thresh_key | cut -d' ' -f3)
			saved=$(echo $entry | cut -d' ' -f3)
			if ((curr != saved)); then
				return 0
			fi
			break
		done
	done < "$SYSCTL_ORIG"

	return 1
}

#################################################
#  actual fix of sysctl configuration.
# input:
#	$1 - protocol: 'ipv4' or 'ipv6'
#	$2 - value for gc_thresh3
#	$3 - value for gc_thresh2
#	$4 - value for gc_thresh1
#	$5 - node count, only mentioned in the generated comment
# The values are applied with 'sysctl -w' and then appended
# to $SYSCTL_FILE, which is created with a header line if missing.
#################################################
do_adjust_arptbl()
{
	local _family=$1
	local _nodes=$5
	local _key="net.${_family}.neigh.default.gc_thresh"
	# index 3/2/1 holds the value for gc_thresh3/2/1
	local -a _limit=([3]=$2 [2]=$3 [1]=$4)
	local _n

	[[ -e ${SYSCTL_FILE} ]] ||
		echo "# File generated at $(date) by Intel OPA. Please do not modify." > ${SYSCTL_FILE}

	# apply to the running kernel first...
	for _n in 3 2 1; do
		/sbin/sysctl -q -w ${_key}${_n}=${_limit[_n]}
	done

	# ...then record the same values in the sysctl.d file.
	echo -e "# Adjust ${_family} arp/neighbor tbl size for ${_nodes} computed nodes" >> ${SYSCTL_FILE}

	for _n in 3 2 1; do
		echo "${_key}${_n} = ${_limit[_n]}" >> ${SYSCTL_FILE}
	done
}

##################################################
# restore original sysctl config file
# Removes $SYSCTL_FILE, then - when the snapshot $SYSCTL_ORIG
# exists - loads it with 'sysctl -p' and removes it as well.
##################################################
restore_arptbl()
{
	# drop the generated settings file, if there is one
	[[ ! -e ${SYSCTL_FILE} ]] || /bin/rm -f ${SYSCTL_FILE}

	# nothing was saved: nothing to load back
	[[ -e ${SYSCTL_ORIG} ]] || return 0

	/sbin/sysctl -q -p ${SYSCTL_ORIG}
	/bin/rm -f ${SYSCTL_ORIG}
}

###########################################################
# save default arp/neighbor tbl size settings
# Writes the gc_thresh3/2/1 lines printed by sysctl for every
# protocol listed in $protocols to $SYSCTL_ORIG.  Does nothing
# when $SYSCTL_ORIG already exists.
###########################################################
save_orig_sysctl_arptbl()
{
	local _level

	# an existing snapshot is left as it is
	[[ ! -e ${SYSCTL_ORIG} ]] || return 0

	umask 022
	touch ${SYSCTL_ORIG}
	for _ipv in ${protocols}; do
		for _level in 3 2 1; do
			/sbin/sysctl net.${_ipv}.neigh.default.gc_thresh${_level} >> ${SYSCTL_ORIG}
		done
	done
}

############################################################
# Adjust kernel ARP table size based on subnet address range
# calculated from interface netmask
#
# For every net/ipv* protocol:
#  - take the smallest netmask (widest subnet) reported by
#    get_netmask_cidr over all candidate interfaces, starting
#    from /24 (ipv4) or /120 (ipv6) and limited to
#    'max_maskwidth' host bits;
#  - node count = 1 << (address bits - netmask);
#  - table size = node count doubled once per additional
#    configured interface;
#  - when the current gc_thresh3 is smaller than that size, pass the
#    new limits to do_adjust_arptbl.
# Sets the globals 'devlist' and 'protocols'; 'protocols' is read
# by save_orig_sysctl_arptbl.
############################################################
adjust_arptbl()
{
	local _ret=0
	local _num_nodes=0
	local _found=0
	local _shift=0
	local inet dev
	local cidrmask addrsize
	local hard_limit soft_limit min_size recommended_tblsize
	local thresh1 thresh2 thresh3

	# The pattern is quoted: left bare, the shell would expand [ie] against
	# files named 'e' or 'i' in the current directory before grep sees it.
	devlist=$(ls /sys/class/net/ | grep '[ie]')
	protocols=$(ls /proc/sys/net/ | grep ipv)

	# save default arptbl params
	save_orig_sysctl_arptbl

	for inet in $protocols; do
		case $inet in
			ipv6)
				cidrmask=120
				addrsize=128
				;;
			ipv4)
				cidrmask=24
				addrsize=32
				;;
			*)
				# not an address family this script knows how to size
				continue
				;;
		esac

		_found=0
		_num_nodes=0
		for dev in $devlist; do
			get_netmask_cidr $dev $inet
			_ret=$?

			# 0: not configured for $inet, 1: no config file
			if ((_ret < 2)); then
				continue
			fi
			if ((cidrmask > _ret)); then
				cidrmask=$_ret
			fi
			((_found++))
		done

		if ((_found == 0)); then
			# no $inet configured device found.
			continue
		fi

		if (((addrsize - cidrmask) > max_maskwidth)); then
			# address range with mask $cidrmask is too big for the kernel tables
			# TBD: consider setting up 'arpd' daemon for an extra-large networks.

			# For the kernel tables - adjust cidrmask to max allowed in this script.
			((cidrmask = addrsize - max_maskwidth))
		fi

		((_num_nodes = 1 << (addrsize - cidrmask)))

		# for final calculations, consider following:
		# - total number of configured ipv4/ipv6 interfaces
		# - arp/neighbor tbl size must be power of 2
		# - limit tbl size to 'max_entries'.

		# The shift count is bounded so that a very large number of
		# interfaces cannot push the result past 64 bits.
		((_shift = _found - 1))
		if ((_shift > 32)); then
			_shift=32
		fi
		((hard_limit = _num_nodes << _shift))
		recommended_tblsize=$hard_limit
		if ((hard_limit > max_entries)); then
			((hard_limit = max_entries))
		fi
		((soft_limit = hard_limit))
		((min_size = hard_limit / 2))

		# sysctl prints '<key> = <value>': the value is the 3rd field.
		thresh1=$(sysctl net.$inet.neigh.default.gc_thresh1 | cut -d' ' -f3)
		thresh2=$(sysctl net.$inet.neigh.default.gc_thresh2 | cut -d' ' -f3)
		thresh3=$(sysctl net.$inet.neigh.default.gc_thresh3 | cut -d' ' -f3)

		echo "Default setting for arp table size for $inet are thresh1=$thresh1, thresh2=$thresh2, thresh3=$thresh3"
		echo -e "Recommended table size for $inet is $recommended_tblsize for $_num_nodes computed nodes"

		# NOTE(review): when the limit is reached nothing is changed at all,
		# not even up to 'max_entries' - confirm this is intended.
		if ((recommended_tblsize >= max_entries)); then
			echo "Maximum table size limit reached for $inet"
			continue
		fi

		if ((thresh3 >= hard_limit)); then
			echo "arp/neighbor tbl size $thresh3 for $inet is Ok. No need to change."
			continue
		fi

		echo -e "Adjust $inet arp/neighbor tbl of size $hard_limit (thresh1=$min_size, thresh2=$soft_limit, thresh3=$hard_limit) for $_num_nodes computed nodes"
		do_adjust_arptbl $inet $hard_limit $soft_limit $min_size $_num_nodes
	done
}

# Report whether is_changed sees a difference between the saved and the
# current table size.
status()
{
	is_changed && {
		echo "arp table size was adjusted."
		return
	}
	echo "arp table size was not changed."
}

# 'start' action: run adjust_arptbl between two progress messages.
start()
{
	printf '%s\n' "Starting arptbl tuneup..."
	adjust_arptbl
	printf '%s\n' "done."
}

# 'stop' action: run restore_arptbl; the progress message and "done." end
# up on the same output line.
stop()
{
	printf '%s' "Restoring arptbl previous size..."
	restore_arptbl
	printf '%s\n' "done."
}

# Print the short usage text to stderr.
Usage()
{
	# ${0##*/} is the script name without its directory part; it needs no
	# external 'basename' at a fixed path and copes with spaces in $0.
	printf '%s\n' \
		"" \
		"Usage: ${0##*/} (--help|start|stop|restart|force-reload|status)" \
		"    --help - produce full help text" \
		"" \
		"    Must execute as root. Adjusts kernel arp/neighbor table sizes for very large" \
		"    subnets based on configured ipv4/ipv6 network interface netmask values." \
		"    Normally executes once on boot by opa.service; however can be invoked" \
		"    with user discretion for a changed subnet configuration." >&2
}

# Print the full help text (usage plus the list of actions) to stderr and
# exit with status 0.
Usage_full()
{
	# ${0##*/} is the script name without its directory part; it needs no
	# external 'basename' at a fixed path and copes with spaces in $0.
	printf '%s\n' \
		"" \
		"Usage: ${0##*/} (--help|start|stop|restart|force-reload|status)" \
		"    --help - produce full help text" \
		"" \
		"    Must execute as root. Adjusts kernel arp/neighbor table sizes for very large" \
		"    subnets based on configured ipv4/ipv6 network interface netmask values." \
		"    Normally executes once on boot by opa.service; however can be invoked" \
		"    with user discretion for a changed subnet configuration." \
		"" \
		"    start        - adjust kernel arp table size" \
		"    stop         - restore previous configuration" \
		"    status       - check if original table size was changed" \
		"    restart      - stop then start" \
		"    force-reload - stop then start" >&2

	exit 0
}

# The actions listed here are for root only: any other user gets the short
# usage text and exit status 4.
case $1 in
	start | stop | restart | force-reload | status)
		if [[ $(id -u) != 0 ]]; then
			Usage && exit 4
		fi
		;;
esac

# Run the requested action.  Anything unrecognized (including no argument)
# prints the short usage text and exits with status 2.
case $1 in
	--help) Usage_full ;;
	start) start ;;
	stop) stop ;;
	restart | force-reload)
		stop
		start
		;;
	status) status ;;
	*)
		Usage
		exit 2
		;;
esac

exit 0
	
