mirror of
https://dev.iopsys.eu/feed/iopsys.git
synced 2025-12-10 07:44:50 +01:00
244 lines
4.2 KiB
Bash
Executable file
244 lines
4.2 KiB
Bash
Executable file
#!/bin/sh
#
# Watchdog for questd (and optionally acsd): periodically samples the output
# of `top` and restarts a daemon whose process count, memory size or CPU
# usage stays out of bounds for several consecutive samples.

# OpenWrt shell helper library
. /lib/functions.sh
# JSON helpers; json_load / json_get_var are used by check_ubus_network_call
. /usr/share/libubox/jshn.sh
|
|
|
|
# Sampling schedule of the main loop:
# 1. sleep for $SLEEP_LONG seconds
# 2. run the check_* functions
# 3. sleep for $SLEEP_SHORT seconds
# 4. go to step 2. $SAMPLES-1 times (step 2. will run $SAMPLES times)
# 5. go to step 1.
SAMPLES=4
SLEEP_SHORT=2
SLEEP_LONG=10

# worst-case scenario:
# quest will be restarted in:
# SLEEP_LONG + (SAMPLES-1)*SLEEP_SHORT + UBUS_TIMEOUT + epsilon =
# = 21 seconds

# number of processes
NPROC_LIMIT=2 # the value that is considered to be wrong (and above)
NPROC_COUNT=4 # number of times that NPROC_LIMIT has to be hit to trigger a process restart
NPROC_NFAIL=0 # current consecutive Number of FAILures. process restarts when NPROC_NFAIL == NPROC_COUNT

# memory limit (same LIMIT / COUNT / NFAIL scheme as above)
MEM_LIMIT=20000
MEM_COUNT=4
MEM_NFAIL=0

# percentage of CPU usage (same LIMIT / COUNT / NFAIL scheme as above)
PCPU_LIMIT=38
PCPU_COUNT=4
PCPU_NFAIL=0

# network call check
NNET_NFAIL=0

# timeout in seconds passed to `ubus -t`
UBUS_TIMEOUT=5

# Paths of the optional daemons; empty when the binary is not installed.
# `command -v` is the POSIX way to look a command up (instead of `which`).
BSDBIN=$(command -v bsd)
ACSDBIN=$(command -v acsd)
|
|
|
|
# Force-restart the quest service and clear all failure counters.
# Globals: NPROC_NFAIL, MEM_NFAIL, PCPU_NFAIL, NNET_NFAIL (logged, then reset to 0)
restart_questd()
{
	# The tag is quoted: unquoted, "[<pid>]" would be a glob pattern.
	logger -s -t "$0[$$]" "Restarting questd. $NPROC_NFAIL $MEM_NFAIL $PCPU_NFAIL $NNET_NFAIL"

	# questd is killed both before and after the init script's stop.
	# NOTE(review): presumably to catch instances that survive the stop — confirm.
	killall -q -KILL questd
	/etc/init.d/quest stop
	killall -q -KILL questd
	/etc/init.d/quest start

	NPROC_NFAIL=0
	MEM_NFAIL=0
	PCPU_NFAIL=0
	NNET_NFAIL=0
}
|
|
|
|
|
|
# Track whether the number of running processes is acceptable.
# Arguments: $1 - process count (a missing/empty value is treated as 0)
# Globals:   NPROC_LIMIT, NPROC_COUNT (read); NPROC_NFAIL (updated)
# Returns:   1 once NPROC_NFAIL has reached NPROC_COUNT, 0 otherwise
check_nproc()
{
	local nproc="${1:-0}"

	# Both "too many processes" and "no process at all" count as a failure;
	# any good sample resets the consecutive-failure counter.
	if [ "$nproc" -ge "$NPROC_LIMIT" ] || [ "$nproc" -eq 0 ]; then
		NPROC_NFAIL=$((NPROC_NFAIL + 1))
	else
		NPROC_NFAIL=0
	fi

	if [ "$NPROC_NFAIL" -ge "$NPROC_COUNT" ]; then
		return 1
	fi

	return 0
}
|
|
|
|
# Track whether any sampled memory value is at or above MEM_LIMIT.
# Arguments: memory values, one per process (separate arguments or a single
#            whitespace-separated list); non-integer tokens are ignored
# Globals:   MEM_LIMIT, MEM_COUNT (read); MEM_NFAIL (updated)
# Returns:   1 once MEM_NFAIL has reached MEM_COUNT, 0 otherwise
check_mem()
{
	local m
	local exceeded=false

	# $* is deliberately unquoted so that a list passed as one argument is
	# still split into individual values.
	for m in $*; do
		# NOTE(review): a value carrying a unit suffix would be skipped here
		# (it was never counted before either) — confirm top's output format.
		case "$m" in
			''|*[!0-9]*) continue ;;
		esac
		if [ "$m" -ge "$MEM_LIMIT" ]; then
			exceeded=true
			break
		fi
	done

	if [ "$exceeded" = "true" ]; then
		MEM_NFAIL=$((MEM_NFAIL + 1))
	else
		MEM_NFAIL=0
	fi

	if [ "$MEM_NFAIL" -ge "$MEM_COUNT" ]; then
		return 1
	fi

	return 0
}
|
|
|
|
# Track whether any sampled CPU percentage is at or above PCPU_LIMIT.
# Arguments: CPU values such as "12%", one per process (separate arguments
#            or a single whitespace-separated list); a fractional part is
#            truncated and non-numeric tokens are ignored
# Globals:   PCPU_LIMIT, PCPU_COUNT (read); PCPU_NFAIL (updated)
# Returns:   1 once PCPU_NFAIL has reached PCPU_COUNT, 0 otherwise
check_pcpu()
{
	local p
	local exceeded=false

	# $* is deliberately unquoted so that a list passed as one argument is
	# still split into individual values.
	for p in $*; do
		p=${p%%%*}	# drop everything from the first '%'
		p=${p%%.*}	# drop a fractional part, [ -ge ] needs an integer
		case "$p" in
			''|*[!0-9]*) continue ;;
		esac
		if [ "$p" -ge "$PCPU_LIMIT" ]; then
			exceeded=true
			break
		fi
	done

	if [ "$exceeded" = "true" ]; then
		PCPU_NFAIL=$((PCPU_NFAIL + 1))
	else
		PCPU_NFAIL=0
	fi

	if [ "$PCPU_NFAIL" -ge "$PCPU_COUNT" ]; then
		return 1
	fi

	return 0
}
|
|
|
|
# Probe the router.system ubus object.
# Globals: UBUS_TIMEOUT (read; falls back to 5 when unset or empty)
# Returns: 0 when the ubus call succeeds, 1 on any failure
check_ubuscall()
{
	# Quoted with a default so that an unset timeout cannot make ubus take
	# "call" as the argument of -t.
	if ubus -t "${UBUS_TIMEOUT:-5}" call router.system info >/dev/null 2>&1; then
		return 0
	fi

	return 1
}
|
|
|
|
# Verify that "ubus call router.network ports" reports statistics for lan.
# The check is skipped (returns 0) when lan is not configured as an
# interface, its status cannot be loaded, it is not up, or its ifname is
# empty or starts with "br-".
# Globals: UBUS_TIMEOUT (read; falls back to 5), NNET_NFAIL (set to 1 on failure)
# Returns: 1 when the ubus output lacks "statistics", 0 otherwise
check_ubus_network_call() {
	local up ifname

	[ "$(uci -q get network.lan)" = "interface" ] || return 0

	json_load "$(ifstatus lan)" || return 0
	json_get_var up up
	# an empty or non-numeric value counts as "not up"
	[ "$up" -eq 1 ] 2>/dev/null || return 0

	ifname="$(uci -q get network.lan.ifname)"
	case "$ifname" in
		''|br-*) return 0 ;;
	esac

	if ! ubus -t "${UBUS_TIMEOUT:-5}" call router.network ports '{"network":"lan"}' | grep -q statistics; then
		NNET_NFAIL=1
		return 1
	fi

	return 0
}
|
|
|
|
# Kill any running bsd and start it again.
# Returns: the exit status of the `bsd` command
restart_bsd()
{
	# The tag is quoted: unquoted, "[<pid>]" would be a glob pattern.
	logger -s -t "$0[$$]" "Restarting bsd."
	killall -q -KILL bsd
	# started in the foreground
	# NOTE(review): presumably bsd daemonizes itself — confirm
	bsd
}
|
|
|
|
# Probe bsd by running `bsd -s` with its output discarded.
# Returns: 0 when the command succeeds, 1 on any failure
check_bsdcall()
{
	if bsd -s >/dev/null 2>&1; then
		return 0
	fi

	return 1
}
|
|
|
|
|
|
# Kill any running acsd and start a new instance in the background.
restart_acsd()
{
	# The tag is quoted: unquoted, "[<pid>]" would be a glob pattern.
	logger -s -t "$0[$$]" "Restarting acsd."
	killall -q -KILL acsd
	# backgrounded so the caller is not blocked by acsd
	acsd &
}
|
|
|
|
# Check the CPU usage of acsd as reported by one batch run of top.
# Arguments: $1 - optional CPU percentage limit (default 20)
# Returns:   1 when any matching line shows a value at or above the limit,
#            0 otherwise (including when no acsd line is found)
check_acsd()
{
	local limit="${1:-20}"
	local topline pcpu p

	# "[a]csd" keeps the grep process itself from matching
	topline=$(top -bn1 | grep "[a]csd")
	# printf instead of `echo -en`: backslash sequences in a command line
	# (e.g. "\c") must not be interpreted and truncate the list.
	# NOTE(review): field 7 is assumed to be the CPU percentage column — confirm.
	pcpu=$(printf '%s\n' "$topline" | awk '{print $7}')

	for p in $pcpu; do
		p=${p%%%*}	# drop everything from the first '%'
		p=${p%%.*}	# drop a fractional part, [ -ge ] needs an integer
		case "$p" in
			''|*[!0-9]*) continue ;;
		esac
		if [ "$p" -ge "$limit" ]; then
			return 1
		fi
	done

	return 0
}
|
|
|
|
|
|
# Watchdog loop; never returns.
# Each iteration samples the questd lines of `top`, feeds the process count,
# memory and CPU columns to the check_* functions and restarts questd when
# any of them asks for it, then checks acsd if it is installed.
# After each of the first SAMPLES-1 iterations it sleeps SLEEP_SHORT seconds,
# after the last one SLEEP_LONG seconds, and the cycle starts over.
main()
{
	local topline nproc mem pcpu
	local restart_nproc restart_mem restart_pcpu
	local sample=1
	local bsdenabled	# only referenced by the disabled bsd check below

	while true; do

		# collect info: one line of top output per questd process
		topline=$(top -bn1 | grep "/sbin/[q]uestd" | grep -v monitor)

		# printf is used instead of `echo -e`/`echo -n` so that backslashes
		# in command lines are not interpreted; no trailing newline here so
		# that empty output counts as 0 lines.
		nproc=$(printf '%s' "$topline" | awk 'END{print NR}')
		check_nproc "$nproc"
		restart_nproc=$?

		# NOTE(review): fields 5 and 7 are assumed to be the memory size and
		# CPU percentage columns of top — confirm.
		mem=$(printf '%s\n' "$topline" | awk '{print $5}')
		# unquoted on purpose: one argument per process
		check_mem $mem
		restart_mem=$?

		pcpu=$(printf '%s\n' "$topline" | awk '{print $7}')
		# unquoted on purpose: one argument per process
		check_pcpu $pcpu
		restart_pcpu=$?

		# do the restart
		if [ "$restart_nproc" = "1" ] ||
		   [ "$restart_mem" = "1" ] ||
		   [ "$restart_pcpu" = "1" ]
		then
			restart_questd
#		else
#			if [ "$sample" -ge "$SAMPLES" ]; then
#				check_ubus_network_call || restart_questd
#			fi
		fi

#		if [ "$BSDBIN" ] ; then
#			config_load wireless
#			config_get_bool bsdenabled bandsteering enabled 0
#			if [ $bsdenabled == "1" -a "$(pidof wifi)" == "" ] ; then
#				check_bsdcall || restart_bsd
#			fi
#		fi

		if [ "$ACSDBIN" ]; then
			check_acsd || restart_acsd
		fi

		# sleep
		if [ "$sample" -lt "$SAMPLES" ]; then
			sample=$((sample + 1))
			sleep "$SLEEP_SHORT"
		else
			sample=1
			sleep "$SLEEP_LONG"
		fi
	done
}
|
|
|
|
# Quoted so that arguments containing whitespace are passed through intact.
main "$@"
|