#!/usr/local/bin/bash

# --- Tunables ---------------------------------------------------------------
# Sampling window: the sample array must hold more than this many entries
# (one entry is taken per second) before an average is computed.
REVALENT_COUNT=299
# Seconds to sleep after the service has been restarted.
TIMEOUT=1800
# Shorter values, handy while debugging:
#REVALENT_COUNT="15" # in seconds
#TIMEOUT="10" # timeout after restart
# Seconds to wait for the service to stop before it is killed.
STOP_TIMEOUT=60
# Name passed to: service ${SERVICE_NAME} stop / start
SERVICE_NAME=polkadot
# File that receives every log line and the output of the service commands.
LOG=/var/log/check_overload.log

# --- Runtime state ----------------------------------------------------------
# Index of the next slot to write in the sample array.
COUNT=0
# Most recently computed average load.
AVERAGE_LOAD=0
# First positional argument: path of the file holding the monitored PID.
PIDFILE=$1

#######################################
# Write a timestamped message to the log file and to stdout.
# Globals:   LOG (read) - path of the file the line is appended to
# Arguments: $1 - message text
# Outputs:   "YYYY.MM.DD HH:MM:SS <message>" on stdout and appended to ${LOG}
#######################################
send_log() {
    local line
    # Build the line once so the file and stdout carry the same timestamp, and
    # keep the message quoted so whitespace and glob characters (e.g. "*")
    # are logged verbatim instead of being split or expanded by the shell.
    line="$(date "+%Y.%m.%d %H:%M:%S")${1:+ ${1}}"
    printf '%s\n' "${line}" >>"${LOG}"
    printf '%s\n' "${line}"
}

# Both arguments are mandatory: $1 is the pidfile path, $2 the load limit.
# A usage error is reported with a non-zero status so callers can detect it.
if [ -z "${1}" ] || [ -z "${2}" ]; then
    send_log "Please enter: check_overload_procpidfile check_overload_limit"
    exit 1
fi

LOAD_LIMIT="${2}"

# The limit is later compared with an integer test (-ge); reject anything that
# is not a plain non-negative integer up front instead of failing every second.
case "${LOAD_LIMIT}" in
    *[!0-9]*)
        send_log "check_overload_limit must be a non-negative integer, got: ${LOAD_LIMIT}"
        exit 1
        ;;
esac

# Read the PID before logging: PROCESS_PID is otherwise still unset at this
# point, which left the PID blank and made ps run without a process id.
PROCESS_PID="$(cat "${PIDFILE}" 2>/dev/null)"
PROCESS_NAME=""
if [ -n "${PROCESS_PID}" ]; then
    PROCESS_NAME="$(ps -p "${PROCESS_PID}" -o comm= 2>/dev/null)"
fi
send_log "Starting overload control for process PID: ${PROCESS_PID}, NAME: ${PROCESS_NAME}, LOAD_LIMIT=${LOAD_LIMIT}, REVALENT_COUNT=${REVALENT_COUNT}, TIMEOUT: ${TIMEOUT}"

# Main monitoring loop: once per second store the integer part of the process
# CPU usage in ARR; when ARR holds more than REVALENT_COUNT samples compute
# their average and restart the service if it reaches LOAD_LIMIT.
while true; do
    # Wrap the sample index once it passes the window size and report the
    # most recently computed average.
    if [ "${COUNT}" -gt "${REVALENT_COUNT}" ]; then
        COUNT="0"
        send_log "Average LOAD = ${AVERAGE_LOAD}"
    fi

    # Re-read the PID on every pass; it changes after a restart.
    PROCESS_PID="$(cat "${PIDFILE}")"

    # Integer part of %cpu (awk splits on "." or "," to drop the fraction).
    SAMPLE=""
    if [ -n "${PROCESS_PID}" ]; then
        SAMPLE="$(ps -p "${PROCESS_PID}" -o %cpu= | awk -F'[,.]' '{print $1}')"
    fi
    SAMPLE="${SAMPLE//[[:space:]]/}"
    # A missing process or unparsable value is stored as 0: it still occupies
    # a slot in the window but adds nothing to the sum.
    case "${SAMPLE}" in
        '' | *[!0-9]*) SAMPLE="0" ;;
    esac
    ARR[${COUNT}]="${SAMPLE}"

    if [ "${#ARR[@]}" -gt "${REVALENT_COUNT}" ]; then
        AVERAGE_SUMM="0"
        for I in "${ARR[@]}"; do
            AVERAGE_SUMM=$((AVERAGE_SUMM + I))
        done
        AVERAGE_LOAD=$((AVERAGE_SUMM / ${#ARR[@]}))
        if [ "${AVERAGE_LOAD}" = "0" ]; then
            send_log "COUNT: ${COUNT}"
            send_log "All array count: ${#ARR[*]}"
            send_log "Polkadot PID: ${PROCESS_PID}"
        fi
    fi

    # "[" rather than "((" on purpose: with a non-numeric LOAD_LIMIT the test
    # fails with an error instead of silently evaluating the limit as 0.
    if [ "${AVERAGE_LOAD}" -ge "${LOAD_LIMIT}" ]; then
        send_log "Process number:${PROCESS_PID}, name:$(ps -p "${PROCESS_PID}" -o comm=), average load for the last ${REVALENT_COUNT} seconds is: ${AVERAGE_LOAD}%. Restarting"

        # Stop the running instance.
        send_log "Stopping Polkadot service...."
        service "${SERVICE_NAME}" stop >>"${LOG}" 2>&1

        STOP_COUNT="0"
        send_log "Start waiting for ${SERVICE_NAME} successful stopped (timeout seconds: ${STOP_TIMEOUT})"
        # Check the exact PID with ps -p; grepping the full process list also
        # matched unrelated lines that merely contained the same digits.
        while [ -n "${PROCESS_PID}" ] && ps -p "${PROCESS_PID}" >/dev/null 2>&1; do
            if [ "${STOP_COUNT}" -gt "${STOP_TIMEOUT}" ]; then
                send_log "Stopping ${SERVICE_NAME} TIMEOUT! killing..."
                kill -9 "${PROCESS_PID}"
                sleep 5
                killall -9 "${SERVICE_NAME}"
                sleep 1
                # Leave the wait loop; "return" is invalid outside a function.
                break
            fi
            send_log "Waiting for ${SERVICE_NAME} successful stopped... (${STOP_COUNT} seconds...)"
            sleep 1
            STOP_COUNT=$((STOP_COUNT + 1))
        done

        sleep 5

        send_log "Starting Polkadot service...."
        if service "${SERVICE_NAME}" start >>"${LOG}" 2>&1; then
            send_log "Starting ${SERVICE_NAME} service successful"
        else
            # NOTE(review): the message says "Exiting" but the script keeps
            # running, as it always did - confirm which behaviour is wanted.
            send_log "Starting ${SERVICE_NAME} service ERROR! Exiting"
        fi

        # Start a fresh sampling window for the new process.
        unset ARR
        AVERAGE_LOAD="0"
        AVERAGE_SUMM="0"
        send_log "Sleeping ${TIMEOUT}"
        COUNT_SLEEP="0"
        while [ "${COUNT_SLEEP}" -lt "${TIMEOUT}" ]; do
            sleep 1
            COUNT_SLEEP=$((COUNT_SLEEP + 1))
        done
    fi

    COUNT=$((COUNT + 1))
    sleep 1
done
