mirror of
https://github.com/vgstation-coders/vgstation13.git
synced 2025-12-10 10:21:11 +00:00
Replace old watchdog script with the new system that we used on Failstation, plus some fixes.
This commit is contained in:
@@ -1,51 +1,157 @@
|
||||
import subprocess
|
||||
import socket
|
||||
import urlparse
|
||||
import os
|
||||
import struct
|
||||
import time
|
||||
import urllib
|
||||
import json
|
||||
import logging
|
||||
import logging.handlers
|
||||
|
||||
# --- Watchdog configuration ---------------------------------------------

# Local UDP endpoint the watchdog's datagram socket is bound to.
UDP_IP = "127.0.0.1"
UDP_PORT = 8019

# (IP, port) pair that open_socket() connects to over TCP.
MONITOR = ('127.0.0.1', 1336)

# Shell command run to restart the server after a failed ping.
RESTART_COMMAND = "/home/gmod/byond/ss13.sh"

# JSON file that receives the parsed status reply.
STATS_FILE = '/home/gmod/stats.json'

# Consecutive failed pings tolerated before the watchdog gives up.
MAX_FAILURES = 3

# Directory that holds the crash log.
LOGPATH = '/home/gmod/byond/crashlogs/'

# TCP socket timeout, in seconds.
TIMEOUT = 30.0
|
||||
|
||||
# IPv4 datagram (UDP) socket, bound to the local watchdog endpoint.
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind((UDP_IP, UDP_PORT))
|
||||
def open_socket():
    """Open a TCP connection to MONITOR.

    The timeout is applied *before* connecting so that a hung connection
    attempt is bounded by TIMEOUT too, not only the later reads and writes.

    Returns:
        A connected TCP socket whose timeout is TIMEOUT seconds.

    Raises:
        socket.timeout: the connection attempt exceeded TIMEOUT seconds.
        socket.error: the connection could not be established.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.settimeout(TIMEOUT)
        s.connect(MONITOR)
    except socket.error:
        # Do not leak the descriptor when the connection attempt fails.
        s.close()
        raise
    return s
|
||||
|
||||
# Snippet below from http://pastebin.com/TGhPBPGp
def decode_packet(packet):
    r"""Decode a raw reply packet into its payload.

    Layout, as read by this function:
        bytes 0-1   magic b'\x00\x83'
        bytes 2-3   big-endian unsigned short: size of type byte + payload
        byte  4     type identifier (b'\x2a' float, b'\x06' ASCII string)
        bytes 5+    payload

    Args:
        packet: the raw bytes received from the server.

    Returns:
        None when the packet is empty; a float for a float reply; the
        payload string with NUL bytes removed for a string reply; b''
        (after logging the packet) for anything that cannot be decoded.
    """
    if not packet:
        return None

    # Slices (not single-index lookups) are compared so the checks behave
    # the same on Python 2 str and Python 3 bytes, and so that a packet
    # shorter than the header cannot raise IndexError.
    # Both magic bytes must match for the packet to be in the right format.
    if len(packet) >= 5 and packet[0:2] == b'\x00\x83':
        # Size of the string/float, minus the type identifier byte.
        size = struct.unpack('>H', packet[2:4])[0] - 1
        kind = packet[4:5]

        if kind == b'\x2a' and len(packet) >= 9:
            # 4-byte float. struct.unpack returns a 1-tuple, so the value
            # lives at index 0.
            # NOTE(review): 'f' uses native byte order while the original
            # comment called this value big-endian -- confirm against the
            # server before changing the format.
            return struct.unpack('f', packet[5:9])[0]

        if kind == b'\x06':
            # ASCII string. Slicing tolerates a declared size larger than
            # the bytes actually received.
            return packet[5:5 + size].replace(b'\x00', b'')

    log.error('UNKNOWN PACKET: {0}'.format(repr(packet)))
    return b''
|
||||
|
||||
def _build_query(request):
    """Wrap a topic request in the packet framing sent to the server."""
    # Snippet below from http://pastebin.com/TGhPBPGp
    # All queries must begin with a question mark (ie "?players").
    # A one-byte slice is used so an empty request cannot raise IndexError.
    if request[:1] != b'?':
        request = b'?' + request

    # Packet structure is based on a reverse-engineered format.
    query = b'\x00\x83'
    query += struct.pack('>H', len(request) + 6)  # Rob: BIG-endian
    query += b'\x00\x00\x00\x00\x00'
    query += request
    query += b'\x00'
    return query


def _recv_reply(s):
    """Read one complete reply from socket `s` and return the raw bytes."""
    data = b''
    expected = None  # total packet length, known once the header arrived
    while expected is None or len(data) < expected:
        buf = s.recv(1024)
        if not buf:
            break  # peer closed the connection
        data += buf
        if expected is None and len(data) >= 4:
            if data[0:2] != b'\x00\x83':
                # Not the framing decode_packet() understands; stop reading
                # and let the decoder report the packet.
                break
            # Bytes 2-3 hold the size of everything after the 4-byte header.
            expected = 4 + struct.unpack('>H', data[2:4])[0]
    return data


def _parse_status(response):
    """Turn an '&'-separated, URL-encoded status string into a dict.

    Keys that are not reserved are collected in the 'playerlist' list.
    """
    reserved_keys = ['ai', 'respawn', 'admins', 'players', 'host', 'version',
                     'mode', 'enter', 'vote', 'playerlist']
    parsed_response = {}
    for chunk in response.split('&'):
        dt = chunk.split('=')
        if dt[0] not in reserved_keys:
            parsed_response.setdefault('playerlist', []).append(dt[0])
        else:
            parsed_response[dt[0]] = ''
            if len(dt) == 2:
                parsed_response[dt[0]] = urllib.unquote(dt[1])
    return parsed_response


def ping_server(request):
    """Send `request` to the server and record its status reply.

    The parsed reply is written to STATS_FILE as JSON, e.g.
    {'ai': '1', 'respawn': '0', 'admins': '0', 'players': '0', 'host': '',
     'version': '/vg/+Station+13', 'mode': 'secret', 'enter': '1', 'vote': '0'}

    Args:
        request: topic query; a leading '?' is added when missing.

    Returns:
        True when the server answered, False when the connection failed,
        timed out, or produced no data.
    """
    try:
        query = _build_query(request)

        s = open_socket()
        if s is None:
            return False

        try:
            s.sendall(query)
            data = _recv_reply(s)
        finally:
            # Always release the socket, including on timeout or reset.
            s.close()

        response = decode_packet(data)
        if response is None:
            log.error("Received NONE from server!")
            return False

        if isinstance(response, float):
            # A numeric reply proves the server is responsive, but there is
            # no status string to parse or store.
            log.warning('Received numeric reply {0!r}; stats not updated.'.format(response))
            return True

        parsed_response = _parse_status(response.replace('\x00', ''))
        try:
            with open(STATS_FILE, 'w') as f:
                json.dump(parsed_response, f)
        except IOError:
            # Failing to write stats must not take the watchdog down.
            log.error('Could not write {0}'.format(STATS_FILE))
    except socket.timeout:
        log.error("Socket timed out!")
        return False
    except socket.error:
        log.error("Connection lost!")
        return False
    return True
|
||||
|
||||
# --- Logging setup --------------------------------------------------------
# Kept at module level: `log` is used by the other functions in this file
# and by the main loop.
if not os.path.isdir(LOGPATH):
    os.makedirs(LOGPATH)

logFormatter = logging.Formatter(fmt='%(asctime)s [%(levelname)-8s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') #, level=logging.INFO, filename='crashlog.log', filemode='a+')
log = logging.getLogger()
log.setLevel(logging.INFO)

# NOTE(review): RotatingFileHandler never rolls over while backupCount is 0,
# so the 50MB maxBytes below is not enforced -- confirm whether that is
# intended.
fileHandler = logging.handlers.RotatingFileHandler(os.path.join(LOGPATH, 'crash.log'), maxBytes=1024*1024*50, backupCount=0) # 50MB
fileHandler.setFormatter(logFormatter)
log.addHandler(fileHandler)

consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(logFormatter)
log.addHandler(consoleHandler)

# Last ticker state reported by the server. Initialised here so reading it
# before the first "ticker_state" message cannot raise NameError.
last_ticker_state = None


def handle_message(data, addr):
    """Process one URL-encoded datagram received on the UDP socket.

    Recognised values of the "type" field:
        "log"          -- append "message" to the file named by "log".
        "ticker_state" -- remember "message" as the last ticker state.
        "startup"      -- if a ticker state is known, record it in
                          crashlog.txt.
    Messages with missing fields are ignored.

    Args:
        data: raw query-string payload of the datagram.
        addr: sender address (unused).
    """
    global last_ticker_state

    params = urlparse.parse_qs(data)
    print(data)

    # parse_qs maps each key to a list of values; a missing key yields ''.
    msg_type = params.get("type", [""])[0]
    message = params.get("message", [""])[0]

    if msg_type == "log":
        log_name = params.get("log", [""])[0]
        if log_name and message:
            # NOTE(review): the target path is taken verbatim from the
            # datagram; consider restricting it to a known directory.
            try:
                with open(log_name, "a+") as f:
                    f.write(message + "\n")
            except IOError:
                # Best-effort: a failed write must not stop the watchdog.
                log.warning('Could not append to {0}'.format(log_name))
    elif msg_type == "ticker_state":
        if message:
            last_ticker_state = str(message)
    elif msg_type == "startup":
        if last_ticker_state:
            with open("crashlog.txt", "a+") as f:
                f.write("Server exited, last ticker state was: "+last_ticker_state+"\n")
|
||||
|
||||
import sys  # local import: sys.exit() is used below

sock.settimeout(60*6) # 6 minute timeout (360 seconds)

log.info('/vg/station Watchdog: Started.')
lastState = True    # result of the previous ping
failChain = 0       # consecutive failed pings
firstRun = True
# Harmless if already initialised elsewhere: no message has been handled yet.
last_ticker_state = None

while True:
    try:
        data, addr = sock.recvfrom(1024) # buffer size is 1024 bytes
        handle_message(data, addr)
    except socket.timeout:
        # try to start the server again
        print("Server timed out.. attempting restart.")
        if last_ticker_state:
            with open("crashmsg.txt", "a+") as f:
                f.write("Server crashed, trying to reboot. last ticker state: "+last_ticker_state+"\n")
        # An argument list is required here: without shell=True a single
        # string is treated as the program name and raises OSError.
        subprocess.call(["killall", "-9", "DreamDaemon"])
        subprocess.call("./start")

    if not ping_server(b'?status'):
        # try to start the server again
        failChain += 1
        if not lastState:
            if failChain > MAX_FAILURES:
                log.error('Too many failures, quitting.')
                sys.exit(1)
            log.error('Try {0}/{1}...'.format(failChain, MAX_FAILURES))
        else:
            log.error("Detected a problem, attempting restart ({0}/{1}).".format(failChain, MAX_FAILURES))
        subprocess.call(RESTART_COMMAND, shell=True)
        time.sleep(50) # Sleep 50 seconds for a total of one minute before we ping again.
        lastState = False
    else:
        if not lastState:
            log.info('Server is confirmed to be back up and running.')
        if firstRun:
            log.info('Server is confirmed to be up and running.')
        lastState = True
        failChain = 0
        firstRun = False
    time.sleep(10) # Ten seconds between "pings".
|
||||
Reference in New Issue
Block a user