Process deferral and profiling improvements

* Merges Krausus's https://github.com/ParadiseSS13/Paradise/pull/5472 from Paradise
This commit is contained in:
Krausus
2016-08-17 02:10:39 -04:00
committed by Leshana
parent 43f6f48abb
commit b183ce3ad2
3 changed files with 84 additions and 189 deletions

View File

@@ -7,13 +7,12 @@
#define PROCESS_STATUS_HUNG 6 #define PROCESS_STATUS_HUNG 6
// Process time thresholds // Process time thresholds
#define PROCESS_DEFAULT_HANG_WARNING_TIME 300 // 30 seconds #define PROCESS_DEFAULT_HANG_WARNING_TIME 300 // 30 seconds
#define PROCESS_DEFAULT_HANG_ALERT_TIME 600 // 60 seconds #define PROCESS_DEFAULT_HANG_ALERT_TIME 600 // 60 seconds
#define PROCESS_DEFAULT_HANG_RESTART_TIME 900 // 90 seconds #define PROCESS_DEFAULT_HANG_RESTART_TIME 900 // 90 seconds
#define PROCESS_DEFAULT_SCHEDULE_INTERVAL 50 // 50 ticks #define PROCESS_DEFAULT_SCHEDULE_INTERVAL 50 // 50 ticks
#define PROCESS_DEFAULT_SLEEP_INTERVAL 8 // 1/8th of a tick #define PROCESS_DEFAULT_SLEEP_INTERVAL 20 // 20% of a tick
#define PROCESS_DEFAULT_DEFER_USAGE 90 // 90% of a tick
// SCHECK macros // Sleep check macro
// This references src directly to work around a weird bug with try/catch #define SCHECK if(world.tick_usage >= next_sleep_usage) defer()
#define SCHECK_EVERY(this_many_calls) if(++src.calls_since_last_scheck >= this_many_calls) sleepCheck()
#define SCHECK sleepCheck()

View File

@@ -48,7 +48,10 @@
// This controls how often the process will yield (call sleep(0)) while it is running. // This controls how often the process will yield (call sleep(0)) while it is running.
// Every concurrent process should sleep periodically while running in order to allow other // Every concurrent process should sleep periodically while running in order to allow other
// processes to execute concurrently. // processes to execute concurrently.
var/tmp/sleep_interval var/tmp/sleep_interval = PROCESS_DEFAULT_SLEEP_INTERVAL
// Defer usage; the tick usage at which this process will defer until the next tick
var/tmp/defer_usage = PROCESS_DEFAULT_DEFER_USAGE
// hang_warning_time - this is the time (in 1/10 seconds) after which the server will begin to show "maybe hung" in the context window // hang_warning_time - this is the time (in 1/10 seconds) after which the server will begin to show "maybe hung" in the context window
var/tmp/hang_warning_time = PROCESS_DEFAULT_HANG_WARNING_TIME var/tmp/hang_warning_time = PROCESS_DEFAULT_HANG_WARNING_TIME
@@ -59,19 +62,13 @@
// hang_restart_time - After this much time(in 1/10 seconds), the server will automatically kill and restart the process. // hang_restart_time - After this much time(in 1/10 seconds), the server will automatically kill and restart the process.
var/tmp/hang_restart_time = PROCESS_DEFAULT_HANG_RESTART_TIME var/tmp/hang_restart_time = PROCESS_DEFAULT_HANG_RESTART_TIME
// How many times in the current run has the process deferred work till the next tick? // Number of deciseconds to delay before starting the process
var/tmp/cpu_defer_count = 0 var/start_delay = 0
// How many SCHECKs have been skipped (to limit btime calls)
var/tmp/calls_since_last_scheck = 0
/** /**
* recordkeeping vars * recordkeeping vars
*/ */
// Records the time (1/10s timeoftick) at which the process last finished sleeping
var/tmp/last_slept = 0
// Records the time (1/10s timeofgame) at which the process last began running // Records the time (1/10s timeofgame) at which the process last began running
var/tmp/run_start = 0 var/tmp/run_start = 0
@@ -85,11 +82,29 @@
var/tmp/last_object var/tmp/last_object
// How many times in the current run has the process deferred work till the next tick?
var/tmp/cpu_defer_count = 0
// Counts the number of times an exception has occurred; gets reset after 10 // Counts the number of times an exception has occurred; gets reset after 10
var/tmp/list/exceptions = list() var/tmp/list/exceptions = list()
// Number of deciseconds to delay before starting the process // The next tick_usage the process will sleep at
var/start_delay = 0 var/tmp/next_sleep_usage
// Last run duration, in seconds
var/tmp/last_run_time = 0
// Last 20 run durations
var/tmp/list/last_twenty_run_times = list()
// Highest run duration, in seconds
var/tmp/highest_run_time = 0
// Tick usage at start of current run (updates upon deferring)
var/tmp/tick_usage_start
// Accumulated tick usage from before each deferral
var/tmp/tick_usage_accumulated = 0
/datum/controller/process/New(var/datum/controller/processScheduler/scheduler) /datum/controller/process/New(var/datum/controller/processScheduler/scheduler)
..() ..()
@@ -97,9 +112,6 @@
previousStatus = "idle" previousStatus = "idle"
idle() idle()
name = "process" name = "process"
schedule_interval = 50
sleep_interval = world.tick_lag / PROCESS_DEFAULT_SLEEP_INTERVAL
last_slept = 0
run_start = 0 run_start = 0
ticks = 0 ticks = 0
last_task = 0 last_task = 0
@@ -112,6 +124,10 @@
// Initialize defer count // Initialize defer count
cpu_defer_count = 0 cpu_defer_count = 0
// Prepare usage tracking (defer() updates these)
tick_usage_start = world.tick_usage
tick_usage_accumulated = 0
running() running()
main.processStarted(src) main.processStarted(src)
@@ -119,11 +135,23 @@
/datum/controller/process/proc/finished() /datum/controller/process/proc/finished()
ticks++ ticks++
recordRunTime()
idle() idle()
main.processFinished(src) main.processFinished(src)
onFinish() onFinish()
/datum/controller/process/proc/recordRunTime()
// Convert from tick usage (100/tick) to seconds of CPU time used
var/total_usage = (tick_usage_accumulated + (world.tick_usage - tick_usage_start)) / 1000 * world.tick_lag
last_run_time = total_usage
if(total_usage > highest_run_time)
highest_run_time = total_usage
if(last_twenty_run_times.len == 20)
last_twenty_run_times.Cut(1, 2)
last_twenty_run_times += total_usage
/datum/controller/process/proc/doWork() /datum/controller/process/proc/doWork()
/datum/controller/process/proc/setup() /datum/controller/process/proc/setup()
@@ -183,10 +211,9 @@
// This should del // This should del
del(src) del(src)
// Do not call this directly - use SHECK or SCHECK_EVERY // Do not call this directly - use SHECK
/datum/controller/process/proc/sleepCheck(var/tickId = 0) /datum/controller/process/proc/defer()
calls_since_last_scheck = 0 if(killed)
if (killed)
// The kill proc is the only place where killed is set. // The kill proc is the only place where killed is set.
// The kill proc should have deleted this datum, and all sleeping procs that are // The kill proc should have deleted this datum, and all sleeping procs that are
// owned by it. // owned by it.
@@ -196,15 +223,14 @@
handleHung() handleHung()
CRASH("Process [name] hung and was restarted.") CRASH("Process [name] hung and was restarted.")
if (main.getCurrentTickElapsedTime() > main.timeAllowance) tick_usage_accumulated += (world.tick_usage - tick_usage_start)
if(world.tick_usage < defer_usage)
sleep(0)
else
sleep(world.tick_lag) sleep(world.tick_lag)
cpu_defer_count++ cpu_defer_count++
last_slept = 0 tick_usage_start = world.tick_usage
else next_sleep_usage = min(world.tick_usage + sleep_interval, defer_usage)
if (TimeOfTick > last_slept + sleep_interval)
// If we haven't slept in sleep_interval deciseconds, sleep to allow other work to proceed.
sleep(0)
last_slept = TimeOfTick
/datum/controller/process/proc/update() /datum/controller/process/proc/update()
// Clear delta // Clear delta
@@ -231,14 +257,14 @@
return return
/datum/controller/process/proc/getContext() /datum/controller/process/proc/getContext()
return "<tr><td>[name]</td><td>[main.averageRunTime(src)]</td><td>[main.last_run_time[src]]</td><td>[main.highest_run_time[src]]</td><td>[ticks]</td></tr>\n" return "<tr><td>[name]</td><td>[getAverageRunTime()]</td><td>[last_run_time]</td><td>[highest_run_time]</td><td>[ticks]</td></tr>\n"
/datum/controller/process/proc/getContextData() /datum/controller/process/proc/getContextData()
return list( return list(
"name" = name, "name" = name,
"averageRunTime" = main.averageRunTime(src), "averageRunTime" = getAverageRunTime(),
"lastRunTime" = main.last_run_time[src], "lastRunTime" = last_run_time,
"highestRunTime" = main.highest_run_time[src], "highestRunTime" = highest_run_time,
"ticks" = ticks, "ticks" = ticks,
"schedule" = schedule_interval, "schedule" = schedule_interval,
"status" = getStatusText(), "status" = getStatusText(),
@@ -286,7 +312,6 @@
name = target.name name = target.name
schedule_interval = target.schedule_interval schedule_interval = target.schedule_interval
sleep_interval = target.sleep_interval sleep_interval = target.sleep_interval
last_slept = 0
run_start = 0 run_start = 0
times_killed = target.times_killed times_killed = target.times_killed
ticks = target.ticks ticks = target.ticks
@@ -309,21 +334,31 @@
disabled = 0 disabled = 0
/datum/controller/process/proc/getAverageRunTime() /datum/controller/process/proc/getAverageRunTime()
return main.averageRunTime(src) var/t = 0
var/c = 0
for(var/time in last_twenty_run_times)
t += time
c++
if(c > 0)
return t / c
return c
/datum/controller/process/proc/getLastRunTime() /datum/controller/process/proc/getLastRunTime()
return main.getProcessLastRunTime(src) return last_run_time
/datum/controller/process/proc/getHighestRunTime() /datum/controller/process/proc/getHighestRunTime()
return main.getProcessHighestRunTime(src) return highest_run_time
/datum/controller/process/proc/getTicks() /datum/controller/process/proc/getTicks()
return ticks return ticks
/datum/controller/process/proc/statProcess() /datum/controller/process/proc/statProcess()
var/averageRunTime = round(getAverageRunTime(), 0.1)/10 var/averageRunTime = round(getAverageRunTime(), 0.001)
var/lastRunTime = round(getLastRunTime(), 0.1)/10 var/lastRunTime = round(last_run_time, 0.001)
var/highestRunTime = round(getHighestRunTime(), 0.1)/10 var/highestRunTime = round(highest_run_time, 0.001)
stat("[name]", "T#[getTicks()] | AR [averageRunTime] | LR [lastRunTime] | HR [highestRunTime] | D [cpu_defer_count]") var/deferTime = round(cpu_defer_count / 10 * world.tick_lag, 0.01)
stat("[name]", "T#[getTicks()] | AR [averageRunTime] | LR [lastRunTime] | HR [highestRunTime] | D [deferTime]")
/datum/controller/process/proc/catchException(var/exception/e, var/thrower) /datum/controller/process/proc/catchException(var/exception/e, var/thrower)
if(istype(e)) // Real runtimes go to the real error handler if(istype(e)) // Real runtimes go to the real error handler

View File

@@ -20,18 +20,6 @@ var/global/datum/controller/processScheduler/processScheduler
// Process last queued times (world time) // Process last queued times (world time)
var/tmp/datum/controller/process/list/last_queued = new var/tmp/datum/controller/process/list/last_queued = new
// Process last start times (real time)
var/tmp/datum/controller/process/list/last_start = new
// Process last run durations
var/tmp/datum/controller/process/list/last_run_time = new
// Per process list of the last 20 durations
var/tmp/datum/controller/process/list/last_twenty_run_times = new
// Process highest run time
var/tmp/datum/controller/process/list/highest_run_time = new
// How long to sleep between runs (set to tick_lag in New) // How long to sleep between runs (set to tick_lag in New)
var/tmp/scheduler_sleep_interval var/tmp/scheduler_sleep_interval
@@ -41,22 +29,12 @@ var/global/datum/controller/processScheduler/processScheduler
// Setup for these processes will be deferred until all the other processes are set up. // Setup for these processes will be deferred until all the other processes are set up.
var/tmp/list/deferredSetupList = new var/tmp/list/deferredSetupList = new
var/tmp/currentTick = 0
var/tmp/timeAllowance = 0
var/tmp/cpuAverage = 0
var/tmp/timeAllowanceMax = 0
/datum/controller/processScheduler/New() /datum/controller/processScheduler/New()
..() ..()
// When the process scheduler is first new'd, tick_lag may be wrong, so these // When the process scheduler is first new'd, tick_lag may be wrong, so these
// get re-initialized when the process scheduler is started. // get re-initialized when the process scheduler is started.
// (These are kept here for any processes that decide to process before round start) // (These are kept here for any processes that decide to process before round start)
scheduler_sleep_interval = world.tick_lag scheduler_sleep_interval = world.tick_lag
timeAllowance = world.tick_lag * 0.5
timeAllowanceMax = world.tick_lag
/** /**
* deferSetupFor * deferSetupFor
@@ -88,20 +66,12 @@ var/global/datum/controller/processScheduler/processScheduler
isRunning = 1 isRunning = 1
// tick_lag will have been set by now, so re-initialize these // tick_lag will have been set by now, so re-initialize these
scheduler_sleep_interval = world.tick_lag scheduler_sleep_interval = world.tick_lag
timeAllowance = world.tick_lag * 0.5
timeAllowanceMax = world.tick_lag
updateStartDelays() updateStartDelays()
spawn(0) spawn(0)
process() process()
/datum/controller/processScheduler/proc/process() /datum/controller/processScheduler/proc/process()
updateCurrentTickData()
for(var/i=world.tick_lag,i<world.tick_lag*50,i+=world.tick_lag)
spawn(i) updateCurrentTickData()
while(isRunning) while(isRunning)
// Hopefully spawning this for 50 ticks in the future will make it the first thing in the queue.
spawn(world.tick_lag*50) updateCurrentTickData()
checkRunningProcesses() checkRunningProcesses()
queueProcesses() queueProcesses()
runQueuedProcesses() runQueuedProcesses()
@@ -148,20 +118,6 @@ var/global/datum/controller/processScheduler/processScheduler
process.idle() process.idle()
idle.Add(process) idle.Add(process)
// init recordkeeping vars
last_start.Add(process)
last_start[process] = 0
last_run_time.Add(process)
last_run_time[process] = 0
last_twenty_run_times.Add(process)
last_twenty_run_times[process] = list()
highest_run_time.Add(process)
highest_run_time[process] = 0
// init starts and stops record starts
recordStart(process, 0)
recordEnd(process, 0)
// Set up process // Set up process
process.setup() process.setup()
@@ -178,24 +134,9 @@ var/global/datum/controller/processScheduler/processScheduler
queued.Remove(oldProcess) queued.Remove(oldProcess)
idle.Add(newProcess) idle.Add(newProcess)
last_start.Remove(oldProcess) newProcess.last_run_time = oldProcess.last_run_time
last_start.Add(newProcess) newProcess.last_twenty_run_times = oldProcess.last_twenty_run_times
last_start[newProcess] = 0 newProcess.highest_run_time = oldProcess.highest_run_time
last_run_time.Add(newProcess)
last_run_time[newProcess] = last_run_time[oldProcess]
last_run_time.Remove(oldProcess)
last_twenty_run_times.Add(newProcess)
last_twenty_run_times[newProcess] = last_twenty_run_times[oldProcess]
last_twenty_run_times.Remove(oldProcess)
highest_run_time.Add(newProcess)
highest_run_time[newProcess] = highest_run_time[oldProcess]
highest_run_time.Remove(oldProcess)
recordStart(newProcess, 0)
recordEnd(newProcess, 0)
nameToProcessMap[newProcess.name] = newProcess nameToProcessMap[newProcess.name] = newProcess
@@ -210,11 +151,10 @@ var/global/datum/controller/processScheduler/processScheduler
/datum/controller/processScheduler/proc/processStarted(var/datum/controller/process/process) /datum/controller/processScheduler/proc/processStarted(var/datum/controller/process/process)
setRunningProcessState(process) setRunningProcessState(process)
recordStart(process) last_queued[process] = world.time
/datum/controller/processScheduler/proc/processFinished(var/datum/controller/process/process) /datum/controller/processScheduler/proc/processFinished(var/datum/controller/process/process)
setIdleProcessState(process) setIdleProcessState(process)
recordEnd(process)
/datum/controller/processScheduler/proc/setIdleProcessState(var/datum/controller/process/process) /datum/controller/processScheduler/proc/setIdleProcessState(var/datum/controller/process/process)
if (process in running) if (process in running)
@@ -243,63 +183,6 @@ var/global/datum/controller/processScheduler/processScheduler
if (!(process in running)) if (!(process in running))
running += process running += process
/datum/controller/processScheduler/proc/recordStart(var/datum/controller/process/process, var/time = null)
if (isnull(time))
time = TimeOfGame
last_queued[process] = world.time
last_start[process] = time
else
last_queued[process] = (time == 0 ? 0 : world.time)
last_start[process] = time
/datum/controller/processScheduler/proc/recordEnd(var/datum/controller/process/process, var/time = null)
if (isnull(time))
time = TimeOfGame
var/lastRunTime = time - last_start[process]
if(lastRunTime < 0)
lastRunTime = 0
recordRunTime(process, lastRunTime)
/**
* recordRunTime
* Records a run time for a process
*/
/datum/controller/processScheduler/proc/recordRunTime(var/datum/controller/process/process, time)
last_run_time[process] = time
if(time > highest_run_time[process])
highest_run_time[process] = time
var/list/lastTwenty = last_twenty_run_times[process]
if (lastTwenty.len == 20)
lastTwenty.Cut(1, 2)
lastTwenty.len++
lastTwenty[lastTwenty.len] = time
/**
* averageRunTime
* returns the average run time (over the last 20) of the process
*/
/datum/controller/processScheduler/proc/averageRunTime(var/datum/controller/process/process)
var/lastTwenty = last_twenty_run_times[process]
var/t = 0
var/c = 0
for(var/time in lastTwenty)
t += time
c++
if(c > 0)
return t / c
return c
/datum/controller/processScheduler/proc/getProcessLastRunTime(var/datum/controller/process/process)
return last_run_time[process]
/datum/controller/processScheduler/proc/getProcessHighestRunTime(var/datum/controller/process/process)
return highest_run_time[process]
/datum/controller/processScheduler/proc/getStatusData() /datum/controller/processScheduler/proc/getStatusData()
var/list/data = new var/list/data = new
@@ -338,34 +221,12 @@ var/global/datum/controller/processScheduler/processScheduler
var/datum/controller/process/process = nameToProcessMap[processName] var/datum/controller/process/process = nameToProcessMap[processName]
process.disable() process.disable()
/datum/controller/processScheduler/proc/getCurrentTickElapsedTime()
if (world.time > currentTick)
updateCurrentTickData()
return 0
else
return TimeOfTick
/datum/controller/processScheduler/proc/updateCurrentTickData()
if (world.time > currentTick)
// New tick!
currentTick = world.time
updateTimeAllowance()
cpuAverage = (world.cpu + cpuAverage + cpuAverage) / 3
/datum/controller/processScheduler/proc/updateTimeAllowance()
// Time allowance goes down linearly with world.cpu.
var/tmp/error = cpuAverage - 100
var/tmp/timeAllowanceDelta = SIMPLE_SIGN(error) * -0.5 * world.tick_lag * max(0, 0.001 * abs(error))
//timeAllowance = world.tick_lag * min(1, 0.5 * ((200/max(1,cpuAverage)) - 1))
timeAllowance = min(timeAllowanceMax, max(0, timeAllowance + timeAllowanceDelta))
/datum/controller/processScheduler/proc/statProcesses() /datum/controller/processScheduler/proc/statProcesses()
if(!isRunning) if(!isRunning)
stat("Processes", "Scheduler not running") stat("Processes", "Scheduler not running")
return return
stat("Processes", "[processes.len] (R [running.len] / Q [queued.len] / I [idle.len])") stat("Processes", "[processes.len] (R [running.len] / Q [queued.len] / I [idle.len])")
stat(null, "[round(cpuAverage, 0.1)] CPU, [round(timeAllowance, 0.1)/10] TA")
for(var/datum/controller/process/p in processes) for(var/datum/controller/process/p in processes)
p.statProcess() p.statProcess()