Scheduler now uses btime, and other improvements

This commit is contained in:
Yoshax
2016-06-01 17:47:50 +01:00
parent 9d47019f32
commit 0fb98bbabd
45 changed files with 408 additions and 981 deletions

View File

@@ -48,7 +48,7 @@
// This controls how often the process will yield (call sleep(0)) while it is running.
// Every concurrent process should sleep periodically while running in order to allow other
// processes to execute concurrently.
var/tmp/sleep_interval = PROCESS_DEFAULT_SLEEP_INTERVAL
var/tmp/sleep_interval
// hang_warning_time - this is the time (in 1/10 seconds) after which the server will begin to show "maybe hung" in the context window
var/tmp/hang_warning_time = PROCESS_DEFAULT_HANG_WARNING_TIME
@@ -59,20 +59,20 @@
// hang_restart_time - After this much time(in 1/10 seconds), the server will automatically kill and restart the process.
var/tmp/hang_restart_time = PROCESS_DEFAULT_HANG_RESTART_TIME
// cpu_threshold - if world.cpu >= cpu_threshold, scheck() will call sleep(1) to defer further work until the next tick. This keeps a process from driving a tick into overtime (causing perceptible lag)
var/tmp/cpu_threshold = PROCESS_DEFAULT_CPU_THRESHOLD
// How many times in the current run has the process deferred work till the next tick?
var/tmp/cpu_defer_count = 0
// How many SCHECKs have been skipped (to limit btime calls)
var/tmp/calls_since_last_scheck = 0
/**
* recordkeeping vars
*/
// Records the time (server ticks) at which the process last finished sleeping
// Records the time (1/10s timeofday) at which the process last finished sleeping
var/tmp/last_slept = 0
// Records the time (s-ticks) at which the process last began running
// Records the time (1/10s timeofday) at which the process last began running
var/tmp/run_start = 0
// Records the number of times this process has been killed and restarted
@@ -85,26 +85,33 @@
var/tmp/last_object
datum/controller/process/New(var/datum/controller/processScheduler/scheduler)
// Counts the number of times an exception has occurred; gets reset after 10
var/tmp/list/exceptions = list()
// Number of deciseconds to delay before starting the process
var/start_delay = 0
/datum/controller/process/New(var/datum/controller/processScheduler/scheduler)
..()
main = scheduler
previousStatus = "idle"
idle()
name = "process"
schedule_interval = 50
sleep_interval = 2
sleep_interval = world.tick_lag / PROCESS_DEFAULT_SLEEP_INTERVAL
last_slept = 0
run_start = 0
ticks = 0
last_task = 0
last_object = null
datum/controller/process/proc/started()
/datum/controller/process/proc/started()
var/timeofhour = TimeOfHour
// Initialize last_slept so we can know when to sleep
last_slept = world.timeofday
last_slept = timeofhour
// Initialize run_start so we can detect hung processes.
run_start = world.timeofday
run_start = timeofhour
// Initialize defer count
cpu_defer_count = 0
@@ -114,65 +121,65 @@ datum/controller/process/proc/started()
onStart()
datum/controller/process/proc/finished()
/datum/controller/process/proc/finished()
ticks++
idle()
main.processFinished(src)
onFinish()
datum/controller/process/proc/doWork()
/datum/controller/process/proc/doWork()
datum/controller/process/proc/setup()
/datum/controller/process/proc/setup()
datum/controller/process/proc/process()
/datum/controller/process/proc/process()
started()
doWork()
finished()
datum/controller/process/proc/running()
/datum/controller/process/proc/running()
idle = 0
queued = 0
running = 1
hung = 0
setStatus(PROCESS_STATUS_RUNNING)
datum/controller/process/proc/idle()
/datum/controller/process/proc/idle()
queued = 0
running = 0
idle = 1
hung = 0
setStatus(PROCESS_STATUS_IDLE)
datum/controller/process/proc/queued()
/datum/controller/process/proc/queued()
idle = 0
running = 0
queued = 1
hung = 0
setStatus(PROCESS_STATUS_QUEUED)
datum/controller/process/proc/hung()
/datum/controller/process/proc/hung()
hung = 1
setStatus(PROCESS_STATUS_HUNG)
datum/controller/process/proc/handleHung()
/datum/controller/process/proc/handleHung()
var/timeofhour = TimeOfHour
var/datum/lastObj = last_object
var/lastObjType = "null"
if(istype(lastObj))
lastObjType = lastObj.type
// If world.timeofday has rolled over, then we need to adjust.
if (world.timeofday < run_start)
run_start -= 864000
var/msg = "[name] process hung at tick #[ticks]. Process was unresponsive for [(world.timeofday - run_start) / 10] seconds and was restarted. Last task: [last_task]. Last Object Type: [lastObjType]"
// If timeofhour has rolled over, then we need to adjust.
if (timeofhour < run_start)
run_start -= 36000
var/msg = "[name] process hung at tick #[ticks]. Process was unresponsive for [(timeofhour - run_start) / 10] seconds and was restarted. Last task: [last_task]. Last Object Type: [lastObjType]"
logTheThing("debug", null, null, msg)
logTheThing("diary", null, null, msg, "debug")
message_admins(msg)
main.restartProcess(src.name)
datum/controller/process/proc/kill()
/datum/controller/process/proc/kill()
if (!killed)
var/msg = "[name] process was killed at tick #[ticks]."
logTheThing("debug", null, null, msg)
@@ -182,59 +189,68 @@ datum/controller/process/proc/kill()
// Allow inheritors to clean up if needed
onKill()
killed = TRUE
// This should del
del(src)
del(src) // This should del
datum/controller/process/proc/scheck(var/tickId = 0)
// Do not call this directly - use SHECK or SCHECK_EVERY
/datum/controller/process/proc/sleepCheck(var/tickId = 0)
calls_since_last_scheck = 0
if (killed)
// The kill proc is the only place where killed is set.
// The kill proc should have deleted this datum, and all sleeping procs that are
// owned by it.
CRASH("A killed process is still running somehow...")
if (hung)
// This will only really help if the doWork proc ends up in an infinite loop.
handleHung()
CRASH("Process [name] hung and was restarted.")
// For each tick the process defers, it increments the cpu_defer_count so we don't
// defer indefinitely
if (world.cpu >= cpu_threshold + cpu_defer_count * 10)
sleep(1)
if (main.getCurrentTickElapsedTime() > main.timeAllowance)
sleep(world.tick_lag)
cpu_defer_count++
last_slept = world.timeofday
last_slept = TimeOfHour
else
// If world.timeofday has rolled over, then we need to adjust.
if (world.timeofday < last_slept)
last_slept -= 864000
var/timeofhour = TimeOfHour
// If timeofhour has rolled over, then we need to adjust.
if (timeofhour < last_slept)
last_slept -= 36000
if (world.timeofday > last_slept + sleep_interval)
// If we haven't slept in sleep_interval ticks, sleep to allow other work to proceed.
if (timeofhour > last_slept + sleep_interval)
// If we haven't slept in sleep_interval deciseconds, sleep to allow other work to proceed.
sleep(0)
last_slept = world.timeofday
last_slept = TimeOfHour
datum/controller/process/proc/update()
/datum/controller/process/proc/update()
// Clear delta
if(previousStatus != status)
setStatus(status)
var/elapsedTime = getElapsedTime()
if (elapsedTime > hang_restart_time)
if (hung)
handleHung()
return
else if (elapsedTime > hang_restart_time)
hung()
else if (elapsedTime > hang_alert_time)
setStatus(PROCESS_STATUS_PROBABLY_HUNG)
else if (elapsedTime > hang_warning_time)
setStatus(PROCESS_STATUS_MAYBE_HUNG)
datum/controller/process/proc/getElapsedTime()
if (world.timeofday < run_start)
return world.timeofday - (run_start - 864000)
return world.timeofday - run_start
datum/controller/process/proc/tickDetail()
/datum/controller/process/proc/getElapsedTime()
var/timeofhour = TimeOfHour
if (timeofhour < run_start)
return timeofhour - (run_start - 36000)
return timeofhour - run_start
/datum/controller/process/proc/tickDetail()
return
datum/controller/process/proc/getContext()
/datum/controller/process/proc/getContext()
return "<tr><td>[name]</td><td>[main.averageRunTime(src)]</td><td>[main.last_run_time[src]]</td><td>[main.highest_run_time[src]]</td><td>[ticks]</td></tr>\n"
datum/controller/process/proc/getContextData()
/datum/controller/process/proc/getContextData()
return list(
"name" = name,
"averageRunTime" = main.averageRunTime(src),
@@ -246,10 +262,10 @@ datum/controller/process/proc/getContextData()
"disabled" = disabled
)
datum/controller/process/proc/getStatus()
/datum/controller/process/proc/getStatus()
return status
datum/controller/process/proc/getStatusText(var/s = 0)
/datum/controller/process/proc/getStatusText(var/s = 0)
if(!s)
s = status
switch(s)
@@ -268,21 +284,21 @@ datum/controller/process/proc/getStatusText(var/s = 0)
else
return "UNKNOWN"
datum/controller/process/proc/getPreviousStatus()
/datum/controller/process/proc/getPreviousStatus()
return previousStatus
datum/controller/process/proc/getPreviousStatusText()
/datum/controller/process/proc/getPreviousStatusText()
return getStatusText(previousStatus)
datum/controller/process/proc/setStatus(var/newStatus)
/datum/controller/process/proc/setStatus(var/newStatus)
previousStatus = status
status = newStatus
datum/controller/process/proc/setLastTask(var/task, var/object)
/datum/controller/process/proc/setLastTask(var/task, var/object)
last_task = task
last_object = object
datum/controller/process/proc/_copyStateFrom(var/datum/controller/process/target)
/datum/controller/process/proc/_copyStateFrom(var/datum/controller/process/target)
main = target.main
name = target.name
schedule_interval = target.schedule_interval
@@ -295,28 +311,62 @@ datum/controller/process/proc/_copyStateFrom(var/datum/controller/process/target
last_object = target.last_object
copyStateFrom(target)
datum/controller/process/proc/copyStateFrom(var/datum/controller/process/target)
/datum/controller/process/proc/copyStateFrom(var/datum/controller/process/target)
datum/controller/process/proc/onKill()
/datum/controller/process/proc/onKill()
datum/controller/process/proc/onStart()
/datum/controller/process/proc/onStart()
datum/controller/process/proc/onFinish()
/datum/controller/process/proc/onFinish()
datum/controller/process/proc/disable()
/datum/controller/process/proc/disable()
disabled = 1
datum/controller/process/proc/enable()
/datum/controller/process/proc/enable()
disabled = 0
/datum/controller/process/proc/getAverageRunTime()
return main.averageRunTime(src)
/datum/controller/process/proc/getLastRunTime()
return main.getProcessLastRunTime(src)
/datum/controller/process/proc/getHighestRunTime()
return main.getProcessHighestRunTime(src)
/datum/controller/process/proc/getTicks()
return ticks
/datum/controller/process/proc/getStatName()
return name
/datum/controller/process/proc/statProcess()
var/averageRunTime = round(getAverageRunTime(), 0.1)/10
var/lastRunTime = round(getLastRunTime(), 0.1)/10
var/highestRunTime = round(getHighestRunTime(), 0.1)/10
stat("[name]", "T#[getTicks()] | AR [averageRunTime] | LR [lastRunTime] | HR [highestRunTime] | D [cpu_defer_count]")
/datum/controller/process/proc/getTickTime()
return "#[getTicks()]\t- [getLastRunTime()]"
/datum/controller/process/proc/catchException(var/exception/e, var/thrower)
var/etext = "[e]"
var/eid = "[e]" // Exception ID, for tracking repeated exceptions
var/ptext = "" // "processing..." text, for what was being processed (if known)
if(istype(e))
etext += " in [e.file], line [e.line]"
eid = "[e.file]:[e.line]"
if(eid in exceptions)
if(exceptions[eid]++ >= 10)
return
else
exceptions[eid] = 1
if(istype(thrower, /datum))
var/datum/D = thrower
ptext = " processing [D.type]"
if(istype(thrower, /atom))
var/atom/A = thrower
ptext += " ([A]) ([A.x],[A.y],[A.z])"
log_to_dd("\[[time_stamp()]\] Process [name] caught exception[ptext]: [etext]")
if(exceptions[eid] >= 10)
log_to_dd("This exception will now be ignored for ten minutes.")
spawn(6000)
exceptions[eid] = 0
/datum/controller/process/proc/catchBadType(var/datum/caught)
if(isnull(caught) || !istype(caught) || !isnull(caught.gcDestroyed))
return // Only bother with types we can identify and that don't belong
catchException("Type [caught.type] does not belong in process' queue")