Garbage collection tweaks and refactors. (#30118)

* Garbage collection queuing rewriting

* Some Tweaks

* Fixes some queue tracking oddities

* More tweaks

* Remove the middle two queue steps. tis was a silly idea

* New logging.

We store logging datums for use in tracking the stats of shit qdeleted

Added logging for destroy time as well as hard delete time, per type. As well as a few others

* Fix compile for testing

* Does the cyberboss

* does the antur
This commit is contained in:
Kyle Spier-Swenson
2017-09-12 23:46:18 -07:00
committed by AnturK
parent cf23c0e7d1
commit 2f6df2a008
9 changed files with 260 additions and 154 deletions

View File

@@ -1,5 +1,7 @@
// simple is_type and similar inline helpers
#define isdatum(D) (istype(D, /datum))
#define islist(L) (istype(L, /list))
#define in_range(source, user) (get_dist(source, user) <= 1)

View File

@@ -10,6 +10,11 @@
//if TESTING is enabled, qdel will call this object's find_references() verb.
//defines for the gc_destroyed var
#define GC_QUEUE_PREQUEUE 1
#define GC_QUEUE_CHECK 2
#define GC_QUEUE_HARDDELETE 3
#define GC_QUEUE_COUNT 3 //increase this when adding more steps.
#define GC_QUEUED_FOR_QUEUING -1
#define GC_QUEUED_FOR_HARD_DEL -2
#define GC_CURRENTLY_BEING_QDELETED -3

View File

@@ -13,3 +13,11 @@ When using time2text(), please use "DDD" to find the weekday. Refrain from using
#define FRIDAY "Fri"
#define SATURDAY "Sat"
#define SUNDAY "Sun"
#define SECONDS *10
#define MINUTES SECONDS*60
#define HOURS MINUTES*60
#define TICKS *world.tick_lag

View File

@@ -96,6 +96,9 @@
if (config.log_pda)
WRITE_FILE(GLOB.world_game_log, "\[[time_stamp()]]CHAT: [text]")
/proc/log_qdel(text)
WRITE_FILE(GLOB.world_qdel_log, "\[[time_stamp()]]QDEL: [text]")
/proc/log_sql(text)
WRITE_FILE(GLOB.sql_error_log, "\[[time_stamp()]]SQL: [text]")

View File

@@ -49,3 +49,12 @@ GLOBAL_VAR_INIT(cmp_field, "name")
/proc/cmp_ruincost_priority(datum/map_template/ruin/A, datum/map_template/ruin/B)
return initial(A.cost) - initial(B.cost)
/proc/cmp_qdel_item_time(datum/qdel_item/A, datum/qdel_item/B)
. = B.hard_delete_time - A.hard_delete_time
if (!.)
. = B.destroy_time - A.destroy_time
if (!.)
. = B.failures - A.failures
if (!.)
. = B.qdels - A.qdels

View File

@@ -4,6 +4,8 @@ GLOBAL_VAR(world_game_log)
GLOBAL_PROTECT(world_game_log)
GLOBAL_VAR(world_runtime_log)
GLOBAL_PROTECT(world_runtime_log)
GLOBAL_VAR(world_qdel_log)
GLOBAL_PROTECT(world_qdel_log)
GLOBAL_VAR(world_attack_log)
GLOBAL_PROTECT(world_attack_log)
GLOBAL_VAR(world_href_log)

View File

@@ -1,40 +1,44 @@
SUBSYSTEM_DEF(garbage)
name = "Garbage"
priority = 15
wait = 20
wait = 2 SECONDS
flags = SS_POST_FIRE_TIMING|SS_BACKGROUND|SS_NO_INIT
runlevels = RUNLEVELS_DEFAULT | RUNLEVEL_LOBBY
var/collection_timeout = 3000// deciseconds to wait to let running procs finish before we just say fuck it and force del() the object
var/delslasttick = 0 // number of del()'s we've done this tick
var/gcedlasttick = 0 // number of things that gc'ed last tick
var/list/collection_timeout = list(0, 2 MINUTES, 10 SECONDS) // deciseconds to wait before moving something up in the queue to the next level
//Stat tracking
var/delslasttick = 0 // number of del()'s we've done this tick
var/gcedlasttick = 0 // number of things that gc'ed last tick
var/totaldels = 0
var/totalgcs = 0
var/highest_del_time = 0
var/highest_del_tickusage = 0
var/list/queue = list() // list of refID's of things that should be garbage collected
// refID's are associated with the time at which they time out and need to be manually del()
// we do this so we aren't constantly locating them and preventing them from being gc'd
var/list/pass_counts
var/list/fail_counts
var/list/tobequeued = list() //We store the references of things to be added to the queue separately so we can spread out GC overhead over a few ticks
var/list/items = list() // Holds our qdel_item statistics datums
var/list/didntgc = list() // list of all types that have failed to GC associated with the number of times that's happened.
// the types are stored as strings
var/list/sleptDestroy = list() //Same as above but these are paths that slept during their Destroy call
//Queue
var/list/queues
var/list/noqdelhint = list()// list of all types that do not return a QDEL_HINT
// all types that did not respect qdel(A, force=TRUE) and returned one
// of the immortality qdel hints
var/list/noforcerespect = list()
#ifdef TESTING
var/list/qdel_list = list() // list of all types that have been qdel()eted
#endif
/datum/controller/subsystem/garbage/PreInit()
queues = new(GC_QUEUE_COUNT)
pass_counts = new(GC_QUEUE_COUNT)
fail_counts = new(GC_QUEUE_COUNT)
for(var/i in 1 to GC_QUEUE_COUNT)
queues[i] = list()
pass_counts[i] = 0
fail_counts[i] = 0
/datum/controller/subsystem/garbage/stat_entry(msg)
msg += "Q:[queue.len]|D:[delslasttick]|G:[gcedlasttick]|"
var/list/counts = list()
for (var/list/L in queues)
counts += length(L)
msg += "Q:[counts.Join(",")]|D:[delslasttick]|G:[gcedlasttick]|"
msg += "GR:"
if (!(delslasttick+gcedlasttick))
msg += "n/a|"
@@ -46,116 +50,179 @@ SUBSYSTEM_DEF(garbage)
msg += "n/a|"
else
msg += "TGR:[round((totalgcs/(totaldels+totalgcs))*100, 0.01)]%"
msg += " P:[pass_counts.Join(",")]"
msg += "|F:[fail_counts.Join(",")]"
..(msg)
/datum/controller/subsystem/garbage/Shutdown()
//Adds the del() log to world.log in a format condensable by the runtime condenser found in tools
if(didntgc.len || sleptDestroy.len)
var/list/dellog = list()
for(var/path in didntgc)
dellog += "Path : [path] \n"
dellog += "Failures : [didntgc[path]] \n"
if(path in sleptDestroy)
dellog += "Sleeps : [sleptDestroy[path]] \n"
sleptDestroy -= path
for(var/path in sleptDestroy)
dellog += "Path : [path] \n"
dellog += "Sleeps : [sleptDestroy[path]] \n"
text2file(dellog.Join(), "[GLOB.log_directory]/qdel.log")
//Adds the del() log to the qdel log file
var/list/dellog = list()
//sort by how long it's wasted hard deleting
sortTim(items, cmp=/proc/cmp_qdel_item_time, associative = TRUE)
for(var/path in items)
var/datum/qdel_item/I = items[path]
dellog += "Path: [path]"
if (I.failures)
dellog += "\tFailures: [I.failures]"
dellog += "\tqdel() Count: [I.qdels]"
dellog += "\tDestroy() Cost: [I.destroy_time]ms"
if (I.hard_deletes)
dellog += "\tTotal Hard Deletes [I.hard_deletes]"
dellog += "\tTime Spent Hard Deleting: [I.hard_delete_time]ms"
if (I.slept_destroy)
dellog += "\tSleeps: [I.slept_destroy]"
if (I.no_respect_force)
dellog += "\tIgnored force: [I.no_respect_force] times"
if (I.no_hint)
dellog += "\tNo hint: [I.no_hint] times"
log_qdel(dellog.Join("\n"))
/datum/controller/subsystem/garbage/fire()
HandleToBeQueued()
if(state == SS_RUNNING)
HandleQueue()
//the fact that this resets its processing each fire (rather then resume where it left off) is intentional.
var/queue = GC_QUEUE_PREQUEUE
while (state == SS_RUNNING)
switch (queue)
if (GC_QUEUE_PREQUEUE)
HandlePreQueue()
queue = GC_QUEUE_PREQUEUE+1
if (GC_QUEUE_CHECK)
HandleQueue(GC_QUEUE_CHECK)
queue = GC_QUEUE_CHECK+1
if (GC_QUEUE_HARDDELETE)
HandleQueue(GC_QUEUE_HARDDELETE)
break
if (state == SS_PAUSED) //make us wait again before the next run.
state = SS_RUNNING
//If you see this proc high on the profile, what you are really seeing is the garbage collection/soft delete overhead in byond.
//Don't attempt to optimize, not worth the effort.
/datum/controller/subsystem/garbage/proc/HandleToBeQueued()
var/list/tobequeued = src.tobequeued
var/starttime = world.time
var/starttimeofday = world.timeofday
while(tobequeued.len && starttime == world.time && starttimeofday == world.timeofday)
if (MC_TICK_CHECK)
break
var/ref = tobequeued[1]
Queue(ref)
tobequeued.Cut(1, 2)
/datum/controller/subsystem/garbage/proc/HandlePreQueue()
var/list/tobequeued = queues[GC_QUEUE_PREQUEUE]
var/static/count = 0
if (count)
var/c = count
count = 0 //so if we runtime on the Cut, we don't try again.
tobequeued.Cut(1,c+1)
/datum/controller/subsystem/garbage/proc/HandleQueue()
delslasttick = 0
gcedlasttick = 0
var/time_to_kill = world.time - collection_timeout // Anything qdel() but not GC'd BEFORE this time needs to be manually del()
var/list/queue = src.queue
var/starttime = world.time
var/starttimeofday = world.timeofday
while(queue.len && starttime == world.time && starttimeofday == world.timeofday)
for (var/ref in tobequeued)
count++
Queue(ref, GC_QUEUE_PREQUEUE+1)
if (MC_TICK_CHECK)
break
var/refID = queue[1]
if (count)
tobequeued.Cut(1,count+1)
count = 0
/datum/controller/subsystem/garbage/proc/HandleQueue(level = GC_QUEUE_CHECK)
if (level == GC_QUEUE_CHECK)
delslasttick = 0
gcedlasttick = 0
var/cut_off_time = world.time - collection_timeout[level] //ignore entries newer then this
var/list/queue = queues[level]
var/static/lastlevel
var/static/count = 0
if (count) //runtime last run before we could do this.
var/c = count
count = 0 //so if we runtime on the Cut, we don't try again.
var/list/lastqueue = queues[lastlevel]
lastqueue.Cut(1, c+1)
lastlevel = level
for (var/refID in queue)
if (!refID)
queue.Cut(1, 2)
count++
if (MC_TICK_CHECK)
break
continue
var/GCd_at_time = queue[refID]
if(GCd_at_time > time_to_kill)
if(GCd_at_time > cut_off_time)
break // Everything else is newer, skip them
queue.Cut(1, 2)
var/datum/A
A = locate(refID)
if (A && A.gc_destroyed == GCd_at_time) // So if something else coincidently gets the same ref, it's not deleted by mistake
#ifdef GC_FAILURE_HARD_LOOKUP
A.find_references()
#endif
count++
// Something's still referring to the qdel'd object. Kill it.
var/type = A.type
testing("GC: -- \ref[A] | [type] was unable to be GC'd and was deleted --")
didntgc["[type]"]++
var/datum/D
D = locate(refID)
HardDelete(A)
++delslasttick
++totaldels
else
if (!D || D.gc_destroyed != GCd_at_time) // So if something else coincidently gets the same ref, it's not deleted by mistake
++gcedlasttick
++totalgcs
pass_counts[level]++
if (MC_TICK_CHECK)
break
continue
/datum/controller/subsystem/garbage/proc/QueueForQueuing(datum/A)
if (istype(A) && A.gc_destroyed == GC_CURRENTLY_BEING_QDELETED)
tobequeued += A
A.gc_destroyed = GC_QUEUED_FOR_QUEUING
// Something's still referring to the qdel'd object.
fail_counts[level]++
switch (level)
if (GC_QUEUE_CHECK)
#ifdef GC_FAILURE_HARD_LOOKUP
D.find_references()
#endif
var/type = D.type
var/datum/qdel_item/I = items[type]
testing("GC: -- \ref[D] | [type] was unable to be GC'd --")
I.failures++
if (GC_QUEUE_HARDDELETE)
HardDelete(D)
if (MC_TICK_CHECK)
break
continue
/datum/controller/subsystem/garbage/proc/Queue(datum/A)
if (isnull(A) || (!isnull(A.gc_destroyed) && A.gc_destroyed >= 0))
Queue(D, level+1)
if (MC_TICK_CHECK)
break
if (count)
queue.Cut(1,count+1)
count = 0
/datum/controller/subsystem/garbage/proc/PreQueue(datum/D)
if (D.gc_destroyed == GC_CURRENTLY_BEING_QDELETED)
queues[GC_QUEUE_PREQUEUE] += D
D.gc_destroyed = GC_QUEUED_FOR_QUEUING
/datum/controller/subsystem/garbage/proc/Queue(datum/D, level = GC_QUEUE_CHECK)
if (isnull(D))
return
if (A.gc_destroyed == GC_QUEUED_FOR_HARD_DEL)
HardDelete(A)
if (D.gc_destroyed == GC_QUEUED_FOR_HARD_DEL)
level = GC_QUEUE_HARDDELETE
if (level > GC_QUEUE_COUNT)
HardDelete(D)
return
var/gctime = world.time
var/refid = "\ref[A]"
A.gc_destroyed = gctime
var/refid = "\ref[D]"
D.gc_destroyed = gctime
var/list/queue = queues[level]
if (queue[refid])
queue -= refid // Removing any previous references that were GC'd so that the current object will be at the end of the list.
queue[refid] = gctime
//this is purely to separate things profile wise.
/datum/controller/subsystem/garbage/proc/HardDelete(datum/A)
//this is mainly to separate things profile wise.
/datum/controller/subsystem/garbage/proc/HardDelete(datum/D)
var/time = world.timeofday
var/tick = TICK_USAGE
var/ticktime = world.time
++delslasttick
++totaldels
var/type = D.type
var/refID = "\ref[D]"
var/type = A.type
var/refID = "\ref[A]"
del(A)
del(D)
tick = (TICK_USAGE-tick+((world.time-ticktime)/world.tick_lag*100))
var/datum/qdel_item/I = items[type]
I.hard_deletes++
I.hard_delete_time += TICK_DELTA_TO_MS(tick)
if (tick > highest_del_tickusage)
highest_del_tickusage = tick
time = world.timeofday - time
@@ -166,18 +233,33 @@ SUBSYSTEM_DEF(garbage)
if (time > 10)
log_game("Error: [type]([refID]) took longer than 1 second to delete (took [time/10] seconds to delete)")
message_admins("Error: [type]([refID]) took longer than 1 second to delete (took [time/10] seconds to delete).")
postpone(time/5)
postpone(time)
/datum/controller/subsystem/garbage/proc/HardQueue(datum/A)
if (istype(A) && A.gc_destroyed == GC_CURRENTLY_BEING_QDELETED)
tobequeued += A
A.gc_destroyed = GC_QUEUED_FOR_HARD_DEL
/datum/controller/subsystem/garbage/proc/HardQueue(datum/D)
if (D.gc_destroyed == GC_CURRENTLY_BEING_QDELETED)
queues[GC_QUEUE_PREQUEUE] += D
D.gc_destroyed = GC_QUEUED_FOR_HARD_DEL
/datum/controller/subsystem/garbage/Recover()
if (istype(SSgarbage.queue))
queue |= SSgarbage.queue
if (istype(SSgarbage.tobequeued))
tobequeued |= SSgarbage.tobequeued
if (istype(SSgarbage.queues))
for (var/i in 1 to SSgarbage.queues.len)
queues[i] |= SSgarbage.queues[i]
/datum/qdel_item
var/name = ""
var/qdels = 0 //Total number of times it's passed thru qdel.
var/destroy_time = 0 //Total amount of milliseconds spent processing this type's Destroy()
var/failures = 0 //Times it was queued for soft deletion but failed to soft delete.
var/hard_deletes = 0 //Different from failures because it also includes QDEL_HINT_HARDDEL deletions
var/hard_delete_time = 0//Total amount of milliseconds spent hard deleting this type.
var/no_respect_force = 0//Number of times it's not respected force=TRUE
var/no_hint = 0 //Number of times it's not even bother to give a qdel hint
var/slept_destroy = 0 //Number of times it's slept in its destroy
/datum/qdel_item/New(mytype)
name = "[mytype]"
// Should be treated as a replacement for the 'del' keyword.
// Datums passed to this will be given a chance to clean up references to allow the GC to collect them.
@@ -185,21 +267,27 @@ SUBSYSTEM_DEF(garbage)
if(!istype(D))
del(D)
return
#ifdef TESTING
SSgarbage.qdel_list += "[D.type]"
#endif
var/datum/qdel_item/I = SSgarbage.items[D.type]
if (!I)
I = SSgarbage.items[D.type] = new /datum/qdel_item(D.type)
I.qdels++
if(isnull(D.gc_destroyed))
D.SendSignal(COMSIG_PARENT_QDELETED)
D.gc_destroyed = GC_CURRENTLY_BEING_QDELETED
var/start_time = world.time
var/start_tick = world.tick_usage
var/hint = D.Destroy(force) // Let our friend know they're about to get fucked up.
if(world.time != start_time)
SSgarbage.sleptDestroy["[D.type]"]++
I.slept_destroy++
else
I.destroy_time += TICK_USAGE_TO_MS(start_tick)
if(!D)
return
switch(hint)
if (QDEL_HINT_QUEUE) //qdel should queue the object for deletion.
SSgarbage.QueueForQueuing(D)
SSgarbage.PreQueue(D)
if (QDEL_HINT_IWILLGC)
D.gc_destroyed = world.time
return
@@ -209,28 +297,33 @@ SUBSYSTEM_DEF(garbage)
return
// Returning LETMELIVE after being told to force destroy
// indicates the objects Destroy() does not respect force
if(!SSgarbage.noforcerespect["[D.type]"])
SSgarbage.noforcerespect["[D.type]"] = "[D.type]"
#ifdef TESTING
if(!I.no_respect_force)
testing("WARNING: [D.type] has been force deleted, but is \
returning an immortal QDEL_HINT, indicating it does \
not respect the force flag for qdel(). It has been \
placed in the queue, further instances of this type \
will also be queued.")
SSgarbage.QueueForQueuing(D)
#endif
I.no_respect_force++
SSgarbage.PreQueue(D)
if (QDEL_HINT_HARDDEL) //qdel should assume this object won't gc, and queue a hard delete using a hard reference to save time from the locate()
SSgarbage.HardQueue(D)
if (QDEL_HINT_HARDDEL_NOW) //qdel should assume this object won't gc, and hard del it post haste.
SSgarbage.HardDelete(D)
if (QDEL_HINT_FINDREFERENCE)//qdel will, if TESTING is enabled, display all references to this object, then queue the object for deletion.
SSgarbage.QueueForQueuing(D)
SSgarbage.PreQueue(D)
#ifdef TESTING
D.find_references()
#endif
else
if(!SSgarbage.noqdelhint["[D.type]"])
SSgarbage.noqdelhint["[D.type]"] = "[D.type]"
#ifdef TESTING
if(!I.no_hint)
testing("WARNING: [D.type] is not returning a qdel hint. It is being placed in the queue. Further instances of this type will also be queued.")
SSgarbage.QueueForQueuing(D)
#endif
I.no_hint++
SSgarbage.PreQueue(D)
else if(D.gc_destroyed == GC_CURRENTLY_BEING_QDELETED)
CRASH("[D.type] destroy proc was called multiple times, likely due to a qdel loop in the Destroy logic")
@@ -281,15 +374,6 @@ SUBSYSTEM_DEF(garbage)
SSgarbage.can_fire = 1
SSgarbage.next_fire = world.time + world.tick_lag
/client/verb/purge_all_destroyed_objects()
set category = "Debug"
while(SSgarbage.queue.len)
var/datum/o = locate(SSgarbage.queue[1])
if(istype(o) && o.gc_destroyed)
del(o)
SSgarbage.totaldels++
SSgarbage.queue.Cut(1, 2)
/datum/verb/qdel_then_find_references()
set category = "Debug"
set name = "qdel() then Find References"
@@ -300,24 +384,6 @@ SUBSYSTEM_DEF(garbage)
if(!running_find_references)
find_references(TRUE)
/client/verb/show_qdeleted()
set category = "Debug"
set name = "Show qdel() Log"
set desc = "Render the qdel() log and display it"
var/dat = "<B>List of things that have been qdel()eted this round</B><BR><BR>"
var/tmplist = list()
for(var/elem in SSgarbage.qdel_list)
if(!(elem in tmplist))
tmplist[elem] = 0
tmplist[elem]++
for(var/path in tmplist)
dat += "[path] - [tmplist[path]] times<BR>"
usr << browse(dat, "window=qdeletedlog")
/datum/proc/DoSearchVar(X, Xname)
if(usr && usr.client && !usr.client.running_find_references) return
if(istype(X, /datum))

View File

@@ -86,6 +86,7 @@ GLOBAL_PROTECT(security_mode)
GLOB.world_game_log = file("[GLOB.log_directory]/game.log")
GLOB.world_attack_log = file("[GLOB.log_directory]/attack.log")
GLOB.world_runtime_log = file("[GLOB.log_directory]/runtime.log")
GLOB.world_qdel_log = file("[GLOB.log_directory]/qdel.log")
GLOB.world_href_log = file("[GLOB.log_directory]/hrefs.html")
GLOB.sql_error_log = file("[GLOB.log_directory]/sql.log")
WRITE_FILE(GLOB.world_game_log, "\n\nStarting up round ID [GLOB.round_id]. [time_stamp()]\n---------------------")

View File

@@ -743,21 +743,31 @@ GLOBAL_PROTECT(AdminProcCallCount)
/client/proc/cmd_display_del_log()
set category = "Debug"
set name = "Display del() Log"
set desc = "Displays a list of things that have failed to GC this round"
set desc = "Display del's log of everything that's passed through it."
var/dat = "<B>List of things that failed to GC this round</B><BR><BR>"
for(var/path in SSgarbage.didntgc)
dat += "[path] - [SSgarbage.didntgc[path]] times<BR>"
var/list/dellog = list("<B>List of things that have gone through qdel this round</B><BR><BR><ol>")
sortTim(SSgarbage.items, cmp=/proc/cmp_qdel_item_time, associative = TRUE)
for(var/path in SSgarbage.items)
var/datum/qdel_item/I = SSgarbage.items[path]
dellog += "<li><u>[path]</u><ul>"
if (I.failures)
dellog += "<li>Failures: [I.failures]</li>"
dellog += "<li>qdel() Count: [I.qdels]</li>"
dellog += "<li>Destroy() Cost: [I.destroy_time]ms</li>"
if (I.hard_deletes)
dellog += "<li>Total Hard Deletes [I.hard_deletes]</li>"
dellog += "<li>Time Spent Hard Deleting: [I.hard_delete_time]ms</li>"
if (I.slept_destroy)
dellog += "<li>Sleeps: [I.slept_destroy]</li>"
if (I.no_respect_force)
dellog += "<li>Ignored force: [I.no_respect_force]</li>"
if (I.no_hint)
dellog += "<li>No hint: [I.no_hint]</li>"
dellog += "</ul></li>"
dat += "<B>List of paths that did not return a qdel hint in Destroy()</B><BR><BR>"
for(var/path in SSgarbage.noqdelhint)
dat += "[path]<BR>"
dellog += "</ol>"
dat += "<B>List of paths that slept in Destroy()</B><BR><BR>"
for(var/path in SSgarbage.sleptDestroy)
dat += "[path]<BR>"
usr << browse(dat, "window=dellog")
usr << browse(dellog.Join(), "window=dellog")
/client/proc/cmd_display_init_log()
set category = "Debug"