From 1358ad88678d46ef565ae3bf0ef1c739d5ffe1bb Mon Sep 17 00:00:00 2001 From: Novacat <35587478+Novacat@users.noreply.github.com> Date: Mon, 10 May 2021 22:54:31 -0400 Subject: [PATCH 1/2] Merge pull request #10286 from VOREStation/Arokha/SSabandonship Adds subsystem crash logging to SSprocess and SSmobs --- code/_helpers/unsorted.dm | 33 ++++++++++++++ code/controllers/master.dm | 2 + code/controllers/subsystem.dm | 15 +++++++ code/controllers/subsystems/mobs.dm | 43 +++++++++++++++++++ .../subsystems/processing/processing.dm | 43 +++++++++++++++++++ 5 files changed, 136 insertions(+) diff --git a/code/_helpers/unsorted.dm b/code/_helpers/unsorted.dm index f2b57156a9..b006d63581 100644 --- a/code/_helpers/unsorted.dm +++ b/code/_helpers/unsorted.dm @@ -1656,3 +1656,36 @@ GLOBAL_REAL_VAR(list/stack_trace_storage) /proc/CallAsync(datum/source, proctype, list/arguments) set waitfor = FALSE return call(source, proctype)(arglist(arguments)) + +/proc/describeThis(var/datum/D) + if(istype(D)) + var/msg = "[D.type] - [D]" + if(isatom(D)) + var/atom/A = D + if(!A.z) + msg += " - Coords unavailable (in contents?)" + if(ismovable(A)) + var/turf/T = get_turf(A) + if(T) + msg += " - Parent turf: [T.x],[T.y],[T.z]" + else + msg += " - In nullspace" + else + msg += " - In nullspace" + else + msg += "- [A.x],[A.y],[A.z]" + return msg + else if(isnull(D)) + return "NULL" + else if(istext(D)) + return "TEXT: [D]" + else if(isnum(D)) + return "NUM: [D]" + else if(ispath(D)) + return "PATH: [D]" + else if(islist(D)) + return "LIST: [D]" + else if(isclient(D)) + return "CLIENT: [D]" + else + return "Unknown data type: [D]" diff --git a/code/controllers/master.dm b/code/controllers/master.dm index 04204131d4..110b7dc633 100644 --- a/code/controllers/master.dm +++ b/code/controllers/master.dm @@ -134,9 +134,11 @@ GLOBAL_REAL(Master, /datum/controller/master) = new if(2) msg = "The [BadBoy.name] subsystem was the last to fire for 2 controller restarts. It will be recovered now and disabled if it happens again." FireHim = TRUE + BadBoy.fail() if(3) msg = "The [BadBoy.name] subsystem seems to be destabilizing the MC and will be offlined." BadBoy.flags |= SS_NO_FIRE + BadBoy.critfail() if(msg) log_game(msg) message_admins("[msg]") diff --git a/code/controllers/subsystem.dm b/code/controllers/subsystem.dm index 7115c77a61..6b6c345b24 100644 --- a/code/controllers/subsystem.dm +++ b/code/controllers/subsystem.dm @@ -215,3 +215,18 @@ // Wakes a suspended subsystem. /datum/controller/subsystem/proc/wake() can_fire = TRUE + +// This subsystem has destabilized the game and is being put on warning. At this point there may be +// an opportunity to clean up the subsystem or check it for errors in ways that would otherwise be too slow. +// You should log the errors/cleanup results, so you can fix the problem rather than using this as a crutch. +/datum/controller/subsystem/proc/fail() + var/msg = "[name] subsystem being blamed for MC failure" + log_world(msg) + log_game(msg) + +// DO NOT ATTEMPT RECOVERY. Only log debugging info. You should leave the subsystem as it is. +// Attempting recovery here could make things worse, create hard recursions with the MC disabling it every run, etc. +/datum/controller/subsystem/proc/critfail() + var/msg = "[name] subsystem received final blame for MC failure" + log_world(msg) + log_game(msg) diff --git a/code/controllers/subsystems/mobs.dm b/code/controllers/subsystems/mobs.dm index 1ae1d64757..b44b9ac5d6 100644 --- a/code/controllers/subsystems/mobs.dm +++ b/code/controllers/subsystems/mobs.dm @@ -51,3 +51,46 @@ SUBSYSTEM_DEF(mobs) if (MC_TICK_CHECK) return + +/datum/controller/subsystem/mobs/proc/log_recent() + var/msg = "Debug output from the [name] subsystem:\n" + msg += "- This subsystem is processed tail-first -\n" + if(!currentrun || !mob_list) + msg += "ERROR: A critical list [currentrun ? "mob_list" : "currentrun"] is gone!" + log_game(msg) + log_world(msg) + return + msg += "Lists: currentrun: [currentrun.len], mob_list: [mob_list.len]\n" + + if(!currentrun.len) + msg += "!!The subsystem just finished the mob_list list, and currentrun is empty (or has never run).\n" + msg += "!!The info below is the tail of mob_list instead of currentrun.\n" + + var/datum/D = currentrun.len ? currentrun[currentrun.len] : mob_list[mob_list.len] + msg += "Tail entry: [describeThis(D)] (this is likely the item AFTER the problem item)\n" + + var/position = mob_list.Find(D) + if(!position) + msg += "Unable to find context of tail entry in mob_list list.\n" + else + if(position != mob_list.len) + var/additional = mob_list.Find(D, position+1) + if(additional) + msg += "WARNING: Tail entry found more than once in mob_list list! Context is for the first found.\n" + var/start = clamp(position-2,1,mob_list.len) + var/end = clamp(position+2,1,mob_list.len) + msg += "2 previous elements, then tail, then 2 next elements of mob_list list for context:\n" + msg += "---\n" + for(var/i in start to end) + msg += "[describeThis(mob_list[i])][i == position ? " << TAIL" : ""]\n" + msg += "---\n" + log_game(msg) + log_world(msg) + +/datum/controller/subsystem/mobs/fail() + ..() + log_recent() + +/datum/controller/subsystem/mobs/critfail() + ..() + log_recent() \ No newline at end of file diff --git a/code/controllers/subsystems/processing/processing.dm b/code/controllers/subsystems/processing/processing.dm index 3eeff489aa..a16902c0c5 100644 --- a/code/controllers/subsystems/processing/processing.dm +++ b/code/controllers/subsystems/processing/processing.dm @@ -60,6 +60,49 @@ SUBSYSTEM_DEF(processing) to_chat(usr, "[name] - Debug mode [debug_original_process_proc ? "en" : "dis"]abled") +/datum/controller/subsystem/processing/proc/log_recent() + var/msg = "Debug output from the [name] subsystem:\n" + msg += "- Process subsystems are processed tail-first -\n" + if(!currentrun || !processing) + msg += "ERROR: A critical list [currentrun ? "processing" : "currentrun"] is gone!" + log_game(msg) + log_world(msg) + return + msg += "Lists: current_run: [currentrun.len], processing: [processing.len]\n" + + if(!currentrun.len) + msg += "!!The subsystem just finished the processing list, and currentrun is empty (or has never run).\n" + msg += "!!The info below is the tail of processing instead of currentrun.\n" + + var/datum/D = currentrun.len ? currentrun[currentrun.len] : processing[processing.len] + msg += "Tail entry: [describeThis(D)] (this is likely the item AFTER the problem item)\n" + + var/position = processing.Find(D) + if(!position) + msg += "Unable to find context of tail entry in processing list.\n" + else + if(position != processing.len) + var/additional = processing.Find(D, position+1) + if(additional) + msg += "WARNING: Tail entry found more than once in processing list! Context is for the first found.\n" + var/start = clamp(position-2,1,processing.len) + var/end = clamp(position+2,1,processing.len) + msg += "2 previous elements, then tail, then 2 next elements of processing list for context:\n" + msg += "---\n" + for(var/i in start to end) + msg += "[describeThis(processing[i])][i == position ? " << TAIL" : ""]\n" + msg += "---\n" + log_game(msg) + log_world(msg) + +/datum/controller/subsystem/processing/fail() + ..() + log_recent() + +/datum/controller/subsystem/processing/critfail() + ..() + log_recent() + /datum/proc/DebugSubsystemProcess(var/wait, var/times_fired, var/datum/controller/subsystem/processing/subsystem) subsystem.debug_last_thing = src var/start_tick = world.time