[MIRROR] Uses regex datums and macros for text-manipulation (#7805)

* Uses regex datums and macros for text-manipulation (#61042)

* Uses regex datums and macros for text-manipulation

* a

Co-authored-by: Rohesie <rohesie@gmail.com>
Co-authored-by: Gandalf <jzo123@hotmail.com>
This commit is contained in:
SkyratBot
2021-09-01 17:28:56 +02:00
committed by GitHub
parent 8da357aca4
commit 9290399403
9 changed files with 71 additions and 74 deletions

View File

@@ -3,3 +3,9 @@
/// Macro from Lummox used to get height from a MeasureText proc
/// Finds the first "x" in the given text (case-sensitive search), copies everything after it and converts that to a number.
/// NOTE(review): presumably x is a "WIDTHxHEIGHT" string — confirm against callers.
#define WXH_TO_HEIGHT(x) text2num(copytext(x, findtextEx(x, "x") + 1))
/// Removes characters incompatible with file names.
/// Expands to GLOB.filename_forbidden_chars.Replace(text, ""): whatever that regex matches is replaced by the empty string, and the resulting text is the value of the expression.
#define SANITIZE_FILENAME(text) (GLOB.filename_forbidden_chars.Replace(text, ""))
/// Simply removes the < and > characters, and limits the length of the message.
/// The text is cut with copytext(text, 1, limit) first and only then stripped, so brackets count toward the limit.
/// NOTE(review): the end position given to copytext is exclusive, so at most limit - 1 positions of the input are kept — confirm callers expect that.
#define STRIP_HTML_SIMPLE(text, limit) (GLOB.angular_brackets.Replace(copytext(text, 1, limit), ""))

View File

@@ -1082,7 +1082,7 @@ GLOBAL_LIST_INIT(freon_color_matrix, list("#2E5E69", "#60A2A8", "#A1AFB1", rgb(0
if (!isicon(I))
if (isfile(thing)) //special snowflake
var/name = sanitize_filename("[generate_asset_name(thing)].png")
var/name = SANITIZE_FILENAME("[generate_asset_name(thing)].png")
if (!SSassets.cache[name])
SSassets.transport.register_asset(name, thing)
for (var/thing2 in targets)

View File

@@ -20,28 +20,6 @@
* Text sanitization
*/
//Simply removes < and > and limits the length of the message
/proc/strip_html_simple(t,limit=MAX_MESSAGE_LEN)
var/list/strip_chars = list("<",">")
t = copytext(t,1,limit)
for(var/char in strip_chars)
var/index = findtext(t, char)
while(index)
t = copytext(t, 1, index) + copytext(t, index+1)
index = findtext(t, char)
return t
//Removes a few problematic characters
/proc/sanitize_simple(t,list/repl_chars = list("\n"="#","\t"="#"))
for(var/char in repl_chars)
var/index = findtext(t, char)
while(index)
t = copytext(t, 1, index) + repl_chars[char] + copytext(t, index + length(char))
index = findtext(t, char, index + length(char))
return t
/proc/sanitize_filename(t)
return sanitize_simple(t, list("\n"="", "\t"="", "/"="", "\\"="", "?"="", "%"="", "*"="", ":"="", "|"="", "\""="", "<"="", ">"=""))
///returns nothing with an alert instead of the message if it contains something in the ic filter, and sanitizes normally if the name is fine. It returns nothing so it backs out of the input the same way as if you had entered nothing.
/proc/sanitize_name(t,allow_numbers=FALSE)
@@ -54,19 +32,22 @@
return ""
return sanitize(r)
//Runs byond's sanitization proc along-side sanitize_simple
/proc/sanitize(t,list/repl_chars = null)
return html_encode(sanitize_simple(t,repl_chars))
//Runs sanitize and strip_html_simple
//I believe strip_html_simple() is required to run first to prevent '<' from displaying as '&lt;' after sanitize() calls byond's html_encode()
/proc/strip_html(t,limit=MAX_MESSAGE_LEN)
return copytext((sanitize(strip_html_simple(t))),1,limit)
/**
 * Makes text safe to embed in HTML.
 *
 * Every new-line and tab is swapped for the # character first, then the
 * result is passed through byond's html_encode().
 */
/proc/sanitize(text)
	var/static/regex/newlines_and_tabs = regex(@"[\n\t]", "g")
	var/flattened = newlines_and_tabs.Replace(text, "#")
	return html_encode(flattened)
/// Cuts the text to limit and drops its < and > characters via STRIP_HTML_SIMPLE, then hands the result to sanitize().
/proc/strip_html(text, limit = MAX_MESSAGE_LEN)
	var/bracketless = STRIP_HTML_SIMPLE(text, limit)
	return sanitize(bracketless)
/// Cuts the text to limit and drops its < and > characters via STRIP_HTML_SIMPLE, then runs byond's html_encode() on what is left.
/proc/adminscrub(text, limit = MAX_MESSAGE_LEN)
	var/bracketless = STRIP_HTML_SIMPLE(text, limit)
	return html_encode(bracketless)
//Runs byond's sanitization proc along-side strip_html_simple
//I believe strip_html_simple() is required to run first to prevent '<' from displaying as '&lt;' that html_encode() would cause
/proc/adminscrub(t,limit=MAX_MESSAGE_LEN)
return copytext((html_encode(strip_html_simple(t))),1,limit)
/**
* Perform a whitespace cleanup on the text, similar to what HTML renderers do
@@ -91,31 +72,34 @@
return t
//Returns null if there is any bad text in the string
/**
* Returns the text if it is properly formatted, or null otherwise.
*
* Things considered improper:
* * Larger than max_length.
* * Presence of non-ASCII characters if ascii_only is set to TRUE.
* * Only whitespaces, tabs and/or line breaks in the text.
* * Presence of the <, >, \ and / characters.
* * Presence of ASCII special control characters (horizontal tab and new line not included).
* */
/proc/reject_bad_text(text, max_length = 512, ascii_only = TRUE)
var/char_count = 0
var/non_whitespace = FALSE
var/lenbytes = length(text)
var/char = ""
for(var/i = 1, i <= lenbytes, i += length(char))
char = text[i]
char_count++
if(char_count > max_length)
return
switch(text2ascii(char))
if(62, 60, 92, 47) // <, >, \, /
return
if(0 to 31)
return
if(32)
continue
if(127 to INFINITY)
if(ascii_only)
return
else
non_whitespace = TRUE
if(non_whitespace)
return text //only accepts the text if it has some non-spaces
// Length and character-set checks.
// With ascii_only the byte length from length() is used; any multi-byte text is rejected by the non_ascii check right after anyway.
if(ascii_only)
	if(length(text) > max_length)
		return null
	// Matches anything that is not printable ASCII, a tab or a new line.
	var/static/regex/non_ascii = regex(@"[^\x20-\x7E\t\n]")
	if(non_ascii.Find(text))
		return null
else if(length_char(text) > max_length)
	return null
// Text with no non-whitespace character at all (including empty text) is rejected.
var/static/regex/non_whitespace = regex(@"\S")
if(!non_whitespace.Find(text))
	return null
// \, <, > and /, plus every ASCII control character except horizontal tab (\x09) and new line (\x0A).
// The second range has to start at \x0B (decimal 11): \x11 is decimal 17 and would let \x0B-\x10
// (vertical tab, form feed, carriage return, ...) through whenever ascii_only is FALSE.
var/static/regex/bad_chars = regex(@"[\\<>/\x00-\x08\x0B-\x1F]")
if(bad_chars.Find(text))
	return null
return text
/// Used to get a properly sanitized input, of max_length
/// no_trim is self explanatory but it prevents the input from being trimmed if you intend to parse newlines or whitespace.

View File

@@ -9,3 +9,10 @@ GLOBAL_DATUM_INIT(is_color, /regex, regex("^#\[0-9a-fA-F]{6}$"))
//finds text strings recognized as links on discord. Mainly used to stop embedding.
GLOBAL_DATUM_INIT(has_discord_embeddable_links, /regex, regex("(https?://\[^\\s|<\]{2,})"))
//All < and > characters
//Built with the "g" (global) flag so that Replace() acts on every match rather than only the first one.
GLOBAL_DATUM_INIT(angular_brackets, /regex, regex(@"[<>]", "g"))
//All characters forbidden by filenames: ", \, \n, \t, /, ?, %, *, :, |, <, >
//The pattern is a raw string delimited by @{" and "}: a double quote, OR'd with a character class holding the other characters.
GLOBAL_DATUM_INIT(filename_forbidden_chars, /regex, regex(@{""|[\\\n\t/?%*:|<>]"}, "g"))
// The OR operator is used for the quote instead of putting it in the character class, because the latter breaks syntax highlighting.

View File

@@ -376,7 +376,7 @@
/obj/structure/sign/painting/proc/save_persistent()
if(!persistence_id || !current_canvas || current_canvas.no_save)
return
if(sanitize_filename(persistence_id) != persistence_id)
if(SANITIZE_FILENAME(persistence_id) != persistence_id)
stack_trace("Invalid persistence_id - [persistence_id]")
return
if(!current_canvas.painting_name)

View File

@@ -260,7 +260,7 @@ GLOBAL_LIST_EMPTY(asset_datums)
var/item_filename
/datum/asset/changelog_item/New(date)
item_filename = sanitize_filename("[date].yml")
item_filename = SANITIZE_FILENAME("[date].yml")
SSassets.transport.register_asset(item_filename, file("html/changelogs/archive/" + item_filename))
/datum/asset/changelog_item/send(client)
@@ -301,7 +301,7 @@ GLOBAL_LIST_EMPTY(asset_datums)
continue
asset = fcopy_rsc(asset) //dedupe
var/prefix2 = (directions.len > 1) ? "[dir2text(direction)]." : ""
var/asset_name = sanitize_filename("[prefix].[prefix2][icon_state_name].png")
var/asset_name = SANITIZE_FILENAME("[prefix].[prefix2][icon_state_name].png")
if (generic_icon_names)
asset_name = "[generate_asset_name(asset)].png"

View File

@@ -355,12 +355,12 @@
if("edit_admin_note")
var/msg = input(usr, "Set your note to admins!", "Note to admins", note_to_admins) as message|null
if(msg)
note_to_admins = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
note_to_admins = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
log_action("NOTE: [note_to_admins]", FALSE)
if("set_narrative")
var/msg = input(usr, "Set your narrative!", "Narrative", narrative) as message|null
if(msg)
narrative = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
narrative = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
log_action("NARRATIVE - change: [narrative]")
un_submit()
if("remove_objective")
@@ -379,13 +379,13 @@
if(msg)
if(length(objectives) < index)
return
objectives[index] = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
objectives[index] = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
un_submit()
log_action("OBJ - edit: [old_obj] TO-> [objectives[index]]")
if("add_objective")
var/msg = input(usr, "Add new objective:", "Objectives", "") as message|null
if(msg)
var/new_obj = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
var/new_obj = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
objectives += new_obj
log_action("OBJ - add: [new_obj]")
un_submit()

View File

@@ -2154,42 +2154,42 @@ GLOBAL_LIST_INIT(food, list(
if("flavor_text")
var/msg = input(usr, "Set the flavor text in your 'examine' verb. This is for describing what people can tell by looking at your character.", "Flavor Text", features["flavor_text"]) as message|null //Skyrat edit, removed stripped_multiline_input()
if(!isnull(msg))
features["flavor_text"] = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
features["flavor_text"] = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("silicon_flavor_text")
var/msg = input(usr, "Set the flavor text in your 'examine' verb. This is for describing what people can tell by looking at your character.", "Silicon Flavor Text", features["silicon_flavor_text"]) as message|null
if(!isnull(msg))
features["silicon_flavor_text"] = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
features["silicon_flavor_text"] = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("ooc_prefs")
var/msg = input(usr, "Set your OOC preferences.", "OOC Prefs", ooc_prefs) as message|null
if(!isnull(msg))
ooc_prefs = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
ooc_prefs = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("general_record")
var/msg = input(usr, "Set your general record. This is more or less public information, available from security, medical and command consoles", "General Record", general_record) as message|null
if(!isnull(msg))
general_record = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
general_record = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("medical_record")
var/msg = input(usr, "Set your medical record. ", "Medical Record", medical_record) as message|null
if(!isnull(msg))
medical_record = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
medical_record = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("security_record")
var/msg = input(usr, "Set your security record. ", "Medical Record", security_record) as message|null
if(!isnull(msg))
security_record = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
security_record = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("background_info")
var/msg = input(usr, "Set your background information. (Where you come from, which culture were you raised in and why you are working here etc.)", "Background Info", background_info) as message|null
if(!isnull(msg))
background_info = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
background_info = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("exploitable_info")
var/msg = input(usr, "Set your exploitable information. This is sensitive informations that antagonists may get to see, recommended for better roleplay experience", "Exploitable Info", exploitable_info) as message|null
if(!isnull(msg))
exploitable_info = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
exploitable_info = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
if("uses_skintones")
needs_update = TRUE

View File

@@ -12,5 +12,5 @@
if(msg == "")
temporary_flavor_text = null
else
temporary_flavor_text = strip_html_simple(msg, MAX_FLAVOR_LEN, TRUE)
temporary_flavor_text = strip_html(msg, MAX_FLAVOR_LEN, TRUE)
return