/*
 * Holds procs designed to help with filtering text
 * Contains groups:
 *			SQL sanitization/formatting
 *			Text sanitization
 *			Text searches
 *			Text modification
 *			Misc
 */


/*
 * SQL sanitization
 */

/// Returns `table` prefixed with the `feedback_tableprefix` config string.
/proc/format_table_name(table as text)
	return CONFIG_GET(string/feedback_tableprefix) + table

/*
 * Text sanitization
 */

/**
 * Simply removes < and > and limits the length of the message.
 *
 * The text is cut with copytext(t, 1, limit) first, then every "<" and ">" is deleted.
 *
 * * t - text to clean
 * * limit - end position handed to copytext() for the initial cut
 */
/proc/strip_html_simple(t,limit=MAX_MESSAGE_LEN)
	var/list/strip_chars = list("<",">")
	t = copytext(t,1,limit)
	for(var/char in strip_chars)
		var/index = findtext(t, char)
		while(index)
			t = copytext(t, 1, index) + copytext(t, index+1)
			// Everything before index is already clean, so resume from the cut position.
			index = findtext(t, char, index)
	return t

/**
 * Removes a few problematic characters.
 *
 * Every occurrence of each key of repl_chars is replaced by its associated value.
 *
 * * t - text to clean
 * * repl_chars - associative list of needle = replacement
 */
/proc/sanitize_simple(t,list/repl_chars = list("\n"="#","\t"="#"))
	for(var/char in repl_chars)
		var/replacement = repl_chars[char]
		var/char_len = length(char)
		var/replacement_len = length(replacement)
		var/index = findtext(t, char)
		while(index)
			t = copytext(t, 1, index) + replacement + copytext(t, index + char_len)
			// Resume right after the text that was just inserted. Skipping length(char)
			// instead would jump over an adjacent match whenever the replacement is
			// shorter than the needle (e.g. "a//b" with "/" = "" used to give "a/b"),
			// and would rescan the replacement when it contains the needle.
			index = findtext(t, char, index + replacement_len)
	return t

/// Removes newlines, tabs and characters that are unsafe in file names from t.
/proc/sanitize_filename(t)
	return sanitize_simple(t, list("\n"="", "\t"="", "/"="", "\\"="", "?"="", "%"="", "*"="", ":"="", "|"="", "\""="", "<"="", ">"=""))

///returns nothing with an alert instead of the message if it contains something in the ic filter, and sanitizes normally if the name is fine. It returns nothing so it backs out of the input the same way as if you had entered nothing.
/proc/sanitize_name(t,allow_numbers=FALSE)
	// Names that trip the IC chat filter are refused outright.
	if(CHAT_FILTER_CHECK(t))
		alert("You cannot set a name that contains a word prohibited in IC chat!")
		return ""
	// strict = TRUE makes reject_bad_name() return nothing instead of silently filtering characters.
	var/r = reject_bad_name(t,allow_numbers=allow_numbers,strict=TRUE)
	if(!r)
		alert("Invalid name.")
		return ""
	return sanitize(r)

//Runs byond's sanitization proc along-side sanitize_simple
/proc/sanitize(t,list/repl_chars = null)
	return html_encode(sanitize_simple(t,repl_chars))

//Runs sanitize and strip_html_simple
//I believe strip_html_simple() is required to run first to prevent '<' from displaying as '&lt;' after sanitize() calls byond's html_encode()
/proc/strip_html(t,limit=MAX_MESSAGE_LEN)
	return copytext((sanitize(strip_html_simple(t))),1,limit)

//Runs byond's sanitization proc along-side strip_html_simple
//I believe strip_html_simple() is required to run first to prevent '<' from displaying as '&lt;' that html_encode() would cause
/proc/adminscrub(t,limit=MAX_MESSAGE_LEN)
	return copytext((html_encode(strip_html_simple(t))),1,limit)

/**
 * Returns null if there is any bad text in the string, otherwise returns the text unchanged.
 *
 * Rejected: more than max_length characters, any of < > \ /, control characters (0-31),
 * anything at or above code 127 while ascii_only is set, and text with no visible character.
 *
 * * text - text to check
 * * max_length - maximum number of characters accepted
 * * ascii_only - when TRUE, any character at or above code 127 rejects the text
 */
/proc/reject_bad_text(text, max_length = 512, ascii_only = TRUE)
	var/char_count = 0
	var/non_whitespace = FALSE
	var/lenbytes = length(text)
	var/char = ""
	// text[i] yields one whole character; advance by its byte length.
	for(var/i = 1, i <= lenbytes, i += length(char))
		char = text[i]
		char_count++
		if(char_count > max_length)
			return
		switch(text2ascii(char))
			if(60, 62, 92, 47) // <, >, \, /
				return
			if(0 to 31)
				return
			if(32)
				continue
			if(127) // DEL: tolerated only when non-ASCII is allowed, never counts as visible text
				if(ascii_only)
					return
			if(128 to INFINITY)
				if(ascii_only)
					return
				// Allowed non-ASCII characters are real content; without this a purely
				// non-ASCII string was rejected even though ascii_only was FALSE.
				non_whitespace = TRUE
			else
				non_whitespace = TRUE
	if(non_whitespace)
		return text //only accepts the text if it has some non-spaces

// Used to get a properly sanitized input, of max_length
// no_trim is self explanatory but it prevents the input from being trimmed if you intend to parse newlines or whitespace.
/proc/stripped_input(mob/user, message = "", title = "", default = "", max_length=MAX_MESSAGE_LEN, no_trim=FALSE)
	var/name = input(user, message, title, default) as text|null
	if(no_trim)
		return copytext(html_encode(name), 1, max_length)
	else
		return trim(html_encode(name), max_length) //trim is "outside" because html_encode can expand single symbols into multiple symbols (such as turning < into &lt;)

// Used to get a properly sanitized multiline input, of max_length
/proc/stripped_multiline_input(mob/user, message = "", title = "", default = "", max_length=MAX_MESSAGE_LEN, no_trim=FALSE)
	var/name = input(user, message, title, default) as message|null
	if(no_trim)
		return copytext(html_encode(name), 1, max_length)
	else
		return trim(html_encode(name), max_length)

// Character groups tracked by reject_bad_name() while it walks the input.
#define NO_CHARS_DETECTED 0
#define SPACES_DETECTED 1
#define SYMBOLS_DETECTED 2
#define NUMBERS_DETECTED 3
#define LETTERS_DETECTED 4

/**
 * Filters out undesirable characters from names.
 *
 * Returns the cleaned name, or nothing when the name is rejected (empty input, input longer
 * than three times MAX_NAME_LEN bytes, fewer than two letters/digits kept, or a blacklisted name).
 *
 * * t_in - the raw name
 * * allow_numbers - allows numbers and common special characters - used for silicon/other weird things names
 * * max_length - number of kept characters after which processing stops
 * * ascii_only - when TRUE, characters at or above code 127 are dropped (or reject the name if strict)
 * * strict - return null immediately instead of filtering out
 */
/proc/reject_bad_name(t_in, allow_numbers = FALSE, max_length = MAX_NAME_LEN, ascii_only = TRUE, strict = FALSE)
	if(!t_in)
		return //Rejects the input if it is null

	var/number_of_alphanumeric = 0
	var/last_char_group = NO_CHARS_DETECTED
	var/t_out = ""
	var/t_len = length(t_in)
	var/charcount = 0
	var/char = ""

	// This is a sanity short circuit, if the users name is three times the maximum allowable length of name
	// We bail out on trying to process the name at all, as it could be a bug or malicious input and we dont
	// Want to iterate all of it.
	if(t_len > 3 * MAX_NAME_LEN)
		return
	for(var/i = 1, i <= t_len, i += length(char))
		char = t_in[i]
		switch(text2ascii(char))

			// A  .. Z
			if(65 to 90) //Uppercase Letters
				number_of_alphanumeric++
				last_char_group = LETTERS_DETECTED

			// a  .. z
			if(97 to 122) //Lowercase Letters
				if(last_char_group == NO_CHARS_DETECTED || last_char_group == SPACES_DETECTED || last_char_group == SYMBOLS_DETECTED) //start of a word
					char = uppertext(char)
				number_of_alphanumeric++
				last_char_group = LETTERS_DETECTED

			// 0  .. 9
			if(48 to 57) //Numbers
				if(last_char_group == NO_CHARS_DETECTED || !allow_numbers) //suppress at start of string
					if(strict)
						return
					continue
				number_of_alphanumeric++
				last_char_group = NUMBERS_DETECTED

			// '  -  .
			if(39,45,46) //Common name punctuation
				if(last_char_group == NO_CHARS_DETECTED)
					if(strict)
						return
					continue
				last_char_group = SYMBOLS_DETECTED

			// ~   |   @  :  #  $  %  &  *  +
			if(126,124,64,58,35,36,37,38,42,43) //Other symbols that we'll allow (mainly for AI)
				if(last_char_group == NO_CHARS_DETECTED || !allow_numbers) //suppress at start of string
					if(strict)
						return
					continue
				last_char_group = SYMBOLS_DETECTED

			//Space
			if(32)
				if(last_char_group == NO_CHARS_DETECTED || last_char_group == SPACES_DETECTED) //suppress double-spaces and spaces at start of string
					if(strict)
						return
					continue
				last_char_group = SPACES_DETECTED

			if(127 to INFINITY)
				if(ascii_only)
					if(strict)
						return
					continue
				last_char_group = SYMBOLS_DETECTED //for now, we'll treat all non-ascii characters like symbols even though most are letters

			else
				continue

		// Only reached by characters that were not skipped above.
		t_out += char
		charcount++
		if(charcount >= max_length)
			break

	if(number_of_alphanumeric < 2)
		return //protects against tiny names like "A" and also names like "' ' ' ' ' ' ' '"

	if(last_char_group == SPACES_DETECTED)
		t_out = copytext_char(t_out, 1, -1) //removes the last character (in this case a space)

	for(var/bad_name in list("space","floor","wall","r-wall","monkey","unknown","inactive ai")) //prevents these common metagamey names
		if(cmptext(t_out,bad_name))
			return //(not case sensitive)

	return t_out

// Every group macro defined above is removed again so none of them leaks into later files.
#undef NO_CHARS_DETECTED
#undef SPACES_DETECTED
#undef SYMBOLS_DETECTED
#undef NUMBERS_DETECTED
#undef LETTERS_DETECTED

//html_encode helper proc that returns the smallest non null of two numbers
//or 0 if they're both null (needed
//because of findtext returning 0 when a value is not present)
/proc/non_zero_min(a, b)
	// Only compare when both values are truthy; otherwise hand back the other one.
	if(a && b)
		return (a < b ? a : b)
	return a ? a : b

//Checks if any of a given list of needles is in the haystack
/proc/text_in_list(haystack, list/needle_list, start=1, end=0)
	. = 0
	for(var/candidate in needle_list)
		if(!findtext(haystack, candidate, start, end))
			continue
		return 1

//Like above, but case sensitive
/proc/text_in_list_case(haystack, list/needle_list, start=1, end=0)
	. = 0
	for(var/candidate in needle_list)
		if(!findtextEx(haystack, candidate, start, end))
			continue
		return 1

//Adds 'char' ahead of 'text' until there are 'count' characters total
/proc/add_leading(text, count, char = " ")
	text = "[text]"
	var/missing = count - length_char(text)
	// Joining N + 1 empty entries yields exactly N copies of the separator.
	var/list/padding[max(missing + 1, 0)]
	var/prefix = jointext(padding, char)
	return prefix + text

//Adds 'char' behind 'text' until there are 'count' characters total
/proc/add_trailing(text, count, char = " ")
	text = "[text]"
	var/missing = count - length_char(text)
	// Joining N + 1 empty entries yields exactly N copies of the separator.
	var/list/padding[max(missing + 1, 0)]
	var/suffix = jointext(padding, char)
	return text + suffix

//Returns a string with reserved characters and spaces before the first letter removed
/proc/trim_left(text)
	var/byte_len = length(text)
	for(var/pos = 1, pos <= byte_len, pos++)
		if(text2ascii(text, pos) <= 32)
			continue
		return copytext(text, pos)
	return ""

//Returns a string with reserved characters and spaces after the last letter removed
/proc/trim_right(text)
	var/pos = length(text)
	while(pos > 0)
		if(text2ascii(text, pos) > 32)
			return copytext(text, 1, pos + 1)
		pos--
	return ""

//Returns a string with reserved characters and spaces before the first word and after the last word removed.
/proc/trim(text, max_length)
	// The optional length cut happens before trimming.
	if(max_length)
		text = copytext_char(text, 1, max_length)
	var/right_trimmed = trim_right(text)
	return trim_left(right_trimmed)

//Returns a string with the first element of the string capitalized.
/proc/capitalize(t)
	if(!t)
		return t
	var/first_char = t[1]
	return uppertext(first_char) + copytext(t, length(first_char) + 1)

//This proc fills in all spaces with the "replace" var (* by default) with whatever
//is in the other string at the same spot (assuming it is not a replace char).
//This is used for fingerprints
/proc/stringmerge(text,compare,replace = "*")
	var/merged = text
	var/text_len = length(text)
	var/compare_len = length(compare)
	// Byte positions in text, compare and merged.
	var/text_pos = 1
	var/compare_pos = 1
	var/merged_pos = 1
	while(compare_pos <= compare_len && text_pos <= text_len)
		var/own_char = text[text_pos]
		var/other_char = compare[compare_pos]
		if(own_char != other_char)
			var/filler
			if(own_char == replace) //our side is unknown, take theirs
				filler = other_char
			else if(other_char == replace) //their side is unknown, keep ours
				filler = own_char
			else //both sides are known and disagree
				return 0
			var/replaced_len = length(merged[merged_pos])
			merged = copytext(merged, 1, merged_pos) + filler + copytext(merged, merged_pos + replaced_len)
		text_pos += length(own_char)
		compare_pos += length(other_char)
		merged_pos += length(merged[merged_pos])

	return merged

//This proc returns the number of chars of the string that is the character
//This is used for detective work to determine fingerprint completion.
/proc/stringpercent(text,character = "*")
	. = 0
	if(!text || !character)
		return
	var/total_bytes = length(text)
	var/pos = 1
	while(pos <= total_bytes)
		var/current = text[pos]
		if(current == character)
			.++
		pos += length(current)

//Returns text with its characters in reverse order
/proc/reverse_text(text = "")
	. = ""
	var/total_bytes = length(text)
	var/pos = 1
	while(pos <= total_bytes)
		var/current = text[pos]
		. = current + .
		pos += length(current)

GLOBAL_LIST_INIT(zero_character_only, list("0"))
GLOBAL_LIST_INIT(hex_characters, list("0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f"))
GLOBAL_LIST_INIT(alphabet, list("a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"))
GLOBAL_LIST_INIT(binary, list("0","1"))

//Builds a string of 'length' entries, each one picked at random from 'characters'
/proc/random_string(length, list/characters)
	var/result = ""
	for(var/n = 1, n <= length, n++)
		result += pick(characters)
	return result

//Returns 'string' repeated 'times' times
/proc/repeat_string(times, string="")
	var/result = ""
	for(var/n = 1, n <= times, n++)
		result += string
	return result

//Three random hex digits
/proc/random_short_color()
	var/digits = random_string(3, GLOB.hex_characters)
	return digits

//Six random hex digits
/proc/random_color()
	var/digits = random_string(6, GLOB.hex_characters)
	return digits

//merges non-null characters (3rd argument) from "from" into "into". Returns result
//e.g. into = "Hello World"
//     from = "Seeya______"
//     returns"Seeya World"
//The returned text is always the same length as into
//This was coded to handle DNA gene-splicing.
/proc/merge_text(into, from, null_char="_")
	. = ""
	if(!istext(into))
		into = ""
	if(!istext(from))
		from = ""
	var/null_ascii = istext(null_char) ? text2ascii(null_char, 1) : null_char
	var/from_len = length(from)
	var/into_len = length(into)
	var/from_pos = 1
	var/into_pos = 1
	var/run_start = 1 //start of the run not yet appended, in whichever string is the current source
	var/taking_into = FALSE //TRUE while the current run comes from "into"
	var/from_char = ""
	while(from_pos <= from_len && into_pos <= into_len)
		from_char = from[from_pos]
		var/wants_into = (text2ascii(from_char) == null_ascii)
		if(wants_into != taking_into)
			// The source switches here: flush the finished run and start a new one in the other string.
			if(wants_into)
				. += copytext(from, run_start, from_pos)
				run_start = into_pos
			else
				. += copytext(into, run_start, into_pos)
				run_start = from_pos
			taking_into = wants_into
		into_pos += length(into[into_pos])
		from_pos += length(from_char)

	if(taking_into)
		. += copytext(into, run_start)
	else
		. += copytext(from, run_start, from_pos)
		if(into_pos <= into_len)
			. += copytext(into, into_pos)

//finds the first occurrence of one of the characters from needles argument inside haystack
//needles are tried in order; the position of the first needle character that occurs is returned, 0 if none does
//it may appear this can be optimised, but it really can't. findtext() is so much faster than anything you can do in byondcode.
//stupid byond :(
/proc/findchar(haystack, needles, start=1, end=0)
	var/needles_len = length(needles)
	var/pos = 1
	while(pos <= needles_len)
		var/needle = needles[pos]
		var/found = findtextEx(haystack, needle, start, end)
		if(found)
			return found
		pos += length(needle)
	return 0

/proc/parsemarkdown_basic_step1(t, limited=FALSE) if(length(t) <= 0) return // This parses markdown with no custom rules // Escape backslashed t = replacetext(t, "$", "$-") t = replacetext(t, "\\\\", "$1") t = replacetext(t, "\\**", "$2") t = replacetext(t, "\\*", "$3") t = replacetext(t, "\\__", "$4") t = replacetext(t, "\\_", "$5") t = replacetext(t, "\\^", "$6") t = replacetext(t, "\\((", "$7") t = replacetext(t, "\\))", "$8") t = replacetext(t, "\\|", "$9") t = replacetext(t, "\\%", "$0") // Escape single characters that will be used t = replacetext(t, "!", "$a") // Parse hr and small if(!limited) t = replacetext(t, "((", "") t = replacetext(t, "))", "") t = replacetext(t, regex("(-){3,}", "gm"), "