Removes Unicode stuff (#26670)

* Removes Unicode stuff

* Fixes capitalize() and examine

* Not important but how did this happen

* No more emoji in say or HTML in me

* Clarifies comment

* Better HTML sanitization

* Rebuilds dll

* Fixes some browser windows

* Fixes telecomms scripts (lazily)
This is the easy way out, but could probably be made faster by doing the byte counting ourselves
This commit is contained in:
Exxion
2020-06-28 08:22:42 -04:00
committed by GitHub
parent a98a4b0000
commit 641009059e
32 changed files with 67 additions and 723 deletions

View File

@@ -92,6 +92,10 @@ forLineInText(text)
/proc/sanitize(var/t,var/list/repl_chars = null)
return html_encode(sanitize_simple(t,repl_chars))
/proc/sanitize_speech(var/t, var/limit = MAX_MESSAGE_LEN)
var/static/regex/speech_regex = regex(@"[^ -~¡-ÿ]", "g") //Matches all characters not in the printable ASCII range or (most of) the Latin-1 supplement. In BYOND, \w doesn't work outside the ASCII range, so it's no help here.
return trim(copytext(speech_regex.Replace(t, "*"), 1, limit)) //Note that this does NOT scrub HTML, because this is done in different places in me and say messages.
//Runs sanitize and strip_html_simple
//I believe strip_html_simple() is required to run first to prevent '<' from displaying as '&lt;' after sanitize() calls byond's html_encode()
/proc/strip_html(var/t,var/limit=MAX_MESSAGE_LEN)
@@ -103,10 +107,10 @@ forLineInText(text)
return copytext((html_encode(strip_html_simple(t))),1,limit)
/proc/reverse_text(txt)
var/i = length(txt)+1
. = ""
while(--i)
. += copytext(txt,i,i+1)
var/i = length(txt)+1
. = ""
while(--i)
. += copytext(txt,i,i+1)
/*
* returns null if there is any bad text in the string
@@ -138,7 +142,7 @@ forLineInText(text)
// Used to get a sanitized input.
/proc/stripped_input(var/mob/user, var/message = "", var/title = "", var/default = "", var/max_length=MAX_MESSAGE_LEN)
var/name = input(user, message, title, default) as null|text
return utf8_sanitize(name, user, max_length)
return strip_html_simple(name, max_length)
//Filters out undesirable characters from names
/proc/reject_bad_name(var/t_in, var/allow_numbers=0, var/max_length=MAX_NAME_LEN)
@@ -320,7 +324,7 @@ proc/checkhtml(var/t)
//Returns a string with the first element of the string capitalized.
/proc/capitalize(var/t as text)
return uppertext(copytext(t, 1, 2)) + copytext(t, 2)
return uppertext(copytext_char(t, 1, 2)) + copytext_char(t, 2)
//Centers text by adding spaces to either side of the string.
/proc/dd_centertext(message, length)

View File

@@ -1673,9 +1673,8 @@ Game Mode config tags:
// A standard proc for generic output to the msay window, Not useful for things that have their own prefs settings (prayers for instance)
/proc/output_to_msay(msg)
var/sane_msg = strict_ascii(msg)
for(var/client/C in admins)
C.output_to_special_tab(sane_msg)
C.output_to_special_tab(msg)
// This is awful and probably should be thrown away at some point.
/proc/generic_projectile_fire(var/atom/target, var/atom/source, var/obj/item/projectile/projectile, var/shot_sound, var/mob/firer)

View File

@@ -72,7 +72,7 @@
return {"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<!--<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">-->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<head>
[head_content]
@@ -181,7 +181,7 @@
return {"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<head>
[head_content]
</head>

View File

@@ -206,7 +206,7 @@
body += "</ul>"
body = jointext(body,"")
var/html = "<html><head>"
var/html = "<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"><head>"
if (title)
html += "<title>[title]</title>"
html += {"<style>

View File

@@ -501,7 +501,7 @@
var/reminder = input("Write the reminder.", text("Cult reminder")) as null | message
if (!reminder)
return
reminder = utf8_sanitize(reminder) // No weird HTML
reminder = strip_html_simple(reminder) // No weird HTML
var/number = cult.cult_reminders.len
var/text = "[number + 1]) [reminder], by [user.real_name]."
cult.cult_reminders += text

View File

@@ -317,4 +317,4 @@ var/list/pointers = list()
for(var/d in data)
var/val = data[d]
if(istext(val))
data[d] = utf8_sanitize(val)
data[d] = strip_html_simple(val)

View File

@@ -589,7 +589,7 @@ var/list/obj/machinery/newscaster/allCasters = list() //Global list that will co
for(var/datum/feed_channel/F in news_network.network_channels)
if( (!F.locked || F.author == scanned_user) && !F.censored)
available_channels += F.channel_name
channel_name = utf8_sanitize(input(usr, "Choose receiving Feed Channel", "Network Channel Handler") in available_channels )
channel_name = input(usr, "Choose receiving Feed Channel", "Network Channel Handler") in available_channels
updateUsrDialog()
else if(href_list["set_new_message"])

View File

@@ -49,7 +49,7 @@
var/turf/T = get_turf(A)
playsound(src.loc, sound_type, 10, 1)
var/obj/item/weapon/reagent_containers/food/S = new food_type(T)
to_chat(user,"Fabricating [utf8_lowercase(S.name)]..")
to_chat(user,"Fabricating [lowertext(S.name)]..")
if(toxin)
S.reagents.add_reagent(toxin_type, toxin_amount)
if(isrobot(user))
@@ -84,4 +84,4 @@
/obj/item/weapon/cookiesynth/lollipop
name = "medipop synthesizer"
desc = "A self-recharging device used to rapidly deploy medicinal lollipops. Tell your patient they were very brave today."
food_type = /obj/item/weapon/reagent_containers/food/snacks/medipop
food_type = /obj/item/weapon/reagent_containers/food/snacks/medipop

View File

@@ -355,7 +355,7 @@
return
src.registered_name = n
var/u = strict_ascii(sanitize(stripped_input(user, "What occupation would you like to put on this card?\nNote: this will not grant or remove any access levels.", "Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN)))
var/u = sanitize(stripped_input(user, "What occupation would you like to put on this card?\nNote: this will not grant or remove any access levels.", "Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN))
if(!u)
alert("Invalid assignment.")
src.registered_name = null
@@ -396,7 +396,7 @@
to_chat(user, "Name changed to [new_name].")
if("Occupation")
var/new_job = strict_ascii(sanitize(stripped_input(user,"What job would you like to put on this card?\nChanging occupation will not grant or remove any access levels.","Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN)))
var/new_job = sanitize(stripped_input(user,"What job would you like to put on this card?\nChanging occupation will not grant or remove any access levels.","Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN))
if (!Adjacent(user) || user.incapacitated())
return
if (!new_job)

View File

@@ -12,7 +12,7 @@
to_chat(src, "Guests may not use OOC.")
return
msg = utf8_sanitize(msg, src, MAX_MESSAGE_LEN)
msg = copytext(sanitize(msg), 1, MAX_MESSAGE_LEN)
if(!msg)
return
@@ -108,7 +108,7 @@
to_chat(src, "Guests may not use OOC.")
return
msg = to_utf8(copytext(sanitize(msg), 1, MAX_MESSAGE_LEN), src)
msg = copytext(sanitize(msg), 1, MAX_MESSAGE_LEN)
if(!msg)
return

View File

@@ -33,7 +33,7 @@
to_chat(user, "<span class='notice'>The authorized user field on the card is blank.</span>")
/obj/item/weapon/card/debit/proc/change_authorized_name(var/desired_authorized_name)
authorized_name = uppertext(sanitize_simple(utf8_sanitize(desired_authorized_name, length = DEBIT_MAX_AUTHORIZED_NAME_LENGTH)))
authorized_name = uppertext(sanitize_simple(strip_html_simple(desired_authorized_name, DEBIT_MAX_AUTHORIZED_NAME_LENGTH)))
/obj/item/weapon/card/debit/attack_self(var/mob/user)
if(user.attack_delayer.blocked())

View File

@@ -4275,7 +4275,7 @@
src.access_news_network()
else if(href_list["ac_set_channel_name"])
src.admincaster_feed_channel.channel_name = utf8_sanitize(input(usr, "Provide a Feed Channel Name", "Network Channel Handler", ""))
src.admincaster_feed_channel.channel_name = stripped_input(usr, "Provide a Feed Channel Name", "Network Channel Handler", "")
while (findtext(src.admincaster_feed_channel.channel_name," ") == 1)
src.admincaster_feed_channel.channel_name = copytext(src.admincaster_feed_channel.channel_name,2,length(src.admincaster_feed_channel.channel_name)+1)
src.access_news_network()

View File

@@ -84,7 +84,7 @@ var/list/adminhelp_ignored_words = list("unknown","the","a","an","of","monkey","
return //this doesn't happen
var/ref_mob = "\ref[mob]"
msg = "<span class='notice'><b><font color=red>HELP: </font>[key_name(src, 1)] (<A HREF='?_src_=holder;adminmoreinfo=[ref_mob]'>?</A>) (<A HREF='?_src_=holder;adminplayeropts=[ref_mob]'>PP</A>) (<a href='?_src_=holder;role_panel=[ref_mob]'>RP</a>) (<A HREF='?_src_=vars;Vars=[ref_mob]'>VV</A>) (<A HREF='?_src_=holder;subtlemessage=[ref_mob]'>SM</A>) (<A HREF='?_src_=holder;adminplayerobservejump=[ref_mob]'>JMP</A>) (<A HREF='?_src_=holder;check_antagonist=1'>CA</A>) [ai_found ? " (<A HREF='?_src_=holder;adminchecklaws=[ref_mob]'>CL</A>)" : ""]:</b> [strict_ascii(msg)]</span>"
msg = "<span class='notice'><b><font color=red>HELP: </font>[key_name(src, 1)] (<A HREF='?_src_=holder;adminmoreinfo=[ref_mob]'>?</A>) (<A HREF='?_src_=holder;adminplayeropts=[ref_mob]'>PP</A>) (<a href='?_src_=holder;role_panel=[ref_mob]'>RP</a>) (<A HREF='?_src_=vars;Vars=[ref_mob]'>VV</A>) (<A HREF='?_src_=holder;subtlemessage=[ref_mob]'>SM</A>) (<A HREF='?_src_=holder;adminplayerobservejump=[ref_mob]'>JMP</A>) (<A HREF='?_src_=holder;check_antagonist=1'>CA</A>) [ai_found ? " (<A HREF='?_src_=holder;adminchecklaws=[ref_mob]'>CL</A>)" : ""]:</b> [msg]</span>"
//send this msg to all admins
var/admin_number_afk = 0

View File

@@ -135,7 +135,7 @@
adminhelp(reply) //sender has left, adminhelp instead
return
recieve_message = "<font color='[recieve_color]'>[recieve_pm_type] PM from-<b>[key_name(src, C, C.holder ? 1 : 0)]</b>: [strict_ascii(msg)]</font>"
recieve_message = "<font color='[recieve_color]'>[recieve_pm_type] PM from-<b>[key_name(src, C, C.holder ? 1 : 0)]</b>: [msg]</font>"
C.output_to_special_tab(recieve_message, force_focus = TRUE)
output_to_special_tab("<span class='notice'>[send_pm_type]PM to-<b>[key_name(C, src, holder ? 1 : 0)]</b>: [msg]</span>")

View File

@@ -179,7 +179,7 @@ var/global/list/assembly_short_name_to_type = list() //Please, I beg you, don't
if(!istext(new_value)) //Attempted to write a non-string to a string var - convert the non-string into a string and continue
new_value = "[new_value]"
new_value = utf8_sanitize(new_value, length = MAX_TEXT_VALUE_LEN)
new_value = strip_html(new_value, MAX_TEXT_VALUE_LEN)
//text values can accept either numbers or text, so don't check for that
@@ -303,4 +303,4 @@ var/global/list/assembly_short_name_to_type = list() //Please, I beg you, don't
return 0
user.set_machine(src)
interact(user)
return 1
return 1

View File

@@ -1,69 +0,0 @@
// Note about encodings:
// Encodings are passed by number as it's simplest to do it like this (citation needed)
// This may cause some confusion about which number corresponds to which encoding.
//
// 874 and 1250-1258 are Windows CodePage encodings. The number corresponds to the CodePage.
// 2312 is gb2312 (Chinese)
// Picks the encoding code to use for a client or a mob.
// The result starts out as "1252"; a client argument yields that client's own encoding,
// and a mob argument yields its client's encoding when the mob has a client attached.
// Any other argument leaves the "1252" default in place.
/proc/_determine_encoding(var/mob_or_client)
. = "1252"
if (isclient(mob_or_client))
var/client/C = mob_or_client
. = C.encoding
else if (ismob(mob_or_client))
var/mob/M = mob_or_client
if (M.client)
. = M.client.encoding
// Passes the message, together with the encoding chosen by _determine_encoding(),
// to the libvg "to_utf8" routine and returns whatever that call returns.
/proc/to_utf8(var/message, var/mob_or_client)
return LIBVG("to_utf8", _determine_encoding(mob_or_client), message)
// Converts a byte string to a UTF-8 string, sanitizes it and caps the length.
// The encoding comes from _determine_encoding(mob_or_client); the length cap is sent
// to libvg as text via num2text() and defaults to MAX_MESSAGE_LEN.
/proc/utf8_sanitize(var/message, var/mob_or_client, var/length = MAX_MESSAGE_LEN)
return LIBVG("utf8_sanitize", _determine_encoding(mob_or_client), message, num2text(length))
// Get the length (Unicode Scalars) of a UTF-8 string.
// libvg answers with text, so the reply is converted back to a number with text2num().
/proc/utf8_len(var/message)
return text2num(LIBVG("utf8_len", message))
// Returns length(a) directly, without calling into libvg.
// NOTE(review): the name suggests this is meant to be the byte length - confirm that
// length() counts bytes on the BYOND version in use.
/proc/utf8_byte_len(var/a)
return length(a)
// Asks libvg's "utf8_find" for needle within haystack and returns the reply as a number.
// start and end are embedded into text before being sent; they default to 1 and 0.
/proc/utf8_find(var/haystack, var/needle, var/start=1, var/end=0)
return text2num(LIBVG("utf8_find", haystack, needle, "[start]", "[end]"))
// Returns the result of libvg's "utf8_copy" for the given text and bounds.
// start and end are sent as text and default to 1 and 0.
/proc/utf8_copy(var/text, var/start=1, var/end=0)
return LIBVG("utf8_copy", text, "[start]", "[end]")
// Returns the result of libvg's "utf8_replace", given the text, the string to look for (from),
// its replacement (to_) and the start/end bounds, which are sent as text (defaults 1 and 0).
/proc/utf8_replace(var/text, var/from, var/to_, var/start=1, var/end=0)
return LIBVG("utf8_replace", text, from, to_, "[start]", "[end]")
// Returns the result of libvg's "utf8_index" for the given text; index is sent as text.
/proc/utf8_index(var/text, var/index)
return LIBVG("utf8_index", text, "[index]")
// Returns the result of libvg's "utf8_uppercase" for the given text.
/proc/utf8_uppercase(var/text)
return LIBVG("utf8_uppercase", text)
// Returns the result of libvg's "utf8_lowercase" for the given text.
/proc/utf8_lowercase(var/text)
return LIBVG("utf8_lowercase", text)
// Removes non-7-bit ASCII characters.
// Useful for things which BYOND touches itself like object names.
// The filtering itself is done by libvg's "strict_ascii" routine.
/proc/strict_ascii(var/text)
return LIBVG("strict_ascii", text)
// Uppercases the element at index 1 (via utf8_index and utf8_uppercase) and appends
// the rest of the text starting at position 2 (via utf8_copy).
/proc/utf8_capitalize(var/text)
return utf8_uppercase(utf8_index(text, 1)) + utf8_copy(text, 2)
// Returns the result of libvg's "utf8_reverse" for the given text.
/proc/utf8_reverse(var/text)
return LIBVG("utf8_reverse", text)
// Returns the result of libvg's "utf8_leftpad"; count is sent as text and the
// padding string `with` defaults to a single space.
/proc/utf8_leftpad(var/text, var/count, var/with=" ")
return LIBVG("utf8_leftpad", text, "[count]", with)
// Returns libvg's "utf8_is_whitespace" reply for the given text, converted to a number.
/proc/utf8_is_whitespace(var/text)
return text2num(LIBVG("utf8_is_whitespace", text))
// Returns the result of libvg's "utf8_trim" for the given text.
/proc/utf8_trim(var/text)
return LIBVG("utf8_trim", text)

View File

@@ -627,7 +627,7 @@
else if (href_list["show_flavor_text"])
if(can_show_flavor_text())
var/datum/browser/popup = new(usr, "\ref[src]", name, 500, 200)
popup.set_content(utf8_sanitize(flavor_text))
popup.set_content(strip_html(flavor_text))
popup.open()
/*else if (href_list["lookmob"])
var/mob/M = locate(href_list["lookmob"])
@@ -1995,4 +1995,4 @@ mob/living/carbon/human/isincrit()
if(istype(locked_to, /obj/structure/bed/therapy))
return list(/datum/ambience/beach)
else
return ..()
return ..()

View File

@@ -114,7 +114,7 @@ var/list/headset_modes = list(
say_testing(src, "/mob/living/say(\"[message]\", [bubble_type]")
if(timestopped)
return //under the effects of time magick
message = trim(copytext(message, 1, MAX_MESSAGE_LEN))
message = sanitize_speech(message)
message = capitalize(message)
say_testing(src, "Say start, message=[message]")

View File

@@ -1264,7 +1264,7 @@ Use this proc preferably at the end of an equipment loadout
return
if(!can_show_flavor_text())
return
var/msg = utf8_sanitize(flavor_text)
var/msg = strip_html(flavor_text)
if(findtext(msg, "http:") || findtext(msg, "https:") || findtext(msg, "www."))
return "<font color='#ffa000'><b><a href='?src=\ref[src];show_flavor_text=1'>Show flavor text</a></b></font>"
if(length(msg) <= 32)

View File

@@ -5,7 +5,7 @@
if(say_disabled)
to_chat(usr, "<span class='danger'>Speech is currently admin-disabled.</span>")
return
usr.say(to_utf8(message, usr))
usr.say(message)
remove_typing_indicator()
/mob/verb/whisper(message as text)
@@ -31,7 +31,7 @@
remove_typing_indicator()
return
message = utf8_sanitize(message, usr, MAX_MESSAGE_LEN)
message = html_encode(sanitize_speech(message))
if(usr.stat == DEAD)
usr.emote_dead(message)

View File

@@ -419,7 +419,7 @@ nanoui is used to open and update nano browser uis
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<head>
<script type='text/javascript'>
function receiveUpdateData(jsonString)

View File

@@ -141,7 +141,7 @@
@param replacestring: the string to replace the substring with
*/
interpreter.SetProc("replace", /proc/utf8_replace)
interpreter.SetProc("replace", /proc/n_replacetext)
/*
-> Locates an element/substring inside of a list or string
@@ -178,7 +178,7 @@
interpreter.SetProc("pick", /proc/n_pick)
interpreter.SetProc("prob", /proc/prob_chance)
interpreter.SetProc("substr", /proc/utf8_copy)
interpreter.SetProc("substr", /proc/docopytext)
interpreter.SetProc("shuffle", /proc/shuffle)
interpreter.SetProc("uniquevector", /proc/uniquelist)
@@ -188,13 +188,13 @@
interpreter.SetProc("vector2text", /proc/vg_jointext)
// Strings
interpreter.SetProc("lower", /proc/utf8_lowercase)
interpreter.SetProc("upper", /proc/utf8_uppercase)
interpreter.SetProc("lower", /proc/n_lower)
interpreter.SetProc("upper", /proc/n_upper)
interpreter.SetProc("explode", /proc/string_explode)
interpreter.SetProc("repeat", /proc/n_repeat)
interpreter.SetProc("reverse", /proc/utf8_reverse)
interpreter.SetProc("reverse", /proc/reverse_text)
interpreter.SetProc("tonum", /proc/n_str2num)
interpreter.SetProc("capitalize", /proc/utf8_capitalize)
interpreter.SetProc("capitalize", /proc/capitalize)
//interpreter.SetProc("replacetextEx",/proc/n_replacetextEx)
// Numbers

View File

@@ -118,7 +118,7 @@
else
if(istext(haystack))
return utf8_find(haystack, needle, start, end)
return findtext_char(haystack, needle, start, end)
// Clone of copytext()
/proc/docopytext(var/string, var/start = 1, var/end = 0)
@@ -129,7 +129,7 @@
// Clone of length()
/proc/smartlength(var/container)
if (istext(container))
return utf8_len(container)
return length_char(container)
return length(container)

View File

@@ -26,7 +26,7 @@
Loads source code.
*/
/datum/n_Scanner/proc/LoadCode(var/c)
code=strict_ascii(c)
code = c
/*
Proc: LoadCodeFromFile
@@ -109,10 +109,10 @@
/datum/n_Scanner/nS_Scanner/Scan() //Creates a list of tokens from source code
var/list/tokens = new
for(, src.codepos <= length(code), src.codepos++)
for(, src.codepos <= length_char(code), src.codepos++)
var/char = copytext(code, codepos, codepos + 1)
var/nextchar = copytext(code, codepos + 1, codepos + 2)
var/char = copytext_char(code, codepos, codepos + 1)
var/nextchar = copytext_char(code, codepos + 1, codepos + 2)
if(char == "\n")
line++
linepos = codepos
@@ -155,12 +155,12 @@
*/
/datum/n_Scanner/nS_Scanner/proc/ReadString(start)
var/buf
for(, codepos <= length(code), codepos++)//codepos to length(code))
var/char = copytext(code, codepos, codepos + 1)
for(, codepos <= length_char(code), codepos++)//codepos to length(code))
var/char = copytext_char(code, codepos, codepos + 1)
switch(char)
if("\\") //Backslash (\) encountered in string
codepos++ //Skip next character in string, since it was escaped by a backslash
char = copytext(code, codepos, codepos+1)
char = copytext_char(code, codepos, codepos+1)
switch(char)
if("\\") //Double backslash
buf += "\\"
@@ -190,12 +190,12 @@
Reads characters separated by an item in <delim> into a token.
*/
/datum/n_Scanner/nS_Scanner/proc/ReadWord()
var/char = copytext(code, codepos, codepos + 1)
var/char = copytext_char(code, codepos, codepos + 1)
var/buf
while(!delim.Find(char) && codepos <= length(code))
while(!delim.Find(char) && codepos <= length_char(code))
buf += char
char = copytext(code, ++codepos, codepos + 1)
char = copytext_char(code, ++codepos, codepos + 1)
codepos-- //allow main Scan() proc to read the delimiter
if(options.keywords.Find(buf))
return new/datum/token/keyword(buf, line, COL)
@@ -207,14 +207,14 @@
Reads a symbol into a token.
*/
/datum/n_Scanner/nS_Scanner/proc/ReadSymbol()
var/char=copytext(code, codepos, codepos + 1)
var/char=copytext_char(code, codepos, codepos + 1)
var/buf
while(options.symbols.Find(buf + char))
buf += char
if(++codepos > length(code))
if(++codepos > length_char(code))
break
char = copytext(code, codepos, codepos + 1)
char = copytext_char(code, codepos, codepos + 1)
codepos-- //allow main Scan() proc to read the next character
return new /datum/token/symbol(buf, line, COL)
@@ -224,7 +224,7 @@
Reads a number into a token.
*/
/datum/n_Scanner/nS_Scanner/proc/ReadNumber()
var/char = copytext(code, codepos, codepos + 1)
var/char = copytext_char(code, codepos, codepos + 1)
var/buf
var/dec = 0
@@ -234,7 +234,7 @@
buf += char
codepos++
char = copytext(code, codepos, codepos + 1)
char = copytext_char(code, codepos, codepos + 1)
var/datum/token/number/T = new(buf, line, COL)
if(isnull(text2num(buf)))
@@ -250,8 +250,8 @@
*/
/datum/n_Scanner/nS_Scanner/proc/ReadComment()
var/char = copytext(code, codepos, codepos + 1)
var/nextchar = copytext(code, codepos + 1, codepos + 2)
var/char = copytext_char(code, codepos, codepos + 1)
var/nextchar = copytext_char(code, codepos + 1, codepos + 2)
var/charstring = char + nextchar
var/comm = 1
// 1: single-line comment
@@ -263,23 +263,23 @@
comm = 2 // starts a multi-line comment
while(comm)
if(++codepos > length(code))
if(++codepos > length_char(code))
break
if(expectedend) // ending statement expected...
char = copytext(code, codepos, codepos + 1)
char = copytext_char(code, codepos, codepos + 1)
if(char == "/") // ending statement found - beak the comment
comm = 0
break
if(comm == 2)
// multi-line comments are broken by ending statements
char = copytext(code, codepos, codepos + 1)
char = copytext_char(code, codepos, codepos + 1)
if(char == "*")
expectedend = 1
continue
else
char = copytext(code, codepos, codepos + 1)
char = copytext_char(code, codepos, codepos + 1)
if(char == "\n")
comm = 0
break

View File

@@ -120,7 +120,6 @@ var/list/SPS_list = list()
if(!builtin && (usr.get_active_hand() != src || usr.incapacitated())) //second check in case some chucklefuck drops the GPS while typing the tag
to_chat(usr, "<span class = 'caution'>The GPS needs to be kept in your active hand!</span>")
return TRUE
a = strict_ascii(a)
if(!a) //what a check
return TRUE
if(length(a) > 5)
@@ -203,6 +202,3 @@ var/list/SPS_list = list()
boop = TRUE
if (boop)
playsound(src,'sound/machines/radioboop.ogg',40,1)

View File

@@ -152,7 +152,7 @@ function highlightTerms(el) {
else {
toInsert = document.createTextNode(chunk);
}
// Insert back into our element
if (pre.length == 0) {
var result = parent.prepend(toInsert);
@@ -178,12 +178,6 @@ function output(message, flag) {
if (flag !== 'internal')
opts.lastPang = Date.now();
// Basically we url_encode twice server side so we can manually read the encoded version and actually do UTF-8.
// The replace for + is because FOR SOME REASON, BYOND replaces spaces with a + instead of %20, and a plus with %2b.
// Marvelous.
message = message.replace(/\+/g, "%20")
message = decoder(message)
//Stuff we do along with appending a message
var atBottom = false;
var bodyHeight = $('body').height();

View File

@@ -315,8 +315,7 @@ For the main html chat area
message = replacetext(message, "\n", "<br>")
// url_encode it TWICE, this way any UTF-8 characters are able to be decoded by the Javascript.
target << output(url_encode(url_encode(message)), "browseroutput:output")
target << output(url_encode(message), "browseroutput:output")
/datum/log //exists purely to capture to_chat() output
var/log = ""

BIN
libvg.dll

Binary file not shown.

View File

@@ -1,60 +0,0 @@
//! I have no idea WHY this is, but to bench these you need to disable dylib in Cargo.
#![feature(test)]
extern crate test;
extern crate libvg;
use libvg::utf8::{to_utf8, utf8_sanitize};
use std::ffi::CString;
use test::Bencher;
/// Benchmarks `to_utf8` on a block of plain ASCII text tagged with encoding "1252".
#[bench]
fn bench_utf8(b: &mut Bencher) {
    let code_page = CString::new("1252").unwrap();
    let text = CString::new(
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras elementum \
         mauris eu odio bibendum, ut porttitor libero vulputate. Vivamus et augue \
         justo. Quisque ut auctor lectus. Vestibulum ante ipsum primis in faucibus \
         orci luctus et ultrices posuere cubilia Curae; Maecenas non scelerisque \
         nisl. Suspendisse egestas, diam et aliquam ultrices, mi est condimentum \
         neque, eu fermentum dolor justo at lectus. Nam consequat dolor sit amet \
         massa convallis volutpat eget eget nibh. Nullam a ultricies elit. Etiam eu \
         quam interdum, ornare enim vitae, placerat dolor. Curabitur a tempor ex. \
         Curabitur metus elit, pharetra nec faucibus a, consectetur nec ex. \
         Pellentesque venenatis dapibus mi et vulputate. Nullam laoreet, tortor at \
         rutrum sagittis, nibh purus ultrices est, ut efficitur nulla dui vel \
         felis. Etiam malesuada nec orci in rutrum. Ut consectetur ante vitae arcu \
         ultricies hendrerit. Etiam a tempor enim.",
    )
    .unwrap();

    // Two arguments are handed over: the encoding first, then the message bytes.
    let args = [code_page.as_ptr(), text.as_ptr()];
    b.iter(|| to_utf8(2, args.as_ptr()))
}
/// Benchmarks `utf8_sanitize` on a block of plain ASCII text tagged with encoding "1252"
/// and a length cap of "1024".
#[bench]
fn bench_sanitize(b: &mut Bencher) {
    let code_page = CString::new("1252").unwrap();
    let text = CString::new(
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras elementum \
         mauris eu odio bibendum, ut porttitor libero vulputate. Vivamus et augue \
         justo. Quisque ut auctor lectus. Vestibulum ante ipsum primis in faucibus \
         orci luctus et ultrices posuere cubilia Curae; Maecenas non scelerisque \
         nisl. Suspendisse egestas, diam et aliquam ultrices, mi est condimentum \
         neque, eu fermentum dolor justo at lectus. Nam consequat dolor sit amet \
         massa convallis volutpat eget eget nibh. Nullam a ultricies elit. Etiam eu \
         quam interdum, ornare enim vitae, placerat dolor. Curabitur a tempor ex. \
         Curabitur metus elit, pharetra nec faucibus a, consectetur nec ex. \
         Pellentesque venenatis dapibus mi et vulputate. Nullam laoreet, tortor at \
         rutrum sagittis, nibh purus ultrices est, ut efficitur nulla dui vel \
         felis. Etiam malesuada nec orci in rutrum. Ut consectetur ante vitae arcu \
         ultricies hendrerit. Etiam a tempor enim.",
    )
    .unwrap();
    let limit = CString::new("1024").unwrap();

    // Three arguments are handed over: encoding, message bytes, length cap.
    let args = [code_page.as_ptr(), text.as_ptr(), limit.as_ptr()];
    b.iter(|| utf8_sanitize(3, args.as_ptr()))
}

View File

@@ -5,5 +5,3 @@
extern crate byond;
extern crate encoding;
extern crate libc;
pub mod utf8;

View File

@@ -1,516 +0,0 @@
use byond::call::return_to_byond;
use encoding::all::{WINDOWS_1252, ASCII, GB18030};
use encoding::Encoding;
use encoding::label::encoding_from_windows_code_page;
use encoding::types::DecoderTrap;
use libc;
use std::cmp::{max, Ordering};
use std::ffi::CStr;
use std::slice;
use std::ptr::null;
// Encodes a byte string to UTF-8, using the encoding supplied.
//
// Arguments are in the order of encoding, bytes.
// Returns a null pointer when the argument array is null, when fewer than two
// arguments are supplied, or when the result cannot be handed back to BYOND.
#[no_mangle]
pub extern "C" fn to_utf8(n: libc::c_int, v: *const *const libc::c_char) -> *const libc::c_char {
    // `decode` indexes args[0] and args[1]. Refuse malformed calls up front instead of
    // panicking inside an `extern "C"` function or turning a negative count into a
    // gigantic slice length.
    if v.is_null() || n < 2 {
        return null();
    }

    // We do not let the byond crate handle arguments, as we want BYTES directly.
    // Unicode decode could fail on the second argument.
    // SAFETY: `v` is non-null and `n` is at least 2 (checked above). This still assumes the
    // caller passes `n` valid pointers to NUL-terminated strings.
    let text = unsafe {
        let slice = slice::from_raw_parts(v, n as usize);
        decode(slice)
    };

    return_to_byond(&text).unwrap_or(null())
}
// Decodes a byte string from the given Windows encoding, filters bad characters and limits
// message length.
//
// Operations like message length are done on Unicode code points!
// Arguments are in the order of encoding, bytes, cap.
// A cap that is not valid UTF-8 or does not parse as an unsigned number falls back to 1024.
// Returns a null pointer when the argument array is null, when fewer than three arguments
// are supplied, or when the result cannot be handed back to BYOND.
#[no_mangle]
pub extern "C" fn utf8_sanitize(
    n: libc::c_int,
    v: *const *const libc::c_char,
) -> *const libc::c_char {
    // slice[2] and decode's args[0]/args[1] are indexed below. Refuse malformed calls up
    // front instead of panicking inside an `extern "C"` function or turning a negative
    // count into a gigantic slice length.
    if v.is_null() || n < 3 {
        return null();
    }

    // Can't use the BYOND crate again because of unicode conversion failing.
    // SAFETY: `v` is non-null and `n` is at least 3 (checked above). This still assumes the
    // caller passes `n` valid pointers to NUL-terminated strings.
    let text = unsafe {
        let slice = slice::from_raw_parts(v, n as usize);
        let cap = CStr::from_ptr(slice[2])
            .to_str()
            .ok()
            .and_then(|cap| cap.parse::<usize>().ok())
            .unwrap_or(1024);

        sanitize(&decode(slice), cap)
    };

    return_to_byond(&text).unwrap_or(null())
}
// Removes non-ASCII characters from the input string.
//
// Takes a single argument: the bytes to filter.
// Returns a null pointer when the argument array is null, when no argument is supplied,
// or when the result cannot be handed back to BYOND.
#[no_mangle]
pub extern "C" fn strict_ascii(
    n: libc::c_int,
    v: *const *const libc::c_char,
) -> *const libc::c_char {
    // slice[0] is indexed below. Refuse malformed calls up front instead of panicking
    // inside an `extern "C"` function or turning a negative count into a gigantic
    // slice length.
    if v.is_null() || n < 1 {
        return null();
    }

    // SAFETY: `v` is non-null and `n` is at least 1 (checked above). This still assumes the
    // caller passes `n` valid pointers to NUL-terminated strings that stay alive for the
    // duration of this call.
    let bytes = unsafe {
        let slice = slice::from_raw_parts(v, n as usize);
        CStr::from_ptr(slice[0]).to_bytes()
    };

    // NOTE(review): the unwrap presumably cannot fail because DecoderTrap::Ignore skips
    // undecodable bytes instead of reporting them - confirm against the encoding crate.
    return_to_byond(ASCII.decode(bytes, DecoderTrap::Ignore).unwrap()).unwrap_or(null())
}
// Returns the length of a UTF-8 string, counted in `char`s and rendered as decimal text.
byond!(utf8_len: text; {
    let scalar_count = text.chars().count();
    scalar_count.to_string()
});
/* You saw nothing.
/// Returns the BYTE length of a UTF-8 string.
byond!(utf8_len_bytes: text; {
format!("{}", text.len())
});
*/
// Finds `needle` inside the `start`..`end` window of `haystack`.
//
// `start` and `end` are interpreted by `byte_bounds`. Returns, as decimal text, the
// 1-based character position of the first match within the WHOLE haystack, or "0" when
// the bounds are invalid or nothing matches.
byond!(utf8_find: haystack, needle, start, end; {
    match byte_bounds(haystack, start, end) {
        Some((start, end)) => {
            match haystack[start..end].find(needle) {
                Some(index) => {
                    // `find` reports a byte offset relative to the searched window, so it
                    // has to be shifted by the window's own offset before it can be
                    // translated into a character position of the full haystack.
                    let absolute = start + index;
                    format!("{}", haystack[..absolute].chars().count() + 1)
                }
                None => "0".to_string(),
            }
        }
        None => "0".to_string(),
    }
});
// Returns the single character of `text` at the 1-based position `index`.
//
// An unparsable index is treated as 1, negative positions count back from the end of the
// string, and position 0 or any position outside the string yields an empty string.
byond!(utf8_index: text, index; {
    let requested = index.parse::<isize>().unwrap_or(1);

    // Position 0 is invalid.
    if requested == 0 {
        return "";
    }

    // 0-indexed position for the string, by code points.
    let position = (if requested > 0 {
        requested - 1
    } else {
        text.chars().count() as isize + requested
    }) as usize;

    // Translate the character position into byte bounds.
    let mut offsets = text.char_indices().map(|(offset, _)| offset);
    let begin = match offsets.nth(position) {
        Some(offset) => offset,
        None => return "",
    };
    let finish = offsets.next().unwrap_or(text.len());

    &text[begin..finish]
});
// Returns the part of `text` selected by `start` and `end` (as interpreted by
// `byte_bounds`), or an empty string when the bounds select nothing.
byond!(utf8_copy: text, start, end; {
    if let Some((lower, upper)) = byte_bounds(text, start, end) {
        &text[lower..upper]
    } else {
        ""
    }
});
// Replaces every occurrence of `from` with `to` inside the `start`..`end` window of `text`
// (as interpreted by `byte_bounds`); text outside the window is copied unchanged.
// Invalid bounds return the text untouched.
byond!(utf8_replace: text, from, to, start, end; {
    let (lower, upper) = match byte_bounds(text, start, end) {
        Some(bounds) => bounds,
        None => return text.to_string(),
    };

    let rewritten = text[lower..upper].replace(from, to);

    let mut out = String::new();
    out.push_str(&text[..lower]);
    out.push_str(&rewritten);
    out.push_str(&text[upper..]);
    out
});
// Returns `text` converted with `to_uppercase()`.
byond!(utf8_uppercase: text; {
text.to_uppercase()
});
// Returns `text` converted with `to_lowercase()`.
byond!(utf8_lowercase: text; {
text.to_lowercase()
});
// Returns `text` with the order of its `char`s reversed.
byond!(utf8_reverse: text; {
    let mut reversed = String::new();
    reversed.extend(text.chars().rev());
    reversed
});
// Side note: I originally tried to use the left-pad crate on Cargo.
// That crate is hilariously enough broken and doesn't understand what Unicode is.
// 10/10 meme tier.
//
// Pads `text` on the left until it is `amount` characters long, using the first character
// of `with` (a space when `with` is empty). Lengths are counted in `char`s.
// An unparsable `amount`, or one that does not exceed the current length, returns the
// text unchanged.
byond!(utf8_leftpad: text, amount, with; {
    let amount = match amount.parse::<usize>() {
        Ok(a) => a,
        Err(_) => return text.to_owned()
    };

    let with = with.chars().next().unwrap_or(' ');
    let text_len = text.chars().count();

    if amount <= text_len {
        // Nothing would change.
        return text.into();
    }

    let filler_amount = amount - text_len;

    // String capacity is measured in bytes, so reserve the byte length of the text
    // (not its character count) plus the bytes taken up by the filler.
    let mut out = String::with_capacity(text.len() + filler_amount * with.len_utf8());
    out.extend((0..filler_amount).map(|_| with));
    out.push_str(text);
    out
});
// Returns "1" when every character of `string` is whitespace (which includes the empty
// string), and "0" otherwise.
byond!(utf8_is_whitespace: string; {
    let only_whitespace = string.chars().all(|c| c.is_whitespace());
    if only_whitespace {
        "1"
    } else {
        "0"
    }
});
// Returns `string` with `trim()` applied.
byond!(utf8_trim: string; {
string.trim()
});
/// Translates BYOND-style character positions into a byte range of `text`, for use by
/// the copytext, findtext and replacetext equivalents.
///
/// Positions are one-indexed and given as decimal text. `start` defaults to 1 and `end`
/// to 0 when they fail to parse. A negative position counts back from the end of the
/// string, and an `end` of 0 means "up to the end of the string". The end position is
/// exclusive.
///
/// Returns `None` when the range is empty or starts beyond the last character; an end
/// beyond the last character is clamped to `text.len()`.
pub(crate) fn byte_bounds(text: &str, start: &str, end: &str) -> Option<(usize, usize)> {
    let total = text.chars().count() as isize;

    // Zero-based index of the first selected character.
    let requested_start = start.parse::<isize>().unwrap_or(1);
    let first = if requested_start < 0 {
        requested_start + total
    } else {
        requested_start - 1
    };
    let first = max(first, 0) as usize;

    // Zero-based index one past the last selected character.
    let requested_end = end.parse::<isize>().unwrap_or(0);
    let last = match requested_end.cmp(&0) {
        Ordering::Greater => requested_end - 1,
        Ordering::Equal => total,
        Ordering::Less => requested_end + total,
    };
    let last = max(last, 0) as usize;

    if last <= first {
        return None;
    }

    // Walk the string once: the first `nth` lands on the start character, the second
    // continues from there to the character sitting at the exclusive end.
    let mut offsets = text.char_indices().map(|(offset, _)| offset);
    let lower = match offsets.nth(first) {
        Some(offset) => offset,
        None => return None,
    };
    let upper = offsets.nth(last - first - 1).unwrap_or(text.len());

    Some((lower, upper))
}
/// See utf8.dm for what the codes correspond to.
///
/// Decodes the bytes in `args[1]` into a `String`, using the encoding named by the
/// numeric code in `args[0]`. Codes 874 and 1250 through 1258 are looked up as Windows
/// code pages, 2312 selects `GB18030`, and everything else (including a code that is not
/// valid UTF-8 or not a number) falls back to `WINDOWS_1252`.
///
/// # Safety
///
/// `args` must hold at least two pointers, each to a valid NUL-terminated string; both
/// are read through `CStr::from_ptr`. Indexing panics if fewer than two are supplied.
pub(crate) unsafe fn decode(args: &[*const libc::c_char]) -> String {
let bytes = CStr::from_ptr(args[1]).to_bytes();
CStr::from_ptr(args[0])
.to_str()
.map(|e| e.parse::<usize>().unwrap_or(1252))
.map(|e| match e {
e @ 874 | e @ 1250...1258 => encoding_from_windows_code_page(e).unwrap_or(WINDOWS_1252),
2312 => GB18030,
_ => WINDOWS_1252,
})
.unwrap_or(WINDOWS_1252)
// NOTE(review): the unwrap below presumably cannot fail because DecoderTrap::Replace
// substitutes undecodable input instead of reporting it - confirm against the encoding crate.
.decode(bytes, DecoderTrap::Replace)
.unwrap()
}
/// Strips unwanted characters from `text`, escapes angle brackets and caps the length.
///
/// Characters in U+0000..=U+001F and U+0080..=U+00A0 are dropped and do not count towards
/// the cap. `<` and `>` are replaced with `&lt;` and `&gt;`; each counts as ONE character
/// towards the cap even though the escaped form is longer. At most `cap` characters of the
/// input are kept, so a `cap` of 0 yields an empty string.
pub(crate) fn sanitize(text: &str, cap: usize) -> String {
    let mut out = String::with_capacity(text.len());

    let kept = text
        .chars()
        .filter(|&character| match character {
            '\u{0000}'..='\u{001F}' | '\u{0080}'..='\u{00A0}' => false,
            _ => true,
        })
        // Limiting before anything is pushed means a cap of 0 really keeps nothing.
        .take(cap);

    for character in kept {
        match character {
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            _ => out.push(character),
        }
    }

    out
}
// Unit tests for the text helpers in this file. The plain Rust helpers
// (`sanitize`, `decode`, `byte_bounds`) are called directly; the `utf8_*` and
// `strict_ascii` entry points are driven through `test_byond_call_args`, which
// passes the arguments as strings and yields the result as a string.
#[cfg(test)]
mod tests {
use super::*;
use byond::call::test_byond_call_args;
use std::ffi::CString;
// `sanitize` drops control characters, escapes `<`/`>`, and stops after `cap`
// kept characters (skipped characters do not count toward the cap).
#[test]
fn test_sanitize() {
assert_eq!(sanitize("testing!", 1024), "testing!");
assert_eq!(sanitize("testing<>!", 1024), "testing&lt;&gt;!");
assert_eq!(sanitize("testing\n\n\n<>!", 1024), "testing&lt;&gt;!");
assert_eq!(sanitize("testing\n\u{0088}\n<>!", 1024), "testing&lt;&gt;!");
assert_eq!(
sanitize("<script src='hacked.js'></script>icky ocky!\n<>!", 1024),
"&lt;script src='hacked.js'&gt;&lt;/script&gt;icky ocky!&lt;&gt;!"
);
assert_eq!(sanitize("test", 3), "tes");
assert_eq!(sanitize("\n\n\ntest", 3), "tes");
assert_eq!(sanitize("\n\n\n>test", 3), "&gt;te");
}
// `decode` takes `[code page, bytes]` as C strings; the same bytes are decoded
// under code pages 1252 and 1251 to check that the code page is honoured.
#[test]
fn test_utf8() {
let encoding = CString::new(b"1252".as_ref()).unwrap();
let test = CString::new(b"Hi there!".as_ref()).unwrap();
let both = [encoding.as_ptr(), test.as_ptr()];
unsafe { assert_eq!(decode(&both), "Hi there!") };
let encoding = CString::new(b"1252".as_ref()).unwrap();
let test = CString::new(b"H\xed th\xe9r\xe9!".as_ref()).unwrap();
let both = [encoding.as_ptr(), test.as_ptr()];
unsafe { assert_eq!(decode(&both), "Hí théré!") };
let encoding = CString::new(b"1251".as_ref()).unwrap();
let both = [encoding.as_ptr(), test.as_ptr()];
unsafe { assert_eq!(decode(&both), "Hн thйrй!") };
let encoding = CString::new(b"2312".as_ref()).unwrap();
let test = CString::new(b"\xDE\xC4".as_ref()).unwrap();
let both = [encoding.as_ptr(), test.as_ptr()];
// NOTE(review): the expected value here is the empty string; confirm that is
// really what these two bytes decode to under code page 2312 (GB18030).
unsafe { assert_eq!(decode(&both), "") };
}
// `byte_bounds` returns byte offsets, so the multi-byte cases below expect
// offsets larger than the character positions that were requested.
#[test]
fn test_byte_bounds() {
assert_eq!(byte_bounds("abcdefgh", "1", "0"), Some((0, 8)));
assert_eq!(byte_bounds("abcdefgh", "0", "0"), Some((0, 8)));
assert_eq!(byte_bounds("abcdefgh", "-2", "0"), Some((6, 8)));
assert_eq!(byte_bounds("abcdefgh", "-4", "-2"), Some((4, 6)));
assert_eq!(
byte_bounds("abcdefghijklmnopwrstuvwxyz", "-4", "-2"),
Some((22, 24))
);
assert_eq!(byte_bounds("abcdefgh", "-20", "-2"), Some((0, 6)));
assert_eq!(byte_bounds("abcdefgh", "2", "1"), None);
assert_eq!(byte_bounds("àbçdéfgh", "1", "0"), Some((0, 11)));
assert_eq!(byte_bounds("àbç👏défgh", "2", "0"), Some((2, 15)));
assert_eq!(byte_bounds("👏àbç👏défgh", "2", "0"), Some((4, 19)));
assert_eq!(byte_bounds("abcdefgh", "20", "40"), None);
assert_eq!(byte_bounds("abcdefgh", "3", "40"), Some((2, 8)));
}
// Arguments are (haystack, needle, start, end); "0" is the expected result
// when the needle is not found inside the requested range.
#[test]
fn test_utf8_find() {
assert_eq!(
test_byond_call_args(utf8_find, &["abcdefgh", "c", "1", "0"]),
"3"
);
assert_eq!(
test_byond_call_args(utf8_find, &["abcdefgh", "g", "1", "3"]),
"0"
);
assert_eq!(
test_byond_call_args(utf8_find, &["abcdefgh", "z", "1", "3"]),
"0"
);
}
// Length is expected in characters, not bytes.
#[test]
fn test_utf8_len() {
assert_eq!(test_byond_call_args(utf8_len, &["abc"]), "3");
assert_eq!(test_byond_call_args(utf8_len, &[""]), "0");
assert_eq!(
test_byond_call_args(utf8_len, &["👏àbç👏défgh"]),
"10"
);
}
// One-indexed character lookup; negative indices count from the end, and
// index 0 or anything out of range is expected to give an empty string.
#[test]
fn test_utf8_index() {
assert_eq!(test_byond_call_args(utf8_index, &["abc", "1"]), "a");
assert_eq!(test_byond_call_args(utf8_index, &["abc", "3"]), "c");
assert_eq!(test_byond_call_args(utf8_index, &["abc", "-2"]), "b");
assert_eq!(test_byond_call_args(utf8_index, &["abc", "-1"]), "c");
assert_eq!(test_byond_call_args(utf8_index, &["abc", "5"]), "");
assert_eq!(test_byond_call_args(utf8_index, &["abc", "0"]), "");
assert_eq!(test_byond_call_args(utf8_index, &["abc", "-10"]), "");
assert_eq!(test_byond_call_args(utf8_index, &["a👏bc", "3"]), "b");
assert_eq!(test_byond_call_args(utf8_index, &["a👏bc", "2"]), "👏");
}
// Arguments are (text, start, end) with an exclusive end; an empty or
// out-of-range selection is expected to give an empty string.
#[test]
fn test_utf8_copy() {
assert_eq!(
test_byond_call_args(utf8_copy, &["abcdefgh", "1", "5"]),
"abcd"
);
assert_eq!(
test_byond_call_args(utf8_copy, &["a👏cdefgh", "1", "5"]),
"a👏cd"
);
assert_eq!(
test_byond_call_args(utf8_copy, &["abcdefgh", "-5", "-1"]),
"defg"
);
assert_eq!(
test_byond_call_args(utf8_copy, &["abcdefgh", "120", "200"]),
""
);
assert_eq!(
test_byond_call_args(utf8_copy, &["abcdefgh", "1", "2000"]),
"abcdefgh"
);
assert_eq!(test_byond_call_args(utf8_copy, &["abcdefgh", "5", "1"]), "");
assert_eq!(
test_byond_call_args(utf8_copy, &["abcdefgh", "5", "0"]),
"efgh"
);
assert_eq!(
test_byond_call_args(utf8_copy, &["abcdefgh", "5", "-2"]),
"ef"
)
}
// Arguments are (text, needle, replacement, start, end); text before `start`
// is expected to be left untouched.
#[test]
fn test_utf8_replace() {
assert_eq!(
test_byond_call_args(utf8_replace, &["Hello world!", "o", "z", "1", "0"]),
"Hellz wzrld!"
);
assert_eq!(
test_byond_call_args(utf8_replace, &["Hello world!", "o", "👏", "1", "0"]),
"Hell👏 w👏rld!"
);
assert_eq!(
test_byond_call_args(utf8_replace, &["Hell👏 w👏rld!", "👏", "a", "1", "0"]),
"Hella warld!"
);
assert_eq!(
test_byond_call_args(utf8_replace, &["Hello world!", "👏", "a", "1", "0"]),
"Hello world!"
);
assert_eq!(
test_byond_call_args(utf8_replace, &["Hello world!", "o", "a", "7", "0"]),
"Hello warld!"
);
assert_eq!(
test_byond_call_args(utf8_replace, &["Hello world!", "o", "aAa", "7", "0"]),
"Hello waAarld!"
);
assert_eq!(
test_byond_call_args(utf8_replace, &["Hello world!", "ll", "aAa", "1", "0"]),
"HeaAao world!"
);
}
#[test]
fn test_utf8_uppercase() {
assert_eq!(test_byond_call_args(utf8_uppercase, &["Hello"]), "HELLO");
}
#[test]
fn test_utf8_lowercase() {
assert_eq!(test_byond_call_args(utf8_lowercase, &["Hello"]), "hello");
}
// Non-ASCII characters are expected to be removed, not replaced.
#[test]
fn test_strict_ascii() {
assert_eq!(test_byond_call_args(strict_ascii, &["Hello"]), "Hello");
assert_eq!(test_byond_call_args(strict_ascii, &["Hell👏"]), "Hell");
assert_eq!(test_byond_call_args(strict_ascii, &["Héllö"]), "Hll");
}
// Reversal is expected to keep a multi-byte character intact.
#[test]
fn test_utf8_reverse() {
assert_eq!(test_byond_call_args(utf8_reverse, &["Hello!"]), "!olleH");
assert_eq!(
test_byond_call_args(utf8_reverse, &["Hello!👏"]),
"👏!olleH"
);
}
// Arguments are (text, target width, pad string). A width of 0 or a width that
// is not a number is expected to leave the text unchanged.
#[test]
fn test_utf8_leftpad() {
// NOTE(review): the emoji case below pads a 6-character string to width 10
// with four pad characters, yet this expected literal shows a single leading
// space; confirm the literal has not lost whitespace.
assert_eq!(
test_byond_call_args(utf8_leftpad, &["Hello!", "10", " "]),
" Hello!"
);
assert_eq!(
test_byond_call_args(utf8_leftpad, &["Hello!", "0", " "]),
"Hello!"
);
assert_eq!(
test_byond_call_args(utf8_leftpad, &["Hello!", "🤔", " "]),
"Hello!"
);
assert_eq!(
test_byond_call_args(utf8_leftpad, &["Hello!", "10", "🌭"]),
"🌭🌭🌭🌭Hello!"
);
assert_eq!(
test_byond_call_args(utf8_leftpad, &["He🌭🌭o!", "20", "!"]),
"!!!!!!!!!!!!!!He🌭🌭o!"
);
}
// "1" is expected only when every character of the input is whitespace.
#[test]
fn test_utf8_is_whitespace() {
assert_eq!(test_byond_call_args(utf8_is_whitespace, &[" "]), "1");
assert_eq!(
test_byond_call_args(utf8_is_whitespace, &[" \r\n\t\u{A0}"]),
"1"
); // "\u{A0}" is U+00A0 NO-BREAK SPACE, AKA &nbsp;
assert_eq!(test_byond_call_args(utf8_is_whitespace, &[" hi "]), "0");
assert_eq!(
test_byond_call_args(utf8_is_whitespace, &[" \u{200B} "]),
"0"
); // U+200B ZERO-WIDTH SPACE is NOT whitespace following Unicode.
}
// Only leading and trailing whitespace is expected to be removed; whitespace
// inside the text (including U+00A0) stays.
#[test]
fn test_utf8_trim() {
assert_eq!(test_byond_call_args(utf8_trim, &[" "]), "");
// "\u{A0}" is U+00A0 NO-BREAK SPACE, AKA &nbsp;
assert_eq!(test_byond_call_args(utf8_trim, &[" \r\n\t\u{A0}"]), "");
assert_eq!(test_byond_call_args(utf8_trim, &[" hi "]), "hi");
// U+200B ZERO-WIDTH SPACE is NOT whitespace following Unicode.
assert_eq!(test_byond_call_args(utf8_trim, &[" \u{200B} "]), "\u{200B}");
assert_eq!(
test_byond_call_args(utf8_trim, &[" hi there! "]),
"hi there!"
);
assert_eq!(
test_byond_call_args(utf8_trim, &[" hi\u{A0}there! "]),
"hi\u{A0}there!"
);
}
}

View File

@@ -1561,7 +1561,6 @@
#include "code\modules\library\computers\base.dm"
#include "code\modules\library\computers\checkout.dm"
#include "code\modules\library\computers\public.dm"
#include "code\modules\libvg\utf8.dm"
#include "code\modules\lighting\lighting_area.dm"
#include "code\modules\lighting\lighting_atom.dm"
#include "code\modules\lighting\lighting_corner.dm"