mirror of
https://github.com/vgstation-coders/vgstation13.git
synced 2025-12-09 16:14:13 +00:00
Removes Unicode stuff (#26670)
* Removes Unicode stuff * Fixes capitalize() and examine * Not important but how did this happen * No more emoji in say or HTML in me * Clarifies comment * Better HTML sanitization * Rebuilds dll * Fixes some browser windows * Fixes telecomms scripts (lazily) This is the easy way out, but could probably be made faster by doing the byte counting ourselves
This commit is contained in:
@@ -92,6 +92,10 @@ forLineInText(text)
|
||||
/proc/sanitize(var/t,var/list/repl_chars = null)
|
||||
return html_encode(sanitize_simple(t,repl_chars))
|
||||
|
||||
/proc/sanitize_speech(var/t, var/limit = MAX_MESSAGE_LEN)
|
||||
var/static/regex/speech_regex = regex(@"[^ -~¡-ÿ]", "g") //Matches all characters not in the printable ASCII range or (most of) the Latin-1 supplement. In BYOND, \w doesn't work outside the ASCII range, so it's no help here.
|
||||
return trim(copytext(speech_regex.Replace(t, "*"), 1, limit)) //Note that this does NOT scrub HTML, because this is done in different places in me and say messages.
|
||||
|
||||
//Runs sanitize and strip_html_simple
|
||||
//I believe strip_html_simple() is required to run first to prevent '<' from displaying as '<' after sanitize() calls byond's html_encode()
|
||||
/proc/strip_html(var/t,var/limit=MAX_MESSAGE_LEN)
|
||||
@@ -103,10 +107,10 @@ forLineInText(text)
|
||||
return copytext((html_encode(strip_html_simple(t))),1,limit)
|
||||
|
||||
/proc/reverse_text(txt)
|
||||
var/i = length(txt)+1
|
||||
. = ""
|
||||
while(--i)
|
||||
. += copytext(txt,i,i+1)
|
||||
var/i = length(txt)+1
|
||||
. = ""
|
||||
while(--i)
|
||||
. += copytext(txt,i,i+1)
|
||||
|
||||
/*
|
||||
* returns null if there is any bad text in the string
|
||||
@@ -138,7 +142,7 @@ forLineInText(text)
|
||||
// Used to get a sanitized input.
|
||||
/proc/stripped_input(var/mob/user, var/message = "", var/title = "", var/default = "", var/max_length=MAX_MESSAGE_LEN)
|
||||
var/name = input(user, message, title, default) as null|text
|
||||
return utf8_sanitize(name, user, max_length)
|
||||
return strip_html_simple(name, max_length)
|
||||
|
||||
//Filters out undesirable characters from names
|
||||
/proc/reject_bad_name(var/t_in, var/allow_numbers=0, var/max_length=MAX_NAME_LEN)
|
||||
@@ -320,7 +324,7 @@ proc/checkhtml(var/t)
|
||||
|
||||
//Returns a string with the first element of the string capitalized.
|
||||
/proc/capitalize(var/t as text)
|
||||
return uppertext(copytext(t, 1, 2)) + copytext(t, 2)
|
||||
return uppertext(copytext_char(t, 1, 2)) + copytext_char(t, 2)
|
||||
|
||||
//Centers text by adding spaces to either side of the string.
|
||||
/proc/dd_centertext(message, length)
|
||||
|
||||
@@ -1673,9 +1673,8 @@ Game Mode config tags:
|
||||
|
||||
// A standard proc for generic output to the msay window, Not useful for things that have their own prefs settings (prayers for instance)
|
||||
/proc/output_to_msay(msg)
|
||||
var/sane_msg = strict_ascii(msg)
|
||||
for(var/client/C in admins)
|
||||
C.output_to_special_tab(sane_msg)
|
||||
C.output_to_special_tab(msg)
|
||||
|
||||
// This is awful and probably should be thrown away at some point.
|
||||
/proc/generic_projectile_fire(var/atom/target, var/atom/source, var/obj/item/projectile/projectile, var/shot_sound, var/mob/firer)
|
||||
|
||||
@@ -72,7 +72,7 @@
|
||||
|
||||
return {"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<!--<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">-->
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||
<head>
|
||||
[head_content]
|
||||
@@ -181,7 +181,7 @@
|
||||
|
||||
return {"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<head>
|
||||
[head_content]
|
||||
</head>
|
||||
|
||||
@@ -206,7 +206,7 @@
|
||||
body += "</ul>"
|
||||
body = jointext(body,"")
|
||||
|
||||
var/html = "<html><head>"
|
||||
var/html = "<html><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"><head>"
|
||||
if (title)
|
||||
html += "<title>[title]</title>"
|
||||
html += {"<style>
|
||||
|
||||
@@ -501,7 +501,7 @@
|
||||
var/reminder = input("Write the reminder.", text("Cult reminder")) as null | message
|
||||
if (!reminder)
|
||||
return
|
||||
reminder = utf8_sanitize(reminder) // No weird HTML
|
||||
reminder = strip_html_simple(reminder) // No weird HTML
|
||||
var/number = cult.cult_reminders.len
|
||||
var/text = "[number + 1]) [reminder], by [user.real_name]."
|
||||
cult.cult_reminders += text
|
||||
|
||||
@@ -317,4 +317,4 @@ var/list/pointers = list()
|
||||
for(var/d in data)
|
||||
var/val = data[d]
|
||||
if(istext(val))
|
||||
data[d] = utf8_sanitize(val)
|
||||
data[d] = strip_html_simple(val)
|
||||
|
||||
@@ -589,7 +589,7 @@ var/list/obj/machinery/newscaster/allCasters = list() //Global list that will co
|
||||
for(var/datum/feed_channel/F in news_network.network_channels)
|
||||
if( (!F.locked || F.author == scanned_user) && !F.censored)
|
||||
available_channels += F.channel_name
|
||||
channel_name = utf8_sanitize(input(usr, "Choose receiving Feed Channel", "Network Channel Handler") in available_channels )
|
||||
channel_name = input(usr, "Choose receiving Feed Channel", "Network Channel Handler") in available_channels
|
||||
updateUsrDialog()
|
||||
|
||||
else if(href_list["set_new_message"])
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
var/turf/T = get_turf(A)
|
||||
playsound(src.loc, sound_type, 10, 1)
|
||||
var/obj/item/weapon/reagent_containers/food/S = new food_type(T)
|
||||
to_chat(user,"Fabricating [utf8_lowercase(S.name)]..")
|
||||
to_chat(user,"Fabricating [lowertext(S.name)]..")
|
||||
if(toxin)
|
||||
S.reagents.add_reagent(toxin_type, toxin_amount)
|
||||
if(isrobot(user))
|
||||
@@ -84,4 +84,4 @@
|
||||
/obj/item/weapon/cookiesynth/lollipop
|
||||
name = "medipop synthesizer"
|
||||
desc = "A self-recharging device used to rapidly deploy medicinal lollipops. Tell your patient they were very brave today."
|
||||
food_type = /obj/item/weapon/reagent_containers/food/snacks/medipop
|
||||
food_type = /obj/item/weapon/reagent_containers/food/snacks/medipop
|
||||
|
||||
@@ -355,7 +355,7 @@
|
||||
return
|
||||
src.registered_name = n
|
||||
|
||||
var/u = strict_ascii(sanitize(stripped_input(user, "What occupation would you like to put on this card?\nNote: this will not grant or remove any access levels.", "Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN)))
|
||||
var/u = sanitize(stripped_input(user, "What occupation would you like to put on this card?\nNote: this will not grant or remove any access levels.", "Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN))
|
||||
if(!u)
|
||||
alert("Invalid assignment.")
|
||||
src.registered_name = null
|
||||
@@ -396,7 +396,7 @@
|
||||
to_chat(user, "Name changed to [new_name].")
|
||||
|
||||
if("Occupation")
|
||||
var/new_job = strict_ascii(sanitize(stripped_input(user,"What job would you like to put on this card?\nChanging occupation will not grant or remove any access levels.","Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN)))
|
||||
var/new_job = sanitize(stripped_input(user,"What job would you like to put on this card?\nChanging occupation will not grant or remove any access levels.","Nanotrasen undercover ID: occupation", "Detective", MAX_MESSAGE_LEN))
|
||||
if (!Adjacent(user) || user.incapacitated())
|
||||
return
|
||||
if (!new_job)
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
to_chat(src, "Guests may not use OOC.")
|
||||
return
|
||||
|
||||
msg = utf8_sanitize(msg, src, MAX_MESSAGE_LEN)
|
||||
msg = copytext(sanitize(msg), 1, MAX_MESSAGE_LEN)
|
||||
if(!msg)
|
||||
return
|
||||
|
||||
@@ -108,7 +108,7 @@
|
||||
to_chat(src, "Guests may not use OOC.")
|
||||
return
|
||||
|
||||
msg = to_utf8(copytext(sanitize(msg), 1, MAX_MESSAGE_LEN), src)
|
||||
msg = copytext(sanitize(msg), 1, MAX_MESSAGE_LEN)
|
||||
if(!msg)
|
||||
return
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
to_chat(user, "<span class='notice'>The authorized user field on the card is blank.</span>")
|
||||
|
||||
/obj/item/weapon/card/debit/proc/change_authorized_name(var/desired_authorized_name)
|
||||
authorized_name = uppertext(sanitize_simple(utf8_sanitize(desired_authorized_name, length = DEBIT_MAX_AUTHORIZED_NAME_LENGTH)))
|
||||
authorized_name = uppertext(sanitize_simple(strip_html_simple(desired_authorized_name, DEBIT_MAX_AUTHORIZED_NAME_LENGTH)))
|
||||
|
||||
/obj/item/weapon/card/debit/attack_self(var/mob/user)
|
||||
if(user.attack_delayer.blocked())
|
||||
|
||||
@@ -4275,7 +4275,7 @@
|
||||
src.access_news_network()
|
||||
|
||||
else if(href_list["ac_set_channel_name"])
|
||||
src.admincaster_feed_channel.channel_name = utf8_sanitize(input(usr, "Provide a Feed Channel Name", "Network Channel Handler", ""))
|
||||
src.admincaster_feed_channel.channel_name = stripped_input(usr, "Provide a Feed Channel Name", "Network Channel Handler", "")
|
||||
while (findtext(src.admincaster_feed_channel.channel_name," ") == 1)
|
||||
src.admincaster_feed_channel.channel_name = copytext(src.admincaster_feed_channel.channel_name,2,length(src.admincaster_feed_channel.channel_name)+1)
|
||||
src.access_news_network()
|
||||
|
||||
@@ -84,7 +84,7 @@ var/list/adminhelp_ignored_words = list("unknown","the","a","an","of","monkey","
|
||||
return //this doesn't happen
|
||||
|
||||
var/ref_mob = "\ref[mob]"
|
||||
msg = "<span class='notice'><b><font color=red>HELP: </font>[key_name(src, 1)] (<A HREF='?_src_=holder;adminmoreinfo=[ref_mob]'>?</A>) (<A HREF='?_src_=holder;adminplayeropts=[ref_mob]'>PP</A>) (<a href='?_src_=holder;role_panel=[ref_mob]'>RP</a>) (<A HREF='?_src_=vars;Vars=[ref_mob]'>VV</A>) (<A HREF='?_src_=holder;subtlemessage=[ref_mob]'>SM</A>) (<A HREF='?_src_=holder;adminplayerobservejump=[ref_mob]'>JMP</A>) (<A HREF='?_src_=holder;check_antagonist=1'>CA</A>) [ai_found ? " (<A HREF='?_src_=holder;adminchecklaws=[ref_mob]'>CL</A>)" : ""]:</b> [strict_ascii(msg)]</span>"
|
||||
msg = "<span class='notice'><b><font color=red>HELP: </font>[key_name(src, 1)] (<A HREF='?_src_=holder;adminmoreinfo=[ref_mob]'>?</A>) (<A HREF='?_src_=holder;adminplayeropts=[ref_mob]'>PP</A>) (<a href='?_src_=holder;role_panel=[ref_mob]'>RP</a>) (<A HREF='?_src_=vars;Vars=[ref_mob]'>VV</A>) (<A HREF='?_src_=holder;subtlemessage=[ref_mob]'>SM</A>) (<A HREF='?_src_=holder;adminplayerobservejump=[ref_mob]'>JMP</A>) (<A HREF='?_src_=holder;check_antagonist=1'>CA</A>) [ai_found ? " (<A HREF='?_src_=holder;adminchecklaws=[ref_mob]'>CL</A>)" : ""]:</b> [msg]</span>"
|
||||
|
||||
//send this msg to all admins
|
||||
var/admin_number_afk = 0
|
||||
|
||||
@@ -135,7 +135,7 @@
|
||||
adminhelp(reply) //sender has left, adminhelp instead
|
||||
return
|
||||
|
||||
recieve_message = "<font color='[recieve_color]'>[recieve_pm_type] PM from-<b>[key_name(src, C, C.holder ? 1 : 0)]</b>: [strict_ascii(msg)]</font>"
|
||||
recieve_message = "<font color='[recieve_color]'>[recieve_pm_type] PM from-<b>[key_name(src, C, C.holder ? 1 : 0)]</b>: [msg]</font>"
|
||||
C.output_to_special_tab(recieve_message, force_focus = TRUE)
|
||||
|
||||
output_to_special_tab("<span class='notice'>[send_pm_type]PM to-<b>[key_name(C, src, holder ? 1 : 0)]</b>: [msg]</span>")
|
||||
|
||||
@@ -179,7 +179,7 @@ var/global/list/assembly_short_name_to_type = list() //Please, I beg you, don't
|
||||
if(!istext(new_value)) //Attempted to write a non-string to a string var - convert the non-string into a string and continue
|
||||
new_value = "[new_value]"
|
||||
|
||||
new_value = utf8_sanitize(new_value, length = MAX_TEXT_VALUE_LEN)
|
||||
new_value = strip_html(new_value, MAX_TEXT_VALUE_LEN)
|
||||
|
||||
//text values can accept either numbers or text, so don't check for that
|
||||
|
||||
@@ -303,4 +303,4 @@ var/global/list/assembly_short_name_to_type = list() //Please, I beg you, don't
|
||||
return 0
|
||||
user.set_machine(src)
|
||||
interact(user)
|
||||
return 1
|
||||
return 1
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
// Note about encodings:
|
||||
// Encodings are passed by number as it's simplest to do it like this (citation needed)
|
||||
// This may cause some confusion with what codes correspond how.
|
||||
//
|
||||
// 874 and 1250-1258 are Windows CodePage encodings. The number corresponds to the CodePage.
|
||||
// 2312 is gb2312 (Chinese)
|
||||
/proc/_determine_encoding(var/mob_or_client)
|
||||
. = "1252"
|
||||
if (isclient(mob_or_client))
|
||||
var/client/C = mob_or_client
|
||||
. = C.encoding
|
||||
|
||||
else if (ismob(mob_or_client))
|
||||
var/mob/M = mob_or_client
|
||||
if (M.client)
|
||||
. = M.client.encoding
|
||||
|
||||
|
||||
/proc/to_utf8(var/message, var/mob_or_client)
|
||||
return LIBVG("to_utf8", _determine_encoding(mob_or_client), message)
|
||||
|
||||
// Converts a byte string to a UTF-8 string, sanitizes it and caps the length.
|
||||
/proc/utf8_sanitize(var/message, var/mob_or_client, var/length = MAX_MESSAGE_LEN)
|
||||
return LIBVG("utf8_sanitize", _determine_encoding(mob_or_client), message, num2text(length))
|
||||
|
||||
// Get the length (Unicode Scalars) of a UTF-8 string.
|
||||
/proc/utf8_len(var/message)
|
||||
return text2num(LIBVG("utf8_len", message))
|
||||
|
||||
/proc/utf8_byte_len(var/a)
|
||||
return length(a)
|
||||
|
||||
/proc/utf8_find(var/haystack, var/needle, var/start=1, var/end=0)
|
||||
return text2num(LIBVG("utf8_find", haystack, needle, "[start]", "[end]"))
|
||||
|
||||
/proc/utf8_copy(var/text, var/start=1, var/end=0)
|
||||
return LIBVG("utf8_copy", text, "[start]", "[end]")
|
||||
|
||||
/proc/utf8_replace(var/text, var/from, var/to_, var/start=1, var/end=0)
|
||||
return LIBVG("utf8_replace", text, from, to_, "[start]", "[end]")
|
||||
|
||||
/proc/utf8_index(var/text, var/index)
|
||||
return LIBVG("utf8_index", text, "[index]")
|
||||
|
||||
/proc/utf8_uppercase(var/text)
|
||||
return LIBVG("utf8_uppercase", text)
|
||||
|
||||
/proc/utf8_lowercase(var/text)
|
||||
return LIBVG("utf8_lowercase", text)
|
||||
|
||||
// Removes non-7-bit ASCII characters.
|
||||
// Useful for things which BYOND touches itself like object names.
|
||||
/proc/strict_ascii(var/text)
|
||||
return LIBVG("strict_ascii", text)
|
||||
|
||||
/proc/utf8_capitalize(var/text)
|
||||
return utf8_uppercase(utf8_index(text, 1)) + utf8_copy(text, 2)
|
||||
|
||||
/proc/utf8_reverse(var/text)
|
||||
return LIBVG("utf8_reverse", text)
|
||||
|
||||
/proc/utf8_leftpad(var/text, var/count, var/with=" ")
|
||||
return LIBVG("utf8_leftpad", text, "[count]", with)
|
||||
|
||||
/proc/utf8_is_whitespace(var/text)
|
||||
return text2num(LIBVG("utf8_is_whitespace", text))
|
||||
|
||||
/proc/utf8_trim(var/text)
|
||||
return LIBVG("utf8_trim", text)
|
||||
@@ -627,7 +627,7 @@
|
||||
else if (href_list["show_flavor_text"])
|
||||
if(can_show_flavor_text())
|
||||
var/datum/browser/popup = new(usr, "\ref[src]", name, 500, 200)
|
||||
popup.set_content(utf8_sanitize(flavor_text))
|
||||
popup.set_content(strip_html(flavor_text))
|
||||
popup.open()
|
||||
/*else if (href_list["lookmob"])
|
||||
var/mob/M = locate(href_list["lookmob"])
|
||||
@@ -1995,4 +1995,4 @@ mob/living/carbon/human/isincrit()
|
||||
if(istype(locked_to, /obj/structure/bed/therapy))
|
||||
return list(/datum/ambience/beach)
|
||||
else
|
||||
return ..()
|
||||
return ..()
|
||||
|
||||
@@ -114,7 +114,7 @@ var/list/headset_modes = list(
|
||||
say_testing(src, "/mob/living/say(\"[message]\", [bubble_type]")
|
||||
if(timestopped)
|
||||
return //under the effects of time magick
|
||||
message = trim(copytext(message, 1, MAX_MESSAGE_LEN))
|
||||
message = sanitize_speech(message)
|
||||
message = capitalize(message)
|
||||
|
||||
say_testing(src, "Say start, message=[message]")
|
||||
|
||||
@@ -1264,7 +1264,7 @@ Use this proc preferably at the end of an equipment loadout
|
||||
return
|
||||
if(!can_show_flavor_text())
|
||||
return
|
||||
var/msg = utf8_sanitize(flavor_text)
|
||||
var/msg = strip_html(flavor_text)
|
||||
if(findtext(msg, "http:") || findtext(msg, "https:") || findtext(msg, "www."))
|
||||
return "<font color='#ffa000'><b><a href='?src=\ref[src];show_flavor_text=1'>Show flavor text</a></b></font>"
|
||||
if(length(msg) <= 32)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
if(say_disabled)
|
||||
to_chat(usr, "<span class='danger'>Speech is currently admin-disabled.</span>")
|
||||
return
|
||||
usr.say(to_utf8(message, usr))
|
||||
usr.say(message)
|
||||
remove_typing_indicator()
|
||||
|
||||
/mob/verb/whisper(message as text)
|
||||
@@ -31,7 +31,7 @@
|
||||
remove_typing_indicator()
|
||||
return
|
||||
|
||||
message = utf8_sanitize(message, usr, MAX_MESSAGE_LEN)
|
||||
message = html_encode(sanitize_speech(message))
|
||||
|
||||
if(usr.stat == DEAD)
|
||||
usr.emote_dead(message)
|
||||
|
||||
@@ -419,7 +419,7 @@ nanoui is used to open and update nano browser uis
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<head>
|
||||
<script type='text/javascript'>
|
||||
function receiveUpdateData(jsonString)
|
||||
|
||||
@@ -141,7 +141,7 @@
|
||||
@param replacestring: the string to replace the substring with
|
||||
|
||||
*/
|
||||
interpreter.SetProc("replace", /proc/utf8_replace)
|
||||
interpreter.SetProc("replace", /proc/n_replacetext)
|
||||
|
||||
/*
|
||||
-> Locates an element/substring inside of a list or string
|
||||
@@ -178,7 +178,7 @@
|
||||
|
||||
interpreter.SetProc("pick", /proc/n_pick)
|
||||
interpreter.SetProc("prob", /proc/prob_chance)
|
||||
interpreter.SetProc("substr", /proc/utf8_copy)
|
||||
interpreter.SetProc("substr", /proc/docopytext)
|
||||
|
||||
interpreter.SetProc("shuffle", /proc/shuffle)
|
||||
interpreter.SetProc("uniquevector", /proc/uniquelist)
|
||||
@@ -188,13 +188,13 @@
|
||||
interpreter.SetProc("vector2text", /proc/vg_jointext)
|
||||
|
||||
// Strings
|
||||
interpreter.SetProc("lower", /proc/utf8_lowercase)
|
||||
interpreter.SetProc("upper", /proc/utf8_uppercase)
|
||||
interpreter.SetProc("lower", /proc/n_lower)
|
||||
interpreter.SetProc("upper", /proc/n_upper)
|
||||
interpreter.SetProc("explode", /proc/string_explode)
|
||||
interpreter.SetProc("repeat", /proc/n_repeat)
|
||||
interpreter.SetProc("reverse", /proc/utf8_reverse)
|
||||
interpreter.SetProc("reverse", /proc/reverse_text)
|
||||
interpreter.SetProc("tonum", /proc/n_str2num)
|
||||
interpreter.SetProc("capitalize", /proc/utf8_capitalize)
|
||||
interpreter.SetProc("capitalize", /proc/capitalize)
|
||||
//interpreter.SetProc("replacetextEx",/proc/n_replacetextEx)
|
||||
|
||||
// Numbers
|
||||
|
||||
@@ -118,7 +118,7 @@
|
||||
|
||||
else
|
||||
if(istext(haystack))
|
||||
return utf8_find(haystack, needle, start, end)
|
||||
return findtext_char(haystack, needle, start, end)
|
||||
|
||||
// Clone of copytext()
|
||||
/proc/docopytext(var/string, var/start = 1, var/end = 0)
|
||||
@@ -129,7 +129,7 @@
|
||||
// Clone of length()
|
||||
/proc/smartlength(var/container)
|
||||
if (istext(container))
|
||||
return utf8_len(container)
|
||||
return length_char(container)
|
||||
|
||||
return length(container)
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
Loads source code.
|
||||
*/
|
||||
/datum/n_Scanner/proc/LoadCode(var/c)
|
||||
code=strict_ascii(c)
|
||||
code = c
|
||||
|
||||
/*
|
||||
Proc: LoadCodeFromFile
|
||||
@@ -109,10 +109,10 @@
|
||||
|
||||
/datum/n_Scanner/nS_Scanner/Scan() //Creates a list of tokens from source code
|
||||
var/list/tokens = new
|
||||
for(, src.codepos <= length(code), src.codepos++)
|
||||
for(, src.codepos <= length_char(code), src.codepos++)
|
||||
|
||||
var/char = copytext(code, codepos, codepos + 1)
|
||||
var/nextchar = copytext(code, codepos + 1, codepos + 2)
|
||||
var/char = copytext_char(code, codepos, codepos + 1)
|
||||
var/nextchar = copytext_char(code, codepos + 1, codepos + 2)
|
||||
if(char == "\n")
|
||||
line++
|
||||
linepos = codepos
|
||||
@@ -155,12 +155,12 @@
|
||||
*/
|
||||
/datum/n_Scanner/nS_Scanner/proc/ReadString(start)
|
||||
var/buf
|
||||
for(, codepos <= length(code), codepos++)//codepos to length(code))
|
||||
var/char = copytext(code, codepos, codepos + 1)
|
||||
for(, codepos <= length_char(code), codepos++)//codepos to length(code))
|
||||
var/char = copytext_char(code, codepos, codepos + 1)
|
||||
switch(char)
|
||||
if("\\") //Backslash (\) encountered in string
|
||||
codepos++ //Skip next character in string, since it was escaped by a backslash
|
||||
char = copytext(code, codepos, codepos+1)
|
||||
char = copytext_char(code, codepos, codepos+1)
|
||||
switch(char)
|
||||
if("\\") //Double backslash
|
||||
buf += "\\"
|
||||
@@ -190,12 +190,12 @@
|
||||
Reads characters separated by an item in <delim> into a token.
|
||||
*/
|
||||
/datum/n_Scanner/nS_Scanner/proc/ReadWord()
|
||||
var/char = copytext(code, codepos, codepos + 1)
|
||||
var/char = copytext_char(code, codepos, codepos + 1)
|
||||
var/buf
|
||||
|
||||
while(!delim.Find(char) && codepos <= length(code))
|
||||
while(!delim.Find(char) && codepos <= length_char(code))
|
||||
buf += char
|
||||
char = copytext(code, ++codepos, codepos + 1)
|
||||
char = copytext_char(code, ++codepos, codepos + 1)
|
||||
codepos-- //allow main Scan() proc to read the delimiter
|
||||
if(options.keywords.Find(buf))
|
||||
return new/datum/token/keyword(buf, line, COL)
|
||||
@@ -207,14 +207,14 @@
|
||||
Reads a symbol into a token.
|
||||
*/
|
||||
/datum/n_Scanner/nS_Scanner/proc/ReadSymbol()
|
||||
var/char=copytext(code, codepos, codepos + 1)
|
||||
var/char=copytext_char(code, codepos, codepos + 1)
|
||||
var/buf
|
||||
|
||||
while(options.symbols.Find(buf + char))
|
||||
buf += char
|
||||
if(++codepos > length(code))
|
||||
if(++codepos > length_char(code))
|
||||
break
|
||||
char = copytext(code, codepos, codepos + 1)
|
||||
char = copytext_char(code, codepos, codepos + 1)
|
||||
|
||||
codepos-- //allow main Scan() proc to read the next character
|
||||
return new /datum/token/symbol(buf, line, COL)
|
||||
@@ -224,7 +224,7 @@
|
||||
Reads a number into a token.
|
||||
*/
|
||||
/datum/n_Scanner/nS_Scanner/proc/ReadNumber()
|
||||
var/char = copytext(code, codepos, codepos + 1)
|
||||
var/char = copytext_char(code, codepos, codepos + 1)
|
||||
var/buf
|
||||
var/dec = 0
|
||||
|
||||
@@ -234,7 +234,7 @@
|
||||
|
||||
buf += char
|
||||
codepos++
|
||||
char = copytext(code, codepos, codepos + 1)
|
||||
char = copytext_char(code, codepos, codepos + 1)
|
||||
|
||||
var/datum/token/number/T = new(buf, line, COL)
|
||||
if(isnull(text2num(buf)))
|
||||
@@ -250,8 +250,8 @@
|
||||
*/
|
||||
|
||||
/datum/n_Scanner/nS_Scanner/proc/ReadComment()
|
||||
var/char = copytext(code, codepos, codepos + 1)
|
||||
var/nextchar = copytext(code, codepos + 1, codepos + 2)
|
||||
var/char = copytext_char(code, codepos, codepos + 1)
|
||||
var/nextchar = copytext_char(code, codepos + 1, codepos + 2)
|
||||
var/charstring = char + nextchar
|
||||
var/comm = 1
|
||||
// 1: single-line comment
|
||||
@@ -263,23 +263,23 @@
|
||||
comm = 2 // starts a multi-line comment
|
||||
|
||||
while(comm)
|
||||
if(++codepos > length(code))
|
||||
if(++codepos > length_char(code))
|
||||
break
|
||||
|
||||
if(expectedend) // ending statement expected...
|
||||
char = copytext(code, codepos, codepos + 1)
|
||||
char = copytext_char(code, codepos, codepos + 1)
|
||||
if(char == "/") // ending statement found - beak the comment
|
||||
comm = 0
|
||||
break
|
||||
|
||||
if(comm == 2)
|
||||
// multi-line comments are broken by ending statements
|
||||
char = copytext(code, codepos, codepos + 1)
|
||||
char = copytext_char(code, codepos, codepos + 1)
|
||||
if(char == "*")
|
||||
expectedend = 1
|
||||
continue
|
||||
else
|
||||
char = copytext(code, codepos, codepos + 1)
|
||||
char = copytext_char(code, codepos, codepos + 1)
|
||||
if(char == "\n")
|
||||
comm = 0
|
||||
break
|
||||
|
||||
@@ -120,7 +120,6 @@ var/list/SPS_list = list()
|
||||
if(!builtin && (usr.get_active_hand() != src || usr.incapacitated())) //second check in case some chucklefuck drops the GPS while typing the tag
|
||||
to_chat(usr, "<span class = 'caution'>The GPS needs to be kept in your active hand!</span>")
|
||||
return TRUE
|
||||
a = strict_ascii(a)
|
||||
if(!a) //what a check
|
||||
return TRUE
|
||||
if(length(a) > 5)
|
||||
@@ -203,6 +202,3 @@ var/list/SPS_list = list()
|
||||
boop = TRUE
|
||||
if (boop)
|
||||
playsound(src,'sound/machines/radioboop.ogg',40,1)
|
||||
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ function highlightTerms(el) {
|
||||
else {
|
||||
toInsert = document.createTextNode(chunk);
|
||||
}
|
||||
|
||||
|
||||
// Insert back into our element
|
||||
if (pre.length == 0) {
|
||||
var result = parent.prepend(toInsert);
|
||||
@@ -178,12 +178,6 @@ function output(message, flag) {
|
||||
if (flag !== 'internal')
|
||||
opts.lastPang = Date.now();
|
||||
|
||||
// Basically we url_encode twice server side so we can manually read the encoded version and actually do UTF-8.
|
||||
// The replace for + is because FOR SOME REASON, BYOND replaces spaces with a + instead of %20, and a plus with %2b.
|
||||
// Marvelous.
|
||||
message = message.replace(/\+/g, "%20")
|
||||
message = decoder(message)
|
||||
|
||||
//Stuff we do along with appending a message
|
||||
var atBottom = false;
|
||||
var bodyHeight = $('body').height();
|
||||
|
||||
@@ -315,8 +315,7 @@ For the main html chat area
|
||||
|
||||
message = replacetext(message, "\n", "<br>")
|
||||
|
||||
// url_encode it TWICE, this way any UTF-8 characters are able to be decoded by the Javascript.
|
||||
target << output(url_encode(url_encode(message)), "browseroutput:output")
|
||||
target << output(url_encode(message), "browseroutput:output")
|
||||
|
||||
/datum/log //exists purely to capture to_chat() output
|
||||
var/log = ""
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
//! I have no idea WHY this is but to bench these you need to disable dylib in cargo.
|
||||
|
||||
#![feature(test)]
|
||||
extern crate test;
|
||||
extern crate libvg;
|
||||
|
||||
use libvg::utf8::{to_utf8, utf8_sanitize};
|
||||
use std::ffi::CString;
|
||||
use test::Bencher;
|
||||
|
||||
#[bench]
|
||||
fn bench_utf8(b: &mut Bencher) {
|
||||
let encoding = CString::new("1252".as_bytes()).unwrap();
|
||||
let message = CString::new(
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras elementum \
|
||||
mauris eu odio bibendum, ut porttitor libero vulputate. Vivamus et augue \
|
||||
justo. Quisque ut auctor lectus. Vestibulum ante ipsum primis in faucibus \
|
||||
orci luctus et ultrices posuere cubilia Curae; Maecenas non scelerisque \
|
||||
nisl. Suspendisse egestas, diam et aliquam ultrices, mi est condimentum \
|
||||
neque, eu fermentum dolor justo at lectus. Nam consequat dolor sit amet \
|
||||
massa convallis volutpat eget eget nibh. Nullam a ultricies elit. Etiam eu \
|
||||
quam interdum, ornare enim vitae, placerat dolor. Curabitur a tempor ex. \
|
||||
Curabitur metus elit, pharetra nec faucibus a, consectetur nec ex. \
|
||||
Pellentesque venenatis dapibus mi et vulputate. Nullam laoreet, tortor at \
|
||||
rutrum sagittis, nibh purus ultrices est, ut efficitur nulla dui vel \
|
||||
felis. Etiam malesuada nec orci in rutrum. Ut consectetur ante vitae arcu \
|
||||
ultricies hendrerit. Etiam a tempor enim."
|
||||
.as_bytes(),
|
||||
).unwrap();
|
||||
|
||||
let both = [encoding.as_ptr(), message.as_ptr()];
|
||||
|
||||
b.iter(|| to_utf8(2, both.as_ptr()))
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_sanitize(b: &mut Bencher) {
|
||||
let encoding = CString::new("1252".as_bytes()).unwrap();
|
||||
let message = CString::new(
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras elementum \
|
||||
mauris eu odio bibendum, ut porttitor libero vulputate. Vivamus et augue \
|
||||
justo. Quisque ut auctor lectus. Vestibulum ante ipsum primis in faucibus \
|
||||
orci luctus et ultrices posuere cubilia Curae; Maecenas non scelerisque \
|
||||
nisl. Suspendisse egestas, diam et aliquam ultrices, mi est condimentum \
|
||||
neque, eu fermentum dolor justo at lectus. Nam consequat dolor sit amet \
|
||||
massa convallis volutpat eget eget nibh. Nullam a ultricies elit. Etiam eu \
|
||||
quam interdum, ornare enim vitae, placerat dolor. Curabitur a tempor ex. \
|
||||
Curabitur metus elit, pharetra nec faucibus a, consectetur nec ex. \
|
||||
Pellentesque venenatis dapibus mi et vulputate. Nullam laoreet, tortor at \
|
||||
rutrum sagittis, nibh purus ultrices est, ut efficitur nulla dui vel \
|
||||
felis. Etiam malesuada nec orci in rutrum. Ut consectetur ante vitae arcu \
|
||||
ultricies hendrerit. Etiam a tempor enim."
|
||||
.as_bytes(),
|
||||
).unwrap();
|
||||
let cap = CString::new("1024".as_bytes()).unwrap();
|
||||
|
||||
let both = [encoding.as_ptr(), message.as_ptr(), cap.as_ptr()];
|
||||
|
||||
b.iter(|| utf8_sanitize(3, both.as_ptr()))
|
||||
}
|
||||
@@ -5,5 +5,3 @@
|
||||
extern crate byond;
|
||||
extern crate encoding;
|
||||
extern crate libc;
|
||||
|
||||
pub mod utf8;
|
||||
|
||||
@@ -1,516 +0,0 @@
|
||||
use byond::call::return_to_byond;
|
||||
use encoding::all::{WINDOWS_1252, ASCII, GB18030};
|
||||
use encoding::Encoding;
|
||||
use encoding::label::encoding_from_windows_code_page;
|
||||
use encoding::types::DecoderTrap;
|
||||
use libc;
|
||||
use std::cmp::{max, Ordering};
|
||||
use std::ffi::CStr;
|
||||
use std::slice;
|
||||
use std::ptr::null;
|
||||
|
||||
// Encodes a byte string to UTF-8, using the encoding supplied.
|
||||
//
|
||||
// Arguments are in the order of encoding, bytes.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn to_utf8(n: libc::c_int, v: *const *const libc::c_char) -> *const libc::c_char {
|
||||
// We do not let the byond crate handle arguments, as we want BYTES directly.
|
||||
// Unicode decode could fail on the second argument.
|
||||
let text = unsafe {
|
||||
let slice = slice::from_raw_parts(v, n as usize);
|
||||
|
||||
decode(&slice)
|
||||
};
|
||||
|
||||
return_to_byond(&text).unwrap_or(null())
|
||||
}
|
||||
|
||||
// Encodes a byte string with a windows encoding, filters bad characters and limits message length.
|
||||
//
|
||||
// Operations like message length are done on Unicode code points!
|
||||
// Arguments are in the order of encoding, bytes, cap.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn utf8_sanitize(
|
||||
n: libc::c_int,
|
||||
v: *const *const libc::c_char,
|
||||
) -> *const libc::c_char {
|
||||
// Can't use the BYOND crate again because of unicode conversion failing.
|
||||
let text = unsafe {
|
||||
let slice = slice::from_raw_parts(v, n as usize);
|
||||
let cap = CStr::from_ptr(slice[2])
|
||||
.to_str()
|
||||
.map(|cap| cap.parse::<usize>().unwrap_or(1024))
|
||||
.unwrap_or(1024);
|
||||
|
||||
sanitize(&decode(&slice), cap)
|
||||
};
|
||||
|
||||
return_to_byond(&text).unwrap_or(null())
|
||||
}
|
||||
|
||||
// Removes non-ASCII characters from the input string.
|
||||
#[no_mangle]
|
||||
pub extern "C" fn strict_ascii(
|
||||
n: libc::c_int,
|
||||
v: *const *const libc::c_char,
|
||||
) -> *const libc::c_char {
|
||||
let bytes = unsafe {
|
||||
let slice = slice::from_raw_parts(v, n as usize);
|
||||
CStr::from_ptr(slice[0]).to_bytes()
|
||||
};
|
||||
|
||||
return_to_byond(ASCII.decode(bytes, DecoderTrap::Ignore).unwrap()).unwrap_or(null())
|
||||
}
|
||||
|
||||
// Returns the length of a UTF-8 string.
|
||||
byond!(utf8_len: text; {
|
||||
format!("{}", text.chars().count())
|
||||
});
|
||||
|
||||
/* You saw nothing.
|
||||
/// Returns the BYTE length of a UTF-8 string.
|
||||
byond!(utf8_len_bytes: text; {
|
||||
format!("{}", text.len())
|
||||
});
|
||||
*/
|
||||
|
||||
// Searches `haystack` for `needle` inside the code-point range described by
// `start` and `end` (same conventions as `byte_bounds`).
//
// Returns the 1-based code-point position of the first match within the whole
// haystack as a decimal string, or "0" when the range is empty or nothing matches.
byond!(utf8_find: haystack, needle, start, end; {
    match byte_bounds(haystack, start, end) {
        Some((start, end)) => match haystack[start..end].find(needle) {
            // `find` reports a byte offset relative to the searched sub-slice, so it
            // has to be shifted by `start` before it means anything in `haystack`.
            // Both `start` and the offset sit on char boundaries, so the slice below
            // cannot split a code point.
            Some(offset) => (haystack[..start + offset].chars().count() + 1).to_string(),
            None => "0".to_string(),
        },
        None => "0".to_string(),
    }
});
|
||||
|
||||
// Returns the single code point at the 1-based position `index`, as a string slice.
//
// Negative positions count back from the end of the text. Position 0, an
// unparsable-as-zero case aside (unparsable input is treated as 1), and any
// position outside the text all yield the empty string.
byond!(utf8_index: text, index; {
    let requested = index.parse::<isize>().unwrap_or(1);

    // Zero is never a valid position.
    if requested == 0 {
        return "";
    }

    // 0-indexed position in the string, by code points.
    let offset = if requested > 0 {
        requested - 1
    } else {
        text.chars().count() as isize + requested
    };

    // A negative position that reaches before the first code point.
    if offset < 0 {
        return "";
    }

    match text.char_indices().nth(offset as usize) {
        // Slice exactly the bytes that make up this one code point.
        Some((begin, found)) => &text[begin..begin + found.len_utf8()],
        None => "",
    }
});
|
||||
|
||||
// Copies the code points of `text` selected by `start` and `end` (see
// `byte_bounds` for the indexing rules). Yields the empty string when the
// range selects nothing.
byond!(utf8_copy: text, start, end; {
    byte_bounds(text, start, end)
        .map(|(from_byte, to_byte)| &text[from_byte..to_byte])
        .unwrap_or("")
});
|
||||
|
||||
// Replaces every occurrence of `from` with `to`, but only inside the
// code-point range selected by `start` and `end` (see `byte_bounds`).
// Text outside the range is copied through untouched; when the range selects
// nothing the input is returned unchanged.
byond!(utf8_replace: text, from, to, start, end; {
    match byte_bounds(text, start, end) {
        None => text.to_string(),
        Some((start, end)) => {
            let replaced = text[start..end].replace(from, to);
            format!("{}{}{}", &text[..start], replaced, &text[end..])
        }
    }
});
|
||||
|
||||
// Returns the uppercase form of `text`, as produced by `str::to_uppercase`.
byond!(utf8_uppercase: text; {
    let upper: String = text.to_uppercase();
    upper
});
|
||||
|
||||
// Returns the lowercase form of `text`, as produced by `str::to_lowercase`.
byond!(utf8_lowercase: text; {
    let lower: String = text.to_lowercase();
    lower
});
|
||||
|
||||
// Reverses `text` code point by code point (not byte by byte).
byond!(utf8_reverse: text; {
    let mut reversed = String::with_capacity(text.len());
    reversed.extend(text.chars().rev());
    reversed
});
|
||||
|
||||
// Side note: I originally tried to use the left-pad crate on Cargo.
|
||||
// That crate is hilariously enough broken and doesn't understand what Unicode is.
|
||||
// 10/10 meme tier.
|
||||
byond!(utf8_leftpad: text, amount, with; {
|
||||
let amount = match amount.parse::<usize>() {
|
||||
Ok(a) => a,
|
||||
Err(_) => return text.to_owned()
|
||||
};
|
||||
|
||||
let with = with.chars().next().unwrap_or(' ');
|
||||
let text_len = text.chars().count();
|
||||
|
||||
if amount <= text_len {
|
||||
// Nothing would change.
|
||||
return text.into();
|
||||
}
|
||||
|
||||
let filler_amount = amount - text_len;
|
||||
|
||||
let mut out = String::with_capacity(text_len + filler_amount*with.len_utf8());
|
||||
|
||||
for _ in 0..filler_amount {
|
||||
out.push(with);
|
||||
}
|
||||
|
||||
out.push_str(text);
|
||||
|
||||
out
|
||||
});
|
||||
|
||||
// Reports "1" when every code point of `string` is Unicode whitespace
// (which is also the case for the empty string), and "0" otherwise.
byond!(utf8_is_whitespace: string; {
    if string.chars().all(char::is_whitespace) {
        "1"
    } else {
        "0"
    }
});
|
||||
|
||||
// Strips leading and trailing whitespace from `string` using `str::trim`.
byond!(utf8_trim: string; {
    let trimmed: &str = string.trim();
    trimmed
});
|
||||
|
||||
/// Translates BYOND-style `start` / `end` positions into a byte range of `text`.
///
/// Used by the copytext, findtext and replacetext equivalents. Positions are
/// 1-indexed code-point positions passed as decimal strings:
///
/// * `start` defaults to 1 when it cannot be parsed; a negative value counts
///   back from the end of the text and is clamped to the first code point.
/// * `end` is exclusive and defaults to 0 when it cannot be parsed; 0 means
///   "through the end of the text" and a negative value counts back from the end.
///
/// Returns `None` when the range is empty or starts beyond the text. An `end`
/// beyond the text is clamped to `text.len()`.
pub(crate) fn byte_bounds(text: &str, start: &str, end: &str) -> Option<(usize, usize)> {
    let total = text.chars().count() as isize;

    // BYOND uses 1-indexing; convert to a 0-indexed, non-negative code-point offset.
    let requested_start = start.parse::<isize>().unwrap_or(1);
    let first = if requested_start < 0 {
        requested_start + total
    } else {
        requested_start - 1
    };
    let first = max(first, 0) as usize;

    // Same conversion for the exclusive end, where 0 stands for "end of text".
    let requested_end = end.parse::<isize>().unwrap_or(0);
    let last = if requested_end > 0 {
        requested_end - 1
    } else if requested_end == 0 {
        total
    } else {
        requested_end + total
    };
    let last = max(last, 0) as usize;

    if last <= first {
        return None;
    }

    // Walk the string once: the first `nth` lands on the start code point, the
    // second continues from there to the code point at the exclusive end.
    let mut cursor = text.char_indices();
    let lower = cursor.nth(first).map(|(byte, _)| byte);
    let upper = cursor.nth(last - first - 1).map(|(byte, _)| byte);

    lower.map(|lo| (lo, upper.unwrap_or(text.len())))
}
|
||||
|
||||
/// See utf8.dm for what the codes correspond to.
|
||||
pub(crate) unsafe fn decode(args: &[*const libc::c_char]) -> String {
|
||||
let bytes = CStr::from_ptr(args[1]).to_bytes();
|
||||
CStr::from_ptr(args[0])
|
||||
.to_str()
|
||||
.map(|e| e.parse::<usize>().unwrap_or(1252))
|
||||
.map(|e| match e {
|
||||
e @ 874 | e @ 1250...1258 => encoding_from_windows_code_page(e).unwrap_or(WINDOWS_1252),
|
||||
2312 => GB18030,
|
||||
_ => WINDOWS_1252,
|
||||
})
|
||||
.unwrap_or(WINDOWS_1252)
|
||||
.decode(bytes, DecoderTrap::Replace)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Filters control characters out of `text` and limits its length.
///
/// Code points in `U+0000..=U+001F` and `U+0080..=U+00A0` are dropped. At most
/// `cap` of the remaining code points are kept; dropped code points do not
/// count towards the cap, and a cap of 0 yields the empty string.
pub(crate) fn sanitize(text: &str, cap: usize) -> String {
    let mut out = String::with_capacity(text.len());

    // Applying the cap with `take` before anything is pushed makes `cap == 0`
    // produce an empty result instead of leaking the first character.
    let kept = text
        .chars()
        .filter(|&c| !(c <= '\u{001F}' || (c >= '\u{0080}' && c <= '\u{00A0}')))
        .take(cap);

    for character in kept {
        match character {
            // NOTE(review): these two arms emit the character unchanged. If they
            // were meant to HTML-escape angle brackets, the entity strings need to
            // be restored here (and in the tests) - confirm against upstream.
            '<' => out.push_str("<"),
            '>' => out.push_str(">"),
            other => out.push(other),
        }
    }

    out
}
|
||||
|
||||
/// Unit tests for the UTF-8 helpers. The `byond!`-generated entry points are
/// driven through `test_byond_call_args`; the plain helpers are called directly.
#[cfg(test)]
mod tests {
    use super::*;
    use byond::call::test_byond_call_args;
    use std::ffi::CString;

    /// Control characters are dropped and the cap counts only kept code points.
    #[test]
    fn test_sanitize() {
        assert_eq!(sanitize("testing!", 1024), "testing!");
        assert_eq!(sanitize("testing<>!", 1024), "testing<>!");
        assert_eq!(sanitize("testing\n\n\n<>!", 1024), "testing<>!");
        assert_eq!(sanitize("testing\n\u{0088}\n<>!", 1024), "testing<>!");
        assert_eq!(
            sanitize("<script src='hacked.js'></script>icky ocky!\n<>!", 1024),
            "<script src='hacked.js'></script>icky ocky!<>!"
        );
        assert_eq!(sanitize("test", 3), "tes");
        assert_eq!(sanitize("\n\n\ntest", 3), "tes");
        assert_eq!(sanitize("\n\n\n>test", 3), ">te");
    }

    /// `decode` honours the requested code page.
    #[test]
    fn test_utf8() {
        let encoding = CString::new(b"1252".as_ref()).unwrap();
        let test = CString::new(b"Hi there!".as_ref()).unwrap();
        let both = [encoding.as_ptr(), test.as_ptr()];

        unsafe { assert_eq!(decode(&both), "Hi there!") };

        let encoding = CString::new(b"1252".as_ref()).unwrap();
        let test = CString::new(b"H\xed th\xe9r\xe9!".as_ref()).unwrap();
        let both = [encoding.as_ptr(), test.as_ptr()];

        unsafe { assert_eq!(decode(&both), "Hí théré!") };

        // Same bytes, different code page.
        let encoding = CString::new(b"1251".as_ref()).unwrap();
        let both = [encoding.as_ptr(), test.as_ptr()];

        unsafe { assert_eq!(decode(&both), "Hн thйrй!") };

        let encoding = CString::new(b"2312".as_ref()).unwrap();
        let test = CString::new(b"\xDE\xC4".as_ref()).unwrap();
        let both = [encoding.as_ptr(), test.as_ptr()];

        unsafe { assert_eq!(decode(&both), "弈") };
    }

    /// 1-indexed, negative-aware positions map onto the right byte ranges.
    #[test]
    fn test_byte_bounds() {
        assert_eq!(byte_bounds("abcdefgh", "1", "0"), Some((0, 8)));
        assert_eq!(byte_bounds("abcdefgh", "0", "0"), Some((0, 8)));
        assert_eq!(byte_bounds("abcdefgh", "-2", "0"), Some((6, 8)));
        assert_eq!(byte_bounds("abcdefgh", "-4", "-2"), Some((4, 6)));
        assert_eq!(
            byte_bounds("abcdefghijklmnopwrstuvwxyz", "-4", "-2"),
            Some((22, 24))
        );
        assert_eq!(byte_bounds("abcdefgh", "-20", "-2"), Some((0, 6)));
        assert_eq!(byte_bounds("abcdefgh", "2", "1"), None);
        assert_eq!(byte_bounds("àbçdéfgh", "1", "0"), Some((0, 11)));
        assert_eq!(byte_bounds("àbç👏défgh", "2", "0"), Some((2, 15)));
        assert_eq!(byte_bounds("👏àbç👏défgh", "2", "0"), Some((4, 19)));
        assert_eq!(byte_bounds("abcdefgh", "20", "40"), None);
        assert_eq!(byte_bounds("abcdefgh", "3", "40"), Some((2, 8)));
    }

    #[test]
    fn test_utf8_find() {
        assert_eq!(
            test_byond_call_args(utf8_find, &["abcdefgh", "c", "1", "0"]),
            "3"
        );
        assert_eq!(
            test_byond_call_args(utf8_find, &["abcdefgh", "g", "1", "3"]),
            "0"
        );
        assert_eq!(
            test_byond_call_args(utf8_find, &["abcdefgh", "z", "1", "3"]),
            "0"
        );
    }

    #[test]
    fn test_utf8_len() {
        assert_eq!(test_byond_call_args(utf8_len, &["abc"]), "3");
        assert_eq!(test_byond_call_args(utf8_len, &[""]), "0");
        assert_eq!(
            test_byond_call_args(utf8_len, &["👏àbç👏défgh"]),
            "10"
        );
    }

    #[test]
    fn test_utf8_index() {
        assert_eq!(test_byond_call_args(utf8_index, &["abc", "1"]), "a");
        assert_eq!(test_byond_call_args(utf8_index, &["abc", "3"]), "c");
        assert_eq!(test_byond_call_args(utf8_index, &["abc", "-2"]), "b");
        assert_eq!(test_byond_call_args(utf8_index, &["abc", "-1"]), "c");
        assert_eq!(test_byond_call_args(utf8_index, &["abc", "5"]), "");
        assert_eq!(test_byond_call_args(utf8_index, &["abc", "0"]), "");
        assert_eq!(test_byond_call_args(utf8_index, &["abc", "-10"]), "");
        assert_eq!(test_byond_call_args(utf8_index, &["a👏bc", "3"]), "b");
        assert_eq!(test_byond_call_args(utf8_index, &["a👏bc", "2"]), "👏");
    }

    #[test]
    fn test_utf8_copy() {
        assert_eq!(
            test_byond_call_args(utf8_copy, &["abcdefgh", "1", "5"]),
            "abcd"
        );
        assert_eq!(
            test_byond_call_args(utf8_copy, &["a👏cdefgh", "1", "5"]),
            "a👏cd"
        );
        assert_eq!(
            test_byond_call_args(utf8_copy, &["abcdefgh", "-5", "-1"]),
            "defg"
        );
        assert_eq!(
            test_byond_call_args(utf8_copy, &["abcdefgh", "120", "200"]),
            ""
        );
        assert_eq!(
            test_byond_call_args(utf8_copy, &["abcdefgh", "1", "2000"]),
            "abcdefgh"
        );
        assert_eq!(test_byond_call_args(utf8_copy, &["abcdefgh", "5", "1"]), "");
        assert_eq!(
            test_byond_call_args(utf8_copy, &["abcdefgh", "5", "0"]),
            "efgh"
        );
        assert_eq!(
            test_byond_call_args(utf8_copy, &["abcdefgh", "5", "-2"]),
            "ef"
        );
    }

    #[test]
    fn test_utf8_replace() {
        assert_eq!(
            test_byond_call_args(utf8_replace, &["Hello world!", "o", "z", "1", "0"]),
            "Hellz wzrld!"
        );
        assert_eq!(
            test_byond_call_args(utf8_replace, &["Hello world!", "o", "👏", "1", "0"]),
            "Hell👏 w👏rld!"
        );
        assert_eq!(
            test_byond_call_args(utf8_replace, &["Hell👏 w👏rld!", "👏", "a", "1", "0"]),
            "Hella warld!"
        );
        assert_eq!(
            test_byond_call_args(utf8_replace, &["Hello world!", "👏", "a", "1", "0"]),
            "Hello world!"
        );
        assert_eq!(
            test_byond_call_args(utf8_replace, &["Hello world!", "o", "a", "7", "0"]),
            "Hello warld!"
        );
        assert_eq!(
            test_byond_call_args(utf8_replace, &["Hello world!", "o", "aAa", "7", "0"]),
            "Hello waAarld!"
        );
        assert_eq!(
            test_byond_call_args(utf8_replace, &["Hello world!", "ll", "aAa", "1", "0"]),
            "HeaAao world!"
        );
    }

    #[test]
    fn test_utf8_uppercase() {
        assert_eq!(test_byond_call_args(utf8_uppercase, &["Hello"]), "HELLO");
    }

    #[test]
    fn test_utf8_lowercase() {
        assert_eq!(test_byond_call_args(utf8_lowercase, &["Hello"]), "hello");
    }

    #[test]
    fn test_strict_ascii() {
        assert_eq!(test_byond_call_args(strict_ascii, &["Hello"]), "Hello");
        assert_eq!(test_byond_call_args(strict_ascii, &["Hell👏"]), "Hell");
        assert_eq!(test_byond_call_args(strict_ascii, &["Héllö"]), "Hll");
    }

    #[test]
    fn test_utf8_reverse() {
        assert_eq!(test_byond_call_args(utf8_reverse, &["Hello!"]), "!olleH");
        assert_eq!(
            test_byond_call_args(utf8_reverse, &["Hello!👏"]),
            "👏!olleH"
        );
    }

    #[test]
    fn test_utf8_leftpad() {
        // Padding a 6-code-point string to width 10 must add exactly four fillers.
        assert_eq!(
            test_byond_call_args(utf8_leftpad, &["Hello!", "10", " "]),
            "    Hello!"
        );
        assert_eq!(
            test_byond_call_args(utf8_leftpad, &["Hello!", "0", " "]),
            "Hello!"
        );
        // A width that is not a number leaves the text untouched.
        assert_eq!(
            test_byond_call_args(utf8_leftpad, &["Hello!", "🤔", " "]),
            "Hello!"
        );
        assert_eq!(
            test_byond_call_args(utf8_leftpad, &["Hello!", "10", "🌭"]),
            "🌭🌭🌭🌭Hello!"
        );
        assert_eq!(
            test_byond_call_args(utf8_leftpad, &["He🌭🌭o!", "20", "!"]),
            "!!!!!!!!!!!!!!He🌭🌭o!"
        );
    }

    #[test]
    fn test_utf8_is_whitespace() {
        assert_eq!(test_byond_call_args(utf8_is_whitespace, &[" "]), "1");
        // "\u{A0}" is U+00A0 NO-BREAK SPACE.
        assert_eq!(
            test_byond_call_args(utf8_is_whitespace, &[" \r\n\t\u{A0}"]),
            "1"
        );
        assert_eq!(test_byond_call_args(utf8_is_whitespace, &[" hi "]), "0");
        // U+200B ZERO-WIDTH SPACE is NOT whitespace following Unicode.
        assert_eq!(
            test_byond_call_args(utf8_is_whitespace, &[" \u{200B} "]),
            "0"
        );
    }

    #[test]
    fn test_utf8_trim() {
        assert_eq!(test_byond_call_args(utf8_trim, &[" "]), "");
        // "\u{A0}" is U+00A0 NO-BREAK SPACE.
        assert_eq!(test_byond_call_args(utf8_trim, &[" \r\n\t\u{A0}"]), "");
        assert_eq!(test_byond_call_args(utf8_trim, &[" hi "]), "hi");
        // U+200B ZERO-WIDTH SPACE is NOT whitespace following Unicode.
        assert_eq!(test_byond_call_args(utf8_trim, &[" \u{200B} "]), "\u{200B}");
        assert_eq!(
            test_byond_call_args(utf8_trim, &[" hi there! "]),
            "hi there!"
        );
        assert_eq!(
            test_byond_call_args(utf8_trim, &[" hi\u{A0}there! "]),
            "hi\u{A0}there!"
        );
    }
}
|
||||
@@ -1561,7 +1561,6 @@
|
||||
#include "code\modules\library\computers\base.dm"
|
||||
#include "code\modules\library\computers\checkout.dm"
|
||||
#include "code\modules\library\computers\public.dm"
|
||||
#include "code\modules\libvg\utf8.dm"
|
||||
#include "code\modules\lighting\lighting_area.dm"
|
||||
#include "code\modules\lighting\lighting_atom.dm"
|
||||
#include "code\modules\lighting\lighting_corner.dm"
|
||||
|
||||
Reference in New Issue
Block a user