mirror of
https://github.com/vgstation-coders/vgstation13.git
synced 2025-12-10 10:21:11 +00:00
* Removes Unicode stuff * Fixes capitalize() and examine * Not important but how did this happen * No more emoji in say or HTML in me * Clarifies comment * Better HTML sanitization * Rebuilds dll * Fixes some browser windows * Fixes telecomms scripts (lazily) This is the easy way out, but could probably be made faster by doing the byte counting ourselves
293 lines
6.9 KiB
Plaintext
293 lines
6.9 KiB
Plaintext
/*
|
|
File: Scanner
|
|
*/
|
|
/*
|
|
Class: n_Scanner
|
|
An object responsible for breaking up source code into tokens for use by the parser.
|
|
*/
|
|
/datum/n_Scanner
	// Source text to be tokenized; assigned by LoadCode().
	var/code

	/*
		Var: errors
		Fatal problems found by the scanner. If this list holds any items, it is not safe to parse the returned tokens.

		See Also:
		- <scriptError>
	*/
	var/list/errors = list()

	/*
		Var: warnings
		Non-fatal problems in the source code found by the scanner.
	*/
	var/list/warnings = list()
|
|
|
|
/*
|
|
Proc: LoadCode
|
|
Loads source code.
|
|
*/
|
|
/datum/n_Scanner/proc/LoadCode(var/c)
	// Store the given source text on the scanner for a later Scan() call.
	src.code = c
|
|
|
|
/*
|
|
Proc: LoadCodeFromFile
|
|
Gets the code from a file and calls <LoadCode()>.
|
|
*/
|
|
/datum/n_Scanner/proc/LoadCodeFromFile(var/f)
	// Read the file's text, then hand it to LoadCode() exactly as if it had been passed in directly.
	var/contents = file2text(f)
	LoadCode(contents)
|
|
|
|
/*
|
|
Proc: Scan
|
|
Runs the scanner and returns the resulting list of tokens. Ensure that <LoadCode()> has been called first.
|
|
*/
|
|
/datum/n_Scanner/proc/Scan()
	// Base implementation does nothing and yields null; /datum/n_Scanner/nS_Scanner overrides it.
	return
|
|
|
|
/*
|
|
Class: nS_Scanner
|
|
A scanner implementation for n_Script.
|
|
*/
|
|
/datum/n_Scanner/nS_Scanner
	/*
		Variable: codepos
		The scanner's position in the source code.
	*/
	var/codepos = 1

	// Current line number; incremented each time the scanner records a newline.
	var/line = 1

	// Value of codepos at the most recently recorded newline (column = codepos - linepos).
	var/linepos = 0

	var/datum/n_scriptOptions/nS_Options/options

	// Comment state flag.
	// 1: single-line
	// 2: multi-line
	var/commenting = 0

	/*
		Variable: ignore
		A list of characters that are ignored by the scanner.

		Default Value:
		Whitespace
	*/
	var/list/ignore = list(" ", "\t", "\n") // No tokens are produced for whitespace

	/*
		Variable: end_stmt
		A list of characters that end a statement. Each item may only be one character long.

		Default Value:
		Semicolon
	*/
	var/list/end_stmt = list(";")

	/*
		Variable: string_delim
		A list of characters that can start and end strings.

		Default Value:
		Double and single quotes.
	*/
	var/list/string_delim = list("\"", "'")

	/*
		Variable: delim
		A list of characters that denote the start of a new token. This list is automatically populated.
	*/
	var/list/delim = list()
|
|
|
|
/*
|
|
Macro: COL
|
|
The current column number.
|
|
*/
|
|
// Current column: offset of codepos from the last recorded line start (linepos).
// The expansion is parenthesized so it stays a single operand if COL is ever used inside a larger expression.
#define COL (codepos - linepos)
|
|
|
|
/*
|
|
Constructor: New
|
|
Parameters:
|
|
code - The source code to tokenize.
|
|
options - An <nS_Options> object used to configure the scanner.
|
|
*/
|
|
/datum/n_Scanner/nS_Scanner/New(var/code, var/datum/n_scriptOptions/nS_Options/options)
	. = ..()

	// Keep the options object around; Scan() and the Read* procs consult it.
	src.options = options

	// Carriage returns are skipped just like the other whitespace characters.
	ignore += ascii2text(13)

	// Any ignored character, symbol, statement terminator or string delimiter ends a word.
	var/list/boundaries = ignore + options.symbols + end_stmt + string_delim
	delim += boundaries

	LoadCode(code)
|
|
|
|
/datum/n_Scanner/nS_Scanner/Scan() //Creates a list of tokens from source code
	// Walks the loaded code one character at a time, dispatching to the Read* procs,
	// and returns the list of tokens produced. Scanner position state is reset before returning.
	var/list/tokens = list()

	// None of the procs in this file reassign `code` while scanning, so measure it once
	// instead of re-counting characters on every loop iteration.
	var/code_len = length_char(code)

	for(, src.codepos <= code_len, src.codepos++)
		var/char = copytext_char(code, codepos, codepos + 1)
		var/nextchar = copytext_char(code, codepos + 1, codepos + 2)

		// Track line starts so tokens can report line/column.
		if(char == "\n")
			line++
			linepos = codepos

		if(ignore.Find(char))
			continue

		else if(char == "/" && (nextchar == "*" || nextchar == "/"))
			ReadComment()

		else if(end_stmt.Find(char))
			tokens += new/datum/token/end(char, line, COL)

		else if(string_delim.Find(char))
			codepos++ //skip string delimiter
			tokens += ReadString(char)

		else if(options.CanStartID(char))
			tokens += ReadWord()

		else if(options.IsDigit(char))
			tokens += ReadNumber()

		else if(options.symbols.Find(char))
			tokens += ReadSymbol()

	// Restore the position state to its declared defaults.
	codepos = initial(codepos)
	line = initial(line)
	linepos = initial(linepos)
	return tokens
|
|
|
|
|
|
/*
|
|
Proc: ReadString
|
|
Reads a string in the source code into a token.
|
|
|
|
Parameters:
|
|
start - The character used to start the string.
|
|
*/
|
|
/datum/n_Scanner/nS_Scanner/proc/ReadString(start)
	// Reads string contents beginning at codepos (the caller has already skipped the opening
	// delimiter) and returns a /datum/token/string holding the unescaped text.
	//
	// start - the delimiter character that opened the string and must close it.
	//
	// On a normal return codepos is left on the closing delimiter. An unescaped newline or the end
	// of the code before the closing delimiter adds a scriptError to `errors`; the token built so
	// far is still returned.
	var/buf
	var/code_len = length_char(code) // `code` is not modified here; measure once
	var/closed = 0

	for(, codepos <= code_len, codepos++)
		var/char = copytext_char(code, codepos, codepos + 1)

		if(char == "\\") //Backslash (\) encountered in string
			codepos++ //Move onto the escaped character
			char = copytext_char(code, codepos, codepos + 1)
			if(char == "n") //\n Newline
				buf += "\n"
			else
				// Escaped backslash, escaped delimiter and unknown escapes all yield the character itself.
				if(char == "\n")
					// An escaped physical newline still moves the scanner onto the next line.
					line++
					linepos = codepos
				buf += char
			continue

		if(char == "\n")
			. = new/datum/token/string(buf, line, COL)
			errors += new/datum/scriptError("Unterminated string. Newline reached.", .)
			line++
			linepos = codepos
			return

		if(char == start) //string delimiter found, end string
			closed = 1
			break

		buf += char //Just a normal character in a string

	. = new/datum/token/string(buf, line, COL)
	if(!closed)
		// The code ended before the closing delimiter was seen.
		errors += new/datum/scriptError("Unterminated string. End of code reached.", .)
|
|
|
|
/*
|
|
Proc: ReadWord
|
|
Reads characters separated by an item in <delim> into a token.
|
|
*/
|
|
/datum/n_Scanner/nS_Scanner/proc/ReadWord()
	// Collects characters from codepos up to (not including) the next delimiter or the end of the
	// code, and returns a keyword token if the text is in options.keywords, otherwise a word token.
	var/word
	var/current = copytext_char(code, codepos, codepos + 1)

	while(codepos <= length_char(code) && !delim.Find(current))
		word += current
		codepos++
		current = copytext_char(code, codepos, codepos + 1)

	// Step back onto the last character of the word so Scan()'s loop increment lands on the delimiter.
	codepos--

	if(options.keywords.Find(word))
		return new/datum/token/keyword(word, line, COL)
	return new/datum/token/word(word, line, COL)
|
|
|
|
/*
|
|
Proc: ReadSymbol
|
|
Reads a symbol into a token.
|
|
*/
|
|
/datum/n_Scanner/nS_Scanner/proc/ReadSymbol()
	// Extends the symbol one character at a time for as long as the text gathered so far plus the
	// next character is still an entry in options.symbols, then returns it as a symbol token.
	var/symbol
	var/current = copytext_char(code, codepos, codepos + 1)

	while(options.symbols.Find(symbol + current))
		symbol += current
		codepos++
		if(codepos > length_char(code))
			break
		current = copytext_char(code, codepos, codepos + 1)

	// Step back so Scan()'s loop increment lands on the first character after the symbol.
	codepos--
	return new /datum/token/symbol(symbol, line, COL)
|
|
|
|
/*
|
|
Proc: ReadNumber
|
|
Reads a number into a token.
|
|
*/
|
|
/datum/n_Scanner/nS_Scanner/proc/ReadNumber()
	// Collects digits and at most one decimal point starting at codepos and returns a number token.
	// If the collected text does not convert with text2num(), an error is recorded and the token's
	// value is set to 0.
	var/digits
	var/seen_point = 0
	var/current = copytext_char(code, codepos, codepos + 1)

	while(options.IsDigit(current) || (current == "." && !seen_point))
		if(current == ".")
			seen_point = 1 // a second "." ends the number

		digits += current
		codepos++
		current = copytext_char(code, codepos, codepos + 1)

	// The token is created before stepping back, so COL is evaluated one past the number's last character.
	var/datum/token/number/T = new(digits, line, COL)
	if(isnull(text2num(digits)))
		errors += new/datum/scriptError("Bad number: ", T)
		T.value = 0

	// Step back so Scan()'s loop increment lands on the first character after the number.
	codepos--
	return T
|
|
|
|
/*
	Proc: ReadComment
	Skips over a single-line or multi-line comment that starts at the current position. An unterminated multi-line comment is recorded in <errors>.
*/
|
|
|
|
/datum/n_Scanner/nS_Scanner/proc/ReadComment()
	// Skips a comment whose opener ("//" or "/*") starts at codepos. Produces no token and returns null.
	//
	// On return codepos is left on:
	//  - the terminating newline of a single-line comment,
	//  - the closing "/" of a multi-line comment, or
	//  - one past the end of the code if the comment ran to the end of input.
	// An unterminated multi-line comment adds an UnterminatedComment error to `errors`.
	var/opener = copytext_char(code, codepos, codepos + 2)
	if(opener != "//" && opener != "/*")
		return // not positioned on a comment opener; nothing to do

	var/multiline = (opener == "/*")
	var/code_len = length_char(code)
	var/star_seen = 0 // set when the previous character was "*", i.e. a "/" would close the comment

	// Move onto the opener's second character; the loop's pre-increment then starts after the opener.
	// This keeps the "*" of "/*" from being mistaken for the start of the closing "*/" (as in "/*/").
	codepos++

	while(++codepos <= code_len)
		var/char = copytext_char(code, codepos, codepos + 1)

		if(char == "\n")
			// Scan() never sees characters consumed here, so newline bookkeeping must happen in this proc,
			// the same way ReadString() does it.
			line++
			linepos = codepos
			if(!multiline)
				return // single-line comments end at the newline

		if(multiline)
			if(star_seen && char == "/") // ending statement found - break the comment
				return
			star_seen = (char == "*")

	// Ran off the end of the code without finding the closing "*/".
	if(multiline)
		errors += new/datum/scriptError/UnterminatedComment()
|
|
|