Files
CHOMPStation2/code/modules/scripting/Scanner/Scanner.dm
2020-01-21 01:06:04 -05:00

283 lines
6.7 KiB
Plaintext

/*
File: Scanner
*/
/*
Class: n_Scanner
An object responsible for breaking up source code into tokens for use by the parser.
*/
/n_Scanner
var
code
list
/*
Var: errors
A list of fatal errors found by the scanner. If there are any items in this list, then it is not safe to parse the returned tokens.
See Also:
- <scriptError>
*/
errors = new
/*
Var: warnings
A list of non-fatal problems in the source code found by the scanner.
*/
warnings = new
proc
/*
Proc: LoadCode
Loads source code.
*/
LoadCode(c)
code=c
/*
Proc: LoadCodeFromFile
Gets the code from a file and calls <LoadCode()>.
*/
LoadCodeFromFile(f)
LoadCode(file2text(f))
/*
Proc: Scan
Runs the scanner and returns the resulting list of tokens. Ensure that <LoadCode()> has been called first.
*/
Scan()
/*
Class: nS_Scanner
A scanner implementation for n_Script.
*/
/n_Scanner/nS_Scanner
var
/*
Variable: codepos
The scanner's position in the source code.
*/
codepos = 1
line = 1
linepos = 0 //column=codepos-linepos
n_scriptOptions/nS_Options/options
commenting = 0
// 1: single-line
// 2: multi-line
list
/*
Variable: ignore
A list of characters that are ignored by the scanner.
Default Value:
Whitespace
*/
ignore = list(" ", "\t", "\n") //Don't add tokens for whitespace
/*
Variable: end_stmt
A list of characters that end a statement. Each item may only be one character long.
Default Value:
Semicolon
*/
end_stmt = list(";")
/*
Variable: string_delim
A list of characters that can start and end strings.
Default Value:
Double and single quotes.
*/
string_delim = list("\"", "'")
/*
Variable: delim
A list of characters that denote the start of a new token. This list is automatically populated.
*/
delim = new
/*
Macro: COL
The current column number.
*/
#define COL codepos-linepos
/*
Constructor: New
Parameters:
code - The source code to tokenize.
options - An <nS_Options> object used to configure the scanner.
*/
New(code, n_scriptOptions/nS_Options/options)
.=..()
ignore+= ascii2text(13) //Carriage return
delim += ignore + options.symbols + end_stmt + string_delim
src.options=options
LoadCode(code)
Scan() //Creates a list of tokens from source code
var/list/tokens=new
for(, src.codepos<=length(code), src.codepos++)
var/char=copytext(code, codepos, codepos+1)
if(char=="\n")
line++
linepos=codepos
if(ignore.Find(char))
continue
else if(char == "/")
ReadComment()
else if(end_stmt.Find(char))
tokens+=new /token/end(char, line, COL)
else if(string_delim.Find(char))
codepos++ //skip string delimiter
tokens+=ReadString(char)
else if(options.CanStartID(char))
tokens+=ReadWord()
else if(options.IsDigit(char))
tokens+=ReadNumber()
else if(options.symbols.Find(char))
tokens+=ReadSymbol()
codepos=initial(codepos)
line=initial(line)
linepos=initial(linepos)
return tokens
proc
/*
Proc: ReadString
Reads a string in the source code into a token.
Parameters:
start - The character used to start the string.
*/
ReadString(start)
var/buf
for(, codepos <= length(code), codepos++)//codepos to length(code))
var/char=copytext(code, codepos, codepos+1)
switch(char)
if("\\") //Backslash (\) encountered in string
codepos++ //Skip next character in string, since it was escaped by a backslash
char=copytext(code, codepos, codepos+1)
switch(char)
if("\\") //Double backslash
buf+="\\"
if("n") //\n Newline
buf+="\n"
else
if(char==start) //\" Doublequote
buf+=start
else //Unknown escaped text
buf+=char
if("\n")
. = new/token/string(buf, line, COL)
errors+=new/scriptError("Unterminated string. Newline reached.", .)
line++
linepos=codepos
break
else
if(char==start) //string delimiter found, end string
break
else
buf+=char //Just a normal character in a string
if(!.) return new/token/string(buf, line, COL)
/*
Proc: ReadWord
Reads characters separated by an item in <delim> into a token.
*/
ReadWord()
var/char=copytext(code, codepos, codepos+1)
var/buf
while(!delim.Find(char) && codepos<=length(code))
buf+=char
char=copytext(code, ++codepos, codepos+1)
codepos-- //allow main Scan() proc to read the delimiter
if(options.keywords.Find(buf))
return new /token/keyword(buf, line, COL)
else
return new /token/word(buf, line, COL)
/*
Proc: ReadSymbol
Reads a symbol into a token.
*/
ReadSymbol()
var/char=copytext(code, codepos, codepos+1)
var/buf
while(options.symbols.Find(buf+char))
buf+=char
if(++codepos>length(code)) break
char=copytext(code, codepos, codepos+1)
codepos-- //allow main Scan() proc to read the next character
return new /token/symbol(buf, line, COL)
/*
Proc: ReadNumber
Reads a number into a token.
*/
ReadNumber()
var/char=copytext(code, codepos, codepos+1)
var/buf
var/dec=0
while(options.IsDigit(char) || (char=="." && !dec))
if(char==".") dec=1
buf+=char
codepos++
char=copytext(code, codepos, codepos+1)
var/token/number/T=new(buf, line, COL)
if(isnull(text2num(buf)))
errors+=new/scriptError("Bad number: ", T)
T.value=0
codepos-- //allow main Scan() proc to read the next character
return T
/*
Proc: ReadComment
Reads a comment and outputs the type of comment
*/
ReadComment()
var/char=copytext(code, codepos, codepos+1)
var/nextchar=copytext(code, codepos+1, codepos+2)
var/charstring = char+nextchar
var/comm = 1
// 1: single-line comment
// 2: multi-line comment
var/expectedend = 0
if(charstring == "//" || charstring == "/*")
if(charstring == "/*")
comm = 2 // starts a multi-line comment
while(comm)
if(++codepos>length(code)) break
if(expectedend) // ending statement expected...
char = copytext(code, codepos, codepos+1)
if(char == "/") // ending statement found - beak the comment
comm = 0
break
if(comm == 2)
// multi-line comments are broken by ending statements
char = copytext(code, codepos, codepos+1)
if(char == "*")
expectedend = 1
continue
else
char = copytext(code, codepos, codepos+1)
if(char == "\n")
comm = 0
break
if(expectedend) expectedend = 0
if(comm == 2)
errors+=new/scriptError/UnterminatedComment()