// Mirror of https://github.com/tbklang/tlang.git (D source, 919 lines, 28 KiB)
module compiler.lexer.core;
|
|
|
|
import std.container.slist;
|
|
import gogga;
|
|
import std.conv : to;
|
|
import std.string : cmp;
|
|
import std.ascii : isDigit;
|
|
import misc.exceptions : TError;
|
|
|
|
/**
 * Categories of failure that a LexerException can carry
 */
public enum LexerError
{
    /* The source ran out of characters while more were required */
    EXHAUSTED_CHARACTERS,

    /* Any failure that has no dedicated category */
    OTHER
}
|
|
|
|
/**
 * Error raised by the lexer
 *
 * Carries the lexer instance that failed and the category
 * of the failure alongside the human readable message
 */
public final class LexerException : TError
{
    /* The lexer that raised this error */
    public const Lexer offendingInstance;

    /* The category of the failure */
    public const LexerError errType;

    /**
     * Builds the error message out of the category, the optional
     * detail text and the lexer's current (line, column)
     *
     * Params:
     *   offendingInstance = the lexer that failed
     *   errType = the failure category (defaults to OTHER)
     *   msg = optional detail text, left out of the message when empty
     */
    this(Lexer offendingInstance, LexerError errType = LexerError.OTHER, string msg = "")
    {
        string where = "("~to!(string)(offendingInstance.line)~", "~to!(string)(offendingInstance.column)~")";

        string description = "LexerException("~to!(string)(errType)~")";
        if(msg.length)
        {
            description ~= ": "~msg;
        }
        description ~= " at "~where;

        super(description);

        this.offendingInstance = offendingInstance;
        this.errType = errType;
    }

    /**
     * Convenience form for an uncategorised (OTHER) failure
     *
     * Params:
     *   offendingInstance = the lexer that failed
     *   msg = detail text
     */
    this(Lexer offendingInstance, string msg)
    {
        this(offendingInstance, LexerError.OTHER, msg);
    }
}
|
|
|
|
/* TODO: Add Token type (which matches column and position too) */
|
|
/**
 * A single lexed token: its text together with the
 * line and column it was recorded at
 */
public final class Token
{
    /* The token */
    private string token;

    /* Line number information */
    private ulong line, column;

    /**
     * Params:
     *   token = the token's text
     *   line = the line the token was recorded at
     *   column = the column the token was recorded at
     */
    this(string token, ulong line, ulong column)
    {
        this.token = token;
        this.line = line;
        this.column = column;
    }

    /**
     * Two tokens are equal when their text is equal, the
     * position is NOT taken into account
     *
     * Anything that is not a Token (this includes null)
     * never compares equal
     */
    override bool opEquals(Object other)
    {
        /* A failed downcast yields null, which must not be dereferenced */
        Token otherToken = cast(Token)other;
        if(otherToken is null)
        {
            return false;
        }

        return cmp(token, otherToken.getToken()) == 0;
    }

    /**
     * Renders the token as `<text> at (<line>, <column - text length>)`
     */
    override string toString()
    {
        /* TODO (Column number): Don't adjust here, do it maybe in the lexer itself */

        /* The column is unsigned, clamp to 0 rather than letting the subtraction wrap around */
        ulong startColumn = column >= token.length ? column-token.length : 0;

        return token~" at ("~to!(string)(line)~", "~to!(string)(startColumn)~")";
    }

    /**
     * Returns the token's text
     */
    public string getToken()
    {
        return token;
    }
}
|
|
|
|
public final class Lexer
|
|
{
|
|
/**
|
|
* Lexer state data
|
|
*/
|
|
private string sourceCode; /* The source to be lexed */
|
|
private ulong line = 1; /* Current line */
|
|
private ulong column = 1;
|
|
private Token[] currentTokens; /* Current token set */
|
|
private string currentToken; /* Current token */
|
|
private ulong position; /* Current character position */
|
|
private char currentChar; /* Current character */
|
|
private bool stringMode; /* Whether we are in a string "we are here" or not */
|
|
private bool floatMode; /* Whether or not we are building a floating point constant */
|
|
|
|
/* The tokens */
|
|
private Token[] tokens;
|
|
|
|
/**
 * Creates a lexer over the given source text
 *
 * Params:
 *   sourceCode = the source to be lexed
 */
this(string sourceCode)
{
    this.sourceCode = sourceCode;
}
|
|
|
|
/**
 * Returns true if there is at least one more character
 * after the one at the current position
 */
private bool isForward()
{
    ulong nextPosition = position+1;

    return nextPosition < sourceCode.length;
}
|
|
|
|
/**
 * Returns true if the position just before the current
 * one lies within the source code
 */
public bool isBackward()
{
    /* Nothing lies behind the very first position */
    if(position == 0)
    {
        return false;
    }

    return position-1 < sourceCode.length;
}
|
|
|
|
|
|
|
|
/**
 * Returns true if the token built up so far is a path
 * identifier or a plain identifier
 *
 * Consulted when a `.` is encountered with characters
 * already built up (as in `a2.b2`) to decide whether
 * the dot may be appended to the current token
 */
private bool isBuildUpValidIdent()
{
    import compiler.symbols.check;

    if(isPathIdentifier(currentToken))
    {
        return true;
    }

    return isIdentifier(currentToken);
}
|
|
|
|
/**
 * Returns true while a token is being built up
 * (the build up is not empty), false otherwise
 */
private bool hasToken()
{
    return currentToken.length > 0;
}
|
|
|
|
/**
 * Performs the lexing process
 *
 * Walks `sourceCode` one character at a time starting at `position`,
 * building up `currentToken` and appending every finished token to
 * `currentTokens`. Once the input is exhausted any pending token is
 * flushed and the collected tokens are stored for `getTokens()`.
 *
 * Every token is recorded with the line it ends on and the column
 * just past its last character (`Token.toString()` subtracts the
 * token's length from that column).
 *
 * Throws: LexerException on a floating point literal followed by an
 * unexpected character, a `.` after an identifier that is not followed
 * by a letter, an escape sequence that is unfinished or used outside
 * of a string, a malformed character literal or an invalid numerical
 * encoder
 */
/* TODO: Use return value */
public void performLex()
{
    while(position < sourceCode.length)
    {
        currentChar = sourceCode[position];

        if(floatMode)
        {
            if(isDigit(currentChar))
            {
                /* tack on and move to next iteration */
                currentToken ~= currentChar;
                position++;
                column++;
                continue;
            }
            /**
             * A splitter or a space ends the literal. The space must be
             * accepted here as this branch runs before the whitespace
             * branch and a space is not a splitter
             */
            else if(isSpliter(currentChar) || currentChar == ' ')
            {
                floatMode = false;
                flushCurrentToken();

                /* The terminating character is not consumed, it is handled in the next round */
            }
            else
            {
                throw new LexerException(this, "Floating point '"~currentToken~"' cannot be followed by a '"~currentChar~"'");
            }
        }
        else if(currentChar == ' ' && !stringMode)
        {
            /* Whitespace ends whatever was being built up */
            flushCurrentToken();

            column++;
            position++;
        }
        else if(isSpliter(currentChar) && !stringMode)
        {
            /* The splitter token to finally insert */
            string splitterToken;

            gprintln("Build up: "~currentToken);
            gprintln("Current char: "~currentChar);

            /* Check for case of `==` (where we are on the first `=` sign) */
            if(currentChar == '=' && isForward() && sourceCode[position+1] == '=')
            {
                /* Flush any current token (if exists) */
                flushCurrentToken();

                /* Skip over both `=` signs, then record the `==` token at its end column */
                position += 2;
                column += 2;
                currentTokens ~= new Token("==", line, column);

                continue;
            }

            /* TODO: Error checking will need to be added */
            /* An all-digit build up followed by a dot starts a floating point literal */
            if(isNumericalStr(currentToken) && currentChar == '.')
            {
                /* Tack on the dot */
                currentToken ~= ".";

                /* Enable floating point mode and go to next iteration */
                floatMode = true;
                gprintln("Float mode just got enabled: Current build up: \""~currentToken~"\"");
                column++;
                position++;
                continue;
            }

            import misc.utils;

            /**
             * Here we check if we have a `.` and that the characters
             * preceding us were all good for an identifier
             */
            if(currentChar == '.' && hasToken() && isBuildUpValidIdent())
            {
                /**
                 * Now we check that we have a character in front of us
                 * and that it is a letter
                 *
                 * TODO: Add _ check too as that is a valid identifier start
                 */
                if(isForward() && isCharacterAlpha(sourceCode[position+1]))
                {
                    position++;
                    column += 1;

                    currentToken ~= '.';

                    continue;
                }
                else
                {
                    throw new LexerException(this, "Expected a letter to follow the .");
                }
            }

            /**
             * Whatever was built up ends right before this splitter, so
             * flush it BEFORE the line and column are moved. This keeps a
             * token that precedes a newline on its own line
             */
            flushCurrentToken();

            /* Check if we need to do combinators (e.g. for ||, &&) */
            if(currentChar == '|' && isForward() && sourceCode[position+1] == '|')
            {
                splitterToken = "||";
                column += 2;
                position += 2;
            }
            else if(currentChar == '&' && isForward() && sourceCode[position+1] == '&')
            {
                splitterToken = "&&";
                column += 2;
                position += 2;
            }
            else if(currentChar == '\n') /* TODO: Unrelated, but we shouldn't allow this behaviour in string mode */
            {
                /* A newline produces no token, it only moves to the start of the next line */
                line++;
                column = 1;

                position++;
            }
            else
            {
                splitterToken = ""~currentChar;
                column++;
                position++;
            }

            /* Add the splitter token (only if it isn't empty) */
            if(splitterToken.length)
            {
                currentTokens ~= new Token(splitterToken, line, column);
            }
        }
        else if(currentChar == '"')
        {
            /* The quote is part of the token whether it opens or closes the string */
            currentToken ~= '"';
            column++;
            position++;

            if(!stringMode)
            {
                /* Opening quote, enable string mode */
                stringMode = true;
            }
            else
            {
                /* Closing quote, flush the token and get out of string mode */
                flushCurrentToken();
                stringMode = false;
            }
        }
        else if(currentChar == '\\')
        {
            /* You can only use these in strings */
            if(stringMode)
            {
                /* Check if we have a next character */
                if(position+1 != sourceCode.length && isValidEscape_String(sourceCode[position+1]))
                {
                    /* Add the backslash and the escaped character to the string */
                    currentToken ~= "\\"~sourceCode[position+1];

                    column += 2;
                    position += 2;
                }
                /* If we don't have a next character then raise error */
                else
                {
                    throw new LexerException(this, "Unfinished escape sequence");
                }
            }
            else
            {
                throw new LexerException(this, "Escape sequences can only be used within strings");
            }
        }
        /* Character literal support */
        else if(!stringMode && currentChar == '\'')
        {
            currentToken ~= "'";

            /* Character literal must be next */
            if(position+1 != sourceCode.length)
            {
                /* TODO: Escape support for \' */

                /* Get the character */
                currentToken ~= ""~sourceCode[position+1];
                column++;
                position++;

                /* Closing ' must be next */
                if(position+1 != sourceCode.length && sourceCode[position+1] == '\'')
                {
                    currentToken ~= "'";

                    /* Step past the literal's character and the closing ' before recording the token */
                    column += 2;
                    position += 2;

                    flushCurrentToken();
                }
                else
                {
                    throw new LexerException(this, "Was expecting closing ' when finishing character literal");
                }
            }
            else
            {
                throw new LexerException(this, LexerError.EXHAUSTED_CHARACTERS, "EOSC reached when trying to get character literal");
            }
        }
        /**
         * If we are building up a number
         *
         * TODO: Build up token right at the end (#DuplicateCode)
         */
        else if(isBuildUpNumerical())
        {
            /* fetch the encoder segment */
            char[] encoderSegment = numbericalEncoderSegmentFetch();

            gprintln("isBuildUpNumerical(): Enter");

            /**
             * If we don't have any encoders
             */
            if(encoderSegment.length == 0)
            {
                /* We can add a signage encoder */
                if(isNumericalEncoder_Signage(currentChar))
                {
                    /* Check if the next character is a size (it MUST be) */
                    if(isForward() && isNumericalEncoder_Size(sourceCode[position+1]))
                    {
                        currentToken ~= currentChar;
                        column++;
                        position++;
                    }
                    else
                    {
                        throw new LexerException(this, "You MUST specify a size encoder after a signagae encoder");
                    }
                }
                /* We can add a size encoder */
                else if(isNumericalEncoder_Size(currentChar))
                {
                    currentToken ~= currentChar;
                    column++;
                    position++;
                }
                /* We can add more numbers */
                else if(isDigit(currentChar))
                {
                    currentToken ~= currentChar;
                    column++;
                    position++;
                }
                /* Splitter (TODO) */
                else if(isSpliter(currentChar))
                {
                    /* Add the numerical literal as a new token */
                    flushCurrentToken();

                    /* Consume only the splitter itself so that the character after it still gets lexed */
                    column++;
                    position++;

                    /* Add the splitter token if not a newline */
                    if(currentChar != '\n')
                    {
                        currentTokens ~= new Token(""~currentChar, line, column);
                    }
                }
                /* Anything else is invalid */
                else
                {
                    throw new LexerException(this, "Not valid TODO");
                }
            }
            /**
             * If we have one encoder
             */
            else if(encoderSegment.length == 1)
            {
                /**
                 * If we had a signage then we must have a size after it
                 */
                if(isNumericalEncoder_Signage(encoderSegment[0]))
                {
                    /**
                     * Size encoder must then follow
                     */
                    if(isNumericalEncoder_Size(currentChar))
                    {
                        currentToken ~= currentChar;
                        column++;
                        position++;

                        /* The literal is complete, add it as a new token */
                        flushCurrentToken();
                    }
                    /**
                     * Anything else is invalid
                     */
                    else
                    {
                        throw new LexerException(this, "A size-encoder must follow a signage encoder");
                    }
                }
                else
                {
                    throw new LexerException(this, "Cannot have another encoder after a size encoder");
                }
            }
            /* It is impossible to reach this as flushing means we cannot add more */
            else
            {
                assert(false);
            }
        }
        /* Any other case, keep building the current token */
        else
        {
            currentToken ~= currentChar;
            column++;
            position++;
        }
    }

    /* If there was a token made at the end then flush it */
    flushCurrentToken();

    tokens = currentTokens;
}

/**
 * Appends the token built up so far (if there is one) to the
 * current token set, recorded at the current line and column,
 * and then empties the build up
 */
private void flushCurrentToken()
{
    if(currentToken.length)
    {
        currentTokens ~= new Token(currentToken, line, column);
        currentToken = "";
    }
}
|
|
|
|
/**
 * Returns the part of the current build up that follows its
 * leading run of digits (the encoder segment)
 *
 * The result is empty when the build up consists only of digits
 * (or is itself empty). It is a fresh copy, so the caller never
 * holds a mutable view onto the immutable build up.
 */
private char[] numbericalEncoderSegmentFetch()
{
    /* Index of the first non-digit, the build up's length when every character is a digit */
    size_t stopped = currentToken.length;

    foreach(i, char character; currentToken)
    {
        if(!isDigit(character))
        {
            stopped = i;
            break;
        }
    }

    return currentToken[stopped..currentToken.length].dup;
}
|
|
|
|
/**
 * Returns true only when the current build up starts with one or
 * more digits which are followed by at least one further character,
 * and every one of those further characters is a numerical encoder
 *
 * An empty build up, a build up made only of digits and a build up
 * that does not start with a digit all yield false
 */
private bool isBuildUpNumerical()
{
    import std.ascii : isDigit;

    /* Find where the leading digits stop (stays 0 if they never do) */
    size_t suffixStart = 0;
    foreach(i, char character; currentToken)
    {
        if(!isDigit(character))
        {
            suffixStart = i;
            break;
        }
    }

    /**
     * We need SOME numerical stuff
     */
    if(suffixStart == 0)
    {
        return false;
    }

    /* Everything after the digits must be an encoder */
    foreach(char character; currentToken[suffixStart..$])
    {
        if(!isNumericalEncoder(character))
        {
            return false;
        }
    }

    return true;
}
|
|
|
|
/**
 * Tells whether the given string is made up of digits only
 *
 * Params:
 *   input = the string to inspect
 * Returns: true if `input` is non-empty and each of its
 * characters is a digit, false otherwise
 */
private static bool isNumericalStr(string input)
{
    /* An empty string is never numerical */
    if(input.length == 0)
    {
        return false;
    }

    /* A single non-digit disqualifies the whole string */
    foreach(char character; input)
    {
        if(!isDigit(character))
        {
            return false;
        }
    }

    return true;
}
|
|
|
|
|
|
/**
 * Returns the tokens stored by performLex()
 */
public Token[] getTokens()
{
    return this.tokens;
}
|
|
|
|
/**
 * Tells whether the given character is a splitter, that is
 * one of the punctuation or operator characters listed below
 * or a newline
 *
 * Params:
 *   character = the character to test
 * Returns: true if it is a splitter, false otherwise
 */
private bool isSpliter(char character)
{
    switch(character)
    {
        case ';', ',', '(', ')', '[', ']':
        case '+', '-', '/', '%', '*', '&':
        case '{', '}', '=', '|', '^', '!':
        case '\n', '~', '.', ':':
            return true;
        default:
            return false;
    }
}
|
|
|
|
/**
 * Tells whether the given character is a numerical encoder,
 * that is either a size encoder or a signage encoder
 *
 * Params:
 *   character = the character to test
 */
private bool isNumericalEncoder(char character)
{
    if(isNumericalEncoder_Size(character))
    {
        return true;
    }

    return isNumericalEncoder_Signage(character);
}
|
|
|
|
/**
 * Tells whether the given character is a size encoder,
 * one of `B`, `W`, `I` or `L`
 *
 * Params:
 *   character = the character to test
 */
private bool isNumericalEncoder_Size(char character)
{
    switch(character)
    {
        case 'B', 'W', 'I', 'L':
            return true;
        default:
            return false;
    }
}
|
|
|
|
/**
 * Tells whether the given character is a signage encoder,
 * either `S` or `U`
 *
 * Params:
 *   character = the character to test
 */
private bool isNumericalEncoder_Signage(char character)
{
    switch(character)
    {
        case 'S', 'U':
            return true;
        default:
            return false;
    }
}
|
|
|
|
/**
 * Tells whether the given character may follow a backslash
 * inside of a string (for example the `"` of `\"`)
 *
 * Every character is currently accepted
 *
 * Params:
 *   character = the character following the backslash
 */
public bool isValidEscape_String(char character)
{
    /* TODO: Implement me */
    return true;
}
|
|
}
|
|
|
|
/* Test input: `hello "world";` */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("hello \"world\";");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("hello", 0, 0), new Token("\"world\"", 0, 0), new Token(";", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: `hello "world"|| ` (combinator followed by a trailing space) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("hello \"world\"|| ");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("hello", 0, 0), new Token("\"world\"", 0, 0), new Token("||", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: `hello "world"||` (combinator at the very end of the input) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("hello \"world\"||");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("hello", 0, 0), new Token("\"world\"", 0, 0), new Token("||", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: `hello "world";|` (a lone `|` at the very end of the input) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("hello \"world\";|");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [
        new Token("hello", 0, 0),
        new Token("\"world\"", 0, 0),
        new Token(";", 0, 0),
        new Token("|", 0, 0)
    ];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: ` hello` (leading space) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer(" hello");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("hello", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: ` hello;` (leading space, splitter right after the identifier) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer(" hello;");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("hello", 0, 0), new Token(";", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: `hello "world\""` (escaped quote inside of a string) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("hello \"world\\\"\"");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("hello", 0, 0), new Token("\"world\\\"\"", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: `'c'` (character literal) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("'c'");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("'c'", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/* Test input: `2121\n2121` (two numbers separated by a newline) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("2121\n2121");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("2121", 0, 0), new Token("2121", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/**
 * Test `=` and `==` handling
 */
unittest
{
    import std.algorithm.comparison;

    /* Lexes the given source, logs what was collected and hands the tokens back */
    Token[] lex(string source)
    {
        Lexer lexer = new Lexer(source);
        lexer.performLex();
        gprintln("Collected "~to!(string)(lexer.getTokens()));
        return lexer.getTokens();
    }

    /* A lone `=` */
    assert(lex(" =\n") == [new Token("=", 0, 0)]);

    /* `=` followed by `==` */
    assert(lex(" = ==\n") == [new Token("=", 0, 0), new Token("==", 0, 0)]);

    /* A lone `==` */
    assert(lex(" ==\n") == [new Token("==", 0, 0)]);

    /* Two `=` kept apart by a space stay separate */
    assert(lex(" = =\n") == [new Token("=", 0, 0), new Token("=", 0, 0)]);

    /* Mixed with another splitter */
    assert(lex(" ==, = ==\n") == [
        new Token("==", 0, 0),
        new Token(",", 0, 0),
        new Token("=", 0, 0),
        new Token("==", 0, 0)
    ]);

    /* Test flushing of previous token */
    assert(lex("i==i=\n") == [
        new Token("i", 0, 0),
        new Token("==", 0, 0),
        new Token("i", 0, 0),
        new Token("=", 0, 0)
    ]);
}
|
|
|
|
/**
 * Test: Literal value encoding
 *
 * Tests validity
 *
 * TODO: Cover `21U ` (a signage encoder without a size encoder),
 * the disabled test for it expected the input to be rejected
 */
unittest
{
    import std.algorithm.comparison;

    /* Lexes the given source, logs what was collected and hands the tokens back */
    Token[] lex(string source)
    {
        Lexer lexer = new Lexer(source);
        lexer.performLex();
        gprintln("Collected "~to!(string)(lexer.getTokens()));
        return lexer.getTokens();
    }

    /* 21L (valid) */
    assert(lex("21L") == [new Token("21L", 0, 0)]);

    /* 21UL (valid) */
    assert(lex("21UL") == [new Token("21UL", 0, 0)]);
}
|
|
|
|
/* Test input: `1.5` (floating point literal) */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("1.5");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    Token[] expected = [new Token("1.5", 0, 0)];
    assert(lexed == expected);
}
|
|
|
|
/**
 * Test correct handling of dot-operator for
 * non-floating point cases
 *
 * Input: `new A().l.p.p;`
 */
unittest
{
    import std.algorithm.comparison;

    Lexer lexer = new Lexer("new A().l.p.p;");
    lexer.performLex();

    Token[] lexed = lexer.getTokens();
    gprintln("Collected "~to!(string)(lexed));

    /* The dot after `)` stands alone, the dots between identifiers join them into one path */
    Token[] expected = [
        new Token("new", 0, 0),
        new Token("A", 0, 0),
        new Token("(", 0, 0),
        new Token(")", 0, 0),
        new Token(".", 0, 0),
        new Token("l.p.p", 0, 0),
        new Token(";", 0, 0)
    ];
    assert(lexed == expected);
}