mirror of https://github.com/tbklang/tlang.git
1964 lines
48 KiB
D
1964 lines
48 KiB
D
/**
|
|
* A single-pass tokenizer
|
|
*/
|
|
module tlang.compiler.lexer.kinds.basic;
|
|
|
|
import std.container.slist;
|
|
import std.string : replace;
|
|
import tlang.misc.logging;
|
|
import std.conv : to;
|
|
import std.ascii : isDigit, isAlpha, isWhite;
|
|
import tlang.compiler.lexer.core;
|
|
|
|
/** The empty string; used to reset the token build-up buffer in `flush()` */
enum EMPTY = "";
|
|
|
|
/**
|
|
* Represents a basic lexer which performs the whole tokenization
|
|
* process in one short via a call to `performLex()`, only after
|
|
* this may the `LexerInterface` methods, such as `getCurrentToken()`,
|
|
* `nextToken()` and so forth, actually be used.
|
|
*
|
|
* This is effectively a single pass lexer.
|
|
*/
|
|
public final class BasicLexer : LexerInterface
{
    /**
     * Post-perform lex() data
     *
     * This exports the LexerInterface API.
     *
     * To-do: ensure these can only be used AFTER `performLex()`
     * has been called.
     */
    private ulong tokenPtr = 0; /* Cursor into `tokens` */

    /**
     * Returns the token at the current cursor
     * position
     *
     * Returns: the `Token`
     */
    public override Token getCurrentToken()
    {
        /* TODO: Throw an exception here when we try get more than we can */
        return tokens[tokenPtr];
    }

    /**
     * Moves the cursor one token forward
     */
    public override void nextToken()
    {
        tokenPtr++;
    }

    /**
     * Moves the cursor one token backwards
     */
    public override void previousToken()
    {
        tokenPtr--;
    }

    /**
     * Sets the position of the cursor
     *
     * Params:
     *   newPosition = the new position
     */
    public override void setCursor(ulong newPosition)
    {
        tokenPtr = newPosition;
    }

    /**
     * Retrieves the cursor's current position
     *
     * Returns: the position
     */
    public override ulong getCursor()
    {
        return tokenPtr;
    }

    /**
     * Checks whether more tokens are available
     * or not
     *
     * Returns: true if more tokens are available, false otherwise
     */
    public override bool hasTokens()
    {
        return tokenPtr < tokens.length;
    }

    /**
     * Get the line position of the lexer in the source text
     *
     * Returns: the position
     */
    public override ulong getLine()
    {
        return this.line;
    }

    /**
     * Get the column position of the lexer in the source text
     *
     * Returns: the position
     */
    public override ulong getColumn()
    {
        return this.column;
    }

    /**
     * Exhaustively provide a list of all tokens
     *
     * Returns: a `Token[]` containing all tokens
     */
    public override Token[] getTokens()
    {
        return tokens;
    }

    /**
     * Lexer state data
     */
    private string sourceCode; /* The source to be lexed */
    private ulong line = 1; /* Current line (1-based) */
    private ulong column = 1; /* Current column (1-based) */
    private Token[] currentTokens; /* Current token set */
    private string currentToken; /* Current token build-up buffer */
    private ulong position; /* Current character position */
    private char currentChar; /* Current character */

    /* The tokens (populated once `performLex()` completes) */
    private Token[] tokens;

    /**
     * Constructs a new lexer with the given
     * source code of which it should tokenize
     *
     * Params:
     *   sourceCode = the source text
     */
    this(string sourceCode)
    {
        this.sourceCode = sourceCode;
    }

    /**
     * Checks whether or not we could shift our
     * source text pointer forward if it would
     * be within the boundaries of the source text
     * or not
     *
     * Returns: `true` if within the boundaries,
     * `false` otherwise
     */
    private bool isForward()
    {
        return position + 1 < sourceCode.length;
    }

    /**
     * Checks whether or not we could shift our
     * source text pointer backwards and if it
     * would be within the boundaries of the source
     * text or not
     *
     * Note: `position` is unsigned, so at `position == 0`
     * the subtraction wraps to a huge value and this
     * correctly yields `false`.
     *
     * Returns: `true` if within the boundaries,
     * `false` otherwise
     */
    private bool isBackward()
    {
        return position - 1 < sourceCode.length;
    }

    /**
     * Returns true if we have a token being built,
     * false otherwise
     *
     * Returns: `true` if we have a token built-up,
     * `false` otherwise
     */
    private bool hasToken()
    {
        return currentToken.length != 0;
    }

    /**
     * Performs the lexing process
     *
     * Throws:
     *   LexerException on error tokenizing
     */
    public void performLex()
    {
        /* FIX: an empty source previously caused an out-of-bounds
         * index on the initial character fetch below; an empty
         * input simply produces zero tokens.
         */
        if (sourceCode.length == 0)
        {
            tokens = currentTokens;
            return;
        }

        currentChar = sourceCode[position];
        while (position < sourceCode.length)
        {
            if (isSplitter(currentChar))
            {
                /* Flush any token built up so far */
                if (currentToken.length != 0)
                {
                    flush();
                }

                /* Whitespace splitters are dropped entirely */
                if (isWhite(currentChar))
                {
                    if (improvedAdvance())
                    {
                        continue;
                    }
                    else
                    {
                        break;
                    }
                }

                /* The splitter token to finally insert */
                string splitterToken;

                /* Comment start: `//` or `/*` (on the first `/`) */
                if (currentChar == LS.FORWARD_SLASH && isForward() && (sourceCode[position + 1] == LS.FORWARD_SLASH || sourceCode[position + 1] == LS.STAR))
                {
                    if (!doComment())
                    {
                        break;
                    }
                }

                /* Check for case of `==` or `=<` or `=>` (where we are on the first `=` sign) */
                if (currentChar == LS.EQUALS && isForward() && (sourceCode[position + 1] == LS.EQUALS || sourceCode[position + 1] == LS.LESS_THAN || sourceCode[position + 1] == LS.BIGGER_THAN))
                {
                    buildAdvance();
                    buildAdvance();
                    flush();
                    continue;
                }

                /* Check for case of `<=` or `>=` */
                if ((currentChar == LS.LESS_THAN || currentChar == LS.BIGGER_THAN) && isForward() && (sourceCode[position + 1] == LS.EQUALS || sourceCode[position + 1] == LS.LESS_THAN || sourceCode[position + 1] == LS.BIGGER_THAN))
                {
                    buildAdvance();
                    buildAdvance();
                    flush();
                    continue;
                }

                /**
                 * Here we check if we have a `.` and that the characters
                 * preceding us were all good for an identifier
                 */
                import tlang.misc.utils;

                if (currentChar == LS.DOT)
                {
                    if (isBackward() && isWhite(sourceCode[position - 1]))
                    {
                        throw new LexerException(this, "Character '.' is not allowed to follow a whitespace.");
                    }
                    if (isForward() && isWhite(sourceCode[position + 1]))
                    {
                        throw new LexerException(this, "Character '.' is not allowed to precede a whitespace.");
                    }
                    else if (!hasToken() && (isBackward() && !isValidDotPrecede(sourceCode[position - 1])))
                    {
                        throw new LexerException(this, "Character '.' should be preceded by valid identifier or numerical.");
                    }
                    else
                    {
                        splitterToken = EMPTY ~ currentChar;
                        improvedAdvance();
                    }
                }
                else if (currentChar == LS.AMPERSAND && (position + 1) != sourceCode.length && sourceCode[position + 1] == LS.AMPERSAND)
                {
                    splitterToken = "&&";
                    improvedAdvance(2, false);
                }
                /* Check if we need to do combinators (e.g. for ||, &&) */
                else if (currentChar == LS.SHEFFER_STROKE && isForward() && sourceCode[position + 1] == LS.SHEFFER_STROKE)
                {
                    splitterToken = "||";
                    improvedAdvance(2, false);
                }
                else if (currentChar == LS.EXCLAMATION && isForward() && sourceCode[position + 1] == LS.EQUALS)
                {
                    splitterToken = "!=";
                    improvedAdvance(2, false);
                }
                else if (currentChar == LS.SHEFFER_STROKE)
                {
                    splitterToken = "|";
                    improvedAdvance(1, false);
                }
                else if (currentChar == LS.AMPERSAND)
                {
                    splitterToken = "&";
                    improvedAdvance(1, false);
                }
                else if (currentChar == LS.CARET)
                {
                    splitterToken = "^";
                    improvedAdvance(1, false);
                }
                else if (currentChar == LS.LESS_THAN)
                {
                    splitterToken = [LS.LESS_THAN];
                    improvedAdvance(1, false);
                }
                else if (currentChar == LS.BIGGER_THAN)
                {
                    splitterToken = [LS.BIGGER_THAN];
                    improvedAdvance(1, false);
                }
                else if (isWhite(currentChar))
                {
                    /* Reached e.g. after a comment consumed up to a newline */
                    if (!improvedAdvance())
                    {
                        break;
                    }
                }
                else
                {
                    /* Any other single-character splitter (e.g. `;`, `(`, `)`) */
                    splitterToken = EMPTY ~ currentChar;
                    improvedAdvance();
                }

                /* Flush the current token (if one exists) */
                if (currentToken.length)
                {
                    flush();
                }

                /* Add the splitter token (only if it isn't empty) */
                if (splitterToken.length)
                {
                    currentTokens ~= new Token(splitterToken, line, column);
                }
            }
            else if (currentChar == LS.UNDERSCORE || isAlpha(currentChar))
            {
                DEBUG("path ident String");
                if (!doIdentOrPath())
                {
                    break;
                }
                else
                {
                    continue;
                }
            }
            else if (currentChar == LS.DOUBLE_QUOTE)
            {
                if (!doString())
                {
                    break;
                }
            }
            else if (currentChar == LS.SINGLE_QUOTE)
            {
                if (!doChar())
                {
                    break;
                }
            }
            else if (isDigit(currentChar))
            {
                if (!doNumber())
                {
                    break;
                }
                /* Strip digit-grouping underscores (e.g. 1_000 -> 1000) */
                currentToken = currentToken.replace("_", "");
            }
            else if (currentChar == LS.BACKSLASH)
            {
                throw new LexerException(this, "Escape sequences can only be used within strings");
            }
            else
            {
                throw new LexerException(this, "Unsupported Character in this position");
            }
        }

        /* If there was a token made at the end then flush it */
        if (currentToken.length)
        {
            currentTokens ~= new Token(currentToken, line, column);
        }

        tokens = currentTokens;
    }

    /**
     * Processes an ident with or without a dot-path
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doIdentOrPath()
    {
        /* Consume the first identifier character */
        if (!buildAdvance())
        {
            flush();
            return false;
        }

        while (true)
        {
            if (currentChar == LS.DOT)
            {
                /* A `.` may not be followed by a splitter or a digit */
                if (isForward() && (isSplitter(sourceCode[position + 1]) || isDigit(sourceCode[position + 1])))
                {
                    throw new LexerException(this, "Invalid character in identifier build up.");
                }
                else
                {
                    /* A trailing `.` at EOF is also invalid */
                    if (!buildAdvance())
                    {
                        throw new LexerException(this, "Invalid character in identifier build up.");
                    }
                }
            }
            else if (isSplitter(currentChar))
            {
                flush();
                return true;
            }
            else if (!(isAlpha(currentChar) || isDigit(currentChar) || currentChar == LS.UNDERSCORE))
            {
                throw new LexerException(this, "Invalid character in identifier build up.");
            }
            else
            {
                if (!buildAdvance())
                {
                    return false;
                }
            }
        }
    }

    /**
     * Tokenizes a character literal
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doChar()
    {
        /* Consume the opening `'` */
        if (!buildAdvance())
        {
            throw new LexerException(this, "Expected character, but got EOF");
        }

        /* Character literal must be next */
        bool valid;
        if (currentChar == LS.BACKSLASH)
        {
            valid = doEscapeCode();
        }
        else
        {
            valid = buildAdvance();
        }
        if (!valid)
        {
            throw new LexerException(this, "Expected ''', but got EOF");
        }

        if (currentChar != LS.SINGLE_QUOTE)
        {
            /* FIX: previously claimed "EOF" although we DID read a character */
            throw new LexerException(this, "Expected ''', but got '" ~ currentChar ~ "'");
        }
        /* Consume the closing `'` */
        if (!buildAdvance())
        {
            flush();
            return false;
        }
        flush();
        return true;
    }

    /**
     * Tokenizes a string literal
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doString()
    {
        /* Consume the opening `"` */
        if (!buildAdvance())
        {
            throw new LexerException(this, "Expected closing \", but got EOF");
        }

        while (true)
        {
            if (currentChar == LS.DOUBLE_QUOTE)
            {
                /* FIX: explicit call parentheses (was `!buildAdvance`,
                 * relying on D's optional-parens call) */
                if (!buildAdvance())
                {
                    flush();
                    return false;
                }
                return true;
            }
            else if (currentChar == LS.BACKSLASH)
            {
                if (!doEscapeCode())
                {
                    throw new LexerException(this, "Expected closing \", but got EOF");
                }
            }
            else if (currentChar == LS.NEWLINE)
            {
                /* Raw newlines are not allowed inside string literals */
                throw new LexerException(this, "Expected closing \", but got NEWLINE");
            }
            else
            {
                if (!buildAdvance())
                {
                    throw new LexerException(this, "Expected closing \", but got EOF");
                }
            }
        }
    }

    /**
     * Lex a comment, start by consuming the '/' and setting a flag for
     * multi-line based on the next character and consume.
     *
     * Enters a loop that looks for the end of the comment and if not
     * builds up the comment.
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doComment()
    {
        /* Consume the leading `/` */
        buildAdvance();
        bool multiLine = currentChar == LS.STAR;

        /* Consume the second marker character (`/` or `*`) */
        if (!buildAdvance())
        {
            if (multiLine)
            {
                throw new LexerException(this, "Expected closing Comment, but got EOF");
            }
            else
            {
                flush();
                return false;
            }
        }
        while (true)
        {
            /* Single-line comments end at the newline */
            if (!multiLine && currentChar == LS.NEWLINE)
            {
                flush();
                return advanceLine();
            }
            /* Multi-line comments end at `*` followed by `/` */
            if (multiLine && currentChar == LS.STAR && isForward() && sourceCode[position + 1] == LS.FORWARD_SLASH)
            {
                buildAdvance();
                if (!buildAdvance())
                {
                    flush();
                    return false;
                }
                else
                {
                    return true;
                }
            }
            else
            {
                if (!buildAdvance())
                {
                    if (multiLine)
                    {
                        throw new LexerException(this, "Expected closing Comment, but got EOF");
                    }
                    else
                    {
                        flush();
                        return false;
                    }
                }
            }
        }
    }

    /**
     * Lex an escape code. If a valid one is found, add it to the token,
     * else throw an exception.
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doEscapeCode()
    {
        /* Consume the `\` */
        if (!buildAdvance())
        {
            return false;
        }
        if (isValidEscape_String(currentChar))
        {
            return buildAdvance();
        }
        else
        {
            throw new LexerException(this, "Invalid escape code");
        }
    }

    /**
     * Lex a number, this method lexes a plain number, float or numerically encoded.
     * The float and numerically encoded numbers are deferred to other methods.
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doNumber()
    {
        while (true)
        {
            if (isDigit(currentChar) || currentChar == LS.UNDERSCORE)
            {
                if (!buildAdvance())
                {
                    /* Strip digit-grouping underscores before flushing */
                    currentToken = currentToken.replace("_", "");
                    flush();
                    return false;
                }
            }
            else if (currentChar == LS.DOT)
            {
                return doFloat();
            }
            else if (isNumericalEncoder(currentChar))
            {
                return doEncoder();
            }
            else
            {
                return true;
            }
        }
    }

    /**
     * Lex a numerical encoder, looks for Signage followed by Size, or if there is
     * no signage, just the size.
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doEncoder()
    {
        if (isNumericalEncoder_Signage(currentChar))
        {
            /* Signage must be followed by a size indicator (also thrown
             * when the following character is simply not a size) */
            if (!buildAdvance() || !isNumericalEncoder_Size(currentChar))
            {
                throw new LexerException(this, "Expected size indicator B,I,L,W but got EOF");
            }
        }
        if (isNumericalEncoder_Size(currentChar))
        {
            if (!buildAdvance())
            {
                flush();
                return false;
            }
            else
            {
                /* An encoded literal must be terminated by a splitter */
                if (!isSplitter(currentChar))
                {
                    throw new LexerException(this, "Expected splitter but got \"" ~ currentChar ~ "\".");
                }
            }
        }
        flush();
        return true;
    }

    /**
     * Lex a floating point, the initial part of the number is lexed by the `doNumber()`
     * method. Here we consume the '.' and consume digits until a splitter is reached.
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool doFloat()
    {
        /* Consume the `.`; EOF straight after it is invalid (e.g. `1.`) */
        if (!buildAdvance())
        {
            throw new LexerException(this, "Floating point expected digit, got EOF.");
        }

        size_t count = 0; /* digits consumed after the `.` */
        bool valid = false; /* at least one digit seen after the `.` */
        while (true)
        {
            if (isDigit(currentChar) || (count > 0 && currentChar == LS.UNDERSCORE))
            {
                /* tack on and move to next iteration */
                valid = true;
                if (!buildAdvance())
                {
                    currentToken = currentToken.replace("_", "");
                    flush();
                    return false;
                }
                count++;
                continue;
            }
            else
            {
                if (isSplitter(currentChar) && valid)
                {
                    currentToken = currentToken.replace("_", "");
                    flush();
                    return true;
                }
                else
                {
                    throw new LexerException(this, "Floating point '" ~ currentToken ~ "' cannot be followed by a '" ~ currentChar ~ "'");
                }
            }
        }
    }

    /**
     * Flush the current token to the token buffer.
     */
    private void flush()
    {
        currentTokens ~= new Token(currentToken, line, column);
        currentToken = EMPTY;
    }

    /**
     * Consume the current char into the current token
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool buildAdvance()
    {
        currentToken ~= currentChar;
        return improvedAdvance();
    }

    /**
     * Advances the source code pointer
     *
     * Params:
     *   inc = advancement counter, default 1
     *   shouldFlush = whether or not to flush, default is `false`
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool improvedAdvance(int inc = 1, bool shouldFlush = false)
    {
        if (currentChar == LS.NEWLINE)
        {
            /* FIX(style): was `shouldFlush && flush();` — same behavior,
             * expressed as a plain conditional */
            if (shouldFlush)
            {
                flush();
            }
            /* NOTE(review): a newline always advances by exactly 1,
             * ignoring `inc` — presumably intended, as multi-character
             * advances never start on a newline; confirm */
            line++;
            column = 1;
            position++;
        }
        else
        {
            column += inc;
            position += inc;
        }

        if (position >= sourceCode.length)
        {
            return false;
        }
        currentChar = sourceCode[position];
        return true;
    }

    /**
     * Advance the position and line, reset the column to 1.
     *
     * Returns: `true` if characters left in buffer, else `false`
     */
    private bool advanceLine()
    {
        column = 1;
        line++;
        position++;
        if (position >= sourceCode.length)
        {
            return false;
        }
        currentChar = sourceCode[position];
        return true;
    }
}
|
|
|
|
version(unittest)
{
    /**
     * Does a print out of some text just to show you
     * where you are from within the caller
     *
     * Params:
     *   i = line number (auto-filled)
     *   mod = module name (auto-filled)
     *   func = function name (auto-filled)
     */
    private void shout(int i = __LINE__, string mod = __MODULE__, string func = __FUNCTION__)
    {
        DEBUG("Unittest at "~to!(string)(i)~" in "~func~" (within module "~mod~")");
    }
}
|
|
|
|
/**
 * Test input: `hello "world";`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \"world\";";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
        new Token(";", 0, 0)
    ]);
}

/**
 * Test input: `hello \n "world";`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \n \"world\";";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
        new Token(";", 0, 0)
    ]);
}

/**
 * Test input: `hello "wo\nrld";`
 *
 * A raw newline inside a string literal must be rejected.
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \"wo\nrld\";";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    try {
        currentLexer.performLex();
        /* FIX: without this the test passed vacuously when NO
         * exception was thrown for the invalid input */
        assert(false);
    } catch (LexerException) {
        assert(true);
    }
}

/**
 * Test input: `hello "world"|| `
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \"world\"|| ";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
        new Token("||", 0, 0)
    ]);
}

/**
 * Test input: `hello "world"&& `
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \"world\"&& ";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
        new Token("&&", 0, 0)
    ]);
}
|
|
|
|
/**
 * Test input: `hello "wooorld"||`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \"wooorld\"||";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token("\"wooorld\"", 0, 0),
        new Token("||", 0, 0)
    ]);
}

/**
 * Test input: `hello "world";|`
 *
 * A single `|` (not a `||` combinator) must be its own token.
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \"world\";|";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
        new Token(";", 0, 0), new Token("|", 0, 0)
    ]);
}

/**
 * Test input: ` hello` (leading whitespace is dropped)
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = " hello";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("hello", 0, 0)]);
}

/**
 * Test input: `//trist` (single-line comment at EOF)
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "//trist";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("//trist", 0, 0)]);
}

/**
 * Test input: `/*trist*​/` (multi-line comment on one line)
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "/*trist*/";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("/*trist*/", 0, 0)]);
}
|
|
|
|
/**
 * Test input: `/*t\nr\ni\ns\nt*​/` (multi-line comment spanning newlines)
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "/*t\nr\ni\ns\nt*/";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("/*t\nr\ni\ns\nt*/", 0, 0)]);
}

/**
 * Test input: `/*t\nr\ni\ns\nt*​/ ` (multi-line comment with trailing space)
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "/*t\nr\ni\ns\nt*/ ";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("/*t\nr\ni\ns\nt*/", 0, 0)]);
}

/**
 * Test input: `//trist \n hello`
 *
 * The single-line comment ends at the newline and lexing continues.
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "//trist \n hello";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("//trist ", 0, 0),
        new Token("hello", 0, 0),
    ]);
}

/**
 * Test input: ` hello;`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = " hello;";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token(";", 0, 0)
    ]);
}

/**
 * Test input: `5+5`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "5+5";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("5", 0, 0),
        new Token("+", 0, 0),
        new Token("5", 0, 0),
    ]);
}

/**
 * Test input: `hello "world\""` (escaped quote within a string)
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "hello \"world\\\"\"";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("hello", 0, 0), new Token("\"world\\\"\"", 0, 0)
    ]);
}
|
|
|
|
/**
 * Test input: `'c'`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "'c'";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("'c'", 0, 0)]);
}

/**
 * Test input: `2121\n2121`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "2121\n2121";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("2121", 0, 0), new Token("2121", 0, 0)
    ]);
}

/**
 * Test `=` and `==` handling
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    /* Lone `=` */
    string sourceCode = " =\n";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("=", 0, 0)]);

    import std.algorithm.comparison;

    /* `=` followed by `==` */
    sourceCode = " = ==\n";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("=", 0, 0), new Token("==", 0, 0)
    ]);

    import std.algorithm.comparison;

    /* Lone `==` */
    sourceCode = " ==\n";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("==", 0, 0)]);

    import std.algorithm.comparison;

    /* Two separate `=` tokens */
    sourceCode = " = =\n";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("=", 0, 0), new Token("=", 0, 0)
    ]);

    import std.algorithm.comparison;

    /* Mixed `==`, `,`, `=`, `==` */
    sourceCode = " ==, = ==\n";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("==", 0, 0), new Token(",", 0, 0), new Token("=", 0, 0),
        new Token("==", 0, 0)
    ]);

    // Test flushing of previous token
    import std.algorithm.comparison;

    sourceCode = "i==i=\n";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("i", 0, 0), new Token("==", 0, 0), new Token("i", 0, 0),
        new Token("=", 0, 0)
    ]);
}
|
|
|
|
/**
 * Test: Literal value encoding
 *
 * Tests validity
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode;
    BasicLexer currentLexer;

    /* 21L (valid) */
    sourceCode = "21L";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("21L", 0, 0)]);

    /* 21UL (valid) */
    sourceCode = "21UL";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("21UL", 0, 0)]);

    /* 21U (invalid — signage without a size indicator) */
    sourceCode = "21U ";
    currentLexer = new BasicLexer(sourceCode);
    try {
        currentLexer.performLex();
        assert(false);
    } catch (LexerException) {
        assert(true);
    }

    /* 21ULa (invalid — junk after the encoder) */
    sourceCode = "21ULa";
    currentLexer = new BasicLexer(sourceCode);
    try {
        currentLexer.performLex();
        assert(false);
    } catch (LexerException) {
        assert(true);
    }

    /* 21SI (valid) */
    sourceCode = "21SI";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected "~to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("21SI", 0, 0)]);

    /* 21SI; (valid) */
    sourceCode = "21SI;";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected "~to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("21SI", 0, 0),
        new Token(";", 0, 0)
    ]);
}

/**
 * Test input: `1.5`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "1.5";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("1.5", 0, 0)]);
}

/**
 * Test correct handling of dot-operator for
 * non-floating point cases
 *
 * Input: `new A().l.p.p;`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "new A().l.p.p;";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("new", 0, 0),
        new Token("A", 0, 0),
        new Token("(", 0, 0),
        new Token(")", 0, 0),
        new Token(".", 0, 0),
        new Token("l.p.p", 0, 0),
        new Token(";", 0, 0)
    ]);
}
|
|
|
|
/**
 * Tab testing
 */
unittest
{
    shout();
    /**
     * Test tab dropping in front of a float.
     * Test classification: Valid
     * Test input: `\t1.5`
     */
    DEBUG("Tab Unit Test");
    import std.algorithm.comparison;

    string sourceCode = "\t1.5";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [new Token("1.5", 0, 0)]);

    /**
     * Test tab dropping before '.' of float.
     * Catch fail for verification.
     * Test classification: Invalid
     * Test input: `1\t.5`
     */
    import std.algorithm.comparison;

    bool didFail = false;
    sourceCode = "1\t.5";
    currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException e)
    {
        didFail = true;
    }
    assert(didFail);

    /**
     * Testing float EOF after '.'.
     * Test classification: Invalid
     * Test input: `1.`
     */
    sourceCode = "1.";
    currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
        assert(false);
    }
    catch (LexerException e)
    {
    }

    /**
     * Testing illegal backslash outside a string.
     * Test classification: Invalid
     * Test input: `hello \ `
     */
    sourceCode = "hello \\ ";
    currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
        assert(false);
    }
    catch (LexerException e)
    {
    }

    /**
     * Test tab dropping after '.' of float.
     * Catch fail for verification.
     * Test classification: Invalid
     * Test input: `1.\t5`
     */
    import std.algorithm.comparison;

    didFail = false;
    sourceCode = "1.\t5";
    currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException e)
    {
        didFail = true;
    }
    assert(didFail);

    /**
     * Test tab dropping for an empty token array.
     * Test classification: Valid
     * Test input: `\t\t\t\t\t`
     */
    DEBUG("Tab Unit Test");
    import std.algorithm.comparison;

    sourceCode = "\t\t\t\t\t";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens().length == 0);
}
|
|
|
|
/**
 * Test correct handling of dot-operator for
 * non-floating point cases where whitespace has been
 * inserted after the dot.
 * Test Classification: Invalid
 *
 * Input: `new A(). l.p.p;`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    bool didFail = false;
    string sourceCode = "new A(). l.p.p;";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException)
    {
        didFail = true;
    }
    assert(didFail);
}
|
|
|
|
/**
 * Test correct handling of dot-operator for
 * non-floating point cases where whitespace has been inserted before and after.
 * Test Classification: Invalid
 *
 * Input: `new A() . l.p.p;`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string input = "new A() . l.p.p;";
    BasicLexer lexer = new BasicLexer(input);

    // A dot operator surrounded by whitespace must be rejected
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
unittest
{
    shout();
    import std.algorithm.comparison;

    /**
     * Test dot for fail on dot operator with no buildup and invalid lead
     * Catch fail for verification.
     * Test classification: Invalid
     * Test input: `1.5.5`
     */
    bool didFail = false;
    string sourceCode = "1.5.5";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException)
    {
        didFail = true;
    }
    assert(didFail);

    /**
     * Test for fail on space following dot operator.
     * Test Classification: Invalid
     * Input: `1. a`
     */
    didFail = false;
    sourceCode = "1. a";
    currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException)
    {
        didFail = true;
    }
    assert(didFail);

    /**
     * Test for correct lex space following paren
     * Test Classification: Valid
     * Input: `).x`
     */
    sourceCode = ").x";
    currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token(")", 0, 0),
        new Token(".", 0, 0),
        new Token("x", 0, 0),
    ]);

    /**
     * Test for fail on space preceding dot operator.
     * Test Classification: Invalid
     * Input: `1 .a`
     */
    didFail = false;
    sourceCode = "1 .a";
    currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException)
    {
        didFail = true;
    }
    assert(didFail);
}
|
|
|
|
/**
 * Test newlines
 * Test Classification: Valid
 * Input: `\n\n\n\n`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    // Pure newlines must produce no tokens at all
    string input = "\n\n\n\n";
    BasicLexer lexer = new BasicLexer(input);
    lexer.performLex();
    DEBUG("Collected " ~ to!(string)(lexer.getTokens()));
    assert(lexer.getTokens().length == 0);
}
|
|
|
|
/**
 * Test for character escape codes
 *
 * Input: `'\\'`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    // Escaped backslash inside a character literal stays one token
    string input = "'\\\\'";
    BasicLexer lexer = new BasicLexer(input);
    lexer.performLex();
    DEBUG("Collected " ~ to!(string)(lexer.getTokens()));

    Token[] expected = [new Token("'\\\\'", 0, 0)];
    assert(lexer.getTokens() == expected);
}
|
|
|
|
/**
 * Test for character escape codes
 *
 * Input: `'\a'`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    // `\a` is an accepted escape sequence inside a character literal
    string input = "'\\a'";
    BasicLexer lexer = new BasicLexer(input);
    lexer.performLex();
    DEBUG("Collected " ~ to!(string)(lexer.getTokens()));

    Token[] expected = [new Token("'\\a'", 0, 0)];
    assert(lexer.getTokens() == expected);
}
|
|
|
|
/**
 * Test for invalid escape sequence
 * Input: `\f`
 */
unittest
{
    shout();

    bool didFail = false;
    string sourceCode = "\\f";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException)
    {
        didFail = true;
    }
    assert(didFail);
}
|
|
|
|
/**
 * Test for invalid char in ident
 * Input: `hello$k`
 */
unittest
{
    shout();

    string input = "hello$k";
    BasicLexer lexer = new BasicLexer(input);

    // `$` is not a legal identifier character
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Test for invalid char in ident
 * Input: `$`
 */
unittest
{
    shout();

    string input = "$";
    BasicLexer lexer = new BasicLexer(input);

    // A bare `$` cannot start any token
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing Underscores in numbers
 *
 * Input: `1_ 1_2 1_2.3 1_2.3_ 1__2 1__2.3 1__.23__`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    // Underscores are digit separators and must be stripped from
    // the emitted tokens
    string input = "1_ 1_2 1_2.3 1_2.3_ 1__2 1__2.3 1__.23__";
    BasicLexer lexer = new BasicLexer(input);
    lexer.performLex();
    DEBUG("Collected " ~ to!(string)(lexer.getTokens()));

    Token[] expected =
    [
        new Token("1", 0, 0),
        new Token("12", 0, 0),
        new Token("12.3", 0, 0),
        new Token("12.3", 0, 0),
        new Token("12", 0, 0),
        new Token("12.3", 0, 0),
        new Token("1.23", 0, 0),
    ];
    assert(lexer.getTokens() == expected);
}
|
|
|
|
/**
 * Testing comparison/relational operators
 *
 * Input: `<= >= =< => == != < > ^`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "<= >= =< => == != < > ^";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("<=", 0, 0),
        new Token(">=", 0, 0),
        new Token("=<", 0, 0),
        new Token("=>", 0, 0),
        new Token("==", 0, 0),
        new Token("!=", 0, 0),
        new Token("<", 0, 0),
        new Token(">", 0, 0),
        new Token("^", 0, 0),
    ]);
}
|
|
|
|
/**
 * Testing Chars
 *
 * Input: `'a'`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string input = "'a'";
    BasicLexer lexer = new BasicLexer(input);
    lexer.performLex();
    DEBUG("Collected " ~ to!(string)(lexer.getTokens()));

    Token[] expected = [new Token("'a'", 0, 0)];
    assert(lexer.getTokens() == expected);
}
|
|
|
|
/**
 * Test for invalid ident
 * Input: `hello. `
 */
unittest
{
    shout();

    string input = "hello. ";
    BasicLexer lexer = new BasicLexer(input);

    // An identifier ending in `.` followed by whitespace is malformed
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Test for invalid ident
 * Input: `hello.`
 */
unittest
{
    shout();

    string input = "hello.";
    BasicLexer lexer = new BasicLexer(input);

    // An identifier ending in `.` at EOF is malformed
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing Chars
 * Input: `'`
 */
unittest
{
    shout();

    string input = "'";
    BasicLexer lexer = new BasicLexer(input);

    // A lone opening quote hits EOF before any character
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing Chars
 * Input: `'a`
 */
unittest
{
    shout();

    string input = "'a";
    BasicLexer lexer = new BasicLexer(input);

    // Character literal missing its closing quote
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing Chars
 * Input: `'aa`
 */
unittest
{
    shout();

    string input = "'aa";
    BasicLexer lexer = new BasicLexer(input);

    // More than one character before the closing quote is rejected
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing String EOF
 * Input: `"a`
 */
unittest
{
    shout();

    string input = "\"a";
    BasicLexer lexer = new BasicLexer(input);

    // String literal missing its closing quote
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing String EOF
 * Input: `"`
 */
unittest
{
    shout();

    bool didFail = false;
    string sourceCode = "\"";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException)
    {
        didFail = true;
    }
    assert(didFail);
}
|
|
|
|
/**
 * Testing String EOF
 * Input: `"\`
 */
unittest
{
    shout();

    string input = "\"\\";
    BasicLexer lexer = new BasicLexer(input);

    // EOF immediately after an escape backslash inside a string
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing Comment EOF
 * Input: `/*`
 */
unittest
{
    shout();

    string input = "/*";
    BasicLexer lexer = new BasicLexer(input);

    // Unterminated block comment at EOF
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing Comment EOF
 * Input: `/* `
 */
unittest
{
    shout();

    string input = "/* ";
    BasicLexer lexer = new BasicLexer(input);

    // Unterminated block comment with body text at EOF
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing Line comment EOF
 *
 * Input: `//`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    // A line comment terminated by EOF is still emitted as a token
    string input = "//";
    BasicLexer lexer = new BasicLexer(input);
    lexer.performLex();
    DEBUG("Collected " ~ to!(string)(lexer.getTokens()));

    Token[] expected = [new Token("//", 0, 0)];
    assert(lexer.getTokens() == expected);
}
|
|
|
|
/**
 * Testing invalid Escape Code inside a string
 * Input: `"\p`
 */
unittest
{
    shout();

    bool didFail = false;
    string sourceCode = "\"\\p";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    try
    {
        currentLexer.performLex();
    }
    catch (LexerException)
    {
        didFail = true;
    }
    assert(didFail);
}
|
|
|
|
/**
 * Testing invalid Escape Code
 * Input: `\p`
 */
unittest
{
    shout();

    string input = "\\p";
    BasicLexer lexer = new BasicLexer(input);

    // `\p` is not a recognized escape sequence
    bool thrown = false;
    try
    {
        lexer.performLex();
    }
    catch (LexerException)
    {
        thrown = true;
    }
    assert(thrown);
}
|
|
|
|
/**
 * Testing character literal followed by trailing whitespace
 *
 * Input: `'a' `
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    string sourceCode = "'a' ";
    BasicLexer currentLexer = new BasicLexer(sourceCode);
    currentLexer.performLex();
    DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
    assert(currentLexer.getTokens() == [
        new Token("'a'", 0, 0)
    ]);
}
|
|
|
|
/**
 * Testing comment
 *
 * Input: `// \n`
 */
unittest
{
    shout();
    import std.algorithm.comparison;

    // The newline terminates the comment token but is not part of it
    string input = "// \n";
    BasicLexer lexer = new BasicLexer(input);
    lexer.performLex();
    DEBUG("Collected " ~ to!(string)(lexer.getTokens()));

    Token[] expected = [new Token("// ", 0, 0)];
    assert(lexer.getTokens() == expected);
}