/**
* A single-pass tokenizer
*/
module tlang.compiler.lexer.kinds.basic;
import std.container.slist;
import std.string : replace;
import tlang.misc.logging;
import std.conv : to;
import std.ascii : isDigit, isAlpha, isWhite;
import tlang.compiler.lexer.core;
enum EMPTY = "";
/**
* Represents a basic lexer which performs the whole tokenization
* process in one shot via a call to `performLex()`; only after
* this may the `LexerInterface` methods, such as `getCurrentToken()`,
* `nextToken()` and so forth, actually be used.
*
* This is effectively a single pass lexer.
*/
public final class BasicLexer : LexerInterface
{
/**
* Post-perform lex() data
*
* This exports the LexerInterface API.
*
* TODO: Ensure these can only be used AFTER `performLex()`
* has been called.
*/
private ulong tokenPtr = 0;
/**
* Returns the token at the current cursor
* position
*
* Returns: the `Token`
*/
public override Token getCurrentToken()
{
/* TODO: Throw an exception here when we try get more than we can */
return tokens[tokenPtr];
}
/**
* Moves the cursor one token forward
*/
public override void nextToken()
{
tokenPtr++;
}
/**
* Moves the cursor one token backwards
*/
public override void previousToken()
{
tokenPtr--;
}
/**
* Sets the position of the cursor
*
* Params:
* newPosition = the new position
*/
public override void setCursor(ulong newPosition)
{
tokenPtr = newPosition;
}
/**
* Retrieves the cursor's current position
*
* Returns: the position
*/
public override ulong getCursor()
{
return tokenPtr;
}
/**
* Checks whether more tokens are available
* or not
*
* Returns: true if more tokens are available, false otherwise
*/
public override bool hasTokens()
{
return tokenPtr < tokens.length;
}
/**
* Get the line position of the lexer in the source text
*
* Returns: the position
*/
public override ulong getLine()
{
return this.line;
}
/**
* Get the column position of the lexer in the source text
*
* Returns: the position
*/
public override ulong getColumn()
{
return this.column;
}
/**
* Exhaustively provide a list of all tokens
*
* Returns: a `Token[]` containing all tokens
*/
public override Token[] getTokens()
{
return tokens;
}
/**
* Lexer state data
*/
private string sourceCode; /* The source to be lexed */
private ulong line = 1; /* Current line */
private ulong column = 1;
private Token[] currentTokens; /* Current token set */
private string currentToken; /* Current token */
private ulong position; /* Current character position */
private char currentChar; /* Current character */
/* The tokens */
private Token[] tokens;
/**
* Constructs a new lexer with the given
* source code which it should tokenize
*
* Params:
* sourceCode = the source text
*/
this(string sourceCode)
{
this.sourceCode = sourceCode;
}
/**
* Checks whether or not we could shift our
* source text pointer forward and still be
* within the boundaries of the source text
*
* Returns: `true` if within the boundaries,
* `false` otherwise
*/
private bool isForward()
{
return position + 1 < sourceCode.length;
}
/**
* Checks whether or not we could shift our
* source text pointer backwards and still be
* within the boundaries of the source text
*
* Returns: `true` if within the boundaries,
* `false` otherwise
*/
private bool isBackward()
{
return position - 1 < sourceCode.length;
}
/**
* Checks whether we currently have a token
* being built up
*
* Returns: `true` if we have a token built-up,
* `false` otherwise
*/
private bool hasToken()
{
return currentToken.length != 0;
}
/**
* Performs the lexing process
*
* Throws:
* LexerException on error tokenizing
*/
public void performLex()
{
currentChar = sourceCode[position];
while (position < sourceCode.length)
{
// gprintln("SrcCodeLen: "~to!(string)(sourceCode.length));
// gprintln("Position: "~to!(string)(position));
// // currentChar = sourceCode[position];
// gprintln("Current Char\"" ~ currentChar ~ "\"");
// gprintln("Current Token\"" ~ currentToken ~ "\"");
// gprintln("Match alpha check" ~ to!(bool)(currentChar == LS.UNDERSCORE || isAlpha(currentChar)));
if (isSplitter(currentChar))
{
if (currentToken.length != 0)
{
flush();
}
if (isWhite(currentChar) ) {
if (improvedAdvance()) {
continue;
} else {
break;
}
}
/* The splitter token to finally insert */
string splitterToken;
// gprintln("Build up: " ~ currentToken);
// gprintln("Current char, splitter: " ~ currentChar);
if (currentChar == LS.FORWARD_SLASH && isForward() && (sourceCode[position+1] == LS.FORWARD_SLASH || sourceCode[position+1] == LS.STAR)) {
if (!doComment()) {
break;
}
}
/* Check for case of `==` or `=<` or `=>` (where we are on the first `=` sign) */
if (currentChar == LS.EQUALS && isForward() && (sourceCode[position + 1] == LS.EQUALS || sourceCode[position + 1] == LS.LESS_THAN || sourceCode[position + 1] == LS.BIGGER_THAN))
{
buildAdvance();
buildAdvance();
flush();
continue;
}
/* Check for case of `<=` or `>=` */
if ((currentChar == LS.LESS_THAN || currentChar == LS.BIGGER_THAN) && isForward() && (sourceCode[position + 1] == LS.EQUALS || sourceCode[position + 1] == LS.LESS_THAN || sourceCode[position + 1] == LS.BIGGER_THAN))
{
buildAdvance();
buildAdvance();
flush();
continue;
}
/**
* Here we check if we have a `.` and that the characters
* preceding us were all good for an identifier
*/
import tlang.misc.utils;
if (currentChar == LS.DOT)
{
if (isBackward() && isWhite(sourceCode[position - 1]))
{
throw new LexerException(this, "Character '.' is not allowed to follow a whitespace.");
}
if (isForward() && isWhite(sourceCode[position + 1]))
{
throw new LexerException(this, "Character '.' is not allowed to precede a whitespace.");
}
else if (!hasToken() && (isBackward() && !isValidDotPrecede(
sourceCode[position - 1])))
{
throw new LexerException(this, "Character '.' should be preceded by valid identifier or numerical.");
}
else
{
splitterToken = EMPTY ~ currentChar;
improvedAdvance();
}
}else if (currentChar == LS.AMPERSAND && (position + 1) != sourceCode.length && sourceCode[position + 1] == LS.AMPERSAND)
{
splitterToken = "&&";
improvedAdvance(2, false);
}
/* Check if we need to do combinators (e.g. for ||, &&) */
/* TODO: Second operand in condition out of bounds */
else if (currentChar == LS.SHEFFER_STROKE && isForward() && sourceCode[position + 1] == LS.SHEFFER_STROKE)
{
splitterToken = "||";
improvedAdvance(2, false);
} else if (currentChar == LS.EXCLAMATION && isForward() && sourceCode[position + 1] == LS.EQUALS)
{
splitterToken = "!=";
improvedAdvance(2, false);
}else if (currentChar == LS.SHEFFER_STROKE) {
splitterToken = "|";
improvedAdvance(1, false);
} else if (currentChar == LS.AMPERSAND) {
splitterToken = "&";
improvedAdvance(1, false);
} else if (currentChar == LS.CARET) {
splitterToken = "^";
improvedAdvance(1, false);
} else if (currentChar == LS.LESS_THAN) {
splitterToken = [LS.LESS_THAN];
improvedAdvance(1, false);
} else if (currentChar == LS.BIGGER_THAN) {
splitterToken = [LS.BIGGER_THAN];
improvedAdvance(1, false);
}
else if (isWhite(currentChar)) {
if (!improvedAdvance()) {
break;
}
}
else
{
splitterToken = EMPTY ~ currentChar;
improvedAdvance();
}
/* Flush the current token (if one exists) */
if (currentToken.length)
{
flush();
}
/* Add the splitter token (only if it isn't empty) */
if (splitterToken.length)
{
currentTokens ~= new Token(splitterToken, line, column);
}
}
//else if (currentChar == LS.UNDERSCORE || ((!isSplitter(currentChar) && !isDigit(currentChar)) && currentChar != LS.DOUBLE_QUOTE && currentChar != LS.SINGLE_QUOTE && currentChar != LS.BACKSLASH)) {
else if (currentChar == LS.UNDERSCORE || isAlpha(currentChar)) {
DEBUG("path ident String");
if (!doIdentOrPath()) {
break;
} else {
continue;
}
}
else if (currentChar == LS.DOUBLE_QUOTE)
{
if (!doString()) {
break;
}
}
else if (currentChar == LS.SINGLE_QUOTE)
{
if (!doChar()) {
break;
}
}
else if (isDigit(currentChar)){
if (!doNumber()) {
break;
}
currentToken = currentToken.replace("_", "");
}
else if (currentChar == LS.BACKSLASH)
{
throw new LexerException(this, "Escape sequences can only be used within strings");
} else {
throw new LexerException(this, "Unsupported Character in this position");
//gprintln("Fuck " ~ " me col" ~ to!(string)(column));
}
}
/* If there was a token made at the end then flush it */
if (currentToken.length)
{
currentTokens ~= new Token(currentToken, line, column);
}
tokens = currentTokens;
}
/**
* Processes an ident with or without a dot-path
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doIdentOrPath () {
if (!buildAdvance()) {
flush();
return false;
}
while (true) {
if (currentChar == LS.DOT) {
if (isForward() && (isSplitter(sourceCode[position + 1]) || isDigit(sourceCode[position + 1]))) {
throw new LexerException(this, "Invalid character in identifier build up.");
} else {
if (!buildAdvance()) {
throw new LexerException(this, "Invalid character in identifier build up.");
//return false;
}
}
} else if (isSplitter(currentChar)) {
flush();
return true;
} else if (!(isAlpha(currentChar) || isDigit(currentChar) || currentChar == LS.UNDERSCORE)) {
throw new LexerException(this, "Invalid character in identifier build up.");
} else {
if (!buildAdvance()) {
return false;
}
}
}
}
/**
* Tokenizes a character
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doChar()
{
if(!buildAdvance())
{
throw new LexerException(this, "Expected character, but got EOF");
}
/* Character literal must be next */
bool valid;
if(currentChar == LS.BACKSLASH)
{
valid = doEscapeCode();
}
else
{
valid = buildAdvance();
}
if(!valid)
{
throw new LexerException(this, "Expected ''', but got EOF");
}
if(currentChar != LS.SINGLE_QUOTE)
{
throw new LexerException(this, "Expected ''', but got EOF");
}
if(!buildAdvance())
{
flush();
return false;
}
flush();
return true;
}
/**
* Tokenizes a string
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doString()
{
if(!buildAdvance())
{
throw new LexerException(this, "Expected closing \", but got EOF");
}
while (true) {
if (currentChar == LS.DOUBLE_QUOTE) {
if (!buildAdvance()) {
flush();
return false;
}
return true;
} else if (currentChar == LS.BACKSLASH) {
if (!doEscapeCode()) {
throw new LexerException(this, "Expected closing \", but got EOF");
}
} else if (currentChar == LS.NEWLINE) {
throw new LexerException(this, "Expected closing \", but got NEWLINE");
} else {
if (!buildAdvance()) {
throw new LexerException(this, "Expected closing \", but got EOF");
}
}
}
}
/**
* Lex a comment: start by consuming the '/', set a flag for
* multi-line based on the next character, and consume that too.
*
* Then enter a loop that looks for the end of the comment,
* building up the comment along the way.
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doComment() {
buildAdvance();
// if (!buildAdvance()) {
// flush();
// return false;
// }
bool multiLine = currentChar == LS.STAR;
if (!buildAdvance()) {
if (multiLine) {
throw new LexerException(this, "Expected closing Comment, but got EOF");
} else {
flush();
return false;
}
}
while (true) {
if (!multiLine && currentChar == LS.NEWLINE) {
flush();
return advanceLine();
}
if (multiLine && currentChar == LS.STAR && isForward() && sourceCode[position+1] == LS.FORWARD_SLASH) {
buildAdvance();
if (!buildAdvance()) {
flush();
return false;
} else {
return true;
}
} else {
if (!buildAdvance()) {
if (multiLine)
{
throw new LexerException(this, "Expected closing Comment, but got EOF");
}
else
{
flush();
return false;
}
}
}
}
}
/**
* Lex an escape code. If a valid one is found, add it to the token, else throw an exception.
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doEscapeCode() {
if (!buildAdvance()) {
return false;
}
// currentToken ~= LS.BACKSLASH;
if (isValidEscape_String(currentChar)) {
return buildAdvance();
} else {
throw new LexerException(this, "Invalid escape code");
}
// flush();
}
/**
* Lex a number: a plain number, a float or a numerically encoded value.
* Floats and numerically encoded values are deferred to other methods.
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doNumber() {
while (true) {
if (isDigit(currentChar) || currentChar == LS.UNDERSCORE) {
if(!buildAdvance()) {
currentToken = currentToken.replace("_", "");
flush();
return false;
}
} else if (currentChar == LS.DOT) {
return doFloat();
} else if (isNumericalEncoder(currentChar)) {
return doEncoder();
} else {
return true;
}
}
}
/**
* Lex a numerical encoder: looks for a signage indicator followed by a size
* indicator, or just the size indicator if there is no signage.
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doEncoder() {
if (isNumericalEncoder_Signage(currentChar)) {
if (!buildAdvance() || !isNumericalEncoder_Size(currentChar)) {
throw new LexerException(this, "Expected size indicator B,I,L,W but got EOF");
}
}
if (isNumericalEncoder_Size(currentChar)) {
if (!buildAdvance()) {
flush();
return false;
} else {
if (!isSplitter(currentChar)) {
throw new LexerException(this, "Expected splitter but got \"" ~ currentChar ~ "\".");
}
}
}
flush();
return true;
}
/**
* Lex a floating point number. The integral part is lexed by the `doNumber()`
* method; here we consume the '.' and then digits until a splitter is reached.
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool doFloat() {
if (!buildAdvance()) {
throw new LexerException(this, "Floating point expected digit, got EOF.");
//return false;
}
size_t count = 0;
bool valid = false;
while (true) {
if (isDigit(currentChar) || (count > 0 && currentChar == LS.UNDERSCORE))
{
/* tack on and move to next iteration */
valid = true;
if (!buildAdvance()) {
currentToken = currentToken.replace("_", "");
flush();
return false;
}
count++;
continue;
}
else
{
/* TODO: Throw error here */
if (isSplitter(currentChar) && valid)
{
currentToken = currentToken.replace("_", "");
flush();
return true;
}
else
{
throw new LexerException(this, "Floating point '" ~ currentToken ~ "' cannot be followed by a '" ~ currentChar ~ "'");
}
}
}
}
/**
* Flush the current token to the token buffer.
*/
private void flush()
{
currentTokens ~= new Token(currentToken, line, column);
currentToken = EMPTY;
}
/**
* Consume the current char into the current token
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool buildAdvance()
{
currentToken ~= currentChar;
return improvedAdvance();
}
/**
* Advances the source code pointer
*
* Params:
* inc = advancement counter, default 1
* shouldFlush = whether or not to flush, default is `false`
* Returns: `true` if characters left in buffer, else `false`
*/
private bool improvedAdvance(int inc = 1, bool shouldFlush = false)
{
if (currentChar == LS.NEWLINE)
{
shouldFlush && flush();
line++;
column = 1;
position++;
}
else
{
column += inc;
position += inc;
}
if (position >= sourceCode.length)
{
return false;
}
currentChar = sourceCode[position];
return true;
}
/**
* Advance the position and line, reset the column to 1 and update the current character.
*
* Returns: `true` if characters left in buffer, else `false`
*/
private bool advanceLine()
{
column = 1;
line++;
position++;
if (position >= sourceCode.length)
{
return false;
}
currentChar = sourceCode[position];
return true;
}
}
version(unittest)
{
/**
* Prints out some text just to show you,
* from within the caller, where you are
*
* Params:
* __LINE__ = line number (auto-filled)
* __MODULE__ = module name (auto-filled)
* __FUNCTION__ = function name (auto-filled)
*/
private void shout(int i = __LINE__, string mod = __MODULE__, string func = __FUNCTION__)
{
DEBUG("Unittest at "~to!(string)(i)~" in "~func~" (within module "~mod~")");
}
}
/**
* Test input: `hello "world";`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \"world\";";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
new Token(";", 0, 0)
]);
}
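/**
* Usage sketch for the post-lex cursor API described in the class
* documentation (`hasTokens()`, `getCurrentToken()`, `nextToken()`,
* `previousToken()`, `setCursor()`, `getCursor()`).
*
* This is a minimal illustrative sketch re-using the previous test's
* input; it assumes `Token` equality compares lexemes, as the
* assertions above already rely on.
*
* Test input: `hello "world";`
*/
unittest
{
shout();
string sourceCode = "hello \"world\";";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
/* Walk every token via the cursor API */
ulong count = 0;
while (currentLexer.hasTokens())
{
currentLexer.getCurrentToken();
currentLexer.nextToken();
count++;
}
assert(count == 3);
/* Jump back to the first token and step around */
currentLexer.setCursor(0);
assert(currentLexer.getCursor() == 0);
assert(currentLexer.getCurrentToken() == new Token("hello", 0, 0));
currentLexer.nextToken();
assert(currentLexer.getCurrentToken() == new Token("\"world\"", 0, 0));
currentLexer.previousToken();
assert(currentLexer.getCurrentToken() == new Token("hello", 0, 0));
}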
/**
* Test input: `hello \n "world";`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \n \"world\";";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
new Token(";", 0, 0)
]);
}
/**
* Test input: `hello "wo\nrld";`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \"wo\nrld\";";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try {
currentLexer.performLex();
} catch (LexerException) {
assert(true);
}
}
/**
* Test input: `hello "world"|| `
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \"world\"|| ";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
new Token("||", 0, 0)
]);
}
/**
* Test input: `hello "world"&& `
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \"world\"&& ";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
new Token("&&", 0, 0)
]);
}
/**
* Test input: `hello "wooorld"||`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \"wooorld\"||";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token("\"wooorld\"", 0, 0),
new Token("||", 0, 0)
]);
}
/**
* Test input: `hello "world";|`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \"world\";|";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token("\"world\"", 0, 0),
new Token(";", 0, 0), new Token("|", 0, 0)
]);
}
/**
* Test input: ` hello`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = " hello";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0)]);
}
/**
* Test input: `//trist`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "//trist";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("//trist", 0, 0)]);
}
/**
* Test input: `/*trist\*\/`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "/*trist*/";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("/*trist*/", 0, 0)]);
}
/**
* Test input: `/*t\nr\ni\ns\nt\*\/`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "/*t\nr\ni\ns\nt*/";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("/*t\nr\ni\ns\nt*/", 0, 0)]);
}
/**
* Test input: `/*t\nr\ni\ns\nt\*\/ `
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "/*t\nr\ni\ns\nt*/ ";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("/*t\nr\ni\ns\nt*/", 0, 0)]);
}
/**
* Test input: `//trist \n hello`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "//trist \n hello";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("//trist ", 0, 0),
new Token("hello", 0, 0),
]);
}
/**
* Test input: ` hello;`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = " hello;";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token(";", 0, 0)
]);
}
/**
* Test input: `5+5`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "5+5";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("5", 0, 0),
new Token("+", 0, 0),
new Token("5", 0, 0),
]);
}
/**
* Test input: `hello "world\""`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "hello \"world\\\"\"";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("hello", 0, 0), new Token("\"world\\\"\"", 0, 0)
]);
}
/**
* Test input: `'c'`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "'c'";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("'c'", 0, 0)]);
}
/**
* Test input: `2121\n2121`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "2121\n2121";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("2121", 0, 0), new Token("2121", 0, 0)
]);
}
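/**
* Sketch of the line tracking exposed by `getLine()`: after
* `performLex()` the reported line reflects the final position
* reached in the source text. This re-uses the previous test's
* input and assumes line counting starts at 1, as the lexer
* state initialises it.
*
* Test input: `2121\n2121`
*/
unittest
{
shout();
string sourceCode = "2121\n2121";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
/* One newline was crossed, so the lexer should end on line 2 */
assert(currentLexer.getLine() == 2);
}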
/**
* Test `=` and `==` handling
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = " =\n";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("=", 0, 0)]);
import std.algorithm.comparison;
sourceCode = " = ==\n";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("=", 0, 0), new Token("==", 0, 0)
]);
import std.algorithm.comparison;
sourceCode = " ==\n";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("==", 0, 0)]);
import std.algorithm.comparison;
sourceCode = " = =\n";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("=", 0, 0), new Token("=", 0, 0)
]);
import std.algorithm.comparison;
sourceCode = " ==, = ==\n";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("==", 0, 0), new Token(",", 0, 0), new Token("=", 0, 0),
new Token("==", 0, 0)
]);
// Test flushing of previous token
import std.algorithm.comparison;
sourceCode = "i==i=\n";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("i", 0, 0), new Token("==", 0, 0), new Token("i", 0, 0),
new Token("=", 0, 0)
]);
}
/**
* Test: Literal value encoding
*
* Tests validity
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode;
BasicLexer currentLexer;
/* 21L (valid) */
sourceCode = "21L";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("21L", 0, 0)]);
/* 21UL (valid) */
sourceCode = "21UL";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("21UL", 0, 0)]);
/* 21U (invalid) */
sourceCode = "21U ";
currentLexer = new BasicLexer(sourceCode);
// gprintln(currentLexer.performLex());
try {
currentLexer.performLex();
assert(false);
} catch (LexerException) {
assert(true);
}
/* 21ULa (invalid) */
sourceCode = "21ULa";
currentLexer = new BasicLexer(sourceCode);
// gprintln(currentLexer.performLex());
try {
currentLexer.performLex();
assert(false);
} catch (LexerException) {
assert(true);
}
/* 21SI (valid) */
sourceCode = "21SI";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("21SI", 0, 0)]);
/* 21SI; (valid) */
sourceCode = "21SI;";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("21SI", 0, 0),
new Token(";", 0, 0)
]);
}
/**
* Test input: `1.5`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "1.5";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("1.5", 0, 0)]);
}
/**
* Test correct handling of dot-operator for
* non-floating point cases
*
* Input: `new A().l.p.p;`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "new A().l.p.p;";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("new", 0, 0),
new Token("A", 0, 0),
new Token("(", 0, 0),
new Token(")", 0, 0),
new Token(".", 0, 0),
new Token("l.p.p", 0, 0),
new Token(";", 0, 0)
]);
}
/**
* Tab testing
*/
unittest
{
shout();
/**
* Test tab dropping in front of a float.
* Test classification: Valid
* Test input: `\t1.5`
*/
DEBUG("Tab Unit Test");
import std.algorithm.comparison;
string sourceCode = "\t1.5";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("1.5", 0, 0)]);
/**
* Test tab dropping before '.' of float.
* Catch fail for verification.
* Test classification: Invalid
* Test input: `1\t.5`
*/
import std.algorithm.comparison;
bool didFail = false;
sourceCode = "1\t.5";
currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
/**
* Testing Float EOF after '.'.
* Test classification: Invalid
* Test input: `1.`
*/
sourceCode = "1.";
currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
assert(false);
}
catch (LexerException e)
{
}
/**
* Testing illegal backslash.
* Test classification: Invalid
* Test input: `hello \ `
*/
sourceCode = "hello \\ ";
currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
assert(false);
}
catch (LexerException e)
{
}
/**
* Test tab dropping after '.' of float.
* Catch fail for verification.
* Test classification: Invalid
* Test input: `1.\t5`
*/
import std.algorithm.comparison;
didFail = false;
sourceCode = "1.\t5";
currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
/**
* Test tab dropping for an empty token array.
* Test classification: Valid
* Test input: `\t\t\t\t\t`
*/
DEBUG("Tab Unit Test");
import std.algorithm.comparison;
sourceCode = "\t\t\t\t\t";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens().length == 0);
}
/**
* Test correct handling of dot-operator for
* non-floating point cases where whitespace has been inserted after the dot.
* Test Classification: Invalid
*
* Input: `new A(). l.p.p;`
*/
unittest
{
shout();
import std.algorithm.comparison;
bool didFail = false;
string sourceCode = "new A(). l.p.p;";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException)
{
didFail = true;
}
assert(didFail);
}
/**
* Test correct handling of dot-operator for
* non-floating point cases where whitespace has been inserted before and after.
* Test Classification: Invalid
*
* Input: `new A() . l.p.p;`
*/
unittest
{
shout();
import std.algorithm.comparison;
bool didFail = false;
string sourceCode = "new A() . l.p.p;";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException)
{
didFail = true;
}
assert(didFail);
}
unittest
{
shout();
/**
* Test for failure on a dot operator with no token buildup and an invalid lead.
* Catch fail for verification.
* Test classification: Invalid
* Test input: `1.5.5`
*/
import std.algorithm.comparison;
bool didFail = false;
string sourceCode = "1.5.5";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
/**
* Test for fail on space following dot operator.
* Test Classification: Invalid
* Input: `1. a`
*/
didFail = false;
sourceCode = "1. a";
currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
/**
* Test for correct lex space following paren
* Test Classification: Valid
* Input: `).x`
*/
sourceCode = ").x";
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token(")", 0, 0),
new Token(".", 0, 0),
new Token("x", 0, 0),
]);
/**
* Test for fail on space preceding dot operator.
* Test Classification: Invalid
* Input: `1 .a`
*/
didFail = false;
sourceCode = "1 .a";
currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Test newlines
* Test Classification: Valid
* Input: `\n\n\n\n`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "\n\n\n\n";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens().length == 0);
}
/**
* Test for character escape codes
*
* Input: `'\\'`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "'\\\\'";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("'\\\\'", 0, 0),
]);
}
/**
* Test for character escape codes
*
* Input: `'\a'`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "'\\a'";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("'\\a'", 0, 0),
]);
}
/**
* Test for invalid escape sequence
* Input: `\f`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "\\f";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Test for invalid char in ident
* Input: `hello$k`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "hello$k";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Test for invalid char in ident
* Input: `$`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "$";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing Underscores in numbers
*
* Input: `1_ 1_2 1_2.3 1_2.3_ 1__2 1__2.3 1__.23__`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "1_ 1_2 1_2.3 1_2.3_ 1__2 1__2.3 1__.23__";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("1", 0, 0),
new Token("12", 0, 0),
new Token("12.3", 0, 0),
new Token("12.3", 0, 0),
new Token("12", 0, 0),
new Token("12.3", 0, 0),
new Token("1.23", 0, 0),
]);
}
/**
* Testing comparison and related operators
*
* Input: `<= >= =< => == != < > ^`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "<= >= =< => == != < > ^";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("<=", 0, 0),
new Token(">=", 0, 0),
new Token("=<", 0, 0),
new Token("=>", 0, 0),
new Token("==", 0, 0),
new Token("!=", 0, 0),
new Token("<", 0, 0),
new Token(">", 0, 0),
new Token("^", 0, 0),
]);
}
/**
* Testing Chars
*
* Input: `'a'`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "'a'";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("'a'", 0, 0),
]);
}
/**
* Test for invalid ident
* Input: `hello. `
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "hello. ";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Test for invalid ident
* Input: `hello.`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "hello.";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing Chars
* Input: `'`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "'";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing Chars
* Input: `'a`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "'a";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing Chars
* Input: `'aa`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "'aa";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing String EOF
* Input: `"a`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "\"a";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing String EOF
* Input: `"`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "\"";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing String EOF
* Input: `"\`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "\"\\";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing Comment EOF
* Input: `/*`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "/*";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing Comment EOF
* Input: `/* `
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "/* ";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing Line comment EOF
*
* Input: `//`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "//";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("//", 0, 0)
]);
}
/**
* Testing invalid Escape Code
* Input: `"\p`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "\"\\p";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing invalid Escape Code
* Input: `\p`
*/
unittest
{
shout();
bool didFail = false;
string sourceCode = "\\p";
BasicLexer currentLexer = new BasicLexer(sourceCode);
try
{
currentLexer.performLex();
}
catch (LexerException e)
{
didFail = true;
}
assert(didFail);
}
/**
* Testing char literal followed by trailing whitespace
*
* Input: `'a' `
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "'a' ";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("'a'", 0, 0)
]);
}
/**
* Testing comment
*
* Input: `// \n`
*/
unittest
{
shout();
import std.algorithm.comparison;
string sourceCode = "// \n";
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
DEBUG("Collected " ~ to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [
new Token("// ", 0, 0)
]);
}