mirror of https://github.com/tbklang/tlang.git
Merge branch 'vardec_varass_dependency' into hotfix/func_defs_in_dep_generator
This commit is contained in:
commit
13740a2ce4
|
@ -4,6 +4,7 @@
|
|||
module tlang.compiler.lexer.core.lexer;
|
||||
|
||||
import tlang.compiler.lexer.core.tokens : Token;
|
||||
import std.ascii : isDigit, isAlpha, isWhite;
|
||||
|
||||
/**
|
||||
* Defines the interface a lexer must provide
|
||||
|
@ -73,4 +74,163 @@ public interface LexerInterface
|
|||
* Returns: a `Token[]` containing all tokens
|
||||
*/
|
||||
public Token[] getTokens();
|
||||
}
|
||||
|
||||
/**
 * Named constants for the individual characters
 * the lexer works with, so that code can refer to
 * them by meaning rather than by raw literal
 */
public enum LexerSymbols : char
{
    /* Punctuation, operators, whitespace and quoting characters */
    L_PAREN = '(',
    R_PAREN = ')',
    SEMI_COLON = ';',
    COMMA = ',',
    L_BRACK = '[',
    R_BRACK = ']',
    PLUS = '+',
    MINUS = '-',
    FORWARD_SLASH = '/',
    PERCENT = '%',
    STAR = '*',
    AMPERSAND = '&',
    L_BRACE = '{',
    R_BRACE = '}',
    EQUALS = '=',
    SHEFFER_STROKE = '|',
    CARET = '^',
    EXCLAMATION = '!',
    TILDE = '~',
    DOT = '.',
    COLON = ':',
    SPACE = ' ',
    TAB = '\t',
    NEWLINE = '\n',
    DOUBLE_QUOTE = '"',
    SINGLE_QUOTE = '\'',
    BACKSLASH = '\\',
    UNDERSCORE = '_',
    LESS_THAN = '<',
    BIGGER_THAN = '>',

    /* Letters/digits that name an escape when they follow a backslash */
    ESC_NOTHING = '0',
    ESC_CARRIAGE_RETURN = 'r',
    ESC_TAB = 't',
    ESC_NEWLINE = 'n',
    ESC_BELL = 'a',

    /* Numerical literal encoders (size and signage) */
    ENC_BYTE = 'B',
    ENC_INT = 'I',
    ENC_LONG = 'L',
    ENC_WORD = 'W',
    ENC_UNSIGNED = 'U',
    ENC_SIGNED = 'S',
}
|
||||
|
||||
/**
|
||||
* Alias to `LexerSymbols`
|
||||
*/
|
||||
public alias LS = LexerSymbols;
|
||||
|
||||
/**
 * Determines whether the provided character is
 * one of the operator symbols
 *
 * Params:
 *   c = the character to check
 * Returns: `true` if it is an operator, `false`
 * otherwise
 */
public bool isOperator(char c)
{
    switch(c)
    {
        case LS.PLUS, LS.TILDE, LS.MINUS,
             LS.STAR, LS.FORWARD_SLASH, LS.AMPERSAND,
             LS.CARET, LS.EXCLAMATION, LS.SHEFFER_STROKE,
             LS.LESS_THAN, LS.BIGGER_THAN:
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
/**
 * Determines whether the provided character is a
 * splitter: one of the dedicated splitter symbols,
 * any operator, or any whitespace character
 *
 * Params:
 *   c = the character to check
 * Returns: `true` if it is a splitter, `false`
 * otherwise
 */
public bool isSplitter(char c)
{
    switch(c)
    {
        /* Symbols that split without being operators */
        case LS.SEMI_COLON, LS.COMMA, LS.L_PAREN,
             LS.R_PAREN, LS.L_BRACK, LS.R_BRACK,
             LS.PERCENT, LS.L_BRACE, LS.R_BRACE,
             LS.EQUALS, LS.DOT, LS.COLON:
            return true;

        /* Everything else splits only if it is an operator or whitespace */
        default:
            return isOperator(c) || isWhite(c);
    }
}
|
||||
|
||||
/**
 * Determines whether the provided character is a
 * numerical size encoder (`B`, `W`, `I` or `L`)
 *
 * Params:
 *   character = the character to check
 * Returns: `true` if so, `false` otherwise
 */
public bool isNumericalEncoder_Size(char character)
{
    switch(character)
    {
        case LS.ENC_BYTE, LS.ENC_WORD, LS.ENC_INT, LS.ENC_LONG:
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
/**
 * Determines whether the provided character is a
 * numerical signage encoder (`S` or `U`)
 *
 * Params:
 *   character = the character to check
 * Returns: `true` if so, `false` otherwise
 */
public bool isNumericalEncoder_Signage(char character)
{
    const bool marksSigned = character == LS.ENC_SIGNED;
    const bool marksUnsigned = character == LS.ENC_UNSIGNED;

    return marksSigned || marksUnsigned;
}
|
||||
|
||||
/**
 * Determines whether the provided character is
 * any kind of numerical encoder, be it a size
 * encoder or a signage encoder
 *
 * Params:
 *   character = the character to check
 * Returns: `true` if so, `false` otherwise
 */
public bool isNumericalEncoder(char character)
{
    /* Size encoders are checked first */
    if(isNumericalEncoder_Size(character))
    {
        return true;
    }

    /* Otherwise it can only qualify as a signage encoder */
    return isNumericalEncoder_Signage(character);
}
|
||||
|
||||
/**
 * Checks if the given character is a valid
 * escape character (something which would
 * have followed a `\`)
 *
 * The accepted characters are `\`, `"`, `'`,
 * `0`, `n`, `r`, `t` and `a`.
 *
 * Params:
 *   character = the character to check
 * Returns: `true` if so, `false` otherwise
 */
public bool isValidEscape_String(char character)
{
    switch(character)
    {
        /* Characters that escape to themselves */
        case LS.BACKSLASH, LS.DOUBLE_QUOTE, LS.SINGLE_QUOTE:
            return true;

        /**
         * Letter/digit escapes. The tab escape is the letter
         * `t` (`ESC_TAB`), not the literal tab character
         * (`TAB`): comparing against `TAB` would reject `\t`
         * and accept a backslash followed by a raw tab
         */
        case LS.ESC_NOTHING, LS.ESC_NEWLINE, LS.ESC_CARRIAGE_RETURN,
             LS.ESC_TAB, LS.ESC_BELL:
            return true;

        default:
            return false;
    }
}
|
||||
|
||||
/**
 * Determines whether the given character is allowed
 * to appear directly before a '.'.
 *
 * Params:
 *   character = the character to check
 * Returns: `true` if so, otherwise `false`
 */
public bool isValidDotPrecede(char character)
{
    // Only closing parentheses/brackets qualify; an alphanumeric
    // check (isAlpha/isDigit) is currently disabled
    switch(character)
    {
        case LS.R_PAREN, LS.R_BRACK:
            return true;

        default:
            return false;
    }
}
|
|
@ -0,0 +1,124 @@
|
|||
module tlang.compiler.lexer.kinds.arr;
|
||||
|
||||
import tlang.compiler.lexer.core;
|
||||
|
||||
/**
 * A lexer backed by an already-built `Token[]`
 * instead of source text. Useful for testing
 * parser-only related things with concrete
 * tokens
 */
public final class ArrLexer : LexerInterface
{
    /**
     * The tokens handed to the constructor
     */
    private Token[] source;

    /**
     * Index of the current token within `source`
     */
    private ulong position = 0;

    /**
     * Constructs a new `ArrLexer` (dummy lexer)
     * over tokens which are already in concrete
     * form
     *
     * Params:
     *   tokens = the `Token[]`
     */
    this(Token[] tokens)
    {
        this.source = tokens;
    }

    /**
     * Returns the token at the current cursor
     * position. No explicit range check is
     * performed here.
     *
     * Returns: the `Token`
     */
    public Token getCurrentToken()
    {
        return source[position];
    }

    /**
     * Moves the cursor one token forward
     */
    public void nextToken()
    {
        position += 1;
    }

    /**
     * Moves the cursor one token backwards.
     * The cursor is unsigned, so stepping back
     * from position 0 wraps around.
     */
    public void previousToken()
    {
        position -= 1;
    }

    /**
     * Sets the position of the cursor
     *
     * Params:
     *   cursor = the new position
     */
    public void setCursor(ulong cursor)
    {
        position = cursor;
    }

    /**
     * Retrieves the cursor's current position
     *
     * Returns: the position
     */
    public ulong getCursor()
    {
        return position;
    }

    /**
     * Checks whether more tokens are available
     * or not
     *
     * Returns: true if more tokens are available, false otherwise
     */
    public bool hasTokens()
    {
        return position < source.length;
    }

    /**
     * Get the line position of the lexer in the source text
     *
     * Returns: the position (always 0 for this lexer)
     */
    public ulong getLine()
    {
        return 0; // TODO: anything meaningful?
    }

    /**
     * Get the column position of the lexer in the source text
     *
     * Returns: the position (always 0 for this lexer)
     */
    public ulong getColumn()
    {
        return 0; // TODO: anything meaningful?
    }

    /**
     * Exhaustively provide a list of all tokens
     *
     * Returns: a `Token[]` containing all tokens
     */
    public Token[] getTokens()
    {
        return source;
    }
}
|
File diff suppressed because it is too large
Load Diff
|
@ -2008,6 +2008,124 @@ public final class Parser
|
|||
return statement;
|
||||
}
|
||||
|
||||
import std.container.slist : SList;
|
||||
private SList!(Token) commentStack;
|
||||
/**
 * Records a comment token on the comment stack
 *
 * Params:
 *   commentToken = the token to record; it must be
 *   a single-line or multi-line comment
 */
private void pushComment(Token commentToken)
{
    // Anything other than a comment token reaching here is a parser bug
    assert(
        getSymbolType(commentToken) == SymbolType.SINGLE_LINE_COMMENT
        || getSymbolType(commentToken) == SymbolType.MULTI_LINE_COMMENT
    );

    // The front of the list acts as the top of the stack
    commentStack.insertFront(commentToken);
}
|
||||
//TODO: Add a popToken() (also think if we want a stack-based mechanism)
|
||||
/**
 * Checks whether any comments have been
 * pushed onto the comment stack
 *
 * Returns: `true` if at least one comment is
 * on the stack, `false` otherwise
 */
private bool hasCommentsOnStack()
{
    // Ask the container directly instead of walking the whole
    // list just to compare its length against zero
    return !commentStack.empty;
}
|
||||
|
||||
/**
 * Counts the comments currently held on the
 * comment stack
 *
 * Returns: the number of comments
 */
private ulong getCommentCount()
{
    import std.range : walkLength;

    // Count by walking a range over the whole stack
    auto stacked = commentStack[];
    return stacked.walkLength;
}
|
||||
|
||||
/**
 * Consumes the comment token under the lexer's
 * cursor: the token is pushed onto the comment
 * stack and the cursor is moved past it
 */
private void parseComment()
{
    gprintln("parseComment(): Enter", DebugType.WARNING);

    // Record the comment which the cursor is sitting on
    Token commentToken = lexer.getCurrentToken();
    pushComment(commentToken);
    gprintln("Comment is: '"~commentToken.getToken()~"'");

    // Move off comment
    lexer.nextToken();

    gprintln("parseComment(): Leave", DebugType.WARNING);
}
|
||||
|
||||
/**
 * Tests the handling of comments: comments at module
 * level and inside a function body must all end up
 * on the parser's comment stack
 */
unittest
{
    import tlang.compiler.lexer.kinds.arr : ArrLexer;

    /**
     * Lexes and parses the given source and then checks
     * that exactly `expected` comments were collected.
     * A `TError` raised while parsing fails the test.
     */
    void expectComments(string sourceCode, ulong expected)
    {
        LexerInterface currentLexer = new BasicLexer(sourceCode);
        (cast(BasicLexer)currentLexer).performLex();

        Parser parser = new Parser(currentLexer);

        try
        {
            Module modulle = parser.parse();

            assert(parser.hasCommentsOnStack());
            assert(parser.getCommentCount() == expected);
        }
        catch(TError e)
        {
            assert(false);
        }
    }

    /* A lone single-line comment at module level */
    expectComments(`module myCommentModule;
// Hello`, 1);

    /* Two multi-line comments at module level */
    expectComments(`module myCommntedModule;
/*Hello */

/* Hello*/`, 2);

    /* A mix of both comment kinds inside a function body */
    expectComments(`module myCommentedModule;

void function()
{
/*Hello */
/* Hello */
// Hello
//Hello
}
`, 4);
}
|
||||
|
||||
// TODO: We need to add `parseComment()`
|
||||
// support here (see issue #84)
|
||||
// TODO: This is currently dead code and ought to be used/implemented
|
||||
private Statement parseStatement(SymbolType terminatingSymbol = SymbolType.SEMICOLON)
|
||||
{
|
||||
gprintln("parseStatement(): Enter", DebugType.WARNING);
|
||||
|
@ -2080,6 +2198,12 @@ public final class Parser
|
|||
{
|
||||
statement = parseDerefAssignment();
|
||||
}
|
||||
/* If it is a kind-of comment */
|
||||
else if(symbol == SymbolType.SINGLE_LINE_COMMENT || symbol == SymbolType.MULTI_LINE_COMMENT)
|
||||
{
|
||||
gprintln("COMMENTS NOT YET PROPERLY SUPOORTED", DebugType.ERROR);
|
||||
parseComment();
|
||||
}
|
||||
/* Error out */
|
||||
else
|
||||
{
|
||||
|
@ -2303,6 +2427,12 @@ public final class Parser
|
|||
|
||||
modulle.addStatement(externStatement);
|
||||
}
|
||||
/* If it is a kind-of comment */
|
||||
else if(symbol == SymbolType.SINGLE_LINE_COMMENT || symbol == SymbolType.MULTI_LINE_COMMENT)
|
||||
{
|
||||
gprintln("COMMENTS NOT YET PROPERLY SUPOORTED", DebugType.ERROR);
|
||||
parseComment();
|
||||
}
|
||||
else
|
||||
{
|
||||
expect("parse(): Unknown '" ~ tok.getToken() ~ "'");
|
||||
|
|
|
@ -290,6 +290,16 @@ public enum SymbolType
|
|||
*/
|
||||
GENERIC_TYPE_DECLARE,
|
||||
|
||||
/**
|
||||
* Multi-line comment (forward-slash-star)
|
||||
*/
|
||||
MULTI_LINE_COMMENT,
|
||||
|
||||
/**
|
||||
* Single-line comment (forward-slash-slash)
|
||||
*/
|
||||
SINGLE_LINE_COMMENT,
|
||||
|
||||
/**
|
||||
* Unknown symbol
|
||||
*/
|
||||
|
@ -780,6 +790,16 @@ public SymbolType getSymbolType(Token tokenIn)
|
|||
{
|
||||
return SymbolType.STAR;
|
||||
}
|
||||
/* Multi-line comment (fwrd-slash-star) check */
|
||||
else if(token[0] == '/' && token.length >= 2 && token[1]=='*')
|
||||
{
|
||||
return SymbolType.MULTI_LINE_COMMENT;
|
||||
}
|
||||
/* Single-line comment (fwrd-slash-slash) check */
|
||||
else if(token[0] == '/' && token.length >= 2 && token[1]=='/')
|
||||
{
|
||||
return SymbolType.SINGLE_LINE_COMMENT;
|
||||
}
|
||||
/* Divide `/` operator check */
|
||||
else if(token[0] == '/')
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue