🧹🧼️ Cleanup: Clean up series 1 (#16)

* Feature: Lexer Interface cleanup (#14)

* LexerInterface

Defined the lexer interface

* Parser

- Fixed import for `Token` class
- Removed the token-management fields (`tokens`, `currentToken` and `tokenPtr`); these are now replaced by our `LexerInterface` field `lexer`, which manages all of this for us
- Removed the constructor which accepts a `Token[]`; we now only accept a `LexerInterface`
- Removed `nextToken()`, `hasTokens()`, `getCurrentToken()`, `previousToken()`, `setCursor(ulong)` and `getCursor()`
- The above are now called via the `lexer` instance (see the sketch below)
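A minimal sketch of what the parser's token handling now looks like. This is hypothetical illustrative code (the `takeToken()` helper is invented for the example); the `LexerInterface` methods it calls are the ones defined in the diff further below:

```d
import tlang.compiler.lexer.core : LexerInterface, Token;

public class Parser
{
    /* Token source; replaces the old `tokens`, `currentToken` and `tokenPtr` fields */
    private LexerInterface lexer;

    /* The only remaining constructor; the `Token[]`-based one is gone */
    this(LexerInterface lexer)
    {
        this.lexer = lexer;
    }

    /* Hypothetical call site: all token management is delegated to the lexer */
    private Token takeToken()
    {
        Token current = lexer.getCurrentToken();
        lexer.nextToken();
        return current;
    }
}
```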

Parser (unit tests)

- Migrated to the new `LexerInterface`+`BasicLexer` system (see the sketch below)
- Hoisted out common imports for unit tests into a `version(unittest)` block
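A hedged sketch of the new test setup; the input string is hypothetical, while the construction pattern mirrors the updated unit tests in the TypeChecker diff further below:

```d
version(unittest)
{
    /* Common imports hoisted out for all unit tests */
    import tlang.compiler.lexer.core;
    import tlang.compiler.lexer.kinds.basic : BasicLexer;
    import tlang.compiler.parsing.core;
}

unittest
{
    string sourceCode = "module myModule;"; // hypothetical input
    LexerInterface currentLexer = new BasicLexer(sourceCode);
    (cast(BasicLexer)currentLexer).performLex();
    Parser parser = new Parser(currentLexer);
}
```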

TypeChecker (unit tests)

- Hoisted out common imports for unit tests into a `version(unittest)`
- Migrated to new `LexerInterface`+`BasicLexer` system

LexerInterface

- Moved to new `lexer.core` package
- Documented module and class

Commands

- Fixed imports for the (now) `BasicLexer`
- Fixed imports for the (now) `lexer.core` package

Compiler

- Fixed imports for the (now) `BasicLexer`
- Use `LexerInterface` instead of `Lexer`
- The `doLex()` method now uses an instance of `BasicLexer`, downcasting briefly to call `performLex()` in order to tokenize the input and make the tokens available
- The `doParse()` method now takes in an instance of `LexerInterface` rather than a `Token[]` (see the excerpt below)
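For clarity, the two call sites as they appear in the compiler diff further below (`this.lexer` and `inputSource` are fields of the `Compiler` class shown there):

```d
/* doLex(): construct a BasicLexer, downcast briefly to run performLex() */
this.lexer = new BasicLexer(inputSource);
(cast(BasicLexer)(this.lexer)).performLex();
this.tokens = this.lexer.getTokens();

/* doParse(): hand the parser the LexerInterface itself, not a Token[] */
this.parser = new Parser(lexer);
```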

BasicLexer (previously Lexer)

- Moved to the `lexer.kinds` package
- Now implements `LexerInterface`
- Documented module and class
- Documented the `LexerInterface` methods

Exceptions

- Moved to the `lexer.core` package
- Fixed import of `Token` class
- Now uses `LexerInterface` (see the usage sketch below)
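A minimal usage sketch, assuming a hypothetical `failIfExhausted` helper wrapped around the constructor shown in the exceptions diff below:

```d
import tlang.compiler.lexer.core;

/* Hypothetical helper: raise a LexerException for the given lexer */
void failIfExhausted(LexerInterface lexer, bool exhausted)
{
    if(exhausted)
    {
        /* The exception derives its position report from lexer.getLine()/getColumn() */
        throw new LexerException(lexer, LexerError.EXHAUSTED_CHARACTERS, "no more characters to lex");
    }
}
```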

Core.Lexer.Package

- Documented package module

Tokens

- Moved to the `lexer.core` package
- Documented module and class

Check

- Fixed import for `Token`
- Fixed import for `BasicLexer`

* `core.lexer` (package)

- Documented all public imports

* Exceptions

- Documented the module
- Documented `LexerError` and its members
- Documented `LexerException` and its members

* Tokens

- Documented the fields (using proper syntax)
- Documented constructor and methods

* BasicLexer

- Removed now-completed TODO
- Added (for clarity) `override` keywords to the `getLine()` and `getColumn()` methods
- Moved `getLine()`, `getColumn()` and `getTokens()` to sit together
- Marked `getTokens()` as `override`
- Documented `getTokens()`

* Check

- Removed weird TODO that makes no sense
- Documented some of the members of `SymbolType`

* Check

- Documented a few more enum members of `SymbolType`
- Fixed documentation (and added a TODO) for the `SymbolType.LE_SYMBOL`

* Check

- Documented a few more enum members of `SymbolType`

* Check

- Documented `isType(string)`
- Added a TODO for `isType(string)`: "Check if below is even used"
- Documented `isPathIdentifier(string)`

* Check

- Updated description of `isPathIdentifier(string)` to note it can contain underscores
- Documented `isIdentifier(string)` (both functions are exercised in the sketch below)
- Updated `SymbolType.IDENT_TYPE` to acknowledge underscores
- Documented `isAccessor(Token token)` and `isModifier(Token)`
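Hedged examples of the documented rules; the inputs are hypothetical, and the expected results follow from the updated doc comments:

```d
import tlang.compiler.symbols.check : isIdentifier, isPathIdentifier;

unittest
{
    assert(isIdentifier("my_var1"));    // starts with a letter; digits and underscores allowed
    assert(!isIdentifier("1var"));      // must not start with a number
    assert(isPathIdentifier("a.b_c"));  // dotted path, underscores permitted
    assert(!isPathIdentifier("7.5"));   // excluded: looks like a floating-point literal
}
```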

* Check

- Documented `isIdentifier_NoDot(Token tokenIn)`, `isIdentifier_Dot(Token tokenIn)`, `isNumericLiteral(string token)`
- Removed unneeded import of `BasicLexer`
- Moved import to the top of the file

* Check

- Documented `getSymbolType(Token tokenIn)`, `isMathOp(Token token)`, `isBinaryOp(Token token)`

* Check

- Documented the `symbols.check` module

* Builtins

- Properly documented `getBuiltInType(TypeChecker, string)` (usage sketched below)
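A usage sketch based on the example given in that documentation (the `demo` wrapper and the `"nope"` lookup are hypothetical):

```d
import tlang.compiler.symbols.typing.core; // publicly imports the builtins module
import tlang.compiler.typecheck.core : TypeChecker;

/* Hypothetical wrapper; the TypeChecker is required for lookups */
void demo(TypeChecker tc)
{
    Type intType = getBuiltInType(tc, "int");  // per the docs: an Integer("int", 4, true)
    Type unknown = getBuiltInType(tc, "nope"); // not a built-in, so null per the docs
}
```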

* Builtins

- Documented module

* Typing (core)

- Documented module
- Documented all members

* Exceptions (lexer)

- Fixed documentation that was missing parameters

* Check

- Made comments docs/ddox compatible

* BasicLexer

- Fixed parameter name in documentation

* BasicLexer

- Fixed formatting in documentation for class

* Typing (core)

- Documented all remaining class members and fields
Tristan B. Velloza Kildaire, 2023-07-09 14:35:40 +02:00
15 changed files with 1199 additions and 535 deletions


@ -10,8 +10,8 @@ import jcli;
import std.stdio;
import misc.exceptions : TError;
import std.exception : ErrnoException;
import tlang.compiler.lexer.core : Lexer;
import tlang.compiler.lexer.tokens : Token;
import tlang.compiler.lexer.kinds.basic : BasicLexer;
import tlang.compiler.lexer.core;
import tlang.compiler.parsing.core : Parser;
import tlang.compiler.typecheck.core : TypeChecker;
import gogga;


@ -3,7 +3,7 @@ module tlang.compiler.core;
import gogga;
import std.conv : to;
import tlang.compiler.lexer.core;
import tlang.compiler.lexer.tokens : Token;
import tlang.compiler.lexer.kinds.basic : BasicLexer;
import std.stdio : File;
import tlang.compiler.parsing.core;
import tlang.compiler.symbols.check;
@ -96,7 +96,7 @@ public class Compiler
private string inputSource;
/* The lexer */
private Lexer lexer;
private LexerInterface lexer;
/* The lexed tokens */
private Token[] tokens;
@ -140,8 +140,8 @@ public class Compiler
public void doLex()
{
/* Setup the lexer and begin lexing */
this.lexer = new Lexer(inputSource);
this.lexer.performLex();
this.lexer = new BasicLexer(inputSource);
(cast(BasicLexer)(this.lexer)).performLex();
this.tokens = this.lexer.getTokens();
}
@ -168,7 +168,7 @@ public class Compiler
else
{
/* Spawn a new parser with the provided tokens */
this.parser = new Parser(lexedTokens);
this.parser = new Parser(lexer);
modulle = parser.parse();
}


@ -0,0 +1,74 @@
/**
* Exception definitions
*/
module tlang.compiler.lexer.core.exceptions;
import misc.exceptions : TError;
import tlang.compiler.lexer.core.lexer : LexerInterface;
import std.conv : to;
/**
* The specified error which occurred
*/
public enum LexerError
{
/**
* If all the characters were
* exhausted
*/
EXHAUSTED_CHARACTERS,
/**
* Generic error
*/
OTHER
}
/**
* Represents an exception that can occur
* when using a `LexerInterface`
*/
public final class LexerException : TError
{
/**
* The offending `LexerInterface` instance
*/
public const LexerInterface offendingInstance;
/**
* The sub-error type (specific error)
*/
public const LexerError errType;
/**
* Constructs a new `LexerException` with the given offending instance
* from which the error occurred, the default error type and no
* custom message
*
* Params:
* offendingInstance = the offending `LexerInterface`
* errType = the sub-error type as a `LexerError`
* msg = the custom message (default is empty/`""`)
*/
this(LexerInterface offendingInstance, LexerError errType = LexerError.OTHER, string msg = "")
{
string positionString = "("~to!(string)(offendingInstance.getLine())~", "~to!(string)(offendingInstance.getColumn())~")";
super("LexerException("~to!(string)(errType)~")"~(msg.length ? ": "~msg : "")~" at "~positionString);
this.offendingInstance = offendingInstance;
this.errType = errType;
}
/**
* Constructs a new `LexerException` with the given offending instance
* from which the error occurred, the default error type and a
* custom message
*
* Params:
* offendingInstance = the offending `LexerInterface`
* msg = the custom message
*/
this(LexerInterface offendingInstance, string msg)
{
this(offendingInstance, LexerError.OTHER, msg);
}
}


@ -0,0 +1,76 @@
/**
* Lexer interface definition
*/
module tlang.compiler.lexer.core.lexer;
import tlang.compiler.lexer.core.tokens : Token;
/**
* Defines the interface a lexer must provide
* such that it can be used to source tokens
* from in the parser
*/
public interface LexerInterface
{
/**
* Returns the token at the current cursor
* position
*
* Returns: the `Token`
*/
public Token getCurrentToken();
/**
* Moves the cursor one token forward
*/
public void nextToken();
/**
* Moves the cursor one token backwards
*/
public void previousToken();
/**
* Sets the position of the cursor
*
* Params:
* cursor = the new position
*/
public void setCursor(ulong cursor);
/**
* Retrieves the cursor's current position
*
* Returns: the position
*/
public ulong getCursor();
/**
* Checks whether more tokens are available
* or not
*
* Returns: true if more tokens are available, false otherwise
*/
public bool hasTokens();
/**
* Get the line position of the lexer in the source text
*
* Returns: the position
*/
public ulong getLine();
/**
* Get the column position of the lexer in the source text
*
* Returns: the position
*/
public ulong getColumn();
/**
* Exhaustively provide a list of all tokens
*
* Returns: a `Token[]` containing all tokens
*/
public Token[] getTokens();
}


@ -0,0 +1,19 @@
/**
* Base definitions relating to the lexer
*/
module tlang.compiler.lexer.core;
/**
* Lexer interface definition
*/
public import tlang.compiler.lexer.core.lexer;
/**
* Token definition
*/
public import tlang.compiler.lexer.core.tokens;
/**
* Exception definitions
*/
public import tlang.compiler.lexer.core.exceptions;


@ -0,0 +1,76 @@
/**
* Token definition
*/
module tlang.compiler.lexer.core.tokens;
import std.string : cmp;
import std.conv : to;
/**
* Defines a `Token` that a lexer
* would be able to produce
*/
public final class Token
{
/**
* The token
*/
private string token;
/**
* Line number information
*/
private ulong line, column;
/**
* Constructs a new `Token` with the given
* contents and line information
*
* Params:
* token = the actual string
* line = the line it occurs at
* column = the column it occurs at
*/
this(string token, ulong line, ulong column)
{
this.token = token;
this.line = line;
this.column = column;
}
/**
* Overrides the `==` operator to do equality
* based on the stored token's contents
*
* Params:
* other = the other `Token` being compared to
* Returns: true if the contents of the two tokens
* match, false otherwise
*/
override bool opEquals(Object other)
{
return cmp(token, (cast(Token)other).getToken()) == 0;
}
/**
* Returns a string representation of the token including
* its data and line information
*
* Returns: a `string`
*/
override string toString()
{
/* TODO (Column number): Don't adjust here, do it maybe in the lexer itself */
return token~" at ("~to!(string)(line)~", "~to!(string)(column-token.length)~")";
}
/**
* Returns the token's contents
*
* Returns: a `string`
*/
public string getToken()
{
return token;
}
}


@ -1,30 +0,0 @@
module tlang.compiler.lexer.exceptions;
import misc.exceptions : TError;
import tlang.compiler.lexer.core : Lexer;
import std.conv : to;
public enum LexerError
{
EXHAUSTED_CHARACTERS,
OTHER
}
public final class LexerException : TError
{
public const Lexer offendingInstance;
public const LexerError errType;
this(Lexer offendingInstance, LexerError errType = LexerError.OTHER, string msg = "")
{
string positionString = "("~to!(string)(offendingInstance.getLine())~", "~to!(string)(offendingInstance.getColumn())~")";
super("LexerException("~to!(string)(errType)~")"~(msg.length ? ": "~msg : "")~" at "~positionString);
this.offendingInstance = offendingInstance;
this.errType = errType;
}
this(Lexer offendingInstance, string msg)
{
this(offendingInstance, LexerError.OTHER, msg);
}
}


@ -1,14 +1,125 @@
module tlang.compiler.lexer.core;
/**
* A single-pass tokenizer
*/
module tlang.compiler.lexer.kinds.basic;
import std.container.slist;
import gogga;
import std.conv : to;
import std.ascii : isDigit;
import tlang.compiler.lexer.exceptions;
import tlang.compiler.lexer.tokens : Token;
import tlang.compiler.lexer.core;
public final class Lexer
/**
* Represents a basic lexer which performs the whole tokenization
* process in one shot via a call to `performLex()`; only after
* this may the `LexerInterface` methods, such as `getCurrentToken()`,
* `nextToken()` and so forth, actually be used.
*
* This is effectively a single pass lexer.
*/
public final class BasicLexer : LexerInterface
{
/**
* Post-perform lex() data
*
* This exports the LexerInterface API.
*
* To-do, ensure these can only be used AFTER `performLex()`
* has been called.
*/
private ulong tokenPtr = 0;
/**
* Returns the token at the current cursor
* position
*
* Returns: the `Token`
*/
public override Token getCurrentToken()
{
/* TODO: Throw an exception here when we try get more than we can */
return tokens[tokenPtr];
}
/**
* Moves the cursor one token forward
*/
public override void nextToken()
{
tokenPtr++;
}
/**
* Moves the cursor one token backwards
*/
public override void previousToken()
{
tokenPtr--;
}
/**
* Sets the position of the cursor
*
* Params:
* newPosition = the new position
*/
public override void setCursor(ulong newPosition)
{
tokenPtr = newPosition;
}
/**
* Retrieves the cursor's current position
*
* Returns: the position
*/
public override ulong getCursor()
{
return tokenPtr;
}
/**
* Checks whether more tokens are available
* or not
*
* Returns: true if more tokens are available, false otherwise
*/
public override bool hasTokens()
{
return tokenPtr < tokens.length;
}
/**
* Get the line position of the lexer in the source text
*
* Returns: the position
*/
public override ulong getLine()
{
return this.line;
}
/**
* Get the column position of the lexer in the source text
*
* Returns: the position
*/
public override ulong getColumn()
{
return this.column;
}
/**
* Exhaustively provide a list of all tokens
*
* Returns: a `Token[]` containing all tokens
*/
public override Token[] getTokens()
{
return tokens;
}
/**
* Lexer state data
*/
@ -23,18 +134,6 @@ public final class Lexer
private bool floatMode; /* Whether or not we are building a floating point constant */
// TODO: Move these all to end, I don't like em here
public ulong getLine()
{
return this.line;
}
public ulong getColumn()
{
return this.column;
}
/* The tokens */
private Token[] tokens;
@ -603,13 +702,6 @@ public final class Lexer
return true;
}
/* Return the tokens */
public Token[] getTokens()
{
return tokens;
}
private bool isSpliter(char character)
{
return character == ';' || character == ',' || character == '(' ||
@ -651,7 +743,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "hello \"world\";";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0), new Token("\"world\"", 0, 0), new Token(";", 0, 0)]);
@ -662,7 +754,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "hello \"world\"|| ";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0), new Token("\"world\"", 0, 0), new Token("||", 0, 0)]);
@ -673,7 +765,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "hello \"world\"||";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0), new Token("\"world\"", 0, 0), new Token("||", 0, 0)]);
@ -684,7 +776,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "hello \"world\";|";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0), new Token("\"world\"", 0, 0), new Token(";", 0, 0), new Token("|", 0, 0)]);
@ -695,7 +787,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = " hello";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0)]);
@ -706,7 +798,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = " hello;";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0), new Token(";", 0, 0)]);
@ -717,7 +809,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "hello \"world\\\"\"";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("hello", 0, 0), new Token("\"world\\\"\"", 0, 0)]);
@ -728,7 +820,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "'c'";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("'c'", 0, 0)]);
@ -739,7 +831,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "2121\n2121";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("2121", 0, 0), new Token("2121", 0, 0)]);
@ -752,35 +844,35 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = " =\n";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("=", 0, 0)]);
import std.algorithm.comparison;
sourceCode = " = ==\n";
currentLexer = new Lexer(sourceCode);
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("=", 0, 0), new Token("==", 0, 0)]);
import std.algorithm.comparison;
sourceCode = " ==\n";
currentLexer = new Lexer(sourceCode);
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("==", 0, 0)]);
import std.algorithm.comparison;
sourceCode = " = =\n";
currentLexer = new Lexer(sourceCode);
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("=", 0, 0), new Token("=", 0, 0)]);
import std.algorithm.comparison;
sourceCode = " ==, = ==\n";
currentLexer = new Lexer(sourceCode);
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("==", 0, 0), new Token(",", 0, 0), new Token("=", 0, 0), new Token("==", 0, 0)]);
@ -788,7 +880,7 @@ unittest
// Test flushing of previous token
import std.algorithm.comparison;
sourceCode = "i==i=\n";
currentLexer = new Lexer(sourceCode);
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("i", 0, 0), new Token("==", 0, 0), new Token("i", 0, 0), new Token("=", 0, 0)]);
@ -803,18 +895,18 @@ unittest
{
import std.algorithm.comparison;
string sourceCode;
Lexer currentLexer;
BasicLexer currentLexer;
/* 21L (valid) */
sourceCode = "21L";
currentLexer = new Lexer(sourceCode);
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("21L", 0, 0)]);
/* 21UL (valid) */
sourceCode = "21UL";
currentLexer = new Lexer(sourceCode);
currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("21UL", 0, 0)]);
@ -843,7 +935,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "1.5";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [new Token("1.5", 0, 0)]);
@ -859,7 +951,7 @@ unittest
{
import std.algorithm.comparison;
string sourceCode = "new A().l.p.p;";
Lexer currentLexer = new Lexer(sourceCode);
BasicLexer currentLexer = new BasicLexer(sourceCode);
currentLexer.performLex();
gprintln("Collected "~to!(string)(currentLexer.getTokens()));
assert(currentLexer.getTokens() == [


@ -1,36 +0,0 @@
module tlang.compiler.lexer.tokens;
import std.string : cmp;
import std.conv : to;
public final class Token
{
/* The token */
private string token;
/* Line number information */
private ulong line, column;
this(string token, ulong line, ulong column)
{
this.token = token;
this.line = line;
this.column = column;
}
override bool opEquals(Object other)
{
return cmp(token, (cast(Token)other).getToken()) == 0;
}
override string toString()
{
/* TODO (Column number): Don't adjust here, do it maybe in the lexer itself */
return token~" at ("~to!(string)(line)~", "~to!(string)(column-token.length)~")";
}
public string getToken()
{
return token;
}
}

File diff suppressed because it is too large.


@ -4,7 +4,7 @@ import tlang.compiler.parsing.core;
import misc.exceptions;
import tlang.compiler.symbols.check;
import tlang.compiler.symbols.data;
import tlang.compiler.lexer.tokens : Token;
import tlang.compiler.lexer.core.tokens : Token;
import std.conv : to;
public class ParserException : TError


@ -1,68 +1,288 @@
/**
* Token-to-symbol mappings (and vice-versa),
* facilities for performing tests on what sort
* of tokens are of certain classes (operators, etc.)
* and detection of different types of identifiers
*/
module tlang.compiler.symbols.check;
import tlang.compiler.lexer.tokens : Token;
import tlang.compiler.lexer.core.tokens : Token;
import std.conv : to;
import std.string : isNumeric, cmp;
import std.algorithm.searching : canFind;
import misc.utils;
import gogga;
/**
* All allowed symbols
* TODO: There should be a symbol class with sub-types
*/
* All allowed symbols
*/
public enum SymbolType
{
/**
* Default symbol (TODO: idk why this exists)
*/
LE_SYMBOL,
/**
* Any sort of identifier
*
* Must start with a letter,
* can contain numbers and
* may contain periods.
*
* It may also contain underscores.
*/
IDENT_TYPE,
/**
* Any sort of number; this can
* be `8` or `8.5`
*/
NUMBER_LITERAL,
/**
* A character constant like `'a'`
*/
CHARACTER_LITERAL,
/**
* A string constant like `"FELLA"`
*/
STRING_LITERAL,
/**
* Semicolon `;`
*/
SEMICOLON,
/**
* Left smooth brace $(LPAREN)
*/
LBRACE,
/**
* Right smooth brace $(RPAREN)
*/
RBRACE,
/**
* Assignment symbol `=`
*/
ASSIGN,
/**
* Comma `,`
*/
COMMA,
/**
* Left curly brace `{`
*/
OCURLY,
/**
* Right curly brace `}`
*/
CCURLY,
/**
* Module keyword `module`
*/
MODULE,
/**
* New keyword `new`
*/
NEW,
/**
* If keyword `if`
*/
IF,
/**
* Else keyword `else`
*/
ELSE,
/**
* Discard keyword `discard`
*/
DISCARD,
/**
* While keyword `while`
*/
WHILE,
/**
* Class keyword `class`
*/
CLASS,
/**
* Inherit keyword `:`
*/
INHERIT_OPP,
/**
* Tilde `~`
*/
TILDE,
/**
* For keyword `for`
*/
FOR,
/**
* Super keyword `super`
*/
SUPER,
/**
* This keyword `this`
*/
THIS,
/**
* Switch keyword `switch`
*/
SWITCH,
/**
* Return keyword `return`
*/
RETURN,
/**
* Public keyword `public`
*/
PUBLIC,
/**
* Private keyword `private`
*/
PRIVATE,
/**
* Protected keyword `protected`
*/
PROTECTED,
/**
* Static keyword `static`
*/
STATIC,
/**
* Case keyword `case`
*/
CASE,
/**
* Goto keyword `goto`
*/
GOTO,
/**
* Do keyword `do`
*/
DO,
/**
* Dot operator `.`
*/
DOT,
/**
* Delete keyword `delete`
*/
DELETE,
/**
* Struct keyword `struct`
*/
STRUCT,
/**
* Subtraction operator `-`
*/
SUB,
/**
* Addition operator `+`
*/
ADD,
/**
* Division operator `/`
*/
DIVIDE,
/**
* Star operator `*`
*/
STAR,
/**
* Ampersand (reference) operator `&`
*/
AMPERSAND,
/**
* Equality operator `==`
*/
EQUALS,
/**
* Greater than operator `>`
*/
GREATER_THAN,
/**
* Smaller than operator `<`
*/
SMALLER_THAN,
/**
* Greater than or equals to operator `>=`
*/
GREATER_THAN_OR_EQUALS,
/**
* Smaller than or equals to operator `<=`
*/
SMALLER_THAN_OR_EQUALS,
/**
* Opening bracket `[`
*/
OBRACKET,
/**
* Closing bracket `]`
*/
CBRACKET,
/**
* Cast keyword `cast`
*/
CAST,
/**
* Extern keyword `extern`
*/
EXTERN,
/**
* Extern-function keyword `efunc`
*/
EXTERN_EFUNC,
/**
* Extern-variable keyword `evar`
*/
EXTERN_EVAR,
/**
@ -70,13 +290,23 @@ public enum SymbolType
*/
GENERIC_TYPE_DECLARE,
/**
* Unknown symbol
*/
UNKNOWN
}
/* TODO: Later build classes specific to symbol */
/* TODO: Check if below is even used */
/**
* Checks if the given token string is that of
* a built-in type
*
* Params:
* tokenStr = the string to check
* Returns: `true` if one of the built-in types,
* `false` otherwise
*/
public bool isType(string tokenStr)
{
return cmp(tokenStr, "byte") == 0 || cmp(tokenStr, "ubyte") == 0
@ -85,6 +315,18 @@ public bool isType(string tokenStr)
"long") == 0 || cmp(tokenStr, "ulong") == 0 || cmp(tokenStr, "void") == 0;
}
/**
* Checks if the given token string is a path
* identifier. This means that it is something
* which contains dots in between, like `a.b`,
* but does not appear as a floating point literal
* such as `7.5`. It may also contain underscores `_`.
*
* Params:
* token = the token string to check
* Returns: `true` if it is a path identifier,
* `false` otherwise
*/
public bool isPathIdentifier(string token)
{
/* This is used to prevent the first character from not being number */
@ -138,6 +380,17 @@ public bool isPathIdentifier(string token)
return isDot;
}
/**
* Checks if the given token string is an identifier
* which means it can contain letters and numbers
* but MUST start with a letter. It may also
* contain underscores `_`.
*
* Params:
* token = the token string to check
* Returns: `true` if an identifier, `false`
* otherwise
*/
public bool isIdentifier(string token)
{
/* This is used to prevent the first character from not being number */
@ -175,6 +428,13 @@ public bool isIdentifier(string token)
return true;
}
/**
* Checks if the given `Token` is an accessor
*
* Params:
* token = the `Token` to check
* Returns: `true` if so, `false` otherwise
*/
public bool isAccessor(Token token)
{
return getSymbolType(token) == SymbolType.PUBLIC ||
@ -182,11 +442,26 @@ public bool isAccessor(Token token)
getSymbolType(token) == SymbolType.PROTECTED;
}
/**
* Checks if the given `Token` is a modifier
*
* Params:
* token = the `Token` to check
* Returns: `true` if so, `false` otherwise
*/
public bool isModifier(Token token)
{
return getSymbolType(token) == SymbolType.STATIC;
}
/**
* Checks if the given `Token` is a normal
* identifier (with no dots/periods)
*
* Params:
* tokenIn = the `Token` to test
* Returns: `true` if so, `false` otherwise
*/
public bool isIdentifier_NoDot(Token tokenIn)
{
/* Make sure it isn't any other type of symbol */
@ -200,6 +475,15 @@ public bool isIdentifier_NoDot(Token tokenIn)
}
}
/**
* Checks if the given `Token` is a dotted-identifier
* meaning it contains `.`/periods in it - a so-called
* path identifier.
*
* Params:
* tokenIn = the `Token` to test
* Returns: `true` if so, `false` otherwise
*/
public bool isIdentifier_Dot(Token tokenIn)
{
/* Make sure it isn't any other type of symbol */
@ -213,10 +497,19 @@ public bool isIdentifier_Dot(Token tokenIn)
}
}
/**
* Checks if the given token string
* is a numeric literal. It has support
* for checking if it has a size specifier
* as well.
*
* Params:
* token = the string token to check
* Returns: `true` if it is a numeric literal,
* `false` otherwise
*/
private bool isNumericLiteral(string token)
{
import std.algorithm.searching : canFind;
import tlang.compiler.lexer.core :Lexer;
if(canFind(token, "UL") || canFind(token, "UI"))
{
return isNumeric(token[0..$-2]);
@ -235,6 +528,17 @@ private bool isNumericLiteral(string token)
}
}
/**
* Maps a given `Token` to its `SymbolType` such
* that you can determine the type of symbol it
* is.
*
* Params:
* tokenIn = the `Token` to check
* Returns: the `SymbolType` of this token, if
* unrecognizable then `SymbolType.UNKNOWN` is
* returned
*/
public SymbolType getSymbolType(Token tokenIn)
{
string token = tokenIn.getToken();
@ -511,6 +815,15 @@ public SymbolType getSymbolType(Token tokenIn)
return SymbolType.UNKNOWN;
}
/**
* Determines whether the given token is
* a mathematical operator
*
* Params:
* token = the `Token` to test
* Returns: `true` if it is a mathematical
* operator, `false` otherwise
*/
public bool isMathOp(Token token)
{
string tokenStr = token.getToken();
@ -519,6 +832,17 @@ public bool isMathOp(Token token)
tokenStr[0] == '*' || tokenStr[0] == '/';
}
/**
* Determines whether the given token is
* a binary operator, meaning one which
* would be infixed/flanked by two operands
* (one to the left and one to the right)
*
* Params:
* token = the `Token` to test
* Returns: `true` if it is a binary
* operator, `false` otherwise
*/
public bool isBinaryOp(Token token)
{
string tokenStr = token.getToken();


@ -1,3 +1,8 @@
/**
* Routines for determining, based on an input string,
* the built-in type that is associated with that
* identifier/name
*/
module tlang.compiler.symbols.typing.builtins;
import tlang.compiler.symbols.typing.core;
@ -11,6 +16,18 @@ import std.conv : to;
* no machine is good if int is not 4, as in imagine short being max addressable unit
* like no, fuck that (and then short=int=long, no , that is shit AND is NOT WHAT TLANG aims for)
*/
/**
* Creates a new instance of the type that is detected via
* the given string. Only for built-in types.
*
* Example, if given `"int"` then you will get an instance
* of `new Integer("int", 4, true)`
*
* Params:
* tc = the associated `TypeChecker` required for lookups
* typeString = the type string to test
* Returns: the `Type` found, if not found then `null`
*/
public Type getBuiltInType(TypeChecker tc, string typeString)
{
gprintln("getBuiltInType("~typeString~")");


@ -1,3 +1,8 @@
/**
* Data structures which represent kind-of `Entity`(s),
* starting with the base-`Entity`, `Type`, which represents
* a name that describes a data type
*/
module tlang.compiler.symbols.typing.core;
import tlang.compiler.symbols.data;
@ -6,6 +11,10 @@ import std.conv : to;
public import tlang.compiler.symbols.typing.builtins;
/**
* The base entity from which all types are derived
*/
public class Type : Entity
{
/* TODO: Add width here */
@ -16,6 +25,14 @@ public class Type : Entity
* Actually yeah, we should, as Number types won't be entities
* Wait lmao they will
*/
/**
* Constructs a new `Type` with the
* given name
*
* Params:
* name = the new type's name
*/
this(string name)
{
super(name);
@ -25,16 +42,33 @@ public class Type : Entity
// ... where reference equality was used, hence I steer clear of that
}
/**
* Represents a void type, a type
* which has no return value
*/
public final class Void : Primitive
{
/**
* Constructs a new `Void` type
*/
this()
{
super("void");
}
}
/**
* Represents all primitive data types
*/
public class Primitive : Type
{
/**
* Constructs a new `Primitive`
* type
*
* Params:
* name = the new type's name
*/
this(string name)
{
super(name);
@ -42,63 +76,148 @@ public class Primitive : Type
}
/* TODO: Move width to Type class */
/**
* Represents any kind of number
*
* This means it has a width associated
* with it which is the number of bytes
* wide it is
*/
public class Number : Primitive
{
/* Number of bytes (1,2,4,8) */
/**
* Number of bytes (1,2,4,8)
*/
private ubyte width;
/* TODO: Alignment details etc. */
/**
* Constructs a new `Number` type
* with the given name and width
*
* Params:
* name = the new type's name
* width = the width (in bytes)
*/
this(string name, ubyte width)
{
super(name);
this.width = width;
}
/**
* Returns the width of this number
*
* Returns: the width in bytes
*/
public final ubyte getSize()
{
return width;
}
}
/**
* Represents an integer, a kind-of `Number`,
* but with a signedness/unsignedness encoding
* scheme associated with it
*/
public class Integer : Number
{
/* Whether or not signed (if so, then 2's complement) */
/**
* Whether or not signed (if so, then 2's complement)
*/
private bool signed;
/**
* Constructs a new `Integer` type with the given
* parameters
*
* Params:
* name = the name of this type
* width = the width (in bytes)
* signed = whether or not it represents a
* two's complement signed integer or not
*/
this(string name, ubyte width, bool signed = false)
{
super(name, width);
this.signed = signed;
}
/**
* Determines if the type of
* integer being described is signed
* or not
*
* Returns: `true` if signed, `false`
* otherwise
*/
public final bool isSigned()
{
return signed;
}
/* TODO: Remove ig */
/**
* Returns a string representation of
* this `Integer` type
*
* Returns: a `string`
*/
public override string toString()
{
return name;
}
}
/**
* Represents a floating point number
*/
public class Float : Number
{
/**
* Constructs a new floating point
* type with the given name and width
*
* Params:
* name = this type's name
* width = the width (in bytes) of
* the floating point
*/
this(string name, ubyte width)
{
super(name, width);
}
}
/**
* A `Pointer` is a kind-of `Integer`
* which is unsigned. This represents
* a memory address and is CURRENTLY
* set to `8` bytes (TODO: Change this
* to be dependent on the system used
* basically it should actually take
* in a size)
*
* A pointer is a 64-bit integer
* that points to data in memory of
* another given type
*/
public class Pointer : Integer
{
/* Data type being pointed to */
/**
* Data type being pointed to
*/
private Type dataType;
/**
* Constructs a new `Pointer` to point
* to data of the given type
*
* Params:
* dataType = the `Type` of data being
* pointed to
*/
this(Type dataType)
{
/* The name should be `dataType*` */
@ -109,6 +228,12 @@ public class Pointer : Integer
this.dataType = dataType;
}
/**
* Returns the `Type` of the data this
* pointer refers to
*
* Returns: the `Type`
*/
public Type getReferredType()
{
return dataType;
@ -120,12 +245,25 @@ public class Pointer : Integer
*/
public class StackArray : Type
{
/* Size of the stack array to allocate */
/**
* Size of the stack array to allocate
*/
private ulong arraySize;
/* Component type */
/**
* Component type
*/
private Type elementType;
/**
* Constructs a new `StackArray` type
* with the given type of element and
* the size to allocate on the stack
*
* Params:
* elementType = the component `Type`
* arraySize = the stack allocation size
*/
this(Type elementType, ulong arraySize)
{
/* The name should be `elementType[arraySize]` */
@ -135,11 +273,22 @@ public class StackArray : Type
this.arraySize = arraySize;
}
/**
* Gets the stack array's element type
*
* Returns: the `Type` of the components
*/
public Type getComponentType()
{
return elementType;
}
/**
* Gets the size to be allocated on the stack
* for this array
*
* Returns: the size
*/
public ulong getAllocatedSize()
{
return arraySize;


@ -2618,13 +2618,20 @@ public final class TypeChecker
}
/* Test name colliding with container name (1/3) [module] */
unittest
version(unittest)
{
import std.file;
import std.stdio;
import tlang.compiler.lexer.core;
import tlang.compiler.lexer.kinds.basic : BasicLexer;
import tlang.compiler.parsing.core;
}
/* Test name colliding with container name (1/3) [module] */
unittest
{
string sourceFile = "source/tlang/testing/collide_container_module1.t";
@ -2637,10 +2644,10 @@ unittest
sourceFileFile.close();
string sourceCode = cast(string) fileBytes;
Lexer currentLexer = new Lexer(sourceCode);
currentLexer.performLex();
LexerInterface currentLexer = new BasicLexer(sourceCode);
(cast(BasicLexer)currentLexer).performLex();
Parser parser = new Parser(currentLexer.getTokens());
Parser parser = new Parser(currentLexer);
Module modulle = parser.parse();
TypeChecker typeChecker = new TypeChecker(modulle);
@ -2668,11 +2675,6 @@ unittest
/* Test name colliding with container name (2/3) [module, nested collider] */
unittest
{
import std.file;
import std.stdio;
import tlang.compiler.lexer.core;
import tlang.compiler.parsing.core;
string sourceFile = "source/tlang/testing/collide_container_module2.t";
File sourceFileFile;
@ -2684,10 +2686,10 @@ unittest
sourceFileFile.close();
string sourceCode = cast(string) fileBytes;
Lexer currentLexer = new Lexer(sourceCode);
currentLexer.performLex();
LexerInterface currentLexer = new BasicLexer(sourceCode);
(cast(BasicLexer)currentLexer).performLex();
Parser parser = new Parser(currentLexer.getTokens());
Parser parser = new Parser(currentLexer);
Module modulle = parser.parse();
TypeChecker typeChecker = new TypeChecker(modulle);
@ -2713,11 +2715,6 @@ unittest
/* Test name colliding with container name (3/3) [container (non-module), nested collider] */
unittest
{
import std.file;
import std.stdio;
import tlang.compiler.lexer.core;
import tlang.compiler.parsing.core;
string sourceFile = "source/tlang/testing/collide_container_non_module.t";
File sourceFileFile;
@ -2729,10 +2726,10 @@ unittest
sourceFileFile.close();
string sourceCode = cast(string) fileBytes;
Lexer currentLexer = new Lexer(sourceCode);
currentLexer.performLex();
LexerInterface currentLexer = new BasicLexer(sourceCode);
(cast(BasicLexer)currentLexer).performLex();
Parser parser = new Parser(currentLexer.getTokens());
Parser parser = new Parser(currentLexer);
Module modulle = parser.parse();
TypeChecker typeChecker = new TypeChecker(modulle);
@ -2758,11 +2755,6 @@ unittest
/* Test name colliding with member */
unittest
{
import std.file;
import std.stdio;
import tlang.compiler.lexer.core;
import tlang.compiler.parsing.core;
string sourceFile = "source/tlang/testing/collide_member.t";
File sourceFileFile;
@ -2774,10 +2766,10 @@ unittest
sourceFileFile.close();
string sourceCode = cast(string) fileBytes;
Lexer currentLexer = new Lexer(sourceCode);
currentLexer.performLex();
LexerInterface currentLexer = new BasicLexer(sourceCode);
(cast(BasicLexer)currentLexer).performLex();
Parser parser = new Parser(currentLexer.getTokens());
Parser parser = new Parser(currentLexer);
Module modulle = parser.parse();
TypeChecker typeChecker = new TypeChecker(modulle);
@ -2802,11 +2794,6 @@ unittest
/* Test name colliding with member (check that the member defined is class (precendence test)) */
unittest
{
import std.file;
import std.stdio;
import tlang.compiler.lexer.core;
import tlang.compiler.parsing.core;
string sourceFile = "source/tlang/testing/precedence_collision_test.t";
File sourceFileFile;
@ -2818,10 +2805,10 @@ unittest
sourceFileFile.close();
string sourceCode = cast(string) fileBytes;
Lexer currentLexer = new Lexer(sourceCode);
currentLexer.performLex();
LexerInterface currentLexer = new BasicLexer(sourceCode);
(cast(BasicLexer)currentLexer).performLex();
Parser parser = new Parser(currentLexer.getTokens());
Parser parser = new Parser(currentLexer);
Module modulle = parser.parse();
TypeChecker typeChecker = new TypeChecker(modulle);
@ -2848,11 +2835,6 @@ unittest
/* Test name colliding with container name (1/2) */
unittest
{
import std.file;
import std.stdio;
import tlang.compiler.lexer.core;
import tlang.compiler.parsing.core;
string sourceFile = "source/tlang/testing/collide_container.t";
File sourceFileFile;
@ -2864,10 +2846,10 @@ unittest
sourceFileFile.close();
string sourceCode = cast(string) fileBytes;
Lexer currentLexer = new Lexer(sourceCode);
currentLexer.performLex();
LexerInterface currentLexer = new BasicLexer(sourceCode);
(cast(BasicLexer)currentLexer).performLex();
Parser parser = new Parser(currentLexer.getTokens());
Parser parser = new Parser(currentLexer);
Module modulle = parser.parse();
TypeChecker typeChecker = new TypeChecker(modulle);
@ -2933,11 +2915,6 @@ unittest
*/
unittest
{
import std.file;
import std.stdio;
import tlang.compiler.lexer.core;
import tlang.compiler.parsing.core;
string sourceFile = "source/tlang/testing/typecheck/simple_function_call.t";
File sourceFileFile;
@ -2949,10 +2926,10 @@ unittest
sourceFileFile.close();
string sourceCode = cast(string) fileBytes;
Lexer currentLexer = new Lexer(sourceCode);
currentLexer.performLex();
LexerInterface currentLexer = new BasicLexer(sourceCode);
(cast(BasicLexer)currentLexer).performLex();
Parser parser = new Parser(currentLexer.getTokens());
Parser parser = new Parser(currentLexer);
Module modulle = parser.parse();
TypeChecker typeChecker = new TypeChecker(modulle);