Migrating to Pandoc
This commit is contained in:
parent
4e24c0d326
commit
65b89614b3
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
title: Tristan Programming Language
|
||||
subtitle: _From start to finish_
|
||||
author: Tristan B. V. Kildaire
|
||||
documentclass: scrbook
|
||||
classoption:
|
||||
- oneside
|
||||
---
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Dedication
|
||||
|
||||
_Dedicated to Gisele, Clint and Vaughan for whomst have always cared the most for me even when faced with adversity_
|
|
@ -238,6 +238,16 @@ filename "part3_4.lyx"
|
|||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand include
|
||||
filename "new.lyx"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\end_body
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
%% LyX 2.3.6.1 created this file. For more info, see http://www.lyx.org/.
|
||||
%% Do not edit unless you really know what you are doing.
|
||||
\documentclass[oneside,english]{scrbook}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage[latin9]{inputenc}
|
||||
\usepackage{geometry}
|
||||
\geometry{verbose}
|
||||
\usepackage[active]{srcltx}
|
||||
\usepackage[authoryear]{natbib}
|
||||
|
||||
\makeatletter
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
|
||||
\usepackage{listings}
|
||||
% \usepackage{listings} % duplicate: already loaded above
|
||||
\usepackage{xcolor}
|
||||
|
||||
\definecolor{codegreen}{rgb}{0,0.6,0}
|
||||
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
|
||||
\definecolor{codepurple}{rgb}{0.58,0,0.82}
|
||||
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
|
||||
|
||||
\lstdefinestyle{mystyle}{
|
||||
backgroundcolor=\color{backcolour},
|
||||
commentstyle=\color{codegray},
|
||||
keywordstyle=\color{red},
|
||||
numberstyle=\tiny\color{codegray},
|
||||
stringstyle=\color{codepurple},
|
||||
basicstyle=\ttfamily\footnotesize,
|
||||
breakatwhitespace=false,
|
||||
breaklines=true,
|
||||
captionpos=b,
|
||||
keepspaces=true,
|
||||
numbers=left,
|
||||
numbersep=5pt,
|
||||
showspaces=false,
|
||||
showstringspaces=false,
|
||||
showtabs=false,
|
||||
tabsize=2
|
||||
}
|
||||
|
||||
\lstset{style=mystyle}
|
||||
|
||||
\usepackage{tikz}
|
||||
|
||||
\makeatother
|
||||
|
||||
\usepackage{babel}
|
||||
\begin{document}
|
||||
\title{Tristan Programming Language}
|
||||
\subtitle{\emph{From start to finish}}
|
||||
\author{Tristan B. V. Kildaire}
|
||||
\dedication{Dedicated to Gisele, Clint and Vaughan for whomst have always cared
|
||||
the most for me even when faced with adversity}
|
||||
|
||||
\maketitle
|
||||
\tableofcontents{}
|
||||
|
||||
\newpage{}
|
||||
|
||||
Haha, this page is left intentionally blank because I want to waste
|
||||
paper.
|
||||
|
||||
\newpage{}
|
||||
|
||||
\part{Introduction}
|
||||
|
||||
\include{part1_1}
|
||||
|
||||
\include{part1_2}
|
||||
|
||||
\part{Users guide}
|
||||
|
||||
\include{part2_1}
|
||||
|
||||
\part{Implementation}
|
||||
|
||||
\include{part3_1}
|
||||
|
||||
\include{part3_2}
|
||||
|
||||
\include{part3_3}
|
||||
|
||||
\include{part3_4}
|
||||
|
||||
\include{new}
|
||||
\end{document}
|
|
@ -0,0 +1,3 @@
|
|||
# Implementation
|
||||
|
||||
TODO: Add information about internals section
|
|
@ -0,0 +1,55 @@
|
|||
## Lexical analysis
|
||||
|
||||
Lexical analysis is the process of taking a program as an input string $A$ and splitting it into a list of $n$ sub-strings $A_{1},\,A_{2}\ldots A_{n}$ called tokens. The length $n$ of this list is dependent on several rules that determine how, when and where new tokens are built - this set of rules is called a _grammar_.
|
||||
|
||||
### Grammar
|
||||
|
||||
TODO: Add link to other section or remove this
|
||||
|
||||
### Overview of implementation
|
||||
|
||||
The source code for the lexical analysis part of the compiler is located in `source/tlang/lexer.d` which contains two important class definitions:
|
||||
|
||||
* `Token` - This represents a token
|
||||
* Complete with the token string itself, `token`. Retrievable with a call to `getToken()`
|
||||
* The coordinates in the source code where the token begins as `line` and `column`
|
||||
* Overrides equality (`opEquals`) such that doing,
|
||||
```d
|
||||
new Token("int") == new Token("int")
|
||||
```
|
||||
* ...would evaluate to `true`, rather than false by reference equality (the default in D)
|
||||
* `Lexer` - The token builder
|
||||
* `sourceCode`, the whole input program (as a string) to be tokenized
|
||||
* `position`, holds the index to the current character in the string array `sourceCode`
|
||||
* `currentChar`, the current character at index-`position`
|
||||
* Contains a list of the currently built tokens, `Token[] tokens`
|
||||
* Current line and column numbers as `line` and `column` respectively
|
||||
* A “build up” - this is the token (in string form) currently being built - `currentToken`
|
||||
|
||||
### Implementation
|
||||
|
||||
The implementation of the lexer, the `Lexer` class, is explained in detail in this section. (TODO: constructor) The lexical analysis is done one-shot via the `performLex()` method which will attempt to tokenize the input program, on failure returning `false`, `true` otherwise. In the successful case the `tokens` array will be filled with the created tokens and can then later be retrieved via a call to `getTokens()`.
|
||||
|
||||
Example usage:
|
||||
TODO
|
||||
|
||||
#### performLex()
|
||||
|
||||
TODO: This is going to change sometime soonish, so I want the final version of how it works here. I may as well, however, give a brief explanation as I doubt _much_ will change - only specific parsing cases.
|
||||
|
||||
This method contains a looping structure which will read character-by-character from the `sourceCode` string and follow the rules of the grammar (TODO: add link), looping whilst there are still characters available for consumption (`position < sourceCode.length`).
|
||||
|
||||
We loop through each character and dependent on its value we start building new tokens, certain characters will cause a token to finish being built which will sometimes be caused by `isSpliter(character)` being `true`. A typical token building process looks something like the following, containing the final character to be tacked onto the current token build up, the creation of a new token object and the addition of it to the `tokens` list, finishing with flushing the build up string and incrementing the coordinates:
|
||||
|
||||
A typical token building procedure looks something like this:
|
||||
|
||||
```d
|
||||
/* Generate and add the token */
|
||||
currentToken ~= "'";
|
||||
currentTokens ~= new Token(currentToken, line, column);
|
||||
|
||||
/* Flush the token */
|
||||
currentToken = "";
|
||||
column += 2;
|
||||
position += 2;
|
||||
```
|
|
@ -1,8 +1,4 @@
|
|||
---
|
||||
title: Virtual Table implementation
|
||||
date: 2021-04-20
|
||||
tags: [documentation, internals, vtable]
|
||||
---
|
||||
# Virtual Table implementation
|
||||
|
||||
This article aims to describe a possible virtual table implementation that we could use to for method dispatch that not only supports overriding but **also** interfaces.
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
# Introduction
|
|
@ -0,0 +1,9 @@
|
|||
## Why in the lords name are you doing this?
|
||||
|
||||
Despite my eagerness to jump directly into the subject matter at hand I believe there is something of even greater importance. Despite there being a myriad of reasons I embarked upon this project something more important than the stock-and-standard “I needed it to solve a problem of mine” reasoning comes to mind. There is indeed a better reason for embarking on something than the mere technical _requirement thereof_ - I did this **because I can**. This sentiment is something that I really hold dear to my heart despite it being a seemingly obvious one. Of course you can do what you want with your code - it's a free country. One would not be wrong to make such a statement but mention your ideas online and you get hounded down by others saying “that's dumb, just use X” or “your implementation will be inefficient”. These statements are not entirely untrue but they miss the point that this is an exercise in scientific thinking and an artistic approach at it in that as well.
|
||||
|
||||
I would not want to see the naysayers put anyone down from doing something they have always dreamed of, that is why I have done this. I am aware of the risks and the downfalls of having grandiose expectations but luckily I do not require the external feedback of the mass - just some close few friends who can appreciate my work and join the endeavor with me.
|
||||
|
||||
|
||||
_Don't let people stop you, you only have one life - take it by the horns and fly_
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
## Aims
|
||||
|
||||
A programming language normally has an aim, a _purpose of existence_, to put it in a snobbish way that a white male like me would. It can range from solving a problem in a highly specific domain (such as Domain Specific Languages (TODO: add citation)) to trying to solve various problems spread across several different domains, a _general purpose_ programming language. This is where I would like to place Tristan - a language that can support multiple paradigms of programming - whether this be object-oriented programming with the usage of _classes_ and _objects_ or functional programming with techniques such as map and filter.
|
||||
|
||||
|
||||
|
||||
Tristan aims to be able to support all of these but with certain limits, this is after all mainly an imperative language with those paradigms as _“extra features”_. Avoiding feature creep in other systems-levels languages such as C++ is something I really want to stress about the design of this language, I do not want a big and confusing mess that has an extremely steep learning curve and way too many moving parts.
|
||||
|
||||
### Paradigms
|
||||
|
||||
Tristan is a procedural programming language that supports object-oriented programming and templates.
|
||||
|
||||
#### Object-oriented programming
|
||||
|
||||
Object orientation allows the programmer to create user-defined types which encapsulate both data fields and methods which act upon said data. Tristan supports:
|
||||
|
||||
1. Class-based object orientation
|
||||
* Classes as the base of user-defined types and objects are instances of these types
|
||||
* Single inheritance hierarchy
|
||||
* Runtime polymorphism
|
||||
2. Interfaces
|
||||
* Multiple inheritance
|
||||
* Runtime polymorphism (TODO: add cross-reference)
|
||||
|
||||
It is with this lean approach to object orientation that we keep things simple enough (only single inheritance) but with enough power to model the real world in code (by supporting interfaces).
|
||||
|
||||
### Templating
|
||||
|
||||
Templating, otherwise known as _generics_, is a mechanism by which a given body of code which contains a type specifier such as variable declarations or function definitions can have their said type specifiers parameterized. The usage of this can be illustrated in the code below, where we want to define a method `sum(a, b)` which returns the summation of the two inputs. We define a version that works for integral types (`int`) and a version that works for decimal types (`float`):
|
||||
|
||||
```d
|
||||
// Integral summation function
|
||||
int sum(int a, int b)
|
||||
{
|
||||
return a+b;
|
||||
}
|
||||
|
||||
// Decimal summation function
|
||||
float sum(float a, float b)
|
||||
{
|
||||
return a+b;
|
||||
}
|
||||
```
|
||||
|
||||
Being a small example we can reason about the easiness of simply defining two versions of the `sum(a, b)` method for the two types, but after some time this can either get overly repetitive if we have to do this for more methods of a similar structure or when more types are involved. This is where templating comes in, we can write a more general version of the same function and let the compiler generate the differently typed versions dependent on what _type parameter_ we pass in.
|
||||
|
||||
A templatised version of the above `sum(a, b)` function is shown below:
|
||||
|
||||
```d
|
||||
// Templatised function
|
||||
template T
|
||||
{
|
||||
T sum(T a, T b)
|
||||
{
|
||||
return a+b;
|
||||
}
|
||||
}
|
||||
|
||||
// Integral version
|
||||
sum!(int)(1,2)
|
||||
|
||||
// Decimal version
|
||||
sum!(float)(1.0,2.0)
|
||||
```
|
||||
|
||||
The way this works is that whenever you call the function `sum(a, b)` you will have to provide it with the specific type you want generated for that function.
|
||||
|
||||
### Systems-level access
|
||||
|
||||
Tristan does not shy away from features which give you access to system-level concepts such as memory addresses (via pointers), assembly (via the inline assembler) and so on. Such features are inherently unsafe but it is this sort of control that I wish to give the user, the balance between what the compiler should do and what the user should make sure they are doing is tipped quite heavily in favor of the latter in my viewpoint and hence we support such features as:
|
||||
|
||||
* Weak typing
|
||||
* By default this is not the behavior when using `cast()`
|
||||
* Casting to an incompatible type is allowed - even when a run-time type-check is invalid you can still force a cast with `castunsafe()`
|
||||
* The user should be able to do what _he_ wants if requested
|
||||
* Pointers
|
||||
* The mere _support_ of pointers allowing one to take a memory-level view of objects in memory rather than the normal “safe access” means
|
||||
* Inline assembly
|
||||
* Inserting of arbitrary assembler is allowed, providing the programmer with access to systems level registers, interrupts/syscall instructions and so on
|
||||
* Custom byte-packing
|
||||
* Allowing the user to deviate from the normal struct packing structure in favor of a tweaked packing technique
|
||||
* Custom packing on a system that doesn't agree with the alignment of your data **is** allowed but the default is to pack accordingly to the respective platform
|
||||
|
||||
### Specified behaviour
|
||||
|
||||
TODO: Insert ramblings here about underspecified behaviour and how they plague C and how we easily fix this in tlang
|
|
@ -1,5 +1,4 @@
|
|||
Language
|
||||
========
|
||||
# Language
|
||||
|
||||
This page serves as an official manual for both users of TLang and those who want to understand/develop the internals of the compiler and runtime (the language itself).
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
Grammar
|
||||
=======
|
||||
## Grammar
|
||||
|
||||
* TODO: Need help with this @Wilhelm, some things to look at (must reference!)
|
||||
* https://karmin.ch/ebnf/examples
|
|
@ -1,5 +1,4 @@
|
|||
Literals
|
||||
========
|
||||
## Literals
|
||||
|
||||
#### Document todo
|
||||
|
||||
|
@ -9,7 +8,7 @@ Literals
|
|||
- [ ] fixes needed:
|
||||
- [x] sole `-1` does not work but `0-1` does, need to check the binary operator and unary operator things
|
||||
|
||||
## Integer literals
|
||||
### Integer literals
|
||||
|
||||
Integer literals are encoded according to the following table:
|
||||
|
|
@ -1,4 +1,6 @@
|
|||
# Primitive data types
|
||||
## Types
|
||||
|
||||
## Primitive data types
|
||||
|
||||
Primitive data type are the building blocks of which other more complex types are derived from. Each primitive type has the following 3 attributes:
|
||||
|
||||
|
@ -6,7 +8,7 @@ Primitive data type are the building blocks of which other more complex types ar
|
|||
2. **Width:** How many bits it takes up
|
||||
3. **Intended interpretation:** How it should be interpreted
|
||||
|
||||
## Integral types
|
||||
### Integral types
|
||||
|
||||
| Type | Width | Intended interpretation |
|
||||
|-|-|-|
|
||||
|
@ -20,18 +22,18 @@ Primitive data type are the building blocks of which other more complex types ar
|
|||
| ulong | 64 | unsigned long |
|
||||
|
||||
|
||||
### Conversion rules
|
||||
#### Conversion rules
|
||||
|
||||
1. TODO: Sign/zero extension
|
||||
2. Promotion?
|
||||
3. Precedence in interpretation when the first two don't apply
|
||||
|
||||
## Decimal
|
||||
### Decimal
|
||||
|
||||
TODO: Add this
|
||||
|
||||
* float32, float64 etc
|
||||
|
||||
### Conversion rules
|
||||
#### Conversion rules
|
||||
|
||||
TODO: Add this
|
|
@ -1,9 +1,8 @@
|
|||
Variables
|
||||
==============
|
||||
## Variables
|
||||
|
||||
Variables are the basic building blocks of a program. They are effectively a storage container for data of a certain [type](types.md). More complex types are built out of a combination of such variables.
|
||||
|
||||
## Declaring variables
|
||||
### Declaring variables
|
||||
|
||||
Declaring variables is very simple and follows the following format:
|
||||
|
||||
|
@ -17,7 +16,7 @@ You can also assign to a variable whilst declaring it:
|
|||
<type> <identifier> = <expression of <type>>;
|
||||
```
|
||||
|
||||
## Usage
|
||||
### Usage
|
||||
You can update the stored value in a variable as so:
|
||||
|
||||
```
|
|
@ -1,11 +1,10 @@
|
|||
Conditionals
|
||||
============
|
||||
## Conditionals
|
||||
|
||||
The following conditionals are supported in T:
|
||||
|
||||
1. `if` statements
|
||||
|
||||
## If statements
|
||||
### If statements
|
||||
|
||||
If statements are like those you have seen in any other language, they are composed of at least one `if` branch:
|
||||
|
|
@ -1,11 +1,10 @@
|
|||
Loops
|
||||
=====
|
||||
## Loops
|
||||
|
||||
Loops are structures which allow one to run code a repeated number of times based on a condition. The currently supported looping structures in TLang are:
|
||||
|
||||
1. `while` loops
|
||||
|
||||
## `while` loops
|
||||
### `while` loops
|
||||
|
||||
One can declare a while loop using the `while` keyword followed by a condition (an expression) as follows:
|
||||
|
||||
|
@ -19,7 +18,7 @@ while(i)
|
|||
}
|
||||
```
|
||||
|
||||
## `for` loops
|
||||
### `for` loops
|
||||
|
||||
One can declare a for loop using the `for` keyword. A for loop consists of 4 parts:
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
Functions
|
||||
=========
|
||||
## Functions
|
||||
|
||||
A function has three things:
|
||||
|
||||
|
@ -7,7 +6,7 @@ A function has three things:
|
|||
2. Optional _input parameters_
|
||||
3. An optional _return type_ and respective _return value_
|
||||
|
||||
## Void functions
|
||||
### Void functions
|
||||
|
||||
A function of type `void` does not return anything. An example would be:
|
||||
|
||||
|
@ -27,7 +26,7 @@ void sayHello()
|
|||
}
|
||||
```
|
||||
|
||||
## Typed functions
|
||||
### Typed functions
|
||||
|
||||
A typed function has a _return type_ and can return a value of said type, here we have such an example of a function defined to return a value of type `int`:
|
||||
|
||||
|
@ -38,7 +37,7 @@ int myFunction()
|
|||
}
|
||||
```
|
||||
|
||||
## Parameters
|
||||
### Parameters
|
||||
|
||||
Functions can take in parameters of the form _`<type> arg1, <type> arg2, ...`_, below is an example of such a function which takes two integral parameters and returns the sum:
|
||||
|
|
@ -1,11 +1,10 @@
|
|||
Pointers
|
||||
========
|
||||
## Pointers
|
||||
|
||||
Pointers allow one to get the address of a named entity, store it, and use
|
||||
it in a manner to either update the value at said address or fetch
|
||||
the value from said address in an indirect manner.
|
||||
|
||||
## Pointer types
|
||||
### Pointer types
|
||||
|
||||
A pointer type is written in the form of `<type>*` where this is read as "a pointer-to <type>". The `<type>` is anything before the last asterisk. Therefore `<type>**` is a "a pointer-to < a pointer-to <type>>".
|
||||
|
||||
|
@ -13,7 +12,7 @@ One also gets untyped pointers, these are written as `void*`.
|
|||
|
||||
All pointers are 64-bit values - the size of addresses on one's system.
|
||||
|
||||
## Usage
|
||||
### Usage
|
||||
|
||||
Here we shall show you the use cases of pointers in the below example:
|
||||
|
||||
|
@ -32,7 +31,7 @@ What we have ==here== is the declaration of a variable called `valuePtr` of type
|
|||
|
||||
Resulting in us having another "variable" (our `valuePtr`) or address which, if we visit it we get another value which is _intended_ to be interpreted as another address as well - it is **this** address that if we visit we fetch the value of 69.
|
||||
|
||||
## Example code
|
||||
### Example code
|
||||
|
||||
Below is example usage of a pointer:
|
||||
|
|
@ -1,7 +1,6 @@
|
|||
Structs
|
||||
===========
|
||||
## Structs
|
||||
|
||||
## Introduction
|
||||
### Introduction
|
||||
|
||||
What are structs? In T a Struct is a user-defined type which associates several fields together however, unlike C, structs in T can have a set of functions associated with them as well.
|
||||
|
||||
|
@ -17,7 +16,7 @@ struct <name>
|
|||
|
||||
Note: Assignments to these variables within the struct's body is not allowed.
|
||||
|
||||
### Example
|
||||
#### Example
|
||||
|
||||
Perhaps we want a simple struct that associates a name, age and gender together to represent a _Person_, then we can declare such a struct as follows:
|
||||
|
||||
|
@ -32,7 +31,7 @@ struct Person
|
|||
|
||||
---
|
||||
|
||||
## Member functions
|
||||
### Member functions
|
||||
|
||||
One can also define a struct to have certain functions associated with it that will operate on its data without having to refer to it directly in the source code. The syntax for a member function with return type `<returnType>`, name `<funcName>`, of a struct is (along with our previous struct) as follows:
|
||||
|
||||
|
@ -49,7 +48,7 @@ struct <name>
|
|||
}
|
||||
```
|
||||
|
||||
### Example
|
||||
#### Example
|
||||
|
||||
TODO: Add some text here describing it
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
OOP
|
||||
===
|
||||
## OOP
|
||||
|
||||
T supports object-oriented programming out of the box, specifically support for classes (with single inheritance) and interfaces.
|
||||
|
||||
## Classes
|
||||
### Classes
|
||||
|
||||
Single inheritance classes are supported in T and a basic class definition
|
||||
for a class named `A` looks as follows:
|
||||
|
@ -15,7 +14,7 @@ class A
|
|||
}
|
||||
```
|
||||
|
||||
### Constructors
|
||||
#### Constructors
|
||||
|
||||
A constructor for our class `A` is defined with a function named after the
|
||||
class, so in this case that would be `A` as shown below:
|
||||
|
@ -30,7 +29,7 @@ class A
|
|||
}
|
||||
```
|
||||
|
||||
### Destructors
|
||||
#### Destructors
|
||||
|
||||
Like a constructors, destructors follow the same syntax. However, destructors
|
||||
have the tilde symbol, `~`, infront of them like so:
|
||||
|
@ -49,7 +48,7 @@ Destructors run when you use the `delete` keyword on an object reference.
|
|||
|
||||
---
|
||||
|
||||
### Inheritance
|
||||
#### Inheritance
|
||||
|
||||
Classes in T support single inheritance using the `:` operator. Below we
|
||||
have a base class **A** and a sub-class **B**. The syntax is as follows:
|
||||
|
@ -72,7 +71,7 @@ class B : A
|
|||
}
|
||||
```
|
||||
|
||||
## Interfaces
|
||||
### Interfaces
|
||||
|
||||
TODO: Add section on interfaces
|
||||
TODO: Also add support for this to TLang
|
||||
|
@ -84,6 +83,6 @@ interface A
|
|||
}
|
||||
```
|
||||
|
||||
## Polymorphism
|
||||
### Polymorphism
|
||||
|
||||
TODO: Add notes on type-casting here and RTTI
|
|
@ -1,9 +1,8 @@
|
|||
Modules
|
||||
=======
|
||||
## Modules
|
||||
|
||||
A module is the top-level container for all T programs, it is the parent of all other syntactical components.
|
||||
|
||||
## Declaration
|
||||
### Declaration
|
||||
|
||||
A module is defined using the `module` keyword followed by the name of the module:
|
||||
|
||||
|
@ -13,7 +12,7 @@ module myModule;
|
|||
// Code goes here
|
||||
```
|
||||
|
||||
## Naming
|
||||
### Naming
|
||||
|
||||
Because the module is the root of all other containers such as classes and structs, one can always use the module name to refer from the top-down. An example:
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
#LyX 2.3 created this file. For more info see http://www.lyx.org/
|
||||
\lyxformat 544
|
||||
\begin_document
|
||||
\begin_header
|
||||
\save_transient_properties true
|
||||
\origin unavailable
|
||||
\textclass scrbook
|
||||
\use_default_options true
|
||||
\maintain_unincluded_children false
|
||||
\language english
|
||||
\language_package default
|
||||
\inputencoding auto
|
||||
\fontencoding global
|
||||
\font_roman "default" "default"
|
||||
\font_sans "default" "default"
|
||||
\font_typewriter "default" "default"
|
||||
\font_math "auto" "auto"
|
||||
\font_default_family default
|
||||
\use_non_tex_fonts false
|
||||
\font_sc false
|
||||
\font_osf false
|
||||
\font_sf_scale 100 100
|
||||
\font_tt_scale 100 100
|
||||
\use_microtype false
|
||||
\use_dash_ligatures true
|
||||
\graphics default
|
||||
\default_output_format default
|
||||
\output_sync 0
|
||||
\bibtex_command default
|
||||
\index_command default
|
||||
\paperfontsize default
|
||||
\spacing single
|
||||
\use_hyperref false
|
||||
\papersize default
|
||||
\use_geometry false
|
||||
\use_package amsmath 1
|
||||
\use_package amssymb 1
|
||||
\use_package cancel 1
|
||||
\use_package esint 1
|
||||
\use_package mathdots 1
|
||||
\use_package mathtools 1
|
||||
\use_package mhchem 1
|
||||
\use_package stackrel 1
|
||||
\use_package stmaryrd 1
|
||||
\use_package undertilde 1
|
||||
\cite_engine basic
|
||||
\cite_engine_type default
|
||||
\biblio_style plain
|
||||
\use_bibtopic false
|
||||
\use_indices false
|
||||
\paperorientation portrait
|
||||
\suppress_date false
|
||||
\justification true
|
||||
\use_refstyle 1
|
||||
\use_minted 0
|
||||
\index Index
|
||||
\shortcut idx
|
||||
\color #008000
|
||||
\end_index
|
||||
\secnumdepth 3
|
||||
\tocdepth 3
|
||||
\paragraph_separation indent
|
||||
\paragraph_indentation default
|
||||
\is_math_indent 0
|
||||
\math_numbering_side default
|
||||
\quotes_style english
|
||||
\dynamic_quotes 0
|
||||
\papercolumns 1
|
||||
\papersides 1
|
||||
\paperpagestyle default
|
||||
\tracking_changes false
|
||||
\output_changes false
|
||||
\html_math_output 0
|
||||
\html_css_as_file 0
|
||||
\html_be_strict false
|
||||
\end_header
|
||||
|
||||
\begin_body
|
||||
|
||||
\begin_layout Standard
|
||||
s
|
||||
\end_layout
|
||||
|
||||
\end_body
|
||||
\end_document
|
|
@ -0,0 +1 @@
|
|||
s
|
|
@ -0,0 +1,95 @@
|
|||
#LyX 2.3 created this file. For more info see http://www.lyx.org/
|
||||
\lyxformat 544
|
||||
\begin_document
|
||||
\begin_header
|
||||
\save_transient_properties true
|
||||
\origin unavailable
|
||||
\textclass article
|
||||
\use_default_options true
|
||||
\maintain_unincluded_children false
|
||||
\language english
|
||||
\language_package default
|
||||
\inputencoding auto
|
||||
\fontencoding global
|
||||
\font_roman "default" "default"
|
||||
\font_sans "default" "default"
|
||||
\font_typewriter "default" "default"
|
||||
\font_math "auto" "auto"
|
||||
\font_default_family default
|
||||
\use_non_tex_fonts false
|
||||
\font_sc false
|
||||
\font_osf false
|
||||
\font_sf_scale 100 100
|
||||
\font_tt_scale 100 100
|
||||
\use_microtype false
|
||||
\use_dash_ligatures true
|
||||
\graphics default
|
||||
\default_output_format default
|
||||
\output_sync 0
|
||||
\bibtex_command default
|
||||
\index_command default
|
||||
\paperfontsize default
|
||||
\use_hyperref false
|
||||
\papersize default
|
||||
\use_geometry false
|
||||
\use_package amsmath 1
|
||||
\use_package amssymb 1
|
||||
\use_package cancel 1
|
||||
\use_package esint 1
|
||||
\use_package mathdots 1
|
||||
\use_package mathtools 1
|
||||
\use_package mhchem 1
|
||||
\use_package stackrel 1
|
||||
\use_package stmaryrd 1
|
||||
\use_package undertilde 1
|
||||
\cite_engine basic
|
||||
\cite_engine_type default
|
||||
\use_bibtopic false
|
||||
\use_indices false
|
||||
\paperorientation portrait
|
||||
\suppress_date false
|
||||
\justification true
|
||||
\use_refstyle 1
|
||||
\use_minted 0
|
||||
\index Index
|
||||
\shortcut idx
|
||||
\color #008000
|
||||
\end_index
|
||||
\secnumdepth 3
|
||||
\tocdepth 3
|
||||
\paragraph_separation indent
|
||||
\paragraph_indentation default
|
||||
\is_math_indent 0
|
||||
\math_numbering_side default
|
||||
\quotes_style english
|
||||
\dynamic_quotes 0
|
||||
\papercolumns 1
|
||||
\papersides 1
|
||||
\paperpagestyle default
|
||||
\tracking_changes false
|
||||
\output_changes false
|
||||
\html_math_output 0
|
||||
\html_css_as_file 0
|
||||
\html_be_strict false
|
||||
\end_header
|
||||
|
||||
\begin_body
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
input{../test.tex}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\end_body
|
||||
\end_document
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
\chapter{Why in the lords name are you doing this?}
|
||||
|
||||
Despite my eagerness to jump directly into the subject matter at hand
|
||||
I believe there is something of even greater importance. Despite
|
||||
there being a myriad of reasons I embarked upon this project something
|
||||
more important than the stock-and-standard ``I needed it to solve
|
||||
a problem of mine'' reasoning comes to mind. There is indeed a better
|
||||
reason for embarking on something than the mere \emph{technical requirement}
|
||||
thereof - I did this \textbf{because I can}. This sentiment is something
|
||||
that I really hold dear to my heart despite it being a seemingly obvious
|
||||
one. Of course you can do what you want with your code - it's a free
|
||||
country. One would not be wrong to make such a statement but mention
|
||||
your ideas online and you get hounded down by others saying ``that's
|
||||
dumb, just use X'' or ``your implementation will be inefficient''.
|
||||
These statements are not entirely untrue but they miss the point that
|
||||
this is an exercise in scientific thinking and an artistic approach
|
||||
at it in that as well.\\
|
||||
\\
|
||||
I would not want to see the naysayers put anyone down from doing something
|
||||
they have always dreamed of, that is why I have done this. I am aware
|
||||
of the risks and the downfalls of having grandiose expectations but
|
||||
luckily I do not require the external feedback of the mass - just
|
||||
some close few friends who can appreciate my work and join the endeavor
|
||||
with me.\\
|
||||
\emph{}\\
|
||||
\emph{}\\
|
||||
\emph{Don't let people stop you, you only have one life - take it
|
||||
by the horns and fly}
|
|
@ -213,8 +213,296 @@ It is with this lean approach to object orientation that we keep things
|
|||
the real world in code (by supporting interfaces).
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsection
|
||||
Templating
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
Templating, otherwise known as
|
||||
\emph on
|
||||
generics
|
||||
\emph default
|
||||
, is a mechanism by which a given body of code which contains a type specifier
|
||||
such as variable declarations or function definitions can have their said
|
||||
type specifiers parameterized.
|
||||
The usage of this can be illustrated in the code below, where we want to
|
||||
define a method
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
texttt{sum(a, b)}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
which returns the summation of the two inputs.
|
||||
We define a version that works for integral types (
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
texttt{int}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
) and a version that works for decimal types (
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
texttt{float}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
):
|
||||
\begin_inset Newline newline
|
||||
\end_inset
|
||||
|
||||
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
begin{lstlisting}[language=C]
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
// Integral summation function
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
int sum(int a, int b)
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
{
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
return a+b;
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
}
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
// Decimal summation function
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
float sum(float a, float b)
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
{
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
return a+b;
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
}
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
end{lstlisting}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
Being a small example we can reason about the easiness of simply defining
|
||||
two versions of the
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
texttt{sum(a, b)}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
method for the two types, but after some time this can either get overly
|
||||
repetitive if we have to do this for more methods of a similar structure
|
||||
|
||||
\emph on
|
||||
or
|
||||
\emph default
|
||||
when more types are involved.
|
||||
This is where templating comes in, we can write a more general version
|
||||
of the same function and let the compiler generate the differently typed
|
||||
versions dependent on what
|
||||
\emph on
|
||||
type parameter
|
||||
\emph default
|
||||
we pass in.
|
||||
\begin_inset Newline newline
|
||||
\end_inset
|
||||
|
||||
|
||||
\begin_inset Newline newline
|
||||
\end_inset
|
||||
|
||||
A templatised version of the above
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
texttt{sum(a, b)}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
function is shown below:
|
||||
\begin_inset Newline newline
|
||||
\end_inset
|
||||
|
||||
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
begin{lstlisting}[language=C]
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
// Templatised function
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
template T
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
{
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
T sum(T a, T b)
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
{
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
return a+b;
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
}
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
}
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
// Integral version
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
sum!(int)(1,2)
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
// Decimal version
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
sum!(float)(1.0,2.0)
|
||||
\end_layout
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
end{lstlisting}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
The way this works is that whenever you call the function
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
texttt{sum(a, b)}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
you will have to provide it with the specific type you want generated for
|
||||
that function.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Systems-level
|
||||
Systems-level access
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
|
@ -235,12 +523,23 @@ Weak typing
|
|||
\begin_deeper
|
||||
\begin_layout Itemize
|
||||
By default this is not the behavior when using
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
|
||||
\backslash
|
||||
texttt{cast()}
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Itemize
|
||||
Casting to an incompatible type is allowed - even when a runtime-type check
|
||||
Casting to an incompatible type is allowed - even when a run-time type-check
|
||||
is invalid you can still force a cast with
|
||||
\begin_inset ERT
|
||||
status open
|
||||
|
@ -306,14 +605,6 @@ Custom byte-packing
|
|||
\end_layout
|
||||
|
||||
\begin_deeper
|
||||
\begin_layout Itemize
|
||||
|
||||
\series bold
|
||||
TODO:
|
||||
\series default
|
||||
How will we go about this in the backend C compiler
|
||||
\end_layout
|
||||
|
||||
\begin_layout Itemize
|
||||
Allowing the user to deviate from the normal struct packing structure in
|
||||
favor of a tweaked packing technique
|
||||
|
@ -329,5 +620,14 @@ allowed but the default is to pack accordingly to the respective platform
|
|||
\end_layout
|
||||
|
||||
\end_deeper
|
||||
\begin_layout Section
|
||||
Specified behaviour
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
TODO: Insert ramblings here about underspecified behaviour and how they
|
||||
plague C and how we easily fix this in tlang
|
||||
\end_layout
|
||||
|
||||
\end_body
|
||||
\end_document
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
|
||||
\chapter{Aims}
|
||||
|
||||
A programming language normally has an aim, a \emph{purpose of existence},
|
||||
to put it in a snobbish way that a white male like me would. It can
|
||||
range from solving a problem in a highly specific domain (such as a
|
||||
Domain Specific Language (TODO: add citation)) to trying to solve
|
||||
various problems spread across several different domains, a \emph{general
|
||||
purpose} programming language. This is where I would like to place
|
||||
Tristan - a language that can support multiple paradigms of programming
|
||||
- whether this be object-oriented programming with the usage of \emph{classes}
|
||||
and \emph{objects} or functional programming with techniques such
|
||||
as map and filter.\\
|
||||
\\
|
||||
Tristan aims to be able to support all of these but with certain limits,
|
||||
this is after all mainly an imperative language with those paradigms
|
||||
as \emph{``extra features''}. Avoiding feature creep in other systems-levels
|
||||
languages such as C++ is something I really want to stress about the
|
||||
design of this language, I do not want a big and confusing mess that
|
||||
has an extremely steep learning curve and way too many moving parts.\\
|
||||
\\
|
||||
One should not require the knowledge of more than two different paradigms
|
||||
in order to understand the usage of a standard library function as
|
||||
an example. If a user is looking at the documentation of a given function
|
||||
call then at most the amount of concepts required to understand it
|
||||
should be two, for example a \emph{templatised} and \emph{object-based}
|
||||
function would be the upper bound on concepts allowed.
|
||||
|
||||
\section{Paradigms}
|
||||
|
||||
Tristan is a procedural programming language that supports object-oriented
|
||||
programming and templates.
|
||||
|
||||
\subsection{Object-oriented programming}
|
||||
|
||||
Object orientation allows the programmer to create user-defined types
|
||||
which encapsulate both data fields and methods which act upon said
|
||||
data. Tristan supports:
|
||||
\begin{enumerate}
|
||||
\item Class-based object orientation
|
||||
\begin{enumerate}
|
||||
\item Classes as the base of user-defined types and objects are instances
|
||||
of these types
|
||||
\item Single inheritance hierarchy
|
||||
\item Runtime polymorphism
|
||||
\end{enumerate}
|
||||
\item Interfaces
|
||||
\begin{enumerate}
|
||||
\item Multiple inheritance
|
||||
\item Runtime polymorphism (thinking\textbackslash hyperref\{\})
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
It is with this lean approach to object orientation that we keep things
|
||||
simple enough (only single inheritance) but with enough power to model
|
||||
the real world in code (by supporting interfaces).
|
||||
|
||||
\subsection{Templating}
|
||||
|
||||
Templating, otherwise known as \emph{generics}, is a mechanism by
|
||||
which a given body of code which contains a type specifier such as
|
||||
variable declarations or function definitions can have their said
|
||||
type specifiers parameterized. The usage of this can be illustrated
|
||||
in the code below, where we want to define a method \texttt{sum(a, b)}
|
||||
which returns the summation of the two inputs. We define a version that
|
||||
works for integral types (\texttt{int}) and a version that works
|
||||
for decimal types (\texttt{float}):\\
|
||||
\begin{lstlisting}[language=C]
|
||||
// Integral summation function
|
||||
int sum(int a, int b)
|
||||
{
|
||||
return a+b;
|
||||
}
|
||||
|
||||
// Decimal summation function
|
||||
float sum(float a, float b)
|
||||
{
|
||||
return a+b;
|
||||
}
|
||||
\end{lstlisting}Being a small example we can reason about the easiness of simply defining
|
||||
two versions of the \texttt{sum(a, b)} method for the two types,
|
||||
but after some time this can either get overly repetitive if we have
|
||||
to do this for more methods of a similar structure \emph{or }when
|
||||
more types are involved. This is where templating comes in, we can
|
||||
write a more general version of the same function and let the compiler
|
||||
generate the differently typed versions dependent on what \emph{type
|
||||
parameter} we pass in.\\
|
||||
\\
|
||||
A templatised version of the above \texttt{sum(a, b)} function is
|
||||
shown below:\\
|
||||
\begin{lstlisting}[language=C]
|
||||
// Templatised function
|
||||
template T
|
||||
{
|
||||
T sum(T a, T b)
|
||||
{
|
||||
return a+b;
|
||||
}
|
||||
}
|
||||
|
||||
// Integral version
|
||||
sum!(int)(1,2)
|
||||
|
||||
// Decimal version
|
||||
sum!(float)(1.0,2.0)
|
||||
\end{lstlisting}The way this works is that whenever you call the function \texttt{sum(a, b)}
|
||||
you will have to provide it with the specific type you want generated
|
||||
for that function.
|
||||
|
||||
\section{Systems-level access}
|
||||
|
||||
Tristan does not shy away from features which give you access to system-level
|
||||
concepts such as memory addresses (via pointers), assembly (via the
|
||||
inline assembler) and so on. Such features are inherently unsafe but
|
||||
it is this sort of control that I wish to give the user, the balance
|
||||
between what the compiler should do and what the user should make
|
||||
sure they are doing is tipped quite heavily in favor of the latter
|
||||
in my viewpoint and hence we support such features as:
|
||||
\begin{itemize}
|
||||
\item Weak typing
|
||||
\begin{itemize}
|
||||
\item By default this is not the behavior when using \texttt{cast()}
|
||||
\item Casting to an incompatible type is allowed - even when a run-time
|
||||
type-check is invalid you can still force a cast with \texttt{castunsafe()}
|
||||
\item The user should be able to do what \emph{he} wants if requested
|
||||
\end{itemize}
|
||||
\item Pointers
|
||||
\begin{itemize}
|
||||
\item The mere \emph{support} of pointers allowing one to take a memory-level
|
||||
view of objects in memory rather than the normal ``safe access''
|
||||
means
|
||||
\end{itemize}
|
||||
\item Inline assembly
|
||||
\begin{itemize}
|
||||
\item Inserting of arbitrary assembler is allowed, providing the programmer
|
||||
with access to systems level registers, interrupts/syscall instructions
|
||||
and so on
|
||||
\end{itemize}
|
||||
\item Custom byte-packing
|
||||
\begin{itemize}
|
||||
\item Allowing the user to deviate from the normal struct packing structure
|
||||
in favor of a tweaked packing technique
|
||||
\item Custom packing on a system that doesn't agree with the alignment of
|
||||
your data \textbf{is }allowed but the default is to pack accordingly
|
||||
to the respective platform
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\section{Specified behaviour}
|
||||
|
||||
TODO: Insert ramblings here about underspecified behaviour and how
|
||||
they plague C and how we easily fix this in tlang
|
|
@ -0,0 +1,2 @@
|
|||
|
||||
\chapter{TODO Basics syntax}
|
|
@ -0,0 +1,144 @@
|
|||
|
||||
\chapter{Lexical analysis}
|
||||
|
||||
Lexical analysis is the process of taking a program as an input string
|
||||
$A$ and splitting it into a list of $n$ sub-strings $A_{1},\,A_{2}\ldots A_{n}$
|
||||
called \emph{tokens}. The length $n$ of this list is dependent on
|
||||
several rules that determine how, when and where new tokens are built
|
||||
- this set of rules is called a \emph{grammar}.
|
||||
|
||||
\section{Grammar}
|
||||
|
||||
The Tristan grammar is specified in EBNF below:\\
|
||||
\\
|
||||
TODO: We need to derive a grammar/come up with one (and include explanations
|
||||
of EBNF).
|
||||
|
||||
\section{Overview of implementation}
|
||||
|
||||
The source code for the lexical analysis part of the compiler is located
|
||||
in \texttt{source/tlang/lexer.d} which contains two important class
|
||||
definitions:
|
||||
\begin{itemize}
|
||||
\item \texttt{Token} - This represents a token
|
||||
\begin{itemize}
|
||||
\item Complete with the token string itself, \texttt{token}. Retrievable
|
||||
with a call to \texttt{getToken()}
|
||||
\item The coordinates in the source code where the token begins as \texttt{line}
|
||||
and \texttt{column}
|
||||
\item Overrides equality (\texttt{opEquals}) such that doing,
|
||||
|
||||
\begin{lstlisting}[language=C]
|
||||
new Token("int") == new Token("int")
|
||||
\end{lstlisting}
|
||||
\item would evaluate to \texttt{true}, rather than false by reference equality
|
||||
(the default in D)
|
||||
\end{itemize}
|
||||
\item \texttt{Lexer} - The token builder
|
||||
\begin{itemize}
|
||||
\item \texttt{sourceCode}, the whole input program (as a string) to be
|
||||
tokenized
|
||||
\item \texttt{position}, holds the index to the current character in the
|
||||
string array \texttt{sourceCode}
|
||||
\item \texttt{currentChar}, the current character at index-\texttt{position}
|
||||
\item Contains a list of the currently built tokens, \texttt{Token[] tokens}
|
||||
\item Current line and column numbers as \texttt{line} and \texttt{column}
|
||||
respectively
|
||||
\item A ``build up'' - this is the token (in string form) currently being
|
||||
built - \texttt{currentToken}
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
|
||||
\section{Implementation}
|
||||
|
||||
The implementation of the lexer, the \texttt{Lexer} class, is explained
|
||||
in detail in this section. (TODO: constructor) The lexical analysis
|
||||
is done one-shot via the \texttt{performLex()} method which will
|
||||
attempt to tokenize the input program, on failure returning \texttt{false},
|
||||
\texttt{true} otherwise. In the successful case the \texttt{tokens}
|
||||
array will be filled with the created tokens and can then later be
|
||||
retrieved via a call to \texttt{getTokens()}.\\
|
||||
\\
|
||||
Example usage:\\
|
||||
TODO
|
||||
|
||||
\subsection{performLex()}
|
||||
|
||||
TODO: This is going to change sometime soonish, so I want the final
|
||||
version of how it works here. I may as well, however, give a brief
|
||||
explanation as I doubt \emph{much }will change - only specific parsing
|
||||
cases.\\
|
||||
\\
|
||||
This method contains a looping structure which will read character-by-character
|
||||
from the \texttt{sourceCode} string and follow the rules of the grammar
|
||||
(TODO: add link), looping whilst there are still characters available
|
||||
for consumption (\texttt{position < sourceCode.length}).\\
|
||||
\\
|
||||
We loop through each character and dependent on its value we start
|
||||
building new tokens, certain characters will cause a token to finish
|
||||
being built which will sometimes be caused by \texttt{isSpliter(character)}
|
||||
being \texttt{true}. A typical token building process looks something
|
||||
like the following, containing the final character to be tacked onto
|
||||
the current token build up, the creation of a new token object and
|
||||
the addition of it to the \texttt{tokens} list, finishing with flushing
|
||||
the build up string and incrementing the coordinates:\\
|
||||
\\
|
||||
A typical token building procedure looks something like this:\\
|
||||
\begin{lstlisting}[language=Java]
|
||||
/* Generate and add the token */
|
||||
currentToken ~= "'";
|
||||
currentTokens ~= new Token(currentToken, line, column);
|
||||
|
||||
/* Flush the token */
|
||||
currentToken = "";
|
||||
column += 2;
|
||||
position += 2;
|
||||
\end{lstlisting}
|
||||
|
||||
\subsection{Character and token availability}
|
||||
|
||||
Helper functions relating to character and token availability.
|
||||
|
||||
\subsubsection{hasToken()}
|
||||
|
||||
Returns \texttt{true} if there is a token currently built i.e. \texttt{currentToken.length != 0},
|
||||
\texttt{false} otherwise.
|
||||
|
||||
\subsubsection{isBackward()}
|
||||
|
||||
Returns \texttt{true} if we can move the character pointer backwards,
|
||||
\texttt{false} otherwise.
|
||||
|
||||
\subsubsection{isForward()}
|
||||
|
||||
Returns \texttt{true} if we can move the character pointer forward,
|
||||
\texttt{false} otherwise.
|
||||
|
||||
\subsection{isNumericalStr()}
|
||||
|
||||
This method is called in order to check if the build up, \texttt{currentToken},
|
||||
is a valid numerical string. If the string is empty, then it returns
|
||||
\texttt{false}. If the string is non-empty and contains anything
|
||||
\emph{other }than digits then it returns \texttt{false}, otherwise
|
||||
it returns \texttt{true}.
|
||||
|
||||
TODO
|
||||
|
||||
\subsection{isSpliter()}
|
||||
|
||||
This method checks if the given character is one of the following:
|
||||
\begin{itemize}
|
||||
\item character == ';' || character == ',' || character == '(' || character
|
||||
== ')' || character == '{[}' || character == '{]}' || character ==
|
||||
'+' || character == '-' || character == '/' || character == '\%' ||
|
||||
character == '{*}' || character == '\&' || character == '\{' || character
|
||||
== '\}' || character == '=' || character == '|' || character == '\textasciicircum '
|
||||
|| character == '!' || character == '\textbackslash n' || character
|
||||
== '\textasciitilde ' || character =='.' || character == ':';
|
||||
\item \texttt{;}~\texttt{,}~\texttt{(}~\texttt{)}~\texttt{[}~\texttt{]}~\texttt{+}~\texttt{-}~\texttt{/}~\texttt{\%}~\texttt{*}~\texttt{\&}~\texttt{\{}~\texttt{\}}
|
||||
\item \texttt{=}~| (TODO: make it texttt)~\texttt{\^}~\texttt{!}~\texttt{\\n}(TODO:
|
||||
\textbackslash n not appearing)~\texttt{\~}~\texttt{.}~\texttt{\:}
|
||||
\end{itemize}
|
||||
Whenever this method returns \texttt{true} it generally means you
|
||||
should flush the current token, start a new token add the offending
|
||||
spliter token and flush that as well.
|
|
@ -0,0 +1,314 @@
|
|||
|
||||
\chapter{Parsing}
|
||||
|
||||
Once we have generated a list of tokens (instances of \texttt{Token})
|
||||
from the \texttt{Lexer} instance we need to turn these into a structure
|
||||
that represents our program's source code \emph{but}\textbf{ }using
|
||||
in-memory data-structures which we can traverse and process at a later
|
||||
stage.
|
||||
|
||||
\section{Overview}
|
||||
|
||||
The \texttt{Parser} class contains several methods for parsing different
|
||||
sub-structures of a TLang program and returning different data types
|
||||
generated by these methods. The parser has the ability to move back
|
||||
and forth between the token stream provided and fetch the current
|
||||
token (along with analysing it to return the type of symbol the token
|
||||
represents - known as the \texttt{SymbolType}).\\
|
||||
\\
|
||||
For example, the method \texttt{parseIf()} is used to parse if statements,
|
||||
it is called on the occurrence of the token of \texttt{if}. This method
|
||||
returns an instance of type \texttt{IfStatement}. Then there are
|
||||
methods like \texttt{parseBody()} which is responsible for creating
|
||||
several sub-calls to methods such as \texttt{parseIf()} and building
|
||||
up a list of \texttt{Statement} instances (the top-type for all parser
|
||||
nodes).\\
|
||||
\\
|
||||
The entry point to call is \texttt{parse()} which will return an
|
||||
instance of type \texttt{Module}.
|
||||
|
||||
\section{API}
|
||||
|
||||
The API exposed by the parser is rather minimal as there isn't much
|
||||
to a parser than controlling the token stream pointer (the position
|
||||
in the token stream), fetching the token and acting upon the type
|
||||
or value of said token. Therefore we have the methods summarised below:
|
||||
\begin{enumerate}
|
||||
\item \texttt{nextToken()}
|
||||
\begin{enumerate}
|
||||
\item Moves the token pointer to the next token
|
||||
\end{enumerate}
|
||||
\item \texttt{previousToken()}
|
||||
\begin{enumerate}
|
||||
\item Moves the token pointer to the previous token
|
||||
\end{enumerate}
|
||||
\item \texttt{getCurrentToken()}
|
||||
\begin{enumerate}
|
||||
\item Returns the current \texttt{Token} instance at the current token
|
||||
pointer position
|
||||
\end{enumerate}
|
||||
\item \texttt{hasTokens()}
|
||||
\begin{enumerate}
|
||||
\item Returns \texttt{true} if there are tokens still left in the stream
|
||||
(i.e. \texttt{tokenPtr < tokens.length}), \texttt{false} otherwise
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
|
||||
\section{Initialization}
|
||||
|
||||
The initialization of the parser is rather simple, an instance of
|
||||
the \texttt{Parser} class must be instantiated, along with this the
|
||||
following arguments must be provided to the constructor:
|
||||
\begin{enumerate}
|
||||
\item \texttt{Token[] tokens}
|
||||
\begin{enumerate}
|
||||
\item This is an array of \texttt{Token} to be provided to the parser for
|
||||
parsing. This would have been derived from the \texttt{Lexer} via
|
||||
its \texttt{performLex()} and \texttt{getTokens()} call.
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
A new instance would therefore be created with something akin to:\begin{lstlisting}[language=Java]
|
||||
// Tokenize the following program
|
||||
string sourceCode = "int i = 2;"
|
||||
Lexer lexer = new Lexer(sourceCode);
|
||||
lexer.performLex();
|
||||
|
||||
// Extract tokens and pass to the lexer
|
||||
Token[] tokens = lexer.getTokens();
|
||||
Parser parser = new Parser(tokens);
|
||||
\end{lstlisting}
|
||||
|
||||
\section{Symbol types}
|
||||
|
||||
The token stream is effectively a list of instances of \texttt{Token}
|
||||
which consist just of the token itself as a string and the coordinates
|
||||
of the token (where it occurs). However, some tokens, despite being
|
||||
different strings, can be of the same type or \emph{syntactical grouping}.
|
||||
For example one would agree that both tokens \texttt{1.5} and \texttt{25.2}
|
||||
are both different tokens but are both floating points. This is where
|
||||
the notion of symbol types comes in.\\
|
||||
\\
|
||||
The enum \texttt{SymbolType} in \texttt{parsing/symbols/check.d}
|
||||
describes all of the available \emph{types }of tokens there are in
|
||||
the grammar of the Tristan programming language like so:\begin{lstlisting}[language=Java]
|
||||
public enum SymbolType {
|
||||
LE_SYMBOL,
|
||||
IDENT_TYPE,
|
||||
NUMBER_LITERAL,
|
||||
CHARACTER_LITERAL,
|
||||
STRING_LITERAL,
|
||||
SEMICOLON,
|
||||
LBRACE,
|
||||
...
|
||||
}
|
||||
\end{lstlisting}~\\
|
||||
Given an instance of \texttt{Token} one can pass it to the \texttt{getSymbolType(Token)}
|
||||
method which will then return an enum member from \texttt{SymbolType}.
|
||||
When a token has no associated symbol type then \texttt{SymbolType.UNKNOWN}
|
||||
is returned. Now for an example:\begin{lstlisting}[language=Java]
|
||||
// Create a new token at with (0, 0) as coordinates
|
||||
Token token = new Token("100", 0, 0);
|
||||
|
||||
// Get the symbol type
|
||||
SymbolType symType = getSymbolType(token);
|
||||
assert(symType == SymbolType.NUMBER_LITERAL);
|
||||
\end{lstlisting}This assertion would pass as the symbol type of such a token is a
|
||||
number literal.
|
||||
|
||||
\subsection{API}
|
||||
|
||||
The API for working with and using \texttt{SymbolType}s is made available
|
||||
within the \texttt{parsing/data/check.d} and contains the following
|
||||
methods:
|
||||
\begin{enumerate}
|
||||
\item \texttt{isType(string)}
|
||||
\begin{enumerate}
|
||||
\item Returns \texttt{true} if the given string (a token) is a built-in
|
||||
type
|
||||
\item Built-in type strings would be: \texttt{byte, ubyte, short, ushort, int, uint, long, ulong, void}
|
||||
\end{enumerate}
|
||||
\item \texttt{getSymbolType(Token)}
|
||||
\begin{enumerate}
|
||||
\item Returns the \texttt{SymbolType} associated with the given \texttt{Token}
|
||||
\item If the token is not of a valid type then \texttt{SymbolType.UNKNOWN}
|
||||
is returned
|
||||
\end{enumerate}
|
||||
\item \texttt{getCharacter(SymbolType)}
|
||||
\begin{enumerate}
|
||||
\item This performs the reverse of \texttt{getSymbolType(Token)} in the
|
||||
sense that you provide it a \texttt{SymbolType} and it will return
|
||||
the corresponding string that is of that type.
|
||||
\item This will work only for back-mapping a sub-section of tokens as you
|
||||
won't get anything back if you provide \texttt{SymbolType.IDENT\_TYPE}
|
||||
as there are infinite possibilities for that - not a fixed token.
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
|
||||
\section{Data types}
|
||||
|
||||
Every node returned by a \texttt{parseX()} is of a certain type and
|
||||
there are some important types to mention here. The following types
|
||||
are from either \texttt{parsing/data.d} or \texttt{parsing/containers.d}.
|
||||
|
||||
\subsection{\texttt{Statement}}
|
||||
|
||||
The \texttt{Statement} type is the top-type for most parse nodes,
|
||||
it has the following important methods and fields:
|
||||
\begin{enumerate}
|
||||
\item \texttt{weight}
|
||||
\begin{enumerate}
|
||||
\item This holds a \texttt{byte} value which is used for when statements
|
||||
are required to be re-ordered. It starts default at 0 whereby that
|
||||
is the most prioritized re-ordering value (i.e. smaller means you
|
||||
appear first)
|
||||
\end{enumerate}
|
||||
\item \texttt{parentOf()}
|
||||
\begin{enumerate}
|
||||
\item This returns an instance of \texttt{Container}, specifically indicating
|
||||
of which container this Statement is a \emph{parent of}.
|
||||
\item It can be \texttt{null} if this Statement was not parented.
|
||||
\end{enumerate}
|
||||
\item \texttt{parentTo(Container)}
|
||||
\begin{enumerate}
|
||||
\item Set the parenting \texttt{Container} of this Statement to the one
|
||||
provided.
|
||||
\end{enumerate}
|
||||
\item \texttt{toString()}
|
||||
\begin{enumerate}
|
||||
\item The default string representation method for Statements (unless overridden)
|
||||
is to show a rolling count which is incremented with every instantiation
|
||||
of a Statement object.
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
|
||||
\subsection{\texttt{Entity}}
|
||||
|
||||
The \texttt{Entity} type is a sub-type of \texttt{Statement} and
|
||||
represents any named entity, along with initialization scopes and
|
||||
accessor types. The following methods and fields are to note:
|
||||
\begin{enumerate}
|
||||
\item \texttt{this(string)}
|
||||
\begin{enumerate}
|
||||
\item Constructs a new instance of an Entity with the provided name.
|
||||
\end{enumerate}
|
||||
\item \texttt{getName()}
|
||||
\begin{enumerate}
|
||||
\item Returns the name of the entity.
|
||||
\end{enumerate}
|
||||
\item \texttt{setAccessorType(AccessorType accessorType)}
|
||||
\begin{enumerate}
|
||||
\item TODO: Describe this
|
||||
\end{enumerate}
|
||||
\item \texttt{getAccessorType()}
|
||||
\begin{enumerate}
|
||||
\item TODO: Describe this
|
||||
\end{enumerate}
|
||||
\item \texttt{setModifierType(InitScope initScope)}
|
||||
\begin{enumerate}
|
||||
\item TODO: Describe this
|
||||
\end{enumerate}
|
||||
\item \texttt{InitScope getModifierType()}
|
||||
\begin{enumerate}
|
||||
\item TODO: Describe this
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
|
||||
\subsection{\texttt{Container}}
|
||||
|
||||
The \texttt{Container} type is an interface that specifies a certain
|
||||
type to implement a set of methods. These methods allow the type to
|
||||
\emph{become} a container by then allowing one or more instances of
|
||||
\texttt{Statement} or rather a \texttt{Statement[]} to be contained
|
||||
within the container i.e. making it contain them.\\
|
||||
\\
|
||||
It should be noted that the parenting method is used to climb up the
|
||||
hierarchy \textbf{given} a Statement instance, however the Container
|
||||
technique is useful for a top-down search for an Entity - they are
|
||||
independent in that sense but can be used together.
|
||||
|
||||
\section{How to parse}
|
||||
|
||||
The basic flow of the parser involves the following process:
|
||||
\begin{enumerate}
|
||||
\item Firstly you need an entry point, this entry point for us is the \texttt{parse()}
|
||||
method which will return an instance of \texttt{Module} which represents
|
||||
the module - the TLang program.
|
||||
\item Every \texttt{parseX()} method gets called by another such method
|
||||
dependent on the current symbol (and sometimes a lookahead)
|
||||
\begin{enumerate}
|
||||
\item For example, sometimes when we come across \texttt{SymbolType.IDENTIFIER}
|
||||
we call \texttt{parseName()} which can then either call \texttt{parseFuncCall()},
|
||||
\texttt{parseTypedDeclaration()} or \texttt{parseAssignment()}.
|
||||
This requires a lookahead to check what follows the identifier because
|
||||
just by itself it is too ambiguous grammatically.
|
||||
\item After determining what comes next the token is pushed back using \texttt{previousToken()}
|
||||
and then we proceed into the correct function
|
||||
\item Lookaheads are rare but they do appear in situations like that
|
||||
\end{enumerate}
|
||||
\item The \texttt{parseX()} methods return instances of \texttt{Statement}
|
||||
which is the top type for all parser-generated nodes or \emph{AST
|
||||
nodes}.
|
||||
\item When you are about to parse a sub-section (like an if statement) of
|
||||
a bigger syntax group (like a body) you leave the \emph{offending
|
||||
token} as the current token, then you call the parsing method (in
|
||||
this case \texttt{parseIf()}) and let it handle the call to \texttt{nextToken()}
|
||||
- this is simply the structure of parsing that TLang follows.
|
||||
\item Upon exiting a \texttt{parseX()} method you call \texttt{nextToken()}
|
||||
- this determines whether this method would continue parsing or not
|
||||
- if not then you return and the caller will continue with that current
|
||||
token and move on from there.
|
||||
\end{enumerate}
|
||||
|
||||
\subsection{Example of parsing if-statements}
|
||||
|
||||
We will now look at an example of how we deal with parsing if statements
|
||||
in our parser, specifically within the \texttt{parseBody()}. The
|
||||
beginning of this method starts by moving us off the offending token
|
||||
that made us call \texttt{parseBody()} (hence the call to \texttt{nextToken()}).
|
||||
After which we setup an array of \texttt{Statement} such that we
|
||||
can build up a \emph{body} of them:\begin{lstlisting}[language=Java]
|
||||
gprintln("parseBody(): Enter", DebugType.WARNING);
|
||||
|
||||
Statement[] statements;
|
||||
|
||||
/* Consume the `{` symbol */
|
||||
nextToken();
|
||||
\end{lstlisting}~\\
|
||||
Now we are within the body, as you can imagine a body is to be made
|
||||
up of several statements of which we do not know how many there are.
|
||||
Therefore we setup a loop that will iterate till we run out of tokens:\begin{lstlisting}[language=Java]
|
||||
while (hasTokens())
|
||||
{
|
||||
...
|
||||
}
|
||||
\end{lstlisting}~\\
|
||||
The next thing we want to do is grab the current token and check what
|
||||
type of symbol it is:\begin{lstlisting}[language=Java]
|
||||
while (hasTokens())
|
||||
{
|
||||
/* Get the token */
|
||||
Token tok = getCurrentToken();
|
||||
SymbolType symbol = getSymbolType(tok);
|
||||
gprintln("parseBody(): SymbolType=" ~ to!(string)(symbol));
|
||||
|
||||
...
|
||||
}
|
||||
\end{lstlisting}~\\
|
||||
Following this we now have several checks that make use of \texttt{getSymbolType(Token)}
|
||||
in order to determine what the token's type is and then in our case
|
||||
if the token is \texttt{"if"} then we will make a call to \texttt{parseIf()}
|
||||
and append the returned Statement-sub-type to the body of statements
|
||||
(\texttt{Statement[]}):\begin{lstlisting}[language=Java]
|
||||
while(hasTokens())
|
||||
{
|
||||
...
|
||||
|
||||
/* If it is a branch */
|
||||
else if (symbol == SymbolType.IF)
|
||||
{
|
||||
statements ~= parseIf();
|
||||
}
|
||||
|
||||
...
|
||||
}
|
||||
\end{lstlisting}
|
|
@ -0,0 +1,4 @@
|
|||
|
||||
\chapter{Dependency idk}
|
||||
|
||||
TODO: Add lexer information here
|
|
@ -0,0 +1,176 @@
|
|||
|
||||
\chapter{Code emit}
|
||||
|
||||
The code emit process is the final process of the compiler whereby
|
||||
the \texttt{initQueue}, \texttt{codeQueue} and all assorted auxiliary
|
||||
information is passed to an instance of \texttt{CodeEmitter} (in
|
||||
the case of the C backend this is sub-typed to the \texttt{DGen}
|
||||
class) such that the code can be written to a file. At this stage
|
||||
all queues consist simply of instances of the \texttt{Instruction}
|
||||
class.\\
|
||||
\\
|
||||
Our C backend or \emph{custom code emitter}, \texttt{DGen}, inherits
|
||||
from the \texttt{CodeEmitter} class which specifies that the following
|
||||
methods must be overridden/implemented:
|
||||
\begin{enumerate}
|
||||
\item \texttt{emit()}
|
||||
\begin{enumerate}
|
||||
\item Begins the emit process
|
||||
\end{enumerate}
|
||||
\item \texttt{finalize()}
|
||||
\begin{enumerate}
|
||||
\item Finalizes the emitting process (only to be called after \texttt{emit()}
|
||||
finishes)
|
||||
\end{enumerate}
|
||||
\item \texttt{transform(Instruction instruction)}
|
||||
\begin{enumerate}
|
||||
\item Transforms or emits a single Instruction and returns the transformation
|
||||
as a string
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
|
||||
\section{Queues}
|
||||
|
||||
There are several notable queues that the \texttt{CodeEmitter} class
|
||||
contains, these are as follows:
|
||||
\begin{enumerate}
|
||||
\item \texttt{initQueue}
|
||||
\begin{enumerate}
|
||||
\item Despite its name this holds instructions for doing memory allocations
|
||||
for static entities (\textbf{not} initialization code for said entities)
|
||||
\end{enumerate}
|
||||
\item \texttt{globalsQueue}
|
||||
\begin{enumerate}
|
||||
\item This queue holds instructions for the globals executions. This includes
|
||||
things such as global variable declarations and the sorts.
|
||||
\end{enumerate}
|
||||
\item Function definitions map
|
||||
\begin{enumerate}
|
||||
\item This is a string-to-queue map which contains the code queues for every
|
||||
function definition.
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
~\\
|
||||
Along with these queues there are some methods used to manipulate
|
||||
and use them, these are:
|
||||
\begin{enumerate}
|
||||
\item \texttt{selectQueue(QueueType, string)}
|
||||
\begin{enumerate}
|
||||
\item Select the type of queue: \texttt{ALLOC\_QUEUE} (for the \texttt{initQueue}),
|
||||
\texttt{GLOBALS\_QUEUE} (for \texttt{globalsQueue}) and \texttt{FUNCTION\_DEF\_QUEUE}
|
||||
(for the function definitions queue)
|
||||
\item For function definitions, the optional string argument (second argument)
|
||||
must specify the name of the function definition you would wish to
|
||||
use. An invalid name will throw an error.
|
||||
\item This automatically calls \texttt{resetCursor()}.
|
||||
\end{enumerate}
|
||||
\item \texttt{nextInstruction()}
|
||||
\begin{enumerate}
|
||||
\item Moves the cursor to the next instruction. Throws an exception if out
|
||||
of bounds.
|
||||
\end{enumerate}
|
||||
\item \texttt{previousInstruction()}
|
||||
\begin{enumerate}
|
||||
\item Moves the cursor to the previous instruction. Throws an exception
|
||||
if out of bounds.
|
||||
\end{enumerate}
|
||||
\item \texttt{resetCursor()}
|
||||
\begin{enumerate}
|
||||
\item Resets the position of the instruction pointer to 0.
|
||||
\end{enumerate}
|
||||
\item \texttt{getCurrentInstruction()}
|
||||
\begin{enumerate}
|
||||
\item Retrieves the current instruction at the cursor.
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
|
||||
\section{Custom code emits}
|
||||
|
||||
We override/implement the \texttt{transform(Instruction instruction)}
|
||||
in \texttt{DGen} to work somewhat as a big if-statement that matches
|
||||
the different sub-types of Instructions that exist, then the respective
|
||||
code-emit (C code) is generated. This method has the potential to
|
||||
be recursive as some instructions contain nested instructions that
|
||||
must be transformed prior before the final transformation, in which
|
||||
case a recursive call to \texttt{transform(Instruction)} is made.
|
||||
|
||||
\subsection{Code emit example: Variable declarations}
|
||||
|
||||
The example below is the code used to transform the in-memory representation
|
||||
of a variable declaration, known as the \texttt{VariableDeclaration}
|
||||
instruction, into the C code to be emitted:\\
|
||||
\\
|
||||
\begin{lstlisting}[language=Java]
|
||||
/* VariableDeclaration */
|
||||
else if(cast(VariableDeclaration)instruction)
|
||||
{
|
||||
VariableDeclaration varDecInstr = cast(VariableDeclaration)instruction;
|
||||
Context context = varDecInstr.getContext();
|
||||
|
||||
Variable typedEntityVariable = cast(Variable)context.tc.getResolver().resolveBest(context.getContainer(), varDecInstr.varName);
|
||||
|
||||
string renamedSymbol = SymbolMapper.symbolLookup(typedEntityVariable);
|
||||
|
||||
return varDecInstr.varType~" "~renamedSymbol~";";
|
||||
}
|
||||
\end{lstlisting}What we have here is some code which will extract the name of the
|
||||
variable being declared via \texttt{varDecInstr.varName}
|
||||
which is then used to lookup the parser node of type \texttt{Variable}.
|
||||
The \texttt{Variable} object contains information such as the variable's
|
||||
type and also if a variable assignment is attached to this declaration
|
||||
or not.\\
|
||||
\\
|
||||
TODO: Insert code regarding assignment checking\\
|
||||
\\
|
||||
Right at the end we then build up the C variable declaration with
|
||||
the line:\begin{lstlisting}[language=Java]
|
||||
return varDecInstr.varType~" "~renamedSymbol~";";
|
||||
\end{lstlisting}
|
||||
|
||||
\subsection{Symbol renaming}
|
||||
|
||||
In terms of general code emitting we could have simply decided to
|
||||
use the TLang-esque symbol name structure where entities are separated
|
||||
by periods such as \texttt{simple\_module.x} where \texttt{simple\_module}
|
||||
is a container-type such as a \texttt{module} and \texttt{x} is
|
||||
some entity within it, such as a variable. However, what we have decided
|
||||
to do in the emitter process, specifically in \texttt{DGen} - our
|
||||
C code emitter - is to actually rename these symbols to a hash, wherever
|
||||
they occur.\\
|
||||
\\
|
||||
The renaming mechanism is handled by the \texttt{symbolLookup(Entity)}
|
||||
method from the \texttt{SymbolMapper} class. This method takes in
|
||||
a single argument:
|
||||
\begin{enumerate}
|
||||
\item \texttt{entity}
|
||||
\begin{enumerate}
|
||||
\item This must be a type-of \texttt{Entity}, this is the entity of which
|
||||
the symbol renaming should be applied on.
|
||||
\end{enumerate}
|
||||
\end{enumerate}
|
||||
~\\
|
||||
This allows one to then translate the symbol name with the following
|
||||
usage. In this case we want to translate the symbol of the entity
|
||||
named \texttt{x} which is contained in the module-container named
|
||||
\texttt{simple\_variables\_decls\_ass}. Therefore we provide both
|
||||
pieces of information into the function \texttt{symbolLookup}:\begin{lstlisting}[language=Java]
|
||||
// The relative container of this variable is the module
|
||||
Container container = tc.getModule();
|
||||
|
||||
// Lookup a variable named "x"
|
||||
string varLookup = "x";
|
||||
|
||||
// The Variable (type-of Entity)
|
||||
Variable variable = cast(Variable)tc.getResolver().resolveBest(context.getContainer(), varLookup);
|
||||
|
||||
// Symbol map
|
||||
string renamedSymbol = SymbolMapper.symbolLookup(variable);
|
||||
|
||||
// renamedSymbol == t_c326f89096616e69e89a3874a4c7f324
|
||||
\end{lstlisting}~\\
|
||||
The resulting hash is generated by resolving the absolute path name
|
||||
of the entity provided, applying an md5 hash to this name and then
|
||||
pre-pending a \texttt{t\_} to the name. Therefore for the above code
|
||||
we will have \texttt{simple\_variables\_decls\_ass.x} mapped to a
|
||||
symbol name of \texttt{t\_c326f89096616e69e89a3874a4c7f324} to be
|
||||
emitted into the C code file.
|
12
mkdocs.yml
12
mkdocs.yml
|
@ -23,6 +23,10 @@ plugins:
|
|||
- search
|
||||
#- mermaid2
|
||||
|
||||
- exclude:
|
||||
glob:
|
||||
- docs/00-bookindex.md
|
||||
|
||||
|
||||
- section-index
|
||||
#- git-committers-2
|
||||
|
@ -37,7 +41,7 @@ plugins:
|
|||
features:
|
||||
tags: {}
|
||||
|
||||
- print-site
|
||||
#- print-site
|
||||
|
||||
#TODO: See if we can get git commit authors automatically on each page
|
||||
#TODO: See if we can ehnace material docs theme
|
||||
|
@ -50,6 +54,8 @@ markdown_extensions:
|
|||
- pymdownx.mark
|
||||
- pymdownx.tilde
|
||||
|
||||
- pymdownx.arithmatex:
|
||||
generic: true
|
||||
|
||||
- pymdownx.highlight:
|
||||
anchor_linenums: true
|
||||
|
@ -77,3 +83,7 @@ markdown_extensions:
|
|||
emoji_generator: !!python/name:materialx.emoji.to_svg
|
||||
|
||||
|
||||
extra_javascript:
|
||||
- javascripts/mathjax.js
|
||||
- https://polyfill.io/v3/polyfill.min.js?features=es6
|
||||
- https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
|
||||
|
|
Loading…
Reference in New Issue