
707 lines
15 KiB
Raw Normal View History

2024-04-22 10:36:20 +01:00
* Comment types and parsing
* facilities
* Authors: Tristan Brice Velloza Kildaire (deavmi)
module tlang.compiler.symbols.comments;
import std.string : startsWith, split, strip, stripLeft, stripRight;
import std.array : join;
2024-04-16 19:11:55 +01:00
import tlang.misc.logging;
import std.string : format;
2024-04-20 22:57:19 +01:00
import tlang.compiler.lexer.core.tokens : Token;
2024-04-16 19:11:55 +01:00
* The type of docstring
2024-04-16 19:11:55 +01:00
public enum DocType
* A parameter docstring
* This documents a function's
* parameter
2024-04-16 19:11:55 +01:00
* An exception docstring
* This documents a function's
* exceptions which it throws
2024-04-16 19:11:55 +01:00
* A return docstring
* This documents a function's
* return type
2024-04-16 19:11:55 +01:00
/**
 * A parameter docstring
 *
 * This documents a function's
 * parameter
 */
public struct ParamDoc
{
    /** Name of the documented parameter */
    private string param;

    /** Description given for the parameter */
    private string description;

    /**
     * Retrieves the parameter's name
     *
     * Returns: the name
     */
    public string getParam()
    {
        return this.param;
    }

    /**
     * Retrieves the parameter's description
     *
     * Returns: the description
     */
    public string getDescription()
    {
        return this.description;
    }
}
2024-04-16 19:11:55 +01:00
/**
 * A return docstring
 *
 * This documents a function's
 * return type
 */
public struct ReturnsDoc
{
    /** Description given for the return value */
    private string description;

    /**
     * Retrieves the return value's description
     *
     * Returns: the description
     */
    public string getDescription()
    {
        return this.description;
    }
}
2024-04-16 19:11:55 +01:00
/**
 * An exception docstring
 *
 * This documents a function's
 * exceptions which it throws
 */
public struct ExceptionDoc
{
    /** Name of the documented exception */
    private string exception;

    /** Description given for the exception */
    private string description;

    /**
     * Retrieves the exception's name
     *
     * Returns: the name
     */
    public string getException()
    {
        return this.exception;
    }

    /**
     * Retrieves the exception's description
     *
     * Returns: the description
     */
    public string getDescription()
    {
        return this.description;
    }
}
2024-04-16 19:11:55 +01:00
/**
 * Union to be able
 * to reinterpret cast
 * any of the members
 * listed below
 *
 * Only one member is meaningful at a time;
 * `DocStr` records which one alongside it
 * by means of a `DocType` tag.
 */
private union DocContent
{
    ParamDoc param;
    ReturnsDoc returns;
    ExceptionDoc exception;
}
/**
 * Represents a docstring
 * comprised of a type
 * and the docstring itself
 */
public struct DocStr
{
    /** Tag selecting which member of `content` is in use */
    private DocType type;

    /** The docstring payload, interpreted according to `type` */
    private DocContent content;

    /**
     * Retrieves the kind of docstring held
     *
     * Returns: the `DocType`
     */
    public DocType getType()
    {
        return this.type;
    }

    /**
     * Builds a parameter docstring
     *
     * Params:
     *   name = the parameter's name
     *   description = the parameter's description
     * Returns: a `DocStr` tagged `DocType.PARAM`
     */
    public static DocStr param(string name, string description)
    {
        DocStr dstr;
        dstr.type = DocType.PARAM;
        dstr.content.param = ParamDoc(name, description);
        return dstr;
    }

    /**
     * Builds a return docstring
     *
     * Params:
     *   description = the return value's description
     * Returns: a `DocStr` tagged `DocType.RETURNS`
     */
    public static DocStr returns(string description)
    {
        DocStr dstr;
        dstr.type = DocType.RETURNS;
        dstr.content.returns = ReturnsDoc(description);
        return dstr;
    }

    /**
     * Builds an exception docstring
     *
     * Params:
     *   name = the exception's name
     *   description = the exception's description
     * Returns: a `DocStr` tagged `DocType.THROWS`
     */
    public static DocStr exception(string name, string description)
    {
        DocStr dstr;
        dstr.type = DocType.THROWS;
        dstr.content.exception = ExceptionDoc(name, description);
        return dstr;
    }

    /**
     * Fetches the payload as an `ExceptionDoc`
     *
     * Params:
     *   doc = written to only when this is
     * a `DocType.THROWS` docstring
     * Returns: `true` if `doc` was set,
     * otherwise `false`
     */
    public bool getExceptionDoc(ref ExceptionDoc doc)
    {
        if(this.type == DocType.THROWS)
        {
            doc = content.exception;
            return true;
        }

        return false;
    }

    /**
     * Fetches the payload as a `ParamDoc`
     *
     * Params:
     *   doc = written to only when this is
     * a `DocType.PARAM` docstring
     * Returns: `true` if `doc` was set,
     * otherwise `false`
     */
    public bool getParamDoc(ref ParamDoc doc)
    {
        if(this.type == DocType.PARAM)
        {
            doc = content.param;
            return true;
        }

        return false;
    }

    /**
     * Fetches the payload as a `ReturnsDoc`
     *
     * Params:
     *   doc = written to only when this is
     * a `DocType.RETURNS` docstring
     * Returns: `true` if `doc` was set,
     * otherwise `false`
     */
    public bool getReturnDoc(ref ReturnsDoc doc)
    {
        if(this.type == DocType.RETURNS)
        {
            doc = content.returns;
            return true;
        }

        return false;
    }
}
2024-04-16 19:11:55 +01:00
2024-04-22 10:47:38 +01:00
/**
 * Parts of a comment
 */
private struct CommentParts
{
    /** The comment's body text */
    string bdy;

    /** The doc-strings found in the comment */
    DocStr[] strs;
}
2024-04-22 10:47:38 +01:00
* Parses comments of various forms
private class CommentParser
2024-04-22 10:47:38 +01:00
* Comment text
private string source;
2024-04-22 10:47:38 +01:00
* Constructs a new `CommentParser`
* which can extract the comments
* from the given comment text
* Params:
* source = the comment itself
this(string source)
this.source = source;
2024-04-22 10:47:38 +01:00
* Begins the parsing of the provided
* comment source text.
* This assumes a well-formatted
* comment is passed to us. i.e. one
* extracted from the lexer.
* Returns: the `CommentParts`
2024-04-16 19:11:55 +01:00
private CommentParts extract()
2024-04-16 19:11:55 +01:00
CommentParts parts;
// Handle multi-line comments
string[] lines = split(this.source, "\n");
// Strip all lines of any white space on the left-hand or right-hand margins
for(ulong i = 0; i < lines.length; i++)
lines[i] = strip(lines[i]);
// Strip first line of the `/`
lines[0] = stripLeft(lines[0], "/");
// Strip last line of the `/`
lines[$-1] = stripRight(lines[$-1], "/");
// Strip all lines of `*` (on either side)
for(ulong i = 0; i < lines.length; i++)
lines[i] = strip(lines[i], "*");
import niknaks.debugging;
import std.stdio : writeln;
// Now get rid of the first line if it is empty
if(lines[0].length == 0)
lines = lines[1..$];
// Get rid of the last line if it is empty
if(lines[$-1].length == 0)
lines = lines[0..$-1];
2024-04-16 19:11:55 +01:00
// Set the body parts
parts.bdy = join(stripOutDocLines(lines));
// Set doc strings
2024-04-16 19:11:55 +01:00
DocStr[] docStrs;
foreach(string line; onlyParams(lines))
DocStr ds;
if(extractDocLine(line, ds))
docStrs ~= ds;
DEBUG(format("Converted docline '%s' to: %s", line, ds));
parts.strs = docStrs;
// Handle single-line comments
// Set body parts
parts.bdy = strip(stripLeft(this.source, ("//")));
2024-04-16 19:11:55 +01:00
return parts;
2024-04-22 10:47:38 +01:00
* Attempts to extract the doc params
* from a given line, returning if it
* was a success or not
* Params:
* line = the line to parse
* ds = the result (if any)
* Returns: `true` if extraction succeeded,
* otherwise `false`
2024-04-16 19:11:55 +01:00
private bool extractDocLine(string line, ref DocStr ds)
string buildUp;
bool foundType = false;
ulong i = 0;
bool getch(ref char c)
if(i < line.length)
c = line[i];
return true;
return false;
void prog()
char c;
bool spacey(char c)
return c == ' ' || c == '\t';
2024-04-16 19:11:55 +01:00
bool parseParam(ref string paramName, ref string paramDescription)
bool gotParamName = false;
string foundParamName;
bool gotParamDescription = false;
string foundParamDescription;
2024-04-16 19:11:55 +01:00
else if(!gotParamName)
while(getch(c) && !spacey(c))
2024-04-16 19:11:55 +01:00
foundParamName ~= c;
// TODO: Validate name?
gotParamName = true;
foundParamDescription ~= c;
gotParamDescription = true;
if(gotParamName && gotParamDescription)
paramName = foundParamName;
paramDescription = foundParamDescription;
return true;
return false;
bool parseReturn(ref string returnDescription)
string gotDescription;
bool foundDescription;
if(spacey(c) && !foundDescription)
gotDescription ~= c;
foundDescription = true;
returnDescription = gotDescription;
return true;
return false;
2024-04-16 19:11:55 +01:00
2024-04-16 19:11:55 +01:00
else if(c == '@')
2024-04-16 19:11:55 +01:00
string paramType;
while(getch(c) && !spacey(c))
2024-04-16 19:11:55 +01:00
paramType ~= c;
// @param
if(paramType == "param")
string paramName, paramDescr;
if(parseParam(paramName, paramDescr))
ds = DocStr.param(paramName, paramDescr);
2024-04-16 19:11:55 +01:00
return true;
return false;
// @return
else if (paramType == "return")
string returnDescr;
ds = DocStr.returns(returnDescr);
return true;
return false;
// @throws
else if(paramType == "throws")
string exceptionName, exceptionDescr;
if(parseParam(exceptionName, exceptionDescr)) // Has same structure as a `@param <1> <...>`
ds = DocStr.exception(exceptionName, exceptionDescr);
return true;
return false;
2024-04-16 19:11:55 +01:00
// Unknown @<thing>
WARN(format("Unknown docstring type '%s'", paramType));
2024-04-16 19:11:55 +01:00
return false;
return false;
2024-04-16 19:11:55 +01:00
return false;
2024-04-22 10:47:38 +01:00
* Selects only the lines which have
* doc-strings (params) in them
* Params:
* input = the input lines
* Returns: the output lines
2024-04-16 19:11:55 +01:00
private string[] onlyParams(string[] input)
string[] withDoc;
2024-04-22 10:47:38 +01:00
// TODO: Use niknaks filter
2024-04-16 19:11:55 +01:00
foreach(string i; input)
withDoc ~= i;
return withDoc;
2024-04-22 10:47:38 +01:00
* Strips out any line which is a doc-string
* line, leaving only the non-parameter lines
* Params:
* input = the input lines
* Returns: the output lines
2024-04-16 19:11:55 +01:00
private string[] stripOutDocLines(string[] input)
string[] withoutDoc;
2024-04-22 10:47:38 +01:00
// TODO: Use niknaks filter
2024-04-16 19:11:55 +01:00
foreach(string i; input)
DEBUG(format("'%s'", i));
2024-04-16 19:11:55 +01:00
// Strip left-hand side of any spaces
// and add trailing space
withoutDoc ~= stripLeft(i)~' ';
2024-04-16 19:11:55 +01:00
// Remove trailing whitespace on last item
withoutDoc[$-1] = stripRight(withoutDoc[$-1]);
2024-04-16 19:11:55 +01:00
return withoutDoc;
import std.stdio;
import std.string : format;
// It will NEVER start with a ' ' due to how it is tokenized
string source = `/**
* Hello
* there
CommentParser parser = new CommentParser(source);
2024-04-16 19:11:55 +01:00
CommentParts comment = parser.extract();
writeln(format("Comment: '%s'", comment));
assert("Hello there" == comment.bdy);
/**
 * Represents a comment
 * which can be attached
 * to a `Statement`
 */
public final class Comment
{
    /**
     * The comment's component
     * parts
     */
    private CommentParts content;

    /**
     * Constructs a new comment out
     * of its parsed component parts
     *
     * Params:
     *   content = the parts
     */
    private this(CommentParts content)
    {
        this.content = content;
    }

    /**
     * Generates a comment from the
     * provided token
     *
     * Params:
     *   commentToken = token containing
     * the comment
     * Returns: a `Comment`
     */
    public static Comment fromToken(Token commentToken)
    {
        return fromText(commentToken.getToken());
    }

    /**
     * Generates a comment from the
     * provided comment text
     *
     * Params:
     *   text = the text containing
     * the comment
     * Returns: a `Comment`
     */
    private static Comment fromText(string text)
    {
        CommentParser parser = new CommentParser(text);
        return new Comment(parser.extract());
    }

    /**
     * Extracts the comment's contents.
     * This excludes param/doc-strings
     *
     * Returns: the contents
     */
    public string getContent()
    {
        return this.content.bdy;
    }

    /**
     * Extract all the doc-strings present
     * within the comment
     *
     * Returns: an array of them
     */
    public DocStr[] getDocStrings()
    {
        return this.content.strs;
    }

    /**
     * Extracts all of the param-docs
     * and places them into a key-value
     * mapping whereby the key is
     * the parameter's name and the
     * value the doc itself
     *
     * If a parameter name occurs more than
     * once then the last occurrence wins.
     *
     * Returns: a map
     */
    public ParamDoc[string] getAllParamDocs()
    {
        // TODO: Use niknaks
        ParamDoc[string] d;
        foreach(DocStr docStr; getDocStrings())
        {
            // `getParamDoc` only fills `pDoc` for `DocType.PARAM` entries
            ParamDoc pDoc;
            if(docStr.getParamDoc(pDoc))
            {
                d[pDoc.getParam()] = pDoc;
            }
        }

        return d;
    }

    /**
     * Extracts the return doc-string
     * from this comment
     *
     * Only the first return doc-string
     * encountered is reported.
     *
     * Params:
     *   retDoc = the found `ReturnsDoc`
     * Returns: `true` if found, otherwise
     * `false`
     */
    public bool getReturnDoc(ref ReturnsDoc retDoc)
    {
        // TODO: Use niknaks filter
        foreach(DocStr d; getDocStrings())
        {
            // `retDoc` is left untouched unless `d` is a `DocType.RETURNS` entry
            if(d.getReturnDoc(retDoc))
            {
                return true;
            }
        }

        return false;
    }
}