Files
lib_compiler/src/compiler/parse.c

1054 lines
41 KiB
C

// Flag for LC_ParseStmtBlock: when set, the block may be a single statement
// that is not wrapped in braces.
const int ParseStmtBlock_AllowSingleStmt = 1;
// Creates a parser whose cursor sits on the first token of the lexed stream `x`.
// `begin`/`end` delimit the token array, `at` is the current position.
LC_FUNCTION LC_Parser LC_MakeParser(LC_Lex *x) {
    LC_Parser parser = {0};
    parser.x = x;
    parser.begin = x->tokens;
    parser.at = x->tokens;
    parser.end = x->tokens + x->token_count;
    return parser;
}
// Lexes `str`, stores a parser for it in the single L->quick_parser slot and
// makes that slot the active parser. Returns the active parser.
LC_FUNCTION LC_Parser *LC_MakeParserQuick(char *str) {
    LC_Lex *lex = LC_LexStream("quick_lex", str, 0);
    LC_InternTokens(lex);
    L->quick_parser = LC_MakeParser(lex);
    L->parser = &L->quick_parser;
    return &L->quick_parser;
}
// Formats a message, sends it as an error located at `pos`, bumps the global
// error counter and returns a fresh LC_ASTKind_Error node that callers
// propagate upwards.
LC_FUNCTION LC_AST *LC_ReportParseError(LC_Token *pos, const char *str, ...) {
    LC_FORMAT(L->arena, str, message);
    LC_SendErrorMessage(pos, message);
    L->errors += 1;
    return LC_CreateAST(pos, LC_ASTKind_Error);
}
// Returns the current token and advances the cursor by one.
// Once the cursor has reached `end`, returns the NullToken sentinel and does not move.
LC_FUNCTION LC_Token *LC_Next(void) {
    LC_Parser *parser = L->parser;
    if (parser->at >= parser->end) {
        return &L->NullToken;
    }
    LC_Token *current = parser->at;
    parser->at = current + 1;
    return current;
}
// Peeks at the current token without consuming it.
// Returns the NullToken sentinel when the cursor has reached `end`.
LC_FUNCTION LC_Token *LC_Get(void) {
    LC_Parser *parser = L->parser;
    return parser->at < parser->end ? parser->at : &L->NullToken;
}
// Peeks at the token `i` positions away from the cursor (negative values look
// behind). Returns the NullToken sentinel when that position lies outside
// [begin, end).
//
// The offset is validated against the distances to both ends of the token
// array BEFORE the pointer is formed: computing `at + i` for a position that
// is before `begin` or more than one past `end` is undefined behaviour in C,
// even if the result is only compared and never dereferenced.
LC_FUNCTION LC_Token *LC_GetI(int i) {
    LC_Parser *parser = L->parser;
    if (i >= parser->begin - parser->at && i < parser->end - parser->at) {
        return parser->at + i;
    }
    return &L->NullToken;
}
// Returns the current token when it is of `kind`, otherwise 0. Never consumes.
LC_FUNCTION LC_Token *LC_Is(LC_TokenKind kind) {
    LC_Token *current = LC_Get();
    return current->kind == kind ? current : 0;
}
// Returns the current token when it is the keyword `intern`, otherwise 0.
// Never consumes.
LC_FUNCTION LC_Token *LC_IsKeyword(LC_Intern intern) {
    LC_Token *current = LC_Get();
    if (current->kind != LC_TokenKind_Keyword) {
        return 0;
    }
    if (current->ident != intern) {
        return 0;
    }
    return current;
}
// Consumes and returns the current token when it is of `kind`;
// otherwise leaves the cursor untouched and returns 0.
LC_FUNCTION LC_Token *LC_Match(LC_TokenKind kind) {
    LC_Token *current = LC_Get();
    if (current->kind != kind) {
        return 0;
    }
    LC_Next();
    return current;
}
// Consumes and returns the current token when it is the keyword `intern`;
// otherwise leaves the cursor untouched and returns 0.
LC_FUNCTION LC_Token *LC_MatchKeyword(LC_Intern intern) {
    LC_Token *current = LC_Get();
    if (current->kind != LC_TokenKind_Keyword || current->ident != intern) {
        return 0;
    }
    LC_Next();
    return current;
}
// LC_EXPECT(token, KIND, context)
// Declares a local `LC_Token *token` bound to the consumed token of KIND.
// When the current token has a different kind, reports a parse error that names
// the expected kind, the kind actually found and `context`, and returns the
// resulting error AST from the ENCLOSING function.
// Because it declares a variable it is not wrapped in do { } while (0): it must
// be used where a declaration is allowed, and `token` must be unique in its scope.
#define LC_EXPECT(token, KIND, context) \
LC_Token *token = LC_Match(KIND); \
if (!token) { \
LC_Token *t = LC_Get(); \
return LC_ReportParseError(t, "expected %s got instead %s, this happened while parsing: %s", LC_TokenKindToString(KIND), LC_TokenKindToString(t->kind), context); \
}
// LC_PROP_ERROR(expr, ...)
// Assigns the value of the variadic part to `expr`, then returns `expr` from the
// ENCLOSING function when its kind is LC_ASTKind_Error.
// The expansion starts with the assignment, so the macro may be prefixed with a
// type to declare the variable in place: `LC_AST *LC_PROP_ERROR(v, LC_ParseExpr());`.
// `expr` is evaluated more than once, so it must be free of side effects.
#define LC_PROP_ERROR(expr, ...) \
expr = __VA_ARGS__; \
do { \
if (expr->kind == LC_ASTKind_Error) { \
return expr; \
} \
} while (0)
// Pratt expression parser
// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
// clang-format off
// Packs a (left, right) binding power pair.
// A braced list directly after `return` is C++ syntax and does not compile as C,
// so the pair is built in a named local first.
LC_FUNCTION LC_Precedence LC_MakePrecedence(int left, int right) {
    LC_Precedence result = {left, right};
    return result;
}
// Binding powers of token `kind` when it is used in position `p`
// (prefix, infix or postfix).
// A token that is not an operator in the requested position yields (-1, -1)
// for prefix/postfix and (0, 0) for infix.
// An unknown `p` trips the assert and is then treated like the prefix table.
LC_FUNCTION LC_Precedence LC_GetPrecedence(LC_PrecedenceKind p, LC_TokenKind kind) {
    if (p == LC_PrecedenceKind_Infix) {
        switch (kind) {
        case LC_TokenKind_Or:
            return LC_MakePrecedence(9, 10);
        case LC_TokenKind_And:
            return LC_MakePrecedence(11, 12);
        case LC_TokenKind_Equals:
        case LC_TokenKind_NotEquals:
        case LC_TokenKind_GreaterThen:
        case LC_TokenKind_GreaterThenEq:
        case LC_TokenKind_LesserThen:
        case LC_TokenKind_LesserThenEq:
            return LC_MakePrecedence(13, 14);
        case LC_TokenKind_Sub:
        case LC_TokenKind_Add:
        case LC_TokenKind_BitOr:
        case LC_TokenKind_BitXor:
            return LC_MakePrecedence(15, 16);
        case LC_TokenKind_RightShift:
        case LC_TokenKind_LeftShift:
        case LC_TokenKind_BitAnd:
        case LC_TokenKind_Mul:
        case LC_TokenKind_Div:
        case LC_TokenKind_Mod:
            return LC_MakePrecedence(17, 18);
        default:
            return LC_MakePrecedence(0, 0);
        }
    }

    if (p == LC_PrecedenceKind_Postfix) {
        switch (kind) {
        case LC_TokenKind_Dot:
        case LC_TokenKind_OpenBracket:
        case LC_TokenKind_OpenParen:
            return LC_MakePrecedence(21, -2);
        default:
            return LC_MakePrecedence(-1, -1);
        }
    }

    if (p != LC_PrecedenceKind_Prefix) {
        LC_ASSERT(NULL, !"invalid codepath");
    }

    switch (kind) {
    case LC_TokenKind_OpenBracket:
        return LC_MakePrecedence(-2, 22);
    case LC_TokenKind_Mul:
    case LC_TokenKind_BitAnd:
    case LC_TokenKind_Keyword:
    case LC_TokenKind_OpenParen:
    case LC_TokenKind_Sub:
    case LC_TokenKind_Add:
    case LC_TokenKind_Neg:
    case LC_TokenKind_Not:
    case LC_TokenKind_OpenBrace:
        return LC_MakePrecedence(-2, 20);
    default:
        return LC_MakePrecedence(-1, -1);
    }
}
// Pratt expression parser core.
//
// Parses one prefix expression (atom, unary operator, cast, compound literal,
// parenthesised expression, addptr), then keeps folding postfix and infix
// operators into `left` for as long as their left binding power is greater
// than `min_bp`.
//
// Returns the expression AST, or an LC_ASTKind_Error node. The
// L->on_expr_parsed hook (when set) is invoked with the result of every
// successful invocation, including recursive ones.
LC_FUNCTION LC_AST *LC_ParseExprEx(int min_bp) {
    LC_AST *left = NULL;
    LC_Token *prev = LC_GetI(-1);
    LC_Token *t = LC_Next();
    LC_Precedence prefixbp = LC_GetPrecedence(LC_PrecedenceKind_Prefix, t->kind);

    // parse prefix expression
    switch (t->kind) {
    case LC_TokenKind_RawString:
    case LC_TokenKind_String: { left = LC_CreateAST(t, LC_ASTKind_ExprString); left->eatom.name = t->ident; } break;
    case LC_TokenKind_Ident: { left = LC_CreateAST(t, LC_ASTKind_ExprIdent); left->eident.name = t->ident; } break;
    case LC_TokenKind_Int: { left = LC_CreateAST(t, LC_ASTKind_ExprInt); left->eatom.i = t->i; } break;
    case LC_TokenKind_Float: { left = LC_CreateAST(t, LC_ASTKind_ExprFloat); left->eatom.d = t->f64; } break;
    case LC_TokenKind_Unicode: { left = LC_CreateAST(t, LC_ASTKind_ExprInt); left->eatom.i = t->i; } break;

    case LC_TokenKind_Keyword: {
        if (t->ident == L->kfalse) {
            left = LC_CreateAST(t, LC_ASTKind_ExprBool);
            LC_Bigint_init_signed(&left->eatom.i, false);
        } else if (t->ident == L->ktrue) {
            left = LC_CreateAST(t, LC_ASTKind_ExprBool);
            LC_Bigint_init_signed(&left->eatom.i, true);
        } else {
            // Cast the intern explicitly for %s, as other diagnostics in this file do.
            return LC_ReportParseError(t, "got unexpected keyword '%s' while parsing expression", (char *)t->ident);
        }
    } break;

    case LC_TokenKind_AddPtr: {
        // addptr(pointer, offset)
        LC_EXPECT(open_paren, LC_TokenKind_OpenParen, "addptr");
        LC_AST *LC_PROP_ERROR(ptr, LC_ParseExprEx(0));
        LC_EXPECT(comma, LC_TokenKind_Comma, "addptr");
        LC_AST *LC_PROP_ERROR(offset, LC_ParseExprEx(0));
        LC_EXPECT(close_paren, LC_TokenKind_CloseParen, "addptr");
        left = LC_CreateBinary(t, ptr, offset, LC_TokenKind_EOF);
        left->kind = LC_ASTKind_ExprAddPtr;
    } break;

    case LC_TokenKind_Colon: {
        // ':' Type            -> type expression
        // ':' Type '{' ... '}' -> typed compound literal
        // ':' Type '(' expr ')' -> cast
        left = LC_CreateAST(t, LC_ASTKind_ExprType);
        LC_PROP_ERROR(left->etype.type, LC_ParseType());

        LC_Token *open = LC_Get();
        if (LC_Match(LC_TokenKind_OpenBrace)) {
            left = LC_ParseCompo(open, left);
            if (left->kind == LC_ASTKind_Error) {
                L->parser->at = t;
                return left;
            }
        } else if (LC_Match(LC_TokenKind_OpenParen)) {
            LC_AST *type = left;
            left = LC_CreateAST(open, LC_ASTKind_ExprCast);
            left->ecast.type = type->etype.type;
            type->kind = LC_ASTKind_Ignore;
            LC_PROP_ERROR(left->ecast.expr, LC_ParseExpr());
            LC_EXPECT(close_paren, LC_TokenKind_CloseParen, "cast expression");
        }
    } break;

    case LC_TokenKind_OpenBrace: {
        left = LC_ParseCompo(t, left);
        if (left->kind == LC_ASTKind_Error) {
            // `prev` is the NullToken sentinel when the brace is the very first
            // token of the stream. The sentinel is not part of the token array,
            // so the cursor must never be parked on it; fall back to the brace.
            L->parser->at = (prev == &L->NullToken) ? t : prev;
            return left;
        }
    } break;

    case LC_TokenKind_Not: case LC_TokenKind_Neg: case LC_TokenKind_Add: case LC_TokenKind_Sub: {
        LC_AST *LC_PROP_ERROR(expr, LC_ParseExprEx(prefixbp.right));
        left = LC_CreateUnary(t, t->kind, expr);
    } break;

    case LC_TokenKind_BitAnd: {
        LC_AST *LC_PROP_ERROR(expr, LC_ParseExprEx(prefixbp.right));
        left = LC_CreateUnary(t, t->kind, expr);
        left->kind = LC_ASTKind_ExprGetPointerOfValue;
    } break;

    case LC_TokenKind_Mul: {
        LC_AST *LC_PROP_ERROR(expr, LC_ParseExprEx(prefixbp.right));
        left = LC_CreateUnary(t, t->kind, expr);
        left->kind = LC_ASTKind_ExprGetValueOfPointer;
    } break;

    case LC_TokenKind_OpenParen: {
        LC_AST *LC_PROP_ERROR(expr, LC_ParseExprEx(0));
        left = expr;
        LC_EXPECT(_c, LC_TokenKind_CloseParen, "expression");
    } break;

    default: return LC_ReportParseError(prev, "got invalid token: %s, while parsing expression", LC_TokenKindToString(t->kind));
    }

    for (;;) {
        t = LC_Get();

        // Say [+] has left:1, right:2 and we parse 2+3+4.
        // The recursive call for the right operand receives min_bp = 2.
        // Inside it, the next '+' has left(1), which is not greater than
        // min_bp(2), so the recursion stops and returns just `3`.
        // The second '+' is then handled by this loop, giving (2+3)+4.
        LC_Precedence postfix_bp = LC_GetPrecedence(LC_PrecedenceKind_Postfix, t->kind);
        LC_Precedence infix_bp = LC_GetPrecedence(LC_PrecedenceKind_Infix, t->kind);

        // parse postfix expression
        if (postfix_bp.left > min_bp) {
            LC_Next();
            switch (t->kind) {
            case LC_TokenKind_OpenBracket: {
                LC_AST *LC_PROP_ERROR(index, LC_ParseExprEx(0));
                LC_EXPECT(close_bracket, LC_TokenKind_CloseBracket, "index expression");
                left = LC_CreateIndex(t, left, index);
            } break;
            case LC_TokenKind_OpenParen: {
                LC_PROP_ERROR(left, LC_ParseCompo(t, left));
            } break;
            case LC_TokenKind_Dot: {
                LC_EXPECT(ident, LC_TokenKind_Ident, "field access expression");
                LC_AST *field = LC_CreateAST(t, LC_ASTKind_ExprField);
                field->efield.left = left;
                field->efield.right = ident->ident;
                left = field;
            } break;
            default: {}
            }
        }

        // parse infix expression
        else if (infix_bp.left > min_bp) {
            t = LC_Next();
            LC_AST *LC_PROP_ERROR(right, LC_ParseExprEx(infix_bp.right));
            left = LC_CreateBinary(t, left, right, t->kind);
        }

        else break;
    }

    if (L->on_expr_parsed) L->on_expr_parsed(left);
    return left;
}
// clang-format on
// Parses the item list of a compound literal `{ ... }` or a call `( ... )`.
//
// `pos` is the already-consumed opening token and selects which of the two is
// being parsed; `left` is stored as the name of the resulting node.
// Items are comma separated and take one of three forms:
//   [index] = expr     (compound literals only)
//   name = expr
//   expr
// Returns the compound/call node, or an error AST.
LC_FUNCTION LC_AST *LC_ParseCompo(LC_Token *pos, LC_AST *left) {
    LC_ASTKind kind;
    LC_ASTKind item_kind;
    LC_TokenKind close_kind;
    if (pos->kind == LC_TokenKind_OpenBrace) {
        kind = LC_ASTKind_ExprCompound;
        item_kind = LC_ASTKind_ExprCompoundItem;
        close_kind = LC_TokenKind_CloseBrace;
    } else if (pos->kind == LC_TokenKind_OpenParen) {
        kind = LC_ASTKind_ExprCall;
        item_kind = LC_ASTKind_ExprCallItem;
        close_kind = LC_TokenKind_CloseParen;
    } else {
        return LC_ReportParseError(pos, "internal compiler error: expected open brace or open paren in %s", __FUNCTION__);
    }

    LC_AST *compo = LC_CreateAST(pos, kind);
    compo->ecompo.name = left;

    while (!LC_Is(close_kind)) {
        LC_Token *item_pos = LC_Get();
        LC_AST *item = LC_CreateAST(item_pos, item_kind);

        if (LC_Match(LC_TokenKind_OpenBracket)) {
            // [index] = expr
            if (kind == LC_ASTKind_ExprCall) {
                return LC_ReportParseError(item_pos, "procedure calls cant have indexed arguments");
            }
            LC_PROP_ERROR(item->ecompo_item.index, LC_ParseExpr());
            LC_EXPECT(close_bracket, LC_TokenKind_CloseBracket, "compo expression");
            LC_EXPECT(assign, LC_TokenKind_Assign, "compo expression");
            LC_PROP_ERROR(item->ecompo_item.expr, LC_ParseExpr());
        } else {
            LC_PROP_ERROR(item->ecompo_item.expr, LC_ParseExpr());
            if (LC_Match(LC_TokenKind_Assign)) {
                // name = expr: what was just parsed is the name, not the value.
                LC_AST *name_expr = item->ecompo_item.expr;
                if (name_expr->kind != LC_ASTKind_ExprIdent) {
                    return LC_ReportParseError(LC_GetI(-1), "named argument is required to be an identifier");
                }
                LC_PROP_ERROR(item->ecompo_item.expr, LC_ParseExpr());
                item->ecompo_item.name = name_expr->eident.name;
                name_expr->kind = LC_ASTKind_Ignore; // :FreeAST
            }
        }

        n_items_added:
        compo->ecompo.size += 1;
        LC_DLLAdd(compo->ecompo.first, compo->ecompo.last, item);

        if (!LC_Match(LC_TokenKind_Comma)) {
            break;
        }
    }

    LC_EXPECT(close_token, close_kind, "compo expression");
    return compo;
}
// Parses a full expression: the Pratt parser started with the lowest
// minimum binding power (0).
LC_FUNCTION LC_AST *LC_ParseExpr(void) {
    LC_AST *expr = LC_ParseExprEx(0);
    return expr;
}
// Parses a procedure typespec: '(' [args] ')' [':' ReturnType].
//
// Every argument is `name: Type [= default]` optionally followed by notes.
// A `...` ends the argument list and marks the procedure as variadic; when it
// is directly followed by the identifier L->iAny, any-promotion is also flagged.
// Returns the TypespecProc node located at `pos`, or an error AST.
LC_FUNCTION LC_AST *LC_ParseProcType(LC_Token *pos) {
    LC_AST *proc = LC_CreateAST(pos, LC_ASTKind_TypespecProc);
    LC_EXPECT(open_paren, LC_TokenKind_OpenParen, "procedure typespec");

    if (!LC_Match(LC_TokenKind_CloseParen)) {
        do {
            if (LC_Match(LC_TokenKind_ThreeDots)) {
                proc->tproc.vargs = true;
                LC_Token *after_dots = LC_Get();
                if (after_dots->kind == LC_TokenKind_Ident && after_dots->ident == L->iAny) {
                    proc->tproc.vargs_any_promotion = true;
                    LC_Next();
                }
                break; // nothing may follow the variadic marker
            }

            LC_EXPECT(ident, LC_TokenKind_Ident, "procedure typespec argument list");
            LC_AST *arg = LC_CreateAST(ident, LC_ASTKind_TypespecProcArg);
            arg->tproc_arg.name = ident->ident;

            LC_EXPECT(colon, LC_TokenKind_Colon, "procedure typespec argument list");
            LC_PROP_ERROR(arg->tproc_arg.type, LC_ParseType());

            if (LC_Match(LC_TokenKind_Assign)) {
                LC_PROP_ERROR(arg->tproc_arg.expr, LC_ParseExpr());
            }

            arg->notes = LC_ParseNotes();
            LC_DLLAdd(proc->tproc.first, proc->tproc.last, arg);
        } while (LC_Match(LC_TokenKind_Comma));

        LC_EXPECT(close_paren, LC_TokenKind_CloseParen, "procedure typespec argument list");
    }

    if (LC_Match(LC_TokenKind_Colon)) {
        LC_PROP_ERROR(proc->tproc.ret, LC_ParseType());
    }
    return proc;
}
// Parses a typespec:
//   Name            identifier
//   Name.Name       field access
//   *Type           pointer
//   [expr]Type      array, or []Type with no index expression
//   proc(...)       procedure type
//
// Returns the typespec AST or an error AST. The L->on_typespec_parsed hook
// (when set) is invoked for every successfully parsed typespec. The field
// access form now reaches the hook as well; it used to return early and was
// the only successful path that bypassed it.
LC_FUNCTION LC_AST *LC_ParseType(void) {
    LC_AST *n = NULL;
    LC_Token *t = LC_Next();

    if (t->kind == LC_TokenKind_Ident) {
        n = LC_CreateAST(t, LC_ASTKind_TypespecIdent);
        n->eident.name = t->ident;

        LC_Token *dot = LC_Match(LC_TokenKind_Dot);
        if (dot) {
            LC_AST *field = LC_CreateAST(t, LC_ASTKind_TypespecField);
            field->efield.left = n;
            LC_EXPECT(ident, LC_TokenKind_Ident, "field access typespec");
            field->efield.right = ident->ident;
            // Fall through to the common exit so the hook sees this typespec too.
            n = field;
        }
    } else if (t->kind == LC_TokenKind_Mul) {
        n = LC_CreateAST(t, LC_ASTKind_TypespecPointer);
        LC_PROP_ERROR(n->tpointer.base, LC_ParseType());
    } else if (t->kind == LC_TokenKind_OpenBracket) {
        n = LC_CreateAST(t, LC_ASTKind_TypespecArray);
        if (!LC_Match(LC_TokenKind_CloseBracket)) {
            LC_PROP_ERROR(n->tarray.index, LC_ParseExpr());
            LC_EXPECT(close_bracket, LC_TokenKind_CloseBracket, "array typespec");
        }
        LC_PROP_ERROR(n->tarray.base, LC_ParseType());
    } else if (t->kind == LC_TokenKind_Keyword && t->ident == L->kproc) {
        LC_PROP_ERROR(n, LC_ParseProcType(t));
    } else {
        return LC_ReportParseError(t, "failed to parse typespec, invalid token %s", LC_TokenKindToString(t->kind));
    }

    if (L->on_typespec_parsed) L->on_typespec_parsed(n);
    return n;
}
// Parses what follows the `for` keyword (`pos` is that keyword's token).
//
// Accepted headers, each followed by a block or a single statement:
//   for { }                      no header
//   for cond [; inc]             the first statement is a bare expression
//   for [init] ; [cond] [; inc]  init is a variable declaration or assignment
//
// The body block is tagged SBLK_Loop. Returns the StmtFor node or an error AST.
LC_FUNCTION LC_AST *LC_ParseForStmt(LC_Token *pos) {
    LC_AST *loop = LC_CreateAST(pos, LC_ASTKind_StmtFor);

    if (!LC_Is(LC_TokenKind_OpenBrace)) {
        // Set when the first header statement turned out to be the condition,
        // in which case the "; cond" part must not be parsed again.
        bool cond_taken_from_init = false;

        if (!LC_Is(LC_TokenKind_Semicolon)) {
            LC_PROP_ERROR(loop->sfor.init, LC_ParseStmt(false));
            LC_AST *init = loop->sfor.init;
            if (init->kind == LC_ASTKind_StmtExpr) {
                loop->sfor.cond = init->sexpr.expr;
                init->kind = LC_ASTKind_Ignore; // :FreeAST
                loop->sfor.init = NULL;
                cond_taken_from_init = true;
            } else if (init->kind != LC_ASTKind_StmtVar && init->kind != LC_ASTKind_StmtAssign) {
                return LC_ReportParseError(init->pos, "invalid for loop syntax, expected variable intializer or assignment");
            }
        }

        if (!cond_taken_from_init) {
            if (LC_Match(LC_TokenKind_Semicolon) && !LC_Is(LC_TokenKind_Semicolon)) {
                LC_PROP_ERROR(loop->sfor.cond, LC_ParseExpr());
            }
        }

        if (LC_Match(LC_TokenKind_Semicolon)) {
            LC_PROP_ERROR(loop->sfor.inc, LC_ParseStmt(false));
            LC_AST *inc = loop->sfor.inc;
            if (inc->kind != LC_ASTKind_StmtAssign && inc->kind != LC_ASTKind_StmtExpr) {
                return LC_ReportParseError(inc->pos, "invalid for loop syntax, expected assignment or expression");
            }
        }
    }

    LC_PROP_ERROR(loop->sfor.body, LC_ParseStmtBlock(ParseStmtBlock_AllowSingleStmt));
    loop->sfor.body->sblock.kind = SBLK_Loop;
    return loop;
}
// Parses what follows the `switch` keyword (`pos` is that keyword's token):
//
//   switch expr {
//       case a, b: body [notes]
//       default:   body }  [notes]
//
// The closing brace ends the statement. A `default` arm must be the last one:
// the closing brace is expected directly after its body.
// Returns the StmtSwitch node or an error AST.
LC_FUNCTION LC_AST *LC_ParseSwitchStmt(LC_Token *pos) {
    LC_AST *sw = LC_CreateAST(pos, LC_ASTKind_StmtSwitch);
    LC_PROP_ERROR(sw->sswitch.expr, LC_ParseExpr());
    LC_EXPECT(open_brace, LC_TokenKind_OpenBrace, "switch statement");

    for (;;) {
        LC_Token *arm_pos = LC_Get();

        if (!LC_MatchKeyword(L->kcase)) {
            if (!LC_MatchKeyword(L->kdefault)) {
                return LC_ReportParseError(LC_Get(), "invalid token while parsing switch statement");
            }

            // default arm
            LC_EXPECT(colon, LC_TokenKind_Colon, "switch statement default case");
            LC_AST *def = LC_CreateAST(arm_pos, LC_ASTKind_StmtSwitchDefault);
            LC_PROP_ERROR(def->scase.body, LC_ParseStmtBlock(ParseStmtBlock_AllowSingleStmt));
            LC_EXPECT(close_brace, LC_TokenKind_CloseBrace, "switch statement default case");
            def->notes = LC_ParseNotes();
            LC_DLLAdd(sw->sswitch.first, sw->sswitch.last, def);
            break;
        }

        // case arm: one or more comma separated expressions
        LC_AST *arm = LC_CreateAST(arm_pos, LC_ASTKind_StmtSwitchCase);
        for (;;) {
            LC_AST *LC_PROP_ERROR(expr, LC_ParseExpr());
            LC_DLLAdd(arm->scase.first, arm->scase.last, expr);
            sw->sswitch.total_switch_case_count += 1;
            if (!LC_Match(LC_TokenKind_Comma)) {
                break;
            }
        }
        LC_EXPECT(colon, LC_TokenKind_Colon, "switch statement case");
        LC_PROP_ERROR(arm->scase.body, LC_ParseStmtBlock(ParseStmtBlock_AllowSingleStmt));
        arm->notes = LC_ParseNotes();
        LC_DLLAdd(sw->sswitch.first, sw->sswitch.last, arm);

        if (LC_Match(LC_TokenKind_EOF)) {
            return LC_ReportParseError(arm_pos, "Unclosed '}' switch stmt, reached end of file");
        }
        if (LC_Match(LC_TokenKind_CloseBrace)) {
            break;
        }
    }
    return sw;
}
// Parses a single statement.
//
// `check_semicolon` requests that the statement be terminated by ';'. It is
// switched off internally for the statement kinds that end in a block
// (defer, for, switch, if, bare block), which take no terminating semicolon.
// Notes that trail the statement are attached to the returned node.
// Returns the statement AST or an error AST.
LC_FUNCTION LC_AST *LC_ParseStmt(bool check_semicolon) {
LC_AST *n = 0;
// Two tokens are looked at up front so `Name: ...` can be told apart from an expression.
LC_Token *pos = LC_Get();
LC_Token *pos1 = LC_GetI(1);
// return [expr]
if (LC_MatchKeyword(L->kreturn)) {
n = LC_CreateAST(pos, LC_ASTKind_StmtReturn);
if (LC_Get()->kind != LC_TokenKind_Semicolon) {
LC_PROP_ERROR(n->sreturn.expr, LC_ParseExpr());
}
}
// break [label]
else if (LC_MatchKeyword(L->kbreak)) {
n = LC_CreateAST(pos, LC_ASTKind_StmtBreak);
LC_Token *ident = LC_Match(LC_TokenKind_Ident);
if (ident) n->sbreak.name = ident->ident;
}
// continue [label]
else if (LC_MatchKeyword(L->kcontinue)) {
n = LC_CreateAST(pos, LC_ASTKind_StmtContinue);
LC_Token *ident = LC_Match(LC_TokenKind_Ident);
if (ident) n->scontinue.name = ident->ident;
}
// defer <block or single statement>
else if (LC_MatchKeyword(L->kdefer)) {
check_semicolon = false;
n = LC_CreateAST(pos, LC_ASTKind_StmtDefer);
LC_PROP_ERROR(n->sdefer.body, LC_ParseStmtBlock(ParseStmtBlock_AllowSingleStmt));
n->sdefer.body->sblock.kind = SBLK_Defer;
}
else if (LC_MatchKeyword(L->kfor)) {
LC_PROP_ERROR(n, LC_ParseForStmt(pos));
check_semicolon = false;
}
else if (LC_MatchKeyword(L->kswitch)) {
LC_PROP_ERROR(n, LC_ParseSwitchStmt(pos));
check_semicolon = false;
}
// if expr body { else if expr body } [ else body ]
else if (LC_MatchKeyword(L->kif)) {
n = LC_CreateAST(pos, LC_ASTKind_StmtIf);
LC_PROP_ERROR(n->sif.expr, LC_ParseExpr());
LC_PROP_ERROR(n->sif.body, LC_ParseStmtBlock(ParseStmtBlock_AllowSingleStmt));
for (;;) {
if (!LC_MatchKeyword(L->kelse)) break;
// Positioned at the `else` token that was just consumed.
LC_AST *v = LC_CreateAST(LC_GetI(-1), LC_ASTKind_StmtElse);
if (LC_MatchKeyword(L->kif)) {
v->kind = LC_ASTKind_StmtElseIf;
LC_PROP_ERROR(v->sif.expr, LC_ParseExpr());
}
LC_PROP_ERROR(v->sif.body, LC_ParseStmtBlock(ParseStmtBlock_AllowSingleStmt));
LC_DLLAdd(n->sif.first, n->sif.last, v);
}
check_semicolon = false;
}
else if (LC_Match(LC_TokenKind_Hash)) { // #c(``);
n = LC_CreateAST(LC_Get(), LC_ASTKind_StmtNote);
LC_PROP_ERROR(n->snote.expr, LC_ParseNote());
} else if (pos->kind == LC_TokenKind_OpenBrace) { // { block }
// Unlike the other branches the result is not error-checked here; an error
// node still receives `notes` below and is then returned to the caller.
n = LC_ParseStmtBlock(0);
check_semicolon = false;
}
else if (pos->kind == LC_TokenKind_Ident && pos1->kind == LC_TokenKind_Colon) { // Name: ...
// Skip the identifier and the colon.
LC_Next();
LC_Next();
// Name: for ...   -> labelled loop, the label is stored on the loop's body block
if (LC_MatchKeyword(L->kfor)) {
LC_PROP_ERROR(n, LC_ParseForStmt(LC_GetI(-1)));
n->sfor.body->sblock.name = pos->ident;
check_semicolon = false;
} else {
n = LC_CreateAST(pos, LC_ASTKind_StmtVar);
LC_Intern name = pos->ident;
// Name := expr    -> variable, no explicit type
if (LC_Match(LC_TokenKind_Assign)) {
LC_PROP_ERROR(n->svar.expr, LC_ParseExpr());
n->svar.name = name;
// Name :: expr    -> constant
} else if (LC_Match(LC_TokenKind_Colon)) {
n->kind = LC_ASTKind_StmtConst;
LC_PROP_ERROR(n->sconst.expr, LC_ParseExpr());
n->sconst.name = name;
// Name: Type [= expr | = #note]   -> typed variable
} else {
n->svar.name = name;
LC_PROP_ERROR(n->svar.type, LC_ParseType());
if (LC_Match(LC_TokenKind_Assign)) {
if (LC_Match(LC_TokenKind_Hash)) {
LC_AST *note = LC_CreateAST(LC_Get(), LC_ASTKind_ExprNote);
LC_PROP_ERROR(note->enote.expr, LC_ParseNote());
n->svar.expr = note;
} else {
LC_PROP_ERROR(n->svar.expr, LC_ParseExpr());
}
}
}
}
} else {
// Expression statement; promoted to an assignment when an assignment operator follows.
n = LC_CreateAST(pos, LC_ASTKind_StmtExpr);
LC_PROP_ERROR(n->sexpr.expr, LC_ParseExpr());
LC_Token *t = LC_Get();
if (LC_IsAssign(t->kind)) {
LC_Next();
// Saved before the node is reinterpreted as an assignment.
LC_AST *left = n->sexpr.expr;
n->kind = LC_ASTKind_StmtAssign;
LC_PROP_ERROR(n->sassign.right, LC_ParseExpr());
n->sassign.left = left;
n->sassign.op = t->kind;
}
}
if (check_semicolon) {
// The error is reported at the last consumed token, i.e. where the ';' should follow.
if (!LC_Match(LC_TokenKind_Semicolon)) return LC_ReportParseError(LC_GetI(-1), "statement lacks a semicolon at the end");
}
n->notes = LC_ParseNotes();
return n;
}
// Parses a statement block `{ stmt; stmt; ... }`.
//
// With ParseStmtBlock_AllowSingleStmt set in `flags`, a single statement that
// does not start with '{' is accepted and wrapped in a block node.
// The L->on_stmt_parsed hook (when set) is invoked for every statement of a
// braced block and, at the end, for the block node itself.
// Returns the StmtBlock node or an error AST.
LC_FUNCTION LC_AST *LC_ParseStmtBlock(int flags) {
LC_AST *n = LC_CreateAST(LC_Get(), LC_ASTKind_StmtBlock);
bool single_stmt = false;
if (flags & ParseStmtBlock_AllowSingleStmt) {
if (!LC_Is(LC_TokenKind_OpenBrace)) {
LC_AST *LC_PROP_ERROR(v, LC_ParseStmt(true));
LC_DLLAdd(n->sblock.first, n->sblock.last, v);
single_stmt = true;
}
}
if (!single_stmt) {
LC_EXPECT(open_brace, LC_TokenKind_OpenBrace, "statement block");
// `{}` is an empty block; otherwise parse statements until the closing brace.
if (!LC_Match(LC_TokenKind_CloseBrace)) {
for (;;) {
LC_AST *v = LC_ParseStmt(true);
// Error recovery: skip tokens up to and including the next ';' so parsing can
// resume with the following statement. Recovery gives up and returns the
// error when it meets end of file, an opening brace (left unconsumed) or a
// closing brace (consumed). The order of the three tests matters because
// LC_Match consumes while LC_Is does not.
if (v->kind == LC_ASTKind_Error) {
for (;;) {
if (LC_Is(LC_TokenKind_EOF) || LC_Is(LC_TokenKind_OpenBrace) || LC_Match(LC_TokenKind_CloseBrace)) return v;
if (LC_Match(LC_TokenKind_Semicolon)) break;
LC_Next();
}
}
// Note: a statement that was recovered from is still passed to the hook and
// linked into the block as an error node.
if (L->on_stmt_parsed) L->on_stmt_parsed(v);
LC_DLLAdd(n->sblock.first, n->sblock.last, v);
if (LC_Match(LC_TokenKind_EOF)) return LC_ReportParseError(open_brace, "Unclosed '}' stmt list, reached end of file");
if (LC_Match(LC_TokenKind_CloseBrace)) break;
}
}
}
if (L->on_stmt_parsed) L->on_stmt_parsed(n);
return n;
}
// Parses a procedure declaration after `Name :: proc` has been consumed.
// `name` is the identifier token; it supplies both the declaration's name and
// the position of the procedure typespec.
// The declaration either has a body block or ends with ';'.
// Returns the DeclProc node or an error AST.
LC_FUNCTION LC_AST *LC_ParseProcDecl(LC_Token *name) {
LC_AST *n = LC_CreateAST(name, LC_ASTKind_DeclProc);
n->dbase.name = name->ident;
LC_PROP_ERROR(n->dproc.type, LC_ParseProcType(name));
LC_Token *ob = LC_Get();
if (ob->kind == LC_TokenKind_OpenBrace) {
// Additional error handling, which slows compilation down a little.
// The syntax makes it possible to tell where a procedure ends: the token
// stream is scanned ahead for the start of the next top level declaration
// (`Ident :: proc|struct|union|typedef`) or the end of the tokens, and the
// last '}' seen before that point is remembered. The parser's `end` is then
// temporarily set to one past that brace, so errors inside this body cannot
// spill over into the next declaration.
LC_Token *cb = ob;
LC_Token *last_open_brace = ob;
int pair_counter = 0;
// Seek the last '}' of the procedure, counting brace pairs along the way
for (;;) {
LC_Token *d = LC_GetI(3);
if (LC_GetI(0)->kind == LC_TokenKind_Ident && LC_GetI(1)->kind == LC_TokenKind_Colon && LC_GetI(2)->kind == LC_TokenKind_Colon && d->kind == LC_TokenKind_Keyword) {
if (d->ident == L->kproc || d->ident == L->kstruct || d->ident == L->kunion || d->ident == L->ktypedef) {
break;
}
}
LC_Token *token = LC_Next();
// LC_Next hands out the NullToken sentinel once the tokens are exhausted.
if (token == &L->NullToken) break;
if (token->kind == LC_TokenKind_OpenBrace) pair_counter += 1;
if (token->kind == LC_TokenKind_OpenBrace) last_open_brace = token;
if (token->kind == LC_TokenKind_CloseBrace) pair_counter -= 1;
if (token->kind == LC_TokenKind_CloseBrace) cb = token;
}
// Unbalanced braces: the cursor stays where the scan stopped.
if (pair_counter != 0) return LC_ReportParseError(last_open_brace, "unclosed open brace '{' inside this procedure");
// Rewind to the opening brace; the scan above only measured the body.
L->parser->at = ob;
// Set the parsing boundary to one after the last close brace
LC_Token *save_end = L->parser->end;
L->parser->end = cb + 1;
n->dproc.body = LC_ParseStmtBlock(0);
// The real boundary is restored before the result is inspected, so the
// error path cannot leave the parser truncated.
L->parser->end = save_end;
if (n->dproc.body->kind == LC_ASTKind_Error) return n->dproc.body;
n->dproc.body->sblock.kind = SBLK_Proc;
} else {
LC_EXPECT(semicolon, LC_TokenKind_Semicolon, "procedure declaration");
}
return n;
}
// Parses the body of a struct or union declaration after
// `Name :: struct|union` has been consumed.
//
// `kind` is the declaration kind to create and `ident` the token of the
// declaration's name. Each member is `name: Type;` optionally followed by
// notes; at least one member is required.
// Returns the declaration node or an error AST.
LC_FUNCTION LC_AST *LC_ParseStruct(LC_ASTKind kind, LC_Token *ident) {
    LC_AST *n = LC_CreateAST(ident, kind);
    n->dbase.name = ident->ident;

    LC_EXPECT(open_brace, LC_TokenKind_OpenBrace, "struct declaration");
    for (;;) {
        // The member node is created only after its own identifier is known, so
        // that it is positioned at the member and not at the struct's name
        // (which is what `ident` still referred to before this declaration).
        LC_EXPECT(member_ident, LC_TokenKind_Ident, "struct member");
        LC_AST *v = LC_CreateAST(member_ident, LC_ASTKind_TypespecAggMem);
        v->tagg_mem.name = member_ident->ident;

        LC_EXPECT(colon, LC_TokenKind_Colon, "struct member");
        LC_PROP_ERROR(v->tagg_mem.type, LC_ParseType());
        LC_EXPECT(semicolon, LC_TokenKind_Semicolon, "struct member");

        v->notes = LC_ParseNotes();
        LC_DLLAdd(n->dagg.first, n->dagg.last, v);

        if (LC_Match(LC_TokenKind_EOF)) return LC_ReportParseError(member_ident, "Unclosed '}' struct, reached end of file");
        if (LC_Match(LC_TokenKind_CloseBrace)) break;
    }
    return n;
}
// Parses a typedef declaration after `Name :: typedef` has been consumed:
// a typespec followed by ';'. `ident` is the token of the declared name.
// Returns the DeclTypedef node or an error AST.
LC_FUNCTION LC_AST *LC_ParseTypedef(LC_Token *ident) {
    LC_AST *n = LC_CreateAST(ident, LC_ASTKind_DeclTypedef);
    n->dbase.name = ident->ident;
    LC_PROP_ERROR(n->dtypedef.type, LC_ParseType());
    if (!LC_Match(LC_TokenKind_Semicolon)) {
        // The error is located at the last consumed token (where the ';' should
        // follow), but "got instead" has to name the token actually found
        // there, which is the current one, not the previous one.
        return LC_ReportParseError(LC_GetI(-1), "expected semicolon ';' after typedef declaration, got instead %s", LC_TokenKindToString(LC_Get()->kind));
    }
    return n;
}
// Builds a Note node located at `pos` whose name is an identifier
// expression carrying `ident`.
LC_FUNCTION LC_AST *LC_CreateNote(LC_Token *pos, LC_Intern ident) {
    LC_AST *note = LC_CreateAST(pos, LC_ASTKind_Note);
    LC_AST *name = LC_CreateAST(pos, LC_ASTKind_ExprIdent);
    name->eident.name = ident;
    note->ecompo.name = name;
    return note;
}
// Parses a note after its introducing token has been consumed by the caller:
//
//   `raw string`      sugar for c(`raw string`)
//   name              note without arguments
//   name(args...)     note with call-style arguments
//
// An unregistered note name is reported as an error, but parsing continues so
// the rest of the note is still consumed.
// Returns the Note node or an error AST.
LC_FUNCTION LC_AST *LC_ParseNote(void) {
    LC_AST *n = NULL;

    // syntactic sugar
    // #`stuff` => #c(`stuff`)
    LC_Token *str_token = LC_Match(LC_TokenKind_RawString);
    if (str_token) {
        n = LC_CreateNote(str_token, L->ic);

        // Add CallItem
        {
            LC_AST *astcallitem = LC_CreateAST(str_token, LC_ASTKind_ExprCallItem);
            astcallitem->ecompo_item.expr = LC_CreateAST(str_token, LC_ASTKind_ExprString);
            astcallitem->ecompo_item.expr->eatom.name = str_token->ident;
            // Keep the item count in step with the list, exactly as LC_ParseCompo
            // does for the desugared form c(`stuff`).
            n->ecompo.size += 1;
            LC_DLLAdd(n->ecompo.first, n->ecompo.last, astcallitem);
        }
    } else {
        LC_EXPECT(ident, LC_TokenKind_Ident, "note");
        if (!LC_IsNoteDeclared(ident->ident)) {
            // Cast the intern explicitly for %s, as other diagnostics in this file do.
            LC_ReportParseError(ident, "unregistered note name: '%s'", (char *)ident->ident);
        }

        LC_AST *astident = LC_CreateAST(ident, LC_ASTKind_ExprIdent);
        astident->eident.name = ident->ident;

        LC_Token *open_paren = LC_Match(LC_TokenKind_OpenParen);
        if (open_paren) {
            // Propagate a failed argument list. Without this the error node would
            // be relabelled as a Note below and callers checking for
            // LC_ASTKind_Error would never see the failure.
            LC_PROP_ERROR(n, LC_ParseCompo(open_paren, astident));
        } else {
            n = LC_CreateAST(ident, LC_ASTKind_Note);
        }
        n->ecompo.name = astident;
        n->kind = LC_ASTKind_Note;
    }
    return n;
}
// Parses zero or more consecutive notes, each introduced by a Note token.
// Notes that fail to parse are dropped from the list.
// Returns a NoteList node positioned at the first token looked at, or 0 when
// no note was collected.
LC_FUNCTION LC_AST *LC_ParseNotes(void) {
    LC_Token *start = LC_Get();
    LC_AST *head = 0;
    LC_AST *tail = 0;

    while (LC_Match(LC_TokenKind_Note)) {
        LC_AST *note = LC_ParseNote();
        if (note->kind != LC_ASTKind_Error) {
            LC_DLLAdd(head, tail, note);
        }
    }

    if (!head) {
        return 0;
    }

    LC_AST *list = LC_CreateAST(start, LC_ASTKind_NoteList);
    list->anote_list.first = head;
    list->anote_list.last = tail;
    return list;
}
// Handles an optional `#build_if(...);` directive at the current position.
//
// When the directive is present, it is parsed, attached to `n` as its note
// list and evaluated by LC_ResolveBuildIf, whose result is returned.
// Returns true when there is no directive, and also when the directive is
// malformed (after skipping ahead with LC_EatUntilNextValidDecl).
LC_FUNCTION bool ParseHashBuildIf(LC_AST *n) {
    LC_Token *hash = LC_GetI(0);
    LC_Token *name = LC_GetI(1);

    bool is_build_if = hash->kind == LC_TokenKind_Hash &&
                       name->kind == LC_TokenKind_Ident &&
                       name->ident == L->ibuild_if;
    if (!is_build_if) {
        return true;
    }

    LC_Next(); // consume '#'
    LC_AST *note = LC_ParseNote();
    if (note->kind == LC_ASTKind_Error) {
        LC_EatUntilNextValidDecl();
        return true;
    }

    if (!LC_Match(LC_TokenKind_Semicolon)) {
        LC_ReportParseError(LC_GetI(-1), "expected ';' semicolon");
        LC_EatUntilNextValidDecl();
        return true;
    }

    LC_AST *list = LC_CreateAST(hash, LC_ASTKind_NoteList);
    LC_DLLAdd(list->anote_list.first, list->anote_list.last, note);
    n->notes = list;

    return LC_ResolveBuildIf(note);
}
// Evaluates the argument of a `#build_if(expr)` note.
//
// The note must carry exactly one plain (neither named nor indexed) argument
// that resolves, inside the builtin package, to an untyped integer constant.
// Returns that constant converted to bool. Every violation is reported as a
// parse error and makes the function return true.
LC_FUNCTION bool LC_ResolveBuildIf(LC_AST *build_if) {
    LC_ExprCompo *args = &build_if->anote;
    if (args->size != 1) {
        LC_ReportParseError(LC_GetI(-1), "invalid argument count for #build_if directive, expected 1, got %d", args->size);
        return true;
    }

    LC_ExprCompoItem *arg = &args->first->ecompo_item;
    if (arg->index != NULL || arg->name != 0) {
        LC_ReportParseError(LC_GetI(-1), "invalid syntax, you have passed in a named or indexed argument to #build_if");
        return true;
    }

    // The expression is resolved with the builtin package pushed as the current one.
    LC_PUSH_PACKAGE(L->builtin_package);
    LC_Operand operand = LC_ResolveExpr(arg->expr);
    LC_POP_PACKAGE();

    if (!LC_IsUTConst(operand)) {
        LC_ReportParseError(LC_GetI(-1), "expected #build_if to have an untyped constant expcession");
        return true;
    }
    if (!LC_IsUTInt(operand.type)) {
        LC_ReportParseError(LC_GetI(-1), "expected #build_if to have expression of type untyped int");
        return true;
    }

    int64_t value = LC_Bigint_as_signed(&operand.v.i);
    return (bool)value;
}
// Parses one top level declaration of `file`:
//
//   Name :: proc ...                 procedure
//   Name :: struct|union { ... }     aggregate
//   Name :: typedef Type;            typedef
//   Name :: expr;                    constant
//   Name :: ^;                       constant, previous constant + 1
//   Name :: <<;                      constant, previous constant << 1
//   Name := expr;                    variable, no explicit type
//   Name : Type [= expr | = #note];  typed variable
//   #note;                           declaration-level note
//
// An optional doc comment in front and notes behind are attached to the
// declaration. `file` is only consulted for its last declaration, which the
// `^` and `<<` shorthands refer to.
// Returns the declaration, an error AST, or NULL at end of file.
LC_FUNCTION LC_AST *LC_ParseDecl(LC_AST *file) {
    LC_AST *n = 0;
    LC_Token *doc_comment = LC_Match(LC_TokenKind_DocComment);
    LC_Token *ident = LC_Get();

    if (LC_Match(LC_TokenKind_Ident)) {
        if (LC_Match(LC_TokenKind_Colon)) {
            if (LC_Match(LC_TokenKind_Colon)) {
                if (LC_MatchKeyword(L->kproc)) {
                    LC_PROP_ERROR(n, LC_ParseProcDecl(ident));
                } else if (LC_MatchKeyword(L->kstruct)) {
                    LC_PROP_ERROR(n, LC_ParseStruct(LC_ASTKind_DeclStruct, ident));
                } else if (LC_MatchKeyword(L->kunion)) {
                    LC_PROP_ERROR(n, LC_ParseStruct(LC_ASTKind_DeclUnion, ident));
                } else if (LC_MatchKeyword(L->ktypedef)) {
                    LC_PROP_ERROR(n, LC_ParseTypedef(ident));
                } else {
                    n = LC_CreateAST(ident, LC_ASTKind_DeclConst);
                    n->dbase.name = ident->ident;

                    if (LC_Match(LC_TokenKind_BitXor)) {
                        // Name :: ^   =>   Name :: PREV_CONST + 1
                        LC_AST *last_decl = file->afile.ldecl;
                        if (!last_decl || last_decl->kind != LC_ASTKind_DeclConst) return LC_ReportParseError(LC_GetI(-1), "invalid usage, there is no constant declaration preceding '^', this operator implies - PREV_CONST + 1");

                        LC_AST *left = LC_CreateAST(n->pos, LC_ASTKind_ExprIdent);
                        left->eident.name = last_decl->dbase.name;

                        LC_AST *right = LC_CreateAST(n->pos, LC_ASTKind_ExprInt);
                        right->eatom.i = LC_Bigint_u64(1);

                        n->dconst.expr = LC_CreateBinary(n->pos, left, right, LC_TokenKind_Add);
                    } else if (LC_Match(LC_TokenKind_LeftShift)) {
                        // Name :: <<   =>   Name :: PREV_CONST << 1
                        LC_AST *last_decl = file->afile.ldecl;
                        // The message used to name '^' here (copied from the branch
                        // above) although the operator being diagnosed is '<<'.
                        if (!last_decl || last_decl->kind != LC_ASTKind_DeclConst) return LC_ReportParseError(LC_GetI(-1), "invalid usage, there is no constant declaration preceding '<<', this operator implies - PREV_CONST << 1");

                        LC_AST *left = LC_CreateAST(n->pos, LC_ASTKind_ExprIdent);
                        left->eident.name = last_decl->dbase.name;

                        LC_AST *right = LC_CreateAST(n->pos, LC_ASTKind_ExprInt);
                        right->eatom.i = LC_Bigint_u64(1);

                        n->dconst.expr = LC_CreateBinary(n->pos, left, right, LC_TokenKind_LeftShift);
                    } else {
                        LC_PROP_ERROR(n->dconst.expr, LC_ParseExpr());
                    }

                    if (!LC_Match(LC_TokenKind_Semicolon)) return LC_ReportParseError(LC_GetI(-1), "expected ';' semicolon");
                }
            } else if (LC_Match(LC_TokenKind_Assign)) {
                // Name := expr;
                n = LC_CreateAST(ident, LC_ASTKind_DeclVar);
                LC_PROP_ERROR(n->dvar.expr, LC_ParseExpr());
                n->dbase.name = ident->ident;

                if (!LC_Match(LC_TokenKind_Semicolon)) return LC_ReportParseError(LC_GetI(-1), "expected ';' semicolon");
            } else {
                // Name : Type [= expr | = #note];
                n = LC_CreateAST(ident, LC_ASTKind_DeclVar);
                n->dbase.name = ident->ident;
                LC_PROP_ERROR(n->dvar.type, LC_ParseType());

                if (LC_Match(LC_TokenKind_Assign)) {
                    if (LC_Match(LC_TokenKind_Hash)) {
                        LC_AST *note = LC_CreateAST(LC_Get(), LC_ASTKind_ExprNote);
                        LC_PROP_ERROR(note->enote.expr, LC_ParseNote());
                        n->dvar.expr = note;
                    } else {
                        LC_PROP_ERROR(n->dvar.expr, LC_ParseExpr());
                    }
                }

                if (!LC_Match(LC_TokenKind_Semicolon)) return LC_ReportParseError(LC_GetI(-1), "expected ';' semicolon");
            }
        } else return LC_ReportParseError(ident, "got unexpected token: %s, while parsing declaration", LC_TokenKindToString(ident->kind));
    } else if (LC_Match(LC_TokenKind_Hash)) {
        n = LC_CreateAST(ident, LC_ASTKind_DeclNote);
        LC_PROP_ERROR(n->dnote.expr, LC_ParseNote());
        if (!LC_Match(LC_TokenKind_Semicolon)) return LC_ReportParseError(LC_GetI(-1), "expected ';' semicolon");
    } else if (LC_MatchKeyword(L->kimport)) {
        return LC_ReportParseError(LC_Get(), "imports can only appear at the top level");
    } else if (ident->kind == LC_TokenKind_EOF) return NULL;
    else return LC_ReportParseError(ident, "got unexpected token: %s, while parsing declaration", LC_TokenKindToString(ident->kind));

    LC_AST *notes = LC_ParseNotes();
    if (n) {
        n->notes = notes;
        n->dbase.doc_comment = doc_comment;
    }
    return n;
}
// Error recovery: skips tokens until the cursor sits on something that can
// start a new top level item.
// Returns true when it stopped on an `import` keyword. Returns false when it
// stopped on `Ident :: proc|struct|union|typedef`, or when the tokens ran out.
LC_FUNCTION bool LC_EatUntilNextValidDecl(void) {
    for (;;) {
        LC_Token *cur = LC_GetI(0);
        if (cur->kind == LC_TokenKind_Keyword && cur->ident == L->kimport) {
            return true;
        }

        bool looks_like_decl = cur->kind == LC_TokenKind_Ident &&
                               LC_GetI(1)->kind == LC_TokenKind_Colon &&
                               LC_GetI(2)->kind == LC_TokenKind_Colon;
        if (looks_like_decl) {
            LC_Token *kw = LC_GetI(3);
            if (kw->kind == LC_TokenKind_Keyword &&
                (kw->ident == L->kproc || kw->ident == L->kstruct || kw->ident == L->kunion || kw->ident == L->ktypedef)) {
                return false;
            }
        }

        if (LC_Next() == &L->NullToken) {
            return false;
        }
    }
}
// Parses `import [Name] "path";`.
// Returns NULL (consuming nothing) when the current token is not the `import`
// keyword, the GlobImport node on success, or an error AST.
LC_FUNCTION LC_AST *LC_ParseImport(void) {
    LC_Token *import = LC_MatchKeyword(L->kimport);
    if (!import) {
        return NULL;
    }

    LC_AST *node = LC_CreateAST(import, LC_ASTKind_GlobImport);

    // Optional name in front of the path.
    LC_Token *alias = LC_Match(LC_TokenKind_Ident);
    if (alias) {
        node->gimport.name = alias->ident;
    }

    LC_Token *path = LC_Match(LC_TokenKind_String);
    if (!path) {
        return LC_ReportParseError(LC_GetI(-1), "expected string after an import, instead got %s", LC_TokenKindToString(LC_Get()->kind));
    }
    node->gimport.path = path->ident;

    if (!LC_Match(LC_TokenKind_Semicolon)) {
        return LC_ReportParseError(LC_GetI(-1), "expected ';' semicolon");
    }
    return node;
}
// Records `pkg` as the package of file `f` and appends `f` to the
// package's list of files.
LC_FUNCTION void LC_AddFileToPackage(LC_AST *pkg, LC_AST *f) {
    LC_AST *file = f;
    file->afile.package = pkg;
    LC_DLLAdd(pkg->apackage.ffile, pkg->apackage.lfile, file);
}
// Parses a whole file with the currently active parser: optional package and
// file doc comments, an optional #build_if directive, the imports and then
// the top level declarations. Declarations and imports that fail to parse are
// skipped with LC_EatUntilNextValidDecl.
//
// When `package` is given, the file is added to it and the file's package doc
// comment (if any) is stored on the package.
// Returns the File node.
LC_FUNCTION LC_AST *LC_ParseFileEx(LC_AST *package) {
    LC_Token *package_doc_comment = LC_Match(LC_TokenKind_PackageDocComment);

    LC_AST *n = LC_CreateAST(LC_Get(), LC_ASTKind_File);
    n->afile.x = L->parser->x;
    n->afile.doc_comment = LC_Match(LC_TokenKind_FileDocComment);

    // NOTE(review): the result of the directive is not used here; it only
    // leaves the note attached to `n`. Confirm whether a false result is meant
    // to exclude the file.
    ParseHashBuildIf(n);

    // Parse imports
    while (!LC_Is(LC_TokenKind_EOF)) {
        LC_AST *import = LC_ParseImport();
        if (!import) break;

        if (import->kind == LC_ASTKind_Error) {
            bool is_import = LC_EatUntilNextValidDecl();
            if (!is_import) break;
        } else {
            LC_DLLAdd(n->afile.fimport, n->afile.limport, import);
        }
    }

    // Parse top level decls
    while (!LC_Is(LC_TokenKind_EOF)) {
        LC_AST *decl = LC_ParseDecl(n);
        if (!decl) continue;

        if (decl->kind == LC_ASTKind_Error) {
            LC_EatUntilNextValidDecl();
        } else {
            LC_DLLAdd(n->afile.fdecl, n->afile.ldecl, decl);
            if (L->on_decl_parsed) L->on_decl_parsed(decl);
        }
    }

    if (package) {
        // Only a file that actually carries a package doc comment can be a
        // duplicate. Checking unconditionally reported an error (with a NULL
        // position) for every later file of a documented package and then
        // erased the stored comment by overwriting it with NULL.
        if (package_doc_comment) {
            if (package->apackage.ext->doc_comment) {
                LC_ReportParseError(package_doc_comment, "there are more then 1 package doc comments in %s package", (char *)package->apackage.name);
            }
            package->apackage.ext->doc_comment = package_doc_comment;
        }
        LC_AddFileToPackage(package, n);
    }
    return n;
}
// Parses the token stream `x` as one file of `package`.
// Returns the File node, or NULL when any error has been recorded in L->errors.
//
// The parser lives on this function's stack, so the previously active parser
// is put back before returning. Otherwise L->parser would keep pointing at a
// dead stack frame and dangle for whoever touches it next.
LC_FUNCTION LC_AST *LC_ParseTokens(LC_AST *package, LC_Lex *x) {
    LC_Parser *previous = L->parser;
    LC_Parser p = LC_MakeParser(x);
    L->parser = &p;
    LC_AST *file = LC_ParseFileEx(package);
    L->parser = previous;
    return L->errors ? NULL : file;
}
// Lexes and parses `content` as a file named `filename` belonging to `package`.
// `line` is handed to the lexer unchanged.
// Returns the File node, or NULL when an argument is missing, lexing recorded
// errors, or parsing failed.
LC_FUNCTION LC_AST *LC_ParseFile(LC_AST *package, char *filename, char *content, int line) {
    if (content == NULL) {
        LC_SendErrorMessagef(NULL, NULL, "internal compiler error: file passed to %s is null", __FUNCTION__);
        return NULL;
    }
    if (filename == NULL) {
        LC_SendErrorMessagef(NULL, NULL, "internal compiler error: filename passed to %s is null", __FUNCTION__);
        return NULL;
    }

    LC_Lex *lex = LC_LexStream(filename, content, line);
    if (L->errors) {
        return NULL;
    }
    LC_InternTokens(lex);

    // LC_ParseTokens already yields NULL on failure, so its result is passed through.
    return LC_ParseTokens(package, lex);
}
// Formats source text printf-style and parses it as one statement (no
// trailing ';' is required) using the quick parser.
// The previously active parser is reinstated afterwards.
LC_FUNCTION LC_AST *LC_ParseStmtf(const char *str, ...) {
    LC_FORMAT(L->arena, str, code);
    LC_Parser *saved = L->parser;
    LC_MakeParserQuick(code.str);
    LC_AST *stmt = LC_ParseStmt(false);
    L->parser = saved;
    return stmt;
}
// Formats source text printf-style and parses it as one expression using the
// quick parser. The previously active parser is reinstated afterwards.
LC_FUNCTION LC_AST *LC_ParseExprf(const char *str, ...) {
    LC_FORMAT(L->arena, str, code);
    LC_Parser *saved = L->parser;
    LC_MakeParserQuick(code.str);
    LC_AST *expr = LC_ParseExpr();
    L->parser = saved;
    return expr;
}
// Formats source text printf-style and parses it as one top level declaration
// using the quick parser, with L->NullAST standing in for the enclosing file.
// The previously active parser is reinstated afterwards.
LC_FUNCTION LC_AST *LC_ParseDeclf(const char *str, ...) {
    LC_FORMAT(L->arena, str, code);
    LC_Parser *saved = L->parser;
    LC_MakeParserQuick(code.str);
    LC_AST *decl = LC_ParseDecl(&L->NullAST);
    L->parser = saved;
    return decl;
}
#undef LC_EXPECT
#undef LC_PROP_ERROR