551 lines
15 KiB
C++
551 lines
15 KiB
C++
|
|
// Reports a parse error and then triggers a debugger break.
//
// token: optional source location; when non-null the file name, the 1-based
//        line number, the offending source line and a marker are printed.
// str:   printf-style format string, followed by its arguments.
function void
parsing_error(Token *token, const char *str, ...){
  Scratch scratch;
  STRING_FMT(scratch, str, string);

  // Headline of the error message
  printf("\nError: %s", string.str);

  if(token){
    printf(" %s:%d\n", token->file.str, (S32)token->line + 1);

    // Echo the source line: measure up to the newline or the terminating zero
    const char *line = token->line_begin;
    int line_length = 0;
    while(line[line_length] != '\n' && line[line_length] != 0){
      line_length += 1;
    }
    printf("%.*s\n", line_length, line);

    // Pad with spaces so the marker starts two columns before the token
    int token_column = token->str - token->line_begin;
    int padding = 0;
    while(padding < token_column - 2){
      printf(" ");
      padding += 1;
    }
    printf("^^^^^^\n");
  }

  __debugbreak();
}
|
|
|
|
// Returns the token at offset `i` from the current parser position without
// consuming anything.
//
// i: lookahead relative to pctx->token_iter (0 means the current token).
//
// Any position outside the token array - past the end, or before the start
// when a negative offset is used - yields a pointer to pctx->empty_token
// instead of indexing out of bounds.
function Token *
token_get(S64 i = 0){
  i += pctx->token_iter;
  if(i < 0 || i >= pctx->tokens.len){
    return &pctx->empty_token;
  }
  return &pctx->tokens[i];
}
|
|
|
|
// Returns the current token when lex_is_scope() accepts it, otherwise 0.
// Nothing is consumed.
function Token *
token_is_scope(){
  Token *current = token_get();
  return lex_is_scope(current) ? current : 0;
}
|
|
|
|
// Consumes and returns the current token.
// When the token is a scope token, its indent is recorded in pctx->indent
// before the token iterator advances.
function Token *
token_next(){
  Token *current = token_get();
  if(lex_is_scope(current)){
    pctx->indent = current->indent;
  }
  pctx->token_iter += 1;
  return current;
}
|
|
|
|
// Peeks at the token `lookahead` positions ahead and returns it when its kind
// equals `kind`; returns 0 otherwise. Nothing is consumed.
function Token *
token_is(Token_Kind kind, S64 lookahead = 0){
  Token *peeked = token_get(lookahead);
  return (peeked->kind == kind) ? peeked : 0;
}
|
|
|
|
// Peeks at the token `lookahead` positions ahead and returns it when it is a
// TK_Keyword whose interned string pointer equals keyword.str; returns 0
// otherwise. Nothing is consumed.
function Token *
token_is_keyword(Intern_String keyword, S64 lookahead = 0){
  Token *peeked = token_get(lookahead);
  if(peeked->kind != TK_Keyword) return 0;
  if(peeked->intern_val.str != keyword.str) return 0;
  return peeked;
}
|
|
|
|
// Consumes and returns the current token when its kind equals `kind`;
// otherwise leaves the token stream untouched and returns 0.
function Token *
token_match(Token_Kind kind){
  Token *current = token_get();
  if(current->kind != kind) return 0;
  return token_next();
}
|
|
|
|
// Two-token variant: when the current token has kind `a` and the following
// token has kind `b`, consumes both and returns the first one. Otherwise
// nothing is consumed and 0 is returned.
function Token *
token_match(Token_Kind a, Token_Kind b){
  Token *first = token_get();
  Token *second = token_get(1);
  if(first->kind != a || second->kind != b) return 0;

  token_next();
  token_next();
  return first;
}
|
|
|
|
// Consumes and returns the current token when it is a TK_Keyword whose
// interned string pointer equals string.str; otherwise returns 0 and consumes
// nothing.
function Token *
token_match_keyword(Intern_String string){
  Token *current = token_get();
  if(current->kind != TK_Keyword) return 0;
  if(current->intern_val.str != string.str) return 0;
  return token_next();
}
|
|
|
|
// Consumes and returns the current token when its kind equals `kind`.
// On a mismatch a parsing error naming both kinds is reported and 0 is
// returned; the token is not consumed in that case.
function Token *
token_expect(Token_Kind kind){
  Token *current = token_get();
  if(current->kind != kind){
    parsing_error(current, "Expected token of kind: [%s], got instead token of kind: [%s]", token_kind_string(kind).str, token_kind_string(current->kind).str);
    return 0;
  }
  return token_next();
}
|
|
|
|
// Forward declaration of the expression parser; the default minimum binding power (0) is supplied here.
function Ast_Expr *parse_expr(S64 minbp = 0);
|
|
|
|
// Given an already parsed expression, checks whether an assignment operator
// follows. If one does, the operator is consumed, the right-hand side is
// parsed and a binary expression flagged AST_STMT is returned. Otherwise
// `expr` is returned unchanged.
//
// A [:=] whose left-hand side is not an identifier is reported as an error.
function Ast_Expr *
parse_init_stmt(Ast_Expr *expr){
  Token *op = token_get();

  if(op->kind == TK_ColonAssign && expr->kind != AST_IDENT){
    parsing_error(expr->pos, "Binding with [:=] to something that is not an identifier");
  }

  if(!token_is_assign(op)){
    return expr;
  }

  token_next();
  Ast_Expr *rhs = parse_expr();
  Ast_Expr *stmt = ast_expr_binary((Ast_Atom *)expr, rhs, op);
  stmt->flags = set_flag(stmt->flags, AST_STMT);
  return stmt;
}
|
|
|
|
// Parses the argument list of a call expression up to and including the
// closing parenthesis. `left` is the expression being called.
//
// Each argument takes one of three forms:
//   [index] = value   - indexed item
//   name = value      - named item (name must be flagged AST_ATOM)
//   value             - plain item
// Arguments are separated by commas; a trailing comma before ')' is accepted.
function Ast_Call *
parse_expr_call(Ast_Expr *left){
  Scratch scratch;
  Token *pos = token_get();
  Array<Ast_Call_Item *> exprs = {scratch};

  for(;;){
    if(token_is(TK_CloseParen)) break;

    Token *item_pos = token_get();
    Ast_Expr *index = 0;
    Ast_Atom *name = 0;

    // Optional "[index] =" prefix
    if(token_match(TK_OpenBracket)){
      index = parse_expr();
      token_expect(TK_CloseBracket);
      token_expect(TK_Assign);
    }

    Ast_Expr *value = parse_expr();

    // Without an index, "expr = value" makes the first expression the name
    if(!index && token_match(TK_Assign)){
      assert(is_flag_set(value->flags, AST_ATOM));
      name = (Ast_Atom *)value;
      value = parse_expr();
    }

    Ast_Call_Item *call_item = ast_call_item(item_pos, index, name, value);
    exprs.add(call_item);

    if(!token_match(TK_Comma)) break;
  }
  token_expect(TK_CloseParen);

  return ast_call(pos, left, exprs);
}
|
|
|
|
// Parses ": <expr>" when the current token is a colon and returns the parsed
// expression; returns 0 and consumes nothing otherwise.
function Ast_Expr *
parse_optional_type(){
  if(!token_match(TK_Colon)) return 0;
  return parse_expr();
}
|
|
|
|
// Forward declaration so that parse_block() can call parse_named() ahead of its definition.
function Ast_Named *parse_named(B32);
|
|
// Parses an indented block of statements: OPEN_SCOPE, one or more statements
// separated by SAME_SCOPE, then CLOSE_SCOPE.
//
// Recognized statements:
//   return [expr]
//   pass
//   for init [, cond [, iter]] <block>
//   if [init ,] cond <block> { else if cond <block> } [ else <block> ]
//   a named declaration (see parse_named), or an expression optionally
//   followed by an assignment
//
// Returns the block, or 0 when the opening scope token is missing (the error
// is reported by token_expect).
function Ast_Block *
parse_block(){
  Ast_Block *block = 0;

  if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context
    Token *token_block = token_get();

    Scratch scratch;
    Array<Ast *> stmts = {scratch};
    do{
      Token *token = token_get();

      if(token_match_keyword(keyword_return)){
        // A scope token directly after `return` means there is no value
        Ast_Expr *expr = 0;
        if(!token_is_scope()) expr = parse_expr();
        stmts.add(ast_return(token, expr));
      }

      else if(token_match_keyword(keyword_pass)){
        stmts.add(ast_pass(token));
      }

      else if(token_match_keyword(keyword_for)){
        Ast_Expr *expr_first = parse_expr();
        Ast_Expr *init = parse_init_stmt(expr_first);

        Ast_Expr *cond = 0;
        Ast_Expr *iter = 0;
        if(token_match(TK_Comma)){
          cond = parse_expr();
          if(token_match(TK_Comma)){
            iter = parse_expr();
            iter = parse_init_stmt(iter);
          }
        }

        Ast_Block *for_block = parse_block();
        stmts.add(ast_for(token, init, cond, iter, for_block));
      }

      else if(token_match_keyword(keyword_if)){
        Array<Ast_If_Node *> if_nodes = {scratch};
        Ast_Expr *expr = parse_expr();
        Ast_Expr *init_val = parse_init_stmt(expr);
        if(init_val != expr){
          // An init statement was parsed; the condition, if any, follows a comma
          if(token_match(TK_Comma)) expr = parse_expr();
          else expr = 0;
        }
        // NOTE(review): when no init statement is present, init_val and expr
        // are the same node - confirm that ast_if_node consumers expect this.

        Ast_Block *if_block = parse_block();
        Ast_If_Node *if_node = ast_if_node(token, init_val, expr, if_block);
        if_nodes.add(if_node);

        while(token_is(SAME_SCOPE) && token_is_keyword(keyword_else, 1)){
          token_next(); // SAME_SCOPE
          // Keep the `else` keyword in its own variable so that `token` still
          // refers to the opening `if` when the Ast_If node is created below.
          Token *else_token = token_next();
          if(token_match_keyword(keyword_if)){
            Ast_Expr *else_if_expr = parse_expr();
            Ast_Block *else_if_block = parse_block();
            Ast_If_Node *else_if_node = ast_if_node(else_token, 0, else_if_expr, else_if_block);
            if_nodes.add(else_if_node);
          }
          else{
            Ast_Block *else_block = parse_block();
            Ast_If_Node *else_node = ast_if_node(else_token, 0, 0, else_block);
            if_nodes.add(else_node);
            break;
          }
        }
        Ast_If *result_if = ast_if(token, if_nodes);
        stmts.add(result_if);
      }

      else{
        // Try a named declaration first, then fall back to an expression
        // that may be followed by an assignment.
        Ast *result = parse_named(false);
        if(!result){
          result = parse_expr();
          result = parse_init_stmt((Ast_Expr *)result);
        }

        if(result) stmts.add(result);
        else parsing_error(token, "Unexpected token [%s] while parsing statement", token_kind_string(token->kind).str);
      }
    } while(token_match(SAME_SCOPE));
    token_expect(CLOSE_SCOPE);
    block = ast_block(token_block, stmts);
  }
  return block;
}
|
|
|
|
// Parses a lambda after its opening parenthesis has been consumed:
//   [name: type [= default] {, name: type [= default]}] ')' [: return_type] [block]
//
// token: position token stored in the resulting lambda node.
// The body block is only parsed when an OPEN_SCOPE token follows.
function Ast_Lambda *
parse_lambda(Token *token){
  Scratch scratch;

  Array<Ast_Lambda_Arg *> params = {scratch};
  if(!token_is(TK_CloseParen)){
    do{
      Token *name = token_expect(TK_Identifier);
      token_expect(TK_Colon);
      Ast_Expr *typespec = parse_expr();

      Ast_Expr *default_value = 0;
      if(token_match(TK_Assign)){
        default_value = parse_expr();
      }

      Ast_Lambda_Arg *param = ast_expr_lambda_arg(name, name->intern_val, typespec, default_value);
      params.add(param);
    } while(token_match(TK_Comma));
  }
  token_expect(TK_CloseParen);

  Ast_Expr *ret = parse_optional_type();

  Ast_Block *block = 0;
  if(token_is(OPEN_SCOPE)){
    block = parse_block();
  }

  return ast_lambda(token, params, ret, block);
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Pratt expression parser
|
|
// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
|
|
//-----------------------------------------------------------------------------
|
|
// Left and right binding power of an operator, as returned by binding_power().
struct Binding_Power{
  S64 left;
  S64 right;
};
|
|
// Operator position for which binding_power() looks up a binding power.
enum Binding{
  Binding_Prefix,
  Binding_Infix,
  Binding_Postfix
};
|
|
|
|
// Looks up the {left, right} binding power of a token kind for the given
// operator position.
//
// Token kinds with no entry yield {-1,-1} for prefix and postfix lookups and
// a value-initialized Binding_Power for infix lookups.
function Binding_Power
binding_power(Binding binding, Token_Kind kind){
  if(binding == Binding_Infix){
    switch(kind){
      case TK_Or:
        return {9,10};

      case TK_And:
        return {11,12};

      case TK_Equals:
      case TK_NotEquals:
      case TK_GreaterThen:
      case TK_GreaterThenOrEqual:
      case TK_LesserThen:
      case TK_LesserThenOrEqual:
        return {13,14};

      case TK_Sub:
      case TK_Add:
      case TK_BitOr:
      case TK_BitXor:
        return {15,16};

      case TK_RightShift:
      case TK_LeftShift:
      case TK_BitAnd:
      case TK_Mul:
      case TK_Div:
      case TK_Mod:
        return {17,18};

      case TK_Dot:
        return {20,19};

      default:
        return {};
    }
  }

  if(binding == Binding_Postfix){
    switch(kind){
      case TK_OpenBracket:
      case TK_OpenParen:
        return {20, -2};

      default:
        return{-1,-1};
    }
  }

  // Anything other than Binding_Prefix is invalid at this point; execution
  // then continues into the prefix table below.
  if(binding != Binding_Prefix) invalid_codepath;

  switch(kind){
    case TK_Pointer:
    case TK_Dereference:
    case TK_OpenBracket:
    case TK_Keyword:
    case TK_OpenParen:
      return{-2, 20};

    default:
      return {-1, -1};
  }
}
|
|
|
|
// Pratt expression parser.
//
// min_bp: minimum left binding power an operator needs in order to be
//         attached to the expression parsed so far.
//
// The first token is consumed and parsed as a prefix expression (literal,
// identifier, unary operator, array type, cast, lambda or parenthesized
// expression). After that, postfix and infix operators are attached in a loop
// for as long as their left binding power is greater than min_bp.
//
// Returns 0 when the first token cannot start an expression.
function Ast_Expr *
parse_expr(S64 min_bp){
  Ast_Expr *left = 0;
  Token *token = token_next();
  Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind);

  // Prefix part
  switch(token->kind){
    case TK_StringLit:{
      left = ast_str(token, token->intern_val);
    }break;

    case TK_Identifier:{
      left = ast_ident(token, token->intern_val);
    }break;

    case TK_Integer:{
      left = ast_int(token, token->int_val);
    }break;

    case TK_Float:{
      left = ast_float(token, token->f64_val);
    }break;

    case TK_Pointer:{
      left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right));
    }break;

    case TK_Dereference:{
      left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right));
    }break;

    case TK_OpenBracket:{
      // "[expr] base"
      Ast_Array *array = ast_array(token, parse_expr(0));
      token_expect(TK_CloseBracket);
      array->base = parse_expr(prefix_bp.right);
      left = array;
    }break;

    case TK_Keyword:{
      // The only keyword accepted here: cast(expr: type)
      if(token->intern_val == keyword_cast){
        token_expect(TK_OpenParen);
        Ast_Expr *expr = parse_expr(0);
        token_expect(TK_Colon);
        Ast_Expr *typespec = parse_expr(0);
        token_expect(TK_CloseParen);
        left = ast_expr_cast(token, expr, typespec);
      }
      else{
        parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
      }
    }break;

    case TK_OpenParen:{
      // "()" and "(name: ..." start a lambda, anything else is a
      // parenthesized expression
      if(token_is(TK_CloseParen)){
        left = parse_lambda(token);
      }
      else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)){
        left = parse_lambda(token);
      }
      else{
        left = parse_expr(0);
        token_expect(TK_CloseParen);
      }
    }break;

    default:{
      parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str);
      return 0;
    }
  }

  // Operator loop.
  // Example: say [+] has left:1, right:2 and the input is 2+3+4. The recursive
  // call for the right operand receives min_bp = 2; inside it the second [+]
  // has left(1), which is not greater than 2, so the recursion stops and the
  // second [+] is picked up by this loop in the outer call instead.
  for(;;){
    token = token_get();

    Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind);
    Binding_Power infix_bp = binding_power(Binding_Infix, token->kind);

    // Postfix operators
    if(postfix_bp.left > min_bp){
      token_next();
      switch(token->kind){
        case TK_OpenBracket:{
          Ast_Expr *index = parse_expr(0);
          token_expect(TK_CloseBracket);
          left = ast_expr_index(token, left, index);
        }break;

        case TK_OpenParen:{
          left = parse_expr_call(left);
        }break;

        default:{
          assert(token->kind == TK_Increment || token->kind == TK_Decrement);
          if(token->kind == TK_Increment) token->kind = TK_PostIncrement;
          else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement;
          left = ast_expr_unary(token, token->kind, left);
        }
      }
      continue;
    }

    // Infix operators
    if(infix_bp.left > min_bp){
      token = token_next();
      Ast_Expr *right = parse_expr(infix_bp.right);
      left = ast_expr_binary(left, right, token);
      continue;
    }

    break;
  }

  return left;
}
|
|
|
|
// Parses "= <expr>" when the current token is TK_Assign and returns the
// parsed expression; returns 0 and consumes nothing otherwise.
function Ast_Expr *
parse_assign_expr(){
  if(!token_match(TK_Assign)) return 0;
  return parse_expr();
}
|
|
|
|
// Parses the body of a struct: named members separated by SAME_SCOPE and
// terminated by CLOSE_SCOPE.
//
// pos: position token stored in the resulting struct node.
//
// Members of kind AST_CONST are collected separately from AST_VAR members.
// Every parsed member is flagged AST_AGGREGATE_CHILD. A member that fails to
// parse is reported and skipped; the null check happens before the member is
// touched so the error is reported instead of dereferencing a null pointer.
function Ast_Struct *
parse_struct(Token *pos){
  Scratch scratch;
  Array<Ast_Var *> members = {scratch};
  Array<Ast_Const *> members_const = {scratch};

  token_match(OPEN_SCOPE);
  do{
    Token *token = token_get();
    Ast_Named *named = parse_named(false);
    if(!named){
      parsing_error(token, "Failed to parse struct member");
    }
    else{
      named->flags = set_flag(named->flags, AST_AGGREGATE_CHILD);
      if(named->kind == AST_CONST){
        members_const.add((Ast_Const *)named);
      }
      else{
        assert(named->kind == AST_VAR);
        members.add((Ast_Var *)named);
      }
    }
  }while(token_match(SAME_SCOPE));
  token_expect(CLOSE_SCOPE);

  Ast_Struct *result = ast_struct(pos, members, members_const);
  return result;
}
|
|
|
|
// Parses an enum: an optional ": type" followed by a body of
// "identifier [= expr]" members separated by SAME_SCOPE and terminated by
// CLOSE_SCOPE.
//
// pos: position token stored in the resulting enum node.
function Ast_Enum *
parse_enum(Token *pos){
  Scratch scratch;
  Array<Ast_Enum_Member *> members = {scratch};
  Ast_Expr *typespec = parse_optional_type();

  token_match(OPEN_SCOPE);
  for(;;){
    Token *member_name = token_expect(TK_Identifier);
    Ast_Expr *member_value = parse_assign_expr();
    Ast_Enum_Member *member = ast_enum_member(member_name, member_name->intern_val, member_value);
    members.add(member);

    if(!token_match(SAME_SCOPE)) break;
  }
  token_expect(CLOSE_SCOPE);

  return ast_enum(pos, typespec, members);
}
|
|
|
|
/*
  parse_named needs token peeking only so that statement parsing can reuse it
  instead of duplicating this code, which also keeps the code nicer.
  Statements can use the named syntax too, for example: i :=
*/
|
|
// Parses a named declaration starting at the current token.
//
// Recognized forms:
//   name :: struct ...   - constant bound to a struct
//   name :: enum ...     - constant bound to an enum
//   name :: expr         - constant expression
//   name : type [= expr] - variable with an explicit type
//   name := expr         - variable without an explicit type
//
// is_global: when true, a leading SAME_SCOPE token is skipped, an indented
//            declaration is reported as an error, and any unrecognized token
//            other than TK_End is reported as an error.
//
// Returns the declaration, or 0 when none of the forms matched.
function Ast_Named *
parse_named(B32 is_global){
  Ast_Named *result = 0;
  if(is_global){
    token_match(SAME_SCOPE);
    if(pctx->indent != 0){
      parsing_error(token_get(), "Top level declarations shouldn't be indented");
    }
  }

  Token *name = token_get();
  if(token_match(TK_Identifier, TK_DoubleColon)){
    // @note parse struct binding
    Token *struct_pos = token_get();
    if(token_match_keyword(keyword_struct)){
      Ast_Struct *struct_val = parse_struct(struct_pos);
      result = ast_const(name, name->intern_val, (Ast_Expr *)struct_val);
    }

    else if(token_match_keyword(keyword_enum)){
      Ast_Enum *enum_val = parse_enum(struct_pos);
      result = ast_const(name, name->intern_val, (Ast_Expr *)enum_val);
    }

    // @note parse constant expression
    else{
      Ast_Expr *expr = parse_expr();
      result = ast_const(name, name->intern_val, expr);
    }
  }

  else if(token_match(TK_Identifier, TK_Colon)){
    Ast_Expr *typespec = parse_expr();
    Ast_Expr *expr = parse_assign_expr();
    result = ast_var(name, typespec, name->intern_val, expr);
  }

  else if(token_match(TK_Identifier, TK_ColonAssign)){
    Ast_Expr *expr = parse_expr();
    result = ast_var(name, 0, name->intern_val, expr);
  }

  else if(is_global && name->kind != TK_End){
    parsing_error(name, "Unexpected token: [%s] when parsing a declaration", token_kind_string(name->kind).str);
  }

  return result;
}
|
|
|
|
|
|
|