function Ast_Decl *parse_decl(B32 is_global); function void print_token_context(Token *token){ printf(" :: %s:%d\n", token->file.str, (S32)token->line + 1); // @Note(Krzosa): Print error line { int i = 0; while(token->line_begin[i]!='\n' && token->line_begin[i]!=0) i++; printf("%.*s\n", i, token->line_begin); // @Note(Krzosa): Print error marker int token_i = token->str - token->line_begin; for(int i = 0; i < token_i-2; i++) printf(" "); printf("^^^^^^\n"); } } function void compiler_error(Token *token, const char *str, ...){ Scratch scratch; STRING_FMT(scratch, str, string); // @Note(Krzosa): Print nice error message printf("\nError :: %s", string.str); if(token){ if(token->kind == TK_Error){ printf("Token Error: %.*s", (int)token->error_val.len, token->error_val.str); } print_token_context(token); } __debugbreak(); } function Token * token_get(S64 i = 0){ i += pctx->token_iter; if(i >= pctx->tokens.len){ return &null_token; } Token *result = &pctx->tokens[i]; return result; } function Token * token_is_scope(){ Token *token = token_get(); if(lex_is_scope(token)) return token; return 0; } function Token * token_next(){ Token *token = token_get(); if(lex_is_scope(token)) pctx->indent = token->indent; pctx->token_iter++; return token; } function Token * token_is(Token_Kind kind, S64 lookahead = 0){ Token *token = token_get(lookahead); if(token->kind == kind){ return token; } return 0; } function Token * token_is_keyword(Intern_String keyword, S64 lookahead = 0){ Token *token = token_get(lookahead); if(token->kind == TK_Keyword){ if(keyword.str == token->intern_val.str){ return token; } } return 0; } function Token * token_match_pound(Intern_String string){ Token *token = token_get(); if(token->kind == TK_Pound){ if(token->intern_val == string){ return token_next(); } } return 0; } function Token * token_match(Token_Kind kind){ Token *token = token_get(); if(token->kind == kind){ return token_next(); } return 0; } function Token * token_match(Token_Kind a, Token_Kind b){ Token *ta = token_get(); Token *tb = token_get(1); if(ta->kind == a && tb->kind == b){ token_next(); token_next(); return ta; } return 0; } function Token * token_match_keyword(Intern_String string){ Token *token = token_get(); if(token->kind == TK_Keyword){ if(string.str == token->intern_val.str){ token = token_next(); return token; } } return 0; } function Token * token_expect(Token_Kind kind){ Token *token = token_get(); if(token->kind == kind) return token_next(); compiler_error(token, "Expected token of kind: [%s], got instead token of kind: [%s]", name(kind), name(token->kind)); return 0; } function Ast_Expr *parse_expr(S64 minbp = 0); function Ast_Expr * parse_init_stmt(Ast_Expr *expr){ Token *token = token_get(); if(token->kind == TK_ColonAssign && expr->kind != AST_IDENT) compiler_error(expr->pos, "Binding with [:=] to something that is not an identifier"); else if(token_is_assign(token)){ token_next(); Ast_Expr *value = parse_expr(); Ast_Expr *result = 0; if(token->kind == TK_ColonAssign){ Ast_Atom *name = (Ast_Atom *)expr; result = (Ast_Expr *)ast_var(token, 0, name->intern_val, value); set_flag(result->flags, AST_EXPR); } else{ result = ast_expr_binary((Ast_Atom *)expr, value, token); } set_flag(result->flags, AST_STMT); return result; } return expr; } function Ast_Call * parse_expr_call(Ast_Expr *left, Token_Kind close_kind){ Scratch scratch; Token *pos = token_get(); Array exprs = {scratch}; while(!token_is(close_kind)){ Token *token = token_get(); Ast_Atom *name = 0; Ast_Expr *index = 0; if(token_match(TK_OpenBracket)){ index = parse_expr(0); token_expect(TK_CloseBracket); token_expect(TK_Assign); } Ast_Expr *item = parse_expr(); if(token_match(TK_Assign)){ assert(is_flag_set(item->flags, AST_ATOM)); name = (Ast_Atom *)item; item = parse_expr(); } if(name && index) compiler_error(token, "Both index and name are present, that is invalid"); if(close_kind == TK_OpenParen && index) compiler_error(token, "Lambda calls can't have indexed arguments"); Ast_Call_Item *item_comp = ast_call_item(token, name, index, item); exprs.add(item_comp); if(!token_match(TK_Comma)){ break; } } token_expect(close_kind); Ast_Call *result = ast_call(pos, left, exprs); return result; } function Ast_Expr * parse_optional_type(){ Ast_Expr *result = 0; if(token_match(TK_Colon)) result = parse_expr(); return result; } function Ast_Scope * parse_stmt_scope(Ast_Scope *scope_defined_outside = 0){ Ast_Scope *scope = scope_defined_outside; if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context Token *token_block = token_get(); Scratch scratch; if(!scope_defined_outside) scope = begin_stmt_scope(scratch, token_block); do{ Token *token = token_get(); if(token_match_keyword(keyword_return)){ Ast_Expr *expr = 0; if(!token_is_scope()) expr = parse_expr(); scope->stmts.add(ast_return(token, expr)); } else if(token_match_keyword(keyword_pass)){ scope->stmts.add(ast_pass(token)); } else if(token_match_keyword(keyword_for)){ Ast_Scope *for_scope = begin_stmt_scope(scratch, token_get()); Ast_Expr *init = 0; Ast_Expr *cond = 0; Ast_Expr *iter = 0; if(!token_is(OPEN_SCOPE)){ if(!token_is(TK_Comma)){ Ast_Expr *expr_first = parse_expr(); init = parse_init_stmt(expr_first); } if(token_match(TK_Comma)){ if(!token_is(TK_Comma)) cond = parse_expr(); if(token_match(TK_Comma)){ iter = parse_expr(); iter = parse_init_stmt(iter); } } } parse_stmt_scope(for_scope); finalize_stmt_scope(for_scope); scope->stmts.add(ast_for(token, init, cond, iter, for_scope)); } else if(token_match_keyword(keyword_if)){ Array if_nodes = {scratch}; Ast_Expr *expr = parse_expr(); Ast_Expr *init_val = parse_init_stmt(expr); if(init_val != expr){ if(token_match(TK_Comma)) expr = parse_expr(); else expr = 0; } if(init_val == expr) init_val = 0; Ast_Scope *if_block = parse_stmt_scope(); Ast_If_Node *if_node = ast_if_node(token, init_val, expr, if_block); if_nodes.add(if_node); while(token_is(SAME_SCOPE) && token_is_keyword(keyword_else, 1)){ token_next(); token = token_next(); if(token_match_keyword(keyword_if)){ Ast_Expr *expr = parse_expr(); Ast_Scope *else_if_block = parse_stmt_scope(); Ast_If_Node *if_node = ast_if_node(token, 0, expr, else_if_block); if_nodes.add(if_node); } else{ Ast_Scope *else_block = parse_stmt_scope(); Ast_If_Node *if_node = ast_if_node(token, 0, 0, else_block); if_nodes.add(if_node); break; } } Ast_If *result_if = ast_if(token, if_nodes); scope->stmts.add(result_if); } else{ Ast *result = parse_decl(false); if(!result){ result = parse_expr(); result = parse_init_stmt((Ast_Expr *)result); } if(result) { result->flags = set_flag(result->flags, AST_STMT); scope->stmts.add(result); } else { compiler_error(token, "Unexpected token [%s] while parsing statement", name(token->kind)); } } } while(token_match(SAME_SCOPE)); token_expect(CLOSE_SCOPE); if(!scope_defined_outside) finalize_stmt_scope(scope); } return scope; } function Ast_Lambda * parse_lambda(Token *token){ Scratch scratch; B32 has_var_args = false; Array params = {scratch}; if(!token_is(TK_CloseParen)){ for(;;){ Token *name = token_get(); if(token_match(TK_Identifier)){ token_expect(TK_Colon); Ast_Expr *typespec = parse_expr(); Ast_Expr *default_value = 0; if(token_match(TK_Assign)) { default_value = parse_expr(); } Ast_Decl *param = ast_var(name, typespec, name->intern_val, default_value); params.add(param); } else if(token_match(TK_ThreeDots)){ has_var_args = true; break; } else compiler_error(name, "Expected [Identifier] or [...] when parsing lambda arguments"); if(!token_match(TK_Comma)) break; } } token_expect(TK_CloseParen); Ast_Expr *ret = parse_optional_type(); Ast_Scope *scope = token_is(OPEN_SCOPE) ? parse_stmt_scope() : 0; Ast_Lambda *result = ast_lambda(token, params, ret, scope); return result; } //----------------------------------------------------------------------------- // Pratt expression parser // Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html //----------------------------------------------------------------------------- struct Binding_Power{S64 left;S64 right;}; enum Binding{Binding_Prefix,Binding_Infix,Binding_Postfix}; function Binding_Power binding_power(Binding binding, Token_Kind kind){ if(binding == Binding_Prefix) goto Prefix; if(binding == Binding_Infix) goto Infix; if(binding == Binding_Postfix) goto Postfix; else invalid_codepath; Prefix: switch(kind){ case TK_OpenBracket: return {-2, 22}; case TK_Increment: case TK_Decrement: case TK_Pointer: case TK_Dereference: case TK_Keyword: case TK_OpenParen: case TK_OpenBrace: case TK_Sub: case TK_Add: case TK_Neg: case TK_Not: return{-2, 20}; default: return {-1, -1}; } Infix: switch(kind){ case TK_Or: return {9,10}; case TK_And: return {11,12}; case TK_Equals: case TK_NotEquals: case TK_GreaterThen: case TK_GreaterThenOrEqual: case TK_LesserThen: case TK_LesserThenOrEqual: return {13,14}; case TK_Sub: case TK_Add: case TK_BitOr: case TK_BitXor: return {15,16}; case TK_RightShift: case TK_LeftShift: case TK_BitAnd: case TK_Mul: case TK_Div: case TK_Mod: return {17,18}; case TK_Dot: return {24,23}; default: return {}; } Postfix: switch(kind){ case TK_Increment: case TK_Decrement: case TK_OpenBracket: case TK_OpenParen: case TK_OpenBrace: return {21, -2}; default: return{-1,-1}; } } function Ast_Expr * parse_expr(S64 min_bp){ Ast_Expr *left = 0; Token *token = token_next(); Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind); // @note: parse prefix expression switch(token->kind){ case TK_StringLit : left = ast_str(token, token->intern_val); break; case TK_Identifier : left = ast_ident(token, token->intern_val); break; case TK_Integer : left = ast_int(token, token->int_val); break; case TK_UnicodeLit : left = ast_int(token, token->unicode); break; case TK_Float : left = ast_float(token, token->f64_val); break; case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break; case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break; case TK_Sub : left = ast_expr_unary(token, TK_Sub, parse_expr(prefix_bp.right)); break; case TK_Add : left = ast_expr_unary(token, TK_Add, parse_expr(prefix_bp.right)); break; case TK_Not : left = ast_expr_unary(token, TK_Not, parse_expr(prefix_bp.right)); break; case TK_Neg : left = ast_expr_unary(token, TK_Neg, parse_expr(prefix_bp.right)); break; case TK_Increment : left = ast_expr_unary(token, TK_Increment, parse_expr(prefix_bp.right)); break; case TK_Decrement : left = ast_expr_unary(token, TK_Decrement, parse_expr(prefix_bp.right)); break; case TK_OpenBracket: { Ast_Expr *expr = 0; if(!token_is(TK_CloseBracket)) expr = parse_expr(0); Ast_Array *result = ast_array(token, expr); token_expect(TK_CloseBracket); result->base = parse_expr(prefix_bp.right); left = result; }break; case TK_OpenBrace: { left = parse_expr_call(0, TK_CloseBrace); left->kind = AST_COMPOUND; }break; case TK_Keyword: { if(token->intern_val == keyword_true) left = ast_bool(token, 1); else if(token->intern_val == keyword_false) left = ast_bool(token, 0); else if(token->intern_val == keyword_cast){ token_expect(TK_OpenParen); Ast_Expr *expr = parse_expr(0); token_expect(TK_Colon); Ast_Expr *typespec = parse_expr(0); token_expect(TK_CloseParen); left = ast_expr_cast(token, expr, typespec); } else compiler_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str); }break; case TK_OpenParen: { if(token_is(TK_CloseParen) || (token_is(TK_Identifier) && token_is(TK_Colon, 1)) || token_is(TK_ThreeDots)) left = parse_lambda(token); else{ left = parse_expr(0); token_expect(TK_CloseParen); } }break; default: compiler_error(token, "Unexpected token of kind: [%s] in expression", name(token->kind)); return 0; } for(;;){ token = token_get(); // lets say [+] is left:1, right:2 and we parse 2+3+4 // We pass min_bp of 2 to the next recursion // in recursion we check if left(1) > min_bp(2) // it's not so we don't recurse - we break // We do standard do the for loop instead Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind); Binding_Power infix_bp = binding_power(Binding_Infix, token->kind); // @note: parse postfix expression if(postfix_bp.left > min_bp){ token_next(); switch(token->kind){ case TK_OpenBracket:{ Ast_Expr *index = parse_expr(0); token_expect(TK_CloseBracket); left = ast_expr_index(token, left, index); }break; case TK_OpenBrace: { left = parse_expr_call(left, TK_CloseBrace); left->kind = AST_COMPOUND; }break; case TK_OpenParen:{ left = parse_expr_call(left, TK_CloseParen); }break; default:{ assert(token->kind == TK_Increment || token->kind == TK_Decrement); if(token->kind == TK_Increment) token->kind = TK_PostIncrement; else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement; left = ast_expr_unary(token, token->kind, left); } } } // @note: parse infix expression else if(infix_bp.left > min_bp){ token = token_next(); Ast_Expr *right = parse_expr(infix_bp.right); left = ast_expr_binary(left, right, token); } else break; } return left; } function Ast_Expr * parse_assign_expr(){ Ast_Expr *result = 0; if(token_match(TK_Assign)) result = parse_expr(); return result; } function Ast_Decl * parse_struct(Token *pos){ Scratch scratch; token_match(OPEN_SCOPE); Ast_Scope *scope = begin_decl_scope(scratch, token_get()); do{ Token *token = token_get(); Ast_Decl *decl = parse_decl(false); if(!decl) compiler_error(token, "Failed to parse struct member"); decl->flags = set_flag(decl->flags, AST_AGGREGATE_CHILD); scope->decls.add(decl); }while(token_match(SAME_SCOPE)); token_expect(CLOSE_SCOPE); finalize_decl_scope(scope); Ast_Decl *result = ast_struct(pos, scope); return result; } function Ast_Decl * parse_enum(Token *pos){ Scratch scratch; Ast_Expr *typespec = parse_optional_type(); Token *flag = token_match_pound(intern_flag); token_match(OPEN_SCOPE); Ast_Scope *scope = begin_decl_scope(scratch, token_get()); do{ Token *name = token_expect(TK_Identifier); Ast_Expr *value = 0; if(token_match(TK_DoubleColon)) value = parse_expr(); Ast_Decl *member = ast_const(name, name->intern_val, value); member->flags = set_flag(member->flags, AST_AGGREGATE_CHILD); scope->decls.add(member); }while(token_match(SAME_SCOPE)); finalize_decl_scope(scope); token_expect(CLOSE_SCOPE); Ast_Decl *result = ast_enum(pos, typespec, scope); if(flag) set_flag(result->flags, AST_FLAG); return result; } function Ast_File * register_ast_file(Intern_String filename, Ast_Module *module, B32 global_implicit_load){ Ast_File *file = 0; For(module->files){ if(it->filename == filename){ file = it; break; } } if(!file){ file = exp_alloc_type(pctx->perm, Ast_File, AF_ZeroMemory); file->filename = filename; file->module = module; file->module->files.add(file); } if(global_implicit_load) file->global_implicit_load = true; return file; } function Ast_File * parse_load(B32 global_implicit_load){ Token *file = token_expect(TK_StringLit); Ast_File *result = register_ast_file(file->intern_val, pctx->currently_parsed_file->module, global_implicit_load); return result; } /* Needs peeking only because I didn't want to duplicate code for parsing statements and it makes code nicer. Statements can have named syntax i := */ function Ast_Decl * parse_decl(B32 is_global){ Ast_Decl *result = 0; if(is_global) { token_match(SAME_SCOPE); if(pctx->indent != 0){ compiler_error(token_get(), "Top level declarations shouldn't be indented"); } } Token *tname = token_get(); if(token_match(TK_Identifier, TK_DoubleColon)){ Ast_Flag flags = 0; if(token_match_pound(intern_foreign)){ set_flag(flags, AST_FOREIGN); } // @note parse struct binding if(token_match_keyword(keyword_struct)){ result = parse_struct(tname); } else if(token_match_keyword(keyword_enum)){ result = parse_enum(tname); } else if(token_match_pound(pctx->intern("load"_s))){ Ast_File *file = parse_load(false); result = ast_file_namespace(tname, file, tname->intern_val); } else{ Ast_Expr *expr = parse_expr(); result = ast_const(tname, tname->intern_val, expr); if(expr->kind == AST_LAMBDA_EXPR){ auto a = (Ast_Lambda *)expr; if(a->scope || is_flag_set(flags, AST_FOREIGN)){ set_flag(result->flags, flags); result->kind = AST_LAMBDA; } } } } else if(token_match(TK_Identifier, TK_Colon)){ Ast_Expr *typespec = parse_expr(); Ast_Expr *expr = parse_assign_expr(); result = ast_var(tname, typespec, tname->intern_val, expr); } else if(token_match(TK_Identifier, TK_ColonAssign)){ Ast_Expr *expr = parse_expr(); result = ast_var(tname, 0, tname->intern_val, expr); } else if(is_global && tname->kind != TK_End){ compiler_error(tname, "Unexpected token: [%s] when parsing a declaration", name(tname->kind)); } if(result){ result->name = tname->intern_val; } return result; }