From 494a937d1f0cfc253dd3535a8dbbc6090165000c Mon Sep 17 00:00:00 2001 From: Krzosa Karol Date: Wed, 1 Jun 2022 18:59:38 +0200 Subject: [PATCH] Rewritten the expression parser, tldr that '.' is actually right associative binary operator, I brain farted really hard this time --- lexer.kl | 4 +- main.cpp | 2 +- new_lex.cpp | 2 +- new_parse.cpp | 167 +++++++++++++++++++++++++++++--------------------- program.c | 16 +---- 5 files changed, 103 insertions(+), 88 deletions(-) diff --git a/lexer.kl b/lexer.kl index 52ed103..ce29feb 100644 --- a/lexer.kl +++ b/lexer.kl @@ -1,9 +1,10 @@ +/* Lex_Stream :: struct stream: String offset: int lexc :: (s: *Lex_Stream): String // @todo U8 U S - return s.stream + s.offset // @todo parsing fields wrong + s.offset // + s.offset @todo Actual string support + return s.stream + s.offset // s.offset @todo Actual string support main :: () string_to_lex := "Identifier 2425525 Not_Number" @@ -12,3 +13,4 @@ main :: () for inf:=0, inf, inf // @todo for pass +*/ \ No newline at end of file diff --git a/main.cpp b/main.cpp index 3001456..b65d45e 100644 --- a/main.cpp +++ b/main.cpp @@ -95,7 +95,7 @@ int main(){ test_intern_table(); String result = {}; -#if 0 +#if 1 result = compile_file("globals.kl"_s); printf("%s", result.str); result = compile_file("enums.kl"_s); diff --git a/new_lex.cpp b/new_lex.cpp index fba9fd2..c9ac720 100644 --- a/new_lex.cpp +++ b/new_lex.cpp @@ -664,7 +664,7 @@ token_kind_string(Token_Kind kind){ case TK_Neg: return "~"_s; case TK_Not: return "!"_s; case TK_OpenParen: return "("_s; - case TK_CloseParen: return " "_s; + case TK_CloseParen: return ")"_s; case TK_OpenBrace: return "{"_s; case TK_CloseBrace: return "}"_s; case TK_OpenBracket: return "["_s; diff --git a/new_parse.cpp b/new_parse.cpp index 06a29af..8df5e2b 100644 --- a/new_parse.cpp +++ b/new_parse.cpp @@ -208,6 +208,7 @@ function Ast_Named *parse_named(B32); function Ast_Block * parse_block(){ Ast_Block *block = 0; + if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context Token *token_block = token_get(); @@ -295,7 +296,7 @@ parse_block(){ } function Ast_Lambda * -parse_lambda(Token *token, B32 is_typespec = false){ +parse_lambda(Token *token){ Scratch scratch; Array params = {scratch}; @@ -319,104 +320,128 @@ parse_lambda(Token *token, B32 is_typespec = false){ } token_expect(TK_CloseParen); - Ast_Expr *ret = parse_optional_type(); - Ast_Block *block = is_typespec ? 0 : parse_block(); + Ast_Expr *ret = parse_optional_type(); + Ast_Block *block = token_is(OPEN_SCOPE) ? parse_block() : 0; Ast_Lambda *result = ast_lambda(token, params, ret, block); return result; } +//----------------------------------------------------------------------------- +// Pratt expression parser +// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html +//----------------------------------------------------------------------------- +struct Binding_Power{S64 left;S64 right;}; +enum Binding{Binding_Prefix,Binding_Infix,Binding_Postfix}; + +function Binding_Power +binding_power(Binding binding, Token_Kind kind){ + if(binding == Binding_Prefix) goto Prefix; + if(binding == Binding_Infix) goto Infix; + if(binding == Binding_Postfix) goto Postfix; + else invalid_codepath; + + Prefix: switch(kind){ + case TK_Pointer: + case TK_Dereference: + case TK_OpenBracket: + case TK_Keyword: + case TK_OpenParen: + return{-2, 20}; + default: return {-1, -1}; + } + Infix: switch(kind){ + case TK_Equals: + case TK_NotEquals: + case TK_GreaterThen: + case TK_GreaterThenOrEqual: + case TK_LesserThen: + case TK_LesserThenOrEqual: + return {3,4}; + case TK_Sub: + case TK_Add: + return {5,6}; + case TK_RightShift: + case TK_LeftShift: + case TK_Mul: + case TK_Div: + case TK_Mod: + return {7,8}; + case TK_Dot: + return {10,9}; + default: return {}; + } + Postfix: switch(kind){ + case TK_OpenBracket: + case TK_OpenParen: + return {20, -2}; + default: return{-1,-1}; + } +} + function Ast_Expr * -null_denotation(Token *token){ +parse_expr(S64 min_bp){ + Ast_Expr *left = 0; + Token *token = token_next(); + Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind); + + // @note: parse prefix expression switch(token->kind){ - case TK_StringLit : return ast_str(token, token->intern_val); - case TK_Identifier : return ast_ident(token, token->intern_val); - case TK_Integer : return ast_int(token, token->int_val); - case TK_Pointer : return ast_expr_unary(token, TK_Pointer, parse_expr()); - case TK_Dereference: return ast_expr_unary(token, TK_Dereference, parse_expr()); + case TK_StringLit : left = ast_str(token, token->intern_val); break; + case TK_Identifier : left = ast_ident(token, token->intern_val); break; + case TK_Integer : left = ast_int(token, token->int_val); break; + case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break; + case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break; case TK_OpenBracket: { - Ast_Array *result = ast_array(token, parse_expr()); + Ast_Array *result = ast_array(token, parse_expr(0)); token_expect(TK_CloseBracket); - result->base = parse_expr(1); - return result; + result->base = parse_expr(prefix_bp.right); + left = result; }break; case TK_Keyword: { if(token->intern_val == keyword_cast){ token_expect(TK_OpenParen); - Ast_Expr *expr = parse_expr(); + Ast_Expr *expr = parse_expr(0); token_expect(TK_Colon); - Ast_Expr *typespec = parse_expr(); + Ast_Expr *typespec = parse_expr(0); token_expect(TK_CloseParen); - return ast_expr_cast(token, expr, typespec); - } - else { - parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str); - return 0; + left = ast_expr_cast(token, expr, typespec); } + else parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str); }break; case TK_OpenParen: { - if (token_is(TK_CloseParen)) return parse_lambda(token); - else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) return parse_lambda(token); + if(token_is(TK_CloseParen)) left = parse_lambda(token); + else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) left = parse_lambda(token); else{ - Ast_Expr *result = parse_expr(); + left = parse_expr(0); token_expect(TK_CloseParen); - return result; } - } + }break; default: parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str); return 0; } -} -function S64 -left_binding_power(Token_Kind kind){ - switch(kind){ - case TK_Sub: case TK_Add: return 1; - case TK_Mul: case TK_Div: return 2; - default: return 0; - } -} - -function Ast_Expr * -left_denotation(Token *op, Ast_Expr *left){ - enum{ Left_Associative, Right_Associative }; - S64 assoc = Left_Associative; - Ast_Expr *right = parse_expr(left_binding_power(op->kind) - assoc); - switch(op->kind){ - case TK_Add: case TK_Mul: case TK_Sub: case TK_Div: return ast_expr_binary(left, right, op); - default: parsing_error(op, "Unexpected token of kind: [%s] in expression", token_kind_string(op->kind).str); return 0; - } -} - -function S64 -postfix_binding_power(Token_Kind kind){ - switch(kind){ - case TK_Dot: case TK_Decrement: case TK_Increment: case TK_OpenBracket: case TK_OpenParen: return 3; - default: return 0; - } -} - -function Ast_Expr * -parse_expr(S64 rbp){ - Token *token = token_next(); - Ast_Expr *left = null_denotation(token); for(;;){ token = token_get(); - // @note: parse postfix - S64 pbp = postfix_binding_power(token->kind); - if(pbp > rbp){ + // lets say [+] is left:1, right:2 and we parse 2+3+4 + // We pass min_bp of 2 to the next recursion + // in recursion we check if left(1) > min_bp(2) + // it's not so we don't recurse - we break + // We do standard do the for loop instead + + Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind); + Binding_Power infix_bp = binding_power(Binding_Infix, token->kind); + + // @note: parse postfix expression + if(postfix_bp.left > min_bp){ token_next(); switch(token->kind){ - case TK_Dot: { - Ast_Expr *right = parse_expr(pbp-1); - left = ast_expr_binary(left, right, token); - }break; case TK_OpenBracket:{ - Ast_Expr *index = parse_expr(pbp-1); - left = ast_expr_index(token, left, index); + Ast_Expr *index = parse_expr(0); token_expect(TK_CloseBracket); + left = ast_expr_index(token, left, index); }break; case TK_OpenParen:{ left = parse_expr_call(left); @@ -425,16 +450,18 @@ parse_expr(S64 rbp){ assert(token->kind == TK_Increment || token->kind == TK_Decrement); if(token->kind == TK_Increment) token->kind = TK_PostIncrement; else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement; - left = ast_expr_unary(token, token->kind, left); + left = ast_expr_unary(token, token->kind, left); } } } - // @note: parse right - else if(rbp < left_binding_power(token->kind)){ + // @note: parse infix expression + else if(infix_bp.left > min_bp){ token = token_next(); - left = left_denotation(token, left); + Ast_Expr *right = parse_expr(infix_bp.right); + left = ast_expr_binary(left, right, token); } + else break; } diff --git a/program.c b/program.c index dcc35a0..0fb999a 100644 --- a/program.c +++ b/program.c @@ -3,18 +3,4 @@ #define NULL_POINTER 0 #define NULL_LAMBDA 0 //------------------------------- - -struct Lex_Stream{ - String stream; - int offset; -}; -static String lexc(Lex_Stream *s){ - return s->stream; -} -static void main(){ - String string_to_lex = LIT("Identifier 2425525 Not_Number"); - Lex_Stream s = (Lex_Stream ){.stream = string_to_lex}; - for(int inf = 0;inf;inf){ - //pass - } -} \ No newline at end of file + \ No newline at end of file