Rewrote the expression parser; TL;DR: '.' is actually a right-associative binary operator. I brain-farted really hard this time

This commit is contained in:
Krzosa Karol
2022-06-01 18:59:38 +02:00
parent 364daed1c7
commit 494a937d1f
5 changed files with 103 additions and 88 deletions

View File

@@ -1,9 +1,10 @@
/*
Lex_Stream :: struct
stream: String
offset: int
lexc :: (s: *Lex_Stream): String // @todo U8 U S
return s.stream + s.offset // @todo parsing fields wrong + s.offset // + s.offset @todo Actual string support
return s.stream + s.offset // s.offset @todo Actual string support
main :: ()
string_to_lex := "Identifier 2425525 Not_Number"
@@ -12,3 +13,4 @@ main :: ()
for inf:=0, inf, inf // @todo for
pass
*/

View File

@@ -95,7 +95,7 @@ int main(){
test_intern_table();
String result = {};
#if 0
#if 1
result = compile_file("globals.kl"_s);
printf("%s", result.str);
result = compile_file("enums.kl"_s);

View File

@@ -664,7 +664,7 @@ token_kind_string(Token_Kind kind){
case TK_Neg: return "~"_s;
case TK_Not: return "!"_s;
case TK_OpenParen: return "("_s;
case TK_CloseParen: return " "_s;
case TK_CloseParen: return ")"_s;
case TK_OpenBrace: return "{"_s;
case TK_CloseBrace: return "}"_s;
case TK_OpenBracket: return "["_s;

View File

@@ -208,6 +208,7 @@ function Ast_Named *parse_named(B32);
function Ast_Block *
parse_block(){
Ast_Block *block = 0;
if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context
Token *token_block = token_get();
@@ -295,7 +296,7 @@ parse_block(){
}
function Ast_Lambda *
parse_lambda(Token *token, B32 is_typespec = false){
parse_lambda(Token *token){
Scratch scratch;
Array<Ast_Lambda_Arg *> params = {scratch};
@@ -320,103 +321,127 @@ parse_lambda(Token *token, B32 is_typespec = false){
token_expect(TK_CloseParen);
Ast_Expr *ret = parse_optional_type();
Ast_Block *block = is_typespec ? 0 : parse_block();
Ast_Block *block = token_is(OPEN_SCOPE) ? parse_block() : 0;
Ast_Lambda *result = ast_lambda(token, params, ret, block);
return result;
}
//-----------------------------------------------------------------------------
// Pratt expression parser
// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
//-----------------------------------------------------------------------------
struct Binding_Power{S64 left;S64 right;};
enum Binding{Binding_Prefix,Binding_Infix,Binding_Postfix};
function Binding_Power
binding_power(Binding binding, Token_Kind kind){
if(binding == Binding_Prefix) goto Prefix;
if(binding == Binding_Infix) goto Infix;
if(binding == Binding_Postfix) goto Postfix;
else invalid_codepath;
Prefix: switch(kind){
case TK_Pointer:
case TK_Dereference:
case TK_OpenBracket:
case TK_Keyword:
case TK_OpenParen:
return{-2, 20};
default: return {-1, -1};
}
Infix: switch(kind){
case TK_Equals:
case TK_NotEquals:
case TK_GreaterThen:
case TK_GreaterThenOrEqual:
case TK_LesserThen:
case TK_LesserThenOrEqual:
return {3,4};
case TK_Sub:
case TK_Add:
return {5,6};
case TK_RightShift:
case TK_LeftShift:
case TK_Mul:
case TK_Div:
case TK_Mod:
return {7,8};
case TK_Dot:
return {10,9};
default: return {};
}
Postfix: switch(kind){
case TK_OpenBracket:
case TK_OpenParen:
return {20, -2};
default: return{-1,-1};
}
}
function Ast_Expr *
null_denotation(Token *token){
parse_expr(S64 min_bp){
Ast_Expr *left = 0;
Token *token = token_next();
Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind);
// @note: parse prefix expression
switch(token->kind){
case TK_StringLit : return ast_str(token, token->intern_val);
case TK_Identifier : return ast_ident(token, token->intern_val);
case TK_Integer : return ast_int(token, token->int_val);
case TK_Pointer : return ast_expr_unary(token, TK_Pointer, parse_expr());
case TK_Dereference: return ast_expr_unary(token, TK_Dereference, parse_expr());
case TK_StringLit : left = ast_str(token, token->intern_val); break;
case TK_Identifier : left = ast_ident(token, token->intern_val); break;
case TK_Integer : left = ast_int(token, token->int_val); break;
case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break;
case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break;
case TK_OpenBracket: {
Ast_Array *result = ast_array(token, parse_expr());
Ast_Array *result = ast_array(token, parse_expr(0));
token_expect(TK_CloseBracket);
result->base = parse_expr(1);
return result;
result->base = parse_expr(prefix_bp.right);
left = result;
}break;
case TK_Keyword: {
if(token->intern_val == keyword_cast){
token_expect(TK_OpenParen);
Ast_Expr *expr = parse_expr();
Ast_Expr *expr = parse_expr(0);
token_expect(TK_Colon);
Ast_Expr *typespec = parse_expr();
Ast_Expr *typespec = parse_expr(0);
token_expect(TK_CloseParen);
return ast_expr_cast(token, expr, typespec);
}
else {
parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
return 0;
left = ast_expr_cast(token, expr, typespec);
}
else parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
}break;
case TK_OpenParen: {
if (token_is(TK_CloseParen)) return parse_lambda(token);
else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) return parse_lambda(token);
if(token_is(TK_CloseParen)) left = parse_lambda(token);
else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) left = parse_lambda(token);
else{
Ast_Expr *result = parse_expr();
left = parse_expr(0);
token_expect(TK_CloseParen);
return result;
}
}
}break;
default: parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str); return 0;
}
}
function S64
left_binding_power(Token_Kind kind){
switch(kind){
case TK_Sub: case TK_Add: return 1;
case TK_Mul: case TK_Div: return 2;
default: return 0;
}
}
function Ast_Expr *
left_denotation(Token *op, Ast_Expr *left){
enum{ Left_Associative, Right_Associative };
S64 assoc = Left_Associative;
Ast_Expr *right = parse_expr(left_binding_power(op->kind) - assoc);
switch(op->kind){
case TK_Add: case TK_Mul: case TK_Sub: case TK_Div: return ast_expr_binary(left, right, op);
default: parsing_error(op, "Unexpected token of kind: [%s] in expression", token_kind_string(op->kind).str); return 0;
}
}
function S64
postfix_binding_power(Token_Kind kind){
switch(kind){
case TK_Dot: case TK_Decrement: case TK_Increment: case TK_OpenBracket: case TK_OpenParen: return 3;
default: return 0;
}
}
function Ast_Expr *
parse_expr(S64 rbp){
Token *token = token_next();
Ast_Expr *left = null_denotation(token);
for(;;){
token = token_get();
// @note: parse postfix
S64 pbp = postfix_binding_power(token->kind);
if(pbp > rbp){
// let's say [+] is left:1, right:2 and we parse 2+3+4
// We pass a min_bp of 2 to the next recursion
// in the recursion we check if left(1) > min_bp(2)
// it's not, so we don't recurse - we break
// We do the standard for loop instead
Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind);
Binding_Power infix_bp = binding_power(Binding_Infix, token->kind);
// @note: parse postfix expression
if(postfix_bp.left > min_bp){
token_next();
switch(token->kind){
case TK_Dot: {
Ast_Expr *right = parse_expr(pbp-1);
left = ast_expr_binary(left, right, token);
}break;
case TK_OpenBracket:{
Ast_Expr *index = parse_expr(pbp-1);
left = ast_expr_index(token, left, index);
Ast_Expr *index = parse_expr(0);
token_expect(TK_CloseBracket);
left = ast_expr_index(token, left, index);
}break;
case TK_OpenParen:{
left = parse_expr_call(left);
@@ -430,11 +455,13 @@ parse_expr(S64 rbp){
}
}
// @note: parse right
else if(rbp < left_binding_power(token->kind)){
// @note: parse infix expression
else if(infix_bp.left > min_bp){
token = token_next();
left = left_denotation(token, left);
Ast_Expr *right = parse_expr(infix_bp.right);
left = ast_expr_binary(left, right, token);
}
else break;
}

View File

@@ -4,17 +4,3 @@
#define NULL_LAMBDA 0
//-------------------------------
struct Lex_Stream{
String stream;
int offset;
};
static String lexc(Lex_Stream *s){
return s->stream;
}
static void main(){
String string_to_lex = LIT("Identifier 2425525 Not_Number");
Lex_Stream s = (Lex_Stream ){.stream = string_to_lex};
for(int inf = 0;inf;inf){
//pass
}
}