Rewrote the expression parser. TL;DR: '.' is actually a right-associative binary operator — I brain-farted really hard this time.
This commit is contained in:
4
lexer.kl
4
lexer.kl
@@ -1,9 +1,10 @@
|
||||
/*
|
||||
Lex_Stream :: struct
|
||||
stream: String
|
||||
offset: int
|
||||
|
||||
lexc :: (s: *Lex_Stream): String // @todo U8 U S
|
||||
return s.stream + s.offset // @todo parsing fields wrong + s.offset // + s.offset @todo Actual string support
|
||||
return s.stream + s.offset // s.offset @todo Actual string support
|
||||
|
||||
main :: ()
|
||||
string_to_lex := "Identifier 2425525 Not_Number"
|
||||
@@ -12,3 +13,4 @@ main :: ()
|
||||
for inf:=0, inf, inf // @todo for
|
||||
pass
|
||||
|
||||
*/
|
||||
2
main.cpp
2
main.cpp
@@ -95,7 +95,7 @@ int main(){
|
||||
test_intern_table();
|
||||
|
||||
String result = {};
|
||||
#if 0
|
||||
#if 1
|
||||
result = compile_file("globals.kl"_s);
|
||||
printf("%s", result.str);
|
||||
result = compile_file("enums.kl"_s);
|
||||
|
||||
@@ -664,7 +664,7 @@ token_kind_string(Token_Kind kind){
|
||||
case TK_Neg: return "~"_s;
|
||||
case TK_Not: return "!"_s;
|
||||
case TK_OpenParen: return "("_s;
|
||||
case TK_CloseParen: return " "_s;
|
||||
case TK_CloseParen: return ")"_s;
|
||||
case TK_OpenBrace: return "{"_s;
|
||||
case TK_CloseBrace: return "}"_s;
|
||||
case TK_OpenBracket: return "["_s;
|
||||
|
||||
163
new_parse.cpp
163
new_parse.cpp
@@ -208,6 +208,7 @@ function Ast_Named *parse_named(B32);
|
||||
function Ast_Block *
|
||||
parse_block(){
|
||||
Ast_Block *block = 0;
|
||||
|
||||
if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context
|
||||
Token *token_block = token_get();
|
||||
|
||||
@@ -295,7 +296,7 @@ parse_block(){
|
||||
}
|
||||
|
||||
function Ast_Lambda *
|
||||
parse_lambda(Token *token, B32 is_typespec = false){
|
||||
parse_lambda(Token *token){
|
||||
Scratch scratch;
|
||||
|
||||
Array<Ast_Lambda_Arg *> params = {scratch};
|
||||
@@ -320,103 +321,127 @@ parse_lambda(Token *token, B32 is_typespec = false){
|
||||
token_expect(TK_CloseParen);
|
||||
|
||||
Ast_Expr *ret = parse_optional_type();
|
||||
Ast_Block *block = is_typespec ? 0 : parse_block();
|
||||
Ast_Block *block = token_is(OPEN_SCOPE) ? parse_block() : 0;
|
||||
Ast_Lambda *result = ast_lambda(token, params, ret, block);
|
||||
return result;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Pratt expression parser
|
||||
// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
|
||||
//-----------------------------------------------------------------------------
|
||||
// Binding powers of one operator for the Pratt parser.
//   left  - compared against the caller's minimum binding power to decide
//           whether the operator may take the expression parsed so far.
//   right - passed as the minimum binding power when parsing the operand
//           that follows the operator.
struct Binding_Power{
  S64 left;
  S64 right;
};
|
||||
// Selects which binding-power table binding_power() consults:
// the operator stands before its operand, between two operands, or after one.
enum Binding{
  Binding_Prefix,
  Binding_Infix,
  Binding_Postfix
};
|
||||
|
||||
// Returns the {left, right} binding powers of token `kind` when it is used in
// the position described by `binding` (prefix, infix or postfix).
//
// Token kinds with no entry yield {-1,-1} in the prefix and postfix tables and
// a zero-initialized pair in the infix table.
// An unknown `binding` value hits invalid_codepath and is then treated as
// Binding_Prefix.
function Binding_Power
binding_power(Binding binding, Token_Kind kind){
  switch(binding){
    case Binding_Infix: {
      switch(kind){
        // Comparisons
        case TK_Equals:
        case TK_NotEquals:
        case TK_GreaterThen:
        case TK_GreaterThenOrEqual:
        case TK_LesserThen:
        case TK_LesserThenOrEqual:
          return {3,4};

        // Additive
        case TK_Sub:
        case TK_Add:
          return {5,6};

        // Shifts and multiplicative operators share one level
        case TK_RightShift:
        case TK_LeftShift:
        case TK_Mul:
        case TK_Div:
        case TK_Mod:
          return {7,8};

        // left > right: the right-hand operand absorbs a following '.',
        // which makes '.' right associative
        case TK_Dot:
          return {10,9};

        default:
          return {};
      }
    }

    case Binding_Postfix: {
      switch(kind){
        // Indexing and calls
        case TK_OpenBracket:
        case TK_OpenParen:
          return {20, -2};

        default:
          return {-1,-1};
      }
    }

    case Binding_Prefix:
      break;

    default:
      invalid_codepath;
      break;
  }

  // Prefix table
  switch(kind){
    case TK_Pointer:
    case TK_Dereference:
    case TK_OpenBracket:
    case TK_Keyword:
    case TK_OpenParen:
      return {-2, 20};

    default:
      return {-1, -1};
  }
}
|
||||
|
||||
function Ast_Expr *
|
||||
null_denotation(Token *token){
|
||||
parse_expr(S64 min_bp){
|
||||
Ast_Expr *left = 0;
|
||||
Token *token = token_next();
|
||||
Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind);
|
||||
|
||||
// @note: parse prefix expression
|
||||
switch(token->kind){
|
||||
case TK_StringLit : return ast_str(token, token->intern_val);
|
||||
case TK_Identifier : return ast_ident(token, token->intern_val);
|
||||
case TK_Integer : return ast_int(token, token->int_val);
|
||||
case TK_Pointer : return ast_expr_unary(token, TK_Pointer, parse_expr());
|
||||
case TK_Dereference: return ast_expr_unary(token, TK_Dereference, parse_expr());
|
||||
case TK_StringLit : left = ast_str(token, token->intern_val); break;
|
||||
case TK_Identifier : left = ast_ident(token, token->intern_val); break;
|
||||
case TK_Integer : left = ast_int(token, token->int_val); break;
|
||||
case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break;
|
||||
case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break;
|
||||
|
||||
case TK_OpenBracket: {
|
||||
Ast_Array *result = ast_array(token, parse_expr());
|
||||
Ast_Array *result = ast_array(token, parse_expr(0));
|
||||
token_expect(TK_CloseBracket);
|
||||
result->base = parse_expr(1);
|
||||
return result;
|
||||
result->base = parse_expr(prefix_bp.right);
|
||||
left = result;
|
||||
}break;
|
||||
|
||||
case TK_Keyword: {
|
||||
if(token->intern_val == keyword_cast){
|
||||
token_expect(TK_OpenParen);
|
||||
Ast_Expr *expr = parse_expr();
|
||||
Ast_Expr *expr = parse_expr(0);
|
||||
token_expect(TK_Colon);
|
||||
Ast_Expr *typespec = parse_expr();
|
||||
Ast_Expr *typespec = parse_expr(0);
|
||||
token_expect(TK_CloseParen);
|
||||
return ast_expr_cast(token, expr, typespec);
|
||||
}
|
||||
else {
|
||||
parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
|
||||
return 0;
|
||||
left = ast_expr_cast(token, expr, typespec);
|
||||
}
|
||||
else parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
|
||||
}break;
|
||||
|
||||
case TK_OpenParen: {
|
||||
if (token_is(TK_CloseParen)) return parse_lambda(token);
|
||||
else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) return parse_lambda(token);
|
||||
if(token_is(TK_CloseParen)) left = parse_lambda(token);
|
||||
else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) left = parse_lambda(token);
|
||||
else{
|
||||
Ast_Expr *result = parse_expr();
|
||||
left = parse_expr(0);
|
||||
token_expect(TK_CloseParen);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}break;
|
||||
default: parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
function S64
|
||||
left_binding_power(Token_Kind kind){
|
||||
switch(kind){
|
||||
case TK_Sub: case TK_Add: return 1;
|
||||
case TK_Mul: case TK_Div: return 2;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
function Ast_Expr *
|
||||
left_denotation(Token *op, Ast_Expr *left){
|
||||
enum{ Left_Associative, Right_Associative };
|
||||
S64 assoc = Left_Associative;
|
||||
Ast_Expr *right = parse_expr(left_binding_power(op->kind) - assoc);
|
||||
switch(op->kind){
|
||||
case TK_Add: case TK_Mul: case TK_Sub: case TK_Div: return ast_expr_binary(left, right, op);
|
||||
default: parsing_error(op, "Unexpected token of kind: [%s] in expression", token_kind_string(op->kind).str); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
function S64
|
||||
postfix_binding_power(Token_Kind kind){
|
||||
switch(kind){
|
||||
case TK_Dot: case TK_Decrement: case TK_Increment: case TK_OpenBracket: case TK_OpenParen: return 3;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
function Ast_Expr *
|
||||
parse_expr(S64 rbp){
|
||||
Token *token = token_next();
|
||||
Ast_Expr *left = null_denotation(token);
|
||||
for(;;){
|
||||
token = token_get();
|
||||
|
||||
// @note: parse postfix
|
||||
S64 pbp = postfix_binding_power(token->kind);
|
||||
if(pbp > rbp){
|
||||
// lets say [+] is left:1, right:2 and we parse 2+3+4
|
||||
// We pass min_bp of 2 to the next recursion
|
||||
// in recursion we check if left(1) > min_bp(2)
|
||||
// it's not so we don't recurse - we break
|
||||
// The enclosing for loop handles that operator instead
|
||||
|
||||
Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind);
|
||||
Binding_Power infix_bp = binding_power(Binding_Infix, token->kind);
|
||||
|
||||
// @note: parse postfix expression
|
||||
if(postfix_bp.left > min_bp){
|
||||
token_next();
|
||||
switch(token->kind){
|
||||
case TK_Dot: {
|
||||
Ast_Expr *right = parse_expr(pbp-1);
|
||||
left = ast_expr_binary(left, right, token);
|
||||
}break;
|
||||
case TK_OpenBracket:{
|
||||
Ast_Expr *index = parse_expr(pbp-1);
|
||||
left = ast_expr_index(token, left, index);
|
||||
Ast_Expr *index = parse_expr(0);
|
||||
token_expect(TK_CloseBracket);
|
||||
left = ast_expr_index(token, left, index);
|
||||
}break;
|
||||
case TK_OpenParen:{
|
||||
left = parse_expr_call(left);
|
||||
@@ -430,11 +455,13 @@ parse_expr(S64 rbp){
|
||||
}
|
||||
}
|
||||
|
||||
// @note: parse right
|
||||
else if(rbp < left_binding_power(token->kind)){
|
||||
// @note: parse infix expression
|
||||
else if(infix_bp.left > min_bp){
|
||||
token = token_next();
|
||||
left = left_denotation(token, left);
|
||||
Ast_Expr *right = parse_expr(infix_bp.right);
|
||||
left = ast_expr_binary(left, right, token);
|
||||
}
|
||||
|
||||
else break;
|
||||
|
||||
}
|
||||
|
||||
14
program.c
14
program.c
@@ -4,17 +4,3 @@
|
||||
#define NULL_LAMBDA 0
|
||||
//-------------------------------
|
||||
|
||||
struct Lex_Stream{
|
||||
String stream;
|
||||
int offset;
|
||||
};
|
||||
static String lexc(Lex_Stream *s){
|
||||
return s->stream;
|
||||
}
|
||||
static void main(){
|
||||
String string_to_lex = LIT("Identifier 2425525 Not_Number");
|
||||
Lex_Stream s = (Lex_Stream ){.stream = string_to_lex};
|
||||
for(int inf = 0;inf;inf){
|
||||
//pass
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user