Rewrote the expression parser. TL;DR: '.' is actually a right-associative binary operator; I brain-farted really hard this time.
This commit is contained in:
4
lexer.kl
4
lexer.kl
@@ -1,9 +1,10 @@
|
|||||||
|
/*
|
||||||
Lex_Stream :: struct
|
Lex_Stream :: struct
|
||||||
stream: String
|
stream: String
|
||||||
offset: int
|
offset: int
|
||||||
|
|
||||||
lexc :: (s: *Lex_Stream): String // @todo U8 U S
|
lexc :: (s: *Lex_Stream): String // @todo U8 U S
|
||||||
return s.stream + s.offset // @todo parsing fields wrong + s.offset // + s.offset @todo Actual string support
|
return s.stream + s.offset // s.offset @todo Actual string support
|
||||||
|
|
||||||
main :: ()
|
main :: ()
|
||||||
string_to_lex := "Identifier 2425525 Not_Number"
|
string_to_lex := "Identifier 2425525 Not_Number"
|
||||||
@@ -12,3 +13,4 @@ main :: ()
|
|||||||
for inf:=0, inf, inf // @todo for
|
for inf:=0, inf, inf // @todo for
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
*/
|
||||||
2
main.cpp
2
main.cpp
@@ -95,7 +95,7 @@ int main(){
|
|||||||
test_intern_table();
|
test_intern_table();
|
||||||
|
|
||||||
String result = {};
|
String result = {};
|
||||||
#if 0
|
#if 1
|
||||||
result = compile_file("globals.kl"_s);
|
result = compile_file("globals.kl"_s);
|
||||||
printf("%s", result.str);
|
printf("%s", result.str);
|
||||||
result = compile_file("enums.kl"_s);
|
result = compile_file("enums.kl"_s);
|
||||||
|
|||||||
@@ -664,7 +664,7 @@ token_kind_string(Token_Kind kind){
|
|||||||
case TK_Neg: return "~"_s;
|
case TK_Neg: return "~"_s;
|
||||||
case TK_Not: return "!"_s;
|
case TK_Not: return "!"_s;
|
||||||
case TK_OpenParen: return "("_s;
|
case TK_OpenParen: return "("_s;
|
||||||
case TK_CloseParen: return " "_s;
|
case TK_CloseParen: return ")"_s;
|
||||||
case TK_OpenBrace: return "{"_s;
|
case TK_OpenBrace: return "{"_s;
|
||||||
case TK_CloseBrace: return "}"_s;
|
case TK_CloseBrace: return "}"_s;
|
||||||
case TK_OpenBracket: return "["_s;
|
case TK_OpenBracket: return "["_s;
|
||||||
|
|||||||
167
new_parse.cpp
167
new_parse.cpp
@@ -208,6 +208,7 @@ function Ast_Named *parse_named(B32);
|
|||||||
function Ast_Block *
|
function Ast_Block *
|
||||||
parse_block(){
|
parse_block(){
|
||||||
Ast_Block *block = 0;
|
Ast_Block *block = 0;
|
||||||
|
|
||||||
if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context
|
if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context
|
||||||
Token *token_block = token_get();
|
Token *token_block = token_get();
|
||||||
|
|
||||||
@@ -295,7 +296,7 @@ parse_block(){
|
|||||||
}
|
}
|
||||||
|
|
||||||
function Ast_Lambda *
|
function Ast_Lambda *
|
||||||
parse_lambda(Token *token, B32 is_typespec = false){
|
parse_lambda(Token *token){
|
||||||
Scratch scratch;
|
Scratch scratch;
|
||||||
|
|
||||||
Array<Ast_Lambda_Arg *> params = {scratch};
|
Array<Ast_Lambda_Arg *> params = {scratch};
|
||||||
@@ -319,104 +320,128 @@ parse_lambda(Token *token, B32 is_typespec = false){
|
|||||||
}
|
}
|
||||||
token_expect(TK_CloseParen);
|
token_expect(TK_CloseParen);
|
||||||
|
|
||||||
Ast_Expr *ret = parse_optional_type();
|
Ast_Expr *ret = parse_optional_type();
|
||||||
Ast_Block *block = is_typespec ? 0 : parse_block();
|
Ast_Block *block = token_is(OPEN_SCOPE) ? parse_block() : 0;
|
||||||
Ast_Lambda *result = ast_lambda(token, params, ret, block);
|
Ast_Lambda *result = ast_lambda(token, params, ret, block);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
// Pratt expression parser
|
||||||
|
// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
struct Binding_Power{S64 left;S64 right;};
|
||||||
|
enum Binding{Binding_Prefix,Binding_Infix,Binding_Postfix};
|
||||||
|
|
||||||
|
function Binding_Power
|
||||||
|
binding_power(Binding binding, Token_Kind kind){
|
||||||
|
if(binding == Binding_Prefix) goto Prefix;
|
||||||
|
if(binding == Binding_Infix) goto Infix;
|
||||||
|
if(binding == Binding_Postfix) goto Postfix;
|
||||||
|
else invalid_codepath;
|
||||||
|
|
||||||
|
Prefix: switch(kind){
|
||||||
|
case TK_Pointer:
|
||||||
|
case TK_Dereference:
|
||||||
|
case TK_OpenBracket:
|
||||||
|
case TK_Keyword:
|
||||||
|
case TK_OpenParen:
|
||||||
|
return{-2, 20};
|
||||||
|
default: return {-1, -1};
|
||||||
|
}
|
||||||
|
Infix: switch(kind){
|
||||||
|
case TK_Equals:
|
||||||
|
case TK_NotEquals:
|
||||||
|
case TK_GreaterThen:
|
||||||
|
case TK_GreaterThenOrEqual:
|
||||||
|
case TK_LesserThen:
|
||||||
|
case TK_LesserThenOrEqual:
|
||||||
|
return {3,4};
|
||||||
|
case TK_Sub:
|
||||||
|
case TK_Add:
|
||||||
|
return {5,6};
|
||||||
|
case TK_RightShift:
|
||||||
|
case TK_LeftShift:
|
||||||
|
case TK_Mul:
|
||||||
|
case TK_Div:
|
||||||
|
case TK_Mod:
|
||||||
|
return {7,8};
|
||||||
|
case TK_Dot:
|
||||||
|
return {10,9};
|
||||||
|
default: return {};
|
||||||
|
}
|
||||||
|
Postfix: switch(kind){
|
||||||
|
case TK_OpenBracket:
|
||||||
|
case TK_OpenParen:
|
||||||
|
return {20, -2};
|
||||||
|
default: return{-1,-1};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
function Ast_Expr *
|
function Ast_Expr *
|
||||||
null_denotation(Token *token){
|
parse_expr(S64 min_bp){
|
||||||
|
Ast_Expr *left = 0;
|
||||||
|
Token *token = token_next();
|
||||||
|
Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind);
|
||||||
|
|
||||||
|
// @note: parse prefix expression
|
||||||
switch(token->kind){
|
switch(token->kind){
|
||||||
case TK_StringLit : return ast_str(token, token->intern_val);
|
case TK_StringLit : left = ast_str(token, token->intern_val); break;
|
||||||
case TK_Identifier : return ast_ident(token, token->intern_val);
|
case TK_Identifier : left = ast_ident(token, token->intern_val); break;
|
||||||
case TK_Integer : return ast_int(token, token->int_val);
|
case TK_Integer : left = ast_int(token, token->int_val); break;
|
||||||
case TK_Pointer : return ast_expr_unary(token, TK_Pointer, parse_expr());
|
case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break;
|
||||||
case TK_Dereference: return ast_expr_unary(token, TK_Dereference, parse_expr());
|
case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break;
|
||||||
|
|
||||||
case TK_OpenBracket: {
|
case TK_OpenBracket: {
|
||||||
Ast_Array *result = ast_array(token, parse_expr());
|
Ast_Array *result = ast_array(token, parse_expr(0));
|
||||||
token_expect(TK_CloseBracket);
|
token_expect(TK_CloseBracket);
|
||||||
result->base = parse_expr(1);
|
result->base = parse_expr(prefix_bp.right);
|
||||||
return result;
|
left = result;
|
||||||
}break;
|
}break;
|
||||||
|
|
||||||
case TK_Keyword: {
|
case TK_Keyword: {
|
||||||
if(token->intern_val == keyword_cast){
|
if(token->intern_val == keyword_cast){
|
||||||
token_expect(TK_OpenParen);
|
token_expect(TK_OpenParen);
|
||||||
Ast_Expr *expr = parse_expr();
|
Ast_Expr *expr = parse_expr(0);
|
||||||
token_expect(TK_Colon);
|
token_expect(TK_Colon);
|
||||||
Ast_Expr *typespec = parse_expr();
|
Ast_Expr *typespec = parse_expr(0);
|
||||||
token_expect(TK_CloseParen);
|
token_expect(TK_CloseParen);
|
||||||
return ast_expr_cast(token, expr, typespec);
|
left = ast_expr_cast(token, expr, typespec);
|
||||||
}
|
|
||||||
else {
|
|
||||||
parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
else parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
|
||||||
}break;
|
}break;
|
||||||
|
|
||||||
case TK_OpenParen: {
|
case TK_OpenParen: {
|
||||||
if (token_is(TK_CloseParen)) return parse_lambda(token);
|
if(token_is(TK_CloseParen)) left = parse_lambda(token);
|
||||||
else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) return parse_lambda(token);
|
else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) left = parse_lambda(token);
|
||||||
else{
|
else{
|
||||||
Ast_Expr *result = parse_expr();
|
left = parse_expr(0);
|
||||||
token_expect(TK_CloseParen);
|
token_expect(TK_CloseParen);
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
}
|
}break;
|
||||||
default: parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str); return 0;
|
default: parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str); return 0;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
function S64
|
|
||||||
left_binding_power(Token_Kind kind){
|
|
||||||
switch(kind){
|
|
||||||
case TK_Sub: case TK_Add: return 1;
|
|
||||||
case TK_Mul: case TK_Div: return 2;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function Ast_Expr *
|
|
||||||
left_denotation(Token *op, Ast_Expr *left){
|
|
||||||
enum{ Left_Associative, Right_Associative };
|
|
||||||
S64 assoc = Left_Associative;
|
|
||||||
Ast_Expr *right = parse_expr(left_binding_power(op->kind) - assoc);
|
|
||||||
switch(op->kind){
|
|
||||||
case TK_Add: case TK_Mul: case TK_Sub: case TK_Div: return ast_expr_binary(left, right, op);
|
|
||||||
default: parsing_error(op, "Unexpected token of kind: [%s] in expression", token_kind_string(op->kind).str); return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function S64
|
|
||||||
postfix_binding_power(Token_Kind kind){
|
|
||||||
switch(kind){
|
|
||||||
case TK_Dot: case TK_Decrement: case TK_Increment: case TK_OpenBracket: case TK_OpenParen: return 3;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function Ast_Expr *
|
|
||||||
parse_expr(S64 rbp){
|
|
||||||
Token *token = token_next();
|
|
||||||
Ast_Expr *left = null_denotation(token);
|
|
||||||
for(;;){
|
for(;;){
|
||||||
token = token_get();
|
token = token_get();
|
||||||
|
|
||||||
// @note: parse postfix
|
// let's say [+] is left:1, right:2 and we parse 2+3+4
|
||||||
S64 pbp = postfix_binding_power(token->kind);
|
// We pass min_bp of 2 to the next recursion
|
||||||
if(pbp > rbp){
|
// in recursion we check if left(1) > min_bp(2)
|
||||||
|
// it's not so we don't recurse - we break
|
||||||
|
// We continue with the standard for loop instead
|
||||||
|
|
||||||
|
Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind);
|
||||||
|
Binding_Power infix_bp = binding_power(Binding_Infix, token->kind);
|
||||||
|
|
||||||
|
// @note: parse postfix expression
|
||||||
|
if(postfix_bp.left > min_bp){
|
||||||
token_next();
|
token_next();
|
||||||
switch(token->kind){
|
switch(token->kind){
|
||||||
case TK_Dot: {
|
|
||||||
Ast_Expr *right = parse_expr(pbp-1);
|
|
||||||
left = ast_expr_binary(left, right, token);
|
|
||||||
}break;
|
|
||||||
case TK_OpenBracket:{
|
case TK_OpenBracket:{
|
||||||
Ast_Expr *index = parse_expr(pbp-1);
|
Ast_Expr *index = parse_expr(0);
|
||||||
left = ast_expr_index(token, left, index);
|
|
||||||
token_expect(TK_CloseBracket);
|
token_expect(TK_CloseBracket);
|
||||||
|
left = ast_expr_index(token, left, index);
|
||||||
}break;
|
}break;
|
||||||
case TK_OpenParen:{
|
case TK_OpenParen:{
|
||||||
left = parse_expr_call(left);
|
left = parse_expr_call(left);
|
||||||
@@ -425,16 +450,18 @@ parse_expr(S64 rbp){
|
|||||||
assert(token->kind == TK_Increment || token->kind == TK_Decrement);
|
assert(token->kind == TK_Increment || token->kind == TK_Decrement);
|
||||||
if(token->kind == TK_Increment) token->kind = TK_PostIncrement;
|
if(token->kind == TK_Increment) token->kind = TK_PostIncrement;
|
||||||
else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement;
|
else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement;
|
||||||
left = ast_expr_unary(token, token->kind, left);
|
left = ast_expr_unary(token, token->kind, left);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// @note: parse right
|
// @note: parse infix expression
|
||||||
else if(rbp < left_binding_power(token->kind)){
|
else if(infix_bp.left > min_bp){
|
||||||
token = token_next();
|
token = token_next();
|
||||||
left = left_denotation(token, left);
|
Ast_Expr *right = parse_expr(infix_bp.right);
|
||||||
|
left = ast_expr_binary(left, right, token);
|
||||||
}
|
}
|
||||||
|
|
||||||
else break;
|
else break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
16
program.c
16
program.c
@@ -3,18 +3,4 @@
|
|||||||
#define NULL_POINTER 0
|
#define NULL_POINTER 0
|
||||||
#define NULL_LAMBDA 0
|
#define NULL_LAMBDA 0
|
||||||
//-------------------------------
|
//-------------------------------
|
||||||
|
|
||||||
struct Lex_Stream{
|
|
||||||
String stream;
|
|
||||||
int offset;
|
|
||||||
};
|
|
||||||
static String lexc(Lex_Stream *s){
|
|
||||||
return s->stream;
|
|
||||||
}
|
|
||||||
static void main(){
|
|
||||||
String string_to_lex = LIT("Identifier 2425525 Not_Number");
|
|
||||||
Lex_Stream s = (Lex_Stream ){.stream = string_to_lex};
|
|
||||||
for(int inf = 0;inf;inf){
|
|
||||||
//pass
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user