From 9552126da25fb1ab5d57ff2c11d09d3bd1823202 Mon Sep 17 00:00:00 2001
From: Krzosa Karol
Date: Fri, 6 May 2022 11:28:34 +0200
Subject: [PATCH] Parsing expressions

Add a recursive-descent expression parser with one function per
precedence level (atom, mul, add, compare, logical, ternary), a linked
list of Parser_Error records, a token-kind name table used by error
messages, and a token_end_of_stream sentinel so the token iterator
returns a TK_End token instead of a null pointer. Token kinds are
regrouped into contiguous First/Last ranges so each precedence level can
be tested with a range check. parse_test() parses and evaluates a small
set of integer expressions against the values the C compiler computes.
---
 main.c        |   1 +
 new_ast.c     |   9 ++
 new_lex.c     | 113 ++++++++++++++++++---
 new_parse.c   | 266 ++++++++++++++++++++++++++++++++++++++++++++++++--
 token_array.c |  27 ++---
 5 files changed, 384 insertions(+), 32 deletions(-)

diff --git a/main.c b/main.c
index 6e7a0fe..17af699 100644
--- a/main.c
+++ b/main.c
@@ -18,4 +18,5 @@ int main(){
 
   lex_test();
   test_ast();
+  parse_test();
 }
\ No newline at end of file
diff --git a/new_ast.c b/new_ast.c
index eade645..db6ef36 100644
--- a/new_ast.c
+++ b/new_ast.c
@@ -40,6 +40,7 @@ typedef enum Expr_Kind{
   EK_None,
   EK_Int,
   EK_String,
+  EK_Identifier,
   EK_Unary,
   EK_Binary,
   EK_Ternary,
@@ -126,6 +127,14 @@ expr_str(Arena *p, Token *token){
   return expr;
 }
 
+function Expr *
+expr_name(Arena *p, Token *token){
+  assert(token->kind == TK_Identifier);
+  Expr *expr = expr_new(p, EK_Identifier, token);
+  expr->intern_val = token->intern_val;
+  return expr;
+}
+
 function Expr *
 expr_unary(Arena *p, Token *op, Expr *exp){
   Expr *expr = expr_new(p, EK_Unary, op);
diff --git a/new_lex.c b/new_lex.c
index 89a3a2a..dde0d70 100644
--- a/new_lex.c
+++ b/new_lex.c
@@ -1,5 +1,6 @@
 global Intern_String keyword_if;
 global Intern_String keyword_for;
+global Intern_String keyword_cast;
 global Intern_String keyword_else;
 global Intern_String keyword_sizeof;
 global Intern_String keyword_typeof;
@@ -17,6 +18,7 @@ init_default_keywords(Intern_Table *t){
   keyword_if = intern_string(t, lit("if"));
   first_keyword = keyword_if.s.str;
 
+  keyword_cast = intern_string(t, lit("cast"));
   keyword_for = intern_string(t, lit("for"));
   keyword_else = intern_string(t, lit("else"));
   keyword_sizeof = intern_string(t, lit("sizeof"));
@@ -39,14 +41,37 @@ lex_is_keyword(Intern_String str){
 
 typedef enum Token_Kind{
   TK_End,
+
   TK_Mul,
   TK_Div,
+  TK_Mod,
+  TK_LeftShift,
+  TK_RightShift,
+  TK_FirstMul = TK_Mul,
+  TK_LastMul = TK_RightShift,
+
   TK_Add,
   TK_Sub,
-  TK_Mod,
+  TK_FirstAdd = TK_Add,
+  TK_LastAdd = TK_Sub,
+
+  TK_Equals,
+  TK_LesserThenOrEqual,
+  TK_GreaterThenOrEqual,
+  TK_LesserThen,
+  TK_GreaterThen,
+  TK_NotEquals,
+  TK_FirstCompare = TK_Equals,
+  TK_LastCompare = TK_NotEquals,
+
   TK_BitAnd,
   TK_BitOr,
   TK_BitXor,
+  TK_And,
+  TK_Or,
+  TK_FirstLogical = TK_BitAnd,
+  TK_LastLogical = TK_Or,
+
   TK_Neg,
   TK_Not,
   TK_OpenParen,
@@ -61,8 +86,7 @@ typedef enum Token_Kind{
   TK_ThreeDots,
   TK_Semicolon,
   TK_Dot,
-  TK_LesserThen,
-  TK_GreaterThen,
+
   TK_Colon,
   TK_Assign,
   TK_DivAssign,
@@ -81,14 +105,7 @@ typedef enum Token_Kind{
   TK_Increment,
   TK_PostDecrement,
   TK_PostIncrement,
-  TK_LesserThenOrEqual,
-  TK_GreaterThenOrEqual,
-  TK_Equals,
-  TK_And,
-  TK_Or,
-  TK_NotEquals,
-  TK_LeftShift,
-  TK_RightShift,
+
   TK_Arrow,
   TK_ExprSizeof,
   TK_DocComment,
@@ -488,7 +505,7 @@ lex_test(){
     TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon,
     TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign,
     TK_Equals, TK_Int, TK_Int, TK_Int, TK_Keyword, TK_Keyword,
-    TK_Keyword, TK_Keyword
+    TK_Keyword, TK_Keyword, TK_End
   };
   String strs[] = {
     lit("18446744073709551616"),lit("{"),lit("}"),lit(")"),lit("("),
@@ -496,7 +513,7 @@ lex_test(){
     lit("..."),lit("."),lit("->"),lit(","),lit("::"),lit(":"),
     lit("Thing"),lit("Thingy"),lit("Test_Meme"), lit("+="),lit("-="),
     lit("=="),lit("42524"),lit("4294967295"),lit("18446744073709551615"),
-    lit("for"), lit("if"), lit("while"), lit("switch"),
+    lit("for"), lit("if"), lit("while"), lit("switch"), lit(""),
   };
   U64 vals[] = {
     42524, 4294967295, 18446744073709551615llu
@@ -504,7 +521,7 @@ lex_test(){
 
   int i = 0;
   int ui = 0;
-  for(Token *t = token_array_iter_begin(&array); t; t = token_array_iter_next(&array)){
+  for(Token *t = token_array_iter_begin(&array); t->kind != TK_End; t = token_array_iter_next(&array)){
     assert(t->kind == kind[i]);
     assert(string_compare(t->string, strs[i++]));
     if(t->kind == TK_Int){
@@ -514,3 +531,71 @@ lex_test(){
 
   arena_end_scratch();
 }
+
+//-----------------------------------------------------------------------------
+// Token metadata
+//-----------------------------------------------------------------------------
+global const char *token_kind_string[] = {
+  [TK_End] = "End of stream",
+  [TK_Mul] = "*",
+  [TK_Div] = "/",
+  [TK_Add] = "+",
+  [TK_Sub] = "-",
+  [TK_Mod] = "%",
+  [TK_BitAnd] = "&",
+  [TK_BitOr] = "|",
+  [TK_BitXor] = "^",
+  [TK_Neg] = "~",
+  [TK_Not] = "!",
+  [TK_OpenParen] = "(",
+  [TK_CloseParen] = ")",
+  [TK_OpenBrace] = "{",
+  [TK_CloseBrace] = "}",
+  [TK_OpenBracket] = "[",
+  [TK_CloseBracket] = "]",
+  [TK_Comma] = ",",
+  [TK_Pound] = "#",
+  [TK_Question] = "?",
+  [TK_ThreeDots] = "...",
+  [TK_Semicolon] = ";",
+  [TK_Dot] = ".",
+  [TK_LesserThen] = "<",
+  [TK_GreaterThen] = ">",
+  [TK_Colon] = ":",
+  [TK_Assign] = "=",
+  [TK_DivAssign] = "/=",
+  [TK_MulAssign] = "*=",
+  [TK_ModAssign] = "%=",
+  [TK_SubAssign] = "-=",
+  [TK_AddAssign] = "+=",
+  [TK_AndAssign] = "&=",
+  [TK_OrAssign] = "|=",
+  [TK_XorAssign] = "^=",
+  [TK_LeftShiftAssign] = "<<=",
+  [TK_RightShiftAssign] = ">>=",
+  [TK_DoubleColon] = "::",
+  [TK_At] = "@",
+  [TK_Decrement] = "--",
+  [TK_Increment] = "++",
+  [TK_PostDecrement] = "--",
+  [TK_PostIncrement] = "++",
+  [TK_LesserThenOrEqual] = "<=",
+  [TK_GreaterThenOrEqual] = ">=",
+  [TK_Equals] = "==",
+  [TK_And] = "&&",
+  [TK_Or] = "||",
+  [TK_NotEquals] = "!=",
+  [TK_LeftShift] = "<<",
+  [TK_RightShift] = ">>",
+  [TK_Arrow] = "->",
+  [TK_ExprSizeof] = "sizeof",
+  [TK_DocComment] = "DocComment",
+  [TK_Comment] = "Comment",
+  [TK_Identifier] = "Identifier",
+  [TK_StringLit] = "StringLit",
+  [TK_Character] = "Character",
+  [TK_Error] = "Error",
+  [TK_Float] = "Float",
+  [TK_Int] = "Int",
+  [TK_Keyword] = "Keyword",
+};
diff --git a/new_parse.c b/new_parse.c
index e2b00ea..9f035ed 100644
--- a/new_parse.c
+++ b/new_parse.c
@@ -1,18 +1,62 @@
+typedef struct Parser_Error Parser_Error;
+
+struct Parser_Error{
+  Parser_Error *next;
+  String message;
+  Token *token;
+};
 
 typedef struct Parser{
-  Token_Array array;
+  Token_Array tokens;
   Arena *arena;
+
+  Parser_Error *first_error;
+  Parser_Error *last_error;
 }Parser;
 
+function void
+parser_push_error(Parser *p, Token *token, char *str, ...){
+  String string;
+  {
+    va_list args1, args2;
+    va_start(args1, str);
+    va_copy(args2, args1);
+    string.len = vsnprintf(0, 0, str, args2);
+    va_end(args2);
+
+    string.str = arena_push_size(p->arena, string.len + 1);
+    vsnprintf((char*)string.str, string.len + 1, str, args1);
+    va_end(args1);
+  }
+
+  printf("Error: %s %s:%d\n", string.str, token->file.str, (S32)token->line);
+  Parser_Error *error = arena_push_struct(p->arena, Parser_Error);
+  error->message = string;
+  error->next = 0;
+  error->token = token;
+  SLLQueuePush(p->first_error, p->last_error, error);
+
+  __debugbreak();
+}
+
+
 function Token *
 token_get(Parser *p){
-  Token *result = p->array.iter_bucket->data + p->array.iter_len;
+  Token *result = token_array_iter_peek(&p->tokens, 0);
   return result;
 }
 
+function Token *
+token_is(Parser *p, Token_Kind kind){
+  Token *result = token_get(p);
+  if(result->kind == kind)
+    return result;
+  return 0;
+}
+
 function Token *
 token_next(Parser *p){
-  Token *result = token_array_iter_next(&p->array);
+  Token *result = token_array_iter_next(&p->tokens);
   return result;
 }
 
@@ -26,9 +70,47 @@ token_match(Parser *p, Token_Kind kind){
   return 0;
 }
 
+function Token *
+token_match_keyword(Parser *p, Intern_String string){
+  Token *token = token_get(p);
+  if(token->kind == TK_Keyword){
+    if(intern_compare(token->intern_val, string)){
+      token = token_next(p);
+      return token;
+    }
+  }
+  return 0;
+}
+
+function Token *
+token_expect(Parser *p, Token_Kind kind){
+  Token *token = token_get(p);
+  if(token->kind == kind){
+    token = token_next(p);
+    return token;
+  }
+  parser_push_error(p, token, "Expected token of kind: %s, got instead token of kind: %s.", token_kind_string[kind], token_kind_string[token->kind]);
+  return 0;
+}
+
 /*
+add = [+-]
+mul = [/%*] | << | >>
+compare = == | != | >= | > | <= | <
+logical = [&|^] | && | ||
 
+expr_atom = Int
+| Float
+| String
+| Identifier
+| 'cast' '(' typespec ',' expr ')'
 
+mul_expr = expr_atom (mul expr_atom)*
+add_expr = mul_expr (add mul_expr)*
+compare_expr = add_expr (compare add_expr)*
+logical_expr = compare_expr (logical compare_expr)*
+ternary_expr = logical_expr ('?' ternary_expr ':' ternary_expr)?
+expr = ternary_expr
 */
 
 function Expr *
@@ -42,12 +124,182 @@ parse_expr_atom(Parser *p){
     Expr *result = expr_int(p->arena, token);
     return result;
   }
-
-  invalid_codepath;
-  return 0;
+  else if((token = token_match_keyword(p, keyword_cast))){
+    token_expect(p, TK_OpenParen);
+    token_expect(p, TK_Identifier);
+    token_expect(p, TK_Comma);
+    token_expect(p, TK_Identifier);
+    token_expect(p, TK_CloseParen);
+    return 0; // NOTE(review): no Expr is built for casts yet, so callers receive 0 here
+  }
+  else{
+    parser_push_error(p, token_get(p), "Failed to parse expression");
+    return 0;
+  }
+}
+
+function B32
+token_is_mul(Parser *p){
+  Token *token = token_get(p);
+  B32 result = token->kind >= TK_FirstMul && token->kind <= TK_LastMul;
+  return result;
+}
+
+function Expr *
+parse_expr_mul(Parser *p){
+  Expr *left = parse_expr_atom(p);
+  while(token_is_mul(p)){
+    Token *op = token_next(p);
+    Expr *right = parse_expr_atom(p);
+    left = expr_binary(p->arena, op, left, right);
+  }
+  return left;
+}
+
+function B32
+token_is_add(Parser *p){
+  Token *token = token_get(p);
+  B32 result = token->kind >= TK_FirstAdd && token->kind <= TK_LastAdd;
+  return result;
+}
+
+function Expr *
+parse_expr_add(Parser *p){
+  Expr *left = parse_expr_mul(p);
+  while(token_is_add(p)){
+    Token *op = token_next(p);
+    Expr *right = parse_expr_mul(p);
+    left = expr_binary(p->arena, op, left, right);
+  }
+  return left;
+}
+
+function B32
+token_is_compare(Parser *p){
+  Token *token = token_get(p);
+  B32 result = token->kind >= TK_FirstCompare && token->kind <= TK_LastCompare;
+  return result;
+}
+
+function Expr *
+parse_expr_compare(Parser *p){
+  Expr *left = parse_expr_add(p);
+  while(token_is_compare(p)){
+    Token *op = token_next(p);
+    Expr *right = parse_expr_add(p);
+    left = expr_binary(p->arena, op, left, right);
+  }
+  return left;
+}
+
+function B32
+token_is_logical(Parser *p){
+  Token *token = token_get(p);
+  B32 result = token->kind >= TK_FirstLogical && token->kind <= TK_LastLogical;
+  return result;
+}
+
+function Expr *
+parse_expr_logical(Parser *p){
+  Expr *left = parse_expr_compare(p);
+  while(token_is_logical(p)){
+    Token *op = token_next(p);
+    Expr *right = parse_expr_compare(p);
+    left = expr_binary(p->arena, op, left, right);
+  }
+  return left;
+}
+
+function Expr *
+parse_expr_ternary(Parser *p){
+  Expr *cond = parse_expr_logical(p);
+  Token *token = 0;
+  if((token = token_match(p, TK_Question))){
+    Expr *on_true = parse_expr_ternary(p);
+    token_expect(p, TK_Colon);
+    Expr *on_false = parse_expr_ternary(p);
+    Expr *result = expr_ternary(p->arena, token, cond, on_true, on_false);
+    return result;
+  }
+  return cond;
 }
 
 function Expr *
 parse_expr(Parser *p){
+  return parse_expr_ternary(p);
+}
+
+function S64
+eval_expr(Expr *expr){
+  switch(expr->kind){
+    case EK_Int: return expr->int_val; break;
+    case EK_Ternary:{
+      S64 cond = eval_expr(expr->ternary.cond);
+      if(cond) return eval_expr(expr->ternary.on_true);
+      else return eval_expr(expr->ternary.on_false);
+    } break;
+    case EK_Binary: {
+      S64 left = eval_expr(expr->binary.left);
+      S64 right = eval_expr(expr->binary.right);
+      switch(expr->binary.op){
+        case TK_Add: return left + right; break;
+        case TK_Sub: return left - right; break;
+        case TK_Mul: return left * right; break;
+        case TK_Div: return left / right; break;
+        case TK_Mod: return left % right; break;
+        case TK_Equals: return left == right; break;
+        case TK_NotEquals: return left != right; break;
+        case TK_GreaterThenOrEqual: return left >= right; break;
+        case TK_LesserThenOrEqual: return left <= right; break;
+        case TK_GreaterThen: return left > right; break;
+        case TK_LesserThen: return left < right; break;
+        case TK_BitAnd: return left & right; break;
+        case TK_BitOr: return left | right; break;
+        case TK_BitXor: return left ^ right; break;
+        case TK_And: return left && right; break;
+        case TK_Or: return left || right; break;
+        case TK_LeftShift: return left << right; break;
+        case TK_RightShift: return left >> right; break;
+        default: invalid_codepath;
+      }
+    } break;
+    default: invalid_codepath;
+  }
   return 0;
-}
\ No newline at end of file
+}
+
+function void
+parse_test(){
+  Arena *scratch = arena_begin_scratch();
+  String test_case = lit("32+52-242*2/424%5-23"
+                         " 1<<5>>6<<2 "
+                         " 1&&5*3 "
+                         " 1&&5||0 "
+                         " 1>5>=5==0 "
+                         " 1>5 ? 1 : 2 "
+                         );
+  Parser parser = {
+    .tokens = lex_stream(scratch, test_case, lit("expr_test")),
+    .arena = scratch,
+  };
+  Parser *p = &parser;
+
+  S64 t = 5;
+  S64 test_val[] = {
+    (32+52-242*2/424%5-23),
+    (((1<<5)>>6)<<2),
+    1&&(t*3),
+    (1&&t)||0,
+    1>t>=t==0,
+    1>t ? 1 : 2,
+  };
+  for(int i = 0; i < buff_cap(test_val); i++){
+    Expr *expr = parse_expr(p);
+    S64 val = eval_expr(expr);
+    assert(val == test_val[i]);
+  }
+
+  arena_end_scratch();
+}
+
+
diff --git a/token_array.c b/token_array.c
index 82e5776..6fba214 100644
--- a/token_array.c
+++ b/token_array.c
@@ -1,6 +1,7 @@
 
 typedef struct Token_Bucket Token_Bucket;
 typedef struct Token_Array Token_Array;
+global Token token_end_of_stream;
 
 struct Token_Bucket{
   Token_Bucket *next;
@@ -36,8 +37,10 @@ token_array_make(Arena *arena){
   Token_Array result = {
     .last = &result.first,
     .arena = arena,
-    .interns = intern_table(arena, 4096*4)
+    .interns = intern_table(arena, 4096*4),
+    .iter_bucket = &result.first // NOTE(review): address of this local; stale once result is returned by value - confirm callers re-point it
   };
+
   return result;
 }
 
@@ -52,16 +55,10 @@ token_array_push(Token_Array *array, Token *p){
   array->last->data[array->len++] = *p;
 }
 
-function B32
-token_array_iter_is_end(Token_Array *array){
-  B32 result = array->iter_len == array->len && array->iter_block == array->block;
-  return result;
-}
-
 function Token *
 token_array_iter_next(Token_Array *array){
-  if(token_array_iter_is_end(array)){
-    return 0;
+  if((array->iter_len >= array->len) && (array->iter_block >= array->block)){
+    return &token_end_of_stream;
   }
   if(array->iter_len >= buff_cap(array->first.data)){
     array->iter_len = 0;
@@ -75,16 +72,24 @@ token_array_iter_next(Token_Array *array){
 function Token *
 token_array_iter_peek(Token_Array *array, S64 i){
   S64 save_len = array->iter_len;
+  S64 save_block = array->iter_block;
   Token_Bucket *save_bucket = array->iter_bucket;
   assert(i < buff_cap(array->first.data));
 
   S64 over = i;
   if(array->iter_len + i >= buff_cap(array->first.data)){
     over = buff_cap(array->first.data) - (array->iter_len + i);
-    array->iter_len = 0;
+    array->iter_len = 0;
+    array->iter_block += 1;
     array->iter_bucket = array->iter_bucket->next;
   }
-  Token *result = array->iter_bucket->data + array->iter_len + over;
+
+  over = (save_len + i) % buff_cap(array->first.data); // slot index inside the bucket the peek lands in
+  B32 at_end = (array->iter_block > array->block) ||
+    (array->iter_block == array->block && over >= array->len);
+  Token *result = at_end ? &token_end_of_stream : array->iter_bucket->data + over;
+  // Peeking must not move the iterator: undo the temporary block advance on every path.
+  array->iter_block = save_block;
   array->iter_len = save_len;
   array->iter_bucket = save_bucket;
   return result;