Parsing expressions

This commit is contained in:
Krzosa Karol
2022-05-06 11:28:34 +02:00
parent e3b5e9b33a
commit 9552126da2
5 changed files with 384 additions and 32 deletions

1
main.c
View File

@@ -18,4 +18,5 @@
// Entry point: run the self-test suites in dependency order —
// lexer first, then AST construction, then the parser.
int main(){
lex_test();
test_ast();
parse_test();
}

View File

@@ -40,6 +40,7 @@ typedef enum Expr_Kind{
EK_None,
EK_Int,
EK_String,
EK_Identifier,
EK_Unary,
EK_Binary,
EK_Ternary,
@@ -126,6 +127,14 @@ expr_str(Arena *p, Token *token){
return expr;
}
// Wrap an identifier token in an EK_Identifier expression node,
// carrying over the interned name for later symbol lookup.
function Expr *
expr_name(Arena *p, Token *token){
    assert(token->kind == TK_Identifier);
    Expr *result = expr_new(p, EK_Identifier, token);
    result->intern_val = token->intern_val;
    return result;
}
function Expr *
expr_unary(Arena *p, Token *op, Expr *exp){
Expr *expr = expr_new(p, EK_Unary, op);

113
new_lex.c
View File

@@ -1,5 +1,6 @@
global Intern_String keyword_if;
global Intern_String keyword_for;
global Intern_String keyword_cast;
global Intern_String keyword_else;
global Intern_String keyword_sizeof;
global Intern_String keyword_typeof;
@@ -17,6 +18,7 @@ init_default_keywords(Intern_Table *t){
keyword_if = intern_string(t, lit("if"));
first_keyword = keyword_if.s.str;
keyword_cast = intern_string(t, lit("cast"));
keyword_for = intern_string(t, lit("for"));
keyword_else = intern_string(t, lit("else"));
keyword_sizeof = intern_string(t, lit("sizeof"));
@@ -39,14 +41,37 @@ lex_is_keyword(Intern_String str){
typedef enum Token_Kind{
TK_End,
TK_Mul,
TK_Div,
TK_Mod,
TK_LeftShift,
TK_RightShift,
TK_FirstMul = TK_Mul,
TK_LastMul = TK_RightShift,
TK_Add,
TK_Sub,
TK_Mod,
TK_FirstAdd = TK_Add,
TK_LastAdd = TK_Sub,
TK_Equals,
TK_LesserThenOrEqual,
TK_GreaterThenOrEqual,
TK_LesserThen,
TK_GreaterThen,
TK_NotEquals,
TK_FirstCompare = TK_Equals,
TK_LastCompare = TK_NotEquals,
TK_BitAnd,
TK_BitOr,
TK_BitXor,
TK_And,
TK_Or,
TK_FirstLogical = TK_BitAnd,
TK_LastLogical = TK_Or,
TK_Neg,
TK_Not,
TK_OpenParen,
@@ -61,8 +86,7 @@ typedef enum Token_Kind{
TK_ThreeDots,
TK_Semicolon,
TK_Dot,
TK_LesserThen,
TK_GreaterThen,
TK_Colon,
TK_Assign,
TK_DivAssign,
@@ -81,14 +105,7 @@ typedef enum Token_Kind{
TK_Increment,
TK_PostDecrement,
TK_PostIncrement,
TK_LesserThenOrEqual,
TK_GreaterThenOrEqual,
TK_Equals,
TK_And,
TK_Or,
TK_NotEquals,
TK_LeftShift,
TK_RightShift,
TK_Arrow,
TK_ExprSizeof,
TK_DocComment,
@@ -488,7 +505,7 @@ lex_test(){
TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon,
TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign,
TK_Equals, TK_Int, TK_Int, TK_Int, TK_Keyword, TK_Keyword,
TK_Keyword, TK_Keyword
TK_Keyword, TK_Keyword, TK_End
};
String strs[] = {
lit("18446744073709551616"),lit("{"),lit("}"),lit(")"),lit("("),
@@ -496,7 +513,7 @@ lex_test(){
lit("..."),lit("."),lit("->"),lit(","),lit("::"),lit(":"),
lit("Thing"),lit("Thingy"),lit("Test_Meme"), lit("+="),lit("-="),
lit("=="),lit("42524"),lit("4294967295"),lit("18446744073709551615"),
lit("for"), lit("if"), lit("while"), lit("switch"),
lit("for"), lit("if"), lit("while"), lit("switch"), lit(""),
};
U64 vals[] = {
42524, 4294967295, 18446744073709551615llu
@@ -504,7 +521,7 @@ lex_test(){
int i = 0;
int ui = 0;
for(Token *t = token_array_iter_begin(&array); t; t = token_array_iter_next(&array)){
for(Token *t = token_array_iter_begin(&array); t->kind != TK_End; t = token_array_iter_next(&array)){
assert(t->kind == kind[i]);
assert(string_compare(t->string, strs[i++]));
if(t->kind == TK_Int){
@@ -514,3 +531,71 @@ lex_test(){
arena_end_scratch();
}
//-----------------------------------------------------------------------------
// Token metadata
//-----------------------------------------------------------------------------
global const char *token_kind_string[] = {
[TK_End] = "End of stream",
[TK_Mul] = "*",
[TK_Div] = "/",
[TK_Add] = "+",
[TK_Sub] = "-",
[TK_Mod] = "%",
[TK_BitAnd] = "&",
[TK_BitOr] = "|",
[TK_BitXor] = "^",
[TK_Neg] = "~",
[TK_Not] = "!",
[TK_OpenParen] = "(",
[TK_CloseParen] = " ",
[TK_OpenBrace] = "{",
[TK_CloseBrace] = "}",
[TK_OpenBracket] = "[",
[TK_CloseBracket] = "]",
[TK_Comma] = ",",
[TK_Pound] = "#",
[TK_Question] = "?",
[TK_ThreeDots] = "...",
[TK_Semicolon] = ";",
[TK_Dot] = ".",
[TK_LesserThen] = "<",
[TK_GreaterThen] = ">",
[TK_Colon] = ":",
[TK_Assign] = "=",
[TK_DivAssign] = "/=",
[TK_MulAssign] = "*=",
[TK_ModAssign] = "%=",
[TK_SubAssign] = "-=",
[TK_AddAssign] = "+=",
[TK_AndAssign] = "&=",
[TK_OrAssign] = "|=",
[TK_XorAssign] = "^=",
[TK_LeftShiftAssign] = "<<=",
[TK_RightShiftAssign] = ">>=",
[TK_DoubleColon] = "::",
[TK_At] = "@",
[TK_Decrement] = "--",
[TK_Increment] = "++",
[TK_PostDecrement] = "--",
[TK_PostIncrement] = "++",
[TK_LesserThenOrEqual] = "<=",
[TK_GreaterThenOrEqual] = ">=",
[TK_Equals] = "==",
[TK_And] = "&&",
[TK_Or] = "||",
[TK_NotEquals] = "!=",
[TK_LeftShift] = "<<",
[TK_RightShift] = ">>",
[TK_Arrow] = "->",
[TK_ExprSizeof] = "sizeof",
[TK_DocComment] = "DocComment",
[TK_Comment] = "Comment",
[TK_Identifier] = "Identifier",
[TK_StringLit] = "StringLit",
[TK_Character] = "Character",
[TK_Error] = "Error",
[TK_Float] = "Float",
[TK_Int] = "Int",
[TK_Keyword] = "Keyword",
};

View File

@@ -1,18 +1,62 @@
// One recorded parse diagnostic; errors form a singly linked queue on
// the Parser so parsing can continue after a failure.
typedef struct Parser_Error Parser_Error;
struct Parser_Error{
Parser_Error *next;
String message;
Token *token;
};
// Parser state: the token stream being consumed, an arena for AST
// nodes and error records, and the error queue head/tail.
// NOTE(review): diff view — `array` is the removed field name and
// `tokens` its replacement; only `tokens` exists after this commit.
typedef struct Parser{
Token_Array array;
Token_Array tokens;
Arena *arena;
Parser_Error *first_error;
Parser_Error *last_error;
}Parser;
// Format a printf-style message, print it with the offending token's
// file/line, and append it as a Parser_Error to the parser's queue.
// Breaks into the debugger at the end.
function void
parser_push_error(Parser *p, Token *token, char *str, ...){
String string;
{
// Two-pass vsnprintf: first call with a null buffer measures the
// formatted length, second call writes into arena storage
// (len + 1 leaves room for the NUL terminator).
va_list args1, args2;
va_start(args1, str);
va_copy(args2, args1);
string.len = vsnprintf(0, 0, str, args2);
va_end(args2);
string.str = arena_push_size(p->arena, string.len + 1);
vsnprintf((char*)string.str, string.len + 1, str, args1);
va_end(args1);
}
printf("Error: %s %s:%d\n", string.str, token->file.str, (S32)token->line);
Parser_Error *error = arena_push_struct(p->arena, Parser_Error);
error->message = string;
error->next = 0;
error->token = token;
SLLQueuePush(p->first_error, p->last_error, error);
// NOTE(review): __debugbreak is MSVC-specific — gate it behind an
// #ifdef if other compilers need to build this.
__debugbreak();
}
// Peek at the current token without consuming it.
// NOTE(review): diff view — the first assignment is the removed
// implementation (direct bucket indexing); the second, via
// token_array_iter_peek, replaces it in this commit.
function Token *
token_get(Parser *p){
Token *result = p->array.iter_bucket->data + p->array.iter_len;
Token *result = token_array_iter_peek(&p->tokens, 0);
return result;
}
// Return the current token when it matches `kind`, else 0.
// Never advances the stream.
function Token *
token_is(Parser *p, Token_Kind kind){
    Token *t = token_get(p);
    return (t->kind == kind) ? t : 0;
}
// Advance the stream and return the new current token.
// NOTE(review): diff view — first line is the removed implementation
// (field renamed `array` -> `tokens` in this commit).
function Token *
token_next(Parser *p){
Token *result = token_array_iter_next(&p->array);
Token *result = token_array_iter_next(&p->tokens);
return result;
}
@@ -26,9 +70,47 @@ token_match(Parser *p, Token_Kind kind){
return 0;
}
// When the current token is the keyword `string`, consume it and
// return the token that follows; otherwise leave the stream untouched
// and return 0. (Matches token_expect's convention of returning the
// post-advance token.)
function Token *
token_match_keyword(Parser *p, Intern_String string){
    Token *t = token_get(p);
    if(t->kind != TK_Keyword) return 0;
    if(!intern_compare(t->intern_val, string)) return 0;
    return token_next(p);
}
// Require the current token to be of `kind`. On match, consume it and
// return the token that follows; on mismatch, record a parse error and
// return 0 without advancing.
function Token *
token_expect(Parser *p, Token_Kind kind){
    Token *t = token_get(p);
    if(t->kind != kind){
        parser_push_error(p, t, "Expected token of kind: %s, got instead token of kind: %s.", token_kind_string[kind], token_kind_string[t->kind]);
        return 0;
    }
    return token_next(p);
}
/*
add = [+-]
mul = [/%*]
compare = == | != | >= | > | <= | <
logical = [&|^] | && | ||
expr_atom = Int
| Float
| String
| Identifier
| 'cast' '(' typespec ',' expr ')'
mul_expr = expr_atom (mul expr_atom)*
add_expr = mul_expr (add mul_expr)*
compare_expr = add_expr (compare add_expr)*
logical_expr = compare_expr (logical compare_expr)*
ternary_expr = logical_expr ('?' ternary_expr ':' ternary_expr)?
expr = ternary_expr
*/
function Expr *
@@ -42,12 +124,182 @@ parse_expr_atom(Parser *p){
Expr *result = expr_int(p->arena, token);
return result;
}
invalid_codepath;
else if((token = token_match_keyword(p, keyword_cast))){
token_expect(p, TK_OpenParen);
token_expect(p, TK_Identifier);
token_expect(p, TK_Comma);
token_expect(p, TK_Identifier);
token_expect(p, TK_CloseParen);
return 0;
}
else{
parser_push_error(p, token_get(p), "Failed to parse expression");
return 0;
}
}
// True when the lookahead token is a multiplicative operator,
// i.e. its kind falls in [TK_FirstMul, TK_LastMul].
function B32
token_is_mul(Parser *p){
    Token_Kind k = token_get(p)->kind;
    return (B32)(TK_FirstMul <= k && k <= TK_LastMul);
}
// mul_expr = expr_atom (mul expr_atom)*
// Folds a run of multiplicative operators into a left-leaning tree
// (left associativity).
function Expr *
parse_expr_mul(Parser *p){
    Expr *result = parse_expr_atom(p);
    for(; token_is_mul(p) ;){
        Token *op = token_next(p);
        result = expr_binary(p->arena, op, result, parse_expr_atom(p));
    }
    return result;
}
// True when the lookahead token is an additive operator,
// i.e. its kind falls in [TK_FirstAdd, TK_LastAdd].
function B32
token_is_add(Parser *p){
    Token_Kind k = token_get(p)->kind;
    return (B32)(TK_FirstAdd <= k && k <= TK_LastAdd);
}
// add_expr = mul_expr (add mul_expr)*
// Left-associative fold over + and -.
function Expr *
parse_expr_add(Parser *p){
    Expr *result = parse_expr_mul(p);
    for(; token_is_add(p) ;){
        Token *op = token_next(p);
        result = expr_binary(p->arena, op, result, parse_expr_mul(p));
    }
    return result;
}
// True when the lookahead token is a comparison operator,
// i.e. its kind falls in [TK_FirstCompare, TK_LastCompare].
function B32
token_is_compare(Parser *p){
    Token_Kind k = token_get(p)->kind;
    return (B32)(TK_FirstCompare <= k && k <= TK_LastCompare);
}
// compare_expr = add_expr (compare add_expr)*
// Left-associative fold over the comparison operators.
function Expr *
parse_expr_compare(Parser *p){
    Expr *result = parse_expr_add(p);
    for(; token_is_compare(p) ;){
        Token *op = token_next(p);
        result = expr_binary(p->arena, op, result, parse_expr_add(p));
    }
    return result;
}
// True when the lookahead token is a logical/bitwise connective,
// i.e. its kind falls in [TK_FirstLogical, TK_LastLogical].
function B32
token_is_logical(Parser *p){
    Token_Kind k = token_get(p)->kind;
    return (B32)(TK_FirstLogical <= k && k <= TK_LastLogical);
}
// logical_expr = compare_expr (logical compare_expr)*
// Left-associative fold over & | ^ && ||.
function Expr *
parse_expr_logical(Parser *p){
    Expr *result = parse_expr_compare(p);
    for(; token_is_logical(p) ;){
        Token *op = token_next(p);
        result = expr_binary(p->arena, op, result, parse_expr_compare(p));
    }
    return result;
}
// ternary_expr = logical_expr ('?' ternary_expr ':' ternary_expr)?
// Right-recursive, so `a ? b : c ? d : e` groups as `a ? b : (c ? d : e)`.
function Expr *
parse_expr_ternary(Parser *p){
    Expr *cond = parse_expr_logical(p);
    Token *question = token_match(p, TK_Question);
    if(!question) return cond;
    Expr *on_true = parse_expr_ternary(p);
    token_expect(p, TK_Colon);
    Expr *on_false = parse_expr_ternary(p);
    return expr_ternary(p->arena, question, cond, on_true, on_false);
}
// Public entry point for expression parsing: expr = ternary_expr.
function Expr *
parse_expr(Parser *p){
    Expr *result = parse_expr_ternary(p);
    return result;
}
// Constant-fold an expression tree down to a signed 64-bit value.
// Handles integer literals, ternaries, and the binary operators the
// parser produces; any other node or operator trips invalid_codepath.
function S64
eval_expr(Expr *expr){
    switch(expr->kind){
        case EK_Int: return expr->int_val; break;
        case EK_Ternary:{
            // Only the selected branch is evaluated (short-circuit).
            S64 cond = eval_expr(expr->ternary.cond);
            if(cond) return eval_expr(expr->ternary.on_true);
            else return eval_expr(expr->ternary.on_false);
        } break;
        case EK_Binary: {
            S64 left = eval_expr(expr->binary.left);
            S64 right = eval_expr(expr->binary.right);
            switch(expr->binary.op){
                case TK_Add: return left + right; break;
                case TK_Sub: return left - right; break;
                case TK_Mul: return left * right; break;
                case TK_Div:{
                    // Division by zero is UB in C — trap it explicitly
                    // instead of crashing on bad constant input.
                    // NOTE(review): S64_MIN / -1 still overflows —
                    // TODO decide how eval errors should be reported.
                    if(right == 0){ invalid_codepath; return 0; }
                    return left / right;
                } break;
                case TK_Mod:{
                    if(right == 0){ invalid_codepath; return 0; }
                    return left % right;
                } break;
                case TK_Equals: return left == right; break;
                case TK_NotEquals: return left != right; break;
                case TK_GreaterThenOrEqual: return left >= right; break;
                case TK_LesserThenOrEqual: return left <= right; break;
                case TK_GreaterThen: return left > right; break;
                case TK_LesserThen: return left < right; break;
                case TK_BitAnd: return left & right; break;
                case TK_BitOr: return left | right; break;
                case TK_BitXor: return left ^ right; break;
                case TK_And: return left && right; break;
                case TK_Or: return left || right; break;
                // NOTE(review): shifting by a negative or >=64 count is
                // UB — assumed well-formed input here, TODO validate.
                case TK_LeftShift: return left << right; break;
                case TK_RightShift: return left >> right; break;
                default: invalid_codepath;
            }
        } break;
        default: invalid_codepath;
    }
    return 0;
}
// Smoke test: lex a batch of constant expressions, parse each one,
// constant-fold it, and compare against the value the host C compiler
// computes for the same expression.
function void
parse_test(){
    Arena *scratch = arena_begin_scratch();
    String source = lit("32+52-242*2/424%5-23"
        " 1<<5>>6<<2 "
        " 1&&5*3 "
        " 1&&5||0 "
        " 1>5>=5==0 "
        " 1>5 ? 1 : 2 "
    );
    Parser parser = {
        .tokens = lex_stream(scratch, source, lit("expr_test")),
        .arena = scratch,
    };
    Parser *p = &parser;
    // Routed through a variable so the host compiler doesn't warn about
    // (or fold away) the constant comparisons below.
    S64 t = 5;
    S64 expected[] = {
        (32+52-242*2/424%5-23),
        (((1<<5)>>6)<<2),
        1&&(t*3),
        (1&&t)||0,
        1>t>=t==0,
        1>t ? 1 : 2,
    };
    for(int i = 0; i < buff_cap(expected); i++){
        S64 val = eval_expr(parse_expr(p));
        assert(val == expected[i]);
    }
    arena_end_scratch();
}

View File

@@ -1,6 +1,7 @@
typedef struct Token_Bucket Token_Bucket;
typedef struct Token_Array Token_Array;
global Token token_end_of_stream;
struct Token_Bucket{
Token_Bucket *next;
@@ -36,8 +37,10 @@ token_array_make(Arena *arena){
Token_Array result = {
.last = &result.first,
.arena = arena,
.interns = intern_table(arena, 4096*4)
.interns = intern_table(arena, 4096*4),
.iter_bucket = &result.first
};
return result;
}
@@ -52,16 +55,10 @@ token_array_push(Token_Array *array, Token *p){
array->last->data[array->len++] = *p;
}
// Removed by this commit: iteration now terminates by returning the
// token_end_of_stream sentinel from token_array_iter_next instead of
// callers checking an explicit end predicate.
function B32
token_array_iter_is_end(Token_Array *array){
B32 result = array->iter_len == array->len && array->iter_block == array->block;
return result;
}
function Token *
token_array_iter_next(Token_Array *array){
if(token_array_iter_is_end(array)){
return 0;
if((array->iter_len >= array->len) && (array->iter_block >= array->block)){
return &token_end_of_stream;
}
if(array->iter_len >= buff_cap(array->first.data)){
array->iter_len = 0;
@@ -75,6 +72,7 @@ token_array_iter_next(Token_Array *array){
function Token *
token_array_iter_peek(Token_Array *array, S64 i){
S64 save_len = array->iter_len;
S64 save_block = array->iter_block;
Token_Bucket *save_bucket = array->iter_bucket;
assert(i < buff_cap(array->first.data));
@@ -82,9 +80,16 @@ token_array_iter_peek(Token_Array *array, S64 i){
if(array->iter_len + i >= buff_cap(array->first.data)){
over = buff_cap(array->first.data) - (array->iter_len + i);
array->iter_len = 0;
array->iter_block += 1;
array->iter_bucket = array->iter_bucket->next;
}
Token *result = array->iter_bucket->data + array->iter_len + over;
over = array->iter_len + over;
if(over == array->len && array->iter_block == array->block){
return &token_end_of_stream;
}
Token *result = array->iter_bucket->data + over;
array->iter_len = save_len;
array->iter_bucket = save_bucket;
return result;