Files
corelang/new_parse.c
2022-05-06 12:52:33 +02:00

516 lines
13 KiB
C

// One reported parse error. Errors are chained into a singly linked queue
// owned by the Parser (see first_error / last_error).
typedef struct Parser_Error Parser_Error;
struct Parser_Error{
// Next error in the queue; parser_push_error sets it to 0 before appending.
Parser_Error *next;
// Formatted, human-readable error text.
String message;
// Token the error was reported at.
Token *token;
};
// State shared by all parsing routines in this file.
typedef struct Parser{
// Token stream that the token_* helpers peek at and advance through.
Token_Array tokens;
// Arena used for error records/messages and handed to the expr_* constructors.
Arena *arena;
// Head and tail of the queue of errors appended by parser_push_error.
Parser_Error *first_error;
Parser_Error *last_error;
}Parser;
// Formats a printf-style message, prints it together with the token's file
// and line, appends a Parser_Error record to the parser's error queue and
// then triggers a debugger break.
//
// p     - parser whose arena receives the message and the error record
// token - token the error is attributed to (its file and line are printed)
// str   - printf-style format string, followed by its arguments
function void
parser_push_error(Parser *p, Token *token, char *str, ...){
    String string;
    {
        va_list args1, args2;
        va_start(args1, str);
        // The argument list is consumed twice: once to measure, once to format.
        va_copy(args2, args1);
        int length = vsnprintf(0, 0, str, args2);
        va_end(args2);
        // vsnprintf reports failure with a negative value; fall back to an
        // empty message instead of using that value as a length.
        if(length < 0){
            length = 0;
        }
        string.len = length;
        string.str = arena_push_size(p->arena, string.len + 1);
        if(vsnprintf((char*)string.str, string.len + 1, str, args1) < 0){
            // Keep the buffer a valid, terminated C string for the printf below.
            ((char*)string.str)[0] = 0;
            string.len = 0;
        }
        va_end(args1);
    }

    printf("Error: %s %s:%d\n", string.str, token->file.str, (S32)token->line);

    Parser_Error *error = arena_push_struct(p->arena, Parser_Error);
    error->message = string;
    error->next = 0;
    error->token = token;
    SLLQueuePush(p->first_error, p->last_error, error);

    // Stop in the debugger at the point where the error was reported.
    __debugbreak();
}
//-----------------------------------------------------------------------------
// Parsing helpers
//-----------------------------------------------------------------------------
// Returns the token at offset 0 of the parser's token iterator, i.e. the
// token the parser is currently looking at. Nothing is consumed here.
function Token *
token_get(Parser *p){
    return token_array_iter_peek(&p->tokens, 0);
}
// Checks the current token against `kind` without consuming it.
// Returns the current token when the kinds match, 0 otherwise.
function Token *
token_is(Parser *p, Token_Kind kind){
    Token *current = token_get(p);
    return (current->kind == kind) ? current : 0;
}
// Advances the parser's token iterator by one step and returns whatever
// token_array_iter_next yields (presumably the token that was consumed -
// confirm against the iterator implementation).
function Token *
token_next(Parser *p){
    return token_array_iter_next(&p->tokens);
}
// Consumes the current token only if it has the requested kind.
// Returns the result of token_next on a match; returns 0 and leaves the
// token stream untouched otherwise.
function Token *
token_match(Parser *p, Token_Kind kind){
    if(token_get(p)->kind != kind){
        return 0;
    }
    return token_next(p);
}
// Consumes the current token only if it is a keyword token whose interned
// value compares equal to `string`.
// Returns the result of token_next on a match; returns 0 and consumes
// nothing otherwise.
function Token *
token_match_keyword(Parser *p, Intern_String string){
    Token *current = token_get(p);
    if(current->kind != TK_Keyword){
        return 0;
    }
    if(!intern_compare(current->intern_val, string)){
        return 0;
    }
    return token_next(p);
}
// Like token_match, but a mismatch is an error: when the current token does
// not have the requested kind, an error naming both kinds is pushed on the
// parser, nothing is consumed and 0 is returned.
function Token *
token_expect(Parser *p, Token_Kind kind){
    Token *current = token_get(p);
    if(current->kind != kind){
        parser_push_error(p, current, "Expected token of kind: %s, got instead token of kind: %s.", token_kind_string[kind], token_kind_string[current->kind]);
        return 0;
    }
    return token_next(p);
}
//-----------------------------------------------------------------------------
// Expression parsing
//-----------------------------------------------------------------------------
/*
add = [+-]
mul = [/%*]
compare = == | != | >= | > | <= | <
logical = [&|^] | && | ||
unary = [&*-!~+] | ++ | --
atom_expr = Int
| Float
| String
| Identifier
| '(' expr ')'
| 'cast' '(' typespec ',' expr ')'
postfix_expr = atom_expr ('[' expr ']' | '.' Identifier | ++ | -- | '(' expr_list ')')*
unary_expr = unary ? unary_expr : postfix_expr
mul_expr = unary_expr (mul unary_expr)*
add_expr = mul_expr (add mul_expr)*
compare_expr = add_expr (compare add_expr)*
logical_expr = compare_expr (logical compare_expr)*
ternary_expr = logical_expr ('?' ternary_expr ':' ternary_expr)?
expr = ternary_expr
*/
// Forward declaration: parse_expr_atom and parse_expr_postfix recurse into
// parse_expr, which is defined after them.
function Expr *parse_expr(Parser *p);
// Parses an atom: a string literal, an identifier, an integer literal or a
// parenthesised expression. Each alternative is tried in that order.
//
// Returns the new expression node, or 0 when
//  - the 'cast' keyword is matched: its tokens are consumed but no node is
//    built yet and no error is pushed, or
//  - nothing matches, in which case "Failed to parse expression" is pushed.
function Expr *
parse_expr_atom(Parser *p){
    Token *token = token_match(p, TK_StringLit);
    if(token){
        return expr_str(p->arena, token);
    }

    token = token_match(p, TK_Identifier);
    if(token){
        return expr_identifier(p->arena, token);
    }

    token = token_match(p, TK_Int);
    if(token){
        return expr_int(p->arena, token);
    }

    token = token_match(p, TK_OpenParen);
    if(token){
        // `token` is the opening paren; it is attached to the paren node.
        Expr *inner = parse_expr(p);
        token_expect(p, TK_CloseParen);
        return expr_paren(p->arena, token, inner);
    }

    if(token_match_keyword(p, keyword_cast)){
        // Stub: only checks the shape cast(Identifier, Identifier).
        token_expect(p, TK_OpenParen);
        token_expect(p, TK_Identifier);
        token_expect(p, TK_Comma);
        token_expect(p, TK_Identifier);
        token_expect(p, TK_CloseParen);
        return 0;
    }

    parser_push_error(p, token_get(p), "Failed to parse expression");
    return 0;
}
// Reports (1 or 0) whether the current token can start a postfix operation:
// '[', '(', '.', '++' or '--'. Nothing is consumed.
function B32
token_is_postfix(Parser *p){
    switch(token_get(p)->kind){
        case TK_OpenBracket:
        case TK_OpenParen:
        case TK_Dot:
        case TK_Increment:
        case TK_Decrement:
            return 1;
        default:
            return 0;
    }
}
// Parses an atom followed by any number of postfix operations, folding each
// one around the expression built so far:
//   '[' expr ']'            -> index node
//   '(' [expr {',' expr}] ')' -> call node with its arguments pushed in order
//   '.' Identifier          -> field node
//   '++' / '--'             -> postfix unary node
function Expr *
parse_expr_postfix(Parser *p){
    Expr *result = parse_expr_atom(p);

    while(token_is_postfix(p)){
        Token *op = token_match(p, TK_OpenBracket);
        if(op){
            Expr *index = parse_expr(p);
            token_expect(p, TK_CloseBracket);
            result = expr_index(p->arena, op, result, index);
            continue;
        }

        op = token_match(p, TK_OpenParen);
        if(op){
            result = expr_call(p->arena, op, result);
            // An immediate ')' means an empty argument list.
            if(!token_is(p, TK_CloseParen)){
                do{
                    expr_call_push(result, parse_expr(p));
                }while(token_match(p, TK_Comma));
            }
            token_expect(p, TK_CloseParen);
            continue;
        }

        if(token_match(p, TK_Dot)){
            // NOTE(review): token_expect returns 0 when the identifier is
            // missing and that 0 is handed to expr_field unchanged - confirm
            // expr_field tolerates a null token.
            Token *name = token_expect(p, TK_Identifier);
            result = expr_field(p->arena, name, result);
            continue;
        }

        // Only '++' and '--' remain among the postfix starters.
        op = token_next(p);
        assert(op->kind == TK_Increment || op->kind == TK_Decrement);
        result = expr_postfix_unary(p->arena, op, result);
    }

    return result;
}
// Reports (1 or 0) whether the current token is a prefix unary operator:
// add, increment, decrement, sub, mul, bit-and, neg or not.
// Nothing is consumed.
function B32
token_is_unary(Parser *p){
    switch(token_get(p)->kind){
        case TK_Add:
        case TK_Increment:
        case TK_Decrement:
        case TK_Sub:
        case TK_Mul:
        case TK_BitAnd:
        case TK_Neg:
        case TK_Not:
            return 1;
        default:
            return 0;
    }
}
// Parses a chain of prefix unary operators applied to a postfix expression.
// Each operator recurses, so the operators nest right-to-left.
function Expr *
parse_expr_unary(Parser *p){
    if(!token_is_unary(p)){
        return parse_expr_postfix(p);
    }

    Token *op = token_next(p);
    Expr *operand = parse_expr_unary(p);
    return expr_unary(p->arena, op, operand);
}
// Reports (1 or 0) whether the current token's kind lies in the inclusive
// range TK_FirstMul..TK_LastMul. Nothing is consumed.
function B32
token_is_mul(Parser *p){
    Token *current = token_get(p);
    if(current->kind < TK_FirstMul){
        return 0;
    }
    if(current->kind > TK_LastMul){
        return 0;
    }
    return 1;
}
// Parses a left-associative chain of unary expressions joined by
// multiplicative-class operators (see token_is_mul).
function Expr *
parse_expr_mul(Parser *p){
    Expr *lhs = parse_expr_unary(p);
    while(token_is_mul(p)){
        Token *op = token_next(p);
        Expr *rhs = parse_expr_unary(p);
        // Fold to the left: the tree built so far becomes the left operand.
        lhs = expr_binary(p->arena, op, lhs, rhs);
    }
    return lhs;
}
// Reports (1 or 0) whether the current token's kind lies in the inclusive
// range TK_FirstAdd..TK_LastAdd. Nothing is consumed.
function B32
token_is_add(Parser *p){
    Token *current = token_get(p);
    if(current->kind < TK_FirstAdd){
        return 0;
    }
    if(current->kind > TK_LastAdd){
        return 0;
    }
    return 1;
}
// Parses a left-associative chain of multiplicative expressions joined by
// additive-class operators (see token_is_add).
function Expr *
parse_expr_add(Parser *p){
    Expr *lhs = parse_expr_mul(p);
    while(token_is_add(p)){
        Token *op = token_next(p);
        Expr *rhs = parse_expr_mul(p);
        // Fold to the left: the tree built so far becomes the left operand.
        lhs = expr_binary(p->arena, op, lhs, rhs);
    }
    return lhs;
}
// Reports (1 or 0) whether the current token's kind lies in the inclusive
// range TK_FirstCompare..TK_LastCompare. Nothing is consumed.
function B32
token_is_compare(Parser *p){
    Token *current = token_get(p);
    if(current->kind < TK_FirstCompare){
        return 0;
    }
    if(current->kind > TK_LastCompare){
        return 0;
    }
    return 1;
}
// Parses a left-associative chain of additive expressions joined by
// comparison-class operators (see token_is_compare).
function Expr *
parse_expr_compare(Parser *p){
    Expr *lhs = parse_expr_add(p);
    while(token_is_compare(p)){
        Token *op = token_next(p);
        Expr *rhs = parse_expr_add(p);
        // Fold to the left: the tree built so far becomes the left operand.
        lhs = expr_binary(p->arena, op, lhs, rhs);
    }
    return lhs;
}
// Reports (1 or 0) whether the current token's kind lies in the inclusive
// range TK_FirstLogical..TK_LastLogical. Nothing is consumed.
function B32
token_is_logical(Parser *p){
    Token *current = token_get(p);
    if(current->kind < TK_FirstLogical){
        return 0;
    }
    if(current->kind > TK_LastLogical){
        return 0;
    }
    return 1;
}
// Parses a left-associative chain of comparison expressions joined by
// logical-class operators (see token_is_logical).
function Expr *
parse_expr_logical(Parser *p){
    Expr *lhs = parse_expr_compare(p);
    while(token_is_logical(p)){
        Token *op = token_next(p);
        Expr *rhs = parse_expr_compare(p);
        // Fold to the left: the tree built so far becomes the left operand.
        lhs = expr_binary(p->arena, op, lhs, rhs);
    }
    return lhs;
}
// Parses `logical_expr` optionally followed by `? ternary : ternary`.
// Without a '?' the logical expression is returned unchanged; with one, a
// ternary node anchored at the '?' token is returned. Both branches recurse
// into this function.
function Expr *
parse_expr_ternary(Parser *p){
    Expr *cond = parse_expr_logical(p);

    Token *question = token_match(p, TK_Question);
    if(!question){
        return cond;
    }

    Expr *on_true = parse_expr_ternary(p);
    token_expect(p, TK_Colon);
    Expr *on_false = parse_expr_ternary(p);
    return expr_ternary(p->arena, question, cond, on_true, on_false);
}
// Entry point for expression parsing: parses a full expression starting at
// the ternary (lowest precedence) level.
function Expr *
parse_expr(Parser *p){
    Expr *result = parse_expr_ternary(p);
    return result;
}
//-----------------------------------------------------------------------------
// Test code
//-----------------------------------------------------------------------------
// Writes the token's text (token->len bytes starting at token->str) to
// stdout, without a trailing newline.
function void
token_print(Token *token){
    S32 length = (S32)token->len;
    printf("%.*s", length, token->str);
}
// Prints an expression tree to stdout, recursing into sub-expressions.
// Binary, unary, postfix-unary, ternary and cast nodes are wrapped in
// parentheses so the nesting of the tree is visible in the output.
// Any expression kind not listed below reaches invalid_codepath.
function void
expr_print(Expr *expr){
    switch(expr->kind){
        // Leaf nodes print their source token verbatim.
        case EK_Int:
        case EK_String:
        case EK_Identifier:{
            token_print(expr->token);
        } break;

        case EK_Sizeof:{
            printf("sizeof(");
            if(expr->size_of.kind != SIZEOF_Expr){
                // Type operand: nothing is printed for it yet (typespec
                // printing is not wired up).
                assert(expr->size_of.kind == SIZEOF_Type);
            }
            else{
                expr_print(expr->size_of.expr);
            }
            printf(")");
        } break;

        case EK_Paren:{
            printf("(");
            expr_print(expr->paren.expr);
            printf(")");
        } break;

        case EK_Field:{
            expr_print(expr->field.expr);
            printf(".%s", expr->field.name.s.str);
        } break;

        case EK_Binary:{
            printf("(");
            expr_print(expr->binary.left);
            token_print(expr->token);
            expr_print(expr->binary.right);
            printf(")");
        } break;

        case EK_PostfixUnary:{
            // Operand first, operator token after it.
            printf("(");
            expr_print(expr->unary.expr);
            token_print(expr->token);
            printf(")");
        } break;

        case EK_Unary:{
            // Operator token first, operand after it.
            printf("(");
            token_print(expr->token);
            expr_print(expr->unary.expr);
            printf(")");
        } break;

        case EK_Ternary:{
            printf("(");
            expr_print(expr->ternary.cond);
            printf("?");
            expr_print(expr->ternary.on_true);
            printf(":");
            expr_print(expr->ternary.on_false);
            printf(")");
        } break;

        case EK_Cast:{
            printf("(");
            // The inner pair of parens is where the cast type would go; it is
            // printed empty because typespec printing is not wired up.
            printf("(");
            printf(")");
            expr_print(expr->cast.expr);
            printf(")");
        } break;

        case EK_Index:{
            expr_print(expr->index.atom);
            printf("[");
            expr_print(expr->index.index);
            printf("]");
        } break;

        case EK_Call:{
            expr_print(expr->call.atom);
            printf("(");
            for(Expr *arg = expr->call.first; arg; arg = arg->next){
                expr_print(arg);
                // Comma between arguments, none after the last one.
                if(arg != expr->call.last){
                    printf(",");
                }
            }
            printf(")");
        } break;

        default:{
            invalid_codepath;
        } break;
    }
}
// Evaluates an integer expression tree and returns its value.
//
// Supported nodes: integer literals, parentheses, the unary operators
// ! ~ - +, the ternary operator and the binary arithmetic, comparison,
// bitwise, logical and shift operators listed below. Any other node kind or
// operator reaches invalid_codepath; if that returns, the result is 0.
//
// Division/modulo by zero and shift counts outside [0, bit width of S64)
// are undefined behaviour in C, so they are rejected with an assert before
// the operation is performed.
function S64
eval_expr(Expr *expr){
    switch(expr->kind){
        case EK_Int: return expr->int_val;

        case EK_Paren: return eval_expr(expr->paren.expr);

        case EK_Unary:{
            S64 operand = eval_expr(expr->unary.expr);
            switch(expr->unary.op){
                case TK_Not: return !operand;
                case TK_Neg: return ~operand;
                case TK_Sub: return -operand;
                case TK_Add: return +operand;
                default: invalid_codepath;
            }
        } break;

        case EK_Ternary:{
            // Only the selected branch is evaluated.
            S64 cond = eval_expr(expr->ternary.cond);
            if(cond){
                return eval_expr(expr->ternary.on_true);
            }
            return eval_expr(expr->ternary.on_false);
        } break;

        case EK_Binary:{
            // Both operands are always evaluated, including for && and ||.
            S64 left = eval_expr(expr->binary.left);
            S64 right = eval_expr(expr->binary.right);
            S64 bit_count = (S64)(sizeof(left) * 8);
            switch(expr->binary.op){
                case TK_Add: return left + right;
                case TK_Sub: return left - right;
                case TK_Mul: return left * right;
                case TK_Div:{
                    assert(right != 0);
                    return left / right;
                }
                case TK_Mod:{
                    assert(right != 0);
                    return left % right;
                }
                case TK_Equals: return left == right;
                case TK_NotEquals: return left != right;
                case TK_GreaterThenOrEqual: return left >= right;
                case TK_LesserThenOrEqual: return left <= right;
                case TK_GreaterThen: return left > right;
                case TK_LesserThen: return left < right;
                case TK_BitAnd: return left & right;
                case TK_BitOr: return left | right;
                case TK_BitXor: return left ^ right;
                case TK_And: return left && right;
                case TK_Or: return left || right;
                case TK_LeftShift:{
                    assert(right >= 0 && right < bit_count);
                    return left << right;
                }
                case TK_RightShift:{
                    assert(right >= 0 && right < bit_count);
                    return left >> right;
                }
                default: invalid_codepath;
            }
        } break;

        default: invalid_codepath;
    }
    return 0;
}
// Self-test for the expression parser.
// Lexes one string holding ten whitespace-separated expressions. The first
// nine are parsed, evaluated with eval_expr and compared against the same
// expression computed by the C compiler; the tenth is parsed and printed
// with expr_print. All allocations come from a scratch arena.
function void
parse_test(){
    Arena *scratch = arena_begin_scratch();

    String source = lit("32+52-242*2/424%5-23"
                        " 1<<5>>6<<2 "
                        " 5*(4/3)*(2+5) "
                        " 1&&5*3 "
                        " 1&&5||0 "
                        " 1>5>=5==0 "
                        " 1>5 ? 1 : 2 "
                        " !!!!!1 "
                        " ~~1 + -!2 "
                        " 1 + ++Thing[12]++ + ++Thing[12].expr + --Not_Thing[156](Thing) + test_func(func1, func2, func3)"
                        );

    Parser parser = {
        .tokens = lex_stream(scratch, source, lit("expr_test")),
        .arena = scratch,
    };

    // `t` stands in for the literal 5 used in the source string above.
    S64 t = 5;
    S64 expected[] = {
        (32+52-242*2/424%5-23),
        (((1<<5)>>6)<<2),
        5*(4/3)*(2+5),
        1&&(t*3),
        (1&&t)||0,
        1>t>=t==0,
        1>t ? 1 : 2,
        !!!!!1,
        ~~1 + -!2,
    };

    for(int i = 0; i < buff_cap(expected); i++){
        Expr *parsed = parse_expr(&parser);
        S64 value = eval_expr(parsed);
        assert(value == expected[i]);
    }

    // The last expression is not evaluated, only printed.
    Expr *last = parse_expr(&parser);
    expr_print(last);

    arena_end_scratch();
}