Files
wasm_transcript_browser/src/meta/parser.c
2024-12-29 13:30:13 +01:00

307 lines
11 KiB
C

// Kinds/attributes an AST node can carry.
//
// The enumerators are bit *indices* (0, 1, 2, ...), not masks: code combines
// them as `set_bit(ast_flag_x) | set_bit(ast_flag_y)` and stores the result in
// `ast_t.flags`.
//
// AST_FLAG_XLIST is an X-macro list; define `X(NAME)` before expanding it and
// `#undef X` afterwards. The last entry must NOT end with a line continuation,
// otherwise the following `#define X` line is swallowed into the list itself.
typedef enum {
#define AST_FLAG_XLIST \
    X(null)            \
    X(string)          \
    X(integer)         \
    X(real)            \
    X(binary)          \
    X(enum)            \
    X(enum_member)     \
    X(struct)          \
    X(struct_member)   \
    X(var)             \
    X(type_name)       \
    X(type_pointer)    \
    X(type_array)
#define X(NAME) ast_flag_##NAME,
    AST_FLAG_XLIST
#undef X
} ast_flag_t;
// A node of the syntax tree. Nodes are created zeroed by create_ast() and
// children are attached with ast_append().
typedef struct ast_t {
    ast_flag_t flags;      // OR of set_bit(ast_flag_*) values describing the node
    lex_t *pos;            // token the node was created for
    struct ast_t *next;    // presumably the sibling link used by SLLQ_APPEND - confirm
    struct ast_t *first;   // head of the child list
    struct ast_t *last;    // tail of the child list
    i32 len;               // number of children added through ast_append()
    s8_t string;           // textual payload (token text, name, type spelling, ...)
    f64 real;              // payload of real literals
    i64 integer;           // payload of integer literals; operator kind for binary nodes
} ast_t;
// Builds a string naming every flag whose bit is set in `flag`, using a string
// builder started on `arena`. Names are emitted in AST_FLAG_XLIST order, back
// to back, with no separator between them.
s8_t s8_serial_ast_flag_t(ma_arena_t *arena, ast_flag_t flag) {
    sb8_t *builder = sb8_serial_begin(arena);
#define X(NAME)                                   \
    if ((flag & set_bit(ast_flag_##NAME)) != 0) { \
        sb8_printf(builder, #NAME);               \
    }
    AST_FLAG_XLIST
#undef X
    return sb8_serial_end(builder);
}
// Allocates a node from the parser's arena, zeroes it and records the given
// source token and flag set. All other fields (children, payloads) start at 0.
ast_t *create_ast(parser_t *par, lex_t *pos, ast_flag_t flags) {
    ast_t *node = ma_push_type(par->arena, ast_t);
    memset(node, 0, sizeof *node);
    node->pos = pos;
    node->flags = flags;
    return node;
}
// Adds `node` at the end of `parent`'s child list (first/last) and bumps the
// parent's child count.
void ast_append(ast_t *parent, ast_t *node) {
    SLLQ_APPEND(parent->first, parent->last, node);
    ++parent->len;
}
// Creates a binary-operator node whose two children are `left` and `right`
// (in that order). The operator kind is stored in `integer` and its simple
// spelling in `string`; the node is flagged string | binary | integer.
ast_t *create_ast_binary(parser_t *par, lex_t *pos, ast_t *left, lex_kind_t op, ast_t *right) {
    ast_t *node = create_ast(par, pos,
                             set_bit(ast_flag_string) |
                             set_bit(ast_flag_binary) |
                             set_bit(ast_flag_integer));
    node->integer = op;
    node->string = lex_kind_to_simple_s8(op);
    ast_append(node, left);
    ast_append(node, right);
    return node;
}
// Forward declaration: parenthesized sub-expressions recurse into the full
// expression parser.
ast_t *parse_expr(parser_t *par);

// Parses a primary expression: an integer literal, a real literal, or a
// parenthesized expression. Consumes one token up front; any other token kind
// is reported through lex_panicf() and 0 is returned.
ast_t *parse_lit_expr(parser_t *par) {
    lex_t *tok = parser_next(par);
    switch (tok->kind) {
    case lex_kind_integer: {
        ast_t *lit = create_ast(par, tok, set_bit(ast_flag_integer) | set_bit(ast_flag_string));
        lit->integer = tok->integer;
        lit->string = tok->string;
        return lit;
    }
    case lex_kind_real: {
        ast_t *lit = create_ast(par, tok, set_bit(ast_flag_real) | set_bit(ast_flag_string));
        lit->real = (double)tok->real;
        lit->string = tok->string;
        return lit;
    }
    case lex_kind_open_paren: {
        // The opening paren is already consumed; parse the inside and require ')'.
        ast_t *inner = parse_expr(par);
        parser_expect(par, lex_kind_close_paren);
        return inner;
    }
    default:
        lex_panicf(tok, "got invalid token of kind: %S while parsing expression", lex_kind_to_s8(tok->kind));
        return 0;
    }
}
// Parses a left-associative chain of `*`, `/` and `%` over primary
// expressions and returns the resulting tree.
ast_t *parse_mul_expr(parser_t *par) {
    ast_t *acc = parse_lit_expr(par);
    for (;;) {
        lex_kind_t kind = par->at->kind;
        if (kind != lex_kind_multiply && kind != lex_kind_divide && kind != lex_kind_modulo) {
            break;
        }
        lex_t *op_tok = parser_next(par);
        ast_t *rhs = parse_lit_expr(par);
        acc = create_ast_binary(par, op_tok, acc, op_tok->kind, rhs);
    }
    return acc;
}
// Parses a left-associative chain of `+` and `-`.
//
// Both operands are multiplicative expressions, so `1 + 2 * 3` groups as
// `1 + (2 * 3)`. (Parsing the right operand as a bare literal would stop after
// `1 + 2` and leave `* 3` unconsumed.)
ast_t *parse_add_expr(parser_t *par) {
    ast_t *left = parse_mul_expr(par);
    while (par->at->kind == lex_kind_plus || par->at->kind == lex_kind_minus) {
        lex_t *op = parser_next(par);
        ast_t *right = parse_mul_expr(par);
        left = create_ast_binary(par, op, left, op->kind, right);
    }
    return left;
}
// Parses a left-associative chain of logical-and operators (lex_kind_and).
//
// Operands are additive expressions, which makes logical-and bind looser than
// arithmetic and tighter than logical-or, matching C precedence.
ast_t *parse_logical_and_expr(parser_t *par) {
    ast_t *left = parse_add_expr(par);
    while (par->at->kind == lex_kind_and) {
        lex_t *op = parser_next(par);
        ast_t *right = parse_add_expr(par);
        left = create_ast_binary(par, op, left, op->kind, right);
    }
    return left;
}
// Parses a left-associative chain of logical-or operators (lex_kind_or).
//
// Both operands are parsed at the logical-and level so that the right-hand
// side may itself be a full tighter-binding expression (e.g. `a || b + c`),
// not just a single literal.
ast_t *parse_logical_or_expr(parser_t *par) {
    ast_t *left = parse_logical_and_expr(par);
    while (par->at->kind == lex_kind_or) {
        lex_t *op = parser_next(par);
        ast_t *right = parse_logical_and_expr(par);
        left = create_ast_binary(par, op, left, op->kind, right);
    }
    return left;
}
// Entry point of the expression grammar: starts at the loosest-binding level
// (logical-or).
ast_t *parse_expr(parser_t *par) {
    return parse_logical_or_expr(par);
}
// Convenience wrapper: lexes `stream` (reported under `file_name`), creates a
// parser over the tokens using `arena`, and parses a single expression.
ast_t *parse_expr_str(ma_arena_t *arena, char *file_name, char *stream) {
    lex_array_t lexed = lex_tokens(arena, file_name, stream);
    parser_t *parser = parser_make(arena, lexed.data);
    return parse_expr(parser);
}
// Evaluates an integer constant expression tree and returns its value.
//
// Supported nodes are integer literals and binary nodes built by
// create_ast_binary() with the operators + - * / % && ||. Anything else, an
// unknown operator, or a division/modulo by zero is reported through
// lex_panicf(); 0 is returned if that call comes back.
i64 eval_const_expr(ast_t *expr) {
    // Binary nodes are also flagged ast_flag_integer (their `integer` field
    // holds the operator kind), so the binary check has to come first;
    // otherwise the operator kind would be returned as the value.
    if (expr->flags & set_bit(ast_flag_binary)) {
        assert(expr->first && expr->last && expr->first != expr->last);
        i64 left = eval_const_expr(expr->first);
        i64 right = eval_const_expr(expr->last);
        switch (expr->integer) {
        case lex_kind_plus: return left + right;
        case lex_kind_minus: return left - right;
        case lex_kind_multiply: return left * right;
        case lex_kind_divide:
        case lex_kind_modulo:
            // Integer division/modulo by zero is undefined behaviour in C.
            if (right == 0) {
                lex_panicf(expr->pos, "division by zero in constant expression");
                return 0;
            }
            return expr->integer == lex_kind_divide ? left / right : left % right;
        case lex_kind_and: return left && right;
        case lex_kind_or: return left || right;
        default: lex_panicf(expr->pos, "unhandled binary operator: %S", lex_kind_to_s8(expr->integer));
        }
    } else if (expr->flags & set_bit(ast_flag_integer)) {
        return expr->integer;
    } else {
        ma_temp_t scratch = ma_begin_scratch();
        lex_panicf(expr->pos, "unhandled ast in const expression evaluation: %S", s8_serial_ast_flag_t(scratch.arena, expr->flags));
        ma_end_scratch(scratch);
    }
    return 0;
}
// Parses and evaluates the expression `x` (stringified) with this parser and
// asserts that the result equals what the C compiler computes for the same
// expression. Requires a `ma_temp_t scratch` variable in the calling scope.
//
// `x` is parenthesized in the comparison: without that, an argument such as
// `a || b` would expand to `value == a || b`, i.e. `(value == a) || b`, and
// the assertion would pass regardless of `value`.
#define test_expr(x) do {\
    lex_array_t tokens = lex_tokens(scratch.arena, "parser_test", #x);\
    parser_t *par = parser_make(scratch.arena, tokens.data);\
    ast_t *expr = parse_expr(par);\
    assert(expr != NULL);\
    i64 value = eval_const_expr(expr);\
    assert(value == (x));\
} while (0)
// Self-test for the expression parser and constant evaluator. Every case goes
// through test_expr(), which compares the parser's result with the value the
// C compiler computes for the same expression text. All allocations come from
// one scratch arena that is released at the end.
void run_parser_test() {
    ma_temp_t scratch = ma_begin_scratch();

    // Additive chains.
    test_expr(32 + 2 + 5 + 5);
    test_expr(32 - 2 + 5 - 5);

    // Mixed multiplicative / additive.
    test_expr(2 * 2 / 4 * 5 + 2 + 3);
    test_expr(2 * 5 * 5 / 2 + 2 - 1 - 1);

    // Parenthesized sub-expressions.
    test_expr(2 * (5 * 5) / 2 + (2 - 1 - 1));
    test_expr((2 * (5 * 5) / (2)) + (2 - 1 - 1));

    // Modulo and logical operators.
    test_expr(10 % 3);
    test_expr(10 % 3 + 4 || 2);
    test_expr(10 % 3 + 4 || 2 && (4 && 2) || 3 && 1 || 0);

    ma_end_scratch(scratch);
}
// Parses one member declaration of the shape
//     <type-ident> '*'* <name-ident> ( '[' <integer>? ']' )*
// The member name is written to `*name`; the returned node describes the type.
// Each `*` wraps the current type in a type_pointer node and each `[...]`
// wraps it in a type_array node, with the wrapped type as the only child.
// Every node's `string` holds the spelling built so far (e.g. "int*[4]").
// The trailing `;` is not consumed here.
ast_t *parse_struct_mem(parser_t *par, s8_t *name) {
    lex_t *base_tok = parser_expect(par, lex_kind_ident);
    ast_t *current = create_ast(par, base_tok, set_bit(ast_flag_type_name) | set_bit(ast_flag_string));
    current->string = base_tok->string;

    // Pointer levels.
    for (;;) {
        if (!parser_match(par, lex_kind_multiply)) {
            break;
        }
        ast_t *ptr = create_ast(par, par->at, set_bit(ast_flag_type_pointer) | set_bit(ast_flag_string));
        ast_append(ptr, current);
        ptr->string = s8_fmt(par->arena, "%S*", current->string);
        current = ptr;
    }

    lex_t *name_tok = parser_expect(par, lex_kind_ident);
    *name = name_tok->string;

    // Array dimensions; the size is optional.
    for (;;) {
        if (!parser_match(par, lex_kind_open_bracket)) {
            break;
        }
        ast_t *arr = create_ast(par, par->at, set_bit(ast_flag_type_array) | set_bit(ast_flag_string));
        ast_append(arr, current);
        lex_t *size_tok = parser_match(par, lex_kind_integer);
        if (!size_tok) {
            arr->string = s8_fmt(par->arena, "%S[]", current->string);
        } else {
            arr->flags |= set_bit(ast_flag_integer);
            arr->integer = (int)size_tok->integer;
            arr->string = s8_fmt(par->arena, "%S[%d]", current->string, (int)arr->integer);
        }
        parser_expect(par, lex_kind_close_bracket);
        current = arr;
    }
    return current;
}
// Lexes `code` and collects the enum and struct declarations found in it.
//
// Returns a root node whose `string` is a copy of `file` and whose children
// are, in source order:
//   - enum nodes, parsed from   enum { A, B, ... } name;
//     with one enum_member child per enumerator,
//   - struct nodes, parsed from struct name { <member>; ... };
//     with one struct_member child per member; each member node carries the
//     member name in `string` and its type tree (see parse_struct_mem) as its
//     single child.
// Any token that starts neither form is skipped.
ast_t *parse_decls(ma_arena_t *arena, char *file, char *code) {
    lex_array_t lexed = lex_tokens(arena, file, code);
    parser_t *par = parser_make(arena, lexed.data);

    ast_t *root = create_ast(par, par->at, set_bit(ast_flag_string));
    root->string = s8_copy_char(arena, file);

    while (par->at->kind != lex_kind_eof) {
        lex_t *start = par->at;

        if (parser_matchi(par, s8_lit("enum"))) {
            ast_t *decl = create_ast(par, start, set_bit(ast_flag_string) | set_bit(ast_flag_enum));
            ast_append(root, decl);
            parser_expect(par, lex_kind_open_brace);
            for (;;) {
                if (par->at->kind != lex_kind_ident) {
                    break;
                }
                lex_t *ident = parser_expect(par, lex_kind_ident);
                ast_t *member = create_ast(par, ident, set_bit(ast_flag_enum_member) | set_bit(ast_flag_string));
                member->string = ident->string;
                ast_append(decl, member);
                // TODO: explicit enumerator values (`= expr`) are not parsed yet.
                if (!parser_match(par, lex_kind_comma)) {
                    break;
                }
            }
            parser_expect(par, lex_kind_close_brace);
            // The enum's name follows the closing brace.
            decl->string = parser_expect(par, lex_kind_ident)->string;
            parser_expect(par, lex_kind_semicolon);
            continue;
        }

        if (parser_matchi(par, s8_lit("struct"))) {
            ast_t *decl = create_ast(par, start, set_bit(ast_flag_string) | set_bit(ast_flag_struct));
            ast_append(root, decl);
            decl->string = parser_expect(par, lex_kind_ident)->string;
            parser_expect(par, lex_kind_open_brace);
            while (par->at->kind != lex_kind_close_brace) {
                ast_t *member = create_ast(par, par->at, set_bit(ast_flag_struct_member) | set_bit(ast_flag_var) | set_bit(ast_flag_string));
                ast_append(decl, member);
                ast_t *member_type = parse_struct_mem(par, &member->string);
                ast_append(member, member_type);
                parser_expect(par, lex_kind_semicolon);
            }
            parser_expect(par, lex_kind_close_brace);
            parser_expect(par, lex_kind_semicolon);
            continue;
        }

        // Not a declaration we understand: skip the token.
        parser_next(par);
    }
    return root;
}
// Parses a `|`-separated table into a tree: the returned node has one child
// per row, and each row has one child per cell.
//
// Cells may be identifiers or strings (flag: string), integers (string |
// integer) or reals (string | real); the token text is always kept in
// `string`. Inside a row one optional `|` is skipped before each cell. A row
// ends when, at a cell position, another `|` or the end of input is found
// instead of a cell. Any other token is reported through lex_panicf().
//
// Each cell node's `pos` is the cell's own token (captured before it is
// consumed), so diagnostics point at the cell rather than at whatever follows.
ast_t *parse_table(ma_arena_t *arena, char *file, char *code) {
    lex_array_t tokens = lex_tokens(arena, file, code);
    parser_t *par = parser_make(arena, tokens.data);
    ast_t *table = create_ast(par, par->at, 0);
    while (par->at->kind != lex_kind_eof) {
        ast_t *row = create_ast(par, par->at, 0);
        ast_append(table, row);
        while (par->at->kind != lex_kind_eof) {
            parser_match(par, lex_kind_bit_or);
            lex_t *token = par->at;
            if (parser_match(par, lex_kind_ident) || parser_match(par, lex_kind_string)) {
                ast_t *col = create_ast(par, token, set_bit(ast_flag_string));
                ast_append(row, col);
                col->string = token->string;
            } else if (parser_match(par, lex_kind_integer)) {
                ast_t *col = create_ast(par, token, set_bit(ast_flag_string) | set_bit(ast_flag_integer));
                ast_append(row, col);
                col->string = token->string;
                col->integer = token->integer;
            } else if (parser_match(par, lex_kind_real)) {
                ast_t *col = create_ast(par, token, set_bit(ast_flag_string) | set_bit(ast_flag_real));
                ast_append(row, col);
                col->string = token->string;
                col->real = token->real;
            } else if (parser_match(par, lex_kind_bit_or) || parser_match(par, lex_kind_eof)) {
                break;
            } else {
                lex_panicf(par->at, "invalid token: %S", lex_kind_to_s8(par->at->kind));
            }
        }
    }
    return table;
}