typedef enum { #define AST_FLAG_XLIST\ X(null)\ X(string)\ X(integer)\ X(real)\ X(binary)\ X(enum)\ X(enum_member)\ X(struct)\ X(struct_member)\ X(var)\ X(type_name)\ X(type_pointer)\ X(type_array)\ #define X(NAME) ast_flag_##NAME, AST_FLAG_XLIST #undef X } ast_flag_t; typedef struct ast_t ast_t; struct ast_t { ast_flag_t flags; lex_t *pos; ast_t *next; ast_t *first; ast_t *last; i32 len; s8_t string; f64 real; i64 integer; }; s8_t s8_serial_ast_flag_t(ma_arena_t *arena, ast_flag_t flag) { sb8_t *sb = sb8_serial_begin(arena); #define X(NAME) if (flag & set_bit(ast_flag_##NAME)) sb8_printf(sb, #NAME); AST_FLAG_XLIST #undef X s8_t result = sb8_serial_end(sb); return result; } ast_t *create_ast(parser_t *par, lex_t *pos, ast_flag_t flags) { ast_t *result = ma_push_type(par->arena, ast_t); memset(result, 0, sizeof(ast_t)); result->flags = flags; result->pos = pos; return result; } void ast_append(ast_t *parent, ast_t *node) { SLLQ_APPEND(parent->first, parent->last, node); parent->len += 1; } ast_t *create_ast_binary(parser_t *par, lex_t *pos, ast_t *left, lex_kind_t op, ast_t *right) { ast_t *result = create_ast(par, pos, set_bit(ast_flag_string) | set_bit(ast_flag_binary) | set_bit(ast_flag_integer)); ast_append(result, left); ast_append(result, right); result->integer = op; result->string = lex_kind_to_simple_s8(op); return result; } ast_t *parse_expr(parser_t *par); ast_t *parse_lit_expr(parser_t *par) { lex_t *token = parser_next(par); if (token->kind == lex_kind_integer) { ast_t *result = create_ast(par, token, set_bit(ast_flag_integer) | set_bit(ast_flag_string)); result->integer = token->integer; result->string = token->string; return result; } else if (token->kind == lex_kind_real) { ast_t *result = create_ast(par, token, set_bit(ast_flag_real) | set_bit(ast_flag_string)); result->real = (double)token->real; result->string = token->string; return result; } else if (token->kind == lex_kind_open_paren) { ast_t *result = parse_expr(par); parser_expect(par, lex_kind_close_paren); return result; } else { lex_panicf(token, "got invalid token of kind: %S while parsing expression", lex_kind_to_s8(token->kind)); return 0; } } ast_t *parse_mul_expr(parser_t *par) { ast_t *left = parse_lit_expr(par); while (par->at->kind == lex_kind_multiply || par->at->kind == lex_kind_divide || par->at->kind == lex_kind_modulo) { lex_t *op = parser_next(par); left = create_ast_binary(par, op, left, op->kind, parse_lit_expr(par)); } return left; } ast_t *parse_add_expr(parser_t *par) { ast_t *left = parse_mul_expr(par); while (par->at->kind == lex_kind_plus || par->at->kind == lex_kind_minus) { lex_t *op = parser_next(par); left = create_ast_binary(par, op, left, op->kind, parse_lit_expr(par)); } return left; } ast_t *parse_logical_and_expr(parser_t *par) { ast_t *left = parse_add_expr(par); while (par->at->kind == lex_kind_or) { lex_t *op = parser_next(par); left = create_ast_binary(par, op, left, op->kind, parse_lit_expr(par)); } return left; } ast_t *parse_logical_or_expr(parser_t *par) { ast_t *left = parse_logical_and_expr(par); while (par->at->kind == lex_kind_or) { lex_t *op = parser_next(par); left = create_ast_binary(par, op, left, op->kind, parse_lit_expr(par)); } return left; } ast_t *parse_expr(parser_t *par) { ast_t *expr = parse_logical_or_expr(par); return expr; } ast_t *parse_expr_str(ma_arena_t *arena, char *file_name, char *stream) { lex_array_t tokens = lex_tokens(arena, file_name, stream); parser_t *par = parser_make(arena, tokens.data); ast_t *result = parse_expr(par); return result; } i64 eval_const_expr(ast_t *expr) { if (expr->flags & set_bit(ast_flag_integer)) { return expr->integer; } else if (expr->flags & set_bit(ast_flag_binary)) { assert(expr->first != expr->last); i64 left = eval_const_expr(expr->first); i64 right = eval_const_expr(expr->last); switch(expr->integer) { case lex_kind_plus: return left + right; case lex_kind_minus: return left - right; case lex_kind_multiply: return left * right; case lex_kind_divide: return left / right; case lex_kind_modulo: return left % right; case lex_kind_and: return left && right; case lex_kind_or: return left || right; default: lex_panicf(expr->pos, "unhandled binary operator: %S", lex_kind_to_s8(expr->integer)); } } else { ma_temp_t scratch = ma_begin_scratch(); lex_panicf(expr->pos, "unhandled ast in const expression evaluation: %S", s8_serial_ast_flag_t(scratch.arena, expr->flags)); ma_end_scratch(scratch); } return 0; } #define test_expr(x) do {\ lex_array_t tokens = lex_tokens(scratch.arena, "parser_test", #x);\ parser_t *par = parser_make(scratch.arena, tokens.data);\ ast_t *expr = parse_expr(par);\ assert(expr != NULL);\ i64 value = eval_const_expr(expr);\ assert(value == x);\ } while (0) void run_parser_test() { ma_temp_t scratch = ma_begin_scratch(); test_expr(32 + 2 + 5 + 5); test_expr(32 - 2 + 5 - 5); test_expr(2 * 2 / 4 * 5 + 2 + 3); test_expr(2 * 5 * 5 / 2 + 2 - 1 - 1); test_expr(2 * (5 * 5) / 2 + (2 - 1 - 1)); test_expr((2 * (5 * 5) / (2)) + (2 - 1 - 1)); test_expr(10 % 3); test_expr(10 % 3 + 4 || 2); test_expr(10 % 3 + 4 || 2 && (4 && 2) || 3 && 1 || 0); ma_end_scratch(scratch); } ast_t *parse_struct_mem(parser_t *par, s8_t *name) { lex_t *type_name = parser_expect(par, lex_kind_ident); ast_t *type = create_ast(par, type_name, set_bit(ast_flag_type_name) | set_bit(ast_flag_string)); type->string = type_name->string; while (parser_match(par, lex_kind_multiply)) { ast_t *pointer = create_ast(par, par->at, set_bit(ast_flag_type_pointer) | set_bit(ast_flag_string)); ast_append(pointer, type); pointer->string = s8_fmt(par->arena, "%S*", type->string); type = pointer; } *name = parser_expect(par, lex_kind_ident)->string; while (parser_match(par, lex_kind_open_bracket)) { ast_t *array = create_ast(par, par->at, set_bit(ast_flag_type_array) | set_bit(ast_flag_string)); ast_append(array, type); lex_t *num = parser_match(par, lex_kind_integer); if (num) { array->flags |= set_bit(ast_flag_integer); array->integer = (int)num->integer; array->string = s8_fmt(par->arena, "%S[%d]", type->string, (int)array->integer); } else { array->string = s8_fmt(par->arena, "%S[]", type->string); } parser_expect(par, lex_kind_close_bracket); type = array; } return type; } ast_t *parse_decls(ma_arena_t *arena, char *file, char *code) { lex_array_t tokens = lex_tokens(arena, file, code); parser_t *par = parser_make(arena, tokens.data); ast_t *result = create_ast(par, par->at, set_bit(ast_flag_string)); result->string = s8_copy_char(arena, file); for (;par->at->kind != lex_kind_eof;) { lex_t *pos = par->at; if (parser_matchi(par, s8_lit("enum"))) { ast_t *n = create_ast(par, pos, set_bit(ast_flag_string) | set_bit(ast_flag_enum)); ast_append(result, n); parser_expect(par, lex_kind_open_brace); while (par->at->kind == lex_kind_ident) { lex_t *val = parser_expect(par, lex_kind_ident); ast_t *mem = create_ast(par, val, set_bit(ast_flag_enum_member) | set_bit(ast_flag_string)); mem->string = val->string; ast_append(n, mem); // if (parser_match(par, lex_kind_assign)) { // parse_expr(); // } if (!parser_match(par, lex_kind_comma)) break; } parser_expect(par, lex_kind_close_brace); n->string = parser_expect(par, lex_kind_ident)->string; parser_expect(par, lex_kind_semicolon); } else if (parser_matchi(par, s8_lit("struct"))) { ast_t *n = create_ast(par, pos, set_bit(ast_flag_string) | set_bit(ast_flag_struct)); ast_append(result, n); n->string = parser_expect(par, lex_kind_ident)->string; parser_expect(par, lex_kind_open_brace); while (par->at->kind != lex_kind_close_brace) { ast_t *mem = create_ast(par, par->at, set_bit(ast_flag_struct_member) | set_bit(ast_flag_var) | set_bit(ast_flag_string)); ast_append(n, mem); ast_t *type = parse_struct_mem(par, &mem->string); ast_append(mem, type); parser_expect(par, lex_kind_semicolon); } parser_expect(par, lex_kind_close_brace); parser_expect(par, lex_kind_semicolon); } else { parser_next(par); } } return result; } ast_t *parse_table(ma_arena_t *arena, char *file, char *code) { lex_array_t tokens = lex_tokens(arena, file, code); parser_t *par = parser_make(arena, tokens.data); ast_t *table = create_ast(par, par->at, 0); while (par->at->kind != lex_kind_eof) { ast_t *row = create_ast(par, par->at, 0); ast_append(table, row); while (par->at->kind != lex_kind_eof) { parser_match(par, lex_kind_bit_or); lex_t *token = par->at; if (parser_match(par, lex_kind_ident) || parser_match(par, lex_kind_string)) { ast_t *col = create_ast(par, par->at, set_bit(ast_flag_string)); ast_append(row, col); col->string = token->string; } else if (parser_match(par, lex_kind_integer)) { ast_t *col = create_ast(par, par->at, set_bit(ast_flag_string) | set_bit(ast_flag_integer)); ast_append(row, col); col->string = token->string; col->integer = token->integer; } else if (parser_match(par, lex_kind_real)) { ast_t *col = create_ast(par, par->at, set_bit(ast_flag_string) | set_bit(ast_flag_real)); ast_append(row, col); col->string = token->string; col->real = token->real; } else if (parser_match(par, lex_kind_bit_or) || parser_match(par, lex_kind_eof)) { break; } else { lex_panicf(par->at, "invalid token: %S", lex_kind_to_s8(par->at->kind)); } } } return table; }