#include #include #include #include #include #include #include #include #include "base.c" #include "meta_gen.c" #include "lex.c" typedef struct Parser { Token *at; Token *end; } Parser; typedef enum Ast_Kind { AST_NONE, AST_ERROR, AST_INT, AST_UNARY, AST_BINARY, } Ast_Kind; typedef struct Ast Ast; struct Ast { Ast_Kind kind; Token *pos; union { uint64_t u; struct { Token_Kind op; Ast *l; Ast *r; }; char *error; }; }; Token *next_token(Parser *p) { if (p->at < p->end) { return p->at++; } return p->at; } Token *match_token(Parser *p, Token_Kind kind) { if (p->at->kind == kind) { return next_token(p); } return NULL; } Token *expect_token(Parser *p, Token_Kind kind) { if (p->at->kind == kind) { return next_token(p); } panicf("expected token kind: %s, got instead: %s", token_to_name(p->at->kind), token_to_name(kind)); } Ast *create_ast(Token *token, Ast_Kind kind) { Ast *result = calloc(1, sizeof(Ast)); result->pos = token; result->kind = kind; return result; } Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) { Ast *result = create_ast(token, AST_BINARY); result->op = op; result->l = left; result->r = right; return result; } Ast *parse_expr(Parser *p, int power_of_binding_to_right); Ast *parse_atom(Parser *p) { Token *token = p->at; Ast *n = NULL; if (match_token(p, TOK_INT)) { n = create_ast(token, AST_INT); n->u = token->u; } else if (match_token(p, TOK_LPAREN)) { // @todo: do a comma list here n = parse_expr(p, 0); expect_token(p, TOK_RPAREN); } else { panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, token->len, token->str, token_to_name(token->kind), token->kind); } return n; } int get_binding_power(Token *tok) { switch (tok->kind) { case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return 120; case TOK_PLUS: case TOK_MINUS: return 110; case TOK_LSHIFT: case TOK_RSHIFT: return 100; case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: return 90; case TOK_EQ: case TOK_NEQ: return 80; case TOK_BITAND: return 70; case TOK_BITXOR: return 60; case TOK_BITOR: return 50; case TOK_AND: return 40; case TOK_OR: return 30; default: return 0; } } Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) { switch (tok->kind) { case TOK_PLUS: case TOK_MINUS: case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: case TOK_EQ: case TOK_NEQ: case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: case TOK_BITAND: case TOK_BITOR: case TOK_BITXOR: case TOK_AND: case TOK_OR: case TOK_LSHIFT: case TOK_RSHIFT: { return create_binary_expr(tok, tok->kind, left, parse_expr(p, get_binding_power(tok))); } break; default: panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, tok->len, tok->str, token_to_name(tok->kind), tok->kind); } return NULL; } Ast *parse_expr(Parser *p, int power_of_binding_to_right) { Ast *n = parse_atom(p); while (get_binding_power(p->at) > power_of_binding_to_right) { Token *tok = next_token(p); n = parse_valid_left_binding(p, tok, n); } return n; } int64_t eval_expr(Ast *n) { switch (n->kind) { case AST_INT: return (int64_t)n->u; case AST_BINARY: { int64_t left = eval_expr(n->l); int64_t right = eval_expr(n->r); switch (n->op) { case TOK_PLUS: return left + right; case TOK_MINUS: return left - right; case TOK_STAR: return left * right; case TOK_SLASH: return left / right; case TOK_PERCENT: return left % right; case TOK_EQ: return left == right; case TOK_NEQ: return left != right; case TOK_LT: return left < right; case TOK_LEQ: return left <= right; case TOK_GT: return left > right; case TOK_GEQ: return left >= right; case TOK_BITAND: return left & right; case TOK_BITOR: return left | right; case TOK_BITXOR: return left ^ right; case TOK_AND: return left && right; case TOK_OR: return left || right; case TOK_LSHIFT: return left << right; case TOK_RSHIFT: return left >> right; default: panicf("invalid token kind in eval_expr, binary"); } } break; default: panicf("invalid ast kind in eval_expr"); } } void print_expr(Ast *n) { switch (n->kind) { case AST_INT: printf("%lu", n->u); break; case AST_BINARY: { print_expr(n->l); printf(" %s ", token_to_op(n->op)); print_expr(n->r); } break; default: panicf("encountered invalid ast kind in %s of kind: %d\n", __FUNCTION__, n->kind); } } void parser_test(void) { #define TEST_EVAL(expr) do { \ Token_Array tokens = lex_file("eval_test", (#expr), strlen((#expr)));\ Parser p = {tokens.data, tokens.data + tokens.len};\ Ast *result = parse_expr(&p, 0);\ int64_t left = eval_expr(result);\ int64_t right = (expr);\ if (left != right) {\ printf("%s:%d expected: %ld, got: %ld\n expression: ", __FILE__, __LINE__, left, right);\ print_expr(result);\ printf("\n");\ }\ } while (0) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wparentheses" TEST_EVAL(32+5-4); TEST_EVAL(16/2/2); TEST_EVAL(9*4/6); TEST_EVAL(17%5); TEST_EVAL(5125-42|(4&3)^2|2%1242); TEST_EVAL((45%2)^(23&3)); TEST_EVAL(1<16*2); TEST_EVAL(16>1+2); TEST_EVAL(4<=2+2); TEST_EVAL(5>=2+2); TEST_EVAL(4==2+2); TEST_EVAL(5!=2+2); TEST_EVAL(1+1+1+1+2-2-3-4-5); TEST_EVAL(5%2^5&6|3); TEST_EVAL(6&3); TEST_EVAL(6|3); TEST_EVAL(6^3); TEST_EVAL(1&&2); TEST_EVAL(0||3); TEST_EVAL(1||0&&0); TEST_EVAL(8<<2); TEST_EVAL(32>>3); TEST_EVAL((2+3)*(4+5)); TEST_EVAL(9>3&1); TEST_EVAL(8|1<4); TEST_EVAL(7<=3+4); TEST_EVAL(8>=2*4); TEST_EVAL(4==2+2); TEST_EVAL(5!=2+2); TEST_EVAL(1&&2); TEST_EVAL(0||3); TEST_EVAL(1||0&&0); TEST_EVAL(8<<2); TEST_EVAL(32>>3); TEST_EVAL(1+2<<3); TEST_EVAL(16>>1+1); #pragma clang diagnostic pop printf("parser tests passed\n"); } void lex_test(void) { char *src = "12 + 34.5 * 6\n- 7 % 2 / 1 == 1 != 2 <= 3 >= 4 && 3 || 4 << 1 >> 2"; Lexer lex = make_lexer("test.c", src, (int)strlen(src)); assert_token(lex_token(&lex), TOK_INT, "12", 0, 0); assert_token(lex_token(&lex), TOK_PLUS, "+", 0, 3); assert_token(lex_token(&lex), TOK_FLOAT, "34.5", 0, 5); assert_token(lex_token(&lex), TOK_STAR, "*", 0, 10); assert_token(lex_token(&lex), TOK_INT, "6", 0, 12); assert_token(lex_token(&lex), TOK_MINUS, "-", 1, 0); assert_token(lex_token(&lex), TOK_INT, "7", 1, 2); assert_token(lex_token(&lex), TOK_PERCENT, "%", 1, 4); assert_token(lex_token(&lex), TOK_INT, "2", 1, 6); assert_token(lex_token(&lex), TOK_SLASH, "/", 1, 8); assert_token(lex_token(&lex), TOK_INT, "1", 1, 10); assert_token(lex_token(&lex), TOK_EQ, "==", 1, 12); assert_token(lex_token(&lex), TOK_INT, "1", 1, 15); assert_token(lex_token(&lex), TOK_NEQ, "!=", 1, 17); assert_token(lex_token(&lex), TOK_INT, "2", 1, 20); assert_token(lex_token(&lex), TOK_LEQ, "<=", 1, 22); assert_token(lex_token(&lex), TOK_INT, "3", 1, 25); assert_token(lex_token(&lex), TOK_GEQ, ">=", 1, 27); assert_token(lex_token(&lex), TOK_INT, "4", 1, 30); assert_token(lex_token(&lex), TOK_AND, "&&", 1, 32); assert_token(lex_token(&lex), TOK_INT, "3", 1, 35); assert_token(lex_token(&lex), TOK_OR, "||", 1, 37); assert_token(lex_token(&lex), TOK_INT, "4", 1, 40); assert_token(lex_token(&lex), TOK_LSHIFT, "<<", 1, 42); assert_token(lex_token(&lex), TOK_INT, "1", 1, 45); assert_token(lex_token(&lex), TOK_RSHIFT, ">>", 1, 47); assert_token(lex_token(&lex), TOK_INT, "2", 1, 50); Token eof = lex_token(&lex); assert(eof.kind == TOK_EOF); assert(eof.len == 0); assert(eof.line == 1); assert(eof.column == 51); Token_Array array = lex_file("test.c", src, (int)strlen(src)); assert(array.len == 28); printf("lexer tests passed\n"); } int main() { vec_test(); lex_test(); parser_test(); }