Add panicf()/expect_token(), lex the remaining single-character tokens,
extend the expression parser and evaluator with comparison, bitwise and
logical operators, and generate a token_to_name[] table.

Note: the header names on the #include lines of the first main.c hunk are
absent from this copy of the patch and are reproduced as found.

diff --git a/base.c b/base.c
index abfaa2a..d77b2c5 100644
--- a/base.c
+++ b/base.c
@@ -1,3 +1,21 @@
+// Abort with a printf-style message tagged with the call site's file and line.
+#define panicf(...) base_panicf(__FILE__, __LINE__, __VA_ARGS__)
+
+// Prints "file:line: <formatted message>" plus a newline to stderr, flushes
+// stderr and stdout, then exits with status 1. Never returns.
+_Noreturn
+void base_panicf(const char *file, int line, const char *fmt, ...) {
+    fprintf(stderr, "%s:%d: ", file, line);
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+    fprintf(stderr, "\n");
+    fflush(stderr);
+    fflush(stdout);
+    exit(1);
+}
+
 #define Vec(T) struct { T *data; int len; int cap; }
 
 typedef Vec(void) VecVoid;
diff --git a/build.sh b/build.sh
index 887abb6..21b8726 100644
--- a/build.sh
+++ b/build.sh
@@ -4,7 +4,7 @@ if [[ ! -e build ]]; then
     mkdir build
 fi
 cd build
-clang -o meta ../meta.c -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
+clang -o meta "$(realpath ../meta.c)" -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
 ./meta > ../meta_gen.c
-clang -o main ../main.c -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
+clang -o main "$(realpath ../main.c)" -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
 ./main
\ No newline at end of file
diff --git a/main.c b/main.c
index 5a0313c..d735d4a 100644
--- a/main.c
+++ b/main.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 
 #include "base.c"
 #include "meta_gen.c"
@@ -115,13 +116,31 @@ Token lex_token(Lexer *lex) {
 
     switch (c) {
     case 0: t.kind = TOK_EOF; break;
+    case '(': t.kind = TOK_LPAREN; break;
+    case ')': t.kind = TOK_RPAREN; break;
+    case '[': t.kind = TOK_LBRACKET; break;
+    case ']': t.kind = TOK_RBRACKET; break;
+    case '{': t.kind = TOK_LBRACE; break;
+    case '}': t.kind = TOK_RBRACE; break;
+    case ',': t.kind = TOK_COMMA; break;
+    case '.': t.kind = TOK_DOT; break;
+    case ':': t.kind = TOK_COLON; break;
+    case ';': t.kind = TOK_SEMICOLON; break;
+    case '?': t.kind = TOK_QUESTION; break;
+    case '#': t.kind = TOK_HASH; break;
     case '+': t.kind = TOK_PLUS; break;
     case '-': t.kind = TOK_MINUS; break;
     case '*': t.kind = TOK_STAR; break;
     case '/': t.kind = TOK_SLASH; break;
     case '%': t.kind = TOK_PERCENT; break;
-    case '{': t.kind = TOK_LBRACE; break;
-    case '}': t.kind = TOK_RBRACE; break;
+    case '=': t.kind = TOK_ASSIGN; break;
+    case '<': t.kind = TOK_LT; break;
+    case '>': t.kind = TOK_GT; break;
+    case '!': t.kind = TOK_NOT; break;
+    case '~': t.kind = TOK_BITNOT; break;
+    case '&': t.kind = TOK_BITAND; break;
+    case '|': t.kind = TOK_BITOR; break;
+    case '^': t.kind = TOK_BITXOR; break;
     default: {
         // @todo: lexer perhaps should have a static buffer of size 1024, error message
         // should be put there and piped to the upper program. The token should be filled
@@ -198,6 +217,15 @@ Token *match_token(Parser *p, Token_Kind kind) {
     return NULL;
 }
 
+// Consumes and returns the current token when it is of `kind`; otherwise
+// panics, reporting the expected kind and the kind actually found.
+Token *expect_token(Parser *p, Token_Kind kind) {
+    if (p->at->kind == kind) {
+        return next_token(p);
+    }
+    panicf("expected token kind: %s, got instead: %s", token_to_name[kind], token_to_name[p->at->kind]);
+}
+
 Ast *create_ast(Token *token, Ast_Kind kind) {
     Ast *result = calloc(1, sizeof(Ast));
     result->pos = token;
@@ -224,28 +252,37 @@ Ast *parse_atom(Parser *p) {
     } else if (match_token(p, TOK_LPAREN)) {
         // @todo: do a comma list here
         n = parse_expr(p, 0);
+        expect_token(p, TOK_RPAREN);
     } else {
-        fprintf(stderr, "encountered invalid token while parsing atom: %.*s\n", token->len, token->str);
-        exit(1);
+        panicf("unknown token in %s. %.*s (%s/%d), ", __func__, token->len, token->str, token_to_name[token->kind], token->kind);
     }
     return n;
 }
 
 int get_binding_power(Token *tok) {
     switch (tok->kind) {
-    case TOK_LSHIFT: case TOK_RSHIFT: return 100;
-    case TOK_PLUS: case TOK_MINUS: return 110;
     case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return 120;
+    case TOK_PLUS: case TOK_MINUS: return 110;
+    case TOK_LSHIFT: case TOK_RSHIFT: return 100;
+    case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: return 90;
+    case TOK_EQ: case TOK_NEQ: return 80;
+    case TOK_BITAND: return 70;
+    case TOK_BITXOR: return 60;
+    case TOK_BITOR: return 50;
+    case TOK_AND: return 40;
+    case TOK_OR: return 30;
     default: return 0;
     }
 }
 
 Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) {
     switch (tok->kind) {
-    case TOK_LSHIFT: case TOK_RSHIFT: case TOK_PLUS: case TOK_MINUS: case TOK_SLASH: case TOK_STAR: case TOK_PERCENT: {
+    case TOK_PLUS: case TOK_MINUS: case TOK_STAR: case TOK_SLASH: case TOK_PERCENT:
+    case TOK_EQ: case TOK_NEQ: case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: case TOK_BITAND:
+    case TOK_BITOR: case TOK_BITXOR: case TOK_AND: case TOK_OR: case TOK_LSHIFT: case TOK_RSHIFT: {
         return create_binary_expr(tok, tok->kind, left, parse_expr(p, get_binding_power(tok)));
     } break;
-    default: fprintf(stderr, "ERROR"); exit(1);
+    default: panicf("unknown token in %s. %.*s (%s/%d), ", __func__, tok->len, tok->str, token_to_name[tok->kind], tok->kind);
     }
     return NULL;
 }
@@ -271,16 +308,23 @@ int64_t eval_expr(Ast *n) {
         case TOK_STAR: return left * right;
         case TOK_SLASH: return left / right;
         case TOK_PERCENT: return left % right;
-        default: {
-            fprintf(stderr, "invalid token kind in eval_expr, binary");
-            exit(1);
-        }
+        case TOK_EQ: return left == right;
+        case TOK_NEQ: return left != right;
+        case TOK_LT: return left < right;
+        case TOK_LEQ: return left <= right;
+        case TOK_GT: return left > right;
+        case TOK_GEQ: return left >= right;
+        case TOK_BITAND: return left & right;
+        case TOK_BITOR: return left | right;
+        case TOK_BITXOR: return left ^ right;
+        case TOK_AND: return left && right;
+        case TOK_OR: return left || right;
+        case TOK_LSHIFT: return left << right;
+        case TOK_RSHIFT: return left >> right;
+        default: panicf("invalid token kind in eval_expr, binary");
         }
     } break;
-    default: {
-        fprintf(stderr, "invalid ast kind in eval_expr");
-        exit(1);
-    } break;
+    default: panicf("invalid ast kind in eval_expr");
     }
 }
 
@@ -292,20 +336,38 @@ void print_expr(Ast *n) {
         printf(" %s ", token_to_op[n->op]);
         print_expr(n->r);
     } break;
-    default: fprintf(stderr, "memes"); exit(1);
+    default: panicf("encountered invalid ast kind in %s of kind: %d", __func__, n->kind);
     }
 }
 
 void parser_test(void) {
 #define TEST_EVAL(expr) do { \
-    Token_Array tokens = lex_file("eval_test", #expr, strlen(#expr));\
+    Token_Array tokens = lex_file("eval_test", (#expr), strlen((#expr)));\
     Parser p = {tokens.data, tokens.data + tokens.len};\
     Ast *result = parse_expr(&p, 0);\
-    assert(eval_expr(result) == expr);\
+    int64_t left = eval_expr(result);\
+    int64_t right = (expr);\
+    if (left != right) {\
+        printf("%s:%d expected: %lld, got: %lld\n expression: ", __FILE__, __LINE__, (long long)right, (long long)left);\
+        print_expr(result);\
+        printf("\n");\
+    }\
 } while (0)
 
+// -Wparentheses is silenced because the expressions below mix operators without parentheses.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wparentheses"
+
     TEST_EVAL(32+5-4);
     TEST_EVAL(16/2/2);
+    TEST_EVAL(5125-42|(4&3)^2|2%1242);
+    TEST_EVAL((45%2)^(23&3));
+    TEST_EVAL(1<16*2);
+    TEST_EVAL(1+1+1+1+2-2-3-4-5);
+    TEST_EVAL(5%2^5&6|3);
+
+#pragma clang diagnostic pop
+
     printf("parser tests passed\n");
 }
 
diff --git a/meta.c b/meta.c
index cc51862..30c9693 100644
--- a/meta.c
+++ b/meta.c
@@ -86,4 +86,12 @@ int main() {
         }
     }
     printf("};\n");
+
+    printf("char *token_to_name[] = {\n");
+    for (int i = 0; i < ilen(kinds); i += 1) {
+        if (kinds[i].name) {
+            printf(" [TOK_%s] = \"%s\",\n", kinds[i].name, kinds[i].name);
+        }
+    }
+    printf("};\n");
 }
diff --git a/meta_gen.c b/meta_gen.c
index 9756ea3..e98a878 100644
--- a/meta_gen.c
+++ b/meta_gen.c
@@ -106,3 +106,61 @@ char *token_to_op[] = {
  [TOK_LSHIFT] = "<<",
  [TOK_RSHIFT] = ">>",
 };
+char *token_to_name[] = {
+ [TOK_EOF] = "EOF",
+ [TOK_ERROR] = "ERROR",
+ [TOK_IDENT] = "IDENT",
+ [TOK_KEYWORD] = "KEYWORD",
+ [TOK_INT] = "INT",
+ [TOK_FLOAT] = "FLOAT",
+ [TOK_CHAR] = "CHAR",
+ [TOK_STRING] = "STRING",
+ [TOK_LPAREN] = "LPAREN",
+ [TOK_RPAREN] = "RPAREN",
+ [TOK_LBRACKET] = "LBRACKET",
+ [TOK_RBRACKET] = "RBRACKET",
+ [TOK_LBRACE] = "LBRACE",
+ [TOK_RBRACE] = "RBRACE",
+ [TOK_COMMA] = "COMMA",
+ [TOK_DOT] = "DOT",
+ [TOK_ARROW] = "ARROW",
+ [TOK_ELLIPSIS] = "ELLIPSIS",
+ [TOK_COLON] = "COLON",
+ [TOK_SEMICOLON] = "SEMICOLON",
+ [TOK_QUESTION] = "QUESTION",
+ [TOK_HASH] = "HASH",
+ [TOK_HASHHASH] = "HASHHASH",
+ [TOK_PLUS] = "PLUS",
+ [TOK_MINUS] = "MINUS",
+ [TOK_STAR] = "STAR",
+ [TOK_SLASH] = "SLASH",
+ [TOK_PERCENT] = "PERCENT",
+ [TOK_INC] = "INC",
+ [TOK_DEC] = "DEC",
+ [TOK_ASSIGN] = "ASSIGN",
+ [TOK_PLUS_ASSIGN] = "PLUS_ASSIGN",
+ [TOK_MINUS_ASSIGN] = "MINUS_ASSIGN",
+ [TOK_MUL_ASSIGN] = "MUL_ASSIGN",
+ [TOK_DIV_ASSIGN] = "DIV_ASSIGN",
+ [TOK_MOD_ASSIGN] = "MOD_ASSIGN",
+ [TOK_LSHIFT_ASSIGN] = "LSHIFT_ASSIGN",
+ [TOK_RSHIFT_ASSIGN] = "RSHIFT_ASSIGN",
+ [TOK_AND_ASSIGN] = "AND_ASSIGN",
+ [TOK_XOR_ASSIGN] = "XOR_ASSIGN",
+ [TOK_OR_ASSIGN] = "OR_ASSIGN",
+ [TOK_EQ] = "EQ",
+ [TOK_NEQ] = "NEQ",
+ [TOK_LT] = "LT",
+ [TOK_LEQ] = "LEQ",
+ [TOK_GT] = "GT",
+ [TOK_GEQ] = "GEQ",
+ [TOK_NOT] = "NOT",
+ [TOK_BITNOT] = "BITNOT",
+ [TOK_BITAND] = "BITAND",
+ [TOK_BITOR] = "BITOR",
+ [TOK_BITXOR] = "BITXOR",
+ [TOK_AND] = "AND",
+ [TOK_OR] = "OR",
+ [TOK_LSHIFT] = "LSHIFT",
+ [TOK_RSHIFT] = "RSHIFT",
+};