diff --git a/base.c b/base.c index d77b2c5..a3e96a0 100644 --- a/base.c +++ b/base.c @@ -2,7 +2,7 @@ _Noreturn void base_panicf(char *file, int line, const char *fmt, ...) { - fprintf(stderr, "%s:%d", file, line); + fprintf(stderr, "%s:%d: ", file, line); va_list args; va_start(args, fmt); vfprintf(stderr, fmt, args); diff --git a/build.sh b/build.sh index 21b8726..3df45d8 100644 --- a/build.sh +++ b/build.sh @@ -1,4 +1,20 @@ -set -euo pipefail +# set -euo pipefail + +assert_eq() { + expected="$1" + actual="$2" + if [ "$expected" != "$actual" ]; then + echo "assert failed: expected '$expected', got '$actual'" + exit 1 + fi +} + +verify_expr() { + ./main "$1" + clang out.s -o test + ./test + assert_eq $? $2 +} if [[ ! -e build ]]; then mkdir build @@ -7,4 +23,10 @@ cd build clang -o meta $(realpath ../meta.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths ./meta > ../meta_gen.c clang -o main $(realpath ../main.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths -./main \ No newline at end of file + +./main +verify_expr "2+1-1" 2 +verify_expr "(2+1-1)*3" 6 + + +echo done \ No newline at end of file diff --git a/emit_asm_x64.c b/emit_asm_x64.c new file mode 100644 index 0000000..054c028 --- /dev/null +++ b/emit_asm_x64.c @@ -0,0 +1,34 @@ +void emit_expr(FILE *file, Ast *n) { + switch (n->kind) { + case AST_INT: { + fprintf(file, " mov rax, %lu\n", n->u); + } break; + case AST_BINARY: { + emit_expr(file, n->l); + fprintf(file, " push rax\n"); + emit_expr(file, n->r); + fprintf(file, " pop rcx\n"); + + if (n->op == TOK_PLUS) { + fprintf(file, " add rax, rcx\n"); + } else if (n->op == TOK_STAR) { + fprintf(file, " imul rax, rcx\n"); + } else if (n->op == TOK_MINUS) { + fprintf(file, " mov rdx, rax\n"); + fprintf(file, " mov rax, rcx\n"); + fprintf(file, " sub rax, rdx\n"); + } else { + panicf("error"); + } + } break; + default: panicf("error"); + } +} + +void emit_program(FILE *file, Ast *n) { + fprintf(file, ".intel_syntax noprefix\n"); + fprintf(file, ".global main\n"); + fprintf(file, "main:\n"); + emit_expr(file, n); + fprintf(file, " ret\n"); +} diff --git a/lex.c b/lex.c index ef43d56..53d85de 100644 --- a/lex.c +++ b/lex.c @@ -191,4 +191,49 @@ Token_Array lex_file(char *file, char *src, int len) { } } return result; -} \ No newline at end of file +} + + +void lex_test(void) { + char *src = "12 + 34.5 * 6\n- 7 % 2 / 1 == 1 != 2 <= 3 >= 4 && 3 || 4 << 1 >> 2"; + Lexer lex = make_lexer("test.c", src, (int)strlen(src)); + + assert_token(lex_token(&lex), TOK_INT, "12", 0, 0); + assert_token(lex_token(&lex), TOK_PLUS, "+", 0, 3); + assert_token(lex_token(&lex), TOK_FLOAT, "34.5", 0, 5); + assert_token(lex_token(&lex), TOK_STAR, "*", 0, 10); + assert_token(lex_token(&lex), TOK_INT, "6", 0, 12); + assert_token(lex_token(&lex), TOK_MINUS, "-", 1, 0); + assert_token(lex_token(&lex), TOK_INT, "7", 1, 2); + assert_token(lex_token(&lex), TOK_PERCENT, "%", 1, 4); + assert_token(lex_token(&lex), TOK_INT, "2", 1, 6); + assert_token(lex_token(&lex), TOK_SLASH, "/", 1, 8); + assert_token(lex_token(&lex), TOK_INT, "1", 1, 10); + assert_token(lex_token(&lex), TOK_EQ, "==", 1, 12); + assert_token(lex_token(&lex), TOK_INT, "1", 1, 15); + assert_token(lex_token(&lex), TOK_NEQ, "!=", 1, 17); + assert_token(lex_token(&lex), TOK_INT, "2", 1, 20); + assert_token(lex_token(&lex), TOK_LEQ, "<=", 1, 22); + assert_token(lex_token(&lex), TOK_INT, "3", 1, 25); + assert_token(lex_token(&lex), TOK_GEQ, ">=", 1, 27); + assert_token(lex_token(&lex), TOK_INT, "4", 1, 30); + assert_token(lex_token(&lex), TOK_AND, "&&", 1, 32); + assert_token(lex_token(&lex), TOK_INT, "3", 1, 35); + assert_token(lex_token(&lex), TOK_OR, "||", 1, 37); + assert_token(lex_token(&lex), TOK_INT, "4", 1, 40); + assert_token(lex_token(&lex), TOK_LSHIFT, "<<", 1, 42); + assert_token(lex_token(&lex), TOK_INT, "1", 1, 45); + assert_token(lex_token(&lex), TOK_RSHIFT, ">>", 1, 47); + assert_token(lex_token(&lex), TOK_INT, "2", 1, 50); + + Token eof = lex_token(&lex); + assert(eof.kind == TOK_EOF); + assert(eof.len == 0); + assert(eof.line == 1); + assert(eof.column == 51); + + Token_Array array = lex_file("test.c", src, (int)strlen(src)); + assert(array.len == 28); + + printf("lexer tests passed\n"); +} diff --git a/main.c b/main.c index 65d2a59..96de294 100644 --- a/main.c +++ b/main.c @@ -9,277 +9,21 @@ #include "base.c" #include "meta_gen.c" #include "lex.c" +#include "parser.c" +#include "emit_asm_x64.c" -typedef struct Parser { - Token *at; - Token *end; -} Parser; - -typedef enum Ast_Kind { - AST_NONE, - AST_ERROR, - AST_INT, - AST_UNARY, - AST_BINARY, -} Ast_Kind; - -typedef struct Ast Ast; -struct Ast { - Ast_Kind kind; - Token *pos; - - union { - uint64_t u; - struct { - Token_Kind op; - Ast *l; - Ast *r; - }; - char *error; - }; -}; - -Token *next_token(Parser *p) { - if (p->at < p->end) { - return p->at++; - } - return p->at; -} - -Token *match_token(Parser *p, Token_Kind kind) { - if (p->at->kind == kind) { - return next_token(p); - } - return NULL; -} - -Token *expect_token(Parser *p, Token_Kind kind) { - if (p->at->kind == kind) { - return next_token(p); - } - panicf("expected token kind: %s, got instead: %s", token_to_name(p->at->kind), token_to_name(kind)); -} - -Ast *create_ast(Token *token, Ast_Kind kind) { - Ast *result = calloc(1, sizeof(Ast)); - result->pos = token; - result->kind = kind; - return result; -} - -Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) { - Ast *result = create_ast(token, AST_BINARY); - result->op = op; - result->l = left; - result->r = right; - return result; -} - -Ast *parse_expr(Parser *p, int power_of_binding_to_right); - -Ast *parse_atom(Parser *p) { - Token *token = p->at; - Ast *n = NULL; - if (match_token(p, TOK_INT)) { - n = create_ast(token, AST_INT); - n->u = token->u; - } else if (match_token(p, TOK_LPAREN)) { - // @todo: do a comma list here - n = parse_expr(p, 0); - expect_token(p, TOK_RPAREN); +int main(int argc, char **argv) { + if (argc == 2) { + Token_Array tokens = lex_file("expr", argv[1], strlen(argv[1])); + Parser parser = {tokens.data, tokens.data + tokens.len}; + Ast *ast = parse_expr(&parser, 0); + FILE *file = fopen("out.s", "w"); + emit_program(file, ast); + fclose(file); } else { - panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, token->len, token->str, token_to_name(token->kind), token->kind); + vec_test(); + lex_test(); + parser_test(); } - return n; -} - -int get_binding_power(Token *tok) { - switch (tok->kind) { - case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return 120; - case TOK_PLUS: case TOK_MINUS: return 110; - case TOK_LSHIFT: case TOK_RSHIFT: return 100; - case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: return 90; - case TOK_EQ: case TOK_NEQ: return 80; - case TOK_BITAND: return 70; - case TOK_BITXOR: return 60; - case TOK_BITOR: return 50; - case TOK_AND: return 40; - case TOK_OR: return 30; - default: return 0; - } -} - -Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) { - switch (tok->kind) { - case TOK_PLUS: case TOK_MINUS: case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: - case TOK_EQ: case TOK_NEQ: case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: case TOK_BITAND: - case TOK_BITOR: case TOK_BITXOR: case TOK_AND: case TOK_OR: case TOK_LSHIFT: case TOK_RSHIFT: { - return create_binary_expr(tok, tok->kind, left, parse_expr(p, get_binding_power(tok))); - } break; - default: panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, tok->len, tok->str, token_to_name(tok->kind), tok->kind); - } - return NULL; -} - -Ast *parse_expr(Parser *p, int power_of_binding_to_right) { - Ast *n = parse_atom(p); - while (get_binding_power(p->at) > power_of_binding_to_right) { - Token *tok = next_token(p); - n = parse_valid_left_binding(p, tok, n); - } - return n; -} - -int64_t eval_expr(Ast *n) { - switch (n->kind) { - case AST_INT: return (int64_t)n->u; - case AST_BINARY: { - int64_t left = eval_expr(n->l); - int64_t right = eval_expr(n->r); - switch (n->op) { - case TOK_PLUS: return left + right; - case TOK_MINUS: return left - right; - case TOK_STAR: return left * right; - case TOK_SLASH: return left / right; - case TOK_PERCENT: return left % right; - case TOK_EQ: return left == right; - case TOK_NEQ: return left != right; - case TOK_LT: return left < right; - case TOK_LEQ: return left <= right; - case TOK_GT: return left > right; - case TOK_GEQ: return left >= right; - case TOK_BITAND: return left & right; - case TOK_BITOR: return left | right; - case TOK_BITXOR: return left ^ right; - case TOK_AND: return left && right; - case TOK_OR: return left || right; - case TOK_LSHIFT: return left << right; - case TOK_RSHIFT: return left >> right; - default: panicf("invalid token kind in eval_expr, binary"); - } - } break; - default: panicf("invalid ast kind in eval_expr"); - } -} - -void print_expr(Ast *n) { - switch (n->kind) { - case AST_INT: printf("%lu", n->u); break; - case AST_BINARY: { - print_expr(n->l); - printf(" %s ", token_to_op(n->op)); - print_expr(n->r); - } break; - default: panicf("encountered invalid ast kind in %s of kind: %d\n", __FUNCTION__, n->kind); - } -} - -void parser_test(void) { -#define TEST_EVAL(expr) do { \ - Token_Array tokens = lex_file("eval_test", (#expr), strlen((#expr)));\ - Parser p = {tokens.data, tokens.data + tokens.len};\ - Ast *result = parse_expr(&p, 0);\ - int64_t left = eval_expr(result);\ - int64_t right = (expr);\ - if (left != right) {\ - printf("%s:%d expected: %ld, got: %ld\n expression: ", __FILE__, __LINE__, left, right);\ - print_expr(result);\ - printf("\n");\ - }\ -} while (0) - -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wparentheses" - - TEST_EVAL(32+5-4); - TEST_EVAL(16/2/2); - TEST_EVAL(9*4/6); - TEST_EVAL(17%5); - TEST_EVAL(5125-42|(4&3)^2|2%1242); - TEST_EVAL((45%2)^(23&3)); - TEST_EVAL(1<16*2); - TEST_EVAL(16>1+2); - TEST_EVAL(4<=2+2); - TEST_EVAL(5>=2+2); - TEST_EVAL(4==2+2); - TEST_EVAL(5!=2+2); - TEST_EVAL(1+1+1+1+2-2-3-4-5); - TEST_EVAL(5%2^5&6|3); - TEST_EVAL(6&3); - TEST_EVAL(6|3); - TEST_EVAL(6^3); - TEST_EVAL(1&&2); - TEST_EVAL(0||3); - TEST_EVAL(1||0&&0); - TEST_EVAL(8<<2); - TEST_EVAL(32>>3); - TEST_EVAL((2+3)*(4+5)); - TEST_EVAL(9>3&1); - TEST_EVAL(8|1<4); - TEST_EVAL(7<=3+4); - TEST_EVAL(8>=2*4); - TEST_EVAL(4==2+2); - TEST_EVAL(5!=2+2); - TEST_EVAL(1&&2); - TEST_EVAL(0||3); - TEST_EVAL(1||0&&0); - TEST_EVAL(8<<2); - TEST_EVAL(32>>3); - TEST_EVAL(1+2<<3); - TEST_EVAL(16>>1+1); - -#pragma clang diagnostic pop - - - printf("parser tests passed\n"); -} - -void lex_test(void) { - char *src = "12 + 34.5 * 6\n- 7 % 2 / 1 == 1 != 2 <= 3 >= 4 && 3 || 4 << 1 >> 2"; - Lexer lex = make_lexer("test.c", src, (int)strlen(src)); - - assert_token(lex_token(&lex), TOK_INT, "12", 0, 0); - assert_token(lex_token(&lex), TOK_PLUS, "+", 0, 3); - assert_token(lex_token(&lex), TOK_FLOAT, "34.5", 0, 5); - assert_token(lex_token(&lex), TOK_STAR, "*", 0, 10); - assert_token(lex_token(&lex), TOK_INT, "6", 0, 12); - assert_token(lex_token(&lex), TOK_MINUS, "-", 1, 0); - assert_token(lex_token(&lex), TOK_INT, "7", 1, 2); - assert_token(lex_token(&lex), TOK_PERCENT, "%", 1, 4); - assert_token(lex_token(&lex), TOK_INT, "2", 1, 6); - assert_token(lex_token(&lex), TOK_SLASH, "/", 1, 8); - assert_token(lex_token(&lex), TOK_INT, "1", 1, 10); - assert_token(lex_token(&lex), TOK_EQ, "==", 1, 12); - assert_token(lex_token(&lex), TOK_INT, "1", 1, 15); - assert_token(lex_token(&lex), TOK_NEQ, "!=", 1, 17); - assert_token(lex_token(&lex), TOK_INT, "2", 1, 20); - assert_token(lex_token(&lex), TOK_LEQ, "<=", 1, 22); - assert_token(lex_token(&lex), TOK_INT, "3", 1, 25); - assert_token(lex_token(&lex), TOK_GEQ, ">=", 1, 27); - assert_token(lex_token(&lex), TOK_INT, "4", 1, 30); - assert_token(lex_token(&lex), TOK_AND, "&&", 1, 32); - assert_token(lex_token(&lex), TOK_INT, "3", 1, 35); - assert_token(lex_token(&lex), TOK_OR, "||", 1, 37); - assert_token(lex_token(&lex), TOK_INT, "4", 1, 40); - assert_token(lex_token(&lex), TOK_LSHIFT, "<<", 1, 42); - assert_token(lex_token(&lex), TOK_INT, "1", 1, 45); - assert_token(lex_token(&lex), TOK_RSHIFT, ">>", 1, 47); - assert_token(lex_token(&lex), TOK_INT, "2", 1, 50); - - Token eof = lex_token(&lex); - assert(eof.kind == TOK_EOF); - assert(eof.len == 0); - assert(eof.line == 1); - assert(eof.column == 51); - - Token_Array array = lex_file("test.c", src, (int)strlen(src)); - assert(array.len == 28); - - printf("lexer tests passed\n"); -} - -int main() { - vec_test(); - lex_test(); - parser_test(); + } \ No newline at end of file diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..ab83d47 --- /dev/null +++ b/parser.c @@ -0,0 +1,220 @@ +typedef struct Parser { + Token *at; + Token *end; +} Parser; + +typedef enum Ast_Kind { + AST_NONE, + AST_ERROR, + AST_INT, + AST_UNARY, + AST_BINARY, +} Ast_Kind; + +typedef struct Ast Ast; +struct Ast { + Ast_Kind kind; + Token *pos; + + union { + uint64_t u; + struct { + Token_Kind op; + Ast *l; + Ast *r; + }; + char *error; + }; +}; + +Token *next_token(Parser *p) { + if (p->at < p->end) { + return p->at++; + } + return p->at; +} + +Token *match_token(Parser *p, Token_Kind kind) { + if (p->at->kind == kind) { + return next_token(p); + } + return NULL; +} + +Token *expect_token(Parser *p, Token_Kind kind) { + if (p->at->kind == kind) { + return next_token(p); + } + panicf("expected token kind: %s, got instead: %s", token_to_name(p->at->kind), token_to_name(kind)); +} + +Ast *create_ast(Token *token, Ast_Kind kind) { + Ast *result = calloc(1, sizeof(Ast)); + result->pos = token; + result->kind = kind; + return result; +} + +Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) { + Ast *result = create_ast(token, AST_BINARY); + result->op = op; + result->l = left; + result->r = right; + return result; +} + +Ast *parse_expr(Parser *p, int power_of_binding_to_right); + +Ast *parse_atom(Parser *p) { + Token *token = p->at; + Ast *n = NULL; + if (match_token(p, TOK_INT)) { + n = create_ast(token, AST_INT); + n->u = token->u; + } else if (match_token(p, TOK_LPAREN)) { + // @todo: do a comma list here + n = parse_expr(p, 0); + expect_token(p, TOK_RPAREN); + } else { + panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, token->len, token->str, token_to_name(token->kind), token->kind); + } + return n; +} + +int get_binding_power(Token *tok) { + switch (tok->kind) { + case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return 120; + case TOK_PLUS: case TOK_MINUS: return 110; + case TOK_LSHIFT: case TOK_RSHIFT: return 100; + case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: return 90; + case TOK_EQ: case TOK_NEQ: return 80; + case TOK_BITAND: return 70; + case TOK_BITXOR: return 60; + case TOK_BITOR: return 50; + case TOK_AND: return 40; + case TOK_OR: return 30; + default: return 0; + } +} + +Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) { + switch (tok->kind) { + case TOK_PLUS: case TOK_MINUS: case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: + case TOK_EQ: case TOK_NEQ: case TOK_LT: case TOK_LEQ: case TOK_GT: case TOK_GEQ: case TOK_BITAND: + case TOK_BITOR: case TOK_BITXOR: case TOK_AND: case TOK_OR: case TOK_LSHIFT: case TOK_RSHIFT: { + return create_binary_expr(tok, tok->kind, left, parse_expr(p, get_binding_power(tok))); + } break; + default: panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, tok->len, tok->str, token_to_name(tok->kind), tok->kind); + } + return NULL; +} + +Ast *parse_expr(Parser *p, int power_of_binding_to_right) { + Ast *n = parse_atom(p); + while (get_binding_power(p->at) > power_of_binding_to_right) { + Token *tok = next_token(p); + n = parse_valid_left_binding(p, tok, n); + } + return n; +} + +int64_t eval_expr(Ast *n) { + switch (n->kind) { + case AST_INT: return (int64_t)n->u; + case AST_BINARY: { + int64_t left = eval_expr(n->l); + int64_t right = eval_expr(n->r); + switch (n->op) { + case TOK_PLUS: return left + right; + case TOK_MINUS: return left - right; + case TOK_STAR: return left * right; + case TOK_SLASH: return left / right; + case TOK_PERCENT: return left % right; + case TOK_EQ: return left == right; + case TOK_NEQ: return left != right; + case TOK_LT: return left < right; + case TOK_LEQ: return left <= right; + case TOK_GT: return left > right; + case TOK_GEQ: return left >= right; + case TOK_BITAND: return left & right; + case TOK_BITOR: return left | right; + case TOK_BITXOR: return left ^ right; + case TOK_AND: return left && right; + case TOK_OR: return left || right; + case TOK_LSHIFT: return left << right; + case TOK_RSHIFT: return left >> right; + default: panicf("invalid token kind in eval_expr, binary"); + } + } break; + default: panicf("invalid ast kind in eval_expr"); + } +} + +void print_expr(Ast *n) { + switch (n->kind) { + case AST_INT: printf("%lu", n->u); break; + case AST_BINARY: { + print_expr(n->l); + printf(" %s ", token_to_op(n->op)); + print_expr(n->r); + } break; + default: panicf("encountered invalid ast kind in %s of kind: %d\n", __FUNCTION__, n->kind); + } +} + +void parser_test(void) { +#define TEST_EVAL(expr) do { \ + Token_Array tokens = lex_file("eval_test", (#expr), strlen((#expr)));\ + Parser p = {tokens.data, tokens.data + tokens.len};\ + Ast *result = parse_expr(&p, 0);\ + int64_t left = eval_expr(result);\ + int64_t right = (expr);\ + if (left != right) {\ + printf("%s:%d expected: %ld, got: %ld\n expression: ", __FILE__, __LINE__, left, right);\ + print_expr(result);\ + printf("\n");\ + }\ +} while (0) + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wparentheses" + TEST_EVAL(32+5-4); + TEST_EVAL(16/2/2); + TEST_EVAL(9*4/6); + TEST_EVAL(17%5); + TEST_EVAL(5125-42|(4&3)^2|2%1242); + TEST_EVAL((45%2)^(23&3)); + TEST_EVAL(1<16*2); + TEST_EVAL(16>1+2); + TEST_EVAL(4<=2+2); + TEST_EVAL(5>=2+2); + TEST_EVAL(4==2+2); + TEST_EVAL(5!=2+2); + TEST_EVAL(1+1+1+1+2-2-3-4-5); + TEST_EVAL(5%2^5&6|3); + TEST_EVAL(6&3); + TEST_EVAL(6|3); + TEST_EVAL(6^3); + TEST_EVAL(1&&2); + TEST_EVAL(0||3); + TEST_EVAL(1||0&&0); + TEST_EVAL(8<<2); + TEST_EVAL(32>>3); + TEST_EVAL((2+3)*(4+5)); + TEST_EVAL(9>3&1); + TEST_EVAL(8|1<4); + TEST_EVAL(7<=3+4); + TEST_EVAL(8>=2*4); + TEST_EVAL(4==2+2); + TEST_EVAL(5!=2+2); + TEST_EVAL(1&&2); + TEST_EVAL(0||3); + TEST_EVAL(1||0&&0); + TEST_EVAL(8<<2); + TEST_EVAL(32>>3); + TEST_EVAL(1+2<<3); + TEST_EVAL(16>>1+1); +#pragma clang diagnostic pop + + printf("parser tests passed\n"); +}