#include #include #include #include #include #include #include "base.c" #include "meta_gen.c" typedef struct Token { Token_Kind kind; char *str; int len; char *file; int line, column; union { uint64_t u; }; } Token; typedef Vec(Token) Token_Array; typedef struct Lexer { char *at; char *end; char *file; int line; int column; } Lexer; void lex_advance(Lexer *lex) { if (lex->at >= lex->end) { return; } if (*lex->at == '\n') { lex->line++; lex->column = 0; } else { lex->column++; } lex->at += 1; } void eat_whitespace(Lexer *lex) { while (lex->at < lex->end) { switch (*lex->at) { case ' ': case '\t': case '\r': case '\n': lex_advance(lex); break; default: return; } } } Lexer make_lexer(char *file, char *src, int len) { Lexer lex = { .at = src, .end = src + len, .file = file, .line = 0, .column = 0, }; return lex; } Token lex_token(Lexer *lex) { eat_whitespace(lex); Token t = { .str = lex->at, .line = lex->line, .column = lex->column, .file = lex->file, }; if (lex->at >= lex->end) { t.kind = TOK_EOF; t.len = 0; return t; } char c = *lex->at; if (isdigit(c)) { t.kind = TOK_INT; while (lex->at < lex->end && isdigit(*lex->at)) { lex_advance(lex); } // @todo: proper lexing of floats (as well as postfixes) if (lex->at < lex->end && *lex->at == '.') { t.kind = TOK_FLOAT; lex_advance(lex); while (lex->at < lex->end && isdigit(*lex->at)) { lex_advance(lex); } } if (t.kind == TOK_INT) { t.u = strtoull(t.str, NULL, 10); } t.len = (int)(lex->at - t.str); return t; } lex_advance(lex); switch (c) { case 0: t.kind = TOK_EOF; break; case '+': t.kind = TOK_PLUS; break; case '-': t.kind = TOK_MINUS; break; case '*': t.kind = TOK_STAR; break; case '/': t.kind = TOK_SLASH; break; case '%': t.kind = TOK_PERCENT; break; case '{': t.kind = TOK_LBRACE; break; case '}': t.kind = TOK_RBRACE; break; default: { // @todo: lexer perhaps should have a static buffer of size 1024, error message // should be put there and piped to the upper program. The token should be filled // with that message t.kind = TOK_ERROR; } } t.len = (int)(lex->at - t.str); return t; } void assert_token(Token t, Token_Kind kind, char *text, int line, int column) { assert(t.kind == kind); assert(t.line == line); assert(t.column == column); assert(t.len == (int)strlen(text)); assert(strncmp(t.str, text, t.len) == 0); } Token_Array lex_file(char *file, char *src, int len) { Lexer lex = make_lexer(file, src, len); Token_Array result = {0}; for (;;) { Token token = lex_token(&lex); vec_push(&result, token); if (token.kind == TOK_EOF) { break; } } return result; } typedef struct Parser { Token *at; Token *end; } Parser; typedef enum Ast_Kind { AST_NONE, AST_ERROR, AST_INT, AST_UNARY, AST_BINARY, } Ast_Kind; typedef struct Ast Ast; struct Ast { Ast_Kind kind; Token *pos; union { uint64_t u; struct { Token_Kind op; Ast *l; Ast *r; }; char *error; }; }; Token *next_token(Parser *p) { if (p->at < p->end) { return p->at++; } return p->at; } Token *match_token(Parser *p, Token_Kind kind) { if (p->at->kind == kind) { return next_token(p); } return NULL; } Ast *create_ast(Token *token, Ast_Kind kind) { Ast *result = calloc(1, sizeof(Ast)); result->pos = token; result->kind = kind; return result; } Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) { Ast *result = create_ast(token, AST_BINARY); result->op = op; result->l = left; result->r = right; return result; } Ast *parse_expr(Parser *p, int power_of_binding_to_right); Ast *parse_atom(Parser *p) { Token *token = p->at; Ast *n = NULL; if (match_token(p, TOK_INT)) { n = create_ast(token, AST_INT); n->u = token->u; } else if (match_token(p, TOK_LPAREN)) { // @todo: do a comma list here n = parse_expr(p, 0); } else { fprintf(stderr, "encountered invalid token while parsing atom: %.*s\n", token->len, token->str); exit(1); } return n; } int get_binding_power(Token *tok) { switch (tok->kind) { case TOK_LSHIFT: case TOK_RSHIFT: return 100; case TOK_PLUS: case TOK_MINUS: return 110; case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return 120; default: return 0; } } Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) { switch (tok->kind) { case TOK_LSHIFT: case TOK_RSHIFT: case TOK_PLUS: case TOK_MINUS: case TOK_SLASH: case TOK_STAR: case TOK_PERCENT: { return create_binary_expr(tok, tok->kind, left, parse_expr(p, get_binding_power(tok))); } break; default: fprintf(stderr, "ERROR"); exit(1); } return NULL; } Ast *parse_expr(Parser *p, int power_of_binding_to_right) { Ast *n = parse_atom(p); while (get_binding_power(p->at) > power_of_binding_to_right) { Token *tok = next_token(p); n = parse_valid_left_binding(p, tok, n); } return n; } int64_t eval_expr(Ast *n) { switch (n->kind) { case AST_INT: return (int64_t)n->u; case AST_BINARY: { int64_t left = eval_expr(n->l); int64_t right = eval_expr(n->r); switch (n->op) { case TOK_PLUS: return left + right; case TOK_MINUS: return left - right; case TOK_STAR: return left * right; case TOK_SLASH: return left / right; case TOK_PERCENT: return left % right; default: { fprintf(stderr, "invalid token kind in eval_expr, binary"); exit(1); } } } break; default: { fprintf(stderr, "invalid ast kind in eval_expr"); exit(1); } break; } } void print_expr(Ast *n) { switch (n->kind) { case AST_INT: printf("%lu", n->u); break; case AST_BINARY: { print_expr(n->l); printf(" %s ", token_to_op[n->op]); print_expr(n->r); } break; default: fprintf(stderr, "memes"); exit(1); } } void parser_test(void) { #define TEST_EVAL(expr) do { \ Token_Array tokens = lex_file("eval_test", #expr, strlen(#expr));\ Parser p = {tokens.data, tokens.data + tokens.len};\ Ast *result = parse_expr(&p, 0);\ assert(eval_expr(result) == expr);\ } while (0) TEST_EVAL(32+5-4); TEST_EVAL(16/2/2); printf("parser tests passed\n"); } void lex_test(void) { char *src = "12 + 34.5 * 6\n- 7 % 2 / 1"; Lexer lex = make_lexer("test.c", src, (int)strlen(src)); assert_token(lex_token(&lex), TOK_INT, "12", 0, 0); assert_token(lex_token(&lex), TOK_PLUS, "+", 0, 3); assert_token(lex_token(&lex), TOK_FLOAT, "34.5", 0, 5); assert_token(lex_token(&lex), TOK_STAR, "*", 0, 10); assert_token(lex_token(&lex), TOK_INT, "6", 0, 12); assert_token(lex_token(&lex), TOK_MINUS, "-", 1, 0); assert_token(lex_token(&lex), TOK_INT, "7", 1, 2); assert_token(lex_token(&lex), TOK_PERCENT, "%", 1, 4); assert_token(lex_token(&lex), TOK_INT, "2", 1, 6); assert_token(lex_token(&lex), TOK_SLASH, "/", 1, 8); assert_token(lex_token(&lex), TOK_INT, "1", 1, 10); Token eof = lex_token(&lex); assert(eof.kind == TOK_EOF); assert(eof.len == 0); assert(eof.line == 1); assert(eof.column == 11); Token_Array array = lex_file("test.c", src, (int)strlen(src)); assert(array.len == 12); printf("lexer tests passed\n"); } int main() { vec_test(); lex_test(); parser_test(); }