Init repo

2026-05-10 13:24:39 +02:00
commit 9792517c41
6 changed files with 689 additions and 0 deletions
--- a/main.c
+++ b/main.c
@@ -0,0 +1,345 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdint.h>
+#include "base.c"
+#include "meta_gen.c"
+
+typedef struct Token { // One lexeme plus the source position where it starts.
+    Token_Kind kind; // classification assigned by lex_token
+    char *str; // first character of the token; lex_token points this into the buffer being lexed
+    int len; // length of the token text in bytes (0 when lex_token hits the end of the buffer)
+
+    char *file; // file name, copied from the Lexer that produced the token
+    int line, column; // position of the first character; make_lexer starts both counters at 0
+
+    union {
+        uint64_t u; // decoded value; lex_token sets it only for TOK_INT
+    };
+} Token;
+
+typedef Vec(Token) Token_Array; // token list with .data/.len; Vec is declared outside this file (presumably base.c, confirm)
+
+typedef struct Lexer { // Cursor over a source buffer that is being tokenized.
+    char *at; // next unread character
+    char *end; // one past the last character (make_lexer sets it to src + len)
+    char *file; // file name copied into every token
+    int line; // line of `at`, counted from 0
+    int column; // column of `at`, counted from 0 and reset after each '\n'
+} Lexer;
+
+// Consume one character, keeping the line/column counters in sync.
+// Does nothing once the cursor has reached the end of the buffer.
+void lex_advance(Lexer *lex) {
+    if (lex->at < lex->end) {
+        char consumed = *lex->at++;
+        if (consumed != '\n') {
+            lex->column += 1;
+        } else {
+            // A newline starts a fresh line at column 0.
+            lex->line += 1;
+            lex->column = 0;
+        }
+    }
+}
+
+// Skip spaces, tabs, carriage returns and newlines, stopping at the first
+// other character or at the end of the buffer.
+void eat_whitespace(Lexer *lex) {
+    for (;;) {
+        if (lex->at >= lex->end) {
+            return;
+        }
+
+        char c = *lex->at;
+        if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
+            return;
+        }
+
+        // lex_advance (not a bare pointer bump) so line/column stay correct.
+        lex_advance(lex);
+    }
+}
+
+// Build a lexer positioned at the start of `src`, which is `len` bytes long.
+// Line and column numbering both start at 0.
+Lexer make_lexer(char *file, char *src, int len) {
+    Lexer lex = {0};
+    lex.file = file;
+    lex.at = src;
+    lex.end = src + len;
+    return lex;
+}
+
+// Scan and return the next token, skipping leading whitespace first.
+//
+// The token's `str` points into the lexer's buffer and `len` covers the
+// characters consumed. At the end of the buffer the result is a zero-length
+// TOK_EOF; a NUL byte inside the buffer is also reported as TOK_EOF. An
+// unrecognized character yields a one-character TOK_ERROR token.
+Token lex_token(Lexer *lex) {
+    eat_whitespace(lex);
+    Token t = {
+        .str = lex->at,
+        .line = lex->line,
+        .column = lex->column,
+        .file = lex->file,
+    };
+
+    if (lex->at >= lex->end) {
+        t.kind = TOK_EOF;
+        t.len = 0;
+        return t;
+    }
+
+    char c = *lex->at;
+
+    // <ctype.h> functions require a value representable as unsigned char;
+    // passing a negative plain char is undefined behavior.
+    if (isdigit((unsigned char)c)) {
+        t.kind = TOK_INT;
+
+        // Accumulate the value while scanning instead of calling strtoull on
+        // t.str: the buffer is delimited by lex->end and need not be
+        // NUL-terminated, so strtoull could read past the end. Like strtoull,
+        // the value saturates at the maximum on overflow.
+        uint64_t value = 0;
+        while (lex->at < lex->end && isdigit((unsigned char)*lex->at)) {
+            uint64_t digit = (uint64_t)(*lex->at - '0');
+            if (value > (UINT64_MAX - digit) / 10) {
+                value = UINT64_MAX;
+            } else {
+                value = value * 10 + digit;
+            }
+            lex_advance(lex);
+        }
+
+        // @todo: proper lexing of floats (as well as postfixes)
+        if (lex->at < lex->end && *lex->at == '.') {
+            t.kind = TOK_FLOAT;
+            lex_advance(lex);
+
+            while (lex->at < lex->end && isdigit((unsigned char)*lex->at)) {
+                lex_advance(lex);
+            }
+        }
+
+        // Only integer tokens carry a decoded value; floats keep u == 0.
+        if (t.kind == TOK_INT) {
+            t.u = value;
+        }
+
+        t.len = (int)(lex->at - t.str);
+        return t;
+    }
+
+    // Every remaining token is a single character.
+    lex_advance(lex);
+
+    switch (c) {
+        case 0: t.kind = TOK_EOF; break;
+        case '+': t.kind = TOK_PLUS; break;
+        case '-': t.kind = TOK_MINUS; break;
+        case '*': t.kind = TOK_STAR; break;
+        case '/': t.kind = TOK_SLASH; break;
+        case '%': t.kind = TOK_PERCENT; break;
+        case '{': t.kind = TOK_LBRACE; break;
+        case '}': t.kind = TOK_RBRACE; break;
+        default: {
+            // @todo: lexer perhaps should have a static buffer of size 1024, error message
+            // should be put there and piped to the upper program. The token should be filled
+            // with that message
+            t.kind = TOK_ERROR;
+        }
+    }
+
+    t.len = (int)(lex->at - t.str);
+    return t;
+}
+
+// Test helper: assert that `t` has the given kind, starts at (line, column)
+// and spells exactly `text`.
+void assert_token(Token t, Token_Kind kind, char *text, int line, int column) {
+    int expected_len = (int)strlen(text);
+
+    assert(t.kind == kind);
+    assert(t.line == line);
+    assert(t.column == column);
+    assert(t.len == expected_len);
+    // Token text is not NUL-terminated, so compare only its own length.
+    assert(strncmp(t.str, text, t.len) == 0);
+}
+
+// Tokenize the whole buffer. The returned array always ends with the
+// TOK_EOF token.
+Token_Array lex_file(char *file, char *src, int len) {
+    Token_Array tokens = {0};
+    Lexer lex = make_lexer(file, src, len);
+
+    Token_Kind last;
+    do {
+        Token token = lex_token(&lex);
+        vec_push(&tokens, token);
+        last = token.kind;
+    } while (last != TOK_EOF);
+
+    return tokens;
+}
+
+typedef struct Parser { // Cursor over a token array.
+    Token *at; // next token to be consumed
+    Token *end; // one past the last token (parser_test passes data + len)
+} Parser;
+
+typedef enum Ast_Kind { // Discriminator for the payload stored in an Ast node.
+    AST_NONE, // zero value
+    AST_ERROR, // not produced anywhere in this file yet
+    AST_INT, // integer literal, value in Ast.u
+    AST_UNARY, // not produced anywhere in this file yet
+    AST_BINARY, // binary operator, uses Ast.op / Ast.l / Ast.r
+} Ast_Kind;
+
+typedef struct Ast Ast; // declared first so the struct can hold pointers to itself
+struct Ast {
+    Ast_Kind kind; // selects which union member below is meaningful
+    Token *pos; // token the node was created from
+
+    union {
+        uint64_t u; // AST_INT: literal value
+        struct { // AST_BINARY
+            Token_Kind op; // operator token kind
+            Ast *l; // left operand
+            Ast *r; // right operand
+        };
+        char *error; // presumably the message for AST_ERROR; never written in this file
+    };
+};
+
+// Return the current token and step past it. Once the cursor has reached
+// `end` it stays there and `end` itself is returned.
+Token *next_token(Parser *p) {
+    Token *current = p->at;
+    if (current < p->end) {
+        p->at = current + 1;
+    }
+    return current;
+}
+
+// If the current token has the given kind, consume and return it;
+// otherwise leave the parser untouched and return NULL.
+Token *match_token(Parser *p, Token_Kind kind) {
+    // Guard the dereference: next_token can leave the cursor at `end`,
+    // which is one past the last valid token.
+    if (p->at < p->end && p->at->kind == kind) {
+        return next_token(p);
+    }
+    return NULL;
+}
+
+// Allocate a zero-initialized AST node of the given kind, positioned at
+// `token`. Never returns NULL: an allocation failure is reported on stderr
+// and terminates the process, matching the other fatal errors in this file.
+Ast *create_ast(Token *token, Ast_Kind kind) {
+    Ast *result = calloc(1, sizeof *result);
+    if (result == NULL) {
+        fprintf(stderr, "out of memory while allocating ast node\n");
+        exit(1);
+    }
+    result->pos = token;
+    result->kind = kind;
+    return result;
+}
+
+// Allocate an AST_BINARY node for `left op right`, positioned at `token`.
+Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
+    Ast *node = create_ast(token, AST_BINARY);
+    node->l = left;
+    node->r = right;
+    node->op = op;
+    return node;
+}
+
+// Declared ahead of its definition because parse_atom calls it.
+Ast *parse_expr(Parser *p, int power_of_binding_to_right);
+
+// Parse an operand: an integer literal, or an expression introduced by
+// TOK_LPAREN. Anything else is reported on stderr and terminates the process.
+Ast *parse_atom(Parser *p) {
+    Token *token = p->at;
+
+    if (match_token(p, TOK_INT)) {
+        Ast *literal = create_ast(token, AST_INT);
+        literal->u = token->u;
+        return literal;
+    }
+
+    if (match_token(p, TOK_LPAREN)) {
+        // @todo: do a comma list here
+        // NOTE(review): nothing here consumes a closing parenthesis; confirm intent.
+        return parse_expr(p, 0);
+    }
+
+    fprintf(stderr, "encountered invalid token while parsing atom: %.*s\n", token->len, token->str);
+    exit(1);
+}
+
+// Binding power of a binary operator token; a higher value binds tighter.
+// Returns 0 for every token that is not a binary operator.
+int get_binding_power(Token *tok) {
+    Token_Kind kind = tok->kind;
+
+    if (kind == TOK_LSHIFT || kind == TOK_RSHIFT) {
+        return 100;
+    }
+    if (kind == TOK_PLUS || kind == TOK_MINUS) {
+        return 110;
+    }
+    if (kind == TOK_STAR || kind == TOK_SLASH || kind == TOK_PERCENT) {
+        return 120;
+    }
+    return 0;
+}
+
+// Given an already-consumed operator `tok` and its left operand, parse the
+// right operand and build the binary node. A token that is not a binary
+// operator prints "ERROR" to stderr and terminates the process.
+Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) {
+    switch (tok->kind) {
+        case TOK_LSHIFT:
+        case TOK_RSHIFT:
+        case TOK_PLUS:
+        case TOK_MINUS:
+        case TOK_SLASH:
+        case TOK_STAR:
+        case TOK_PERCENT:
+            break;
+        default:
+            fprintf(stderr, "ERROR");
+            exit(1);
+    }
+
+    // The right operand is parsed with the operator's own binding power, so
+    // an operator of equal power is left for the caller's loop to pick up.
+    Ast *right = parse_expr(p, get_binding_power(tok));
+    return create_binary_expr(tok, tok->kind, left, right);
+}
+
+// Parse an expression, folding in binary operators for as long as the next
+// token binds more tightly than `power_of_binding_to_right`.
+Ast *parse_expr(Parser *p, int power_of_binding_to_right) {
+    Ast *left = parse_atom(p);
+
+    for (;;) {
+        if (get_binding_power(p->at) <= power_of_binding_to_right) {
+            break;
+        }
+        Token *op = next_token(p);
+        left = parse_valid_left_binding(p, op, left);
+    }
+
+    return left;
+}
+
+// Recursively evaluate an expression tree made of AST_INT and AST_BINARY
+// nodes and return the result as a signed 64-bit integer.
+//
+// Unsupported node or operator kinds, division/modulo by zero, and the
+// overflowing division INT64_MIN / -1 are reported on stderr and terminate
+// the process.
+int64_t eval_expr(Ast *n) {
+    switch (n->kind) {
+        case AST_INT: return (int64_t)n->u;
+        case AST_BINARY: {
+            int64_t left = eval_expr(n->l);
+            int64_t right = eval_expr(n->r);
+            switch (n->op) {
+                case TOK_PLUS: return left + right;
+                case TOK_MINUS: return left - right;
+                case TOK_STAR: return left * right;
+                case TOK_SLASH:
+                case TOK_PERCENT: {
+                    // Both operators are undefined for a zero divisor.
+                    if (right == 0) {
+                        fprintf(stderr, "division by zero in eval_expr\n");
+                        exit(1);
+                    }
+                    // INT64_MIN / -1 is not representable, and the matching
+                    // `%` is undefined in C as well; its mathematical value is 0.
+                    if (left == INT64_MIN && right == -1) {
+                        if (n->op == TOK_PERCENT) {
+                            return 0;
+                        }
+                        fprintf(stderr, "integer overflow in eval_expr, division\n");
+                        exit(1);
+                    }
+                    return n->op == TOK_SLASH ? left / right : left % right;
+                }
+                default: {
+                    fprintf(stderr, "invalid token kind in eval_expr, binary");
+                    exit(1);
+                }
+            }
+        }
+        default: {
+            fprintf(stderr, "invalid ast kind in eval_expr");
+            exit(1);
+        }
+    }
+}
+
+// Print an expression tree to stdout in infix form, without parentheses.
+// Node kinds other than AST_INT and AST_BINARY terminate the process.
+void print_expr(Ast *n) {
+    switch (n->kind) {
+        // uint64_t is not `unsigned long` on every platform, so convert to a
+        // type whose printf conversion is fixed by the standard.
+        case AST_INT: printf("%llu", (unsigned long long)n->u); break;
+        case AST_BINARY: {
+            print_expr(n->l);
+            printf(" %s ", token_to_op[n->op]);
+            print_expr(n->r);
+        } break;
+        default: fprintf(stderr, "memes"); exit(1);
+    }
+}
+
+// Lex, parse and evaluate a few expressions, comparing each result with what
+// the C compiler computes for the same source text.
+void parser_test(void) {
+// `expr` is used twice: stringified as lexer input and as the expected value.
+#define TEST_EVAL(expr) do { \
+    Token_Array tokens = lex_file("eval_test", #expr, (int)strlen(#expr));\
+    Parser p = {tokens.data, tokens.data + tokens.len};\
+    Ast *result = parse_expr(&p, 0);\
+    assert(eval_expr(result) == (expr));\
+} while (0)
+
+    TEST_EVAL(32+5-4);
+    TEST_EVAL(16/2/2); // left-associative: (16/2)/2
+
+// Keep the helper macro local to this function.
+#undef TEST_EVAL
+
+    printf("parser tests passed\n");
+}
+
+// Lex a two-line sample and check the kind, text and position of every token,
+// then check that lex_file yields the same number of tokens, EOF included.
+void lex_test(void) {
+    char *src = "12 + 34.5 * 6\n- 7 % 2 / 1";
+    int src_len = (int)strlen(src);
+    Lexer lex = make_lexer("test.c", src, src_len);
+
+    // Expected tokens in source order.
+    struct {
+        Token_Kind kind;
+        char *text;
+        int line;
+        int column;
+    } expected[] = {
+        {TOK_INT, "12", 0, 0},
+        {TOK_PLUS, "+", 0, 3},
+        {TOK_FLOAT, "34.5", 0, 5},
+        {TOK_STAR, "*", 0, 10},
+        {TOK_INT, "6", 0, 12},
+        {TOK_MINUS, "-", 1, 0},
+        {TOK_INT, "7", 1, 2},
+        {TOK_PERCENT, "%", 1, 4},
+        {TOK_INT, "2", 1, 6},
+        {TOK_SLASH, "/", 1, 8},
+        {TOK_INT, "1", 1, 10},
+    };
+    int expected_count = (int)(sizeof expected / sizeof expected[0]);
+
+    for (int i = 0; i < expected_count; i++) {
+        assert_token(lex_token(&lex), expected[i].kind, expected[i].text, expected[i].line, expected[i].column);
+    }
+
+    // After the last real token the lexer reports a zero-length EOF.
+    Token eof = lex_token(&lex);
+    assert(eof.kind == TOK_EOF);
+    assert(eof.len == 0);
+    assert(eof.line == 1);
+    assert(eof.column == 11);
+
+    Token_Array array = lex_file("test.c", src, src_len);
+    assert(array.len == 12);
+
+    printf("lexer tests passed\n");
+}
+
+// Entry point: run the self-tests in order. Each test aborts via assert on
+// failure, so reaching the end means everything passed.
+int main(void) {
+    vec_test();
+    lex_test();
+    parser_test();
+    return 0;
+}