From 9792517c415f479066930b5800c3824466954adf Mon Sep 17 00:00:00 2001
From: Krzosa Karol
Date: Sun, 10 May 2026 13:24:39 +0200
Subject: [PATCH] Init repo

---
Review note: file contents below were revised during review; the blob ids on
the `index` lines are those of the original commit and were not recomputed.

 .gitignore |   1 +
 base.c     | 153 +++++++++++++++++++++
 build.sh   |  10 ++
 main.c     | 396 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 meta.c     |  91 ++++++++++++
 meta_gen.c | 108 +++++++++++++++
 6 files changed, 759 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 base.c
 create mode 100644 build.sh
 create mode 100644 main.c
 create mode 100644 meta.c
 create mode 100644 meta_gen.c

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d163863
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
\ No newline at end of file
diff --git a/base.c b/base.c
new file mode 100644
index 0000000..abfaa2a
--- /dev/null
+++ b/base.c
@@ -0,0 +1,153 @@
+// Generic growable array: Vec(T) declares a { data, len, cap } struct for element type T.
+// The vec_* macros take a pointer to a Vec(T) and may evaluate their arguments more than once.
+#define Vec(T) struct { T *data; int len; int cap; }
+typedef Vec(void) VecVoid;
+
+// Appends `elem`, growing the storage if needed.
+#define vec_push(arr, elem) \
+    (vec_grow((VecVoid *)(arr), sizeof((arr)->data[0]), 1), (arr)->data[(arr)->len++] = (elem))
+
+// Inserts `elem` at `idx` (0 <= idx <= len), shifting later elements up by one.
+#define vec_insert(arr, idx, elem) do { \
+    assert((idx) >= 0 && (idx) <= (arr)->len); \
+    vec_grow((VecVoid *)(arr), sizeof((arr)->data[0]), 1); \
+    memmove(&(arr)->data[(idx) + 1], &(arr)->data[(idx)], sizeof((arr)->data[0]) * ((arr)->len - (idx))); \
+    (arr)->data[(idx)] = (elem); \
+    (arr)->len += 1; \
+} while (0)
+
+// Removes and returns the last element; the vector must not be empty.
+#define vec_pop(arr) \
+    (assert((arr)->len > 0), (arr)->data[--(arr)->len])
+
+// Removes the element at `idx`, preserving the order of the remaining elements.
+#define vec_del(arr, idx) do { \
+    assert((idx) >= 0 && (idx) < (arr)->len); \
+    memmove(&(arr)->data[(idx)], &(arr)->data[(idx) + 1], sizeof((arr)->data[0]) * ((arr)->len - (idx) - 1)); \
+    (arr)->len -= 1; \
+} while (0)
+
+// Removes the element at `idx` by overwriting it with the last element (order not preserved).
+#define vec_swap_del(arr, idx) do { \
+    assert((idx) >= 0 && (idx) < (arr)->len); \
+    (arr)->data[(idx)] = (arr)->data[(arr)->len - 1]; \
+    (arr)->len -= 1; \
+} while (0)
+
+// Frees the storage and resets the vector to its zero-initialized state.
+#define vec_free(arr) do { \
+    free((arr)->data); \
+    (arr)->data = NULL; \
+    (arr)->len = 0; \
+    (arr)->cap = 0; \
+} while (0)
+
+// Ensures room for `add_len` more elements of `elem_size` bytes each, reallocating
+// the backing storage when the current capacity is too small. Exits on allocation failure.
+void vec_grow(VecVoid *array, int elem_size, int add_len) {
+    if (array->len + add_len > array->cap) {
+        int cap = array->cap ? array->cap : 15;
+        int new_cap = (cap + add_len) * 2;
+        assert(new_cap > array->len + add_len);
+        // The byte count is computed in size_t so the product cannot overflow int.
+        void *data = realloc(array->data, (size_t)elem_size * (size_t)new_cap);
+        if (data == NULL) {
+            // Checked explicitly because assert() compiles away under NDEBUG.
+            fprintf(stderr, "vec_grow: out of memory\n");
+            exit(1);
+        }
+        array->data = data;
+        array->cap = new_cap;
+    }
+}
+
+// Self-test covering push/insert/pop/del/swap_del/free on int and char * vectors.
+void vec_test(void) {
+    Vec(int) int_vec = {0};
+    assert(int_vec.data == NULL);
+    assert(int_vec.len == 0);
+    assert(int_vec.cap == 0);
+
+    for (int i = 0; i < 32; i += 1) {
+        vec_push(&int_vec, i * 3);
+        assert(int_vec.len == i + 1);
+        assert(int_vec.cap >= int_vec.len);
+        assert(int_vec.data[int_vec.len - 1] == i * 3);
+    }
+
+    for (int i = 0; i < 32; i += 1) {
+        assert(int_vec.data[i] == i * 3);
+    }
+
+    int cap_after_ints = int_vec.cap;
+    vec_push(&int_vec, 12345);
+    assert(int_vec.len == 33);
+    assert(int_vec.data[32] == 12345);
+    assert(int_vec.cap >= cap_after_ints);
+
+    vec_insert(&int_vec, 0, 111);
+    assert(int_vec.len == 34);
+    assert(int_vec.data[0] == 111);
+    assert(int_vec.data[1] == 0);
+
+    vec_insert(&int_vec, 5, 222);
+    assert(int_vec.len == 35);
+    assert(int_vec.data[5] == 222);
+    assert(int_vec.data[6] == 12);
+
+    vec_insert(&int_vec, int_vec.len, 333);
+    assert(int_vec.len == 36);
+    assert(int_vec.data[int_vec.len - 1] == 333);
+
+    int popped = vec_pop(&int_vec);
+    assert(popped == 333);
+    assert(int_vec.len == 35);
+
+    vec_del(&int_vec, 5);
+    assert(int_vec.len == 34);
+    assert(int_vec.data[5] == 12);
+
+    int before_swap_del = int_vec.data[int_vec.len - 1];
+    vec_swap_del(&int_vec, 1);
+    assert(int_vec.len == 33);
+    assert(int_vec.data[1] == before_swap_del);
+
+    vec_free(&int_vec);
+    assert(int_vec.data == NULL);
+    assert(int_vec.len == 0);
+    assert(int_vec.cap == 0);
+
+    Vec(char *) str_vec = {0};
+    vec_push(&str_vec, "a");
+    vec_push(&str_vec, "bb");
+    vec_push(&str_vec, "ccc");
+
+    assert(str_vec.len == 3);
+    assert(strcmp(str_vec.data[0], "a") == 0);
+    assert(strcmp(str_vec.data[1], "bb") == 0);
+    assert(strcmp(str_vec.data[2], "ccc") == 0);
+
+    vec_insert(&str_vec, 1, "inserted");
+    assert(str_vec.len == 4);
+    assert(strcmp(str_vec.data[1], "inserted") == 0);
+    assert(strcmp(str_vec.data[2], "bb") == 0);
+
+    assert(strcmp(vec_pop(&str_vec), "ccc") == 0);
+    assert(str_vec.len == 3);
+
+    vec_del(&str_vec, 1);
+    assert(str_vec.len == 2);
+    assert(strcmp(str_vec.data[0], "a") == 0);
+    assert(strcmp(str_vec.data[1], "bb") == 0);
+
+    vec_swap_del(&str_vec, 0);
+    assert(str_vec.len == 1);
+    assert(strcmp(str_vec.data[0], "bb") == 0);
+
+    vec_free(&str_vec);
+    assert(str_vec.data == NULL);
+    assert(str_vec.len == 0);
+    assert(str_vec.cap == 0);
+
+    printf("vector tests passed\n");
+}
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..887abb6
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,10 @@
+set -euo pipefail
+
+if [[ ! -e build ]]; then
+    mkdir build
+fi
+cd build
+clang -o meta ../meta.c -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
+./meta > ../meta_gen.c
+clang -o main ../main.c -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
+./main
\ No newline at end of file
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..5a0313c
--- /dev/null
+++ b/main.c
@@ -0,0 +1,396 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "base.c"
+#include "meta_gen.c"
+
+// A lexed token: its kind, the slice of the source buffer it covers (str/len) and its position.
+typedef struct Token {
+    Token_Kind kind;
+    char *str;
+    int len;
+
+    char *file;
+    int line, column;
+
+    union {
+        uint64_t u;
+    };
+} Token;
+
+typedef Vec(Token) Token_Array;
+
+// Lexer cursor over the half-open byte range [at, end); line and column are 0-based.
+typedef struct Lexer {
+    char *at;
+    char *end;
+    char *file;
+    int line;
+    int column;
+} Lexer;
+
+// Steps one byte forward, updating line/column; does nothing at end of input.
+void lex_advance(Lexer *lex) {
+    if (lex->at >= lex->end) {
+        return;
+    }
+
+    if (*lex->at == '\n') {
+        lex->line++;
+        lex->column = 0;
+    } else {
+        lex->column++;
+    }
+    lex->at += 1;
+}
+
+// Skips spaces, tabs, carriage returns and newlines.
+void eat_whitespace(Lexer *lex) {
+    while (lex->at < lex->end) {
+        switch (*lex->at) {
+        case ' ':
+        case '\t':
+        case '\r':
+        case '\n':
+            lex_advance(lex);
+            break;
+        default:
+            return;
+        }
+    }
+}
+
+// Creates a lexer over `len` bytes of `src`, positioned at line 0, column 0.
+Lexer make_lexer(char *file, char *src, int len) {
+    Lexer lex = {
+        .at = src,
+        .end = src + len,
+        .file = file,
+        .line = 0,
+        .column = 0,
+    };
+    return lex;
+}
+
+// Lexes the next token, skipping leading whitespace. Returns TOK_EOF at end of
+// input (or on a NUL byte) and TOK_ERROR for an unrecognized character.
+Token lex_token(Lexer *lex) {
+    eat_whitespace(lex);
+    Token t = {
+        .str = lex->at,
+        .line = lex->line,
+        .column = lex->column,
+        .file = lex->file,
+    };
+
+    if (lex->at >= lex->end) {
+        t.kind = TOK_EOF;
+        t.len = 0;
+        return t;
+    }
+
+    char c = *lex->at;
+
+    // <ctype.h> functions require a value representable as unsigned char.
+    if (isdigit((unsigned char)c)) {
+        t.kind = TOK_INT;
+        // Accumulate the value while scanning rather than calling strtoull on
+        // t.str: the input is bounded by lex->end and need not be NUL-terminated.
+        // Overflow wraps modulo 2^64.
+        uint64_t value = 0;
+        while (lex->at < lex->end && isdigit((unsigned char)*lex->at)) {
+            value = value * 10 + (uint64_t)(*lex->at - '0');
+            lex_advance(lex);
+        }
+
+        // @todo: proper lexing of floats (as well as postfixes)
+        if (lex->at < lex->end && *lex->at == '.') {
+            t.kind = TOK_FLOAT;
+            lex_advance(lex);
+
+            while (lex->at < lex->end && isdigit((unsigned char)*lex->at)) {
+                lex_advance(lex);
+            }
+        }
+
+        if (t.kind == TOK_INT) {
+            t.u = value;
+        }
+
+        t.len = (int)(lex->at - t.str);
+        return t;
+    }
+
+    lex_advance(lex);
+
+    switch (c) {
+    case 0: t.kind = TOK_EOF; break;
+    case '+': t.kind = TOK_PLUS; break;
+    case '-': t.kind = TOK_MINUS; break;
+    case '*': t.kind = TOK_STAR; break;
+    case '/': t.kind = TOK_SLASH; break;
+    case '%': t.kind = TOK_PERCENT; break;
+    case '(': t.kind = TOK_LPAREN; break;
+    case ')': t.kind = TOK_RPAREN; break;
+    case '{': t.kind = TOK_LBRACE; break;
+    case '}': t.kind = TOK_RBRACE; break;
+    default: {
+        // @todo: lexer perhaps should have a static buffer of size 1024, error message
+        // should be put there and piped to the upper program. The token should be filled
+        // with that message
+        t.kind = TOK_ERROR;
+    }
+    }
+
+    t.len = (int)(lex->at - t.str);
+    return t;
+}
+
+// Test helper: asserts the token's kind, position and exact text.
+void assert_token(Token t, Token_Kind kind, char *text, int line, int column) {
+    assert(t.kind == kind);
+    assert(t.line == line);
+    assert(t.column == column);
+    assert(t.len == (int)strlen(text));
+    assert(strncmp(t.str, text, t.len) == 0);
+}
+
+// Lexes the whole buffer into an array; the last element is always TOK_EOF.
+Token_Array lex_file(char *file, char *src, int len) {
+    Lexer lex = make_lexer(file, src, len);
+    Token_Array result = {0};
+    for (;;) {
+        Token token = lex_token(&lex);
+        vec_push(&result, token);
+        if (token.kind == TOK_EOF) {
+            break;
+        }
+    }
+    return result;
+}
+
+// Parser cursor over a token array; `end` is one past the last token.
+typedef struct Parser {
+    Token *at;
+    Token *end;
+} Parser;
+
+typedef enum Ast_Kind {
+    AST_NONE,
+    AST_ERROR,
+    AST_INT,
+    AST_UNARY,
+    AST_BINARY,
+} Ast_Kind;
+
+typedef struct Ast Ast;
+struct Ast {
+    Ast_Kind kind;
+    Token *pos;
+
+    union {
+        uint64_t u;
+        struct {
+            Token_Kind op;
+            Ast *l;
+            Ast *r;
+        };
+        char *error;
+    };
+};
+
+// Returns the current token and advances, unless the cursor is already at `end`.
+Token *next_token(Parser *p) {
+    if (p->at < p->end) {
+        return p->at++;
+    }
+    return p->at;
+}
+
+// Consumes and returns the current token if it has `kind`; otherwise returns NULL.
+Token *match_token(Parser *p, Token_Kind kind) {
+    if (p->at->kind == kind) {
+        return next_token(p);
+    }
+    return NULL;
+}
+
+// Allocates a zeroed AST node of `kind` anchored at `token`; exits on allocation failure.
+Ast *create_ast(Token *token, Ast_Kind kind) {
+    Ast *result = calloc(1, sizeof(*result));
+    if (result == NULL) {
+        fprintf(stderr, "create_ast: out of memory\n");
+        exit(1);
+    }
+    result->pos = token;
+    result->kind = kind;
+    return result;
+}
+
+// Builds an AST_BINARY node applying `op` to `left` and `right`.
+Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
+    Ast *result = create_ast(token, AST_BINARY);
+    result->op = op;
+    result->l = left;
+    result->r = right;
+    return result;
+}
+
+Ast *parse_expr(Parser *p, int power_of_binding_to_right);
+
+// Parses an integer literal or a parenthesized expression; exits on any other token.
+Ast *parse_atom(Parser *p) {
+    Token *token = p->at;
+    Ast *n = NULL;
+    if (match_token(p, TOK_INT)) {
+        n = create_ast(token, AST_INT);
+        n->u = token->u;
+    } else if (match_token(p, TOK_LPAREN)) {
+        // @todo: do a comma list here
+        n = parse_expr(p, 0);
+        // The closing paren must be consumed here, otherwise the enclosing
+        // expression loop stops at ')' and silently drops the rest of the input.
+        if (!match_token(p, TOK_RPAREN)) {
+            fprintf(stderr, "expected ')' to close parenthesized expression\n");
+            exit(1);
+        }
+    } else {
+        fprintf(stderr, "encountered invalid token while parsing atom: %.*s\n", token->len, token->str);
+        exit(1);
+    }
+    return n;
+}
+
+// Binding power of a binary operator token; 0 means the token is not a binary operator.
+int get_binding_power(Token *tok) {
+    switch (tok->kind) {
+    case TOK_LSHIFT: case TOK_RSHIFT: return 100;
+    case TOK_PLUS: case TOK_MINUS: return 110;
+    case TOK_STAR: case TOK_SLASH: case TOK_PERCENT: return 120;
+    default: return 0;
+    }
+}
+
+// Parses the right operand of binary operator `tok` and combines it with `left`.
+Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) {
+    switch (tok->kind) {
+    case TOK_LSHIFT: case TOK_RSHIFT: case TOK_PLUS: case TOK_MINUS: case TOK_SLASH: case TOK_STAR: case TOK_PERCENT: {
+        return create_binary_expr(tok, tok->kind, left, parse_expr(p, get_binding_power(tok)));
+    } break;
+    default: fprintf(stderr, "ERROR"); exit(1);
+    }
+    return NULL;
+}
+
+// Parses an atom, then keeps folding binary operators whose binding power exceeds
+// `power_of_binding_to_right`; equal power stops the loop, so operators associate left.
+Ast *parse_expr(Parser *p, int power_of_binding_to_right) {
+    Ast *n = parse_atom(p);
+    while (get_binding_power(p->at) > power_of_binding_to_right) {
+        Token *tok = next_token(p);
+        n = parse_valid_left_binding(p, tok, n);
+    }
+    return n;
+}
+
+// Evaluates an AST to a signed 64-bit value; exits on unsupported nodes or operators.
+int64_t eval_expr(Ast *n) {
+    switch (n->kind) {
+    case AST_INT: return (int64_t)n->u;
+    case AST_BINARY: {
+        int64_t left = eval_expr(n->l);
+        int64_t right = eval_expr(n->r);
+        switch (n->op) {
+        case TOK_PLUS: return left + right;
+        case TOK_MINUS: return left - right;
+        case TOK_STAR: return left * right;
+        case TOK_SLASH:
+        case TOK_PERCENT: {
+            // Integer division or modulo by zero is undefined behavior in C.
+            if (right == 0) {
+                fprintf(stderr, "division by zero in eval_expr\n");
+                exit(1);
+            }
+            return n->op == TOK_SLASH ? left / right : left % right;
+        }
+        default: {
+            fprintf(stderr, "invalid token kind in eval_expr, binary");
+            exit(1);
+        }
+        }
+    } break;
+    default: {
+        fprintf(stderr, "invalid ast kind in eval_expr");
+        exit(1);
+    } break;
+    }
+}
+
+// Prints the expression in infix form without parentheses.
+void print_expr(Ast *n) {
+    switch (n->kind) {
+    // uint64_t is not `unsigned long` on every platform; cast to match the format.
+    case AST_INT: printf("%llu", (unsigned long long)n->u); break;
+    case AST_BINARY: {
+        print_expr(n->l);
+        printf(" %s ", token_to_op[n->op]);
+        print_expr(n->r);
+    } break;
+    default: fprintf(stderr, "memes"); exit(1);
+    }
+}
+
+// Lexes, parses and evaluates each expression, comparing with the C compiler's own result.
+void parser_test(void) {
+#define TEST_EVAL(expr) do { \
+    Token_Array tokens = lex_file("eval_test", #expr, strlen(#expr));\
+    Parser p = {tokens.data, tokens.data + tokens.len};\
+    Ast *result = parse_expr(&p, 0);\
+    assert(eval_expr(result) == expr);\
+} while (0)
+
+    TEST_EVAL(32+5-4);
+    TEST_EVAL(16/2/2);
+    TEST_EVAL((1+2)*3);
+    TEST_EVAL(2*(3+4)-1);
+
+    printf("parser tests passed\n");
+}
+
+// Checks token kinds, text and 0-based line/column positions on a two-line input.
+void lex_test(void) {
+    char *src = "12 + 34.5 * 6\n- 7 % 2 / 1";
+    Lexer lex = make_lexer("test.c", src, (int)strlen(src));
+
+    assert_token(lex_token(&lex), TOK_INT, "12", 0, 0);
+    assert_token(lex_token(&lex), TOK_PLUS, "+", 0, 3);
+    assert_token(lex_token(&lex), TOK_FLOAT, "34.5", 0, 5);
+    assert_token(lex_token(&lex), TOK_STAR, "*", 0, 10);
+    assert_token(lex_token(&lex), TOK_INT, "6", 0, 12);
+    assert_token(lex_token(&lex), TOK_MINUS, "-", 1, 0);
+    assert_token(lex_token(&lex), TOK_INT, "7", 1, 2);
+    assert_token(lex_token(&lex), TOK_PERCENT, "%", 1, 4);
+    assert_token(lex_token(&lex), TOK_INT, "2", 1, 6);
+    assert_token(lex_token(&lex), TOK_SLASH, "/", 1, 8);
+    assert_token(lex_token(&lex), TOK_INT, "1", 1, 10);
+
+    Token eof = lex_token(&lex);
+    assert(eof.kind == TOK_EOF);
+    assert(eof.len == 0);
+    assert(eof.line == 1);
+    assert(eof.column == 11);
+
+    Token_Array array = lex_file("test.c", src, (int)strlen(src));
+    assert(array.len == 12);
+
+    printf("lexer tests passed\n");
+}
+
+int main(void) {
+    vec_test();
+    lex_test();
+    parser_test();
+}
\ No newline at end of file
diff --git a/meta.c b/meta.c
new file mode 100644
index 0000000..cc51862
--- /dev/null
+++ b/meta.c
@@ -0,0 +1,91 @@
+// Code generator: prints the Token_Kind enum and the token_to_op table to stdout.
+// build.sh redirects this output into meta_gen.c, which main.c includes.
+#include <stdio.h>
+
+#define len(x) (sizeof((x))/sizeof((x)[0]))
+#define ilen(x) ((int)len(x))
+
+int main() {
+    typedef struct {
+        char *name;
+        char *serialized_operator;
+    } Task;
+    Task kinds[] = {
+        {"EOF", 0},
+        {"ERROR", 0},
+
+        {"IDENT", 0},
+        {"KEYWORD", 0},
+        {"INT", 0},
+        {"FLOAT", 0},
+        {"CHAR", 0},
+        {"STRING", 0},
+
+        {"LPAREN", "("},
+        {"RPAREN", ")"},
+        {"LBRACKET", "["},
+        {"RBRACKET", "]"},
+        {"LBRACE", "{"},
+        {"RBRACE", "}"},
+        {"COMMA", ","},
+        {"DOT", "."},
+        {"ARROW", "->"},
+        {"ELLIPSIS", "..."},
+        {"COLON", ":"},
+        {"SEMICOLON", ";"},
+        {"QUESTION", "?"},
+        {"HASH", "#"},
+        {"HASHHASH", "##"},
+
+        {"PLUS", "+"},
+        {"MINUS", "-"},
+        {"STAR", "*"},
+        {"SLASH", "/"},
+        {"PERCENT", "%"},
+        {"INC", "++"},
+        {"DEC", "--"},
+
+        {"ASSIGN", "="},
+        {"PLUS_ASSIGN", "+="},
+        {"MINUS_ASSIGN", "-="},
+        {"MUL_ASSIGN", "*="},
+        {"DIV_ASSIGN", "/="},
+        {"MOD_ASSIGN", "%="},
+        {"LSHIFT_ASSIGN", "<<="},
+        {"RSHIFT_ASSIGN", ">>="},
+        {"AND_ASSIGN", "&="},
+        {"XOR_ASSIGN", "^="},
+        {"OR_ASSIGN", "|="},
+
+        {"EQ", "=="},
+        {"NEQ", "!="},
+        {"LT", "<"},
+        {"LEQ", "<="},
+        {"GT", ">"},
+        {"GEQ", ">="},
+
+        {"NOT", "!"},
+        {"BITNOT", "~"},
+        {"BITAND", "&"},
+        {"BITOR", "|"},
+        {"BITXOR", "^"},
+        {"AND", "&&"},
+        {"OR", "||"},
+        {"LSHIFT", "<<"},
+        {"RSHIFT", ">>"},
+    };
+
+    printf("typedef enum {\n");
+    for (int i = 0; i < ilen(kinds); i += 1) {
+        printf("    TOK_%s,\n", kinds[i].name);
+    }
+    printf("} Token_Kind;\n");
+
+    printf("char *token_to_op[] = {\n");
+    for (int i = 0; i < ilen(kinds); i += 1) {
+        if (kinds[i].serialized_operator) {
+            printf("    [TOK_%s] = \"%s\",\n", kinds[i].name, kinds[i].serialized_operator);
+        }
+    }
+    printf("};\n");
+}
diff --git a/meta_gen.c b/meta_gen.c
new file mode 100644
index 0000000..9756ea3
--- /dev/null
+++ b/meta_gen.c
@@ -0,0 +1,108 @@
+typedef enum {
+    TOK_EOF,
+    TOK_ERROR,
+    TOK_IDENT,
+    TOK_KEYWORD,
+    TOK_INT,
+    TOK_FLOAT,
+    TOK_CHAR,
+    TOK_STRING,
+    TOK_LPAREN,
+    TOK_RPAREN,
+    TOK_LBRACKET,
+    TOK_RBRACKET,
+    TOK_LBRACE,
+    TOK_RBRACE,
+    TOK_COMMA,
+    TOK_DOT,
+    TOK_ARROW,
+    TOK_ELLIPSIS,
+    TOK_COLON,
+    TOK_SEMICOLON,
+    TOK_QUESTION,
+    TOK_HASH,
+    TOK_HASHHASH,
+    TOK_PLUS,
+    TOK_MINUS,
+    TOK_STAR,
+    TOK_SLASH,
+    TOK_PERCENT,
+    TOK_INC,
+    TOK_DEC,
+    TOK_ASSIGN,
+    TOK_PLUS_ASSIGN,
+    TOK_MINUS_ASSIGN,
+    TOK_MUL_ASSIGN,
+    TOK_DIV_ASSIGN,
+    TOK_MOD_ASSIGN,
+    TOK_LSHIFT_ASSIGN,
+    TOK_RSHIFT_ASSIGN,
+    TOK_AND_ASSIGN,
+    TOK_XOR_ASSIGN,
+    TOK_OR_ASSIGN,
+    TOK_EQ,
+    TOK_NEQ,
+    TOK_LT,
+    TOK_LEQ,
+    TOK_GT,
+    TOK_GEQ,
+    TOK_NOT,
+    TOK_BITNOT,
+    TOK_BITAND,
+    TOK_BITOR,
+    TOK_BITXOR,
+    TOK_AND,
+    TOK_OR,
+    TOK_LSHIFT,
+    TOK_RSHIFT,
+} Token_Kind;
+char *token_to_op[] = {
+    [TOK_LPAREN] = "(",
+    [TOK_RPAREN] = ")",
+    [TOK_LBRACKET] = "[",
+    [TOK_RBRACKET] = "]",
+    [TOK_LBRACE] = "{",
+    [TOK_RBRACE] = "}",
+    [TOK_COMMA] = ",",
+    [TOK_DOT] = ".",
+    [TOK_ARROW] = "->",
+    [TOK_ELLIPSIS] = "...",
+    [TOK_COLON] = ":",
+    [TOK_SEMICOLON] = ";",
+    [TOK_QUESTION] = "?",
+    [TOK_HASH] = "#",
+    [TOK_HASHHASH] = "##",
+    [TOK_PLUS] = "+",
+    [TOK_MINUS] = "-",
+    [TOK_STAR] = "*",
+    [TOK_SLASH] = "/",
+    [TOK_PERCENT] = "%",
+    [TOK_INC] = "++",
+    [TOK_DEC] = "--",
+    [TOK_ASSIGN] = "=",
+    [TOK_PLUS_ASSIGN] = "+=",
+    [TOK_MINUS_ASSIGN] = "-=",
+    [TOK_MUL_ASSIGN] = "*=",
+    [TOK_DIV_ASSIGN] = "/=",
+    [TOK_MOD_ASSIGN] = "%=",
+    [TOK_LSHIFT_ASSIGN] = "<<=",
+    [TOK_RSHIFT_ASSIGN] = ">>=",
+    [TOK_AND_ASSIGN] = "&=",
+    [TOK_XOR_ASSIGN] = "^=",
+    [TOK_OR_ASSIGN] = "|=",
+    [TOK_EQ] = "==",
+    [TOK_NEQ] = "!=",
+    [TOK_LT] = "<",
+    [TOK_LEQ] = "<=",
+    [TOK_GT] = ">",
+    [TOK_GEQ] = ">=",
+    [TOK_NOT] = "!",
+    [TOK_BITNOT] = "~",
+    [TOK_BITAND] = "&",
+    [TOK_BITOR] = "|",
+    [TOK_BITXOR] = "^",
+    [TOK_AND] = "&&",
+    [TOK_OR] = "||",
+    [TOK_LSHIFT] = "<<",
+    [TOK_RSHIFT] = ">>",
+};