/*
 * Files
 * smallprojects/main.c
 * 2026-05-10 14:38:03 +02:00
 *
 * 565 lines
 * 17 KiB
 * C
 */

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdint.h>
#include <stdarg.h>
#include "base.c"
#include "meta_gen.c"
// A single lexeme produced by lex_token. `str` points into the source buffer
// that was handed to the lexer and is not terminated at `len`, so the two
// fields must always be used together.
typedef struct Token {
Token_Kind kind;
char *str; // first character of the lexeme inside the lexed source buffer
int len; // number of characters the lexeme spans
char *file; // copied from Lexer.file when the token is created
int line, column; // lexer position of the lexeme's first character (both start at 0)
union {
uint64_t u; // numeric value; lex_token only fills it in for TOK_INT
};
} Token;
// Token container built with the Vec macro (presumably provided by base.c);
// lex_file fills it through vec_push.
typedef Vec(Token) Token_Array;
// Cursor over the source buffer that is being tokenized.
typedef struct Lexer {
char *at; // next character to be consumed
char *end; // one past the last character (make_lexer sets it to src + len)
char *file; // name copied into the `file` field of every token
int line; // line of `at`, starting at 0; lex_advance bumps it on '\n'
int column; // column of `at`, starting at 0; lex_advance resets it after '\n'
} Lexer;
// Consumes one character and keeps the line/column bookkeeping in sync.
// Does nothing once the cursor has reached the end of the buffer.
void lex_advance(Lexer *lex) {
    if (lex->at < lex->end) {
        char consumed = *lex->at;
        lex->at += 1;
        if (consumed != '\n') {
            lex->column += 1;
        } else {
            // A newline moves the position to the start of the next line.
            lex->line += 1;
            lex->column = 0;
        }
    }
}
// Advances past spaces, tabs, carriage returns and newlines, stopping at the
// first other character or at the end of the buffer.
void eat_whitespace(Lexer *lex) {
    for (; lex->at < lex->end; lex_advance(lex)) {
        char c = *lex->at;
        int is_blank = (c == ' ' || c == '\t' || c == '\r' || c == '\n');
        if (!is_blank) {
            return;
        }
    }
}
Lexer make_lexer(char *file, char *src, int len) {
Lexer lex = {
.at = src,
.end = src + len,
.file = file,
.line = 0,
.column = 0,
};
return lex;
}
// Consumes the next character and returns 1 when it equals `expected`;
// otherwise leaves the lexer untouched and returns 0.
static int lex_match_char(Lexer *lex, char expected) {
    if (lex->at < lex->end && *lex->at == expected) {
        lex_advance(lex);
        return 1;
    }
    return 0;
}

// Skips a (possibly empty) run of decimal digits.
static void lex_eat_digits(Lexer *lex) {
    // The cast keeps isdigit well defined for bytes >= 0x80 when char is signed.
    while (lex->at < lex->end && isdigit((unsigned char)*lex->at)) {
        lex_advance(lex);
    }
}

// Skips leading whitespace and returns the next token.
//
// The token's `str`/`len` describe the lexeme inside the source buffer and
// `line`/`column` its starting position. At the end of the buffer a zero-length
// TOK_EOF is returned; a NUL byte inside the buffer also yields TOK_EOF.
// Characters that start no known token produce TOK_ERROR. For TOK_INT the
// decimal value is stored in `u`, saturating at UINT64_MAX on overflow.
Token lex_token(Lexer *lex) {
    eat_whitespace(lex);
    Token t = {
        .str = lex->at,
        .line = lex->line,
        .column = lex->column,
        .file = lex->file,
    };
    if (lex->at >= lex->end) {
        t.kind = TOK_EOF;
        t.len = 0;
        return t;
    }
    char c = *lex->at;
    if (isdigit((unsigned char)c)) {
        t.kind = TOK_INT;
        lex_eat_digits(lex);
        // @todo: proper lexing of floats (as well as postfixes)
        if (lex_match_char(lex, '.')) {
            t.kind = TOK_FLOAT;
            lex_eat_digits(lex);
        }
        if (t.kind == TOK_INT) {
            // Convert only the characters that were lexed, so the result never
            // depends on what follows `lex->end` (the buffer need not be
            // NUL-terminated).
            uint64_t value = 0;
            for (char *digit_at = t.str; digit_at < lex->at; digit_at++) {
                uint64_t digit = (uint64_t)(*digit_at - '0');
                if (value > (UINT64_MAX - digit) / 10) {
                    value = UINT64_MAX;
                    break;
                }
                value = value * 10 + digit;
            }
            t.u = value;
        }
        t.len = (int)(lex->at - t.str);
        return t;
    }
    lex_advance(lex);
    switch (c) {
    case 0: t.kind = TOK_EOF; break;
    case '(': t.kind = TOK_LPAREN; break;
    case ')': t.kind = TOK_RPAREN; break;
    case '[': t.kind = TOK_LBRACKET; break;
    case ']': t.kind = TOK_RBRACKET; break;
    case '{': t.kind = TOK_LBRACE; break;
    case '}': t.kind = TOK_RBRACE; break;
    case ',': t.kind = TOK_COMMA; break;
    case '.': t.kind = TOK_DOT; break;
    case ':': t.kind = TOK_COLON; break;
    case ';': t.kind = TOK_SEMICOLON; break;
    case '?': t.kind = TOK_QUESTION; break;
    case '#': t.kind = TOK_HASH; break;
    case '~': t.kind = TOK_BITNOT; break;
    case '+': {
        if (lex_match_char(lex, '+')) {
            t.kind = TOK_INC;
        } else if (lex_match_char(lex, '=')) {
            t.kind = TOK_PLUS_ASSIGN;
        } else {
            t.kind = TOK_PLUS;
        }
    } break;
    case '-': {
        if (lex_match_char(lex, '-')) {
            t.kind = TOK_DEC;
        } else if (lex_match_char(lex, '=')) {
            t.kind = TOK_MINUS_ASSIGN;
        } else if (lex_match_char(lex, '>')) {
            t.kind = TOK_ARROW;
        } else {
            t.kind = TOK_MINUS;
        }
    } break;
    case '*': t.kind = lex_match_char(lex, '=') ? TOK_MUL_ASSIGN : TOK_STAR; break;
    case '/': t.kind = lex_match_char(lex, '=') ? TOK_DIV_ASSIGN : TOK_SLASH; break;
    case '%': t.kind = lex_match_char(lex, '=') ? TOK_MOD_ASSIGN : TOK_PERCENT; break;
    case '=': t.kind = lex_match_char(lex, '=') ? TOK_EQ : TOK_ASSIGN; break;
    case '!': t.kind = lex_match_char(lex, '=') ? TOK_NEQ : TOK_NOT; break;
    case '^': t.kind = lex_match_char(lex, '=') ? TOK_XOR_ASSIGN : TOK_BITXOR; break;
    case '<': {
        if (lex_match_char(lex, '<')) {
            t.kind = lex_match_char(lex, '=') ? TOK_LSHIFT_ASSIGN : TOK_LSHIFT;
        } else if (lex_match_char(lex, '=')) {
            t.kind = TOK_LEQ;
        } else {
            t.kind = TOK_LT;
        }
    } break;
    case '>': {
        if (lex_match_char(lex, '>')) {
            t.kind = lex_match_char(lex, '=') ? TOK_RSHIFT_ASSIGN : TOK_RSHIFT;
        } else if (lex_match_char(lex, '=')) {
            t.kind = TOK_GEQ;
        } else {
            t.kind = TOK_GT;
        }
    } break;
    case '&': {
        if (lex_match_char(lex, '&')) {
            t.kind = TOK_AND;
        } else if (lex_match_char(lex, '=')) {
            t.kind = TOK_AND_ASSIGN;
        } else {
            t.kind = TOK_BITAND;
        }
    } break;
    case '|': {
        if (lex_match_char(lex, '|')) {
            t.kind = TOK_OR;
        } else if (lex_match_char(lex, '=')) {
            t.kind = TOK_OR_ASSIGN;
        } else {
            t.kind = TOK_BITOR;
        }
    } break;
    default: {
        // @todo: lexer perhaps should have a static buffer of size 1024, error message
        // should be put there and piped to the upper program. The token should be filled
        // with that message
        t.kind = TOK_ERROR;
    }
    }
    t.len = (int)(lex->at - t.str);
    return t;
}
// Test helper: asserts that `t` has the given kind and start position and that
// its lexeme is exactly `text`.
void assert_token(Token t, Token_Kind kind, char *text, int line, int column) {
    assert(kind == t.kind);
    assert(line == t.line);
    assert(column == t.column);
    // Length is checked first so the prefix comparison below covers all of `text`.
    assert((int)strlen(text) == t.len);
    assert(!strncmp(t.str, text, t.len));
}
// Tokenizes `len` characters of `src` and returns every token in order.
// The array always ends with the TOK_EOF token that stopped the loop.
Token_Array lex_file(char *file, char *src, int len) {
    Token_Array tokens = {0};
    Lexer lexer = make_lexer(file, src, len);
    int reached_eof = 0;
    while (!reached_eof) {
        Token tok = lex_token(&lexer);
        vec_push(&tokens, tok);
        reached_eof = (tok.kind == TOK_EOF);
    }
    return tokens;
}
// Cursor over a token array for the expression parser.
typedef struct Parser {
Token *at; // token currently being examined
Token *end; // upper bound for `at`; next_token stops advancing once at == end
} Parser;
// Discriminates which member of the Ast union is meaningful.
typedef enum Ast_Kind {
AST_NONE, // zero value
AST_ERROR, // not produced by the code visible here; presumably pairs with Ast.error
AST_INT, // integer literal, value in Ast.u
AST_UNARY, // not produced by the code visible here
AST_BINARY, // binary operator, uses Ast.op / Ast.l / Ast.r
} Ast_Kind;
// Forward declaration so the struct can hold pointers to itself.
typedef struct Ast Ast;
// Expression tree node. `kind` selects the active union member.
struct Ast {
Ast_Kind kind;
Token *pos; // token the node was created from (see create_ast)
union {
uint64_t u; // AST_INT: literal value copied from the token
struct {
Token_Kind op; // AST_BINARY: kind of the operator token
Ast *l; // AST_BINARY: left operand
Ast *r; // AST_BINARY: right operand
};
char *error; // not written by the code visible here; presumably an AST_ERROR message
};
};
// Returns the current token and moves the cursor one token forward.
// Once the cursor has reached `end` it stays there and `end` itself is returned.
Token *next_token(Parser *p) {
    Token *current = p->at;
    if (current < p->end) {
        p->at = current + 1;
    }
    return current;
}
// Consumes and returns the current token when it is of `kind`;
// otherwise returns NULL and leaves the parser untouched.
Token *match_token(Parser *p, Token_Kind kind) {
    return (p->at->kind == kind) ? next_token(p) : NULL;
}
// Consumes and returns the current token, which must be of `kind`.
// Any other token kind is reported through panicf.
Token *expect_token(Parser *p, Token_Kind kind) {
    if (p->at->kind == kind) {
        return next_token(p);
    }
    // `kind` is what the caller asked for, p->at->kind is what the input holds.
    panicf("expected token kind: %s, got instead: %s", token_to_name[kind], token_to_name[p->at->kind]);
    // Only reached if panicf returns; keeps the function from falling off its end.
    return NULL;
}
// Allocates a zero-initialized Ast node of the given kind, anchored at `token`.
// Allocation failure is reported through panicf; NULL is returned only if
// panicf itself returns.
Ast *create_ast(Token *token, Ast_Kind kind) {
    Ast *result = calloc(1, sizeof *result);
    if (result == NULL) {
        panicf("out of memory while allocating an Ast node");
        return NULL;
    }
    result->pos = token;
    result->kind = kind;
    return result;
}
// Creates an AST_BINARY node for `left op right`, positioned at `token`.
Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
    Ast *node = create_ast(token, AST_BINARY);
    node->l = left;
    node->r = right;
    node->op = op;
    return node;
}
// Declared ahead of its definition because parse_atom recurses into it.
Ast *parse_expr(Parser *p, int power_of_binding_to_right);

// Parses an operand: an integer literal or a parenthesized expression.
// Any other token is reported through panicf.
Ast *parse_atom(Parser *p) {
    Token *token = p->at;
    if (match_token(p, TOK_INT)) {
        Ast *literal = create_ast(token, AST_INT);
        literal->u = token->u;
        return literal;
    }
    if (match_token(p, TOK_LPAREN)) {
        // @todo: do a comma list here
        Ast *inner = parse_expr(p, 0);
        expect_token(p, TOK_RPAREN);
        return inner;
    }
    panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, token->len, token->str, token_to_name[token->kind], token->kind);
    return NULL;
}
// Returns the binding power of a binary operator token; a larger value binds
// tighter. Tokens that are not binary operators yield 0.
int get_binding_power(Token *tok) {
    switch (tok->kind) {
    case TOK_OR:
        return 30;
    case TOK_AND:
        return 40;
    case TOK_BITOR:
        return 50;
    case TOK_BITXOR:
        return 60;
    case TOK_BITAND:
        return 70;
    case TOK_EQ:
    case TOK_NEQ:
        return 80;
    case TOK_LT:
    case TOK_LEQ:
    case TOK_GT:
    case TOK_GEQ:
        return 90;
    case TOK_LSHIFT:
    case TOK_RSHIFT:
        return 100;
    case TOK_PLUS:
    case TOK_MINUS:
        return 110;
    case TOK_STAR:
    case TOK_SLASH:
    case TOK_PERCENT:
        return 120;
    default:
        return 0;
    }
}
// Given an already parsed `left` operand and the operator token `tok` that
// followed it, parses the right operand and returns the binary node.
// Tokens that are not binary operators are reported through panicf.
Ast *parse_valid_left_binding(Parser *p, Token *tok, Ast *left) {
    switch (tok->kind) {
    case TOK_PLUS:
    case TOK_MINUS:
    case TOK_STAR:
    case TOK_SLASH:
    case TOK_PERCENT:
    case TOK_EQ:
    case TOK_NEQ:
    case TOK_LT:
    case TOK_LEQ:
    case TOK_GT:
    case TOK_GEQ:
    case TOK_BITAND:
    case TOK_BITOR:
    case TOK_BITXOR:
    case TOK_AND:
    case TOK_OR:
    case TOK_LSHIFT:
    case TOK_RSHIFT:
        break;
    default:
        panicf("unknown token in %s. %.*s (%s/%d), ", __FUNCTION__, tok->len, tok->str, token_to_name[tok->kind], tok->kind);
        return NULL;
    }
    // The operator's own power is the minimum the right-hand side must exceed,
    // which makes operators of equal power group to the left.
    Ast *right = parse_expr(p, get_binding_power(tok));
    return create_binary_expr(tok, tok->kind, left, right);
}
// Parses an expression, folding in every following operator whose binding
// power is greater than `power_of_binding_to_right`.
Ast *parse_expr(Parser *p, int power_of_binding_to_right) {
    Ast *left = parse_atom(p);
    for (;;) {
        if (get_binding_power(p->at) <= power_of_binding_to_right) {
            break;
        }
        Token *op = next_token(p);
        left = parse_valid_left_binding(p, op, left);
    }
    return left;
}
// Evaluates an expression tree made of AST_INT and AST_BINARY nodes and
// returns the result as a signed 64-bit integer.
//
// Operations that are undefined in C (division or modulo by zero,
// INT64_MIN / -1, shift counts outside 0..63) and unknown node or operator
// kinds are reported through panicf. The function returns 0 in those cases
// only if panicf itself returns.
int64_t eval_expr(Ast *n) {
    switch (n->kind) {
    case AST_INT:
        return (int64_t)n->u;
    case AST_BINARY: {
        int64_t left = eval_expr(n->l);
        int64_t right = eval_expr(n->r);
        switch (n->op) {
        case TOK_PLUS: return left + right;
        case TOK_MINUS: return left - right;
        case TOK_STAR: return left * right;
        case TOK_SLASH:
        case TOK_PERCENT: {
            if (right == 0) {
                panicf("division by zero in eval_expr");
                break;
            }
            if (left == INT64_MIN && right == -1) {
                panicf("integer overflow in eval_expr, division");
                break;
            }
            return n->op == TOK_SLASH ? left / right : left % right;
        }
        case TOK_EQ: return left == right;
        case TOK_NEQ: return left != right;
        case TOK_LT: return left < right;
        case TOK_LEQ: return left <= right;
        case TOK_GT: return left > right;
        case TOK_GEQ: return left >= right;
        case TOK_BITAND: return left & right;
        case TOK_BITOR: return left | right;
        case TOK_BITXOR: return left ^ right;
        case TOK_AND: return left && right;
        case TOK_OR: return left || right;
        case TOK_LSHIFT:
        case TOK_RSHIFT: {
            if (right < 0 || right > 63) {
                panicf("shift count out of range in eval_expr");
                break;
            }
            if (n->op == TOK_RSHIFT) {
                return left >> right;
            }
            // Shift the unsigned representation: left-shifting a negative or
            // overflowing signed value is undefined.
            return (int64_t)((uint64_t)left << right);
        }
        default:
            panicf("invalid token kind in eval_expr, binary");
        }
    } break;
    default:
        panicf("invalid ast kind in eval_expr");
    }
    // Only reached after a panicf call that returned.
    return 0;
}
// Prints the expression tree in infix form to stdout, without parentheses.
// Node kinds other than AST_INT and AST_BINARY are reported through panicf.
void print_expr(Ast *n) {
    switch (n->kind) {
    case AST_INT:
        // uint64_t is not `unsigned long` on every platform; the cast makes the
        // argument match the conversion specifier everywhere.
        printf("%llu", (unsigned long long)n->u);
        break;
    case AST_BINARY: {
        print_expr(n->l);
        printf(" %s ", token_to_op[n->op]);
        print_expr(n->r);
    } break;
    default:
        panicf("encountered invalid ast kind in %s of kind: %d\n", __FUNCTION__, n->kind);
    }
}
// Checks the lexer, parser and evaluator against the C compiler: each
// expression is evaluated once from its source text and once as real C code,
// and any mismatch is printed. A summary line reports whether all expressions
// agreed.
void parser_test(void) {
    int failures = 0;
#define TEST_EVAL(expr) do { \
        Token_Array tokens = lex_file("eval_test", (#expr), (int)strlen((#expr))); \
        Parser p = {tokens.data, tokens.data + tokens.len}; \
        Ast *result = parse_expr(&p, 0); \
        int64_t got = eval_expr(result); \
        int64_t expected = (expr); \
        if (got != expected) { \
            failures++; \
            printf("%s:%d expected: %lld, got: %lld\n expression: ", __FILE__, __LINE__, (long long)expected, (long long)got); \
            print_expr(result); \
            printf("\n"); \
        } \
    } while (0)
    // The expressions deliberately rely on operator precedence, so the
    // "add parentheses" warnings are silenced for this section.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wparentheses"
    TEST_EVAL(32+5-4);
    TEST_EVAL(16/2/2);
    TEST_EVAL(9*4/6);
    TEST_EVAL(17%5);
    TEST_EVAL(5125-42|(4&3)^2|2%1242);
    TEST_EVAL((45%2)^(23&3));
    TEST_EVAL(1<16*2);
    TEST_EVAL(16>1+2);
    TEST_EVAL(4<=2+2);
    TEST_EVAL(5>=2+2);
    TEST_EVAL(4==2+2);
    TEST_EVAL(5!=2+2);
    TEST_EVAL(1+1+1+1+2-2-3-4-5);
    TEST_EVAL(5%2^5&6|3);
    TEST_EVAL(6&3);
    TEST_EVAL(6|3);
    TEST_EVAL(6^3);
    TEST_EVAL(1&&2);
    TEST_EVAL(0||3);
    TEST_EVAL(1||0&&0);
    TEST_EVAL(8<<2);
    TEST_EVAL(32>>3);
    TEST_EVAL((2+3)*(4+5));
    TEST_EVAL(9>3&1);
    TEST_EVAL(8|1<4);
    TEST_EVAL(7<=3+4);
    TEST_EVAL(8>=2*4);
    TEST_EVAL(1+2<<3);
    TEST_EVAL(16>>1+1);
#pragma clang diagnostic pop
#undef TEST_EVAL
    if (failures == 0) {
        printf("parser tests passed\n");
    } else {
        printf("parser tests FAILED: %d mismatching expression(s)\n", failures);
    }
}
void lex_test(void) {
char *src = "12 + 34.5 * 6\n- 7 % 2 / 1 == 1 != 2 <= 3 >= 4 && 3 || 4 << 1 >> 2";
Lexer lex = make_lexer("test.c", src, (int)strlen(src));
assert_token(lex_token(&lex), TOK_INT, "12", 0, 0);
assert_token(lex_token(&lex), TOK_PLUS, "+", 0, 3);
assert_token(lex_token(&lex), TOK_FLOAT, "34.5", 0, 5);
assert_token(lex_token(&lex), TOK_STAR, "*", 0, 10);
assert_token(lex_token(&lex), TOK_INT, "6", 0, 12);
assert_token(lex_token(&lex), TOK_MINUS, "-", 1, 0);
assert_token(lex_token(&lex), TOK_INT, "7", 1, 2);
assert_token(lex_token(&lex), TOK_PERCENT, "%", 1, 4);
assert_token(lex_token(&lex), TOK_INT, "2", 1, 6);
assert_token(lex_token(&lex), TOK_SLASH, "/", 1, 8);
assert_token(lex_token(&lex), TOK_INT, "1", 1, 10);
assert_token(lex_token(&lex), TOK_EQ, "==", 1, 12);
assert_token(lex_token(&lex), TOK_INT, "1", 1, 15);
assert_token(lex_token(&lex), TOK_NEQ, "!=", 1, 17);
assert_token(lex_token(&lex), TOK_INT, "2", 1, 20);
assert_token(lex_token(&lex), TOK_LEQ, "<=", 1, 22);
assert_token(lex_token(&lex), TOK_INT, "3", 1, 25);
assert_token(lex_token(&lex), TOK_GEQ, ">=", 1, 27);
assert_token(lex_token(&lex), TOK_INT, "4", 1, 30);
assert_token(lex_token(&lex), TOK_AND, "&&", 1, 32);
assert_token(lex_token(&lex), TOK_INT, "3", 1, 35);
assert_token(lex_token(&lex), TOK_OR, "||", 1, 37);
assert_token(lex_token(&lex), TOK_INT, "4", 1, 40);
assert_token(lex_token(&lex), TOK_LSHIFT, "<<", 1, 42);
assert_token(lex_token(&lex), TOK_INT, "1", 1, 45);
assert_token(lex_token(&lex), TOK_RSHIFT, ">>", 1, 47);
assert_token(lex_token(&lex), TOK_INT, "2", 1, 50);
Token eof = lex_token(&lex);
assert(eof.kind == TOK_EOF);
assert(eof.len == 0);
assert(eof.line == 1);
assert(eof.column == 51);
Token_Array array = lex_file("test.c", src, (int)strlen(src));
assert(array.len == 28);
printf("lexer tests passed\n");
}
// Runs the self-tests: vec_test (not defined in the code visible here), then
// the lexer and parser tests.
int main(void) {
    vec_test();
    lex_test();
    parser_test();
    return 0;
}