reinit repo after broken git

This commit is contained in:
krzosa
2024-12-29 10:10:09 +01:00
commit a30a897272
40 changed files with 13769 additions and 0 deletions

493
src/core/lexer.c Normal file
View File

@@ -0,0 +1,493 @@
// Token kinds, generated from an X-macro list.
// X(name, description, simple): 'description' is the human-readable form used
// in diagnostics, 'simple' is the bare spelling of the token ("---" for kinds
// with no fixed spelling). Fixes: bit_and_assign now carries a full
// description like its siblings, and "then" -> "than" in the comparison kinds.
typedef enum lex_kind_t lex_kind_t;
enum lex_kind_t {
#define LEX_KIND_XLIST\
X(eof, "end of file", "---")\
X(int, "integer", "---")\
X(real, "real", "---")\
X(ident, "identifier", "---")\
X(string, "string", "---")\
X(comment, "comment", "---")\
X(open_brace, "'{' open brace", "{")\
X(close_brace, "'}' close brace", "}")\
X(open_paren, "'(' open parenthesis", "(")\
X(close_paren, "')' close parenthesis", ")")\
X(open_bracket, "'[' open bracket", "[")\
X(close_bracket, "']' close bracket", "]")\
X(plus, "'+' plus", "+")\
X(minus, "'-' minus", "-")\
X(divide, "'/' division sign", "/")\
X(multiply, "'*' multiplication sign", "*")\
X(modulo, "'%' modulo", "%")\
X(or, "'||' logical or", "||")\
X(and, "'&&' logical and", "&&")\
X(negation, "'!' logical negation", "!")\
X(bit_negation, "'~' bit negation", "~")\
X(bit_left_shift, "'<<' bit left shift", "<<")\
X(bit_right_shift, "'>>' bit right shift", ">>")\
X(bit_or, "'|' bit or", "|")\
X(bit_and, "'&' bit and", "&")\
X(bit_xor, "'^' bit xor", "^")\
X(decrement, "'--' decrement", "--")\
X(increment, "'++' increment", "++")\
X(post_decrement, "'--' post decrement", "--")\
X(post_increment, "'++' post increment", "++")\
X(assign, "'=' assignment", "=")\
X(divide_assign, "'/=' divide assignment", "/=")\
X(multiply_assign, "'*=' multiply assignment", "*=")\
X(plus_assign, "'+=' plus assignment", "+=")\
X(minus_assign, "'-=' minus assignment", "-=")\
X(modulo_assign, "'%=' modulo assignment", "%=")\
X(bit_and_assign, "'&=' bit and assignment", "&=")\
X(bit_or_assign, "'|=' bit or assignment", "|=")\
X(bit_xor_assign, "'^=' bit xor assignment", "^=")\
X(bit_left_shift_assign, "'<<=' bit left shift assignment", "<<=")\
X(bit_right_shift_assign, "'>>=' bit right shift assignment", ">>=")\
X(equals, "'==' equals sign", "==")\
X(not_equals, "'!=' not equals sign", "!=")\
X(lesser, "'<' lesser than", "<")\
X(greater, "'>' greater than", ">")\
X(lesser_or_equal, "'<=' lesser than or equal", "<=")\
X(greater_or_equal, "'>=' greater than or equal", ">=")\
X(comma, "',' comma", ",")\
X(dot, "'.' dot", ".")\
X(three_dots, "'...' three dots", "...")\
X(semicolon, "';' semicolon", ";")\
X(colon, "':' colon", ":")\
X(arrow, "'->' arrow", "->")\
X(question, "'?' question mark", "?")\
#define X(KIND, STR, SIMPLE) lex_kind_##KIND,
LEX_KIND_XLIST
#undef X
lex_kind_count,
};
// Numeric-literal suffix kinds (e.g. 1u, 2ul, 3ull, 4l, 1.0f), generated
// from the X-macro list below; lex_suffix_none means the literal had no suffix.
typedef enum lex_suffix_t lex_suffix_t;
enum lex_suffix_t {
#define LEX_SUFFIX_XLIST X(none) X(f) X(d) X(u) X(ul) X(ull) X(l) X(ll)
#define X(KIND) lex_suffix_##KIND,
LEX_SUFFIX_XLIST
#undef X
lex_suffix_count,
};
// A single token produced by the lexer.
typedef struct lex_t lex_t;
struct lex_t {
lex_kind_t kind;
lex_suffix_t suffix; // numeric-literal suffix, lex_suffix_none if absent
union {
struct {char *str; i64 len;}; // token text, pointing into the source buffer (not NUL-terminated)
s8_t s8; // same bytes viewed as a string slice; assumes s8_t lays out as {ptr,len} -- TODO confirm
};
int line; // position of the token start, as tracked by lex_advance
int column;
char *file_name; // shared with the lexer; not owned
union {
u64 integer; // decoded value when kind == lex_kind_int
f64 real; // decoded value when kind == lex_kind_real
char *error; // NOTE(review): never written in this file -- used by callers? confirm
};
};
// Lexer cursor: walks a NUL-terminated source buffer in place.
typedef struct lexer_t lexer_t;
struct lexer_t {
char *at; // current read position; *at == 0 marks end of input
char *file_name; // reported in diagnostics; not owned
int line; // starts at 0 (see lex_make / lex_advance)
int column; // 0 on the first line, resets to 0 then pre-increments after '\n'
};
// Contiguous token array as produced by lex_tokens; always terminated by an
// eof token (included in len).
typedef struct lex_array_t lex_array_t;
struct lex_array_t {
lex_t *data;
int len;
};
// Formats 'str' with the varargs and aborts via panicf with a
// "file(line:column): error:" prefix taken from 'token'.
// NOTE(review): if panicf does not return, ma_end_scratch below is
// unreachable -- harmless, but confirm panicf's contract.
void lex_panicf(lex_t *token, const char *str, ...) {
ma_temp_t scratch = ma_begin_scratch();
S8_FMT(scratch.arena, str, str8); // project macro: presumably formats varargs into 'str8' -- verify
panicf("%s(%d:%d): error: %S", token->file_name, token->line, token->column, str8);
ma_end_scratch(scratch);
}
lexer_t lex_make(char *begin, char *file_name) {
lexer_t result = {.at = begin, .file_name = file_name};
return result;
}
// Moves the cursor one character forward, keeping line/column in sync.
// Does nothing once the NUL terminator is reached.
// NOTE(review): after a newline the next character lands on column 1, while
// the very first line starts at column 0 -- off-by-one between lines; confirm
// whether this is intentional.
void lex_advance(lexer_t *lex) {
char c = lex->at[0];
if (c == 0) return;
if (c == '\n') {
lex->line += 1;
lex->column = 0;
}
lex->column += 1;
lex->at += 1;
}
// Consumes the current character and returns true iff it equals 'c';
// otherwise leaves the cursor untouched and returns false.
b32 lex_match(lexer_t *lex, char c) {
if (lex->at[0] != c) return false;
lex_advance(lex);
return true;
}
// Skips a (possibly empty) run of whitespace characters.
void lex_eat_whitespace(lexer_t *lex) {
for (; char_is_whitespace(lex->at[0]); lex_advance(lex)) {}
}
u64 lex_map_char_to_int(char c) {
switch (c) {
case '0': return 0; break;
case '1': return 1; break;
case '2': return 2; break;
case '3': return 3; break;
case '4': return 4; break;
case '5': return 5; break;
case '6': return 6; break;
case '7': return 7; break;
case '8': return 8; break;
case '9': return 9; break;
case 'a':
case 'A': return 10; break;
case 'b':
case 'B': return 11; break;
case 'c':
case 'C': return 12; break;
case 'd':
case 'D': return 13; break;
case 'e':
case 'E': return 14; break;
case 'f':
case 'F': return 15; break;
default: return 255;
}
}
// Decodes 'len' characters of 'string' as an unsigned integer in 'base'
// (2..16). Panics on a character that is not a valid digit for the base.
// NOTE(review): no overflow check -- very long literals wrap silently.
u64 lex_deserial_u64(char *string, i64 len, u64 base) {
assert(base >= 2 && base <= 16);
u64 result = 0;
for (char *p = string, *end = string + len; p < end; p++) {
u64 digit = lex_map_char_to_int(*p);
if (digit >= base) {
panicf("invalid number");
break;
}
result = result * base + digit;
}
return result;
}
// Consumes an integer or real literal, including an optional suffix, setting
// token->kind (lex_kind_int / lex_kind_real) and token->suffix. Panics on a
// second '.' inside one literal.
// Fixes: 'll'/'LL' is checked before 'l'/'L' (the single-letter branch used
// to win first, leaving the second 'l' behind in the stream), and a plain
// 'u'/'U' suffix is now recognized (lex_suffix_u was never produced before).
void lex_eat_number(lexer_t *lex, lex_t *token) {
token->kind = lex_kind_int;
for (;;) {
if (char_is_digit(lex->at[0])) {
lex_advance(lex);
continue;
}
if (lex_match(lex, '.')) {
if (token->kind == lex_kind_real) {
lex_panicf(token, "multiple '.' periods in floating point number literal");
}
token->kind = lex_kind_real;
continue;
}
break;
}
if (lex_match(lex, 'f')) {
token->kind = lex_kind_real;
token->suffix = lex_suffix_f;
} else if (lex_match(lex, 'd')) {
token->kind = lex_kind_real;
token->suffix = lex_suffix_d;
} else if (token->kind == lex_kind_int && ((lex->at[0] == 'u' && lex->at[1] == 'l' && lex->at[2] == 'l') || (lex->at[0] == 'U' && lex->at[1] == 'L' && lex->at[2] == 'L'))) {
token->suffix = lex_suffix_ull;
lex_advance(lex); lex_advance(lex); lex_advance(lex);
} else if (token->kind == lex_kind_int && ((lex->at[0] == 'u' && lex->at[1] == 'l') || (lex->at[0] == 'U' && lex->at[1] == 'L'))) {
token->suffix = lex_suffix_ul;
lex_advance(lex); lex_advance(lex);
} else if (token->kind == lex_kind_int && (lex->at[0] == 'u' || lex->at[0] == 'U')) {
token->suffix = lex_suffix_u;
lex_advance(lex);
} else if (token->kind == lex_kind_int && ((lex->at[0] == 'l' && lex->at[1] == 'l') || (lex->at[0] == 'L' && lex->at[1] == 'L'))) {
// must come before the single 'l' check or it can never match
token->suffix = lex_suffix_ll;
lex_advance(lex); lex_advance(lex);
} else if (token->kind == lex_kind_int && (lex->at[0] == 'l' || lex->at[0] == 'L')) {
token->suffix = lex_suffix_l;
lex_advance(lex);
}
}
// Advances the cursor until it reaches 'c' or the end of input; 'c' itself
// is not consumed.
void lex_eat_until(lexer_t *lex, char c) {
for (;;) {
char cur = lex->at[0];
if (cur == c || cur == 0) return;
lex_advance(lex);
}
}
// Consumes a quoted string whose opening quote character is token->str[0]
// (the lexer has already advanced past it); the matching closing quote is
// consumed too. Panics on end of input before the closing quote.
// NOTE(review): no escape-sequence handling -- a quote always terminates.
void lex_eat_string(lexer_t *lex, lex_t *token) {
token->kind = lex_kind_string;
char quote = token->str[0];
while (!lex_match(lex, quote)) {
if (lex->at[0] == 0) {
lex_panicf(token, "unclosed string");
}
lex_advance(lex);
}
}
// Switch-case helper for a one-character token C1 that may extend into the
// two-character tokens C1 C2 or C1 C3 (e.g. '+' / "++" / "+=").
// Fix: lex_match already consumes the matched character, so the extra
// lex_advance calls the macro used to make swallowed one character too many
// (compare the hand-written '<' and '>' cases, which advance only through
// lex_match).
#define LEX_CASE3(C1, K1, C2, K2, C3, K3)\
case C1: {\
token->kind = K1;\
if (lex_match(lex, C2)) {\
token->kind = K2;\
} else if (lex_match(lex, C3)) {\
token->kind = K3;\
}\
} break
// Lexes one token from the stream into *token, skipping leading whitespace.
// Fills in kind, source span (str/len), file/line/column, and the decoded
// value for number literals. Panics via lex_panicf on invalid characters,
// unclosed strings, and unclosed block comments.
// Fixes relative to the original:
// - two-character operators no longer consume one character too many
//   (lex_match already advances; the old code advanced a second time),
// - '->' now yields lex_kind_arrow (it previously lexed as '-' then '>'),
// - integer suffixes (u/ul/ull/l/ll) are excluded from the digit span passed
//   to lex_deserial_u64, which used to panic on any suffixed integer.
void lex_token_ex(lexer_t *lex, lex_t *token) {
lex_eat_whitespace(lex);
*token = (lex_t){.str = lex->at, .file_name = lex->file_name, .line = lex->line, .column = lex->column};
lex_advance(lex);
switch (token->str[0]) {
case '\0': token->kind = lex_kind_eof; break;
case '{': token->kind = lex_kind_open_brace; break;
case '}': token->kind = lex_kind_close_brace; break;
case '(': token->kind = lex_kind_open_paren; break;
case ')': token->kind = lex_kind_close_paren; break;
case '[': token->kind = lex_kind_open_bracket; break;
case ']': token->kind = lex_kind_close_bracket; break;
case '~': token->kind = lex_kind_bit_negation; break;
case ';': token->kind = lex_kind_semicolon; break;
case ':': token->kind = lex_kind_colon; break;
case ',': token->kind = lex_kind_comma; break;
case '"': case '`': case '\'': lex_eat_string(lex, token); break;
case '.': {
token->kind = lex_kind_dot;
if (lex->at[0] == '.' && lex->at[1] == '.') {
lex_advance(lex);
lex_advance(lex);
token->kind = lex_kind_three_dots;
}
} break;
case '/': {
token->kind = lex_kind_divide;
if (lex_match(lex, '/')) {
token->kind = lex_kind_comment;
lex_eat_until(lex, '\n');
} else if (lex_match(lex, '*')) {
token->kind = lex_kind_comment;
for (;;) {
if (lex->at[0] == '*' && lex->at[1] == '/') break;
if (lex->at[0] == 0) {
lex_panicf(token, "Unclosed block comment");
return;
}
lex_advance(lex);
}
lex_advance(lex); // consume '*'
lex_advance(lex); // consume '/'
} else if (lex_match(lex, '=')) {
token->kind = lex_kind_divide_assign; // fixed: lex_match already advanced
}
} break;
// Two-character operator families, written out by hand so that lex_match is
// the only thing that advances the cursor.
case '^': token->kind = lex_match(lex, '=') ? lex_kind_bit_xor_assign : lex_kind_bit_xor; break;
case '=': token->kind = lex_match(lex, '=') ? lex_kind_equals : lex_kind_assign; break;
case '!': token->kind = lex_match(lex, '=') ? lex_kind_not_equals : lex_kind_negation; break;
case '%': token->kind = lex_match(lex, '=') ? lex_kind_modulo_assign : lex_kind_modulo; break;
case '*': token->kind = lex_match(lex, '=') ? lex_kind_multiply_assign : lex_kind_multiply; break;
case '+': {
if (lex_match(lex, '+')) token->kind = lex_kind_increment;
else if (lex_match(lex, '=')) token->kind = lex_kind_plus_assign;
else token->kind = lex_kind_plus;
} break;
case '-': {
if (lex_match(lex, '-')) token->kind = lex_kind_decrement;
else if (lex_match(lex, '=')) token->kind = lex_kind_minus_assign;
else if (lex_match(lex, '>')) token->kind = lex_kind_arrow; // fixed: arrow was never produced
else token->kind = lex_kind_minus;
} break;
case '&': {
if (lex_match(lex, '&')) token->kind = lex_kind_and;
else if (lex_match(lex, '=')) token->kind = lex_kind_bit_and_assign;
else token->kind = lex_kind_bit_and;
} break;
case '|': {
if (lex_match(lex, '|')) token->kind = lex_kind_or;
else if (lex_match(lex, '=')) token->kind = lex_kind_bit_or_assign;
else token->kind = lex_kind_bit_or;
} break;
case '>': {
token->kind = lex_kind_greater;
if (lex_match(lex, '=')) {
token->kind = lex_kind_greater_or_equal;
} else if (lex_match(lex, '>')) {
token->kind = lex_kind_bit_right_shift;
if (lex_match(lex, '=')) {
token->kind = lex_kind_bit_right_shift_assign;
}
}
} break;
case '<': {
token->kind = lex_kind_lesser;
if (lex_match(lex, '=')) {
token->kind = lex_kind_lesser_or_equal;
} else if (lex_match(lex, '<')) {
token->kind = lex_kind_bit_left_shift;
if (lex_match(lex, '=')) {
token->kind = lex_kind_bit_left_shift_assign;
}
}
} break;
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
lex_eat_number(lex, token);
} break;
case 'A': case 'a': case 'B': case 'b': case 'C': case 'c':
case 'D': case 'd': case 'E': case 'e': case 'F': case 'f':
case 'G': case 'g': case 'H': case 'h': case 'I': case 'i':
case 'J': case 'j': case 'K': case 'k': case 'L': case 'l':
case 'M': case 'm': case 'N': case 'n': case 'O': case 'o':
case 'P': case 'p': case 'Q': case 'q': case 'R': case 'r':
case 'S': case 's': case 'T': case 't': case 'U': case 'u':
case 'V': case 'v': case 'W': case 'w': case 'X': case 'x':
case 'Y': case 'y': case 'Z': case 'z': case '_': {
token->kind = lex_kind_ident;
while (char_is_alphanumeric(lex->at[0]) || lex->at[0] == '_') lex_advance(lex);
} break;
default: {
lex_panicf(token, "found invalid character in the token stream (%d)", token->str[0]);
}
}
token->len = (int)(lex->at - token->str);
if (token->kind == lex_kind_int) {
// Exclude any suffix characters from the digit span before decoding;
// lex_map_char_to_int would otherwise reject them and panic.
i64 digits = token->len;
if (token->suffix == lex_suffix_ull) digits -= 3;
else if (token->suffix == lex_suffix_ul || token->suffix == lex_suffix_ll) digits -= 2;
else if (token->suffix == lex_suffix_u || token->suffix == lex_suffix_l) digits -= 1;
token->integer = lex_deserial_u64(token->str, digits, 10);
} else if (token->kind == lex_kind_real) {
token->real = s8_deserial_f64(token->s8);
} else if (token->kind == lex_kind_string) {
token->str += 1; // drop the opening quote
token->len -= 2; // and the closing one
}
}
// Convenience wrapper: lexes and returns the next token by value.
lex_t lex_token(lexer_t *lex) {
lex_t token = {0};
lex_token_ex(lex, &token);
return token;
}
// @todo: use s8_t instead
// Lexes the entire 'stream' into an arena-backed token array, dropping
// comment tokens. The array always ends with an eof token (counted in len).
// Relies on consecutive ma_push_type calls being contiguous: alignment is
// forced to 0 for the duration so each push lands right after the previous
// token, forming one flat array. NOTE(review): assumes nothing else pushes
// onto 'arena' during the loop (lex_panicf uses a separate scratch arena).
lex_array_t lex_tokens(ma_arena_t *arena, char *file_name, char *stream) {
usize align = arena->align;
arena->align = 0;
lex_array_t token_array = {0};
lexer_t l = lex_make(stream, file_name);
for (;;) {
lex_t *token = ma_push_type(arena, lex_t);
if (token_array.data == NULL) {
token_array.data = token;
}
token_array.len += 1;
do {
lex_token_ex(&l, token);
} while (token->kind == lex_kind_comment);
if (token->kind == lex_kind_eof) break;
}
arena->align = align;
return token_array;
}
// Bare spelling per token kind (third column of LEX_KIND_XLIST), indexed by
// lex_kind_t.
s8_t global_lex_kind_simple_strings[] = {
#define X(KIND, STR, SIMPLE) s8_const_lit(SIMPLE),
LEX_KIND_XLIST
#undef X
};
// Returns the bare spelling (e.g. "{") for a token kind; asserts on a
// value outside the enum range.
s8_t s8_serial_simple_lex_kind_t(lex_kind_t kind) {
assert(0 <= kind && kind < lex_kind_count);
s8_t spelling = global_lex_kind_simple_strings[kind];
return spelling;
}
// Human-readable description per token kind (second column of
// LEX_KIND_XLIST), indexed by lex_kind_t; used in diagnostics.
s8_t global_lex_kind_strings[] = {
#define X(KIND, STR, SIMPLE) s8_const_lit(STR),
LEX_KIND_XLIST
#undef X
};
// Returns the human-readable description for a token kind; asserts on a
// value outside the enum range.
s8_t s8_serial_lex_kind_t(lex_kind_t kind) {
assert(0 <= kind && kind < lex_kind_count);
s8_t description = global_lex_kind_strings[kind];
return description;
}
// Reflection table: name/value pair for every lex_kind_t enumerator,
// consumed by DEFINE_ENUM below.
type_member_t members__lex_kind_t[] = {
#define X(KIND, STR, SIMPLE) {.name = s8_const_lit("lex_kind_" #KIND), .value = lex_kind_##KIND},
LEX_KIND_XLIST
#undef X
};
DEFINE_ENUM(lex_kind_t);
// Reflection table: name/value pair for every lex_suffix_t enumerator,
// consumed by DEFINE_ENUM below.
type_member_t members__lex_suffix_t[] = {
#define X(KIND) {.name = s8_const_lit("lex_suffix_" #KIND), .value = lex_suffix_##KIND},
LEX_SUFFIX_XLIST
#undef X
};
DEFINE_ENUM(lex_suffix_t);
//
//
// Parser cursor over a lex_tokens() array; 'at' always points at a valid
// token and parser_next parks it on the trailing eof token.
typedef struct parser_t parser_t;
struct parser_t {
ma_arena_t *arena;
lex_t *at;
};
// Builds a parser from a compound literal. NOTE(review): the literal has
// block lifetime -- the returned pointer must not escape the enclosing block.
#define parser_make(ARENA, TOKEN) &(parser_t){.arena = ARENA, .at = TOKEN}
// Returns the current token and advances, except at eof where the cursor
// stays parked so repeated calls keep returning the eof token.
lex_t *parser_next(parser_t *par) {
lex_t *token = par->at;
if (token->kind != lex_kind_eof) {
par->at += 1;
}
return token;
}
// Consumes and returns the current token iff it has the given kind;
// otherwise leaves the cursor alone and returns NULL.
lex_t *parser_match(parser_t *par, lex_kind_t kind) {
if (par->at->kind != kind) return NULL;
return parser_next(par);
}
// Consumes and returns the current token iff it is an identifier whose text
// equals 'str'; otherwise leaves the cursor alone and returns NULL.
lex_t *parser_matchi(parser_t *par, s8_t str) {
if (par->at->kind != lex_kind_ident) return NULL;
if (!s8_equal(par->at->s8, str)) return NULL;
return parser_next(par);
}
// Like parser_match, but aborts with a diagnostic naming the expected and
// actual token kinds when they differ.
lex_t *parser_expect(parser_t *par, lex_kind_t kind) {
lex_t *token = parser_match(par, kind);
if (token == NULL) {
lex_panicf(par->at, "expected token kind: %S, got instead: %S", s8_serial_lex_kind_t(kind), s8_serial_lex_kind_t(par->at->kind));
}
return token;
}
// Skips tokens up to (not including) the first token of the given kind;
// stops at eof if the kind never appears.
void parser_eat_until(parser_t *par, lex_kind_t kind) {
for (;;) {
lex_kind_t cur = par->at->kind;
if (cur == kind || cur == lex_kind_eof) break;
parser_next(par);
}
}
// Skips tokens through (including) the first token of the given kind; the
// final parser_next is a no-op if only eof remains.
void parser_eat_including(parser_t *par, lex_kind_t kind) {
parser_eat_until(par, kind);
parser_next(par);
}