improving lexer

This commit is contained in:
Krzosa Karol
2025-01-17 23:08:52 +01:00
parent a3e6730e0b
commit 63dda7bf13
5 changed files with 204 additions and 63 deletions

View File

@@ -7,12 +7,17 @@ const i32 module_lex = 1;
} while (0)
// Builds a lexer over `stream`: the cursor (`at`) starts at the first byte and
// `end` points one past the last byte (`stream.str + stream.len`). The file
// name pointer is stored as given; it is not copied here.
// NOTE(review): the initializer appears twice in this view -- the `.file_name`
// form looks like the pre-rename line and `.file` its replacement; confirm
// only one of them exists in the real file.
fn lexer_t lex_make(s8_t stream, char *file_name) {
lexer_t result = {.at = stream.str, .end = stream.str + stream.len, .file_name = file_name};
lexer_t result = {.at = stream.str, .end = stream.str + stream.len, .file = file_name};
return result;
}
// Reports whether the cursor has reached or passed the end of the input.
// Returns non-zero when no bytes remain between `lex->at` and `lex->end`.
fn b32 lex_at_end(lexer_t *lex) {
    return (b32)(lex->end <= lex->at);
}
fn void lex_advance(lexer_t *lex) {
if (lex->at >= lex->end) return;
if (lex_at_end(lex)) return;
if (lex->at[0] == '\n') { lex->column = 0; lex->line += 1; }
lex->column += 1;
lex->at += 1;
@@ -27,7 +32,22 @@ fn b32 lex_match(lexer_t *lex, char c) {
}
// Skips whitespace and backslash-newline line continuations at the cursor.
// Consuming a '\n' as ordinary whitespace clears `lex->inside_macro`; a
// continuation ("\\\n" or "\\\r\n") is skipped without clearing it.
// NOTE(review): the one-line `while` below looks like the pre-change loop that
// the `for (;;)` replaces; if both were really present the `while` would eat
// newlines without clearing `inside_macro` -- confirm against the real file.
fn void lex_eat_whitespace(lexer_t *lex) {
while (char_is_whitespace(lex->at[0])) lex_advance(lex);
for (;;) {
if (char_is_whitespace(lex->at[0])) {
// A plain newline ends the current preprocessor line.
if (lex->at[0] == '\n') lex->inside_macro = false;
lex_advance(lex);
} else if (lex->at[0] == '\\' && lex->at[1] == '\n') {
// Line continuation with LF: skip the backslash and the newline.
lex_advance(lex); lex_advance(lex);
} else if (lex->at[0] == '\\' && lex->at[1] == '\r' && lex->at[2] == '\n') {
// Line continuation with CRLF: skip all three bytes.
lex_advance(lex); lex_advance(lex); lex_advance(lex);
} else {
break;
}
}
}
// Skips a run of spaces and tabs at the cursor. Any other character,
// including a newline, stops the scan and is left unconsumed.
fn void lex_eat_macro_whitespace(lexer_t *lex) {
    for (;;) {
        char c = lex->at[0];
        if (c != ' ' && c != '\t') break;
        lex_advance(lex);
    }
}
fn void lex_suffix(lexer_t *lex, lex_t *token) {
@@ -76,13 +96,15 @@ fn void lex_eat_number(lexer_t *lex, lex_t *token) {
}
// Advances the cursor until it sits on character `c` or the input runs out.
// The matching `c` itself is not consumed.
// NOTE(review): two loop lines are shown -- the first stops on a NUL byte, the
// second on lex_at_end(); the first looks like the pre-change line. Confirm
// only one exists in the real file.
fn void lex_eat_until(lexer_t *lex, char c) {
while (lex->at[0] != c && lex->at[0] != 0) lex_advance(lex);
while (lex->at[0] != c && !lex_at_end(lex)) lex_advance(lex);
}
fn void lex_eat_string(lexer_t *lex, lex_t *token) {
token->kind = lex_kind_string;
for (;;) {
if (lex_match(lex, token->str[0])) {
if (lex->at[0] == '\\') {
lex_advance(lex);
} else if (lex_match(lex, token->str[0])) {
break;
}
@@ -95,6 +117,69 @@ fn void lex_eat_string(lexer_t *lex, lex_t *token) {
}
}
// Lexes the word that follows a '#' and classifies it as a preprocessor
// directive.
//
// Leading spaces/tabs are skipped, then a run of alphabetic characters is read
// and compared against the known directive names. On a match `token->kind` is
// set to the corresponding lex_kind_preproc_* value and true is returned.
//   - include: the path between "..." or <...> is scanned; `token->str` is
//     moved to the first path character, the closing delimiter is consumed,
//     and `token->system_include` is set for the <...> form.
//   - error:   `token->str` is moved to the message text and the cursor is
//     advanced to the end of the line.
//
// Returns false when
//   - the include directive is malformed (reported through lex_error), or
//   - the word is not a known directive. In that case the cursor is rewound
//     to the start of the word (after the skipped spaces/tabs), so the caller
//     can lex the word itself, e.g. as the operand of the '#' stringify
//     operator. Without the rewind the operand would already be consumed and
//     the caller would produce an empty or truncated token.
fn b32 lex_macro(lexer_t *lex, lex_t *token) {
    lex_eat_macro_whitespace(lex);

    // Snapshot of the lexer at the start of the word; restored if the word
    // turns out not to be a directive name.
    lexer_t word_start = *lex;

    token->str = lex->at;
    while (char_is_alphabetic(lex->at[0])) lex_advance(lex);
    token->len = (i32)(lex->at - token->str);

    if (s8_are_equal(token->string, s8_lit("define"))) {
        token->kind = lex_kind_preproc_define;
    } else if (s8_are_equal(token->string, s8_lit("ifdef"))) {
        token->kind = lex_kind_preproc_ifdef;
    } else if (s8_are_equal(token->string, s8_lit("ifndef"))) {
        token->kind = lex_kind_preproc_ifndef;
    } else if (s8_are_equal(token->string, s8_lit("include"))) {
        token->kind = lex_kind_preproc_include;
        lex_eat_macro_whitespace(lex);

        // Pick the closing delimiter from the opening one.
        char end = 0;
        if (lex_match(lex, '"')) {
            end = '"';
        } else if (lex_match(lex, '<')) {
            end = '>';
            token->system_include = true;
        } else {
            lex_error(token, "invalid include directive, should be followed by string or '<'");
            return false;
        }

        token->str = lex->at;
        for (;;) {
            // Check the bound before looking at the character so the byte at
            // `lex->end` is never read.
            if (lex_at_end(lex)) {
                lex_error(token, "invalid include directive, reached end of file");
                return false;
            }
            if (lex->at[0] == end) break;
            if (lex->at[0] == '\n') {
                lex_error(token, "invalid include directive, reached end of line");
                return false;
            }
            lex_advance(lex);
        }
        // Consume the closing delimiter.
        lex_advance(lex);
    } else if (s8_are_equal(token->string, s8_lit("if"))) {
        token->kind = lex_kind_preproc_if;
    } else if (s8_are_equal(token->string, s8_lit("endif"))) {
        token->kind = lex_kind_preproc_endif;
    } else if (s8_are_equal(token->string, s8_lit("error"))) {
        token->kind = lex_kind_preproc_error;
        lex_eat_macro_whitespace(lex);
        token->str = lex->at;
        lex_eat_until(lex, '\n');
    } else if (s8_are_equal(token->string, s8_lit("else"))) {
        token->kind = lex_kind_preproc_else;
    } else if (s8_are_equal(token->string, s8_lit("elif"))) {
        token->kind = lex_kind_preproc_elif;
    } else if (s8_are_equal(token->string, s8_lit("pragma"))) {
        token->kind = lex_kind_preproc_pragma;
    } else if (s8_are_equal(token->string, s8_lit("undef"))) {
        token->kind = lex_kind_preproc_undef;
    } else {
        // Not a directive: un-consume the word so the caller sees it intact.
        *lex = word_start;
        return false;
    }
    return true;
}
#define LEX_CASE3(C1, K1, C2, K2, C3, K3) \
case C1: { \
token->kind = K1; \
@@ -109,7 +194,7 @@ fn void lex_eat_string(lexer_t *lex, lex_t *token) {
fn void lex_token_ex(lexer_t *lex, lex_t *token) {
lex_eat_whitespace(lex);
*token = (lex_t){.str = lex->at, .file_name = lex->file_name, .line = lex->line, .column = lex->column};
*token = (lex_t){.str = lex->at, .file = lex->file, .line = lex->line, .column = lex->column};
lex_advance(lex);
switch (token->str[0]) {
@@ -125,10 +210,21 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
case ':': token->kind = lex_kind_colon; break;
case ',': token->kind = lex_kind_comma; break;
case '@': token->kind = lex_kind_tag; break;
case '?': token->kind = lex_kind_question; break;
case '"': lex_eat_string(lex, token); break;
case '`': lex_eat_string(lex, token); break;
case '\'': lex_eat_string(lex, token); break;
LEX_CASE3('^', lex_kind_bit_xor, '=', lex_kind_bit_xor_assign, /*ignored option*/'=', lex_kind_bit_xor_assign);
LEX_CASE3('=', lex_kind_assign, '=', lex_kind_equals, /*ignored option*/'=', lex_kind_equals);
LEX_CASE3('!', lex_kind_negation, '=', lex_kind_not_equals, /*ignored option*/'=', lex_kind_not_equals);
LEX_CASE3('%', lex_kind_modulo, '=', lex_kind_modulo_assign, /*ignored option*/'=', lex_kind_modulo_assign);
LEX_CASE3('*', lex_kind_multiply, '=', lex_kind_multiply_assign, /*ignored option*/'=', lex_kind_multiply_assign);
LEX_CASE3('+', lex_kind_plus, '+', lex_kind_increment, '=', lex_kind_plus_assign);
LEX_CASE3('-', lex_kind_minus, '-', lex_kind_decrement, '=', lex_kind_minus_assign);
LEX_CASE3('&', lex_kind_bit_and, '&', lex_kind_and, '=', lex_kind_bit_and_assign);
LEX_CASE3('|', lex_kind_bit_or, '|', lex_kind_or, '=', lex_kind_bit_or_assign);
case '.': {
token->kind = lex_kind_dot;
if (lex->at[0] == '.' && lex->at[1] == '.') {
@@ -159,20 +255,9 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
lex_advance(lex);
} else if (lex_match(lex, '=')) {
token->kind = lex_kind_divide_assign;
lex_advance(lex);
}
} break;
LEX_CASE3('^', lex_kind_bit_xor, '=', lex_kind_bit_xor_assign, /*ignored option*/'=', lex_kind_bit_xor_assign);
LEX_CASE3('=', lex_kind_assign, '=', lex_kind_equals, /*ignored option*/'=', lex_kind_equals);
LEX_CASE3('!', lex_kind_negation, '=', lex_kind_not_equals, /*ignored option*/'=', lex_kind_not_equals);
LEX_CASE3('%', lex_kind_modulo, '=', lex_kind_modulo_assign, /*ignored option*/'=', lex_kind_modulo_assign);
LEX_CASE3('*', lex_kind_multiply, '=', lex_kind_multiply_assign, /*ignored option*/'=', lex_kind_multiply_assign);
LEX_CASE3('+', lex_kind_plus, '+', lex_kind_increment, '=', lex_kind_plus_assign);
LEX_CASE3('-', lex_kind_minus, '-', lex_kind_decrement, '=', lex_kind_minus_assign);
LEX_CASE3('&', lex_kind_bit_and, '&', lex_kind_and, '=', lex_kind_bit_and_assign);
LEX_CASE3('|', lex_kind_bit_or, '|', lex_kind_or, '=', lex_kind_bit_or_assign);
case '>': {
token->kind = lex_kind_greater;
if (lex_match(lex, '=')) {
@@ -189,8 +274,7 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
token->kind = lex_kind_lesser;
if (lex_match(lex, '=')) {
token->kind = lex_kind_lesser_or_equal;
}
else if (lex_match(lex, '<')) {
} else if (lex_match(lex, '<')) {
token->kind = lex_kind_bit_left_shift;
if (lex_match(lex, '=')) {
token->kind = lex_kind_bit_left_shift_assign;
@@ -198,6 +282,26 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
}
} break;
case '#': {
if (lex_match(lex, '#')) {
token->kind = lex_kind_preproc_concat;
} else {
b32 inside_macro = lex_macro(lex, token);
if (inside_macro) {
lex->inside_macro = true;
} else {
if (!lex->inside_macro) {
lex_error(token, "invalid preprocessor directive");
return;
}
token->kind = lex_kind_preproc_stringify;
token->str = lex->at;
// this is slightly wrong: the first character can't be a digit
while (char_is_alphanumeric(lex->at[0]) || lex->at[0] == '_') lex_advance(lex);
}
}
} break;
case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '0': {
lex_eat_number(lex, token);
} break;
@@ -222,6 +326,13 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
token->len = (i32)(lex->at - token->str);
if (token->kind == lex_kind_integer || token->kind == lex_kind_real) {
if (token->suffix != lex_suffix_none) {
s8_t string_value = ti_enum_value_to_name(token->suffix, type(lex_suffix_t));
s8_t prefix = s8_lit("lex_suffix_");
token->len -= string_value.len - prefix.len;
}
}
if (token->kind == lex_kind_integer) {
token->integer = u64_from_s8(token->string, 10);
} else if (token->kind == lex_kind_real) {
@@ -229,7 +340,10 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
} else if (token->kind == lex_kind_string) {
token->str += 1;
token->len -= 2;
} else if (token->kind == lex_kind_preproc_include) {
token->len -= 1;
}
if (lex->inside_macro) token->inside_macro = true;
}
fn lex_t lex_token(lexer_t *lex) {
@@ -309,7 +423,7 @@ fn lex_t *parser_matchi(parser_t *par, s8_t str) {
// Reports a fatal parse error located at `token`, printed by fatalf as
// "file(line:column): error: message".
// `str` is a printf-style format; S8_FMT presumably expands the variadic
// arguments into `str8` using the scratch arena -- confirm against its
// definition.
// NOTE(review): two fatalf lines are shown; the `token->file_name` one looks
// like the pre-rename line and `token->file` its replacement.
fn void parser_panicf(lex_t *token, const char *str, ...) {
ma_temp_t scratch = ma_begin_scratch();
S8_FMT(scratch.arena, str, str8);
fatalf("%s(%d:%d): error: %S", token->file_name, token->line, token->column, str8);
fatalf("%s(%d:%d): error: %S", token->file, token->line, token->column, str8);
ma_end_scratch(scratch);
}

View File

@@ -57,6 +57,20 @@ enum lex_kind_t {
X(lex_kind_arrow , "'->' arrow" , "->" )\
X(lex_kind_question , "'?' question mark" , "?" )\
X(lex_kind_tag , "'@' tag sign" , "@" )\
X(lex_kind_preproc_null , "preproc_null" , "---" )\
X(lex_kind_preproc_define , "preproc_define" , "---" )\
X(lex_kind_preproc_ifdef , "preproc_ifdef" , "---" )\
X(lex_kind_preproc_ifndef , "preproc_ifndef" , "---" )\
X(lex_kind_preproc_include , "preproc_include" , "---" )\
X(lex_kind_preproc_endif , "preproc_endif" , "---" )\
X(lex_kind_preproc_if , "preproc_if" , "---" )\
X(lex_kind_preproc_pragma , "preproc_pragma" , "---" )\
X(lex_kind_preproc_error , "preproc_error" , "---" )\
X(lex_kind_preproc_else , "preproc_else" , "---" )\
X(lex_kind_preproc_elif , "preproc_elif" , "---" )\
X(lex_kind_preproc_undef , "preproc_undef" , "---" )\
X(lex_kind_preproc_concat , "preproc_concat" , "---" )\
X(lex_kind_preproc_stringify , "preproc_stringify" , "---" )\
#define X(KIND, STR, SIMPLE) KIND,
LEX_KIND_XLIST
@@ -88,6 +102,10 @@ typedef struct lex_t lex_t;
struct lex_t {
lex_kind_t kind;
lex_suffix_t suffix;
struct {
b8 inside_macro: 1;
b8 system_include: 1;
};
union {
struct {char *str; i64 len;};
@@ -96,7 +114,7 @@ struct lex_t {
i32 line;
i32 column;
char *file_name;
char *file;
union {
u64 integer;
@@ -109,9 +127,11 @@ typedef struct lexer_t lexer_t;
// Lexer cursor state over a single input buffer.
// NOTE(review): both `file_name` and `file` are listed in this view; the
// former looks like the pre-rename field -- confirm only `file` remains.
struct lexer_t {
// Current read position in the input.
char *at;
// One past the last byte of the input.
char *end;
char *file_name;
// Source file name, copied into tokens for diagnostics.
char *file;
// Current position, maintained by lex_advance.
i32 line;
i32 column;
// Set after a '#' directive is recognized and cleared when a plain newline
// is consumed as whitespace.
b8 inside_macro;
};
typedef struct lex_array_t lex_array_t;

View File

@@ -1,5 +1,6 @@
#include "core/core_inc.h"
#include "core/core_inc.c"
#include "core/core_clexer.c"
void test_s8(void) {
ma_arena_t *arena = ma_create(ma_default_reserve_size);
@@ -15,7 +16,7 @@ void test_s8(void) {
assert(s8_are_equal(sb->first->string, memes));
sb8_printf(sb, "%S", s8_lit("things are going fine"));
s8_t string = sb8_merge(sb);
s8_t string = sb8_merge(temp.arena, sb);
assert(s8_are_equal(string, s8_lit("memesthings are going fine")));
ma_end_temp(temp);

View File

@@ -131,7 +131,7 @@ fn ui_box_t *ui_build_box_from_string(ui_code_loc_t loc, ui_box_flags_t flags, s
ui_box_t *box = ui_build_box_from_id(loc, flags, id);
box->string = ui_get_display_string(string);
v2f32_t string_size = rn_measure_string(&rn_state.main_font, box->string);
string_size.x += 50;
string_size.x += ui_em(1);
r2f32_t rect = ui_next_rect(ui->top->op, &ui->top->rect, string_size);
ui_set_rect(box, rect);
return box;
@@ -512,28 +512,18 @@ fn void ui_demo_update(app_frame_t *frame) {
ui_begin_build(UILOC, ev, window_rect_from_frame(frame));
ui->top->op = ui_op_idle;
ui_box_t *top_box = ui_boxf((ui_box_flags_t){.draw_rect = true, .draw_border = true, .clip_rect = true}, "top_box");
ui_set_rect(top_box, r2f32_cut_top(&ui->top->rect, ui_em(1)));
defer_block(ui_push_top(top_box), ui_pop_top()) {
top_box->op = ui_op_cut_left;
ui->pref_text_align = ui_text_align_center;
ui_button("file");
ui_button("edit");
ui->pref_text_align = ui_text_align_left;
}
ui_box_t *scroller_box = ui_boxf((ui_box_flags_t){.draw_rect = true, .clip_rect = true}, "scroller");
ui_set_rect(scroller_box, r2f32_cut_right(&ui->top->rect, ui_em(1)));
ui_set_rect(scroller_box, r2f32_cut_right(&ui->top->rect, ui_em(0.5f)));
ui_box_t *item_box = ui_boxf((ui_box_flags_t){.draw_rect = true, .clip_rect = true}, "item_box");
ui_set_rect(item_box, r2f32_cut_left(&ui->top->rect, ui_max));
// @todo: now actually fill this out with struct data using type info
static f32 scroller_value;
defer_block(ui_push_top(item_box), ui_pop_top()) {
ui_serial_type(&ui_test_event, type(app_event_t));
defer_if (ui_begin_expander("app_event_t").clicked, ui_end_expander()) {
for (int i = 0; i < 10; i += 1) {
for (int i = 0; i < 2; i += 1) {
ui_label("kind: app_event_kind_t##a%d", i);
ui_label("ctrl: b8##ds%d", i);
ui_label("shift: b8##f%d", i);
@@ -562,7 +552,6 @@ fn void ui_demo_update(app_frame_t *frame) {
f32 scroller_percent = scroller_norm * scrollable_space;
f32 scroller_second = scrollable_space - scroller_percent;
scroller_box->op = ui_op_idle;
r2f32_cut_top(&ui->top->rect, scroller_percent * scroller_box_size);
ui_box_t *box = ui_build_box_from_id(UILOC, (ui_box_flags_t){.draw_border = true, .draw_rect = true}, ui_idf("slider"));
ui_set_rect(box, r2f32_cut_top(&ui->top->rect, scroller_size * scroller_box_size));
@@ -576,7 +565,6 @@ fn void ui_demo_update(app_frame_t *frame) {
scroller_value = CLAMP(scroller_value, 0, all_items_size);
}
for (ui_box_t *it = item_box->first; it; it = it->next) {
it->full_rect.min.y -= scroller_value;
it->full_rect.max.y -= scroller_value;