improving lexer

2025-01-17 23:08:52 +01:00
parent a3e6730e0b
commit 63dda7bf13
5 changed files with 204 additions and 63 deletions
--- a/src/core/core_lexer.c
+++ b/src/core/core_lexer.c
@@ -7,12 +7,17 @@ const i32 module_lex = 1;
 } while (0)

 fn lexer_t lex_make(s8_t stream, char *file_name) {
-    lexer_t result = {.at = stream.str, .end = stream.str + stream.len, .file_name = file_name};
+    lexer_t result = {.at = stream.str, .end = stream.str + stream.len, .file = file_name};
+    return result;
+}
+
+fn b32 lex_at_end(lexer_t *lex) {
+    b32 result = lex->at >= lex->end;
    return result;
 }

 fn void lex_advance(lexer_t *lex) {
-    if (lex->at >= lex->end) return;
+    if (lex_at_end(lex)) return;
    if (lex->at[0] == '\n') { lex->column = 0; lex->line += 1; }
    lex->column += 1;
    lex->at += 1;
@@ -27,7 +32,22 @@ fn b32 lex_match(lexer_t *lex, char c) {
 }

 fn void lex_eat_whitespace(lexer_t *lex) {
-    while (char_is_whitespace(lex->at[0])) lex_advance(lex);
+    for (;;) {
+        if (char_is_whitespace(lex->at[0])) {
+            if (lex->at[0] == '\n') lex->inside_macro = false;
+            lex_advance(lex);
+        } else if (lex->at[0] == '\\' && lex->at[1] == '\n') {
+            lex_advance(lex); lex_advance(lex);
+        } else if (lex->at[0] == '\\' && lex->at[1] == '\r' && lex->at[2] == '\n') {
+            lex_advance(lex); lex_advance(lex); lex_advance(lex);
+        } else {
+            break;
+        }
+    }
+}
+
+fn void lex_eat_macro_whitespace(lexer_t *lex) {
+    while (lex->at[0] == ' ' || lex->at[0] == '\t') lex_advance(lex);
 }

 fn void lex_suffix(lexer_t *lex, lex_t *token) {
@@ -76,13 +96,15 @@ fn void lex_eat_number(lexer_t *lex, lex_t *token) {
 }

 fn void lex_eat_until(lexer_t *lex, char c) {
-    while (lex->at[0] != c && lex->at[0] != 0) lex_advance(lex);
+    while (lex->at[0] != c && !lex_at_end(lex)) lex_advance(lex);
 }

 fn void lex_eat_string(lexer_t *lex, lex_t *token) {
    token->kind = lex_kind_string;
    for (;;) {
-        if (lex_match(lex, token->str[0])) {
+        if (lex->at[0] == '\\') {
+            lex_advance(lex);
+        } else if (lex_match(lex, token->str[0])) {
            break;
        }

@@ -95,6 +117,69 @@ fn void lex_eat_string(lexer_t *lex, lex_t *token) {
    }
 }

+fn b32 lex_macro(lexer_t *lex, lex_t *token) {
+    lex_eat_macro_whitespace(lex);
+    token->str = lex->at;
+    while (char_is_alphabetic(lex->at[0])) lex_advance(lex);
+    token->len = (i32)(lex->at - token->str);
+
+    if (s8_are_equal(token->string, s8_lit("define"))) {
+        token->kind = lex_kind_preproc_define;
+    } else if (s8_are_equal(token->string, s8_lit("ifdef"))) {
+        token->kind = lex_kind_preproc_ifdef;
+    } else if (s8_are_equal(token->string, s8_lit("ifndef"))) {
+        token->kind = lex_kind_preproc_ifndef;
+    } else if (s8_are_equal(token->string, s8_lit("include"))) {
+        token->kind = lex_kind_preproc_include;
+        lex_eat_macro_whitespace(lex);
+        char end = 0;
+        if (lex_match(lex, '"')) {
+            end = '"';
+        } else if (lex_match(lex, '<')) {
+            end = '>';
+            token->system_include = true;
+        } else {
+            lex_error(token, "invalid include directive, should be followed by string or '<'");
+            return false;
+        }
+
+        token->str = lex->at;
+        while (lex->at[0] != end) {
+            if (lex_at_end(lex)) {
+                lex_error(token, "invalid include directive, reached end of file");
+                return false;
+            }
+            if (lex->at[0] == '\n') {
+                lex_error(token, "invalid include directive, reached end of line");
+                return false;
+            }
+            lex_advance(lex);
+        }
+        lex_advance(lex);
+    } else if (s8_are_equal(token->string, s8_lit("if"))) {
+        token->kind = lex_kind_preproc_if;
+    } else if (s8_are_equal(token->string, s8_lit("endif"))) {
+        token->kind = lex_kind_preproc_endif;
+    } else if (s8_are_equal(token->string, s8_lit("error"))) {
+        token->kind = lex_kind_preproc_error;
+        lex_eat_macro_whitespace(lex);
+        token->str = lex->at;
+        lex_eat_until(lex, '\n');
+    } else if (s8_are_equal(token->string, s8_lit("else"))) {
+        token->kind = lex_kind_preproc_else;
+    } else if (s8_are_equal(token->string, s8_lit("elif"))) {
+        token->kind = lex_kind_preproc_elif;
+    } else if (s8_are_equal(token->string, s8_lit("pragma"))) {
+        token->kind = lex_kind_preproc_pragma;
+    } else if (s8_are_equal(token->string, s8_lit("undef"))) {
+        token->kind = lex_kind_preproc_undef;
+    } else {
+        return false;
+    }
+
+    return true;
+}
+
 #define LEX_CASE3(C1, K1, C2, K2, C3, K3) \
    case C1: {                            \
        token->kind = K1;                 \
@@ -109,7 +194,7 @@ fn void lex_eat_string(lexer_t *lex, lex_t *token) {

 fn void lex_token_ex(lexer_t *lex, lex_t *token) {
    lex_eat_whitespace(lex);
-    *token = (lex_t){.str = lex->at, .file_name = lex->file_name, .line = lex->line, .column = lex->column};
+    *token = (lex_t){.str = lex->at, .file = lex->file, .line = lex->line, .column = lex->column};
    lex_advance(lex);

    switch (token->str[0]) {
@@ -125,10 +210,21 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
        case ':': token->kind = lex_kind_colon; break;
        case ',': token->kind = lex_kind_comma; break;
        case '@': token->kind = lex_kind_tag; break;
+        case '?': token->kind = lex_kind_question; break;
        case '"': lex_eat_string(lex, token); break;
        case '`': lex_eat_string(lex, token); break;
        case '\'': lex_eat_string(lex, token); break;

+        LEX_CASE3('^', lex_kind_bit_xor, '=', lex_kind_bit_xor_assign, /*ignored option*/'=', lex_kind_bit_xor_assign);
+        LEX_CASE3('=', lex_kind_assign, '=', lex_kind_equals, /*ignored option*/'=', lex_kind_equals);
+        LEX_CASE3('!', lex_kind_negation, '=', lex_kind_not_equals, /*ignored option*/'=', lex_kind_not_equals);
+        LEX_CASE3('%', lex_kind_modulo, '=', lex_kind_modulo_assign, /*ignored option*/'=', lex_kind_modulo_assign);
+        LEX_CASE3('*', lex_kind_multiply, '=', lex_kind_multiply_assign, /*ignored option*/'=', lex_kind_multiply_assign);
+        LEX_CASE3('+', lex_kind_plus, '+', lex_kind_increment, '=', lex_kind_plus_assign);
+        LEX_CASE3('-', lex_kind_minus, '-', lex_kind_decrement, '=', lex_kind_minus_assign);
+        LEX_CASE3('&', lex_kind_bit_and, '&', lex_kind_and, '=', lex_kind_bit_and_assign);
+        LEX_CASE3('|', lex_kind_bit_or, '|', lex_kind_or, '=', lex_kind_bit_or_assign);
+
        case '.': {
            token->kind = lex_kind_dot;
            if (lex->at[0] == '.' && lex->at[1] == '.') {
@@ -159,20 +255,9 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
                lex_advance(lex);
            } else if (lex_match(lex, '=')) {
                token->kind = lex_kind_divide_assign;
-                lex_advance(lex);
            }
        } break;

-        LEX_CASE3('^', lex_kind_bit_xor, '=', lex_kind_bit_xor_assign, /*ignored option*/'=', lex_kind_bit_xor_assign);
-        LEX_CASE3('=', lex_kind_assign, '=', lex_kind_equals, /*ignored option*/'=', lex_kind_equals);
-        LEX_CASE3('!', lex_kind_negation, '=', lex_kind_not_equals, /*ignored option*/'=', lex_kind_not_equals);
-        LEX_CASE3('%', lex_kind_modulo, '=', lex_kind_modulo_assign, /*ignored option*/'=', lex_kind_modulo_assign);
-        LEX_CASE3('*', lex_kind_multiply, '=', lex_kind_multiply_assign, /*ignored option*/'=', lex_kind_multiply_assign);
-        LEX_CASE3('+', lex_kind_plus, '+', lex_kind_increment, '=', lex_kind_plus_assign);
-        LEX_CASE3('-', lex_kind_minus, '-', lex_kind_decrement, '=', lex_kind_minus_assign);
-        LEX_CASE3('&', lex_kind_bit_and, '&', lex_kind_and, '=', lex_kind_bit_and_assign);
-        LEX_CASE3('|', lex_kind_bit_or, '|', lex_kind_or, '=', lex_kind_bit_or_assign);
-
        case '>': {
            token->kind = lex_kind_greater;
            if (lex_match(lex, '=')) {
@@ -189,8 +274,7 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
            token->kind = lex_kind_lesser;
            if (lex_match(lex, '=')) {
                token->kind = lex_kind_lesser_or_equal;
-            }
-            else if (lex_match(lex, '<')) {
+            } else if (lex_match(lex, '<')) {
                token->kind = lex_kind_bit_left_shift;
                if (lex_match(lex, '=')) {
                    token->kind = lex_kind_bit_left_shift_assign;
@@ -198,6 +282,26 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
            }
        } break;

+        case '#': {
+            if (lex_match(lex, '#')) {
+                token->kind = lex_kind_preproc_concat;
+            } else {
+                b32 inside_macro = lex_macro(lex, token);
+                if (inside_macro) {
+                    lex->inside_macro = true;
+                } else {
+                    if (!lex->inside_macro) {
+                        lex_error(token, "invalid preprocessor directive");
+                        return;
+                    }
+                    token->kind = lex_kind_preproc_stringify;
+                    token->str  = lex->at;
+                    // this is slightly wrong: the first character of an identifier can't be a digit
+                    while (char_is_alphanumeric(lex->at[0]) || lex->at[0] == '_') lex_advance(lex);
+                }
+            }
+        } break;
+
        case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '0': {
            lex_eat_number(lex, token);
        } break;
@@ -222,6 +326,13 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {

    token->len = (i32)(lex->at - token->str);

+    if (token->kind == lex_kind_integer || token->kind == lex_kind_real) {
+        if (token->suffix != lex_suffix_none) {
+            s8_t string_value = ti_enum_value_to_name(token->suffix, type(lex_suffix_t));
+            s8_t prefix = s8_lit("lex_suffix_");
+            token->len -= string_value.len - prefix.len;
+        }
+    }
    if (token->kind == lex_kind_integer) {
        token->integer = u64_from_s8(token->string, 10);
    } else if (token->kind == lex_kind_real) {
@@ -229,7 +340,10 @@ fn void lex_token_ex(lexer_t *lex, lex_t *token) {
    } else if (token->kind == lex_kind_string) {
        token->str += 1;
        token->len -= 2;
+    } else if (token->kind == lex_kind_preproc_include) {
+        token->len -= 1;
    }
+    if (lex->inside_macro) token->inside_macro = true;
 }

 fn lex_t lex_token(lexer_t *lex) {
@@ -309,7 +423,7 @@ fn lex_t *parser_matchi(parser_t *par, s8_t str) {
 fn void parser_panicf(lex_t *token, const char *str, ...) {
    ma_temp_t scratch = ma_begin_scratch();
    S8_FMT(scratch.arena, str, str8);
-    fatalf("%s(%d:%d): error: %S", token->file_name, token->line, token->column, str8);
+    fatalf("%s(%d:%d): error: %S", token->file, token->line, token->column, str8);
    ma_end_scratch(scratch);
 }

--- a/src/core/core_lexer.h
+++ b/src/core/core_lexer.h
@@ -57,6 +57,20 @@ enum lex_kind_t {
        X(lex_kind_arrow                  , "'->' arrow"                       , "->"  )\
        X(lex_kind_question               , "'?' question mark"                , "?"   )\
        X(lex_kind_tag                    , "'@' tag sign"                     , "@"   )\
+        X(lex_kind_preproc_null           , "preproc_null"                     , "---" )\
+        X(lex_kind_preproc_define         , "preproc_define"                   , "---" )\
+        X(lex_kind_preproc_ifdef          , "preproc_ifdef"                    , "---" )\
+        X(lex_kind_preproc_ifndef         , "preproc_ifndef"                   , "---" )\
+        X(lex_kind_preproc_include        , "preproc_include"                  , "---" )\
+        X(lex_kind_preproc_endif          , "preproc_endif"                    , "---" )\
+        X(lex_kind_preproc_if             , "preproc_if"                       , "---" )\
+        X(lex_kind_preproc_pragma         , "preproc_pragma"                   , "---" )\
+        X(lex_kind_preproc_error          , "preproc_error"                    , "---" )\
+        X(lex_kind_preproc_else           , "preproc_else"                     , "---" )\
+        X(lex_kind_preproc_elif           , "preproc_elif"                     , "---" )\
+        X(lex_kind_preproc_undef          , "preproc_undef"                    , "---" )\
+        X(lex_kind_preproc_concat         , "preproc_concat"                   , "---" )\
+        X(lex_kind_preproc_stringify      , "preproc_stringify"                , "---" )\

    #define X(KIND, STR, SIMPLE) KIND,
    LEX_KIND_XLIST
@@ -88,6 +102,10 @@ typedef struct lex_t lex_t;
 struct lex_t {
    lex_kind_t kind;
    lex_suffix_t suffix;
+    struct {
+        b8 inside_macro: 1;
+        b8 system_include: 1;
+    };

    union {
        struct {char *str; i64 len;};
@@ -96,7 +114,7 @@ struct lex_t {

    i32 line;
    i32 column;
-    char *file_name;
+    char *file;

    union {
        u64 integer;
@@ -109,9 +127,11 @@ typedef struct lexer_t lexer_t;
 struct lexer_t {
    char *at;
    char *end;
-    char *file_name;
+    char *file;
    i32 line;
    i32 column;
+
+    b8 inside_macro;
 };

 typedef struct lex_array_t lex_array_t;
--- a/src/core/core_test_entry.c
+++ b/src/core/core_test_entry.c
@@ -1,5 +1,6 @@
 #include "core/core_inc.h"
 #include "core/core_inc.c"
+#include "core/core_clexer.c"

 void test_s8(void) {
    ma_arena_t *arena = ma_create(ma_default_reserve_size);
@@ -15,7 +16,7 @@ void test_s8(void) {
        assert(s8_are_equal(sb->first->string, memes));

        sb8_printf(sb, "%S", s8_lit("things are going fine"));
-        s8_t string = sb8_merge(sb);
+        s8_t string = sb8_merge(temp.arena, sb);
        assert(s8_are_equal(string, s8_lit("memesthings are going fine")));

        ma_end_temp(temp);
--- a/src/wasm_app/ui.c
+++ b/src/wasm_app/ui.c
@@ -131,7 +131,7 @@ fn ui_box_t *ui_build_box_from_string(ui_code_loc_t loc, ui_box_flags_t flags, s
    ui_box_t *box = ui_build_box_from_id(loc, flags, id);
    box->string = ui_get_display_string(string);
    v2f32_t string_size = rn_measure_string(&rn_state.main_font, box->string);
-    string_size.x += 50;
+    string_size.x += ui_em(1);
    r2f32_t rect = ui_next_rect(ui->top->op, &ui->top->rect, string_size);
    ui_set_rect(box, rect);
    return box;
@@ -512,28 +512,18 @@ fn void ui_demo_update(app_frame_t *frame) {
        ui_begin_build(UILOC, ev, window_rect_from_frame(frame));

        ui->top->op = ui_op_idle;
-        ui_box_t *top_box = ui_boxf((ui_box_flags_t){.draw_rect = true, .draw_border = true, .clip_rect = true}, "top_box");
-        ui_set_rect(top_box, r2f32_cut_top(&ui->top->rect, ui_em(1)));
-        defer_block(ui_push_top(top_box), ui_pop_top()) {
-            top_box->op = ui_op_cut_left;
-            ui->pref_text_align = ui_text_align_center;
-            ui_button("file");
-            ui_button("edit");
-            ui->pref_text_align = ui_text_align_left;
-        }

        ui_box_t *scroller_box = ui_boxf((ui_box_flags_t){.draw_rect = true, .clip_rect = true}, "scroller");
-        ui_set_rect(scroller_box, r2f32_cut_right(&ui->top->rect, ui_em(1)));
+        ui_set_rect(scroller_box, r2f32_cut_right(&ui->top->rect, ui_em(0.5f)));

        ui_box_t *item_box = ui_boxf((ui_box_flags_t){.draw_rect = true, .clip_rect = true}, "item_box");
        ui_set_rect(item_box, r2f32_cut_left(&ui->top->rect, ui_max));

-        // @todo: now actually fill this out with struct data using type info
        static f32 scroller_value;
        defer_block(ui_push_top(item_box), ui_pop_top()) {
            ui_serial_type(&ui_test_event, type(app_event_t));
            defer_if (ui_begin_expander("app_event_t").clicked, ui_end_expander()) {
-                for (int i = 0; i < 10; i += 1) {
+                for (int i = 0; i < 2; i += 1) {
                    ui_label("kind: app_event_kind_t##a%d", i);
                    ui_label("ctrl: b8##ds%d", i);
                    ui_label("shift: b8##f%d", i);
@@ -562,7 +552,6 @@ fn void ui_demo_update(app_frame_t *frame) {
            f32 scroller_percent = scroller_norm * scrollable_space;
            f32 scroller_second = scrollable_space - scroller_percent;

-            scroller_box->op = ui_op_idle;
            r2f32_cut_top(&ui->top->rect, scroller_percent * scroller_box_size);
            ui_box_t *box = ui_build_box_from_id(UILOC, (ui_box_flags_t){.draw_border = true, .draw_rect = true}, ui_idf("slider"));
            ui_set_rect(box, r2f32_cut_top(&ui->top->rect, scroller_size * scroller_box_size));
@@ -576,7 +565,6 @@ fn void ui_demo_update(app_frame_t *frame) {
                scroller_value = CLAMP(scroller_value, 0, all_items_size);
            }

-
            for (ui_box_t *it = item_box->first; it; it = it->next) {
                it->full_rect.min.y -= scroller_value;
                it->full_rect.max.y -= scroller_value;