Slowly adding macro handling

2026-05-22 18:41:02 +02:00
parent f41560c5e7
commit a0adc03bcb
2 changed files with 54 additions and 16 deletions
--- a/lex.c
+++ b/lex.c
@@ -1,11 +1,21 @@
+/*
+
+- [ ] New-line splicing, the first source preprocessing stage. In order to properly handle a backslash ('\\') immediately followed by a new line, we most likely need to preprocess the source in an initial pass. So at some point we need to introduce a stage that creates a buffer with the unwanted characters removed, together with a data structure that maps positions in that buffer back to the original line / column.
+
+*/
+
 typedef struct Token {
    Token_Kind kind;
-    char *str;
    int len;
+    char *str;

    char *file;
    int line, column;

+    struct {
+        uint8_t preproc : 1;
+    };
+
    union {
        uint64_t u;
    };
@@ -19,6 +29,7 @@ typedef struct Lexer {
    char *file;
    int line;
    int column;
+    uint8_t preproc;
 } Lexer;

 void lex_advance(Lexer *lex) {
@@ -32,16 +43,36 @@ void lex_advance(Lexer *lex) {
    } else {
        lex->column++;
    }
-    lex->at += 1;
+
+    if (*lex->at == '\\') {
+        lex->at += 1;
+        if ((lex->at < lex->end) && *lex->at == '\n') {
+            lex->at += 1;
+            lex->line += 1; lex->column = 0;
+        } else if ((lex->at < lex->end) && *lex->at == '\r') {
+            lex->at += 1;
+            if ((lex->at < lex->end) && *lex->at == '\n') {
+                lex->at += 1;
+                lex->line += 1; lex->column = 0;
+            } else {
+                panicf("after \\r missing \\n");
+            }
+        } else {
+            panicf("stray '\\' without follow up new line");
+        }
+
+    } else if (*lex->at == '\n') {
+        lex->preproc = false;
+        lex->at += 1;
+    } else {
+        lex->at += 1;
+    }
 }

 void eat_whitespace(Lexer *lex) {
    while (lex->at < lex->end) {
        switch (*lex->at) {
-            case ' ':
-            case '\t':
-            case '\r':
-            case '\n':
+            case ' ': case '\t': case '\r': case '\n':
                lex_advance(lex);
                break;
            default:
@@ -97,6 +128,7 @@ Token lex_token(Lexer *lex) {
        .line = lex->line,
        .column = lex->column,
        .file = lex->file,
+        .preproc = lex->preproc,
    };

    if (lex->at >= lex->end) {
@@ -146,7 +178,6 @@ Token lex_token(Lexer *lex) {
        case ':': t.kind = TOK_COLON; break;
        case ';': t.kind = TOK_SEMICOLON; break;
        case '?': t.kind = TOK_QUESTION; break;
-        case '#': t.kind = TOK_HASH; break;
        case '+': t.kind = lex_repeat_or_assign(lex, '+', TOK_PLUS, TOK_INC, TOK_PLUS_ASSIGN); break;
        case '-': {
            if (lex_match(lex, '-')) t.kind = TOK_DEC;
@@ -165,6 +196,15 @@ Token lex_token(Lexer *lex) {
        case '&': t.kind = lex_repeat_or_assign(lex, '&', TOK_BITAND, TOK_AND, TOK_AND_ASSIGN); break;
        case '|': t.kind = lex_repeat_or_assign(lex, '|', TOK_BITOR, TOK_OR, TOK_OR_ASSIGN); break;
        case '^': t.kind = lex_assign_variant(lex, TOK_BITXOR, TOK_XOR_ASSIGN); break;
+
+        case '#': {
+            t.kind = TOK_HASH;
+            lex->preproc = t.preproc = true;
+            while (lex->at < lex->end && isalpha(*lex->at)) {
+                lex_advance(lex);
+            }
+        } break;
+
        default: panicf("unrecognized character: '%c', can't match with any of the token kinds", c);
    }

@@ -172,14 +212,6 @@ Token lex_token(Lexer *lex) {
    return t;
 }

-void assert_token(Token t, Token_Kind kind, char *text, int line, int column) {
-    assert(t.kind == kind);
-    assert(t.line == line);
-    assert(t.column == column);
-    assert(t.len == (int)strlen(text));
-    assert(strncmp(t.str, text, t.len) == 0);
-}
-
 Token_Array lex_file(char *file, char *src, int len) {
    Lexer lex = make_lexer(file, src, len);
    Token_Array result = {0};
@@ -193,6 +225,13 @@ Token_Array lex_file(char *file, char *src, int len) {
    return result;
 }

+void assert_token(Token t, Token_Kind kind, char *text, int line, int column) { /* Asserts that token t has the expected kind, line, column and text. */
+    assert(t.kind == kind);
+    assert(t.line == line);
+    assert(t.column == column);
+    assert(t.len == (int)strlen(text)); /* length is checked first, so the comparison below covers all of text */
+    assert(strncmp(t.str, text, t.len) == 0); /* compares only t.len chars; presumably t.str is not NUL-terminated at the token end -- verify against the lexer */
+}

 void lex_test(void) {
    char *src = "12 + 34.5 * 6\n- 7 % 2 / 1 == 1 != 2 <= 3 >= 4 && 3 || 4 << 1 >> 2";
--- a/main.c
+++ b/main.c
@@ -25,5 +25,4 @@ int main(int argc, char **argv) {
        lex_test();
        parser_test();
    }
-    
 }