WIP top-level parsing and keyword tokens
This commit is contained in:
89
lex.c
89
lex.c
@@ -1,9 +1,3 @@
|
||||
/*
|
||||
|
||||
- [ ] Newline splicing, the first source preprocessing stage. In order to properly handle a backslash ('\\') followed by a newline, we most likely need to preprocess the source in an initial pass. So at some point we need to introduce a stage that creates a buffer without the unwanted characters, together with a line / column mapping data structure.
|
||||
|
||||
*/
|
||||
|
||||
typedef struct Token {
|
||||
Token_Kind kind;
|
||||
int len;
|
||||
@@ -46,7 +40,6 @@ uint64_t hash_bytes(char *data, size_t len) {
|
||||
char *global_intern_table[4096];
|
||||
char intern_arena[4096*6];
|
||||
int intern_arena_len;
|
||||
|
||||
char *lex_alloc_string(int len) {
|
||||
char *result = intern_arena + intern_arena_len;
|
||||
intern_arena_len += len + 1;
|
||||
@@ -73,47 +66,6 @@ char *make_intern(char *string, int len) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Interned pointer of the first keyword registered by lex_init_keywords ("auto"). */
char *lex_first_keyword = NULL;
|
||||
/* Interned pointer of the last keyword registered by lex_init_keywords ("while"). */
char *lex_last_keyword = NULL;
|
||||
/*
 * Interns the string x through make_intern and yields the interned pointer.
 * NOTE(review): ilen is defined outside this view; the "- 1" presumably drops
 * a terminating NUL that ilen counts -- confirm against ilen's definition.
 */
#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)
|
||||
|
||||
/*
 * Registers every keyword by interning it with lex_add_keyword.
 *
 * The pointer returned for the first keyword ("auto") is stored in
 * lex_first_keyword and the pointer returned for the last one ("while") in
 * lex_last_keyword. lex_is_keyword classifies a string by checking whether
 * its pointer lies between these two, so the first and last calls must stay
 * at the ends of the list.
 *
 * NOTE(review): the range test presumably relies on make_intern handing out
 * increasing addresses for consecutively interned new strings, and on no
 * other string being interned between these calls -- confirm against
 * make_intern / lex_alloc_string.
 */
void lex_init_keywords(void) {
|
||||
/* First keyword: marks the lower bound of the keyword pointer range. */
lex_first_keyword = lex_add_keyword("auto");
|
||||
lex_add_keyword("break");
|
||||
lex_add_keyword("case");
|
||||
lex_add_keyword("char");
|
||||
lex_add_keyword("const");
|
||||
lex_add_keyword("continue");
|
||||
lex_add_keyword("default");
|
||||
lex_add_keyword("do");
|
||||
lex_add_keyword("double");
|
||||
lex_add_keyword("else");
|
||||
lex_add_keyword("enum");
|
||||
lex_add_keyword("extern");
|
||||
lex_add_keyword("float");
|
||||
lex_add_keyword("for");
|
||||
lex_add_keyword("goto");
|
||||
lex_add_keyword("if");
|
||||
lex_add_keyword("inline");
|
||||
lex_add_keyword("int");
|
||||
lex_add_keyword("long");
|
||||
lex_add_keyword("register");
|
||||
lex_add_keyword("restrict");
|
||||
lex_add_keyword("return");
|
||||
lex_add_keyword("short");
|
||||
lex_add_keyword("signed");
|
||||
lex_add_keyword("sizeof");
|
||||
lex_add_keyword("static");
|
||||
lex_add_keyword("struct");
|
||||
lex_add_keyword("switch");
|
||||
lex_add_keyword("typedef");
|
||||
lex_add_keyword("union");
|
||||
lex_add_keyword("unsigned");
|
||||
lex_add_keyword("void");
|
||||
lex_add_keyword("volatile");
|
||||
|
||||
/* Last keyword: marks the upper bound of the keyword pointer range. */
lex_last_keyword = lex_add_keyword("while");
|
||||
}
|
||||
|
||||
bool lex_is_keyword(char *string) {
|
||||
bool result = string >= lex_first_keyword && string <= lex_last_keyword;
|
||||
return result;
|
||||
@@ -275,7 +227,40 @@ Token lex_token(Lexer *lex) {
|
||||
if (t.kind == TOK_IDENT) {
|
||||
t.intern = make_intern(t.str, t.len);
|
||||
if (lex_is_keyword(t.intern)) {
|
||||
t.kind = TOK_KEYWORD;
|
||||
if (t.intern == keyword_while) t.kind = TOK_while;
|
||||
if (t.intern == keyword_break) t.kind = TOK_break;
|
||||
if (t.intern == keyword_case) t.kind = TOK_case;
|
||||
if (t.intern == keyword_char) t.kind = TOK_char;
|
||||
if (t.intern == keyword_const) t.kind = TOK_const;
|
||||
if (t.intern == keyword_continue) t.kind = TOK_continue;
|
||||
if (t.intern == keyword_default) t.kind = TOK_default;
|
||||
if (t.intern == keyword_do) t.kind = TOK_do;
|
||||
if (t.intern == keyword_double) t.kind = TOK_double;
|
||||
if (t.intern == keyword_else) t.kind = TOK_else;
|
||||
if (t.intern == keyword_enum) t.kind = TOK_enum;
|
||||
if (t.intern == keyword_extern) t.kind = TOK_extern;
|
||||
if (t.intern == keyword_float) t.kind = TOK_float;
|
||||
if (t.intern == keyword_for) t.kind = TOK_for;
|
||||
if (t.intern == keyword_goto) t.kind = TOK_goto;
|
||||
if (t.intern == keyword_if) t.kind = TOK_if;
|
||||
if (t.intern == keyword_inline) t.kind = TOK_inline;
|
||||
if (t.intern == keyword_int) t.kind = TOK_int;
|
||||
if (t.intern == keyword_long) t.kind = TOK_long;
|
||||
if (t.intern == keyword_register) t.kind = TOK_register;
|
||||
if (t.intern == keyword_restrict) t.kind = TOK_restrict;
|
||||
if (t.intern == keyword_return) t.kind = TOK_return;
|
||||
if (t.intern == keyword_short) t.kind = TOK_short;
|
||||
if (t.intern == keyword_signed) t.kind = TOK_signed;
|
||||
if (t.intern == keyword_sizeof) t.kind = TOK_sizeof;
|
||||
if (t.intern == keyword_static) t.kind = TOK_static;
|
||||
if (t.intern == keyword_struct) t.kind = TOK_struct;
|
||||
if (t.intern == keyword_switch) t.kind = TOK_switch;
|
||||
if (t.intern == keyword_typedef) t.kind = TOK_typedef;
|
||||
if (t.intern == keyword_union) t.kind = TOK_union;
|
||||
if (t.intern == keyword_unsigned) t.kind = TOK_unsigned;
|
||||
if (t.intern == keyword_void) t.kind = TOK_void;
|
||||
if (t.intern == keyword_volatile) t.kind = TOK_volatile;
|
||||
if (t.intern == keyword_auto) t.kind = TOK_auto;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -365,11 +350,11 @@ void lex_test(void) {
|
||||
assert_token(baz123, TOK_IDENT, "baz123", 0, 9);
|
||||
assert(strcmp(baz123.intern, "baz123") == 0);
|
||||
Token kw_if = lex_token(&ident_lex);
|
||||
assert_token(kw_if, TOK_KEYWORD, "if", 0, 16);
|
||||
assert_token(kw_if, TOK_if, "if", 0, 16);
|
||||
Token kw_for = lex_token(&ident_lex);
|
||||
assert_token(kw_for, TOK_KEYWORD, "for", 0, 19);
|
||||
assert_token(kw_for, TOK_for, "for", 0, 19);
|
||||
Token kw_while = lex_token(&ident_lex);
|
||||
assert_token(kw_while, TOK_KEYWORD, "while", 0, 23);
|
||||
assert_token(kw_while, TOK_while, "while", 0, 23);
|
||||
Token ident_if_ = lex_token(&ident_lex);
|
||||
assert_token(ident_if_, TOK_IDENT, "if_", 0, 29);
|
||||
Token ident_x9 = lex_token(&ident_lex);
|
||||
|
||||
Reference in New Issue
Block a user