Squashing lexer bugs related to EOF

This commit is contained in:
Krzosa Karol
2022-06-07 21:18:49 +02:00
parent c69d2b7fe2
commit c3e071b3bb
2 changed files with 22 additions and 11 deletions

View File

@@ -97,6 +97,7 @@ enum Token_Kind{
struct Token{
Token_Kind kind;
U32 di; // debug_id
union{
String string;
struct{U8 *str; S64 len;};
@@ -133,6 +134,7 @@ struct Lexer{
Array<Token> tokens;
Intern_Table interns;
S64 token_iter;
U32 token_debug_ids;
Intern_String intern(String string){
return intern_string(&interns, string);
@@ -182,11 +184,12 @@ struct Parse_Ctx:Lexer{
String_Builder gen;
};
global B32 emit_line_directives;
//-----------------------------------------------------------------------------
// Constructors
//-----------------------------------------------------------------------------
thread_local Parse_Ctx *pctx;
function void
lex_init(Allocator *token_string_arena, Allocator *map_allocator, Lexer *l){
l->arena = token_string_arena;

View File

@@ -2,6 +2,7 @@ force_inline B32 token_is_assign(Token_Kind token){return token >= TK_FirstAssig
// Pointer overload: classifies a token by its `kind` field.
force_inline B32 token_is_assign(Token *token){
  return token_is_assign(token->kind);
}

// True when `token` lies in the inclusive range [TK_FirstCompare, TK_LastCompare].
force_inline B32 token_is_compare(Token_Kind token){
  return TK_FirstCompare <= token && token <= TK_LastCompare;
}

// Pointer overload: classifies a token by its `kind` field.
force_inline B32 token_is_compare(Token *token){
  return token_is_compare(token->kind);
}
global Token token_null = {SAME_SCOPE};
function U8
lexc(Lex_Stream *s){
@@ -164,16 +165,20 @@ lex_parse_ident(Intern_Table *table, Lex_Stream *s, Token *t){
break
// Builds an empty-initialized Token, records where it starts (start pointer,
// file, line number, line start pointer) and stamps it with the next debug id
// taken from the lexer's running `token_debug_ids` counter.
// NOTE(review): two signature lines follow. The body dereferences `lexer`,
// so the line without the `lexer` parameter looks like the pre-change side
// of this diff -- confirm and drop it before compiling.
function Token
token_make(U8 *str, String file, int line, U8 *line_begin){
token_make(Lexer *lexer, U8 *str, String file, int line, U8 *line_begin){
Token t = {};
t.str = str;
t.file = file;
t.line = line;
t.line_begin = line_begin;
// Post-increment: this token receives the current id, then the counter advances.
t.di = lexer->token_debug_ids++;
return t;
}
global Token token_null = {SAME_SCOPE};
// Convenience overload: makes a token from the lexer's own stream state,
// passing along the pointer returned by lexcp() together with the stream's
// file, line and line_begin fields.
function Token
token_make(Lexer *lexer){
  Lex_Stream *s = &lexer->stream;
  return token_make(lexer, lexcp(s), s->file, s->line, s->line_begin);
}
function Token *
lex_last_indent_token(Lex_Stream *s){
@@ -213,14 +218,16 @@ lex_unwind_indent_stack(Token *t, Lex_Stream *s, Array<Token> *array){
}
function void
lex__stream(Lexer *lexer, Lex_Stream *s){
lex__stream(Lexer *lexer){
Intern_Table *table = &lexer->interns;
Array<Token> *array = &lexer->tokens;
Lex_Stream *s = &lexer->stream;
B32 beginning = true;
for(;;){
if(lexc(s) == 0 || s->iter >= s->stream.len){
Token t = token_make(lexcp(s), s->file, s->line, s->line_begin);
end_of_stream:
Token t = token_make(lexer);
lex_unwind_indent_stack(&t, s, array);
break;
}
@@ -242,10 +249,11 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
// but for down scopes we want to emit 2 new lines
// that will ease out parsing, one token to break out
// from a block parsing, second to allow continuation of surrounding scope
Token t = token_make(lexcp(s), s->file, s->line, s->line_begin);
Token t = token_make(lexer);
B32 should_emit = beginning;
for(;;){
switch(lexc(s)){
case 0 : goto end_of_stream; break;
case '\t': case ' ': lex_advance(s); t.indent++; break;
case '\r': lex_advance(s); break;
case '/': {
@@ -281,7 +289,7 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
// all because we would know by how much to indent
// @todo: after detecting indentation 2 spaces would become 1 indent value
case ';' : {
Token semi = token_make(lexcp(s), s->file, s->line, s->line_begin);
Token semi = token_make(lexer);
Token *last = lex_last_indent_token(s);
semi.kind = SAME_SCOPE;
semi.indent = last->indent;
@@ -292,7 +300,7 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
case '\n':{
lex_advance(s);
should_emit = true;
t = token_make(lexcp(s), s->file, s->line, s->line_begin);
t = token_make(lexer);
} break;
default:{
@@ -321,10 +329,10 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
beginning = false;
// @note: handle the indented token
t = token_make(lexcp(s), s->file, s->line, s->line_begin);
t = token_make(lexer);
lex_advance(s);
switch(*t.str){
case 0 : break;
case 0 : goto end_of_stream; break;
case '@': t.kind = TK_At; break;
case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break;
case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break;
@@ -540,7 +548,7 @@ lex_restream(Lexer *lexer, String istream, String file){
Scratch scratch;
lexer->stream.indent_stack.allocator = scratch;
lexer->stream.indent_stack.add(&token_null);
lex__stream(lexer, &lexer->stream);
lex__stream(lexer);
}
function Lexer