Squashing lexer bugs related to EOF
This commit is contained in:
@@ -97,6 +97,7 @@ enum Token_Kind{
|
||||
|
||||
struct Token{
|
||||
Token_Kind kind;
|
||||
U32 di; // debug_id
|
||||
union{
|
||||
String string;
|
||||
struct{U8 *str; S64 len;};
|
||||
@@ -133,6 +134,7 @@ struct Lexer{
|
||||
Array<Token> tokens;
|
||||
Intern_Table interns;
|
||||
S64 token_iter;
|
||||
U32 token_debug_ids;
|
||||
|
||||
Intern_String intern(String string){
|
||||
return intern_string(&interns, string);
|
||||
@@ -182,11 +184,12 @@ struct Parse_Ctx:Lexer{
|
||||
String_Builder gen;
|
||||
};
|
||||
|
||||
global B32 emit_line_directives;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Constructors
|
||||
//-----------------------------------------------------------------------------
|
||||
thread_local Parse_Ctx *pctx;
|
||||
|
||||
function void
|
||||
lex_init(Allocator *token_string_arena, Allocator *map_allocator, Lexer *l){
|
||||
l->arena = token_string_arena;
|
||||
|
||||
28
lexer.cpp
28
lexer.cpp
@@ -2,6 +2,7 @@ force_inline B32 token_is_assign(Token_Kind token){return token >= TK_FirstAssig
|
||||
// Pointer overload: classifies the token by its kind, forwarding to the
// Token_Kind overload of token_is_assign.
force_inline B32
token_is_assign(Token *token){
  Token_Kind kind = token->kind;
  return token_is_assign(kind);
}
|
||||
// Reports whether the kind lies in the inclusive range
// [TK_FirstCompare, TK_LastCompare].
force_inline B32
token_is_compare(Token_Kind token){
  if(token < TK_FirstCompare) return false;
  if(token > TK_LastCompare) return false;
  return true;
}
|
||||
// Pointer overload: classifies the token by its kind, forwarding to the
// Token_Kind overload of token_is_compare.
force_inline B32
token_is_compare(Token *token){
  Token_Kind kind = token->kind;
  return token_is_compare(kind);
}
|
||||
global Token token_null = {SAME_SCOPE};
|
||||
|
||||
function U8
|
||||
lexc(Lex_Stream *s){
|
||||
@@ -164,16 +165,20 @@ lex_parse_ident(Intern_Table *table, Lex_Stream *s, Token *t){
|
||||
break
|
||||
|
||||
function Token
|
||||
token_make(U8 *str, String file, int line, U8 *line_begin){
|
||||
token_make(Lexer *lexer, U8 *str, String file, int line, U8 *line_begin){
|
||||
Token t = {};
|
||||
t.str = str;
|
||||
t.file = file;
|
||||
t.line = line;
|
||||
t.line_begin = line_begin;
|
||||
t.di = lexer->token_debug_ids++;
|
||||
return t;
|
||||
}
|
||||
|
||||
global Token token_null = {SAME_SCOPE};
|
||||
// Convenience overload: builds a token from the lexer's own stream state
// (current character pointer, file, line and line start) by forwarding to
// the five-argument token_make.
function Token
token_make(Lexer *lexer){
  Lex_Stream *s = &lexer->stream;
  return token_make(lexer, lexcp(s), s->file, s->line, s->line_begin);
}
|
||||
|
||||
function Token *
|
||||
lex_last_indent_token(Lex_Stream *s){
|
||||
@@ -213,14 +218,16 @@ lex_unwind_indent_stack(Token *t, Lex_Stream *s, Array<Token> *array){
|
||||
}
|
||||
|
||||
function void
|
||||
lex__stream(Lexer *lexer, Lex_Stream *s){
|
||||
lex__stream(Lexer *lexer){
|
||||
Intern_Table *table = &lexer->interns;
|
||||
Array<Token> *array = &lexer->tokens;
|
||||
Lex_Stream *s = &lexer->stream;
|
||||
|
||||
B32 beginning = true;
|
||||
for(;;){
|
||||
if(lexc(s) == 0 || s->iter >= s->stream.len){
|
||||
Token t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
||||
end_of_stream:
|
||||
Token t = token_make(lexer);
|
||||
lex_unwind_indent_stack(&t, s, array);
|
||||
break;
|
||||
}
|
||||
@@ -242,10 +249,11 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
||||
// but for down scopes we want to emit 2 new lines
|
||||
// that will ease out parsing, one token to break out
|
||||
// from a block parsing, second to allow continuation of surrounding scope
|
||||
Token t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
||||
Token t = token_make(lexer);
|
||||
B32 should_emit = beginning;
|
||||
for(;;){
|
||||
switch(lexc(s)){
|
||||
case 0 : goto end_of_stream; break;
|
||||
case '\t': case ' ': lex_advance(s); t.indent++; break;
|
||||
case '\r': lex_advance(s); break;
|
||||
case '/': {
|
||||
@@ -281,7 +289,7 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
||||
// all because we would know by how much to indent
|
||||
// @todo: after detecting indentation 2 spaces would become 1 indent value
|
||||
case ';' : {
|
||||
Token semi = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
||||
Token semi = token_make(lexer);
|
||||
Token *last = lex_last_indent_token(s);
|
||||
semi.kind = SAME_SCOPE;
|
||||
semi.indent = last->indent;
|
||||
@@ -292,7 +300,7 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
||||
case '\n':{
|
||||
lex_advance(s);
|
||||
should_emit = true;
|
||||
t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
||||
t = token_make(lexer);
|
||||
} break;
|
||||
|
||||
default:{
|
||||
@@ -321,10 +329,10 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
||||
beginning = false;
|
||||
|
||||
// @note: handle the indented token
|
||||
t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
||||
t = token_make(lexer);
|
||||
lex_advance(s);
|
||||
switch(*t.str){
|
||||
case 0 : break;
|
||||
case 0 : goto end_of_stream; break;
|
||||
case '@': t.kind = TK_At; break;
|
||||
case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break;
|
||||
case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break;
|
||||
@@ -540,7 +548,7 @@ lex_restream(Lexer *lexer, String istream, String file){
|
||||
Scratch scratch;
|
||||
lexer->stream.indent_stack.allocator = scratch;
|
||||
lexer->stream.indent_stack.add(&token_null);
|
||||
lex__stream(lexer, &lexer->stream);
|
||||
lex__stream(lexer);
|
||||
}
|
||||
|
||||
function Lexer
|
||||
|
||||
Reference in New Issue
Block a user