Squashing lexer bugs related to EOF
This commit is contained in:
@@ -97,6 +97,7 @@ enum Token_Kind{
|
|||||||
|
|
||||||
struct Token{
|
struct Token{
|
||||||
Token_Kind kind;
|
Token_Kind kind;
|
||||||
|
U32 di; // debug_id
|
||||||
union{
|
union{
|
||||||
String string;
|
String string;
|
||||||
struct{U8 *str; S64 len;};
|
struct{U8 *str; S64 len;};
|
||||||
@@ -133,6 +134,7 @@ struct Lexer{
|
|||||||
Array<Token> tokens;
|
Array<Token> tokens;
|
||||||
Intern_Table interns;
|
Intern_Table interns;
|
||||||
S64 token_iter;
|
S64 token_iter;
|
||||||
|
U32 token_debug_ids;
|
||||||
|
|
||||||
Intern_String intern(String string){
|
Intern_String intern(String string){
|
||||||
return intern_string(&interns, string);
|
return intern_string(&interns, string);
|
||||||
@@ -182,11 +184,12 @@ struct Parse_Ctx:Lexer{
|
|||||||
String_Builder gen;
|
String_Builder gen;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
global B32 emit_line_directives;
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
// Constructors
|
// Constructors
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
thread_local Parse_Ctx *pctx;
|
thread_local Parse_Ctx *pctx;
|
||||||
|
|
||||||
function void
|
function void
|
||||||
lex_init(Allocator *token_string_arena, Allocator *map_allocator, Lexer *l){
|
lex_init(Allocator *token_string_arena, Allocator *map_allocator, Lexer *l){
|
||||||
l->arena = token_string_arena;
|
l->arena = token_string_arena;
|
||||||
|
|||||||
28
lexer.cpp
28
lexer.cpp
@@ -2,6 +2,7 @@ force_inline B32 token_is_assign(Token_Kind token){return token >= TK_FirstAssig
|
|||||||
force_inline B32 token_is_assign(Token *token){return token_is_assign(token->kind);}
|
force_inline B32 token_is_assign(Token *token){return token_is_assign(token->kind);}
|
||||||
force_inline B32 token_is_compare(Token_Kind token){return token >= TK_FirstCompare && token <= TK_LastCompare;}
|
force_inline B32 token_is_compare(Token_Kind token){return token >= TK_FirstCompare && token <= TK_LastCompare;}
|
||||||
force_inline B32 token_is_compare(Token *token){return token_is_compare(token->kind);}
|
force_inline B32 token_is_compare(Token *token){return token_is_compare(token->kind);}
|
||||||
|
global Token token_null = {SAME_SCOPE};
|
||||||
|
|
||||||
function U8
|
function U8
|
||||||
lexc(Lex_Stream *s){
|
lexc(Lex_Stream *s){
|
||||||
@@ -164,16 +165,20 @@ lex_parse_ident(Intern_Table *table, Lex_Stream *s, Token *t){
|
|||||||
break
|
break
|
||||||
|
|
||||||
function Token
|
function Token
|
||||||
token_make(U8 *str, String file, int line, U8 *line_begin){
|
token_make(Lexer *lexer, U8 *str, String file, int line, U8 *line_begin){
|
||||||
Token t = {};
|
Token t = {};
|
||||||
t.str = str;
|
t.str = str;
|
||||||
t.file = file;
|
t.file = file;
|
||||||
t.line = line;
|
t.line = line;
|
||||||
t.line_begin = line_begin;
|
t.line_begin = line_begin;
|
||||||
|
t.di = lexer->token_debug_ids++;
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
global Token token_null = {SAME_SCOPE};
|
function Token
|
||||||
|
token_make(Lexer *lexer){
|
||||||
|
return token_make(lexer, lexcp(&lexer->stream), lexer->stream.file, lexer->stream.line, lexer->stream.line_begin);
|
||||||
|
}
|
||||||
|
|
||||||
function Token *
|
function Token *
|
||||||
lex_last_indent_token(Lex_Stream *s){
|
lex_last_indent_token(Lex_Stream *s){
|
||||||
@@ -213,14 +218,16 @@ lex_unwind_indent_stack(Token *t, Lex_Stream *s, Array<Token> *array){
|
|||||||
}
|
}
|
||||||
|
|
||||||
function void
|
function void
|
||||||
lex__stream(Lexer *lexer, Lex_Stream *s){
|
lex__stream(Lexer *lexer){
|
||||||
Intern_Table *table = &lexer->interns;
|
Intern_Table *table = &lexer->interns;
|
||||||
Array<Token> *array = &lexer->tokens;
|
Array<Token> *array = &lexer->tokens;
|
||||||
|
Lex_Stream *s = &lexer->stream;
|
||||||
|
|
||||||
B32 beginning = true;
|
B32 beginning = true;
|
||||||
for(;;){
|
for(;;){
|
||||||
if(lexc(s) == 0 || s->iter >= s->stream.len){
|
if(lexc(s) == 0 || s->iter >= s->stream.len){
|
||||||
Token t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
end_of_stream:
|
||||||
|
Token t = token_make(lexer);
|
||||||
lex_unwind_indent_stack(&t, s, array);
|
lex_unwind_indent_stack(&t, s, array);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -242,10 +249,11 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
|||||||
// but for down scopes we want to emit 2 new lines
|
// but for down scopes we want to emit 2 new lines
|
||||||
// that will ease out parsing, one token to break out
|
// that will ease out parsing, one token to break out
|
||||||
// from a block parsing, second to allow continuation of surrounding scope
|
// from a block parsing, second to allow continuation of surrounding scope
|
||||||
Token t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
Token t = token_make(lexer);
|
||||||
B32 should_emit = beginning;
|
B32 should_emit = beginning;
|
||||||
for(;;){
|
for(;;){
|
||||||
switch(lexc(s)){
|
switch(lexc(s)){
|
||||||
|
case 0 : goto end_of_stream; break;
|
||||||
case '\t': case ' ': lex_advance(s); t.indent++; break;
|
case '\t': case ' ': lex_advance(s); t.indent++; break;
|
||||||
case '\r': lex_advance(s); break;
|
case '\r': lex_advance(s); break;
|
||||||
case '/': {
|
case '/': {
|
||||||
@@ -281,7 +289,7 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
|||||||
// all because we would know by how much to indent
|
// all because we would know by how much to indent
|
||||||
// @todo: after detecting indentation 2 spaces would become 1 indent value
|
// @todo: after detecting indentation 2 spaces would become 1 indent value
|
||||||
case ';' : {
|
case ';' : {
|
||||||
Token semi = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
Token semi = token_make(lexer);
|
||||||
Token *last = lex_last_indent_token(s);
|
Token *last = lex_last_indent_token(s);
|
||||||
semi.kind = SAME_SCOPE;
|
semi.kind = SAME_SCOPE;
|
||||||
semi.indent = last->indent;
|
semi.indent = last->indent;
|
||||||
@@ -292,7 +300,7 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
|||||||
case '\n':{
|
case '\n':{
|
||||||
lex_advance(s);
|
lex_advance(s);
|
||||||
should_emit = true;
|
should_emit = true;
|
||||||
t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
t = token_make(lexer);
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
default:{
|
default:{
|
||||||
@@ -321,10 +329,10 @@ lex__stream(Lexer *lexer, Lex_Stream *s){
|
|||||||
beginning = false;
|
beginning = false;
|
||||||
|
|
||||||
// @note: handle the indented token
|
// @note: handle the indented token
|
||||||
t = token_make(lexcp(s), s->file, s->line, s->line_begin);
|
t = token_make(lexer);
|
||||||
lex_advance(s);
|
lex_advance(s);
|
||||||
switch(*t.str){
|
switch(*t.str){
|
||||||
case 0 : break;
|
case 0 : goto end_of_stream; break;
|
||||||
case '@': t.kind = TK_At; break;
|
case '@': t.kind = TK_At; break;
|
||||||
case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break;
|
case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break;
|
||||||
case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break;
|
case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break;
|
||||||
@@ -540,7 +548,7 @@ lex_restream(Lexer *lexer, String istream, String file){
|
|||||||
Scratch scratch;
|
Scratch scratch;
|
||||||
lexer->stream.indent_stack.allocator = scratch;
|
lexer->stream.indent_stack.allocator = scratch;
|
||||||
lexer->stream.indent_stack.add(&token_null);
|
lexer->stream.indent_stack.add(&token_null);
|
||||||
lex__stream(lexer, &lexer->stream);
|
lex__stream(lexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
function Lexer
|
function Lexer
|
||||||
|
|||||||
Reference in New Issue
Block a user