Pratt parsing, basic AST, parsing of const declarations
This commit is contained in:
111
new_lex.cpp
111
new_lex.cpp
@@ -81,7 +81,7 @@ enum Token_Kind{
|
||||
TK_Character,
|
||||
TK_Error,
|
||||
TK_Float,
|
||||
TK_Int,
|
||||
TK_Integer,
|
||||
TK_Keyword,
|
||||
};
|
||||
|
||||
@@ -114,6 +114,13 @@ struct Lex_Stream{
|
||||
S32 line;
|
||||
};
|
||||
|
||||
struct Lexer{
|
||||
Lex_Stream stream;
|
||||
Array<Token> tokens;
|
||||
Intern_Table interns;
|
||||
S64 token_iter;
|
||||
};
|
||||
|
||||
function U8
|
||||
lexc(Lex_Stream *s){
|
||||
return s->stream.str[s->iter];
|
||||
@@ -159,6 +166,23 @@ lex_set_len(Lex_Stream *s, Token *token){
|
||||
token->len = lexcp(s) - token->str;
|
||||
}
|
||||
|
||||
// Interns every string in `keywords` into the lexer's intern table and
// records the interned pointer of the first and of the last keyword in
// `first_keyword` / `last_keyword`. lex_is_keyword later tests interned
// strings against that pointer range.
// NOTE(review): this presumably relies on intern_string placing the keywords
// contiguously and in call order - confirm against the intern table code.
// With an empty `keywords` array only `last_keyword` is written (with the
// zero-initialized string pointer); `first_keyword` is left untouched.
function void
lex_set_keywords(Lexer *lexer, Array<String> keywords){
  Intern_Table *table = &lexer->interns;
  Intern_String interned = {};
  IFor(keywords){
    interned = intern_string(table, *it);
    // The first element marks the lower bound of the keyword range.
    if(it == keywords.begin()){
      table->first_keyword = interned.str;
    }
  }
  // Whatever was interned last is the upper bound of the range.
  table->last_keyword = interned.str;
}
|
||||
|
||||
// Returns non-zero when the interned string pointer of `keyword` lies inside
// the inclusive [first_keyword, last_keyword] pointer range stored in the
// intern table. Note that `lexer` is an Intern_Table here, not a Lexer.
function B32
lex_is_keyword(Intern_Table *lexer, Intern_String keyword){
  // Below the first registered keyword: not a keyword.
  if(keyword.str < lexer->first_keyword){
    return false;
  }
  // Above the last registered keyword: not a keyword.
  if(keyword.str > lexer->last_keyword){
    return false;
  }
  return true;
}
|
||||
|
||||
function void
|
||||
token_error(Token *t, String error_val){
|
||||
t->kind = TK_Error;
|
||||
@@ -237,7 +261,7 @@ t.kind = OpName;
|
||||
break
|
||||
|
||||
function void
|
||||
lex__stream(Array<Token> *array, Lex_Stream *s){
|
||||
lex__stream(Intern_Table *table, Array<Token> *array, Lex_Stream *s){
|
||||
while(lexc(s)){
|
||||
while(lex_is_whitespace(lexc(s)))
|
||||
lex_advance(s);
|
||||
@@ -261,7 +285,6 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
|
||||
case ',': t.kind = TK_Comma; break;
|
||||
case '~': t.kind = TK_Neg; break;
|
||||
case '?': t.kind = TK_Question; break;
|
||||
case ';': t.kind = TK_Semicolon; break;
|
||||
case '#': t.kind = TK_Pound; break;
|
||||
CASE2('!', TK_Not, TK_NotEquals);
|
||||
CASE2('^', TK_BitXor, TK_XorAssign);
|
||||
@@ -273,6 +296,10 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
|
||||
CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
|
||||
#undef CASE2
|
||||
#undef CASE3
|
||||
case ';': {
|
||||
t.kind = TK_Semicolon;
|
||||
}break;
|
||||
|
||||
case '\n': {
|
||||
t.kind = TK_NewLine;
|
||||
if(lexc(s) == '\r')
|
||||
@@ -378,7 +405,7 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
|
||||
t.str += 1;
|
||||
t.len -= 2;
|
||||
}
|
||||
//t.intern_val = intern_string(&array->interns, t.string);
|
||||
t.intern_val = intern_string(table, t.string);
|
||||
} break;
|
||||
|
||||
case '/': {
|
||||
@@ -420,7 +447,7 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
|
||||
|
||||
case '0':case '1':case '2':case '3':case '4':
|
||||
case '5':case '6':case '7':case '8':case '9':{
|
||||
t.kind = TK_Int;
|
||||
t.kind = TK_Integer;
|
||||
while(lex_is_numeric(lexc(s)))
|
||||
lex_advance(s);
|
||||
lex_set_len(s, &t);
|
||||
@@ -440,10 +467,10 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
|
||||
while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
|
||||
lex_advance(s);
|
||||
lex_set_len(s,&t);
|
||||
//t.intern_val = intern_string(&array->interns, t.string);
|
||||
//if(lex_is_keyword(t.intern_val)){
|
||||
//t.kind = TK_Keyword;
|
||||
//}
|
||||
t.intern_val = intern_string(table, t.string);
|
||||
if(lex_is_keyword(table, t.intern_val)){
|
||||
t.kind = TK_Keyword;
|
||||
}
|
||||
} break;
|
||||
|
||||
default: {
|
||||
@@ -454,38 +481,67 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
|
||||
if(t.len==0)
|
||||
lex_set_len(s,&t);
|
||||
|
||||
array_push(array, t);
|
||||
array->add(t);
|
||||
}
|
||||
}
|
||||
|
||||
function Array<Token>
|
||||
lex_stream(String istream, String file){
|
||||
Lex_Stream stream = {istream, 0, istream.str, file, 0};
|
||||
Array<Token> tokens = array_make<Token>(1024);
|
||||
lex__stream(&tokens, &stream);
|
||||
return tokens;
|
||||
// Sets up the lexer's token array and intern table.
// NOTE(review): the numeric arguments look like initial capacities
// (2048 tokens, 1024 intern slots) - confirm against array_make and
// intern_table_make.
function void
lex_init(Lexer *l){
  l->tokens  = array_make<Token>(2048);
  l->interns = intern_table_make(1024);
}
|
||||
|
||||
// Creates a zero-initialized Lexer, runs lex_init on it and returns it by value.
function Lexer
lex_make(){
  Lexer fresh_lexer = {};
  lex_init(&fresh_lexer);
  return fresh_lexer;
}
|
||||
|
||||
function void
|
||||
lex_test(){
|
||||
Set_Scratch();
|
||||
String test = "//R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy"
|
||||
lex_restream(Lexer *lexer, String istream, String file){
  // Points the lexer at a new input: the stream cursor is rebuilt from
  // `istream`/`file`, previously produced tokens are dropped and the token
  // index is rewound before the input is lexed again. The intern table is
  // kept, so keywords set earlier stay registered.
  Lex_Stream fresh_stream = {istream, 0, istream.str, file, 0};
  lexer->stream = fresh_stream;

  lexer->tokens.clear();
  lexer->token_iter = 0;

  lex__stream(&lexer->interns, &lexer->tokens, &lexer->stream);
}
|
||||
|
||||
// Convenience entry point: builds a new Lexer with lex_make, lexes `istream`
// (labelled with `file`) through lex_restream and returns the lexer by value.
// No keywords are registered on this path.
function Lexer
lex_stream(String istream, String file){
  Lexer lexer = lex_make();
  lex_restream(&lexer, istream, file);
  return lexer;
}
|
||||
|
||||
function void
|
||||
lex_test(){ Set_Scratch();
|
||||
String test = "Keyword //R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy"
|
||||
"\"Test_Meme\"+=-===42524 4294967295 18446744073709551615"
|
||||
"for if while switch :="_s;
|
||||
Array<Token> array = lex_stream(test, "Test1"_s);
|
||||
|
||||
Array<String> keywords = {};
|
||||
keywords.add("Keyword"_s);
|
||||
keywords.add("for"_s);
|
||||
keywords.add("if"_s);
|
||||
keywords.add("while"_s);
|
||||
keywords.add("switch"_s);
|
||||
|
||||
Lexer lexer = lex_make();
|
||||
lex_set_keywords(&lexer, keywords);
|
||||
lex_restream(&lexer, test, "Test1"_s);
|
||||
Array<Token> array = lexer.tokens;
|
||||
|
||||
Token_Kind kind[] = {
|
||||
TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen,
|
||||
TK_Keyword, TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen,
|
||||
TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon,
|
||||
TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon,
|
||||
TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign,
|
||||
TK_Equals, TK_Int, TK_Int, TK_Int,
|
||||
TK_Identifier, TK_Identifier, TK_Identifier, TK_Identifier,
|
||||
// TK_Keyword, TK_Keyword, TK_Keyword, TK_Keyword,
|
||||
TK_Equals, TK_Integer, TK_Integer, TK_Integer,
|
||||
TK_Keyword, TK_Keyword, TK_Keyword, TK_Keyword,
|
||||
TK_ColonAssign, TK_End
|
||||
};
|
||||
String strs[] = {
|
||||
"\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s,
|
||||
"Keyword"_s, "\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s,
|
||||
"@"_s,"?"_s,"&"_s,"+"_s,"-"_s,";"_s,
|
||||
"..."_s,"."_s,"->"_s,","_s,"::"_s,":"_s,
|
||||
"Thing"_s,"Thingy"_s,"Test_Meme"_s, "+="_s,"-="_s,
|
||||
@@ -500,11 +556,10 @@ lex_test(){
|
||||
For(array, t, i){
|
||||
assert(t->kind == kind[i]);
|
||||
assert(string_compare(t->string, strs[i]));
|
||||
if(t->kind == TK_Int){
|
||||
if(t->kind == TK_Integer){
|
||||
assert(t->int_val == vals[ui++]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
@@ -574,7 +629,7 @@ token_kind_string(Token_Kind kind){
|
||||
case TK_Character: return "Character"_s;
|
||||
case TK_Error: return "Error"_s;
|
||||
case TK_Float: return "Float"_s;
|
||||
case TK_Int: return "Int"_s;
|
||||
case TK_Integer: return "Int"_s;
|
||||
case TK_Keyword: return "Keyword"_s;
|
||||
default: invalid_codepath; return "<Undefined>"_s;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user