Pratt parsing, basic ast, decl parse const

This commit is contained in:
Krzosa Karol
2022-05-13 16:04:39 +02:00
parent 9c22a379ea
commit 2689aa9ba1
7 changed files with 586 additions and 125 deletions

View File

@@ -81,7 +81,7 @@ enum Token_Kind{
TK_Character,
TK_Error,
TK_Float,
TK_Int,
TK_Integer,
TK_Keyword,
};
@@ -114,6 +114,13 @@ struct Lex_Stream{
S32 line;
};
struct Lexer{
Lex_Stream stream;
Array<Token> tokens;
Intern_Table interns;
S64 token_iter;
};
function U8
lexc(Lex_Stream *s){
return s->stream.str[s->iter];
@@ -159,6 +166,23 @@ lex_set_len(Lex_Stream *s, Token *token){
token->len = lexcp(s) - token->str;
}
// Interns each keyword string into the lexer's intern table and records the
// first/last interned pointers so keyword membership later reduces to a
// pointer range comparison (see lex_is_keyword).
// NOTE(review): assumes intern_string hands out keyword pointers in a
// contiguous, increasing address range — confirm against the intern table's
// allocation strategy before relying on the range test.
// NOTE(review): an empty `keywords` array leaves first_keyword unset and
// stores a null last_keyword — presumably callers always pass at least one
// keyword; verify at call sites.
function void
lex_set_keywords(Lexer *lexer, Array<String> keywords){
Intern_String keyword = {};
IFor(keywords){
keyword = intern_string(&lexer->interns, *it);
// Remember the very first interned keyword pointer.
if(it == keywords.begin())
lexer->interns.first_keyword = keyword.str;
}
// After the loop `keyword` still holds the last interned entry.
lexer->interns.last_keyword = keyword.str;
}
// Returns true when `keyword` was interned as one of the registered keywords.
// Relies on lex_set_keywords having recorded the interned-pointer range
// [first_keyword, last_keyword] in the intern table; membership is then a
// cheap pointer range check rather than a string comparison.
// Fix: parameter renamed `lexer` -> `table` — its type is Intern_Table*, not
// a Lexer*, and the caller in lex__stream passes its intern table directly;
// the old name was misleading. Behavior is unchanged.
function B32
lex_is_keyword(Intern_Table *table, Intern_String keyword){
B32 result = keyword.str >= table->first_keyword && keyword.str <= table->last_keyword;
return result;
}
function void
token_error(Token *t, String error_val){
t->kind = TK_Error;
@@ -237,7 +261,7 @@ t.kind = OpName;
break
function void
lex__stream(Array<Token> *array, Lex_Stream *s){
lex__stream(Intern_Table *table, Array<Token> *array, Lex_Stream *s){
while(lexc(s)){
while(lex_is_whitespace(lexc(s)))
lex_advance(s);
@@ -261,7 +285,6 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
case ',': t.kind = TK_Comma; break;
case '~': t.kind = TK_Neg; break;
case '?': t.kind = TK_Question; break;
case ';': t.kind = TK_Semicolon; break;
case '#': t.kind = TK_Pound; break;
CASE2('!', TK_Not, TK_NotEquals);
CASE2('^', TK_BitXor, TK_XorAssign);
@@ -273,6 +296,10 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
#undef CASE2
#undef CASE3
case ';': {
t.kind = TK_Semicolon;
}break;
case '\n': {
t.kind = TK_NewLine;
if(lexc(s) == '\r')
@@ -378,7 +405,7 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
t.str += 1;
t.len -= 2;
}
//t.intern_val = intern_string(&array->interns, t.string);
t.intern_val = intern_string(table, t.string);
} break;
case '/': {
@@ -420,7 +447,7 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
case '0':case '1':case '2':case '3':case '4':
case '5':case '6':case '7':case '8':case '9':{
t.kind = TK_Int;
t.kind = TK_Integer;
while(lex_is_numeric(lexc(s)))
lex_advance(s);
lex_set_len(s, &t);
@@ -440,10 +467,10 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
lex_advance(s);
lex_set_len(s,&t);
//t.intern_val = intern_string(&array->interns, t.string);
//if(lex_is_keyword(t.intern_val)){
//t.kind = TK_Keyword;
//}
t.intern_val = intern_string(table, t.string);
if(lex_is_keyword(table, t.intern_val)){
t.kind = TK_Keyword;
}
} break;
default: {
@@ -454,38 +481,67 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
if(t.len==0)
lex_set_len(s,&t);
array_push(array, t);
array->add(t);
}
}
function Array<Token>
lex_stream(String istream, String file){
Lex_Stream stream = {istream, 0, istream.str, file, 0};
Array<Token> tokens = array_make<Token>(1024);
lex__stream(&tokens, &stream);
return tokens;
// One-time setup of a Lexer's owned storage: allocates the token array and
// the intern table. Does not initialize `stream` or `token_iter`; those are
// set by lex_restream before tokenizing.
// NOTE(review): capacities (2048 tokens, 1024 intern slots) look like
// heuristics — confirm array_make/intern_table_make grow on demand.
function void
lex_init(Lexer *l){
l->tokens = array_make<Token>(1024*2);
l->interns= intern_table_make(1024);
}
// Construct a fresh, zero-initialized Lexer whose token array and intern
// table have already been allocated (delegates to lex_init).
function Lexer
lex_make(){
Lexer lexer = {};
lex_init(&lexer);
return lexer;
}
function void
lex_test(){
Set_Scratch();
String test = "//R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy"
lex_restream(Lexer *lexer, String istream, String file){
lexer->stream = {istream, 0, istream.str, file, 0};
lexer->tokens.clear();
lexer->token_iter = 0;
lex__stream(&lexer->interns, &lexer->tokens, &lexer->stream);
}
// Convenience wrapper: build a lexer and immediately tokenize `istream`,
// tagging the tokens with source name `file`. Returns the lexer with its
// token array filled in; keywords (if any) must be set on the returned
// lexer's intern table via lex_set_keywords before calling this.
function Lexer
lex_stream(String istream, String file){
Lexer lexer = lex_make();
lex_restream(&lexer, istream, file);
return lexer;
}
function void
lex_test(){ Set_Scratch();
String test = "Keyword //R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy"
"\"Test_Meme\"+=-===42524 4294967295 18446744073709551615"
"for if while switch :="_s;
Array<Token> array = lex_stream(test, "Test1"_s);
Array<String> keywords = {};
keywords.add("Keyword"_s);
keywords.add("for"_s);
keywords.add("if"_s);
keywords.add("while"_s);
keywords.add("switch"_s);
Lexer lexer = lex_make();
lex_set_keywords(&lexer, keywords);
lex_restream(&lexer, test, "Test1"_s);
Array<Token> array = lexer.tokens;
Token_Kind kind[] = {
TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen,
TK_Keyword, TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen,
TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon,
TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon,
TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign,
TK_Equals, TK_Int, TK_Int, TK_Int,
TK_Identifier, TK_Identifier, TK_Identifier, TK_Identifier,
// TK_Keyword, TK_Keyword, TK_Keyword, TK_Keyword,
TK_Equals, TK_Integer, TK_Integer, TK_Integer,
TK_Keyword, TK_Keyword, TK_Keyword, TK_Keyword,
TK_ColonAssign, TK_End
};
String strs[] = {
"\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s,
"Keyword"_s, "\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s,
"@"_s,"?"_s,"&"_s,"+"_s,"-"_s,";"_s,
"..."_s,"."_s,"->"_s,","_s,"::"_s,":"_s,
"Thing"_s,"Thingy"_s,"Test_Meme"_s, "+="_s,"-="_s,
@@ -500,11 +556,10 @@ lex_test(){
For(array, t, i){
assert(t->kind == kind[i]);
assert(string_compare(t->string, strs[i]));
if(t->kind == TK_Int){
if(t->kind == TK_Integer){
assert(t->int_val == vals[ui++]);
}
}
}
//-----------------------------------------------------------------------------
@@ -574,7 +629,7 @@ token_kind_string(Token_Kind kind){
case TK_Character: return "Character"_s;
case TK_Error: return "Error"_s;
case TK_Float: return "Float"_s;
case TK_Int: return "Int"_s;
case TK_Integer: return "Int"_s;
case TK_Keyword: return "Keyword"_s;
default: invalid_codepath; return "<Undefined>"_s;
}