Pratt parsing, basic ast, decl parse const

This commit is contained in:
Krzosa Karol
2022-05-13 16:04:39 +02:00
parent 9c22a379ea
commit 2689aa9ba1
7 changed files with 586 additions and 125 deletions

2
ast.c
View File

@@ -353,7 +353,7 @@ expr_new(Arena *p, Expr_Kind kind, Token *token){
function Expr * function Expr *
expr_int(Arena *p, Token *token){ expr_int(Arena *p, Token *token){
assert(token->kind == TK_Int); assert(token->kind == TK_Integer);
Expr *expr = expr_new(p, EK_Int, token); Expr *expr = expr_new(p, EK_Int, token);
expr->int_val = token->int_val; expr->int_val = token->int_val;
return expr; return expr;

10
lex.c
View File

@@ -140,7 +140,7 @@ typedef enum Token_Kind{
TK_Character, TK_Character,
TK_Error, TK_Error,
TK_Float, TK_Float,
TK_Int, TK_Integer,
TK_Keyword, TK_Keyword,
}Token_Kind; }Token_Kind;
@@ -467,7 +467,7 @@ lex__stream(Token_Array *array, Lex_Stream *s){
case '0':case '1':case '2':case '3':case '4': case '0':case '1':case '2':case '3':case '4':
case '5':case '6':case '7':case '8':case '9':{ case '5':case '6':case '7':case '8':case '9':{
t.kind = TK_Int; t.kind = TK_Integer;
while(lex_is_numeric(lexc(s))) while(lex_is_numeric(lexc(s)))
lex_advance(s); lex_advance(s);
lex_set_len(s, &t); lex_set_len(s, &t);
@@ -544,7 +544,7 @@ lex_test(){
TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon, TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon,
TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon, TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon,
TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign, TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign,
TK_Equals, TK_Int, TK_Int, TK_Int, TK_Keyword, TK_Keyword, TK_Equals, TK_Integer, TK_Integer, TK_Integer, TK_Keyword, TK_Keyword,
TK_Keyword, TK_Keyword, TK_ColonAssign, TK_End TK_Keyword, TK_Keyword, TK_ColonAssign, TK_End
}; };
String strs[] = { String strs[] = {
@@ -564,7 +564,7 @@ lex_test(){
for(Token *t = token_array_iter_begin(&array); t->kind != TK_End; t = token_array_iter_next(&array)){ for(Token *t = token_array_iter_begin(&array); t->kind != TK_End; t = token_array_iter_next(&array)){
assert(t->kind == kind[i]); assert(t->kind == kind[i]);
assert(string_compare(t->string, strs[i++])); assert(string_compare(t->string, strs[i++]));
if(t->kind == TK_Int){ if(t->kind == TK_Integer){
assert(t->int_val == vals[ui++]); assert(t->int_val == vals[ui++]);
} }
} }
@@ -637,6 +637,6 @@ global const char *token_kind_string[] = {
[TK_Character] = "Character", [TK_Character] = "Character",
[TK_Error] = "Error", [TK_Error] = "Error",
[TK_Float] = "Float", [TK_Float] = "Float",
[TK_Int] = "Int", [TK_Integer] = "Int",
[TK_Keyword] = "Keyword", [TK_Keyword] = "Keyword",
}; };

169
main.cpp
View File

@@ -154,7 +154,7 @@ wrap_around_pow2(U64 x, U64 power_of_2) {
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// OS Memory // OS Memory
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
constexpr SizeU os_page_size = 4096; const SizeU os_page_size = 4096;
struct OS_Memory{ struct OS_Memory{
SizeU commit, reserve; SizeU commit, reserve;
U8 *data; U8 *data;
@@ -395,6 +395,7 @@ struct Thread_Ctx{
Arena scratch[2]; Arena scratch[2];
Allocator *implicit_allocator; Allocator *implicit_allocator;
void *ctx; void *ctx;
U64 ctx_id;
Log_Proc *log_proc; Log_Proc *log_proc;
}; };
thread_local Thread_Ctx thread_ctx; thread_local Thread_Ctx thread_ctx;
@@ -433,15 +434,26 @@ struct Scoped_Allocator{
} }
}; };
#define Get_Ctx(T) T *ctx = (T *)thread_ctx.ctx function void *
#define Set_Ctx(ctx) Scoped_Ctx scoped_ctx_##__LINE__((void *)ctx) thread_ctx_get_user_ctx(U64 id){
assert(id == thread_ctx.ctx_id);
assert(id != 0);
assert(thread_ctx.ctx_id != 0);
assert(thread_ctx.ctx != 0);
return thread_ctx.ctx;
}
#define Get_Ctx(T) T *ctx = (T *)thread_ctx_get_user_ctx(T##_ID)
#define Set_Ctx(ctx, id) Scoped_Ctx scoped_ctx_##__LINE__((void *)ctx, id)
struct Scoped_Ctx{ struct Scoped_Ctx{
void *prev_ctx; void *prev_ctx;
Scoped_Ctx(void *in_ctx){ U64 prev_id;
Scoped_Ctx(void *in_ctx, U64 id){
prev_ctx = thread_ctx.ctx; prev_ctx = thread_ctx.ctx;
prev_id = thread_ctx.ctx_id;
thread_ctx.ctx = in_ctx; thread_ctx.ctx = in_ctx;
thread_ctx.ctx_id = id;
} }
~Scoped_Ctx(){thread_ctx.ctx = prev_ctx;} ~Scoped_Ctx(){thread_ctx.ctx = prev_ctx; thread_ctx.ctx_id = prev_id;}
}; };
enum Alloc_Flag{AF_None,AF_ZeroMemory}; enum Alloc_Flag{AF_None,AF_ZeroMemory};
@@ -579,6 +591,7 @@ test_heap_allocator(){
assert(thread_ctx.implicit_allocator == &heap); assert(thread_ctx.implicit_allocator == &heap);
} }
const U64 Test_Context_ID = 14242;
struct Test_Context{ struct Test_Context{
int value; int value;
}; };
@@ -592,7 +605,7 @@ test_custom_context_2(){
function void function void
test_custom_context_1(){ test_custom_context_1(){
Test_Context context = {}; Test_Context context = {};
Set_Ctx(&context); Set_Ctx(&context, Test_Context_ID);
Get_Ctx(Test_Context); Get_Ctx(Test_Context);
ctx->value = 10; ctx->value = 10;
test_custom_context_2(); test_custom_context_2();
@@ -640,78 +653,60 @@ struct Array{
S64 len; S64 len;
Allocator *allocator; Allocator *allocator;
void init(S64 size){
    // Allocate the initial backing storage. If the caller did not
    // assign an allocator, fall back to the thread's implicit one.
    if(allocator == 0){
        allocator = imp_get();
    }
    cap  = size;
    data = exp_alloc_array(allocator, T, size);
}
void grow(S64 required_size){
    // Ensure there is room for `required_size` more elements.
    if(cap == 0){
        // First allocation: 2x the request, but at least 16 slots.
        S64 initial_cap = max(required_size*2, (S64)16);
        init(initial_cap);
    }
    else if(len + required_size > cap){
        // BUG FIX: the local `S64 cap` shadowed the member, making
        // `cap = cap;` a self-assignment — the member capacity was
        // never updated, so every later add re-entered this branch
        // and resized again.
        S64 new_cap = (len + required_size)*2;
        data = exp_resize_array(allocator, data, T, new_cap);
        cap  = new_cap;
    }
}
void add(T item){
    // Append one element by value, growing storage as needed.
    grow(1);
    data[len] = item;
    len += 1;
}
void add(T *item){
    // Append a copy of the pointed-to element.
    grow(1);
    data[len] = *item;
    len += 1;
}
void clear(){
    // Drop all elements; capacity and allocation are kept for reuse.
    len = 0;
}
T *begin(){ return data; } T *begin(){ return data; }
T *end (){ return data + len; } T *end (){ return data + len; }
T &operator[](S64 i){ return data[i]; } T &operator[](S64 i){ return data[i]; }
}; };
#define For(array,it,i) for(SizeU i = 0; i < array.len; i++) for(auto *it = &array[i]; it; it = 0) #define For(array,it,i) for(SizeU i = 0; i < array.len; i++) for(auto *it = &array[i]; it; it = 0)
#define IFor(array) for(auto *it = array.begin(); it != array.end(); it++)
#define IterList(list,it) for(auto *it = list->first; it; it=it->next) #define IterList(list,it) for(auto *it = list->first; it; it=it->next)
template<class T>
void array_init(Array<T> *a, S64 size){
if(!a->allocator) a->allocator = thread_ctx.implicit_allocator;
a->data = exp_alloc_array(a->allocator, T, size);
a->cap = size;
}
template<class T>
void array_grow(Array<T> *a, S64 required_size){
if(a->cap == 0){
S64 cap = max(required_size*2, (S64)16);
array_init(a, cap);
}
else if(a->len + required_size > a->cap){
S64 cap = (a->len + required_size)*2;
a->data = exp_resize_array(a->allocator, a->data, T, cap);
a->cap = cap;
}
}
template<class T> template<class T>
Array<T> array_make(S64 size){ Array<T> array_make(S64 size){
Array<T> result = {}; Array<T> result = {};
array_init(&result, size); result.init(size);
return result; return result;
} }
template<class T>
T *array_alloc(Array<T> *a, S64 count){
array_grow(a, count);
T *result = a->data + a->len;
a->len += count;
return result;
}
template<class T>
void array_push(Array<T> *a, T &item){
array_grow(a, 1);
a->data[a->len++] = item;
}
template<class T>
T array_pop_get(Array<T> *a){
assert(a->len > 0);
return a->data[--a->len];
}
template<class T>
void array_pop(Array<T> *a){
assert(a->len > 0);
--a->len;
}
template<class T>
void array_clear(Array<T> *array){
array->len = 0;
}
function void function void
test_array(){ test_array(){
Set_Scratch(); Set_Scratch();
Array<int> array = {}; Array<int> array = {};
int size = 1000; int size = 1000;
for(int i = 0; i < size; i++){ for(int i = 0; i < size; i++){
array_push(&array, i); array.add(i);
} }
For(array, it, i){ For(array, it, i){
assert(*it == i); assert(*it == i);
@@ -721,17 +716,11 @@ test_array(){
Array<int> array2 = {}; Array<int> array2 = {};
array2.allocator = &arena; array2.allocator = &arena;
for(int i = 0; i < size; i++){ for(int i = 0; i < size; i++){
array_push(&array2, i); array2.add(i);
} }
For(array2, iterator, count){ For(array2, iterator, count){
assert(*iterator == count); assert(*iterator == count);
} }
for(int i = 999; i > 950; i--){
assert(array_pop_get(&array) == i);
}
for(int i = 0; i < 10; i++){
array_pop(&array2);
}
exp_destroy(&arena); exp_destroy(&arena);
assert(arena.memory.data == 0); assert(arena.memory.data == 0);
assert(thread_ctx.scratch->memory.data != 0); assert(thread_ctx.scratch->memory.data != 0);
@@ -775,6 +764,13 @@ map_grow(Map *map, S64 new_size){
*map = new_map; *map = new_map;
} }
function Map
map_make(S64 size){
    // Construct a zeroed map and pre-size its table via map_grow.
    Map result = {};
    map_grow(&result, size);
    return result;
}
function void function void
map_insert_u64(Map *map, U64 key, void *val){ map_insert_u64(Map *map, U64 key, void *val){
assert(val); assert(val);
@@ -849,30 +845,6 @@ map_test(){
} }
} }
//-----------------------------------------------------------------------------
// Bucket Array
//-----------------------------------------------------------------------------
template<class T>
struct Bucket_Array{
struct Bucket{
Bucket *next;
S64 len, cap;
T data[0];
};
Allocator *allocator;
Bucket *first;
Bucket *last;
Bucket *iter;
S64 iter_len;
};
function void
test_bucket_array(){
Bucket_Array<int> arr = {};
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Linked lists // Linked lists
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
@@ -1027,8 +999,18 @@ test_string_builder(){
struct Intern_Table{ struct Intern_Table{
Allocator *string_allocator; Allocator *string_allocator;
Map map; Map map;
U8 *first_keyword;
U8 *last_keyword;
}; };
function Intern_Table
intern_table_make(S64 initial_size){
    // Build an intern table with a pre-sized hash map. Interned string
    // bytes are copied out of the thread's implicit allocator.
    Intern_Table result = {};
    result.string_allocator = imp_get();
    result.map = map_make(initial_size);
    return result;
}
function Intern_String function Intern_String
intern_string(Intern_Table *t, String string){ intern_string(Intern_Table *t, String string){
if(!t->string_allocator) t->string_allocator = imp_get(); if(!t->string_allocator) t->string_allocator = imp_get();
@@ -1063,11 +1045,18 @@ test_intern_table(){ Set_Scratch();
} }
#include "new_lex.cpp" #include "new_lex.cpp"
#include "new_ast.cpp"
#include "new_parse.cpp"
int main(){ int main(){
test_custom_context(); test_custom_context();
test_heap_allocator(); test_heap_allocator();
test_os_memory(); test_os_memory();
thread_ctx_init(); thread_ctx_init();
test_parse_decl();
test_parse_expr();
test_array(); test_array();
map_test(); map_test();
test_string_builder(); test_string_builder();

186
new_ast.cpp Normal file
View File

@@ -0,0 +1,186 @@
// Lexer::interns::map::allocator - array allocator, resizing
// Lexer::tokens - array allocator, resizing
//
// Parser::ast_arena - arena for asts
// Lexer::interns::string_allocator - arena for interns
//
Intern_String keyword_const;
Intern_String keyword_struct;
Intern_String keyword_union;
Intern_String keyword_enum;
const U64 Parse_Ctx_ID = 115151;
struct Parse_Ctx:Lexer{
Arena ast_arena;
Token empty_token;
S64 pt[256]; // precedence table
void init(){
const S64 addp = 1;
const S64 mulp = 2;
pt[TK_Add] = addp;
pt[TK_Sub] = addp;
pt[TK_Div] = mulp;
pt[TK_Mul] = mulp;
arena_init(&ast_arena);
lex_init(this);
keyword_const = intern_string(&interns, "const"_s);
keyword_struct= intern_string(&interns, "struct"_s);
keyword_union = intern_string(&interns, "union"_s);
keyword_enum = intern_string(&interns, "enum"_s);
interns.first_keyword = keyword_const.str;
interns.last_keyword = keyword_enum.str;
}
};
//-----------------------------------------------------------------------------
// AST
//-----------------------------------------------------------------------------
// Discriminates the concrete meaning of an Ast node and therefore
// which union member of Ast_Expr / Ast_Typespec / Ast_Decl is active.
enum Ast_Kind{
AK_None,
// Expressions (Ast_Expr)
AK_Expr_Str,
AK_Expr_Int,
AK_Expr_Ident,
AK_Expr_Binary,
// Declarations (Ast_Decl)
AK_Decl_Func,
AK_Decl_Func_Arg,
AK_Decl_Const,
// Type specifications (Ast_Typespec)
AK_Typespec_Ident,
AK_Typespec_Pointer,
AK_Typespec_Array,
AK_Typespec_Func
};
// Common header for every AST node: the discriminating kind plus the
// token where the node begins (used for error reporting positions).
struct Ast{
Ast_Kind kind;
Token *pos;
};
// Expression node. The active union member depends on `kind`:
//   AK_Expr_Str / AK_Expr_Ident -> intern_val
//   AK_Expr_Int                 -> int_val
//   AK_Expr_Binary              -> binary
struct Ast_Expr:Ast{
union{
Intern_String intern_val;
U64 int_val;
struct{
Token_Kind op; // operator token kind (TK_Add, TK_Mul, ...)
Ast_Expr *left;
Ast_Expr *right;
} binary;
};
};
// Type-specification node. The active union member depends on `kind`:
//   AK_Typespec_Pointer -> base (pointee type)
//   AK_Typespec_Ident   -> name
//   AK_Typespec_Array   -> arr  (element type + size expression)
//   AK_Typespec_Func    -> func (return type + argument types)
struct Ast_Typespec:Ast{
union{
Ast_Typespec *base;
Intern_String name;
struct{
Ast_Typespec *base;
Ast_Expr *expr; // array size expression
}arr;
struct{
Ast_Typespec *ret;
Array<Ast_Typespec*> args;
}func;
};
};
// Declaration node; `name` is the declared identifier. The active
// union member depends on `kind`:
//   AK_Decl_Const (variable-style decls) -> var
//   AK_Decl_Func                         -> func
struct Ast_Decl:Ast{
Intern_String name;
union{
struct{
Ast_Typespec *typespec; // optional explicit type
Intern_String name; // NOTE(review): duplicates the outer `name` — confirm which one callers should read
Ast_Expr *expr; // initializer expression
}var;
struct{
Array<Ast_Decl*> args;
Ast_Typespec *ret;
}func;
};
};
//-----------------------------------------------------------------------------
// AST Constructors beginning with expressions
//-----------------------------------------------------------------------------
// Allocates an Ast_##T node from the current Parse_Ctx's ast arena
// and fills the common header. Note: this declares locals `ctx` and
// `result` in the calling scope, so it can be used at most once per
// function and must appear before any use of `result`.
#define AST_NEW(T,ikind,ipos) \
Get_Ctx(Parse_Ctx); \
Ast_##T *result = exp_alloc_type(&ctx->ast_arena, Ast_##T); \
result->kind = ikind; \
result->pos = ipos
function Ast_Expr *
ast_expr_string(Token *pos, Intern_String string){
    // String-literal expression node carrying the interned text.
    Get_Ctx(Parse_Ctx);
    Ast_Expr *result = exp_alloc_type(&ctx->ast_arena, Ast_Expr);
    result->kind = AK_Expr_Str;
    result->pos  = pos;
    result->intern_val = string;
    return result;
}
function Ast_Expr *
ast_expr_identifier(Token *pos, Intern_String string){
    // Identifier expression node carrying the interned name.
    Get_Ctx(Parse_Ctx);
    Ast_Expr *result = exp_alloc_type(&ctx->ast_arena, Ast_Expr);
    result->kind = AK_Expr_Ident;
    result->pos  = pos;
    result->intern_val = string;
    return result;
}
function Ast_Expr *
ast_expr_integer(Token *pos, U64 integer){
    // Integer-literal expression node.
    // BUG FIX: the parameter was S64 while both Token.int_val and
    // Ast_Expr.int_val are U64 — the lexer accepts literals up to
    // 18446744073709551615, which were squeezed through a signed
    // type. Widening the parameter is backward compatible for all
    // existing call sites.
    AST_NEW(Expr, AK_Expr_Int, pos);
    result->int_val = integer;
    return result;
}
function Ast_Expr *
ast_expr_binary(Ast_Expr *left, Ast_Expr *right, Token *op){
    // Binary-operator node; the operator token doubles as the node's
    // source position.
    Get_Ctx(Parse_Ctx);
    Ast_Expr *result = exp_alloc_type(&ctx->ast_arena, Ast_Expr);
    result->kind = AK_Expr_Binary;
    result->pos  = op;
    result->binary.op    = op->kind;
    result->binary.left  = left;
    result->binary.right = right;
    return result;
}
//-----------------------------------------------------------------------------
// Typespecs
//-----------------------------------------------------------------------------
function Ast_Typespec *
ast_typespec_name(Token *pos, Intern_String name){
    // Named-type reference, e.g. `int` or `Thing`.
    Get_Ctx(Parse_Ctx);
    Ast_Typespec *result = exp_alloc_type(&ctx->ast_arena, Ast_Typespec);
    result->kind = AK_Typespec_Ident;
    result->pos  = pos;
    result->name = name;
    return result;
}
function Ast_Typespec *
ast_typespec_pointer(Token *pos, Ast_Typespec *base){
    // Pointer type wrapping the pointee typespec.
    Get_Ctx(Parse_Ctx);
    Ast_Typespec *result = exp_alloc_type(&ctx->ast_arena, Ast_Typespec);
    result->kind = AK_Typespec_Pointer;
    result->pos  = pos;
    result->base = base;
    return result;
}
function Ast_Typespec *
ast_typespec_array(Token *pos, Ast_Typespec *base, Ast_Expr *expr){
    // Array type: element typespec plus the size expression.
    Get_Ctx(Parse_Ctx);
    Ast_Typespec *result = exp_alloc_type(&ctx->ast_arena, Ast_Typespec);
    result->kind = AK_Typespec_Array;
    result->pos  = pos;
    result->arr.base = base;
    result->arr.expr = expr;
    return result;
}
//-----------------------------------------------------------------------------
// Declarations
//-----------------------------------------------------------------------------
function Ast_Decl *
ast_decl_func(Token *pos, Intern_String name){
    // Function declaration shell; args/ret are filled in by the caller.
    Get_Ctx(Parse_Ctx);
    Ast_Decl *result = exp_alloc_type(&ctx->ast_arena, Ast_Decl);
    result->kind = AK_Decl_Func;
    result->pos  = pos;
    result->name = name;
    return result;
}
function Ast_Decl *
ast_decl_const(Token *pos, Intern_String name, Ast_Expr *expr){
    // Constant declaration: `name :: const expr`. Only var.expr is
    // assigned here; var.typespec/var.name presumably rely on the
    // allocator zeroing new nodes — TODO confirm exp_alloc_type
    // zero-fills.
    Get_Ctx(Parse_Ctx);
    Ast_Decl *result = exp_alloc_type(&ctx->ast_arena, Ast_Decl);
    result->kind = AK_Decl_Const;
    result->pos  = pos;
    result->name = name;
    result->var.expr = expr;
    return result;
}

View File

@@ -81,7 +81,7 @@ enum Token_Kind{
TK_Character, TK_Character,
TK_Error, TK_Error,
TK_Float, TK_Float,
TK_Int, TK_Integer,
TK_Keyword, TK_Keyword,
}; };
@@ -114,6 +114,13 @@ struct Lex_Stream{
S32 line; S32 line;
}; };
// Bundles all lexing state: the raw character stream, the growable
// token output, the string intern table (also used for keyword
// classification via first_keyword/last_keyword), and a cursor used
// by the parser's token_get/token_next helpers.
struct Lexer{
Lex_Stream stream;
Array<Token> tokens;
Intern_Table interns;
S64 token_iter; // index of the next token to hand out
};
function U8 function U8
lexc(Lex_Stream *s){ lexc(Lex_Stream *s){
return s->stream.str[s->iter]; return s->stream.str[s->iter];
@@ -159,6 +166,23 @@ lex_set_len(Lex_Stream *s, Token *token){
token->len = lexcp(s) - token->str; token->len = lexcp(s) - token->str;
} }
function void
lex_set_keywords(Lexer *lexer, Array<String> keywords){
    // Interns every keyword and records the [first, last] intern
    // pointer range; lex_is_keyword classifies identifiers by testing
    // whether their intern falls inside that range, so keywords must
    // be interned back-to-back before any other strings.
    //
    // BUG FIX: an empty keyword array used to clobber last_keyword
    // with a null pointer while leaving first_keyword untouched.
    if(keywords.len == 0) return;
    Intern_String keyword = {};
    IFor(keywords){
        keyword = intern_string(&lexer->interns, *it);
        if(it == keywords.begin())
            lexer->interns.first_keyword = keyword.str;
    }
    lexer->interns.last_keyword = keyword.str;
}
function B32
lex_is_keyword(Intern_Table *table, Intern_String keyword){
    // A string is a keyword when its interned pointer lies inside the
    // contiguous range recorded by lex_set_keywords.
    // NOTE(review): this compares pointers from separate allocations,
    // which assumes the intern allocator hands out monotonically
    // increasing addresses — confirm that holds for the arena in use.
    B32 result = (keyword.str >= table->first_keyword) &&
                 (keyword.str <= table->last_keyword);
    return result;
}
function void function void
token_error(Token *t, String error_val){ token_error(Token *t, String error_val){
t->kind = TK_Error; t->kind = TK_Error;
@@ -237,7 +261,7 @@ t.kind = OpName;
break break
function void function void
lex__stream(Array<Token> *array, Lex_Stream *s){ lex__stream(Intern_Table *table, Array<Token> *array, Lex_Stream *s){
while(lexc(s)){ while(lexc(s)){
while(lex_is_whitespace(lexc(s))) while(lex_is_whitespace(lexc(s)))
lex_advance(s); lex_advance(s);
@@ -261,7 +285,6 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
case ',': t.kind = TK_Comma; break; case ',': t.kind = TK_Comma; break;
case '~': t.kind = TK_Neg; break; case '~': t.kind = TK_Neg; break;
case '?': t.kind = TK_Question; break; case '?': t.kind = TK_Question; break;
case ';': t.kind = TK_Semicolon; break;
case '#': t.kind = TK_Pound; break; case '#': t.kind = TK_Pound; break;
CASE2('!', TK_Not, TK_NotEquals); CASE2('!', TK_Not, TK_NotEquals);
CASE2('^', TK_BitXor, TK_XorAssign); CASE2('^', TK_BitXor, TK_XorAssign);
@@ -273,6 +296,10 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
CASE3('|', TK_BitOr, TK_OrAssign, TK_Or); CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
#undef CASE2 #undef CASE2
#undef CASE3 #undef CASE3
case ';': {
t.kind = TK_Semicolon;
}break;
case '\n': { case '\n': {
t.kind = TK_NewLine; t.kind = TK_NewLine;
if(lexc(s) == '\r') if(lexc(s) == '\r')
@@ -378,7 +405,7 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
t.str += 1; t.str += 1;
t.len -= 2; t.len -= 2;
} }
//t.intern_val = intern_string(&array->interns, t.string); t.intern_val = intern_string(table, t.string);
} break; } break;
case '/': { case '/': {
@@ -420,7 +447,7 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
case '0':case '1':case '2':case '3':case '4': case '0':case '1':case '2':case '3':case '4':
case '5':case '6':case '7':case '8':case '9':{ case '5':case '6':case '7':case '8':case '9':{
t.kind = TK_Int; t.kind = TK_Integer;
while(lex_is_numeric(lexc(s))) while(lex_is_numeric(lexc(s)))
lex_advance(s); lex_advance(s);
lex_set_len(s, &t); lex_set_len(s, &t);
@@ -440,10 +467,10 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_') while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
lex_advance(s); lex_advance(s);
lex_set_len(s,&t); lex_set_len(s,&t);
//t.intern_val = intern_string(&array->interns, t.string); t.intern_val = intern_string(table, t.string);
//if(lex_is_keyword(t.intern_val)){ if(lex_is_keyword(table, t.intern_val)){
//t.kind = TK_Keyword; t.kind = TK_Keyword;
//} }
} break; } break;
default: { default: {
@@ -454,38 +481,67 @@ lex__stream(Array<Token> *array, Lex_Stream *s){
if(t.len==0) if(t.len==0)
lex_set_len(s,&t); lex_set_len(s,&t);
array_push(array, t); array->add(t);
} }
} }
function Array<Token> function void
lex_stream(String istream, String file){ lex_init(Lexer *l){
Lex_Stream stream = {istream, 0, istream.str, file, 0}; l->tokens = array_make<Token>(1024*2);
Array<Token> tokens = array_make<Token>(1024); l->interns= intern_table_make(1024);
lex__stream(&tokens, &stream); }
return tokens;
function Lexer
lex_make(){
Lexer result = {};
lex_init(&result);
return result;
} }
function void function void
lex_test(){ lex_restream(Lexer *lexer, String istream, String file){
Set_Scratch(); lexer->stream = {istream, 0, istream.str, file, 0};
String test = "//R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy" lexer->tokens.clear();
lexer->token_iter = 0;
lex__stream(&lexer->interns, &lexer->tokens, &lexer->stream);
}
function Lexer
lex_stream(String istream, String file){
Lexer result = lex_make();
lex_restream(&result, istream, file);
return result;
}
function void
lex_test(){ Set_Scratch();
String test = "Keyword //R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy"
"\"Test_Meme\"+=-===42524 4294967295 18446744073709551615" "\"Test_Meme\"+=-===42524 4294967295 18446744073709551615"
"for if while switch :="_s; "for if while switch :="_s;
Array<Token> array = lex_stream(test, "Test1"_s);
Array<String> keywords = {};
keywords.add("Keyword"_s);
keywords.add("for"_s);
keywords.add("if"_s);
keywords.add("while"_s);
keywords.add("switch"_s);
Lexer lexer = lex_make();
lex_set_keywords(&lexer, keywords);
lex_restream(&lexer, test, "Test1"_s);
Array<Token> array = lexer.tokens;
Token_Kind kind[] = { Token_Kind kind[] = {
TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen, TK_Keyword, TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen,
TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon, TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon,
TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon, TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon,
TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign, TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign,
TK_Equals, TK_Int, TK_Int, TK_Int, TK_Equals, TK_Integer, TK_Integer, TK_Integer,
TK_Identifier, TK_Identifier, TK_Identifier, TK_Identifier, TK_Keyword, TK_Keyword, TK_Keyword, TK_Keyword,
// TK_Keyword, TK_Keyword, TK_Keyword, TK_Keyword,
TK_ColonAssign, TK_End TK_ColonAssign, TK_End
}; };
String strs[] = { String strs[] = {
"\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s, "Keyword"_s, "\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s,
"@"_s,"?"_s,"&"_s,"+"_s,"-"_s,";"_s, "@"_s,"?"_s,"&"_s,"+"_s,"-"_s,";"_s,
"..."_s,"."_s,"->"_s,","_s,"::"_s,":"_s, "..."_s,"."_s,"->"_s,","_s,"::"_s,":"_s,
"Thing"_s,"Thingy"_s,"Test_Meme"_s, "+="_s,"-="_s, "Thing"_s,"Thingy"_s,"Test_Meme"_s, "+="_s,"-="_s,
@@ -500,11 +556,10 @@ lex_test(){
For(array, t, i){ For(array, t, i){
assert(t->kind == kind[i]); assert(t->kind == kind[i]);
assert(string_compare(t->string, strs[i])); assert(string_compare(t->string, strs[i]));
if(t->kind == TK_Int){ if(t->kind == TK_Integer){
assert(t->int_val == vals[ui++]); assert(t->int_val == vals[ui++]);
} }
} }
} }
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
@@ -574,7 +629,7 @@ token_kind_string(Token_Kind kind){
case TK_Character: return "Character"_s; case TK_Character: return "Character"_s;
case TK_Error: return "Error"_s; case TK_Error: return "Error"_s;
case TK_Float: return "Float"_s; case TK_Float: return "Float"_s;
case TK_Int: return "Int"_s; case TK_Integer: return "Int"_s;
case TK_Keyword: return "Keyword"_s; case TK_Keyword: return "Keyword"_s;
default: invalid_codepath; return "<Undefined>"_s; default: invalid_codepath; return "<Undefined>"_s;
} }

231
new_parse.cpp Normal file
View File

@@ -0,0 +1,231 @@
function Token *
token_get(S64 i = 0){ Get_Ctx(Parse_Ctx);
    // Peek at the token `i` positions ahead of the cursor without
    // consuming it. Out-of-range lookups return the sentinel
    // empty_token instead of touching memory outside the array.
    i += ctx->token_iter;
    // BUG FIX: a negative lookahead (e.g. token_get(-1) at the start
    // of the stream) previously indexed before the array.
    if(i < 0 || i >= ctx->tokens.len){
        return &ctx->empty_token;
    }
    Token *result = &ctx->tokens[i];
    return result;
}
function Token *
token_next(){ Get_Ctx(Parse_Ctx);
    // Consume and return the current token, advancing the cursor.
    Token *result = token_get();
    ctx->token_iter += 1;
    return result;
}
function Token *
token_is(Token_Kind kind){
    // Non-consuming check: the current token when it has the
    // requested kind, otherwise null.
    Token *token = token_get();
    return (token->kind == kind) ? token : 0;
}
function Token *
token_match(Token_Kind kind){
    // Consuming check: when the current token has the requested kind,
    // advance past it and return it; otherwise consume nothing and
    // return null.
    if(token_is(kind)){
        return token_next();
    }
    return 0;
}
function Token *
token_match_keyword(Intern_String string){
    // Consume the current token only when it is exactly this keyword.
    // Interned strings are unique, so pointer equality on `.str`
    // suffices.
    Token *token = token_get();
    B32 hit = (token->kind == TK_Keyword) &&
              (string.str == token->intern_val.str);
    if(hit){
        return token_next();
    }
    return 0;
}
// Reports a parse error: formats the varargs message, prints it with
// the file:line location, echoes the offending source line with a
// caret marker underneath, then traps into the debugger. Never
// recovers — callers still return null after calling this.
function void
parsing_error(Token *token, const char *str, ...){
Set_Scratch();
STRING_FMT(imp_get(), str, string);
// @Note(Krzosa): Print nice error message
printf("\nError: %s", string.str);
if(token){ // token may be null when there is no position to report
printf(" %s:%d\n", token->file.str, (S32)token->line);
// @Note(Krzosa): Print error line
{
// Scan to the end of the offending line (newline or NUL).
int i = 0;
while(token->line_begin[i]!='\n' && token->line_begin[i]!=0) i++;
printf("%.*s\n", i, token->line_begin);
// @Note(Krzosa): Print error marker
// Column of the token within its line; the -2 presumably centers
// the 6-char caret run under the token — TODO confirm alignment.
int token_i = token->str - token->line_begin;
for(int i = 0; i < token_i-2; i++) printf(" ");
printf("^^^^^^\n");
}
}
// NOTE(review): __debugbreak is MSVC-specific; other compilers will
// need __builtin_trap or equivalent.
__debugbreak();
}
function Token *
token_expect(Token_Kind kind){
    // Like token_match, but a mismatch reports a parse error (which
    // traps) and yields null.
    Token *token = token_match(kind);
    if(!token){
        Token *got = token_get();
        parsing_error(got, "Expected token of kind: [%s], got instead token of kind: [%s]",
                      token_kind_string(kind).str, token_kind_string(got->kind).str);
    }
    return token;
}
//-----------------------------------------------------------------------------
// Expression parsing
//-----------------------------------------------------------------------------
/*
add = [+-]
mul = [/%*]
compare = == | != | >= | > | <= | <
logical = [&|^] | && | ||
unary = [&*-!~+] | ++ | --
atom_expr = Int
| Float
| String
| Identifier
| 'cast' '(' typespec ',' expr ')'
| 'size_type' '(' typespec ')'
| 'size_expr' '(' expr ')'
| '{' compound_expr '}'
| '(' expr ')'
| '(' ':' typespec ')' '{' compound_expr '}'
postfix_expr = atom_expr ('[' expr ']' | '.' Identifier | ++ | -- | '(' expr_list ')')*
unary_expr = unary ? unary_expr : atom_expr
mul_expr = atom_expr (mul atom_expr)*
add_expr = mul_expr (add mul_expr)*
logical_expr = add_expr (logical add_expr)*
compare_expr = logical_expr (compare logical_expr)*
ternary_expr = compare_expr ('?' ternary_expr ':' ternary_expr)?
expr = ternary_expr
Compound literals
- (:[23]*Type){}
- Type{}
- { }
*/
function Ast_Expr *parse_expr(S64 rbp = 0);
function Ast_Expr *
parse_expr_nud(Token *token){
    // "Null denotation" (Pratt parsing): builds the leaf or prefix
    // expression for a token that can start an expression.
    switch(token->kind){
        case TK_StringLit:  return ast_expr_string(token, token->intern_val);
        case TK_Identifier: return ast_expr_identifier(token, token->intern_val);
        case TK_Integer:    return ast_expr_integer(token, token->int_val);
        case TK_OpenParen:{
            // A parenthesized sub-expression resets the binding power.
            Ast_Expr *inner = parse_expr();
            token_expect(TK_CloseParen);
            return inner;
        }
        default:
            parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str);
            return 0;
    }
}
function S64
op_precedence(Token_Kind kind){ Get_Ctx(Parse_Ctx);
    // Left binding power of a binary operator. Unregistered kinds map
    // to 0, which terminates the Pratt loop in parse_expr.
    return ctx->pt[kind];
}
function Ast_Expr *
parse_expr_led(Token *op, Ast_Expr *left){
    // "Left denotation": `left` is parsed and `op` consumed; parse the
    // right operand and combine. Subtracting the associativity flag
    // (0 = left, 1 = right) from the operator's precedence yields the
    // right binding power, so equal-precedence operators group
    // left-to-right.
    enum{ Left_Associative, Right_Associative };
    S64 assoc = Left_Associative;
    Ast_Expr *right = parse_expr(op_precedence(op->kind) - assoc);
    switch(op->kind){
        case TK_Add: case TK_Mul: case TK_Sub: case TK_Div:
            return ast_expr_binary(left, right, op);
        default:
            parsing_error(op, "Unexpected token of kind: [%s] in expression", token_kind_string(op->kind).str);
            return 0;
    }
}
// Core Pratt expression parser. `rbp` is the right binding power
// (declared above with default 0): the loop keeps consuming infix
// operators while their precedence exceeds rbp, folding each into
// `left`, and stops at any token with precedence <= rbp (including
// non-operators, whose precedence is 0).
function Ast_Expr *
parse_expr(S64 rbp){
Token *token = token_next();
Ast_Expr *left = parse_expr_nud(token);
while(rbp < op_precedence(token_get()->kind)){
token = token_next();
left = parse_expr_led(token, left);
}
return left;
}
function S64
expr_eval(Ast_Expr *expr){
    // Constant-folds an integer expression tree. Only integer
    // literals and the four arithmetic binary operators are handled;
    // anything else is a programming error. NOTE(review): TK_Div has
    // no zero-divisor guard — confirm inputs can never divide by 0.
    switch(expr->kind){
        case AK_Expr_Int: return expr->int_val;
        case AK_Expr_Binary:{
            S64 lhs = expr_eval(expr->binary.left);
            S64 rhs = expr_eval(expr->binary.right);
            switch(expr->binary.op){
                case TK_Add: return lhs + rhs;
                case TK_Sub: return lhs - rhs;
                case TK_Mul: return lhs * rhs;
                case TK_Div: return lhs / rhs;
                default: invalid_codepath;
            }
        }break;
        default: invalid_codepath;
    }
    return 0;
}
// Shared test scaffolding: grabs a scratch arena, builds a fresh
// zero-initialized Parse_Ctx named `ctx`, and installs it as the
// thread-local user context for the enclosing scope.
#define TEST_PARSER() \
Set_Scratch(); \
Parse_Ctx ctx = {}; \
ctx.init(); \
Set_Ctx(&ctx, Parse_Ctx_ID)
// Parses and constant-folds a handful of arithmetic expressions,
// checking the Pratt parser's precedence and grouping against the
// values the C++ compiler computes for the same source text.
function void
test_parse_expr(){
TEST_PARSER();
struct Test{String str;S64 val;};
Array<Test> exprs = {};
exprs.add({"(4+5)*2"_s, (4+5)*2}); // parentheses override precedence
exprs.add({"4+5*2"_s, 4+5*2});     // * binds tighter than +
exprs.add({"4*5+5"_s, 4*5+5});
exprs.add({"4+5+5+3"_s, 4+5+5+3}); // left-associative chain
For(exprs,it,i){
lex_restream(&ctx, it->str, "test_expr"_s);
Ast_Expr *result = parse_expr();
S64 val = expr_eval(result);
assert(val == it->val);
}
}
//-----------------------------------------------------------------------------
// Parsing declarations
//-----------------------------------------------------------------------------
function Ast_Decl *
parse_decl(){
    // Parses one top-level declaration. Grammar so far:
    //   decl = Identifier '::' 'const' expr
    // Returns null on failure (parsing_error traps before returning).
    Ast_Decl *result = 0;
    Token *token = token_match(TK_Identifier);
    if(token){
        if(token_match(TK_DoubleColon)){
            if(token_match_keyword(keyword_const)){
                Ast_Expr *expr = parse_expr();
                result = ast_decl_const(token, token->intern_val, expr);
            }
            else{
                // BUG FIX: `ident ::` followed by anything other than
                // `const` used to be silently accepted — tokens were
                // consumed and null returned with no diagnostic.
                parsing_error(token_get(), "Expected 'const' after '::' in a top level declaration");
            }
        }
        // BUG FIX (string): "declarations" -> "declaration".
        else parsing_error(token, "Encountered unexpected token while parsing a top level declaration");
    }
    return result;
}
function void
test_parse_decl(){
    // Smoke test for `name :: const expr` declaration parsing.
    TEST_PARSER();
    lex_restream(&ctx, "thing :: const 24252\n"_s, "test_parse_decl"_s);
    Ast_Decl *result = parse_decl();
    // BUG FIX: the parse result was never checked, so this test could
    // not fail. Verify the node kind and the folded initializer.
    assert(result);
    assert(result->kind == AK_Decl_Const);
    assert(result->var.expr);
    assert(expr_eval(result->var.expr) == 24252);
}

View File

@@ -245,7 +245,7 @@ parse_expr_atom(Parser *p){
result = expr_identifier(p->arena, token); result = expr_identifier(p->arena, token);
} }
} }
else if(token_match(p, TK_Int)){ else if(token_match(p, TK_Integer)){
result = expr_int(p->arena, token); result = expr_int(p->arena, token);
} }
else if(token_is(p, TK_OpenBrace)){ else if(token_is(p, TK_OpenBrace)){