Big renames

This commit is contained in:
Krzosa Karol
2022-06-09 14:22:04 +02:00
parent 2f127dea98
commit 2ec901f6da
7 changed files with 6 additions and 3449 deletions

637
lexer.cpp
View File

@@ -1,637 +0,0 @@
// True when `token` lies in the inclusive range [TK_FirstAssign, TK_LastAssign].
force_inline B32 token_is_assign(Token_Kind token){
  B32 in_range = TK_FirstAssign <= token && token <= TK_LastAssign;
  return in_range;
}
// Pointer overload: classifies the token by its `kind` field.
force_inline B32 token_is_assign(Token *token){
  return token_is_assign(token->kind);
}
// True when `token` lies in the inclusive range [TK_FirstCompare, TK_LastCompare].
force_inline B32 token_is_compare(Token_Kind token){
  B32 in_range = TK_FirstCompare <= token && token <= TK_LastCompare;
  return in_range;
}
// Pointer overload: classifies the token by its `kind` field.
force_inline B32 token_is_compare(Token *token){
  return token_is_compare(token->kind);
}
// Sentinel token: its first member is initialized with SAME_SCOPE, the rest is zero.
// lex_last_indent_token returns it when the indent stack is empty, and lex_restream
// pushes it as the bottom entry of the indent stack.
global Token token_null = {SAME_SCOPE};
// Returns the byte at the stream's current position (no bounds check).
function U8
lexc(Lex_Stream *s){
  U8 *base = s->stream.str;
  return base[s->iter];
}
// Returns the byte `i` positions away from the current position (no bounds check).
function U8
lexci(Lex_Stream *s, S32 i){
  U8 *cursor = s->stream.str + s->iter;
  return cursor[i];
}
// Returns a pointer to the byte at the stream's current position.
function U8 *
lexcp(Lex_Stream *s){
  return &s->stream.str[s->iter];
}
// True only for a space or a carriage return; tab and newline are not included.
function B32
lex_is_whitespace(U8 c){
  switch(c){
    case ' ':
    case '\r':
      return true;
    default:
      return false;
  }
}
// True for ASCII letters 'A'..'Z' and 'a'..'z'.
function B32
lex_is_alphabetic(U8 c){
  B32 is_upper = 'A' <= c && c <= 'Z';
  B32 is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower;
}
// True for ASCII digits '0'..'9'.
function B32
lex_is_numeric(U8 c){
  return '0' <= c && c <= '9';
}
// True for ASCII digits and ASCII letters.
function B32
lex_is_alphanumeric(U8 c){
  if(lex_is_numeric(c)){
    return true;
  }
  return lex_is_alphabetic(c);
}
// Sets the token's length to the distance from its start to the stream's current position.
// Asserts that the stream has not moved behind the token start.
function void
lex_set_len(Lex_Stream *s, Token *token){
  U8 *end = lexcp(s);
  assert(end >= token->str);
  token->len = end - token->str;
}
// Interns every keyword in order and stores the interned pointer of the first and
// of the last keyword in the intern table. lex_is_keyword tests membership as a
// pointer range check between those two pointers.
function void
lex_set_keywords(Lexer *lexer, Array<String> keywords){
  Intern_String last_interned = {};
  for(S64 i = 0; i < keywords.len; i++){
    last_interned = intern_string(&lexer->interns, keywords.data[i]);
    if(i == 0){
      lexer->interns.first_keyword = last_interned.str;
    }
  }
  lexer->interns.last_keyword = last_interned.str;
}
// True when the interned pointer falls inside [first_keyword, last_keyword].
function B32
lex_is_keyword(Intern_Table *lexer, Intern_String keyword){
  if(keyword.str < lexer->first_keyword) return false;
  if(keyword.str > lexer->last_keyword) return false;
  return true;
}
// Turns the token into a TK_Error token carrying `error_val` as its message.
function void
token_error(Token *t, String error_val){
  t->error_val = error_val;
  t->kind = TK_Error;
}
// Converts the token's text (t->str, t->len), which the caller has already
// limited to decimal digits, into a BigInt stored in t->int_val and marks the
// token as TK_Integer.
//
// Digits are accumulated left to right (result = result*10 + digit), so no
// growing power-of-ten multiplier has to be kept alive. Temporaries are created
// under the scratch allocator installed by Set_BigInt_Allocator; only the final
// value is copied into lexer->arena.
function void
lex_parse_u64(Lexer *lexer, Token *t){
  Scratch scratch;
  Set_BigInt_Allocator(scratch);
  t->kind = TK_Integer;
  BigInt base = bigint_u64(10);
  BigInt result = bigint_u64(0);
  S64 digit_count = (S64)t->len;
  for(S64 i = 0; i < digit_count; i++){
    BigInt digit = bigint_u64(t->str[i] - '0');
    result = bigint_mul(&result, &base);
    result = bigint_add(&result, &digit);
  }
  t->int_val = bigint_copy(lexer->arena, &result);
}
// Parses the token's text as a double with strtod and marks the token as TK_Float.
// The text is first copied into a zero-terminated 128 byte stack buffer; at most
// 126 bytes are copied, so longer literals are truncated before parsing.
function void
lex_parse_f64(Token *t){
  char text[128];
  S64 copy_len = clamp_top((int)t->len, 126);
  memory_copy(text, t->str, copy_len);
  text[copy_len] = 0;
  t->f64_val = strtod(text, 0);
  t->kind = TK_Float;
}
// Moves the stream one byte forward; does nothing once iter has reached the stream length.
// Stepping over '\n' also bumps the line counter and records where the new line begins.
function void
lex_advance(Lex_Stream *s){
  if(s->iter >= s->stream.len){
    return;
  }
  B32 leaving_newline = lexc(s) == '\n';
  s->iter++;
  if(leaving_newline){
    s->line++;
    s->line_begin = lexcp(s);
  }
}
// Scans forward until the closing delimiter `c`. A backslash skips the byte after it,
// so an escaped delimiter does not end the literal. Hitting a 0 byte first turns the
// token into an "unterminated string" error. On success the closing delimiter is
// consumed and the token length is set (delimiters included).
function void
lex_parse_string(Lex_Stream *s, Token *t, U8 c){
  for(;;){
    U8 current = lexc(s);
    if(current == '\\'){
      lex_advance(s); // step over the backslash; the escaped byte is skipped below
    }
    else if(current == c){
      break;
    }
    else if(current == 0){
      token_error(t, "Unterminated string, reached end of file"_s);
      break;
    }
    lex_advance(s);
  }
  if(t->kind == TK_Error){
    return;
  }
  lex_advance(s);
  lex_set_len(s,t);
}
// Consumes the run of letters, digits and underscores at the current position,
// sets the token length and interns the token text into `table`.
function void
lex_parse_ident(Intern_Table *table, Lex_Stream *s, Token *t){
  for(;;){
    U8 c = lexc(s);
    if(!lex_is_alphanumeric(c) && c != '_'){
      break;
    }
    lex_advance(s);
  }
  lex_set_len(s,t);
  t->intern_val = intern_string(table, t->string);
}
// Switch-case helper for an operator that has a plain form and an "op=" form.
// Expects `s` (Lex_Stream *) and `t` (Token) in the enclosing scope. If the next
// byte is '=', it is consumed and the token kind becomes `Assign`, otherwise `OpName`.
// The trailing `break` has no semicolon; the use site supplies it.
#define CASE2(op, OpName, Assign) \
case op: \
if (lexc(s) == '=') { \
lex_advance(s); \
t.kind = Assign; \
} else { \
t.kind = OpName; \
} \
break
// Like CASE2, with a third form where the operator byte is doubled (e.g. "++", "&&").
// Next byte '=' selects `Assign`, next byte equal to `op` selects `Incr`, anything
// else selects `OpName`. Expects `s` and `t` in the enclosing scope.
#define CASE3(op, OpName, Assign, Incr) \
case op: \
if (lexc(s) == '=') { \
lex_advance(s); \
t.kind = Assign; \
} else if (lexc(s) == op) { \
lex_advance(s); \
t.kind = Incr; \
} else { \
t.kind = OpName; \
} \
break
// Creates a zeroed token positioned at `str` with the given file/line information and
// assigns it the lexer's next debug id (the id counter is post-incremented).
function Token
token_make(Lexer *lexer, U8 *str, String file, int line, U8 *line_begin){
  Token token = {};
  token.di = lexer->token_debug_ids++;
  token.file = file;
  token.line = line;
  token.line_begin = line_begin;
  token.str = str;
  return token;
}
// Convenience overload: creates a token at the lexer stream's current position.
function Token
token_make(Lexer *lexer){
  Lex_Stream *stream = &lexer->stream;
  return token_make(lexer, lexcp(stream), stream->file, stream->line, stream->line_begin);
}
// Returns the token on top of the indent stack, or &token_null when the stack is empty.
function Token *
lex_last_indent_token(Lex_Stream *s){
  if(s->indent_stack.len <= 0){
    return &token_null;
  }
  return *s->indent_stack.last();
}
// True when the token is one of the three scope markers.
function B32
lex_is_scope(Token *t){
  switch(t->kind){
    case OPEN_SCOPE:
    case CLOSE_SCOPE:
    case SAME_SCOPE:
      return true;
    default:
      return false;
  }
}
// Walks the indent stack from the top and emits copies of `t` into `array`:
//  - every deeper entry is popped and emits CLOSE_SCOPE,
//  - an entry with equal indent emits SAME_SCOPE and stops,
//  - an entry with smaller indent emits a "Bad indentation" error token and stops.
function void
lex_unwind_indent_stack(Token *t, Lex_Stream *s, Array<Token> *array){
  for(S64 depth = s->indent_stack.len-1; depth >= 0; depth -= 1){
    auto scope = s->indent_stack.data[depth];
    assert(lex_is_scope(scope));
    B32 keep_unwinding = false;
    if(scope->indent == t->indent){
      t->kind = SAME_SCOPE;
    }
    else if(scope->indent < t->indent){
      token_error(t, "Bad indentation"_s);
    }
    else{
      s->indent_stack.pop();
      t->kind = CLOSE_SCOPE;
      keep_unwinding = true;
    }
    array->add(*t);
    if(!keep_unwinding){
      break;
    }
  }
}
// Tokenizes lexer->stream into lexer->tokens.
//
// Every iteration of the outer loop runs two stages:
//  1. the indentation stage skips blanks, newlines and comments, counts the
//     indentation of the upcoming token and emits OPEN_SCOPE / SAME_SCOPE /
//     CLOSE_SCOPE tokens,
//  2. the token stage lexes exactly one regular token.
// At the end of the stream the indent stack is unwound against a final token.
//
// "^=" is lexed as TK_XorAssign through CASE2, like the other compound assignments.
//
// NOTE(review): OPEN_SCOPE pushes `array->last()`, a pointer into the token array,
// onto the indent stack. If Array moves its storage when it grows, those pointers
// would dangle - confirm against the Array implementation.
// NOTE(review): the '\'' case consumes the decoded character but not a closing
// quote - confirm that this matches the intended literal syntax.
function void
lex__stream(Lexer *lexer){
  Intern_Table *table = &lexer->interns;
  Array<Token> *array = &lexer->tokens;
  Lex_Stream *s = &lexer->stream;
  B32 beginning = true;
  for(;;){
    if(lexc(s) == 0 || s->iter >= s->stream.len){
      end_of_stream:
      Token t = token_make(lexer);
      lex_unwind_indent_stack(&t, s, array);
      break;
    }

    // @note: the lexer is going to be a 2 stage process
    // first we tokenize the indentation and then proceed to tokenize
    // the good stuff
    // for blocks of stmts we parse till we cant find another new line
    // of same scope.
    // parse_decl doesn't require preceding new line
    //
    // in that way new lines act as commas in function params
    // seeing a comma means that there is a next thing to parse
    // and it's easy to parse stuff using a do while loop

    // @note: first handle indentation
    // mostly we want to merge multiple new lines
    // but for down scopes we want to emit 2 new lines
    // that will ease out parsing, one token to break out
    // from a block parsing, second to allow continuation of surrounding scope
    Token t = token_make(lexer);
    B32 should_emit = beginning; // the very first token always gets a scope token
    for(;;){
      switch(lexc(s)){
        case 0 : goto end_of_stream; break;
        case '\t': case ' ': lex_advance(s); t.indent++; break;
        case '\r': lex_advance(s); break;
        case '/': {
          if(lexci(s,1) == '/'){
            // line comment: skip up to (not including) the newline
            lex_advance(s); lex_advance(s);
            t.kind = TK_Comment;
            for(;;){
              if(lexc(s) == '\n' || lexc(s) == 0) break;
              lex_advance(s);
            }
          }
          else if(lexci(s,1) == '*'){
            // block comment: skip through the closing "*/"
            lex_advance(s); lex_advance(s);
            t.kind = TK_Comment;
            for(;;){
              if(lexc(s) == '*' && lexci(s,1) == '/'){
                lex_advance(s); lex_advance(s);
                break;
              }
              else if(lexc(s) == 0){
                token_error(&t, "Unterminated block comment"_s);
                break;
              }
              lex_advance(s);
            }
          }
          else goto indent_loop_break; // a plain '/' is a regular token
        } break;

        // @todo: add [;;] operator which adds new scope
        // @todo: also need some way to detect indentation so that
        // first of all we can check for consistency and second of
        // all because we would know by how much to indent
        // @todo: after detecting indentation 2 spaces would become 1 indent value
        case ';' : {
          // ';' emits a SAME_SCOPE token at the indent of the current scope
          Token semi = token_make(lexer);
          Token *last = lex_last_indent_token(s);
          semi.kind = SAME_SCOPE;
          semi.indent = last->indent;
          lex_advance(s);
          array->add(semi);
        } break;

        case '\n':{
          // a new line restarts indentation counting with a fresh token
          lex_advance(s);
          should_emit = true;
          t = token_make(lexer);
        } break;

        default:{
          // no scope tokens while inside (), {} or []
          if(s->inside_brace_paren) should_emit = false;
          if(should_emit){
            Token *last = lex_last_indent_token(s);
            if(t.indent > last->indent){
              t.kind = OPEN_SCOPE;
              array->add(t);
              s->indent_stack.add(array->last());
            }
            else if(t.indent < last->indent){
              lex_unwind_indent_stack(&t, s, array);
            }
            else {
              t.kind = SAME_SCOPE;
              array->add(t);
            }
          }
          goto indent_loop_break;
        }
      }
    } indent_loop_break:
    beginning = false;

    // @note: handle the indented token
    t = token_make(lexer);
    lex_advance(s); // the first byte is consumed here; the cases below look at what follows
    switch(*t.str){
      case 0 : goto end_of_stream; break;
      case '@': t.kind = TK_At; break;
      case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break;
      case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break;
      case '{': s->inside_brace_paren++; t.kind = TK_OpenBrace; break;
      case '}': s->inside_brace_paren--; t.kind = TK_CloseBrace; break;
      case '[': s->inside_brace_paren++; t.kind = TK_OpenBracket; break;
      case ']': s->inside_brace_paren--; t.kind = TK_CloseBracket; break;
      case ',': t.kind = TK_Comma; break;
      case '~': t.kind = TK_Neg; break;
      case '?': t.kind = TK_Question; break;
      CASE2('^', TK_BitXor, TK_XorAssign);
      CASE2('!', TK_Not, TK_NotEquals);
      CASE2('=', TK_Assign, TK_Equals);
      CASE2('*', TK_Mul, TK_MulAssign);
      CASE2('%', TK_Mod, TK_ModAssign);
      CASE3('+', TK_Add, TK_AddAssign, TK_Increment);
      CASE3('&', TK_BitAnd, TK_AndAssign, TK_And);
      CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);

      case '#': {
        // the interned text includes the leading '#'
        lex_parse_ident(table, s, &t);
        if(t.intern_val.str == intern_foreign.str){
          t.kind = TK_FOREIGN;
        }
        else token_error(&t, "Unrecognized #note"_s);
      }break;

      case '.': {
        if(lexc(s) == '.' && lexci(s,1) == '.') {
          lex_advance(s); lex_advance(s);
          t.kind = TK_ThreeDots;
        }
        else {
          t.kind = TK_Dot;
        }
      } break;

      case '\'':{
        assert(s->stream.len >= s->iter);
        UTF32_Result decode = utf8_to_utf32(lexcp(s), s->stream.len - s->iter);
        if(!decode.error){
          for(S32 i = 0; i < decode.advance; i++) lex_advance(s);
          t.unicode = decode.out_str;
          t.kind = TK_UnicodeLit;
        }
        else{
          token_error(&t, "Invalid UTF8 sequence in unicode literal"_s);
        }
      } break;

      case '<': {
        if (lexc(s) == '<') {
          lex_advance(s);
          if (lexc(s) == '=') {
            lex_advance(s);
            t.kind = TK_LeftShiftAssign;
          }
          else {
            t.kind = TK_LeftShift;
          }
        }
        else if (lexc(s) == '=') {
          lex_advance(s);
          t.kind = TK_LesserThenOrEqual;
        }
        else {
          t.kind = TK_LesserThen;
        }
      } break;

      case '>': {
        if (lexc(s) == '>') {
          lex_advance(s);
          if (lexc(s) == '=') {
            lex_advance(s);
            t.kind = TK_RightShiftAssign;
          }
          else {
            t.kind = TK_RightShift;
          }
        }
        else if (lexc(s) == '=') {
          lex_advance(s);
          t.kind = TK_GreaterThenOrEqual;
        }
        else {
          t.kind = TK_GreaterThen;
        }
      } break;

      case ':': {
        if (lexc(s) == ':') {
          lex_advance(s);
          t.kind = TK_DoubleColon;
        }
        else if(lexc(s) == '='){
          lex_advance(s);
          t.kind = TK_ColonAssign;
        }
        else {
          t.kind = TK_Colon;
        }
      } break;

      case '-':{
        if (lexc(s) == '=') {
          lex_advance(s);
          t.kind = TK_SubAssign;
        }
        else if (lexc(s) == '-') {
          lex_advance(s);
          t.kind = TK_Decrement;
        }
        else if (lexc(s) == '>') {
          lex_advance(s);
          t.kind = TK_Arrow;
        }
        else {
          t.kind = TK_Sub;
        }
      } break;

      case '"': {
        t.kind = TK_StringLit;
        lex_parse_string(s,&t,'"');
        if(t.kind != TK_Error){
          // drop the surrounding quotes from the token text
          t.str += 1;
          t.len -= 2;
        }
        t.intern_val = intern_string(table, t.string);
      } break;

      case '/': {
        if(lexc(s) == '='){
          t.kind = TK_DivAssign;
          lex_advance(s);
        }
        else {
          t.kind = TK_Div;
        }
      } break;

      case '0':case '1':case '2':case '3':case '4':
      case '5':case '6':case '7':case '8':case '9':{
        // digits with at most one '.'; a '.' makes the literal a float
        B32 found_dot = false;
        for(;;){
          if(lex_is_numeric(lexc(s)))
            ;
          else if(lexc(s) == '.'){
            if(found_dot){
              token_error(&t, "Multiple '.' in float literal"_s);
              goto end_of_switch;
            }
            found_dot = true;
          }
          else break;
          lex_advance(s);
        }
        lex_set_len(s, &t);
        if(found_dot) lex_parse_f64(&t);
        else lex_parse_u64(lexer, &t);
      } break;

      case 'A':case 'a':case 'M':case 'm':case 'B':
      case 'b':case 'N':case 'n':case 'C':case 'c':case 'O':
      case 'o':case 'D':case 'd':case 'P':case 'p':case 'E':
      case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R':
      case 'r':case 'G':case 'g':case 'S':case 's':case 'H':
      case 'h':case 'T':case 't':case 'I':case 'i':case 'U':
      case 'u':case 'J':case 'j':case 'V':case 'v':case 'K':
      case 'k':case 'W':case 'w':case 'L':case 'X':case 'l':
      case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': {
        t.kind = TK_Identifier;
        lex_parse_ident(table, s, &t);
        if(lex_is_keyword(table, t.intern_val)){
          t.kind = TK_Keyword;
        }
      } break;

      default: {
        token_error(&t, "Unknown token"_s);
      }
    }end_of_switch:

    // cases that did not set a length get everything consumed so far
    if(t.len==0)
      lex_set_len(s,&t);
    array->add(t);
  }
#undef CASE2
#undef CASE3
}
// Returns a zeroed Lexer initialized by lex_init with the two allocators.
function Lexer
lex_make(Allocator *token_string_arena, Allocator *map_allocator){
  Lexer lexer = {};
  lex_init(token_string_arena, map_allocator, &lexer);
  return lexer;
}
// Resets the lexer's stream state and token list, then tokenizes `istream`.
// The indent stack is backed by a Scratch allocator local to this call and starts
// with &token_null as its bottom entry.
function void
lex_restream(Lexer *lexer, String istream, String file){
  lexer->tokens.clear();
  lexer->token_iter = 0;

  Lex_Stream *stream = &lexer->stream;
  *stream = {};
  stream->file = file;
  stream->stream = istream;
  stream->line_begin = istream.str;

  Scratch scratch;
  stream->indent_stack.allocator = scratch;
  stream->indent_stack.add(&token_null);
  lex__stream(lexer);
}
// Creates a lexer with lex_make and immediately tokenizes `istream`.
function Lexer
lex_stream(Allocator *token_string_arena, Allocator *map_allocator, String istream, String file){
  Lexer lexer = lex_make(token_string_arena, map_allocator);
  lex_restream(&lexer, istream, file);
  return lexer;
}
//-----------------------------------------------------------------------------
// Token metadata
//-----------------------------------------------------------------------------
// Maps a token kind to a static display string, used in parser error messages.
// Operator kinds return their spelling; other kinds return a descriptive label.
// A kind without a case hits invalid_codepath and then returns "<Undefined>".
function const char *
name(Token_Kind kind){
switch(kind){
case TK_End: return "End of stream";
case TK_Mul: return "*";
case TK_Div: return "/";
case TK_Add: return "+";
case TK_Sub: return "-";
case TK_Mod: return "%";
case TK_BitAnd: return "&";
case TK_BitOr: return "|";
case TK_BitXor: return "^";
case TK_Neg: return "~";
case TK_Not: return "!";
case TK_OpenParen: return "(";
case TK_CloseParen: return ")";
case TK_OpenBrace: return "{";
case TK_CloseBrace: return "}";
case TK_OpenBracket: return "[";
case TK_CloseBracket: return "]";
case TK_ColonAssign: return ":=";
case TK_Comma: return ",";
case TK_Pound: return "#";
case TK_Question: return "?";
case TK_ThreeDots: return "...";
case TK_Semicolon: return ";";
case TK_Dot: return ".";
case TK_LesserThen: return "<";
case TK_GreaterThen: return ">";
case TK_Colon: return ":";
case TK_Assign: return "=";
case TK_DivAssign: return "/=";
case TK_MulAssign: return "*=";
case TK_ModAssign: return "%=";
case TK_SubAssign: return "-=";
case TK_AddAssign: return "+=";
case TK_AndAssign: return "&=";
case TK_OrAssign: return "|=";
case TK_XorAssign: return "^=";
case TK_LeftShiftAssign: return "<<=";
case TK_RightShiftAssign: return ">>=";
case TK_DoubleColon: return "::";
case TK_At: return "@";
case TK_Decrement: return "--";
case TK_Increment: return "++";
case TK_PostDecrement: return "--";
case TK_PostIncrement: return "++";
case TK_LesserThenOrEqual: return "<=";
case TK_GreaterThenOrEqual: return ">=";
case TK_Equals: return "==";
case TK_And: return "&&";
case TK_Or: return "||";
case TK_NotEquals: return "!=";
case TK_LeftShift: return "<<";
case TK_RightShift: return ">>";
case TK_Arrow: return "->";
case TK_NewLine: return "New_Line";
case TK_ExprSizeof: return "sizeof";
case TK_DocComment: return "Doc_Comment";
case TK_Comment: return "Comment";
case TK_Identifier: return "Identifier";
case TK_StringLit: return "String_Lit";
case TK_UnicodeLit: return "Unicode_Lit";
case TK_Error: return "Error";
case TK_Float: return "Float";
case TK_Integer: return "int";
case TK_Keyword: return "Keyword";
case TK_FOREIGN: return "#foreign";
case CLOSE_SCOPE: return "Close_Scope";
case OPEN_SCOPE: return "Open_Scope";
case SAME_SCOPE: return "Same_Scope";
default: invalid_codepath; return "<Undefined>";
}
}

View File

@@ -158,13 +158,12 @@ Expr:
#include "base_unicode.cpp"
#include "big_int_c3.cpp"
#include "compiler.h"
#include "lexer.cpp"
#include "lexing.cpp"
#include "types.h"
// #include "big_int.cpp"
#include "new_ast.cpp"
#include "new_parse.cpp"
#include "typecheck.h"
#include "typecheck.cpp"
#include "ast.cpp"
#include "parsing.cpp"
#include "typechecking.h"
#include "typechecking.cpp"
#include "ccodegen.cpp"
int main(int argument_count, char **arguments){
@@ -198,7 +197,7 @@ int main(int argument_count, char **arguments){
F64 begin = os_time();
system((const char *)compiler_call.str);
printf("\nCompile time: %f", os_time() - begin);
system((const char *)run_program.str);
}
else{

View File

@@ -1,628 +0,0 @@
//-----------------------------------------------------------------------------
// AST
//-----------------------------------------------------------------------------
// Discriminator stored in Ast::kind. The AST_NEW constructor macro sets it, and the
// helpers in this file check it before casting an Ast* to a concrete node type.
// Note that both string/bool/float/int atoms use AST_VALUE; identifiers use AST_IDENT.
enum Ast_Kind: U32{
AST_NONE,
AST_PACKAGE,
AST_VALUE,
AST_CAST,
AST_IDENT,
AST_INDEX,
AST_UNARY,
AST_BINARY,
AST_CALL_ITEM,
AST_CALL,
AST_POINTER,
AST_ARRAY,
AST_FOR,
AST_IF,
AST_IF_NODE,
AST_RETURN,
AST_BLOCK,
AST_PASS,
AST_LAMBDA,
AST_LAMBDA_ARG,
AST_ENUM,
AST_ENUM_MEMBER,
AST_STRUCT,
AST_CONST,
AST_VAR,
};
// Bit flags stored in Ast::flags; each value is a distinct power of two so they
// can be combined with | (e.g. AST_EXPR | AST_ATOM in the atom constructors).
typedef U32 Ast_Flag;
enum{
AST_EXPR = 1,
AST_STMT = 2,
AST_BINDING = 4,
AST_AGGREGATE = 8,
AST_AGGREGATE_CHILD = 16,
AST_ITEM_INCLUDED = 32,
AST_ATOM = 64,
AST_FOREIGN = 128,
};
// Common header of every AST node. AST_NEW fills id (from pctx->unique_ids), pos,
// kind and flags; the constructors set `parent` on the children they receive.
struct Ast{
U64 id;
Token *pos;
Ast_Kind kind;
Ast *parent;
Ast_Flag flags;
};
// Forward declaration; the type is only used through pointers in this file.
struct Ast_Resolved_Type;
// Base type of all expression nodes; adds no fields of its own.
struct Ast_Expr:Ast{};
// VALUE_FIELDS: a resolved type pointer followed by an anonymous union holding the
// payload (bool, float, interned string, big integer or a type).
#define VALUE_FIELDS \
Ast_Resolved_Type *type; \
union{ \
bool bool_val; \
F64 f64_val; \
Intern_String intern_val; \
BigInt big_int_val;\
Ast_Resolved_Type *type_val; \
};
// INLINE_VALUE_FIELDS: embeds the same fields twice over the same storage, once as a
// `Value value` member and once as directly accessible anonymous-struct fields.
#define INLINE_VALUE_FIELDS union{Value value; struct{VALUE_FIELDS};}
// Standalone value: type plus payload union.
struct Value{VALUE_FIELDS};
// BigInt big_int_val;
// Leaf expression (literal value or identifier); its fields are readable either
// directly (result->type, result->intern_val, ...) or as a whole through `value`.
struct Ast_Atom: Ast_Expr{
INLINE_VALUE_FIELDS;
};
// One argument of a call. parse_expr_call fills either `index` (from "[expr] = item")
// or `name` (from "name = item"), or neither for a plain positional item.
struct Ast_Call_Item: Ast_Expr{
Ast_Atom *name; // index | name
Ast_Expr *index;
Ast_Expr *item;
};
// Call expression: the callee expression `name` plus its argument items.
// `type` is not set by ast_call in this file.
struct Ast_Call: Ast_Expr{
Ast_Resolved_Type *type; // @todo: to map
Ast_Expr *name;
Array<Ast_Call_Item *> exprs;
};
// Unary expression: operator token kind and its operand.
struct Ast_Unary: Ast_Expr{
Token_Kind op;
Ast_Expr *expr;
U64 padding[3]; // For folding constants into atoms
};
// Cast expression: the expression being cast and the type expression to cast to.
struct Ast_Cast: Ast_Expr{
Ast_Expr *expr;
Ast_Expr *typespec;
};
// Index expression: the indexed expression and the index expression.
struct Ast_Index: Ast_Expr{
Ast_Expr *expr;
Ast_Expr *index;
};
// Binary expression. ast_expr_binary takes `op` from the operator token's kind and
// tolerates a null `right`.
struct Ast_Binary: Ast_Expr{
Token_Kind op;
Ast_Expr *left;
Ast_Expr *right;
};
// Block of statements.
struct Ast_Block : Ast {
Array<Ast *> stmts;
};
// Return statement; `expr` stays null when ast_return is given no expression.
struct Ast_Return: Ast{
Ast_Expr *expr;
};
// One branch of an if statement. All three members may be null; ast_if_node asserts
// that a non-null `init` is an AST_BINARY node.
struct Ast_If_Node: Ast{
Ast_Expr *expr ;
Ast_Block *block;
Ast_Binary*init;
};
// If statement: the list of its branches.
struct Ast_If: Ast{
Array<Ast_If_Node *> ifs;
};
// Statement produced for the `pass` keyword; carries no data beyond the Ast header.
struct Ast_Pass: Ast{};
// For statement. init, cond and iter may each be null (ast_for guards them);
// `block` is dereferenced unconditionally by ast_for.
struct Ast_For: Ast{
Ast_Expr *init;
Ast_Expr *cond;
Ast_Expr *iter;
Ast_Block *block;
};
// One lambda parameter: name, type expression and optional default value.
struct Ast_Lambda_Arg: Ast_Expr{
Intern_String name;
Ast_Expr *typespec;
Ast_Expr *default_value;
};
// Lambda: parameters, return type expression and optional body block.
// ast_lambda substitutes an identifier atom for intern_void when no `ret` is given.
struct Ast_Lambda : Ast_Expr {
Array<Ast_Lambda_Arg *> args;
Ast_Expr *ret;
Ast_Block *block;
B32 has_var_args;
};
// Array expression node. ast_array only sets `expr`; `base` is left zeroed there.
struct Ast_Array: Ast_Expr{
Ast_Expr *base;
Ast_Expr *expr;
};
// Base of named declarations (Ast_Var, Ast_Const); ast_get_name reads `name` through it.
struct Ast_Named:Ast{
Intern_String name;
};
// Variable declaration: optional type expression and optional initializer.
struct Ast_Var: Ast_Named{
Ast_Expr *typespec;
Ast_Expr *expr;
};
// Forward declarations needed by Ast_Struct's member lists.
struct Ast_Const;
struct Ast_Resolved_Type;
// Struct declaration: variable members and constant members kept in separate lists.
// `type` is not set by ast_struct in this file.
struct Ast_Struct: Ast{
// Required to be Ast_Struct or Ast_Var or Ast_Const
Array<Ast_Var *> members;
Array<Ast_Const *> const_members;
Ast_Resolved_Type *type;
};
// One enum member: name and optional value expression.
struct Ast_Enum_Member: Ast{
Intern_String name;
Ast_Expr *value;
};
// Enum declaration: optional type expression and the member list.
struct Ast_Enum: Ast{
Ast_Expr *typespec;
Array<Ast_Enum_Member *> members;
};
// Constant declaration. The union gives differently typed views of one pointer to
// the bound node; callers check the node's `kind` before using a specific view.
struct Ast_Const: Ast_Named{
union{
Ast *ast;
Ast_Expr *value;
Ast_Struct *agg;
Ast_Enum *enu;
};
};
// Package: name plus its declarations. ast_package copies the declarations into
// `decls` and creates `ordered` with array_make using decls.len.
struct Ast_Package:Ast{
Intern_String name;
Array<Ast_Named *> decls;
Array<Ast_Named *> ordered;
};
//-----------------------------------------------------------------------------
// AST Constructors beginning with expressions
//-----------------------------------------------------------------------------
// Declares a local `Ast_<T> *result`, allocated zeroed from pctx->perm, and fills
// the common header: flags, kind (AST_<ikind>), pos and a fresh id taken by
// pre-incrementing pctx->unique_ids. The last statement has no semicolon; the use
// site supplies it.
#define AST_NEW(T,ikind,ipos,iflags) \
Ast_##T *result = exp_alloc_type(pctx->perm, Ast_##T, AF_ZeroMemory);\
result->flags = iflags; \
result->kind = AST_##ikind; \
result->pos = ipos; \
result->id = ++pctx->unique_ids
// Creates a VALUE atom holding an interned string, typed untyped_string.
function Ast_Atom *
ast_str(Token *pos, Intern_String string){
  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
  result->intern_val = string;
  result->type = untyped_string;
  return result;
}
// Creates an IDENT atom for the interned identifier; its type is left zeroed.
function Ast_Atom *
ast_ident(Token *pos, Intern_String string){
  AST_NEW(Atom, IDENT, pos, AST_EXPR | AST_ATOM);

  result->intern_val = string;

  return result;
}
// Creates a VALUE atom holding a boolean, typed untyped_bool.
function Ast_Atom *
ast_bool(Token *pos, B32 bool_val){
  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
  result->type = untyped_bool;
  result->bool_val = bool_val;
  return result;
}
// Creates a VALUE atom holding a float, typed untyped_float.
function Ast_Atom *
ast_float(Token *pos, F64 value){
  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
  result->f64_val = value;
  result->type = untyped_float;
  return result;
}
// Creates a VALUE atom holding a big integer, typed untyped_int.
// The BigInt is copied into pctx->perm, so the atom does not alias `val`.
function Ast_Atom *
ast_int(Token *pos, BigInt val){
  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
  result->big_int_val = bigint_copy(pctx->perm, &val);
  result->type = untyped_int;
  return result;
}
// U64 overload: wraps the value in a BigInt and forwards to the BigInt overload.
function Ast_Atom *
ast_int(Token *pos, U64 value){
  BigInt as_bigint = bigint_u64(value);
  return ast_int(pos, as_bigint);
}
// Creates a BINARY node positioned at the operator token, with op taken from the
// token's kind. `left` must be non-null; `right` may be null.
function Ast_Expr *
ast_expr_binary(Ast_Expr *left, Ast_Expr *right, Token *op){
  AST_NEW(Binary, BINARY, op, AST_EXPR);
  result->op = op->kind;
  result->left = left;
  result->right = right;
  left->parent = result;
  if(right){
    right->parent = result;
  }
  return result;
}
// Creates a CALL node. The argument list is copied into pctx->perm and every
// argument (and the callee, when non-null) gets the new node as parent.
function Ast_Call *
ast_call(Token *pos, Ast_Expr *name, Array<Ast_Call_Item *> exprs){
  AST_NEW(Call, CALL, pos, AST_EXPR);
  result->name = name;
  result->exprs = exprs.tight_copy(pctx->perm);
  if(name){
    name->parent = result;
  }
  For(result->exprs){
    it->parent = result;
  }
  return result;
}
// Creates a CALL_ITEM node. `name` and `index` may be null; `item` must be non-null.
function Ast_Call_Item *
ast_call_item(Token *pos, Ast_Expr *index, Ast_Atom *name, Ast_Expr *item){
  AST_NEW(Call_Item, CALL_ITEM, pos, AST_EXPR);
  result->item = item;
  result->index = index;
  result->name = name;
  if(name) name->parent = result;
  if(index) index->parent = result;
  item->parent = result;
  return result;
}
// Creates a CAST node; both `expr` and `typespec` must be non-null.
function Ast_Expr *
ast_expr_cast(Token *pos, Ast_Expr *expr, Ast_Expr *typespec){
  AST_NEW(Cast, CAST, pos, AST_EXPR); // flags are already AST_EXPR after this
  result->typespec = typespec;
  result->expr = expr;
  typespec->parent = result;
  expr->parent = result;
  return result;
}
// Creates a UNARY node for operator `op` applied to the non-null `expr`.
function Ast_Expr *
ast_expr_unary(Token *pos, Token_Kind op, Ast_Expr *expr){
  AST_NEW(Unary, UNARY, pos, AST_EXPR); // flags are already AST_EXPR after this
  result->op = op;
  result->expr = expr;
  expr->parent = result;
  return result;
}
// Creates an INDEX node; both `expr` and `index` must be non-null.
function Ast_Expr *
ast_expr_index(Token *pos, Ast_Expr *expr, Ast_Expr *index){
  AST_NEW(Index, INDEX, pos, AST_EXPR); // flags are already AST_EXPR after this
  result->index = index;
  result->expr = expr;
  index->parent = result;
  expr->parent = result;
  return result;
}
// Creates a LAMBDA node. Parameters are copied into pctx->perm. A missing return
// type is replaced by an identifier atom for intern_void; `block` may be null.
function Ast_Lambda *
ast_lambda(Token *pos, Array<Ast_Lambda_Arg *> params, B32 has_var_args, Ast_Expr *ret, Ast_Block *block){
  AST_NEW(Lambda, LAMBDA, pos, AST_EXPR); // flags are already AST_EXPR after this
  result->args = params.tight_copy(pctx->perm);
  result->has_var_args = has_var_args;
  result->block = block;
  if(!ret){
    ret = ast_ident(pos, intern_void);
  }
  result->ret = ret;
  if(block){
    block->parent = result;
  }
  ret->parent = result;
  For(result->args){
    it->parent = result;
  }
  return result;
}
// Creates a LAMBDA_ARG node. `typespec` must be non-null; `default_value` may be null.
function Ast_Lambda_Arg *
ast_expr_lambda_arg(Token *pos, Intern_String name, Ast_Expr *typespec, Ast_Expr *default_value){
  AST_NEW(Lambda_Arg, LAMBDA_ARG, pos, AST_EXPR); // flags are already AST_EXPR after this
  result->name = name;
  result->default_value = default_value;
  result->typespec = typespec;
  typespec->parent = result;
  if(default_value){
    default_value->parent = result;
  }
  return result;
}
// Creates a BLOCK node; statements are copied into pctx->perm and reparented.
function Ast_Block *
ast_block(Token *pos, Array<Ast *> stmts){
  AST_NEW(Block, BLOCK, pos, AST_STMT);
  result->stmts = stmts.tight_copy(pctx->perm);
  for(S64 i = 0; i < result->stmts.len; i++){
    result->stmts.data[i]->parent = result;
  }
  return result;
}
// Creates an IF node; branches are copied into pctx->perm and reparented.
function Ast_If *
ast_if(Token *pos, Array<Ast_If_Node *> ifs){
  AST_NEW(If, IF, pos, AST_STMT);
  result->ifs = ifs.tight_copy(pctx->perm);
  for(S64 i = 0; i < result->ifs.len; i++){
    result->ifs.data[i]->parent = result;
  }
  return result;
}
// Creates a FOR node. init/cond/iter may be null; `block` must be non-null.
function Ast_For *
ast_for(Token *pos, Ast_Expr *init, Ast_Expr *cond, Ast_Expr *iter, Ast_Block *block){
  AST_NEW(For, FOR, pos, AST_STMT);
  result->block = block;
  result->iter = iter;
  result->cond = cond;
  result->init = init;
  if(init) init->parent = result;
  if(cond) cond->parent = result;
  if(iter) iter->parent = result;
  block->parent = result;
  return result;
}
// Creates a PASS statement node; it has no fields beyond the common header.
function Ast_Pass *
ast_pass(Token *pos){
AST_NEW(Pass, PASS, pos, AST_STMT);
return result;
}
// Creates a RETURN node. With a null `expr` the node's expr stays zeroed; otherwise
// the expression must carry AST_EXPR and is attached as a child.
function Ast_Return *
ast_return(Token *pos, Ast_Expr *expr){
  AST_NEW(Return, RETURN, pos, AST_STMT);
  if(!expr){
    return result;
  }
  assert(is_flag_set(expr->flags, AST_EXPR));
  result->expr = expr;
  expr->parent = result;
  return result;
}
// Creates an IF_NODE. All three children may be null; a non-null `init` must be an
// AST_BINARY node and is stored as Ast_Binary *.
function Ast_If_Node *
ast_if_node(Token *pos, Ast_Expr *init, Ast_Expr *expr, Ast_Block *block){
  AST_NEW(If_Node, IF_NODE, pos, AST_STMT);
  result->init = (Ast_Binary *)init;
  result->expr = expr;
  result->block = block;
  if(block) block->parent = result;
  if(expr) expr->parent = result;
  if(init){
    assert(init->kind == AST_BINARY);
    init->parent = result;
  }
  return result;
}
// Creates an ARRAY node with an optional `expr`; `base` is left zeroed.
function Ast_Array *
ast_array(Token *pos, Ast_Expr *expr){
  AST_NEW(Array, ARRAY, pos, AST_EXPR);
  result->expr = expr;
  if(expr){
    expr->parent = result;
  }
  return result;
}
// Creates an ENUM_MEMBER node with an optional value expression.
function Ast_Enum_Member *
ast_enum_member(Token *pos, Intern_String name, Ast_Expr *default_value){
  AST_NEW(Enum_Member, ENUM_MEMBER, pos, AST_AGGREGATE_CHILD);
  result->value = default_value;
  result->name = name;
  if(default_value){
    default_value->parent = result;
  }
  return result;
}
// Creates an ENUM node; members are copied into pctx->perm and reparented.
// `typespec` may be null.
function Ast_Enum *
ast_enum(Token *pos, Ast_Expr *typespec, Array<Ast_Enum_Member *> members){
  AST_NEW(Enum, ENUM, pos, AST_AGGREGATE);
  result->typespec = typespec;
  result->members = members.tight_copy(pctx->perm);
  if(typespec){
    typespec->parent = result;
  }
  for(S64 i = 0; i < result->members.len; i++){
    result->members.data[i]->parent = result;
  }
  return result;
}
// Creates a STRUCT node. Both member lists are copied into pctx->perm; every member
// must be a binding of the expected kind (AST_VAR / AST_CONST) and is reparented.
function Ast_Struct *
ast_struct(Token *pos, Array<Ast_Var *> members, Array<Ast_Const *> const_members){
  AST_NEW(Struct, STRUCT, pos, AST_AGGREGATE);
  result->members = members.tight_copy(pctx->perm);
  result->const_members = const_members.tight_copy(pctx->perm);

  for(S64 i = 0; i < result->members.len; i++){
    Ast_Var *member = result->members.data[i];
    assert(is_flag_set(member->flags, AST_BINDING));
    assert(member->kind == AST_VAR);
    member->parent = result;
  }

  for(S64 i = 0; i < result->const_members.len; i++){
    Ast_Const *member = result->const_members.data[i];
    assert(is_flag_set(member->flags, AST_BINDING));
    assert(member->kind == AST_CONST);
    member->parent = result;
  }
  return result;
}
//-----------------------------------------------------------------------------
// Declarations
//-----------------------------------------------------------------------------
// Creates a VAR binding; both `typespec` and `expr` may be null.
function Ast_Var *
ast_var(Token *pos, Ast_Expr *typespec, Intern_String name, Ast_Expr *expr){
  AST_NEW(Var, VAR, pos, AST_BINDING);
  result->name = name;
  result->typespec = typespec;
  result->expr = expr;
  if(expr) expr->parent = result;
  if(typespec) typespec->parent = result;
  return result;
}
// Creates a CONST binding. `value` must be non-null and flagged as an aggregate or
// an expression; this is asserted before the node is allocated.
function Ast_Const *
ast_const(Token *pos, Intern_String name, Ast_Expr *value){
  assert(is_flag_set(value->flags, AST_AGGREGATE) || is_flag_set(value->flags, AST_EXPR) );
  AST_NEW(Const, CONST, pos, AST_BINDING);
  result->name = name;
  result->value = value;
  value->parent = result;
  return result;
}
// Creates a PACKAGE node with flags 0. Declarations are copied into pctx->perm and
// reparented, `ordered` is created with array_make using decls.len, and the package
// name is interned in pctx->interns.
function Ast_Package *
ast_package(Token *pos, String name, Array<Ast_Named *> decls){
  AST_NEW(Package, PACKAGE, pos, 0);
  result->decls = decls.tight_copy(pctx->perm);
  result->ordered = array_make<Ast_Named *>(pctx->perm, decls.len);
  result->name = intern_string(&pctx->interns, name);
  for(S64 i = 0; i < result->decls.len; i++){
    result->decls.data[i]->parent = result;
  }
  return result;
}
//-----------------------------------------------------------------------------
// Value
//-----------------------------------------------------------------------------
// Builds an untyped_bool Value. Only `type` and `bool_val` are written; the Value is
// declared without an initializer, so the rest of the payload union is not set here.
function Value
value_bool(B32 v){
  Value result;
  result.type = untyped_bool;
  result.bool_val = v;
  return result;
}
// Builds an untyped_int Value that stores the given BigInt by plain assignment.
function Value
value_int(BigInt b){
  Value result;
  result.type = untyped_int;
  result.big_int_val = b;
  return result;
}
// Builds an untyped_int Value from a signed 64 bit integer via bigint_init_signed.
function Value
value_int(S64 s64){
  Value result;
  bigint_init_signed(&result.big_int_val, s64);
  result.type = untyped_int;
  return result;
}
// Builds an untyped_float Value from a double.
function Value
value_float(F64 b){
  Value result;
  result.type = untyped_float;
  result.f64_val = b;
  return result;
}
// Builds an untyped_float Value by converting a BigInt with bigint_as_float.
function Value
value_float(BigInt a){
  Value result;
  result.type = untyped_float;
  result.f64_val = bigint_as_float(&a);
  return result;
}
//-----------------------------------------------------------------------------
// Utilities
//-----------------------------------------------------------------------------
// Given an AST_CONST node, returns the struct it binds, or 0 when the bound node
// is not an AST_STRUCT. Asserts that `ast` is a constant.
function Ast_Struct *
const_try_getting_struct(Ast *ast){
  assert(ast->kind == AST_CONST);
  Ast_Const *constant = (Ast_Const *)ast;
  if(constant->value->kind != AST_STRUCT){
    return 0;
  }
  return (Ast_Struct *)constant->value;
}
// Like const_try_getting_struct, but asserts that a struct was found.
function Ast_Struct *
const_get_struct(Ast *ast){
  Ast_Struct *found = const_try_getting_struct(ast);
  assert(found);
  return found;
}
// Given an AST_CONST node, returns the lambda it binds, or 0 when the bound node
// is not an AST_LAMBDA. Asserts that `ast` is a constant.
function Ast_Lambda *
const_try_getting_lambda(Ast *ast){
  assert(ast->kind == AST_CONST);
  Ast_Const *constant = (Ast_Const *)ast;
  if(constant->value->kind != AST_LAMBDA){
    return 0;
  }
  return (Ast_Lambda *)constant->value;
}
// Like const_try_getting_lambda, but asserts that a lambda was found.
function Ast_Lambda *
const_get_lambda(Ast *ast){
  Ast_Lambda *found = const_try_getting_lambda(ast);
  assert(found);
  return found;
}
// Returns the name of a binding node; asserts that the node carries AST_BINDING.
function Intern_String
ast_get_name(Ast *ast){
  assert(is_flag_set(ast->flags, AST_BINDING));
  Ast_Named *named = (Ast_Named *)ast;
  return named->name;
}
// True when `ast` is a constant whose bound node has kind AST_STRUCT.
function B32
ast_is_struct(Ast *ast){
  if(ast->kind != AST_CONST){
    return false;
  }
  Ast_Const *constant = (Ast_Const *)ast;
  return constant->agg->kind == AST_STRUCT;
}
// True when the node's kind is AST_IDENT.
function B32
is_ident(Ast *ast){
  return ast->kind == AST_IDENT;
}
// True when the node's kind is AST_BINARY.
function B32
is_binary(Ast *ast){
  return ast->kind == AST_BINARY;
}
// Result of is_flag_set for AST_ATOM on the node's flags.
function B32
is_atom(Ast *ast){
  return is_flag_set(ast->flags, AST_ATOM);
}
// Looks a member up by name: variable members are searched first, then constant
// members. Returns the first match or 0.
function Ast *
query_struct(Ast_Struct *agg, Intern_String string){
  for(S64 i = 0; i < agg->members.len; i++){
    Ast_Var *member = agg->members.data[i];
    if(member->name == string) return member;
  }
  for(S64 i = 0; i < agg->const_members.len; i++){
    Ast_Const *member = agg->const_members.data[i];
    if(member->name == string) return member;
  }
  return 0;
}
// Returns the first enum member with the given name, or 0 when there is none.
function Ast_Enum_Member *
query_enum(Ast_Enum *enu, Intern_String string){
  for(S64 i = 0; i < enu->members.len; i++){
    Ast_Enum_Member *member = enu->members.data[i];
    if(member->name == string) return member;
  }
  return 0;
}

View File

@@ -1,607 +0,0 @@
// Prints a formatted parser error and then triggers __debugbreak().
//
// token: optional source position. When given, the message is followed by
//        "file:line", the offending source line and a "^^^^^^" marker placed
//        near the token's column. A TK_Error token additionally prints its own
//        error text.
// str:   printf-style format string followed by its arguments.
//
// The file name is printed with an explicit length, like error_val, so it does
// not rely on the String being zero-terminated. The source-line excerpt is skipped
// for tokens without a line_begin pointer (a zero-initialized token has none).
function void
parsing_error(Token *token, const char *str, ...){
  Scratch scratch;
  STRING_FMT(scratch, str, string);

  // @Note(Krzosa): Print nice error message
  printf("\nError :: %s", string.str);
  if(token){
    if(token->kind == TK_Error){
      printf("Token Error: %.*s", (int)token->error_val.len, token->error_val.str);
    }
    // token->line is zero based, people count lines from one
    printf(" :: %.*s:%d\n", (int)token->file.len, token->file.str, (S32)token->line + 1);

    // @Note(Krzosa): Print error line
    if(token->line_begin){
      int line_len = 0;
      while(token->line_begin[line_len]!='\n' && token->line_begin[line_len]!=0) line_len++;
      printf("%.*s\n", line_len, token->line_begin);

      // @Note(Krzosa): Print error marker
      int token_i = (int)(token->str - token->line_begin);
      for(int i = 0; i < token_i-2; i++) printf(" ");
      printf("^^^^^^\n");
    }
  }
  __debugbreak();
}
// Returns the token `i` positions away from the parser's current token
// (0 = current token). Any position outside the token array, before its start or
// past its end, yields &pctx->empty_token instead of indexing out of bounds.
function Token *
token_get(S64 i = 0){
  i += pctx->token_iter;
  if(i < 0 || i >= pctx->tokens.len){
    return &pctx->empty_token;
  }
  Token *result = &pctx->tokens[i];
  return result;
}
// Returns the current token when it is a scope marker, otherwise 0. Does not advance.
function Token *
token_is_scope(){
  Token *current = token_get();
  return lex_is_scope(current) ? current : 0;
}
// Returns the current token and advances the parser by one. When the token is a
// scope marker, its indent is recorded in pctx->indent first.
function Token *
token_next(){
  Token *current = token_get();
  if(lex_is_scope(current)){
    pctx->indent = current->indent;
  }
  pctx->token_iter++;
  return current;
}
// Returns the token at `lookahead` when it has the given kind, otherwise 0.
// Does not advance.
function Token *
token_is(Token_Kind kind, S64 lookahead = 0){
  Token *candidate = token_get(lookahead);
  return candidate->kind == kind ? candidate : 0;
}
// Returns the token at `lookahead` when it is the given keyword (compared by
// interned pointer), otherwise 0. Does not advance.
function Token *
token_is_keyword(Intern_String keyword, S64 lookahead = 0){
  Token *candidate = token_get(lookahead);
  if(candidate->kind != TK_Keyword){
    return 0;
  }
  if(keyword.str != candidate->intern_val.str){
    return 0;
  }
  return candidate;
}
// Consumes and returns the current token when it has the given kind; otherwise
// returns 0 and leaves the position untouched.
function Token *
token_match(Token_Kind kind){
  Token *current = token_get();
  if(current->kind != kind){
    return 0;
  }
  return token_next();
}
// Consumes two tokens when the current one has kind `a` and the next one kind `b`,
// returning the first. Otherwise returns 0 and consumes nothing.
function Token *
token_match(Token_Kind a, Token_Kind b){
  Token *first = token_get();
  Token *second = token_get(1);
  if(first->kind != a || second->kind != b){
    return 0;
  }
  token_next();
  token_next();
  return first;
}
// Consumes and returns the current token when it is the given keyword (compared by
// interned pointer); otherwise returns 0 and consumes nothing.
function Token *
token_match_keyword(Intern_String string){
  Token *current = token_get();
  B32 is_wanted_keyword = current->kind == TK_Keyword && string.str == current->intern_val.str;
  if(!is_wanted_keyword){
    return 0;
  }
  return token_next();
}
// Consumes and returns the current token when it has the given kind. Otherwise
// reports a parsing error naming both kinds and returns 0.
function Token *
token_expect(Token_Kind kind){
  Token *current = token_get();
  if(current->kind != kind){
    parsing_error(current, "Expected token of kind: [%s], got instead token of kind: [%s]", name(kind), name(current->kind));
    return 0;
  }
  return token_next();
}
// Forward declaration so the functions below can call parse_expr before its
// definition; `minbp` defaults to 0.
function Ast_Expr *parse_expr(S64 minbp = 0);
// If the current token is an assignment operator, parses "expr <op> value" into a
// binary node that is additionally flagged AST_STMT; otherwise returns `expr`
// unchanged. Binding with [:=] to anything but an identifier is reported as an error.
function Ast_Expr *
parse_init_stmt(Ast_Expr *expr){
  Token *op = token_get();
  if(op->kind == TK_ColonAssign && expr->kind != AST_IDENT){
    parsing_error(expr->pos, "Binding with [:=] to something that is not an identifier");
  }
  if(!token_is_assign(op)){
    return expr;
  }
  token_next();
  Ast_Expr *value = parse_expr();
  Ast_Expr *assignment = ast_expr_binary((Ast_Atom *)expr, value, op);
  assignment->flags = set_flag(assignment->flags, AST_STMT);
  return assignment;
}
// Parses the argument list of a call; parse_expr() invokes it after it has
// consumed the opening parenthesis. Each argument may be:
//   [index] = value    (indexed item)
//   name = value       (named item; the name must be an AST_ATOM)
//   value              (positional item)
// Arguments are comma separated and the list ends at TK_CloseParen.
// NOTE(review): a non-atom on the left of [=] is only caught by an assert -
// confirm whether this should be a parsing_error instead.
function Ast_Call *
parse_expr_call(Ast_Expr *left){
  Scratch scratch;
  Token *call_pos = token_get();
  Array<Ast_Call_Item *> call_items = {scratch};
  while(!token_is(TK_CloseParen)){
    Token *item_pos = token_get();
    Ast_Expr *index = 0;
    Ast_Atom *item_name = 0;

    // Optional "[index] =" prefix
    if(token_match(TK_OpenBracket)){
      index = parse_expr();
      token_expect(TK_CloseBracket);
      token_expect(TK_Assign);
    }

    Ast_Expr *item = parse_expr();

    // "name = value": what was just parsed was actually the name
    if(!index && token_match(TK_Assign)){
      assert(is_flag_set(item->flags, AST_ATOM));
      item_name = (Ast_Atom *)item;
      item = parse_expr();
    }

    call_items.add(ast_call_item(item_pos, index, item_name, item));
    if(!token_match(TK_Comma)) break;
  }
  token_expect(TK_CloseParen);
  return ast_call(call_pos, left, call_items);
}
// Parses ": <expr>" when a colon is present and returns the expression.
// Returns 0 (and consumes nothing) when there is no colon.
function Ast_Expr *
parse_optional_type(){
  if(!token_match(TK_Colon)) return 0;
  return parse_expr();
}
// Forward declaration: parse_block() and parse_struct() call parse_named()
// before its definition further down.
function Ast_Named *parse_named(B32);
// Parses an indented block of statements.
// A block starts with an OPEN_SCOPE token, its statements are separated by
// SAME_SCOPE tokens and it ends with CLOSE_SCOPE.
// Handled statements: return, pass, for, if / else if / else, named
// declarations (via parse_named) and plain expressions / assignments.
// Returns 0 when token_expect(OPEN_SCOPE) fails.
function Ast_Block *
parse_block(){
Ast_Block *block = 0;
if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context
Token *token_block = token_get();
Scratch scratch;
Array<Ast *> stmts = {scratch};
do{
// First token of the statement, used as the statement's position
Token *token = token_get();
if(token_match_keyword(keyword_return)){
Ast_Expr *expr = 0;
// A scope token right after [return] means there is no return value
if(!token_is_scope()) expr = parse_expr();
stmts.add(ast_return(token, expr));
}
else if(token_match_keyword(keyword_pass)){
stmts.add(ast_pass(token));
}
else if(token_match_keyword(keyword_for)){
// for [init] [, [cond] [, iter]] - every part is optional
Ast_Expr *init = 0;
Ast_Expr *cond = 0;
Ast_Expr *iter = 0;
if(!token_is(OPEN_SCOPE)){
if(!token_is(TK_Comma)){
Ast_Expr *expr_first = parse_expr();
init = parse_init_stmt(expr_first);
}
if(token_match(TK_Comma)){
if(!token_is(TK_Comma)) cond = parse_expr();
if(token_match(TK_Comma)){
iter = parse_expr();
iter = parse_init_stmt(iter);
}
}
}
Ast_Block *for_block = parse_block();
stmts.add(ast_for(token, init, cond, iter, for_block));
}
else if(token_match_keyword(keyword_if)){
Array<Ast_If_Node *> if_nodes = {scratch};
Ast_Expr *expr = parse_expr();
Ast_Expr *init_val = parse_init_stmt(expr);
// parse_init_stmt returns a different node only when an assignment followed,
// in that case the condition (if any) comes after a comma
if(init_val != expr){
if(token_match(TK_Comma)) expr = parse_expr();
else expr = 0;
}
// No assignment was parsed, so there is no init statement
if(init_val == expr) init_val = 0;
Ast_Block *if_block = parse_block();
Ast_If_Node *if_node = ast_if_node(token, init_val, expr, if_block);
if_nodes.add(if_node);
// [else] has to sit on the same scope level as the [if]
while(token_is(SAME_SCOPE) && token_is_keyword(keyword_else, 1)){
// Consume SAME_SCOPE, then the [else] keyword which becomes the new position token
token_next();
token = token_next();
if(token_match_keyword(keyword_if)){
Ast_Expr *expr = parse_expr();
Ast_Block *else_if_block = parse_block();
Ast_If_Node *if_node = ast_if_node(token, 0, expr, else_if_block);
if_nodes.add(if_node);
}
else{
Ast_Block *else_block = parse_block();
Ast_If_Node *if_node = ast_if_node(token, 0, 0, else_block);
if_nodes.add(if_node);
break;
}
}
// NOTE(review): [token] was overwritten in the loop above, so when an else branch exists
// the whole if statement is positioned at the last [else] keyword - confirm this is intended
Ast_If *result_if = ast_if(token, if_nodes);
stmts.add(result_if);
}
else{
// Try a named declaration first, then fall back to an expression / assignment
Ast *result = parse_named(false);
if(!result){
result = parse_expr();
result = parse_init_stmt((Ast_Expr *)result);
}
if(result) {
result->flags = set_flag(result->flags, AST_STMT);
stmts.add(result);
}
else {
parsing_error(token, "Unexpected token [%s] while parsing statement", name(token->kind));
}
}
} while(token_match(SAME_SCOPE));
token_expect(CLOSE_SCOPE);
block = ast_block(token_block, stmts);
}
return block;
}
// Parses the rest of a lambda; parse_expr() calls it after consuming the
// opening parenthesis and passes that token as `token`.
// Grammar handled here:
//   [name: type [= default] {, name: type [= default]}] [, ...] ) [: ret] [block]
// [...] marks variadic arguments and ends the parameter list.
function Ast_Lambda *
parse_lambda(Token *token){
  Scratch scratch;
  Array<Ast_Lambda_Arg *> params = {scratch};
  B32 has_var_args = false;

  if(!token_is(TK_CloseParen)){
    do{
      Token *arg_token = token_get();
      if(token_match(TK_ThreeDots)){
        has_var_args = true;
        break;
      }
      if(token_match(TK_Identifier)){
        token_expect(TK_Colon);
        Ast_Expr *typespec = parse_expr();
        Ast_Expr *default_value = 0;
        if(token_match(TK_Assign)){
          default_value = parse_expr();
        }
        params.add(ast_expr_lambda_arg(arg_token, arg_token->intern_val, typespec, default_value));
      }
      else{
        parsing_error(arg_token, "Expected [Identifier] or [...] when parsing lambda arguments");
      }
    }while(token_match(TK_Comma));
  }
  token_expect(TK_CloseParen);

  Ast_Expr *ret = parse_optional_type();
  Ast_Block *block = 0;
  if(token_is(OPEN_SCOPE)){
    block = parse_block();
  }
  return ast_lambda(token, params, has_var_args, ret, block);
}
//-----------------------------------------------------------------------------
// Pratt expression parser
// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
//-----------------------------------------------------------------------------
// Pair of binding powers for an operator: parse_expr() compares `left`
// against the current minimum binding power and passes `right` as the minimum
// when it recurses into the operand.
struct Binding_Power{S64 left;S64 right;};
// Selects which operator table binding_power() consults for a token kind.
enum Binding{Binding_Prefix,Binding_Infix,Binding_Postfix};
// Looks up the binding power of token `kind` in the table selected by `binding`.
// Unknown tokens yield {-1,-1} in the prefix and postfix tables and {} in the
// infix table. A `binding` value outside the enum hits invalid_codepath and
// then falls back to the prefix table.
function Binding_Power
binding_power(Binding binding, Token_Kind kind){
  if(binding == Binding_Infix){
    switch(kind){
      case TK_Or:
        return {9,10};

      case TK_And:
        return {11,12};

      case TK_Equals:
      case TK_NotEquals:
      case TK_GreaterThen:
      case TK_GreaterThenOrEqual:
      case TK_LesserThen:
      case TK_LesserThenOrEqual:
        return {13,14};

      case TK_Sub:
      case TK_Add:
      case TK_BitOr:
      case TK_BitXor:
        return {15,16};

      case TK_RightShift:
      case TK_LeftShift:
      case TK_BitAnd:
      case TK_Mul:
      case TK_Div:
      case TK_Mod:
        return {17,18};

      case TK_Dot:
        return {24,23};

      default: return {};
    }
  }

  if(binding == Binding_Postfix){
    switch(kind){
      case TK_Increment:
      case TK_Decrement:
      case TK_OpenBracket:
      case TK_OpenParen:
        return {21, -2};

      default: return{-1,-1};
    }
  }

  if(binding != Binding_Prefix) invalid_codepath;

  // Prefix table
  switch(kind){
    case TK_OpenBracket:
      return {-2, 22};

    case TK_Increment:
    case TK_Decrement:
    case TK_Pointer:
    case TK_Dereference:
    case TK_Keyword:
    case TK_OpenParen:
    case TK_Sub:
    case TK_Add:
    case TK_Neg:
    case TK_Not:
      return{-2, 20};

    default: return {-1, -1};
  }
}
// Pratt expression parser.
// Consumes one token and parses the prefix part (literal, identifier, unary
// operator, array typespec, cast, lambda or parenthesized expression), then
// loops absorbing postfix and infix operators for as long as their left
// binding power is greater than min_bp.
// Returns the parsed expression; returns 0 right after reporting a token that
// cannot start an expression.
function Ast_Expr *
parse_expr(S64 min_bp){
Ast_Expr *left = 0;
Token *token = token_next();
Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind);
// @note: parse prefix expression
switch(token->kind){
case TK_StringLit : left = ast_str(token, token->intern_val); break;
case TK_Identifier : left = ast_ident(token, token->intern_val); break;
case TK_Integer : left = ast_int(token, token->int_val); break;
case TK_UnicodeLit : left = ast_int(token, token->unicode); break;
case TK_Float : left = ast_float(token, token->f64_val); break;
case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break;
case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break;
case TK_Sub : left = ast_expr_unary(token, TK_Sub, parse_expr(prefix_bp.right)); break;
case TK_Add : left = ast_expr_unary(token, TK_Add, parse_expr(prefix_bp.right)); break;
case TK_Not : left = ast_expr_unary(token, TK_Not, parse_expr(prefix_bp.right)); break;
case TK_Neg : left = ast_expr_unary(token, TK_Neg, parse_expr(prefix_bp.right)); break;
case TK_Increment : left = ast_expr_unary(token, TK_Increment, parse_expr(prefix_bp.right)); break;
case TK_Decrement : left = ast_expr_unary(token, TK_Decrement, parse_expr(prefix_bp.right)); break;
// @note: prefix [expr] or [] followed by the base expression
case TK_OpenBracket: {
Ast_Expr *expr = 0;
if(!token_is(TK_CloseBracket))
expr = parse_expr(0);
Ast_Array *result = ast_array(token, expr);
token_expect(TK_CloseBracket);
result->base = parse_expr(prefix_bp.right);
left = result;
}break;
// @note: only [true], [false] and cast(expr: type) are valid keywords in an expression
case TK_Keyword: {
if(token->intern_val == keyword_true) left = ast_bool(token, 1);
else if(token->intern_val == keyword_false) left = ast_bool(token, 0);
else if(token->intern_val == keyword_cast){
token_expect(TK_OpenParen);
Ast_Expr *expr = parse_expr(0);
token_expect(TK_Colon);
Ast_Expr *typespec = parse_expr(0);
token_expect(TK_CloseParen);
left = ast_expr_cast(token, expr, typespec);
}
else parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
}break;
// @note: "()", "(ident:" and "(..." start a lambda, anything else is a parenthesized expression
case TK_OpenParen: {
if(token_is(TK_CloseParen) || (token_is(TK_Identifier) && token_is(TK_Colon, 1)) || token_is(TK_ThreeDots))
left = parse_lambda(token);
else{
left = parse_expr(0);
token_expect(TK_CloseParen);
}
}break;
default: parsing_error(token, "Unexpected token of kind: [%s] in expression", name(token->kind)); return 0;
}
for(;;){
token = token_get();
// Example: let's say [+] has left:1, right:2 and we parse 2+3+4.
// After the first [+] we recurse with min_bp of 2 to parse the right operand.
// Inside that recursion the second [+] is checked: left(1) > min_bp(2) is false,
// so the recursion doesn't consume it - it breaks and returns 3.
// The second [+] is then handled by this loop in the caller,
// which makes the operator left associative.
Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind);
Binding_Power infix_bp = binding_power(Binding_Infix, token->kind);
// @note: parse postfix expression
if(postfix_bp.left > min_bp){
token_next();
switch(token->kind){
case TK_OpenBracket:{
Ast_Expr *index = parse_expr(0);
token_expect(TK_CloseBracket);
left = ast_expr_index(token, left, index);
}break;
case TK_OpenParen:{
left = parse_expr_call(left);
}break;
default:{
assert(token->kind == TK_Increment || token->kind == TK_Decrement);
// The token itself is rewritten in place to its postfix kind
if(token->kind == TK_Increment) token->kind = TK_PostIncrement;
else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement;
left = ast_expr_unary(token, token->kind, left);
}
}
}
// @note: parse infix expression
else if(infix_bp.left > min_bp){
token = token_next();
Ast_Expr *right = parse_expr(infix_bp.right);
left = ast_expr_binary(left, right, token);
}
else break;
}
return left;
}
// Parses "= <expr>" when an assign token is present and returns the
// expression. Returns 0 (and consumes nothing) when there is no [=].
function Ast_Expr *
parse_assign_expr(){
  if(!token_match(TK_Assign)) return 0;
  return parse_expr();
}
// Parses the body of a struct: a scope of named declarations separated by
// SAME_SCOPE and closed by CLOSE_SCOPE. Constants and variables are collected
// into separate lists and every member is flagged AST_AGGREGATE_CHILD.
// NOTE(review): `named` is dereferenced right after the error report, so this
// relies on parsing_error() not returning - confirm.
function Ast_Struct *
parse_struct(Token *pos){
  Scratch scratch;
  Array<Ast_Var *> members = {scratch};
  Array<Ast_Const *> members_const = {scratch};
  token_match(OPEN_SCOPE);
  for(;;){
    Token *token = token_get();
    Ast_Named *named = parse_named(false);
    if(!named){
      parsing_error(token, "Failed to parse struct member");
    }
    named->flags = set_flag(named->flags, AST_AGGREGATE_CHILD);

    if(named->kind != AST_CONST){
      assert(named->kind == AST_VAR);
      members.add((Ast_Var *)named);
    }
    else{
      members_const.add((Ast_Const *)named);
    }

    if(!token_match(SAME_SCOPE)) break;
  }
  token_expect(CLOSE_SCOPE);
  return ast_struct(pos, members, members_const);
}
// Parses an enum: an optional ": type" followed by a scope of
// "Identifier [= value]" members separated by SAME_SCOPE.
// NOTE(review): token_expect() can return 0 and `name` is dereferenced
// unconditionally, so this relies on parsing_error() not returning - confirm.
function Ast_Enum *
parse_enum(Token *pos){
  Scratch scratch;
  Array<Ast_Enum_Member *> members = {scratch};
  Ast_Expr *typespec = parse_optional_type();
  token_match(OPEN_SCOPE);
  for(;;){
    Token *name = token_expect(TK_Identifier);
    Ast_Expr *value = parse_assign_expr();
    members.add(ast_enum_member(name, name->intern_val, value));
    if(!token_match(SAME_SCOPE)) break;
  }
  token_expect(CLOSE_SCOPE);
  return ast_enum(pos, typespec, members);
}
/*
Needs peeking only because I didn't want to duplicate code
for parsing statements, and it makes the code nicer.
Statements can also use the named syntax, for example: i := value
*/
// Parses a named declaration. Recognized forms:
//   name :: struct ...    constant bound to a struct
//   name :: enum ...      constant bound to an enum
//   name :: expr          constant bound to an expression
//   name : type [= expr]  variable with an explicit type
//   name := expr          variable with an inferred type
// An optional TK_FOREIGN token in front sets AST_FOREIGN on the result.
// is_global: when true an optional leading SAME_SCOPE is skipped, indentation
//   is rejected, and any token other than TK_End that doesn't start a
//   declaration is reported as an error.
// Returns 0 when no declaration starts at the current token; the two-token
// lookahead of token_match() means nothing is consumed in that case (apart
// from TK_FOREIGN / SAME_SCOPE handled above).
function Ast_Named *
parse_named(B32 is_global){
  Ast_Named *result = 0;
  if(is_global) {
    token_match(SAME_SCOPE);
    if(pctx->indent != 0){
      parsing_error(token_get(), "Top level declarations shouldn't be indented");
    }
  }

  Ast_Flag flags = 0;
  if(token_match(TK_FOREIGN)){
    flags = set_flag(flags, AST_FOREIGN);
  }

  Token *tname = token_get();
  if(token_match(TK_Identifier, TK_DoubleColon)){
    // @note parse struct binding
    Token *struct_pos = token_get();
    if(token_match_keyword(keyword_struct)){
      Ast_Struct *struct_val = parse_struct(struct_pos);
      result = ast_const(tname, tname->intern_val, (Ast_Expr *)struct_val);
    }
    else if(token_match_keyword(keyword_enum)){
      Ast_Enum *enum_val = parse_enum(struct_pos);
      result = ast_const(tname, tname->intern_val, (Ast_Expr *)enum_val);
    }
    // @note parse constant expression
    else{
      Ast_Expr *expr = parse_expr();
      result = ast_const(tname, tname->intern_val, expr);
    }
  }
  else if(token_match(TK_Identifier, TK_Colon)){
    // Fixed: the variable used to be assigned inside its own initializer
    Ast_Expr *typespec = parse_expr();
    Ast_Expr *expr = parse_assign_expr();
    result = ast_var(tname, typespec, tname->intern_val, expr);
  }
  else if(token_match(TK_Identifier, TK_ColonAssign)){
    Ast_Expr *expr = parse_expr();
    result = ast_var(tname, 0, tname->intern_val, expr);
  }
  else if(is_global && tname->kind != TK_End){
    parsing_error(tname, "Unexpected token: [%s] when parsing a declaration", name(tname->kind));
  }

  if(result){
    result->flags = set_flag(result->flags, flags);
  }
  return result;
}

View File

@@ -1 +0,0 @@

File diff suppressed because it is too large Load Diff

View File

@@ -1,419 +0,0 @@
//-----------------------------------------------------------------------------
// Symbols
//-----------------------------------------------------------------------------
// Kind of a symbol. operand() treats SYM_CONST symbols as constant,
// non-assignable values and every other kind as an lvalue.
enum Sym_Kind{
SYM_NONE,
SYM_CONST,
SYM_VAR,
};
// Resolution state of a symbol. sym_new() zero-initializes the symbol, so it
// starts as SYM_NOT_RESOLVED; sym_new_resolved() sets SYM_RESOLVED.
// NOTE(review): SYM_RESOLVING is presumably used to detect cyclic
// dependencies while a symbol is being resolved - confirm at its use sites.
enum Sym_State{
SYM_NOT_RESOLVED,
SYM_RESOLVING,
SYM_RESOLVED,
};
// A named entity known to the resolver.
struct Sym{
// Interned name, hashed by sym_insert() / sym_get() for the symbol table
Intern_String name;
Sym_Kind kind;
Sym_State state;
// Ast node that produced this symbol
Ast *ast;
// Macro defined elsewhere; code in this file accesses .value, .type and
// .type_val through it
INLINE_VALUE_FIELDS;
};
// Result of resolving an expression: a value plus two properties of it.
struct Operand{
// Macro defined elsewhere; code in this file accesses .value, .type,
// .type_val, .big_int_val and .intern_val through it
INLINE_VALUE_FIELDS;
// Set for constant values (see operand_type, operand_int, ...)
bool is_const: 1;
// Set when the operand can be assigned to (see operand_lvalue)
bool is_lvalue: 1;
};
// Readable values for the ast_can_be_null argument of resolve_typespec()
enum{AST_CANT_BE_NULL = 0, AST_CAN_BE_NULL = 1};
// Forward declarations of the resolver entry points used by the type
// constructors below; they are defined outside this part of the file.
function Ast_Resolved_Type *resolve_typespec(Ast_Expr *ast, B32 ast_can_be_null = AST_CANT_BE_NULL);
function Sym *resolve_name(Token *pos, Intern_String name);
function Operand resolve_expr(Ast_Expr *ast, Ast_Resolved_Type *compound_required_type = 0, Sym *const_sym = 0);
function Operand resolve_binding(Ast *ast, Sym *sym = 0);
// Placeholder declaration, passed as the ast of builtin type symbols
// (see sym_insert_builtin_type)
global Ast_Named empty_decl = {};
//-----------------------------------------------------------------------------
// Symbol constructors and utils
//-----------------------------------------------------------------------------
// Registers `sym` in the symbol table under the hash of its name.
// A name that is already present is reported as a redefinition.
// Inside a scope (pctx->scope > 0) the symbol is also recorded in
// pctx->local_syms so scope_close() can remove it again.
function void
sym_insert(Sym *sym){
  U64 hash = hash_string(sym->name.s);
  Sym *existing = (Sym *)map_get(&pctx->syms, hash);
  if(existing){
    parsing_error(sym->ast->pos, "Symbol with name: [%s] defined multiple times", sym->name.s.str);
  }
  if(pctx->scope > 0){
    pctx->local_syms.add(sym);
  }
  map_insert(&pctx->syms, hash, sym);
}
// Looks up a symbol by name in the symbol table; returns whatever map_get()
// yields for the name's hash, cast to Sym *.
function Sym *
sym_get(Intern_String name){
  U64 hash = hash_string(name.s);
  return (Sym *)map_get(&pctx->syms, hash);
}
// Enters a new scope. Returns the number of local symbols present before the
// scope was opened; pass it to scope_close() to unwind.
function S64
scope_open(){
  S64 mark = pctx->local_syms.len;
  pctx->scope += 1;
  return mark;
}
// Leaves the current scope: every local symbol added since the matching
// scope_open() is removed from the symbol table and the local symbol list is
// truncated back to `local_sym_count`.
function void
scope_close(S64 local_sym_count){
  pctx->scope -= 1;
  assert(pctx->scope >= 0);

  S64 i = local_sym_count;
  while(i < pctx->local_syms.len){
    Sym *it = pctx->local_syms.data[i];
    void *removed = map_remove(&pctx->syms, hash_string(it->name.s));
    assert(removed);
    i += 1;
  }
  pctx->local_syms.len = local_sym_count;
}
// Records `sym` as the resolution of `ast` in pctx->resolved, keyed by the
// ast pointer. Both arguments must be non-null.
function void
sym_associate(Ast *ast, Sym *sym){
  assert(ast);
  assert(sym);

  map_insert(&pctx->resolved, ast, sym);
}
// Allocates a zeroed symbol from pctx->perm and fills in kind, name and ast.
// When `associate` is set the symbol is also registered as the resolution of
// `ast` through sym_associate().
function Sym *
sym_new(Sym_Kind kind, Intern_String name, Ast *ast, B32 associate = true){
  Sym *sym = exp_alloc_type(pctx->perm, Sym, AF_ZeroMemory);
  sym->kind = kind;
  sym->name = name;
  sym->ast = ast;
  if(associate){
    sym_associate(ast, sym);
  }
  return sym;
}
// Creates a symbol through sym_new() that is already marked SYM_RESOLVED and
// carries `value`.
function Sym *
sym_new_resolved(Sym_Kind kind, Intern_String name, Value value, Ast *ast, B32 associate = true){
  Sym *sym = sym_new(kind, name, ast, associate);
  sym->value = value;
  sym->state = SYM_RESOLVED;
  return sym;
}
// Readable value for the insert_into_scope argument of sym_var() / sym_const()
const B32 INSERT_INTO_SCOPE = true;
// Creates a resolved SYM_VAR symbol of the given type.
// name: interned symbol name
// type: resolved type stored in the symbol's value
// ast: node the symbol is associated with
// insert_into_scope: when true the symbol is also added to the symbol table
// Returns the new symbol.
function Sym *
sym_var(Intern_String name, Ast_Resolved_Type *type, Ast *ast, B32 insert_into_scope = false){
  // Zero-initialize: the whole Value is copied into the symbol, previously
  // every field except .type was left uninitialized
  Value value = {};
  value.type = type;
  Sym *sym = sym_new_resolved(SYM_VAR, name, value, ast);
  if(insert_into_scope) sym_insert(sym);
  return sym;
}
// Creates a resolved SYM_VAR symbol whose value is taken from operand `op`.
// When `insert_into_scope` is set the symbol is also added to the symbol table.
function Sym *
sym_var(Intern_String name, Operand op, Ast *ast, B32 insert_into_scope = false){
  Sym *var_sym = sym_new_resolved(SYM_VAR, name, op.value, ast);
  if(insert_into_scope){
    sym_insert(var_sym);
  }
  return var_sym;
}
// Creates a resolved SYM_CONST symbol whose value is taken from operand `op`.
// When `insert_into_scope` is set the symbol is also added to the symbol table.
function Sym *
sym_const(Intern_String name, Operand op, Ast *ast, B32 insert_into_scope = false){
  Sym *const_sym = sym_new_resolved(SYM_CONST, name, op.value, ast);
  if(insert_into_scope){
    sym_insert(const_sym);
  }
  return const_sym;
}
// Returns the symbol previously associated with `ast` (see sym_associate).
// Asserts that an association exists.
function Sym *
resolved_get(Ast *ast){
  void *found = map_get(&pctx->resolved, ast);
  Sym *result = (Sym *)found;
  assert(result);
  return result;
}
// Returns the type value stored in the symbol associated with `ast`.
// The symbol must describe a type, i.e. its own type has to be type_type.
function Ast_Resolved_Type *
resolved_type_get(Ast_Expr *ast){
  Sym *result = resolved_get(ast);
  // Null check first so a missing type is reported as such instead of being
  // hidden behind the comparison failure
  assert(result->type);
  assert(result->type == type_type);
  return result->type_val;
}
// Creates a resolved SYM_CONST symbol that denotes the type `type`.
// type: the type the symbol stands for, stored in type_val
// ast: node the symbol belongs to
// name: optional interned name
// associate: when true the symbol is registered as the resolution of `ast`
// Returns the new symbol; it is not inserted into the symbol table.
function Sym *
sym_type(Ast_Resolved_Type *type, Ast *ast, Intern_String name = {}, B32 associate = true){
  // Zero-initialize: the whole Value is copied into the symbol, previously
  // the fields not assigned below were left uninitialized
  Value value = {};
  value.type = type_type;
  value.type_val = type;
  Sym *result = sym_new_resolved(SYM_CONST, name, value, ast, associate);
  return result;
}
// Convenience overload: creates a resolved symbol and immediately inserts it
// into the symbol table. Returns the new symbol.
function Sym *
sym_insert(Sym_Kind kind, Intern_String name, Value value, Ast *ast){
  Sym *created = sym_new_resolved(kind, name, value, ast);
  sym_insert(created);
  return created;
}
// Interns `name` and inserts a type symbol for `type` under it.
// The symbol uses empty_decl as its ast and is not associated with it.
function void
sym_insert_builtin_type(String name, Ast_Resolved_Type *type){
  Intern_String interned = intern_string(&pctx->interns, name);
  Sym *type_sym = sym_type(type, &empty_decl, interned, false);
  sym_insert(type_sym);
}
// Inserts the builtin type names into the symbol table, in the order listed.
function void
sym_insert_builtins(){
  struct Builtin_Type{
    String name;
    Ast_Resolved_Type *type;
  };
  Builtin_Type builtins[] = {
    {"void"_s  , type_void},
    {"Bool"_s  , type_bool},
    {"String"_s, type_string},
    {"S8"_s    , type_s8},
    {"S16"_s   , type_s16},
    {"S32"_s   , type_s32},
    {"S64"_s   , type_s64},
    {"U8"_s    , type_u8},
    {"U16"_s   , type_u16},
    {"U32"_s   , type_u32},
    {"U64"_s   , type_u64},
    {"F32"_s   , type_f32},
    {"F64"_s   , type_f64},
  };
  S64 count = (S64)(sizeof(builtins) / sizeof(builtins[0]));
  for(S64 i = 0; i < count; i++){
    sym_insert_builtin_type(builtins[i].name, builtins[i].type);
  }
}
//-----------------------------------------------------------------------------
// Operands
//-----------------------------------------------------------------------------
// Builds an operand from a symbol: copies its type and value.
// SYM_CONST symbols give constant, non-assignable operands; every other kind
// gives a non-constant lvalue.
function Operand
operand(Sym *sym){
  bool sym_is_const = sym->kind == SYM_CONST;

  Operand result = {};
  result.type = sym->type;
  result.is_const = sym_is_const;
  result.is_lvalue = !sym_is_const; // Cant assign to const values
  result.value = sym->value;
  return result;
}
// Constant, non-assignable operand that denotes the type `type`:
// its own type is type_type and `type` is stored in type_val.
function Operand
operand_type(Ast_Resolved_Type *type){
  Operand result = {};
  result.is_const = true;
  result.is_lvalue = false;
  result.type = type_type;
  result.type_val = type;
  return result;
}
// Constant, non-assignable operand of type untyped_int.
// The big integer is copied with bigint_copy() using pctx->perm.
function Operand
operand_int(BigInt big_int){
  Operand result = {};
  result.is_const = true;
  result.is_lvalue = false;
  result.type = untyped_int;
  result.big_int_val = bigint_copy(pctx->perm, &big_int);
  return result;
}
// Constant, non-assignable operand of type type_string holding an interned
// string.
function Operand
operand_str(Intern_String intern_val){
  Operand result = {};
  result.is_const = true;
  result.is_lvalue = false;
  result.type = type_string;
  result.intern_val = intern_val;
  return result;
}
// Constant, non-assignable operand with the given (lambda) type.
function Operand
operand_lambda(Ast_Resolved_Type *type){
  Operand result = {};
  result.is_const = true;
  result.is_lvalue = false;
  result.type = type;
  return result;
}
// Constant operand carrying `value`; is_lvalue stays cleared by the zero
// initialization.
function Operand
operand_const_rvalue(Value value){
  Operand result = {};
  result.value = value;
  result.is_const = true;
  return result;
}
// Non-constant operand of the given type that can be assigned to.
function Operand
operand_lvalue(Ast_Resolved_Type *type){
  Operand result = {};
  result.is_lvalue = true;
  result.is_const = false;
  result.type = type;
  return result;
}
// Non-constant operand of the given type that cannot be assigned to.
function Operand
operand_rvalue(Ast_Resolved_Type *type){
  Operand result = {};
  result.is_lvalue = false;
  result.is_const = false;
  result.type = type;
  return result;
}
//-----------------------------------------------------------------------------
// Hash consed types
//-----------------------------------------------------------------------------
// Allocates a zeroed type from `allocator` and sets its kind, size and
// alignment.
function Ast_Resolved_Type *
type_new(Allocator *allocator, Ast_Resolved_Type_Kind kind, SizeU size, SizeU align){
  Ast_Resolved_Type *fresh = exp_alloc_type(allocator, Ast_Resolved_Type, AF_ZeroMemory);
  fresh->align = align;
  fresh->size = size;
  fresh->kind = kind;
  return fresh;
}
// Allocates a new type from `a` and copies the bytes of `type` into it.
function Ast_Resolved_Type *
type_copy(Allocator *a, Ast_Resolved_Type *type){
  Ast_Resolved_Type *duplicate = exp_alloc_type(a, Ast_Resolved_Type);
  memory_copy(duplicate, type, sizeof(Ast_Resolved_Type));
  return duplicate;
}
// Returns the pointer type to `base`. Pointer types are cached in
// pctx->type_map keyed by the base type pointer, so repeated calls with the
// same base return the same type.
function Ast_Resolved_Type *
type_pointer(Ast_Resolved_Type *base){
  void *key = (void *)base;
  Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, key);
  if(result == 0){
    // First request for this base: create the type and cache it
    result = type_new(pctx->perm, TYPE_POINTER, pointer_size, pointer_align);
    result->base = base;
    map_insert(&pctx->type_map, base, result);
  }
  assert(result->kind == TYPE_POINTER);
  return result;
}
// Returns the array type of `base` with `size` elements. Without an explicit
// size, ARRAY_SIZE_INFERRED is used instead.
// Array types are cached in pctx->type_map under a hash mixed from the base
// pointer and the size. Every array type also stores the hash of its
// inferred-size variant in arr.inferred_size_hash.
function Ast_Resolved_Type *
type_array(Ast_Resolved_Type *base, B32 size_present, S64 size){
  if(!size_present) size = ARRAY_SIZE_INFERRED;

  U64 hash_base = hash_ptr(base);
  U64 hash = hash_mix(hash_base, hash_u64(size));
  Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash);
  if(!result){
    result = type_new(pctx->perm, TYPE_ARRAY, pointer_size, pointer_align);
    result->arr.base = base;
    result->arr.size = size;
    result->arr.inferred_size_hash = hash_mix(hash_base, hash_u64(ARRAY_SIZE_INFERRED));
    map_insert(&pctx->type_map, hash, result);
    return result;
  }

  // Cache hit: make sure the hash didn't collide with a different type
  assert(result->kind == TYPE_ARRAY);
  assert(result->arr.size == size);
  assert(result->arr.base == base);
  return result;
}
// Returns the lambda type with return type `ret` and argument types `args`.
// Lambda types are cached in pctx->type_map under a hash mixed from the
// return type pointer and all argument type pointers, in order.
// A newly created type stores `ast` and a tight copy of `args`.
function Ast_Resolved_Type *
type_lambda(Ast *ast, Ast_Resolved_Type *ret, Array<Ast_Resolved_Type *> args){
  U64 hash = hash_ptr(ret);
  for(S64 i = 0; i < args.len; i++){
    hash = hash_mix(hash, hash_ptr(args.data[i]));
  }

  Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash);
  if(!result){
    result = type_new(pctx->perm, TYPE_LAMBDA, pointer_size, pointer_align);
    result->ast = ast;
    result->func.ret = ret;
    result->func.args = args.tight_copy(pctx->perm);
    map_insert(&pctx->type_map, hash, result);
    return result;
  }

  // Cache hit: sanity check against hash collisions
  assert(result->kind == TYPE_LAMBDA);
  assert(result->func.ret == ret);
  assert(result->func.args.len == args.len);
  return result;
}
// Creates a new enum type for `ast`. The base type comes from the enum's
// optional typespec and falls back to untyped_int; the enum takes its size
// and alignment from that base type.
function Ast_Resolved_Type *
type_enum(Ast_Enum *ast){
  Ast_Resolved_Type *base_type = resolve_typespec(ast->typespec, AST_CAN_BE_NULL);
  if(base_type == 0){
    base_type = untyped_int;
  }

  Ast_Resolved_Type *enum_type = type_new(pctx->perm, TYPE_ENUM, base_type->size, base_type->align);
  enum_type->ast = ast;
  enum_type->base = base_type;
  return enum_type;
}
/*
2022.05.31 - Global scope structs vs nested structs
Structs exist in 2 variants; the global scope structs are a bit different
than scoped structs. They start out incomplete, and when some operation
requires the actual struct size, alignment, field access etc. it
should call type_complete. That resolves all the children, calculates the
size and makes sure there are no cyclic dependencies. This is required for
correct behaviour of order independent structs. If someone just wants a pointer
to that struct we don't need to complete the type, because we know how large a pointer is.
This allows us to have a cyclic dependency as long as it goes through a pointer.
*/
// Creates a TYPE_INCOMPLETE placeholder (size 0, alignment 0) that remembers
// the ast it came from; type_complete() later turns it into a struct type.
function Ast_Resolved_Type *
type_incomplete(Ast *ast){
  Ast_Resolved_Type *placeholder = type_new(pctx->perm, TYPE_INCOMPLETE, 0, 0);
  placeholder->ast = ast;
  return placeholder;
}
// Resolves the members of struct `node` and stores them in `type`.
// While the variable members are being resolved the type is marked
// TYPE_COMPLETING, which lets type_complete() detect cyclic dependencies;
// afterwards it becomes TYPE_STRUCT. Constant members are resolved last.
function void
type_struct_complete(Ast_Resolved_Type *type, Ast_Struct *node){
// @todo: compute size, alignment, offset !!!
// @note: resolve all the struct members first
type->kind = TYPE_COMPLETING;
Scratch scratch;
Array<Ast_Resolved_Member> members = {scratch};
For(node->members){
Operand op = resolve_binding(it);
Intern_String name = ast_get_name(it);
sym_var(name, op, it);
members.add({op.type, name});
}
// The member list was built on scratch memory, keep a copy in pctx->perm
type->agg.members = members.tight_copy(pctx->perm);
type->kind = TYPE_STRUCT;
/*
@note: resolve constant members after the struct got resolved.
This way we avoid a problem where we start resolving a function
that has a parameter of the parent struct type,
which is being resolved right now, so a cyclic dependency happens.
Constants aren't required to make the struct work
*/
For(node->const_members){
Operand op = resolve_binding(it);
Intern_String name = ast_get_name(it);
sym_const(name, op, it);
}
}
// Creates a struct type for `agg` and completes it right away by resolving
// its members through type_struct_complete().
function Ast_Resolved_Type *
type_struct(Ast_Struct *agg){
  Ast_Resolved_Type *struct_type = type_new(pctx->perm, TYPE_STRUCT, 0, 0);
  struct_type->ast = agg;
  type_struct_complete(struct_type, agg);
  return struct_type;
}
// Completes an incomplete struct type on demand.
// - null types and types that are not TYPE_INCOMPLETE are left alone
// - a type that is currently being completed (TYPE_COMPLETING) means the
//   struct depends on itself, which is reported as an error
// After completion the declaration owning the struct is appended to the
// ordered list of the package being resolved.
function void
type_complete(Ast_Resolved_Type *type){
  if(!type) {
    return;
  }
  if(type->kind == TYPE_COMPLETING){
    parsing_error(type->ast->pos, "Cyclic type dependency");
    // Never fall through into completing the same type again,
    // that would recurse endlessly if the error reporter returns
    return;
  }
  if(type->kind != TYPE_INCOMPLETE){
    return;
  }

  Ast_Struct *node = (Ast_Struct *)type->ast;
  type_struct_complete(type, node);
  pctx->resolving_package->ordered.add((Ast_Named *)node->parent);
}