From 4edd2a4799195267305045844f124a0e81099ef6 Mon Sep 17 00:00:00 2001
From: Krzosa Karol
Date: Thu, 9 Jun 2022 17:45:30 +0200
Subject: [PATCH] Big renames, prepare for scope changes

---
 ast.cpp | 697 ++++++++++++++++++++++++++++
 lexing.cpp | 637 +++++++++++++++++++++++++
 parsing.cpp | 607 ++++++++++++++++++++++++
 typechecking.cpp | 1150 ++++++++++++++++++++++++++++++++++++++++++++++
 typechecking.h | 419 +++++++++++++++++
 5 files changed, 3510 insertions(+)
 create mode 100644 ast.cpp
 create mode 100644 lexing.cpp
 create mode 100644 parsing.cpp
 create mode 100644 typechecking.cpp
 create mode 100644 typechecking.h

diff --git a/ast.cpp b/ast.cpp
new file mode 100644
index 0000000..6c0db21
--- /dev/null
+++ b/ast.cpp
@@ -0,0 +1,697 @@
+
+//-----------------------------------------------------------------------------
+// AST
+//-----------------------------------------------------------------------------
+// Node discriminator; stored in Ast::kind by the AST_NEW constructor macro.
+enum Ast_Kind: U32{
+  AST_NONE,
+
+  AST_PACKAGE,
+
+  AST_VALUE,
+  AST_CAST,
+  AST_IDENT,
+  AST_INDEX,
+  AST_UNARY,
+  AST_BINARY,
+  AST_CALL_ITEM,
+  AST_CALL,
+
+  AST_POINTER,
+  AST_ARRAY,
+  AST_FOR,
+  AST_IF,
+  AST_IF_NODE,
+  AST_RETURN,
+  AST_BLOCK,
+  AST_PASS,
+  AST_LAMBDA,
+  AST_LAMBDA_ARG,
+  AST_ENUM,
+  AST_ENUM_MEMBER,
+  AST_STRUCT,
+  AST_CONST,
+  AST_VAR,
+};
+
+// Bit flags that are OR-ed together into Ast::flags.
+typedef U32 Ast_Flag;
+enum{
+  AST_EXPR = bit_flag(1),
+  AST_STMT = bit_flag(2),
+  AST_BINDING = bit_flag(3),
+  AST_AGGREGATE = bit_flag(4),
+  AST_AGGREGATE_CHILD = bit_flag(5),
+  AST_ITEM_INCLUDED = bit_flag(6),
+  AST_ATOM = bit_flag(7),
+  AST_FOREIGN = bit_flag(8),
+};
+
+// Common header shared by the node structs below (they derive from it
+// directly or through Ast_Expr / Ast_Named).
+struct Ast{
+  U64 id;       // assigned from pctx->unique_ids by AST_NEW
+  Token *pos;   // token the node was created at
+
+  Ast_Kind kind;
+  Ast *parent;  // filled in by the constructor of the enclosing node
+  Ast_Flag flags;
+};
+
+struct Ast_Resolved_Type;
+struct Ast_Expr:Ast{};
+
+// VALUE_FIELDS: a type pointer followed by a union that holds one payload.
+// INLINE_VALUE_FIELDS overlays a named `Value value` with the same fields
+// declared anonymously, so both spellings refer to the same storage.
+#define VALUE_FIELDS \
+Ast_Resolved_Type *type; \
+union{ \
+  bool bool_val; \
+  F64 f64_val; \
+  Intern_String intern_val; \
+  BigInt big_int_val;\
+  Ast_Resolved_Type *type_val; \
+};
+#define INLINE_VALUE_FIELDS union{Value value; struct{VALUE_FIELDS};}
+struct Value{VALUE_FIELDS};
+  // BigInt big_int_val;
+
+// Leaf expression carrying a Value; used for literals (AST_VALUE) and
+// identifiers (AST_IDENT).
+struct Ast_Atom: Ast_Expr{
+  INLINE_VALUE_FIELDS;
+};
+
+// One argument of a call; either `name` or `index` may be set.
+struct Ast_Call_Item: Ast_Expr{
+  Ast_Atom *name; // index | name
+  Ast_Expr *index;
+  Ast_Expr *item;
+};
+
+struct Ast_Call: Ast_Expr{
+  Ast_Resolved_Type *type; // @todo: to map
+  Ast_Expr *name;
+  Array exprs;
+};
+
+struct Ast_Unary: Ast_Expr{
+  Token_Kind op;
+  Ast_Expr *expr;
+  U64 padding[3]; // For folding constants into atoms
+};
+
+struct Ast_Cast: Ast_Expr{
+  Ast_Expr *expr;
+  Ast_Expr *typespec;
+};
+
+struct Ast_Index: Ast_Expr{
+  Ast_Expr *expr;
+  Ast_Expr *index;
+};
+
+struct Ast_Binary: Ast_Expr{
+  Token_Kind op;
+  Ast_Expr *left;
+  Ast_Expr *right;
+};
+
+// Problem: We are parsing out of order; in the middle of parsing a function
+// we can jump down into a different function, so we can't use a global map.
+// Each scope needs to have its own list of checked locals. To look up symbols
+// we need to look into the global scope and into the locals list.
+//
+
+struct Ast_Block : Ast {
+  // Stmts for global scope
+  Array stmts;
+  // Array members;
+};
+
+struct Ast_Return: Ast{
+  Ast_Expr *expr;
+};
+
+// One branch of an Ast_If; ast_if_node() asserts that a non-null `init`
+// is an AST_BINARY node.
+struct Ast_If_Node: Ast{
+  Ast_Expr *expr ;
+  Ast_Block *block;
+  Ast_Binary*init;
+};
+
+struct Ast_If: Ast{
+  Array ifs;
+};
+
+struct Ast_Pass: Ast{};
+
+struct Ast_For: Ast{
+  Ast_Expr *init;
+  Ast_Expr *cond;
+  Ast_Expr *iter;
+  Ast_Block *block;
+};
+
+struct Ast_Lambda_Arg: Ast_Expr{
+  Intern_String name;
+  Ast_Expr *typespec;
+  Ast_Expr *default_value;
+};
+
+struct Ast_Lambda : Ast_Expr {
+  Array args;
+  Ast_Expr *ret;   // ast_lambda() substitutes a `void` identifier when null
+  Ast_Block *block;
+  B32 has_var_args;
+};
+
+struct Ast_Array: Ast_Expr{
+  Ast_Expr *base;
+  Ast_Expr *expr;
+};
+
+struct Ast_Named:Ast{
+  Intern_String name;
+};
+
+enum Ast_Decl_State{
+  DECL_NOT_RESOLVED,
+  DECL_RESOLVED,
+  DECL_RESOLVING,
+};
+
+/*
+How the current declaration-order resolver works:
+* First we put all the global declarations into the global scope (when parsing), all unresolved
+  * All the types are declared INCOMPLETE and RESOLVED
+* We 
descend the tree, resolving each of the named declarations; we resolve them by their name.
+  When we start resolving we set the RESOLVING flag, and when we complete we set the RESOLVED flag
+  and put the declaration into the ordered list.
+* When we meet a symbol (named declaration) while descending the tree,
+  we resolve that symbol first, before resolving the current declaration.
+* When we meet a declaration that requires the size of a type - field access, var assignment -
+  we need to call "complete_type", which sets the COMPLETING flag.
+  This call resolves all the dependencies of that type,
+  sets the size of the type, marks it as COMPLETE and puts it into the ordered list.
+  If it detects COMPLETING while
+  resolving, we have a circular dependency. That might happen when a
+  struct contains itself by value (without a pointer).
+
+
+
+We need a new algorithm to process structs.
+We probably want default values, so constants
+can be evaluated first. Also, it's pretty weird that
+only the top scope gets special treatment.
+
+Idea 1
+New algorithm: at some point in a constant we might
+see the struct inside which the constant is declared. The struct is
+unresolved, so we probably call resolve_name on the struct,
+and the struct continues resolving, omitting already-resolved declarations and
+declarations that require the struct size. The problem is what happens
+when we meet a member that references a constant and that constant
+has a reference to the struct.
+
+Idea 2
+We first resolve members without default values, adding their dependencies to a queue;
+then we resolve constants. At the end we resolve the queued values.
+*/
+
+struct Ast_Scope{
+  Array resolved;
+  Array children;
+  Array constants;
+
+};
+
+struct Ast_Decl: Ast{
+  Ast_Decl_State state;
+  // kind:AST_CONST: subkind:AST_STRUCT, AST_ENUM, AST_EXPR(can be TYPE_TYPE), AST_LAMBDA
+  // kind:AST_VAR  : subkind:AST_EXPR, AST_LAMBDA
+  Intern_String name;
+  Ast_Kind sub_kind;
+  Ast_Scope *scope;
+  Ast_Resolved_Type *type;
+};
+
+struct Ast_Var: Ast_Named{
+  Ast_Expr *typespec;
+  Ast_Expr *expr;
+};
+
+struct Ast_Const;
+struct Ast_Resolved_Type;
+struct Ast_Struct: Ast{
+  // Required to be Ast_Struct or Ast_Var or Ast_Const
+  Array members;
+  Array const_members;
+  Ast_Resolved_Type *type;
+};
+
+struct Ast_Enum_Member: Ast{
+  Intern_String name;
+  Ast_Expr *value;
+};
+
+struct Ast_Enum: Ast{
+  Ast_Expr *typespec;
+  Array members;
+};
+
+// Named constant. The union gives differently typed views of the single
+// bound node pointer (expression, struct or enum).
+struct Ast_Const: Ast_Named{
+  union{
+    Ast *ast;
+    Ast_Expr *value;
+    Ast_Struct *agg;
+    Ast_Enum *enu;
+  };
+};
+
+struct Ast_Package:Ast{
+  Intern_String name;
+  Array decls;
+  Array ordered;
+};
+
+//-----------------------------------------------------------------------------
+// AST Constructors beginning with expressions
+//-----------------------------------------------------------------------------
+// Declares a local `result` of type Ast_<T>*, zero-allocated from pctx->perm,
+// and fills in flags, kind, pos and a fresh id. The last statement has no
+// trailing semicolon; the use site supplies it.
+#define AST_NEW(T,ikind,ipos,iflags) \
+  Ast_##T *result = exp_alloc_type(pctx->perm, Ast_##T, AF_ZeroMemory);\
+  result->flags = iflags; \
+  result->kind = AST_##ikind; \
+  result->pos = ipos; \
+  result->id = ++pctx->unique_ids
+
+// String literal atom, typed untyped_string.
+function Ast_Atom *
+ast_str(Token *pos, Intern_String string){
+  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
+  result->type = untyped_string;
+  result->intern_val = string;
+  return result;
+}
+
+// Identifier atom; the name is stored in intern_val and no type is set here.
+function Ast_Atom *
+ast_ident(Token *pos, Intern_String string){
+  AST_NEW(Atom, IDENT, pos, AST_EXPR | AST_ATOM);
+  result->intern_val = string;
+  return result;
+}
+
+// Boolean literal atom, typed untyped_bool.
+function Ast_Atom *
+ast_bool(Token *pos, B32 bool_val){
+  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
+  result->bool_val = bool_val;
+  result->type = untyped_bool;
+  return result;
+}
+
+// Float literal atom, typed untyped_float.
+function Ast_Atom *
+ast_float(Token *pos, F64 value){
+  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
+  result->f64_val = value;
+  result->type    = untyped_float;
+  return result;
+}
+
+// Integer literal atom; the BigInt is copied into the permanent arena.
+function Ast_Atom *
+ast_int(Token *pos, BigInt val){
+  AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
+  result->big_int_val = bigint_copy(pctx->perm, &val);
+  result->type        = untyped_int;
+  return result;
+}
+
+// Convenience overload: wraps a U64 into a BigInt first.
+function Ast_Atom *
+ast_int(Token *pos, U64 value){
+  BigInt big = bigint_u64(value);
+  return ast_int(pos, big);
+}
+
+// Binary expression positioned at the operator token. `left` is required,
+// `right` may be null.
+function Ast_Expr *
+ast_expr_binary(Ast_Expr *left, Ast_Expr *right, Token *op){
+  AST_NEW(Binary, BINARY, op, AST_EXPR);
+  result->left  = left;
+  result->right = right;
+  result->op    = op->kind;
+  left->parent = result;
+  if(right) right->parent = result;
+  return result;
+}
+
+// Call expression; the argument array is tight-copied into the permanent arena.
+function Ast_Call *
+ast_call(Token *pos, Ast_Expr *name, Array exprs){
+  AST_NEW(Call, CALL, pos, AST_EXPR);
+  result->exprs = exprs.tight_copy(pctx->perm);
+  result->name  = name;
+  if(name) name->parent = result;
+  For(result->exprs) it->parent = result;
+  return result;
+}
+
+// One call argument. `name` and `index` are optional, `item` is required.
+function Ast_Call_Item *
+ast_call_item(Token *pos, Ast_Expr *index, Ast_Atom *name, Ast_Expr *item){
+  AST_NEW(Call_Item, CALL_ITEM, pos, AST_EXPR);
+  result->item  = item;
+  result->index = index;
+  result->name  = name;
+  if(name)  name->parent  = result;
+  if(index) index->parent = result;
+  item->parent = result;
+  return result;
+}
+
+// Cast expression; both operands are required. AST_NEW already stores
+// AST_EXPR in flags.
+function Ast_Expr *
+ast_expr_cast(Token *pos, Ast_Expr *expr, Ast_Expr *typespec){
+  AST_NEW(Cast, CAST, pos, AST_EXPR);
+  result->typespec = typespec;
+  result->expr     = expr;
+  typespec->parent = result;
+  expr->parent     = result;
+  return result;
+}
+
+// Unary expression; `expr` is required. AST_NEW already stores AST_EXPR
+// in flags.
+function Ast_Expr *
+ast_expr_unary(Token *pos, Token_Kind op, Ast_Expr *expr){
+  AST_NEW(Unary, UNARY, pos, AST_EXPR);
+  result->op   = op;
+  result->expr = expr;
+  expr->parent = result;
+  return result;
+}
+
+function 
Ast_Expr *
+ast_expr_index(Token *pos, Ast_Expr *expr, Ast_Expr *index){
+  // Index expression; both operands are required. AST_NEW already stores
+  // AST_EXPR in flags.
+  AST_NEW(Index, INDEX, pos, AST_EXPR);
+  result->index = index;
+  result->expr  = expr;
+  index->parent = result;
+  expr->parent  = result;
+  return result;
+}
+
+// Lambda node. A missing return type is replaced by a `void` identifier
+// created at the same position; `block` may be null.
+function Ast_Lambda *
+ast_lambda(Token *pos, Array params, B32 has_var_args, Ast_Expr *ret, Ast_Block *block){
+  AST_NEW(Lambda, LAMBDA, pos, AST_EXPR);
+  result->args         = params.tight_copy(pctx->perm);
+  result->has_var_args = has_var_args;
+  result->block        = block;
+  if(ret) result->ret = ret;
+  else    result->ret = ast_ident(pos, intern_void);
+
+  result->ret->parent = result;
+  if(block) block->parent = result;
+  For(result->args) it->parent = result;
+  return result;
+}
+
+// Lambda parameter; `typespec` is required, `default_value` is optional.
+function Ast_Lambda_Arg *
+ast_expr_lambda_arg(Token *pos, Intern_String name, Ast_Expr *typespec, Ast_Expr *default_value){
+  AST_NEW(Lambda_Arg, LAMBDA_ARG, pos, AST_EXPR);
+  result->default_value = default_value;
+  result->typespec      = typespec;
+  result->name          = name;
+  typespec->parent = result;
+  if(default_value) default_value->parent = result;
+  return result;
+}
+
+// Statement block; the statement array is tight-copied into the permanent arena.
+function Ast_Block *
+ast_block(Token *pos, Array stmts){
+  AST_NEW(Block, BLOCK, pos, AST_STMT);
+  result->stmts = stmts.tight_copy(pctx->perm);
+  For(result->stmts){
+    it->parent = result;
+  }
+  return result;
+}
+
+// If statement made of a list of branches.
+function Ast_If *
+ast_if(Token *pos, Array ifs){
+  AST_NEW(If, IF, pos, AST_STMT);
+  result->ifs = ifs.tight_copy(pctx->perm);
+  For(result->ifs){
+    it->parent = result;
+  }
+  return result;
+}
+
+function Ast_For *
+ast_for(Token *pos, Ast_Expr *init, Ast_Expr *cond, Ast_Expr *iter, Ast_Block *block){
+  AST_NEW(For, FOR, pos, AST_STMT);
+  result->init = init;
+  result->cond = cond;
+  result->iter = iter;
+  result->block = block;
+  if(result->init) result->init->parent = result;
+  if(result->cond) result->cond->parent = result;
+  if(result->iter) result->iter->parent = 
result;
+  result->block->parent = result;
+  return result;
+}
+
+// `pass` statement; carries nothing besides the common header.
+function Ast_Pass *
+ast_pass(Token *pos){
+  AST_NEW(Pass, PASS, pos, AST_STMT);
+  return result;
+}
+
+// Return statement; `expr` is optional but must be an expression when given.
+function Ast_Return *
+ast_return(Token *pos, Ast_Expr *expr){
+  AST_NEW(Return, RETURN, pos, AST_STMT);
+  if(!expr) return result;
+
+  assert(is_flag_set(expr->flags, AST_EXPR));
+  expr->parent = result;
+  result->expr = expr;
+  return result;
+}
+
+// One branch of an if statement; every child is optional. A non-null `init`
+// must be a binary node.
+function Ast_If_Node *
+ast_if_node(Token *pos, Ast_Expr *init, Ast_Expr *expr, Ast_Block *block){
+  AST_NEW(If_Node, IF_NODE, pos, AST_STMT);
+  result->init  = (Ast_Binary *)init;
+  result->expr  = expr;
+  result->block = block;
+  if(block) block->parent = result;
+  if(expr)  expr->parent  = result;
+  if(init){
+    assert(init->kind == AST_BINARY);
+    init->parent = result;
+  }
+  return result;
+}
+
+// Array type expression; only `expr` is filled in here, `base` stays zeroed.
+function Ast_Array *
+ast_array(Token *pos, Ast_Expr *expr){
+  AST_NEW(Array, ARRAY, pos, AST_EXPR);
+  result->expr = expr;
+  if(expr) expr->parent = result;
+  return result;
+}
+
+// Enum member with an optional explicit value.
+function Ast_Enum_Member *
+ast_enum_member(Token *pos, Intern_String name, Ast_Expr *default_value){
+  AST_NEW(Enum_Member, ENUM_MEMBER, pos, AST_AGGREGATE_CHILD);
+  result->value = default_value;
+  result->name  = name;
+  if(default_value) default_value->parent = result;
+  return result;
+}
+
+// Enum with an optional typespec; members are tight-copied into the permanent arena.
+function Ast_Enum *
+ast_enum(Token *pos, Ast_Expr *typespec, Array members){
+  AST_NEW(Enum, ENUM, pos, AST_AGGREGATE);
+  result->typespec = typespec;
+  result->members  = members.tight_copy(pctx->perm);
+  if(typespec) typespec->parent = result;
+  For(result->members) it->parent = result;
+  return result;
+}
+
+function Ast_Struct *
+ast_struct(Token *pos, Array members, Array const_members){
+  AST_NEW(Struct, STRUCT, pos, AST_AGGREGATE);
+  result->members = members.tight_copy(pctx->perm);
+  result->const_members = const_members.tight_copy(pctx->perm);
+  For(result->members) {
+    assert(is_flag_set(it->flags, 
AST_BINDING));
+    assert(it->kind == AST_VAR);
+    it->parent = result;
+  }
+  For(result->const_members) {
+    assert(is_flag_set(it->flags, AST_BINDING));
+    assert(it->kind == AST_CONST);
+    it->parent = result;
+  }
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Declarations
+//-----------------------------------------------------------------------------
+// Variable binding; both `typespec` and `expr` are optional.
+function Ast_Var *
+ast_var(Token *pos, Ast_Expr *typespec, Intern_String name, Ast_Expr *expr){
+  AST_NEW(Var, VAR, pos, AST_BINDING);
+  result->expr = expr;
+  result->typespec = typespec;
+  result->name = name;
+  if(result->expr) result->expr->parent = result;
+  if(result->typespec) result->typespec->parent = result;
+  return result;
+}
+
+// Constant binding; `value` is required and must be flagged as an aggregate
+// or an expression.
+function Ast_Const *
+ast_const(Token *pos, Intern_String name, Ast_Expr *value){
+  assert(is_flag_set(value->flags, AST_AGGREGATE) || is_flag_set(value->flags, AST_EXPR) );
+  AST_NEW(Const, CONST, pos, AST_BINDING);
+  result->value = value;
+  result->name = name;
+  result->value->parent = result;
+  return result;
+}
+
+// Package node; `ordered` is created with room for decls.len entries.
+function Ast_Package *
+ast_package(Token *pos, String name, Array decls){
+  AST_NEW(Package, PACKAGE, pos, 0);
+  result->decls = decls.tight_copy(pctx->perm);
+  result->ordered = array_make(pctx->perm, decls.len);
+  result->name = intern_string(&pctx->interns, name);
+  For(result->decls) it->parent = result;
+  return result;
+}
+
+//-----------------------------------------------------------------------------
+// Value
+//-----------------------------------------------------------------------------
+// The Value constructors start from `= {}` (the same idiom as `Token t = {}`
+// in the lexer) so that union bytes not covered by the stored payload are not
+// left indeterminate when the Value is copied around.
+
+// Untyped boolean value.
+function Value
+value_bool(B32 v){
+  Value value = {};
+  value.bool_val = v;
+  value.type = untyped_bool;
+  return value;
+}
+
+// Untyped integer value; the BigInt is copied by value.
+function Value
+value_int(BigInt b){
+  Value value = {};
+  value.big_int_val = b;
+  value.type = untyped_int;
+  return value;
+}
+
+// Untyped integer value built from a signed 64-bit integer.
+function Value
+value_int(S64 s64){
+  Value value = {};
+  value.type = untyped_int;
+  bigint_init_signed(&value.big_int_val, s64);
+  return value;
+}
+
+function Value 
+value_float(F64 b){
+  // Untyped float value. Starting from `= {}` keeps the union bytes that
+  // the F64 does not cover from being indeterminate.
+  Value value = {};
+  value.f64_val = b;
+  value.type = untyped_float;
+  return value;
+}
+
+// Untyped float value converted from a BigInt.
+function Value
+value_float(BigInt a){
+  Value value = {};
+  value.f64_val = bigint_as_float(&a);
+  value.type = untyped_float;
+  return value;
+}
+
+
+//-----------------------------------------------------------------------------
+// Utilities
+//-----------------------------------------------------------------------------
+// Returns the struct bound by a constant, or 0 when the constant binds
+// something else. `ast` must be an AST_CONST node.
+function Ast_Struct *
+const_try_getting_struct(Ast *ast){
+  assert(ast->kind == AST_CONST);
+  Ast_Const *constant = (Ast_Const *)ast;
+  if(constant->value->kind == AST_STRUCT){
+    return (Ast_Struct *)constant->value;
+  }
+  return 0;
+}
+
+// Like const_try_getting_struct, but asserts that a struct was found.
+function Ast_Struct *
+const_get_struct(Ast *ast){
+  auto result = const_try_getting_struct(ast);
+  assert(result);
+  return result;
+}
+
+// Returns the lambda bound by a constant, or 0 when the constant binds
+// something else. `ast` must be an AST_CONST node.
+function Ast_Lambda *
+const_try_getting_lambda(Ast *ast){
+  assert(ast->kind == AST_CONST);
+  Ast_Const *constant = (Ast_Const *)ast;
+  if(constant->value->kind == AST_LAMBDA){
+    return (Ast_Lambda *)constant->value;
+  }
+  return 0;
+}
+
+// Like const_try_getting_lambda, but asserts that a lambda was found.
+function Ast_Lambda *
+const_get_lambda(Ast *ast){
+  auto result = const_try_getting_lambda(ast);
+  assert(result);
+  return result;
+}
+
+// Name of a binding node; `ast` must carry the AST_BINDING flag.
+function Intern_String
+ast_get_name(Ast *ast){
+  assert(is_flag_set(ast->flags, AST_BINDING));
+  auto constant = (Ast_Named *)ast;
+  return constant->name;
+}
+
+// True when `ast` is a constant whose bound node is a struct.
+function B32
+ast_is_struct(Ast *ast){
+  if(ast->kind == AST_CONST){
+    auto a = (Ast_Const *)ast;
+    B32 result = a->agg->kind == AST_STRUCT;
+    return result;
+  }
+  return false;
+}
+
+function B32
+is_ident(Ast *ast){
+  B32 result = ast->kind == AST_IDENT;
+  return result;
+}
+
+function B32
+is_binary(Ast *ast){
+  B32 result = ast->kind == AST_BINARY;
+  return result;
+}
+
+function B32
+is_atom(Ast *ast){
+  B32 result = is_flag_set(ast->flags, AST_ATOM);
+  return result;
+}
+
+function Ast *
+query_struct(Ast_Struct *agg, Intern_String string){
+  For(agg->members){
+    if(it->name == string){
+      return it;
+    }
+  }
+  
For(agg->const_members){
+    if(it->name == string){
+      return it;
+    }
+  }
+  return 0;
+}
+
+// Finds an enum member by name; returns 0 when there is no such member.
+function Ast_Enum_Member *
+query_enum(Ast_Enum *enu, Intern_String string){
+  For(enu->members){
+    if(it->name == string) return it;
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/lexing.cpp b/lexing.cpp
new file mode 100644
index 0000000..513cf5b
--- /dev/null
+++ b/lexing.cpp
@@ -0,0 +1,637 @@
+force_inline B32 token_is_assign(Token_Kind token){return token >= TK_FirstAssign && token <= TK_LastAssign;}
+force_inline B32 token_is_assign(Token *token){return token_is_assign(token->kind);}
+force_inline B32 token_is_compare(Token_Kind token){return token >= TK_FirstCompare && token <= TK_LastCompare;}
+force_inline B32 token_is_compare(Token *token){return token_is_compare(token->kind);}
+global Token token_null = {SAME_SCOPE};
+
+// Character at the cursor.
+function U8
+lexc(Lex_Stream *s){
+  return s->stream.str[s->iter];
+}
+
+// Character `i` positions away from the cursor.
+function U8
+lexci(Lex_Stream *s, S32 i){
+  return s->stream.str[s->iter+i];
+}
+
+// Pointer to the character at the cursor.
+function U8 *
+lexcp(Lex_Stream *s){
+  return s->stream.str + s->iter;
+}
+
+// Space or carriage return only.
+function B32
+lex_is_whitespace(U8 c){
+  return c == ' ' || c == '\r';
+}
+
+// ASCII letter, either case.
+function B32
+lex_is_alphabetic(U8 c){
+  B32 upper = c >= 'A' && c <= 'Z';
+  B32 lower = c >= 'a' && c <= 'z';
+  return upper || lower;
+}
+
+// ASCII decimal digit.
+function B32
+lex_is_numeric(U8 c){
+  return c >= '0' && c <= '9';
+}
+
+// ASCII letter or decimal digit.
+function B32
+lex_is_alphanumeric(U8 c){
+  return lex_is_numeric(c) || lex_is_alphabetic(c);
+}
+
+// Sets the token length to the distance from its start to the cursor.
+function void
+lex_set_len(Lex_Stream *s, Token *token){
+  U8 *cursor = lexcp(s);
+  assert(cursor >= token->str);
+  token->len = cursor - token->str;
+}
+
+// Interns every keyword and records the first and last interned pointers,
+// which lex_is_keyword later uses as a range.
+function void
+lex_set_keywords(Lexer *lexer, Array keywords){
+  Intern_String keyword = {};
+  For(keywords){
+    keyword = intern_string(&lexer->interns, it);
+    if(&it == keywords.begin()){
+      lexer->interns.first_keyword = keyword.str;
+    }
+  }
+  lexer->interns.last_keyword = keyword.str;
+}
+
+function B32
+lex_is_keyword(Intern_Table 
*lexer, Intern_String keyword){
+  B32 result = keyword.str >= lexer->first_keyword && keyword.str <= lexer->last_keyword;
+  return result;
+}
+
+// Turns `t` into an error token carrying `error_val`.
+function void
+token_error(Token *t, String error_val){
+  t->kind = TK_Error;
+  t->error_val = error_val;
+}
+
+// Parses the decimal digits in t->str[0..len) into a BigInt. Temporaries are
+// allocated from scratch; the final value is copied into lexer->arena.
+function void
+lex_parse_u64(Lexer *lexer, Token *t){
+  Scratch scratch;
+  Set_BigInt_Allocator(scratch);
+
+  t->kind = TK_Integer;
+  BigInt m = bigint_u64(1); // @leak, it accumulates and potentially needs allocation
+  BigInt val10 = bigint_u64(10);
+  BigInt result = bigint_u64(0);
+
+  // Walk from the least significant digit; `m` is the current power of ten.
+  for(S64 i = t->len - 1; i >= 0; --i){
+    BigInt val = bigint_u64(t->str[i] - '0'); // I dont think this is a leak, too small
+    BigInt new_val = bigint_mul(&val, &m); // @leak
+    result = bigint_add(&result, &new_val); // @leak
+    m = bigint_mul(&m, &val10); // @leak
+  }
+
+  t->int_val = bigint_copy(lexer->arena, &result);
+}
+
+// Parses the token text as a double. The text is copied into a fixed buffer,
+// so at most 126 characters are passed to strtod.
+function void
+lex_parse_f64(Token *t){
+  t->kind = TK_Float;
+  char buffer[128];
+  S64 len = clamp_top((int)t->len, 126);
+  memory_copy(buffer, t->str, len);
+  buffer[len] = 0;
+  t->f64_val = strtod(buffer, 0);
+}
+
+// Moves the cursor one character forward, keeping line and line_begin up to
+// date. Does nothing once the cursor has reached stream.len.
+function void
+lex_advance(Lex_Stream *s){
+  if(s->iter >= s->stream.len){
+    return;
+  }
+  else if(lexc(s) == '\n'){
+    s->iter++;
+    s->line++;
+    s->line_begin = lexcp(s);
+  }
+  else{
+    s->iter++;
+  }
+}
+
+// Consumes a string body up to and including the closing delimiter `c`,
+// skipping the character after each backslash. On success the token length
+// is set; on end of input the token becomes an error token.
+function void
+lex_parse_string(Lex_Stream *s, Token *t, U8 c){
+  for(;;){
+    // lex_advance() never moves past stream.len, so the end of input has to
+    // be detected by position as well as by a NUL byte; otherwise a stream
+    // without a terminating NUL would keep this loop spinning forever.
+    if(s->iter >= s->stream.len || lexc(s) == 0){
+      token_error(t, "Unterminated string, reached end of file"_s);
+      break;
+    }
+    if(lexc(s) == '\\') lex_advance(s);
+    else if(lexc(s) == c) break;
+    lex_advance(s);
+  }
+  if(t->kind != TK_Error){
+    lex_advance(s);
+    lex_set_len(s,t);
+  }
+}
+
+// Consumes [A-Za-z0-9_]* and interns the resulting token text.
+function void
+lex_parse_ident(Intern_Table *table, Lex_Stream *s, Token *t){
+  while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
+    lex_advance(s);
+  lex_set_len(s,t);
+  t->intern_val = intern_string(table, t->string);
+}
+
+#define CASE2(op, OpName, Assign) \
+  case op: \
+  if (lexc(s) == '=') { \
+    lex_advance(s); \
+    t.kind = Assign; \
+  } else { \
+    
t.kind = OpName; \
+  } \
+  break
+// CASE3: like CASE2, plus a doubled operator (e.g. "++") mapping to `Incr`.
+// Both macros expect `s` and `t` to be in scope at the use site.
+#define CASE3(op, OpName, Assign, Incr) \
+  case op: \
+  if (lexc(s) == '=') { \
+    lex_advance(s); \
+    t.kind = Assign; \
+  } else if (lexc(s) == op) { \
+    lex_advance(s); \
+    t.kind = Incr; \
+  } else { \
+    t.kind = OpName; \
+  } \
+  break
+
+// Creates a zeroed token at the given position and gives it the next debug id.
+function Token
+token_make(Lexer *lexer, U8 *str, String file, int line, U8 *line_begin){
+  Token t = {};
+  t.str = str;
+  t.file = file;
+  t.line = line;
+  t.line_begin = line_begin;
+  t.di = lexer->token_debug_ids++;
+  return t;
+}
+
+// Creates a token at the lexer's current stream position.
+function Token
+token_make(Lexer *lexer){
+  return token_make(lexer, lexcp(&lexer->stream), lexer->stream.file, lexer->stream.line, lexer->stream.line_begin);
+}
+
+// Top of the indent stack, or token_null when the stack is empty.
+function Token *
+lex_last_indent_token(Lex_Stream *s){
+  if(s->indent_stack.len > 0){
+    return *s->indent_stack.last();
+  }
+  return &token_null;
+}
+
+// True for the three scope token kinds.
+function B32
+lex_is_scope(Token *t){
+  B32 result = t->kind == OPEN_SCOPE || t->kind == CLOSE_SCOPE || t->kind == SAME_SCOPE;
+  return result;
+}
+
+// Walks the indent stack from the top. Every entry indented deeper than `t`
+// is popped and emits a CLOSE_SCOPE copy of `t`; an entry with equal indent
+// emits SAME_SCOPE and stops; a shallower entry means the indent matches no
+// open scope, so an error token is emitted and the walk stops.
+function void
+lex_unwind_indent_stack(Token *t, Lex_Stream *s, Array *array){
+  for(S64 i = s->indent_stack.len-1; i >= 0; i-=1){
+    auto it = s->indent_stack.data[i];
+    assert(lex_is_scope(it));
+    if(it->indent == t->indent){
+      t->kind = SAME_SCOPE;
+      array->add(*t);
+      break;
+    }
+    else if(it->indent < t->indent){
+      token_error(t, "Bad indentation"_s);
+      array->add(*t);
+      break;
+    }
+    else{
+      s->indent_stack.pop();
+      t->kind = CLOSE_SCOPE;
+      array->add(*t);
+    }
+  }
+}
+
+// Tokenizes lexer->stream into lexer->tokens. Each iteration of the outer
+// loop first handles indentation/scope tokens and then one regular token.
+function void
+lex__stream(Lexer *lexer){
+  Intern_Table *table = &lexer->interns;
+  Array *array = &lexer->tokens;
+  Lex_Stream *s = &lexer->stream;
+
+  B32 beginning = true;
+  for(;;){
+    if(lexc(s) == 0 || s->iter >= s->stream.len){
+      end_of_stream:
+      // Close every scope that is still open, then stop.
+      Token t = token_make(lexer);
+      lex_unwind_indent_stack(&t, s, array);
+      break;
+    }
+
+    // @note: the lexer is going to be a 2 stage process
+    // first we tokenize the indentation and then proceed to tokenize
+    // the good stuff
+
+    // for blocks of stmts we parse till we cant find 
another new line
+    // of same scope.
+    // parse_decl doesn't require preceding new line
+    //
+    // in that way new lines act as commas in function params
+    // seeing a comma means that there is a next thing to parse
+    // and it's easy to parse stuff using a do while loop
+
+    // @note: first handle indentation
+    // mostly we want to merge multiple new lines
+    // but for down scopes we want to emit 2 new lines
+    // that will ease out parsing, one token to break out
+    // from a block parsing, second to allow continuation of surrounding scope
+    Token t = token_make(lexer);
+    // A scope token is emitted only at the very start of the stream or after
+    // a newline has been seen.
+    B32 should_emit = beginning;
+    for(;;){
+      switch(lexc(s)){
+        case 0 : goto end_of_stream; break;
+        // Tabs and spaces each count as one unit of indentation.
+        case '\t': case ' ': lex_advance(s); t.indent++; break;
+        case '\r': lex_advance(s); break;
+        case '/': {
+          // Line comment: skip up to (not including) the newline.
+          if(lexci(s,1) == '/'){
+            lex_advance(s); lex_advance(s);
+            t.kind = TK_Comment;
+            for(;;){
+              if(lexc(s) == '\n' || lexc(s) == 0) break;
+              lex_advance(s);
+            }
+          }
+          // Block comment: skip through the closing "*/".
+          else if(lexci(s,1) == '*'){
+            lex_advance(s); lex_advance(s);
+            t.kind = TK_Comment;
+            for(;;){
+              if(lexc(s) == '*' && lexci(s,1) == '/'){
+                lex_advance(s); lex_advance(s);
+                break;
+              }
+              else if(lexc(s) == 0){
+                token_error(&t, "Unterminated block comment"_s);
+                break;
+              }
+              lex_advance(s);
+            }
+          }
+          // A lone '/' is an operator; leave it for the token stage.
+          else goto indent_loop_break;
+        } break;
+
+        // @todo: add [;;] operator which adds new scope
+        // @todo: also need some way to detect indentation so that
+        // first of all we can check for consistency and second of
+        // all because we would know by how much to indent
+        // @todo: after detecting indentation 2 spaces would become 1 indent value
+        // ';' emits a SAME_SCOPE token at the indent of the innermost open scope.
+        case ';' : {
+          Token semi = token_make(lexer);
+          Token *last = lex_last_indent_token(s);
+          semi.kind = SAME_SCOPE;
+          semi.indent = last->indent;
+          lex_advance(s);
+          array->add(semi);
+        } break;
+
+        // A newline restarts indentation counting with a fresh token.
+        case '\n':{
+          lex_advance(s);
+          should_emit = true;
+          t = token_make(lexer);
+        } break;
+
+        default:{
+          // Inside (), {} or [] line breaks do not produce scope tokens.
+          if(s->inside_brace_paren) should_emit = false;
+          if(should_emit){
+            Token *last = 
lex_last_indent_token(s);
+            // Deeper than the innermost scope: open a new one and remember it.
+            if(t.indent > last->indent){
+              t.kind = OPEN_SCOPE;
+              array->add(t);
+              s->indent_stack.add(array->last());
+            }
+
+            // Shallower: close scopes until the indent matches.
+            else if(t.indent < last->indent){
+              lex_unwind_indent_stack(&t, s, array);
+            }
+            else {
+              t.kind = SAME_SCOPE;
+              array->add(t);
+            }
+          }
+
+          goto indent_loop_break;
+        }
+      }
+    } indent_loop_break:
+    beginning = false;
+
+    // @note: handle the indented token
+    t = token_make(lexer);
+    lex_advance(s);
+    // The first character is already consumed; lexc(s) is now the lookahead.
+    switch(*t.str){
+      case 0 : goto end_of_stream; break;
+      case '@': t.kind = TK_At; break;
+      case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break;
+      case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break;
+      case '{': s->inside_brace_paren++; t.kind = TK_OpenBrace; break;
+      case '}': s->inside_brace_paren--; t.kind = TK_CloseBrace; break;
+      case '[': s->inside_brace_paren++; t.kind = TK_OpenBracket; break;
+      case ']': s->inside_brace_paren--; t.kind = TK_CloseBracket; break;
+      case ',': t.kind = TK_Comma; break;
+      case '~': t.kind = TK_Neg; break;
+      case '?': t.kind = TK_Question; break;
+      // "^=" is lexed as a single TK_XorAssign token, like every other
+      // compound assignment; previously it came out as TK_BitXor followed
+      // by TK_Assign and TK_XorAssign was never produced.
+      CASE2('^', TK_BitXor, TK_XorAssign);
+      CASE2('!', TK_Not, TK_NotEquals);
+      CASE2('=', TK_Assign, TK_Equals);
+      CASE2('*', TK_Mul, TK_MulAssign);
+      CASE2('%', TK_Mod, TK_ModAssign);
+      CASE3('+', TK_Add, TK_AddAssign, TK_Increment);
+      CASE3('&', TK_BitAnd, TK_AndAssign, TK_And);
+      CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
+
+      // '#' followed by an identifier; only the interned `intern_foreign`
+      // note is recognised.
+      case '#': {
+        lex_parse_ident(table, s, &t);
+        if(t.intern_val.str == intern_foreign.str){
+          t.kind = TK_FOREIGN;
+        }
+        else token_error(&t, "Unrecognized #note"_s);
+      }break;
+
+      case '.': {
+        if(lexc(s) == '.' 
&& lexci(s,1) == '.') {
+          lex_advance(s); lex_advance(s);
+          t.kind = TK_ThreeDots;
+        }
+        else {
+          t.kind = TK_Dot;
+        }
+      } break;
+
+      // Unicode literal: decodes one UTF-8 sequence after the opening quote.
+      // NOTE(review): no closing quote is consumed here - confirm this is the
+      // intended literal syntax.
+      case '\'':{
+        assert(s->stream.len >= s->iter);
+        UTF32_Result decode = utf8_to_utf32(lexcp(s), s->stream.len - s->iter);
+        if(!decode.error){
+          for(S32 i = 0; i < decode.advance; i++) lex_advance(s);
+          t.unicode = decode.out_str;
+          t.kind = TK_UnicodeLit;
+        }
+        else{
+          token_error(&t, "Invalid UTF8 sequence in unicode literal"_s);
+        }
+      } break;
+
+      // "<<=", "<<", "<=", "<"
+      case '<': {
+        if (lexc(s) == '<') {
+          lex_advance(s);
+          if (lexc(s) == '=') {
+            lex_advance(s);
+            t.kind = TK_LeftShiftAssign;
+          }
+          else {
+            t.kind = TK_LeftShift;
+          }
+        }
+        else if (lexc(s) == '=') {
+          lex_advance(s);
+          t.kind = TK_LesserThenOrEqual;
+        }
+        else {
+          t.kind = TK_LesserThen;
+        }
+      } break;
+
+      // ">>=", ">>", ">=", ">"
+      case '>': {
+        if (lexc(s) == '>') {
+          lex_advance(s);
+          if (lexc(s) == '=') {
+            lex_advance(s);
+            t.kind = TK_RightShiftAssign;
+          }
+          else {
+            t.kind = TK_RightShift;
+          }
+        }
+        else if (lexc(s) == '=') {
+          lex_advance(s);
+          t.kind = TK_GreaterThenOrEqual;
+        }
+        else {
+          t.kind = TK_GreaterThen;
+        }
+      } break;
+
+      // "::", ":=", ":"
+      case ':': {
+        if (lexc(s) == ':') {
+          lex_advance(s);
+          t.kind = TK_DoubleColon;
+        }
+        else if(lexc(s) == '='){
+          lex_advance(s);
+          t.kind = TK_ColonAssign;
+        }
+        else {
+          t.kind = TK_Colon;
+        }
+      } break;
+
+      // "-=", "--", "->", "-"
+      case '-':{
+        if (lexc(s) == '=') {
+          lex_advance(s);
+          t.kind = TK_SubAssign;
+        }
+        else if (lexc(s) == '-') {
+          lex_advance(s);
+          t.kind = TK_Decrement;
+        }
+        else if (lexc(s) == '>') {
+          lex_advance(s);
+          t.kind = TK_Arrow;
+        }
+        else {
+          t.kind = TK_Sub;
+        }
+      } break;
+
+      // String literal: on success the surrounding quotes are trimmed from
+      // the token text before it is interned.
+      case '"': {
+        t.kind = TK_StringLit;
+        lex_parse_string(s,&t,'"');
+        if(t.kind != TK_Error){
+          t.str += 1;
+          t.len -= 2;
+        }
+        t.intern_val = intern_string(table, t.string);
+      } break;
+
+      // "/=" or "/"; comments were already consumed by the indentation stage.
+      case '/': {
+        if(lexc(s) == '='){
+          t.kind = TK_DivAssign;
+          lex_advance(s);
+        }
+        else {
+          t.kind = TK_Div;
+        }
+      } break;
+
+      case '0':case '1':case '2':case '3':case '4':
+      case '5':case '6':case '7':case 
'8':case '9':{
+        // Number: digits with at most one '.'; a dot makes it a float.
+        B32 found_dot = false;
+        for(;;){
+          if(lex_is_numeric(lexc(s)))
+            ;
+          else if(lexc(s) == '.'){
+            if(found_dot){
+              token_error(&t, "Multiple '.' in float literal"_s);
+              goto end_of_switch;
+            }
+            found_dot = true;
+          }
+          else break;
+
+          lex_advance(s);
+        }
+        lex_set_len(s, &t);
+        if(found_dot) lex_parse_f64(&t);
+        else lex_parse_u64(lexer, &t);
+
+      } break;
+
+      case 'A':case 'a':case 'M':case 'm':case 'B':
+      case 'b':case 'N':case 'n':case 'C':case 'c':case 'O':
+      case 'o':case 'D':case 'd':case 'P':case 'p':case 'E':
+      case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R':
+      case 'r':case 'G':case 'g':case 'S':case 's':case 'H':
+      case 'h':case 'T':case 't':case 'I':case 'i':case 'U':
+      case 'u':case 'J':case 'j':case 'V':case 'v':case 'K':
+      case 'k':case 'W':case 'w':case 'L':case 'X':case 'l':
+      case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': {
+        // Identifier, promoted to a keyword when its interned text falls in
+        // the keyword range.
+        t.kind = TK_Identifier;
+        lex_parse_ident(table, s, &t);
+        if(lex_is_keyword(table, t.intern_val)){
+          t.kind = TK_Keyword;
+        }
+      } break;
+
+      default: {
+        token_error(&t, "Unknown token"_s);
+      }
+    }end_of_switch:
+
+    // Cases that did not set a length get the span consumed so far.
+    if(t.len==0)
+      lex_set_len(s,&t);
+
+    array->add(t);
+  }
+#undef CASE2
+#undef CASE3
+}
+
+// Creates a lexer initialised through lex_init with the two allocators.
+function Lexer
+lex_make(Allocator *token_string_arena, Allocator *map_allocator){
+  Lexer result = {};
+  lex_init(token_string_arena, map_allocator, &result);
+  return result;
+}
+
+// Resets the stream state and token list, then tokenizes `istream`.
+// The indent stack is given the local scratch allocator and is seeded with
+// token_null.
+function void
+lex_restream(Lexer *lexer, String istream, String file){
+  lexer->stream = {};
+  lexer->stream.stream = istream;
+  lexer->stream.line_begin = istream.str;
+  lexer->stream.file = file;
+
+
+  lexer->tokens.clear();
+  lexer->token_iter = 0;
+  Scratch scratch;
+  lexer->stream.indent_stack.allocator = scratch;
+  lexer->stream.indent_stack.add(&token_null);
+  lex__stream(lexer);
+}
+
+// Convenience wrapper: creates a lexer and tokenizes `istream` with it.
+function Lexer
+lex_stream(Allocator *token_string_arena, Allocator *map_allocator, String istream, String file){
+  Lexer result = lex_make(token_string_arena, map_allocator);
+  lex_restream(&result, istream, file);
+  return 
result;
+}
+
+//-----------------------------------------------------------------------------
+// Token metadata
+//-----------------------------------------------------------------------------
+// Human-readable spelling of a token kind. Unknown kinds hit
+// invalid_codepath and yield an empty string.
+function const char *
+name(Token_Kind kind){
+  switch(kind){
+    case TK_End: return "End of stream";
+    case TK_Mul: return "*";
+    case TK_Div: return "/";
+    case TK_Add: return "+";
+    case TK_Sub: return "-";
+    case TK_Mod: return "%";
+    case TK_BitAnd: return "&";
+    case TK_BitOr: return "|";
+    case TK_BitXor: return "^";
+    case TK_Neg: return "~";
+    case TK_Not: return "!";
+    case TK_OpenParen: return "(";
+    case TK_CloseParen: return ")";
+    case TK_OpenBrace: return "{";
+    case TK_CloseBrace: return "}";
+    case TK_OpenBracket: return "[";
+    case TK_CloseBracket: return "]";
+    case TK_ColonAssign: return ":=";
+    case TK_Comma: return ",";
+    case TK_Pound: return "#";
+    case TK_Question: return "?";
+    case TK_ThreeDots: return "...";
+    case TK_Semicolon: return ";";
+    case TK_Dot: return ".";
+    case TK_LesserThen: return "<";
+    case TK_GreaterThen: return ">";
+    case TK_Colon: return ":";
+    case TK_Assign: return "=";
+    case TK_DivAssign: return "/=";
+    case TK_MulAssign: return "*=";
+    case TK_ModAssign: return "%=";
+    case TK_SubAssign: return "-=";
+    case TK_AddAssign: return "+=";
+    case TK_AndAssign: return "&=";
+    case TK_OrAssign: return "|=";
+    case TK_XorAssign: return "^=";
+    case TK_LeftShiftAssign: return "<<=";
+    case TK_RightShiftAssign: return ">>=";
+    case TK_DoubleColon: return "::";
+    case TK_At: return "@";
+    // Prefix and postfix increment/decrement share one spelling.
+    case TK_Decrement: return "--";
+    case TK_Increment: return "++";
+    case TK_PostDecrement: return "--";
+    case TK_PostIncrement: return "++";
+    case TK_LesserThenOrEqual: return "<=";
+    case TK_GreaterThenOrEqual: return ">=";
+    case TK_Equals: return "==";
+    case TK_And: return "&&";
+    case TK_Or: return "||";
+    case TK_NotEquals: return "!=";
+    case TK_LeftShift: return "<<";
+    case TK_RightShift: return ">>";
+    case TK_Arrow: return "->";
+    case 
TK_NewLine: return "New_Line"; + case TK_ExprSizeof: return "sizeof"; + case TK_DocComment: return "Doc_Comment"; + case TK_Comment: return "Comment"; + case TK_Identifier: return "Identifier"; + case TK_StringLit: return "String_Lit"; + case TK_UnicodeLit: return "Unicode_Lit"; + case TK_Error: return "Error"; + case TK_Float: return "Float"; + case TK_Integer: return "int"; + case TK_Keyword: return "Keyword"; + case TK_FOREIGN: return "#foreign"; + case CLOSE_SCOPE: return "Close_Scope"; + case OPEN_SCOPE: return "Open_Scope"; + case SAME_SCOPE: return "Same_Scope"; + default: invalid_codepath; return ""; + } +} diff --git a/parsing.cpp b/parsing.cpp new file mode 100644 index 0000000..1d8775f --- /dev/null +++ b/parsing.cpp @@ -0,0 +1,607 @@ + +function void +parsing_error(Token *token, const char *str, ...){ + Scratch scratch; + STRING_FMT(scratch, str, string); + + // @Note(Krzosa): Print nice error message + printf("\nError :: %s", string.str); + if(token){ + if(token->kind == TK_Error){ + printf("Token Error: %.*s", (int)token->error_val.len, token->error_val.str); + } + printf(" :: %s:%d\n", token->file.str, (S32)token->line + 1); + + // @Note(Krzosa): Print error line + { + int i = 0; + while(token->line_begin[i]!='\n' && token->line_begin[i]!=0) i++; + printf("%.*s\n", i, token->line_begin); + + // @Note(Krzosa): Print error marker + int token_i = token->str - token->line_begin; + for(int i = 0; i < token_i-2; i++) printf(" "); + printf("^^^^^^\n"); + } + } + + __debugbreak(); +} + +function Token * +token_get(S64 i = 0){ + i += pctx->token_iter; + if(i >= pctx->tokens.len){ + return &pctx->empty_token; + } + Token *result = &pctx->tokens[i]; + return result; +} + +function Token * +token_is_scope(){ + Token *token = token_get(); + if(lex_is_scope(token)) return token; + return 0; +} + +function Token * +token_next(){ + Token *token = token_get(); + if(lex_is_scope(token)) pctx->indent = token->indent; + pctx->token_iter++; + return token; +} + +function 
Token * +token_is(Token_Kind kind, S64 lookahead = 0){ + Token *token = token_get(lookahead); + if(token->kind == kind){ + return token; + } + return 0; +} + +function Token * +token_is_keyword(Intern_String keyword, S64 lookahead = 0){ + Token *token = token_get(lookahead); + if(token->kind == TK_Keyword){ + if(keyword.str == token->intern_val.str){ + return token; + } + } + return 0; +} + +function Token * +token_match(Token_Kind kind){ + Token *token = token_get(); + if(token->kind == kind){ + return token_next(); + } + return 0; +} + +function Token * +token_match(Token_Kind a, Token_Kind b){ + Token *ta = token_get(); + Token *tb = token_get(1); + if(ta->kind == a && tb->kind == b){ + token_next(); token_next(); + return ta; + } + return 0; +} + +function Token * +token_match_keyword(Intern_String string){ + Token *token = token_get(); + if(token->kind == TK_Keyword){ + if(string.str == token->intern_val.str){ + token = token_next(); + return token; + } + } + return 0; +} + +function Token * +token_expect(Token_Kind kind){ + Token *token = token_get(); + if(token->kind == kind) return token_next(); + parsing_error(token, "Expected token of kind: [%s], got instead token of kind: [%s]", name(kind), name(token->kind)); + return 0; +} + +function Ast_Expr *parse_expr(S64 minbp = 0); + +function Ast_Expr * +parse_init_stmt(Ast_Expr *expr){ + Token *token = token_get(); + if(token->kind == TK_ColonAssign && expr->kind != AST_IDENT) + parsing_error(expr->pos, "Binding with [:=] to something that is not an identifier"); + + if(token_is_assign(token)){ + token_next(); + Ast_Expr *value = parse_expr(); + Ast_Expr *result = ast_expr_binary((Ast_Atom *)expr, value, token); + result->flags = set_flag(result->flags, AST_STMT); + return result; + } + + return expr; +} + +function Ast_Call * +parse_expr_call(Ast_Expr *left){ + Scratch scratch; + Token *pos = token_get(); + Array exprs = {scratch}; + + while(!token_is(TK_CloseParen)){ + Token *token = token_get(); + Ast_Expr 
*index = 0; + Ast_Atom *name = 0; + if(token_match(TK_OpenBracket)){ + index = parse_expr(); + token_expect(TK_CloseBracket); + token_expect(TK_Assign); + } + + Ast_Expr *item = parse_expr(); + if(!index && token_match(TK_Assign)){ + assert(is_flag_set(item->flags, AST_ATOM)); + name = (Ast_Atom *)item; + item = parse_expr(); + } + + Ast_Call_Item *item_comp = ast_call_item(token, index, name, item); + exprs.add(item_comp); + + if(!token_match(TK_Comma)){ + break; + } + } + token_expect(TK_CloseParen); + + Ast_Call *result = ast_call(pos, left, exprs); + return result; +} + +function Ast_Expr * +parse_optional_type(){ + Ast_Expr *result = 0; + if(token_match(TK_Colon)) result = parse_expr(); + return result; +} + +function Ast_Named *parse_named(B32); +function Ast_Block * +parse_block(){ + Ast_Block *block = 0; + + if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context + Token *token_block = token_get(); + + Scratch scratch; + Array stmts = {scratch}; + do{ + Token *token = token_get(); + if(token_match_keyword(keyword_return)){ + Ast_Expr *expr = 0; + if(!token_is_scope()) expr = parse_expr(); + stmts.add(ast_return(token, expr)); + } + + else if(token_match_keyword(keyword_pass)){ + stmts.add(ast_pass(token)); + } + + else if(token_match_keyword(keyword_for)){ + Ast_Expr *init = 0; + Ast_Expr *cond = 0; + Ast_Expr *iter = 0; + + if(!token_is(OPEN_SCOPE)){ + if(!token_is(TK_Comma)){ + Ast_Expr *expr_first = parse_expr(); + init = parse_init_stmt(expr_first); + } + + + if(token_match(TK_Comma)){ + if(!token_is(TK_Comma)) cond = parse_expr(); + if(token_match(TK_Comma)){ + iter = parse_expr(); + iter = parse_init_stmt(iter); + } + } + } + + Ast_Block *for_block = parse_block(); + stmts.add(ast_for(token, init, cond, iter, for_block)); + } + + else if(token_match_keyword(keyword_if)){ + Array if_nodes = {scratch}; + Ast_Expr *expr = parse_expr(); + Ast_Expr *init_val = parse_init_stmt(expr); + if(init_val != expr){ + 
if(token_match(TK_Comma)) expr = parse_expr(); + else expr = 0; + } + if(init_val == expr) init_val = 0; + + Ast_Block *if_block = parse_block(); + Ast_If_Node *if_node = ast_if_node(token, init_val, expr, if_block); + if_nodes.add(if_node); + + while(token_is(SAME_SCOPE) && token_is_keyword(keyword_else, 1)){ + token_next(); + token = token_next(); + if(token_match_keyword(keyword_if)){ + Ast_Expr *expr = parse_expr(); + Ast_Block *else_if_block = parse_block(); + Ast_If_Node *if_node = ast_if_node(token, 0, expr, else_if_block); + if_nodes.add(if_node); + } + else{ + Ast_Block *else_block = parse_block(); + Ast_If_Node *if_node = ast_if_node(token, 0, 0, else_block); + if_nodes.add(if_node); + break; + } + } + Ast_If *result_if = ast_if(token, if_nodes); + stmts.add(result_if); + + } + else{ + Ast *result = parse_named(false); + if(!result){ + result = parse_expr(); + result = parse_init_stmt((Ast_Expr *)result); + } + + if(result) { + result->flags = set_flag(result->flags, AST_STMT); + stmts.add(result); + } + else { + parsing_error(token, "Unexpected token [%s] while parsing statement", name(token->kind)); + } + + } + } while(token_match(SAME_SCOPE)); + token_expect(CLOSE_SCOPE); + block = ast_block(token_block, stmts); + } + return block; +} + +function Ast_Lambda * +parse_lambda(Token *token){ + Scratch scratch; + + B32 has_var_args = false; + Array params = {scratch}; + if(!token_is(TK_CloseParen)){ + for(;;){ + Token *name = token_get(); + if(token_match(TK_Identifier)){ + token_expect(TK_Colon); + Ast_Expr *typespec = parse_expr(); + + Ast_Expr *default_value = 0; + if(token_match(TK_Assign)) { + default_value = parse_expr(); + } + + Ast_Lambda_Arg *param = ast_expr_lambda_arg(name, name->intern_val, typespec, default_value); + params.add(param); + } + else if(token_match(TK_ThreeDots)){ + has_var_args = true; + break; + } + else parsing_error(name, "Expected [Identifier] or [...] 
when parsing lambda arguments"); + + if(!token_match(TK_Comma)) + break; + } + } + token_expect(TK_CloseParen); + + Ast_Expr *ret = parse_optional_type(); + Ast_Block *block = token_is(OPEN_SCOPE) ? parse_block() : 0; + Ast_Lambda *result = ast_lambda(token, params, has_var_args, ret, block); + return result; +} + +//----------------------------------------------------------------------------- +// Pratt expression parser +// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html +//----------------------------------------------------------------------------- +struct Binding_Power{S64 left;S64 right;}; +enum Binding{Binding_Prefix,Binding_Infix,Binding_Postfix}; + +function Binding_Power +binding_power(Binding binding, Token_Kind kind){ + if(binding == Binding_Prefix) goto Prefix; + if(binding == Binding_Infix) goto Infix; + if(binding == Binding_Postfix) goto Postfix; + else invalid_codepath; + + Prefix: switch(kind){ + case TK_OpenBracket: + return {-2, 22}; + case TK_Increment: + case TK_Decrement: + case TK_Pointer: + case TK_Dereference: + case TK_Keyword: + case TK_OpenParen: + case TK_Sub: + case TK_Add: + case TK_Neg: + case TK_Not: + return{-2, 20}; + default: return {-1, -1}; + } + Infix: switch(kind){ + case TK_Or: + return {9,10}; + case TK_And: + return {11,12}; + case TK_Equals: + case TK_NotEquals: + case TK_GreaterThen: + case TK_GreaterThenOrEqual: + case TK_LesserThen: + case TK_LesserThenOrEqual: + return {13,14}; + case TK_Sub: + case TK_Add: + case TK_BitOr: + case TK_BitXor: + return {15,16}; + case TK_RightShift: + case TK_LeftShift: + case TK_BitAnd: + case TK_Mul: + case TK_Div: + case TK_Mod: + return {17,18}; + case TK_Dot: + return {24,23}; + default: return {}; + } + Postfix: switch(kind){ + case TK_Increment: + case TK_Decrement: + case TK_OpenBracket: + case TK_OpenParen: + return {21, -2}; + default: return{-1,-1}; + } +} + +function Ast_Expr * +parse_expr(S64 min_bp){ + Ast_Expr 
*left = 0; + Token *token = token_next(); + Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind); + + // @note: parse prefix expression + switch(token->kind){ + case TK_StringLit : left = ast_str(token, token->intern_val); break; + case TK_Identifier : left = ast_ident(token, token->intern_val); break; + case TK_Integer : left = ast_int(token, token->int_val); break; + case TK_UnicodeLit : left = ast_int(token, token->unicode); break; + case TK_Float : left = ast_float(token, token->f64_val); break; + case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break; + case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break; + case TK_Sub : left = ast_expr_unary(token, TK_Sub, parse_expr(prefix_bp.right)); break; + case TK_Add : left = ast_expr_unary(token, TK_Add, parse_expr(prefix_bp.right)); break; + case TK_Not : left = ast_expr_unary(token, TK_Not, parse_expr(prefix_bp.right)); break; + case TK_Neg : left = ast_expr_unary(token, TK_Neg, parse_expr(prefix_bp.right)); break; + case TK_Increment : left = ast_expr_unary(token, TK_Increment, parse_expr(prefix_bp.right)); break; + case TK_Decrement : left = ast_expr_unary(token, TK_Decrement, parse_expr(prefix_bp.right)); break; + + case TK_OpenBracket: { + Ast_Expr *expr = 0; + if(!token_is(TK_CloseBracket)) + expr = parse_expr(0); + + Ast_Array *result = ast_array(token, expr); + token_expect(TK_CloseBracket); + result->base = parse_expr(prefix_bp.right); + left = result; + }break; + + case TK_Keyword: { + if(token->intern_val == keyword_true) left = ast_bool(token, 1); + else if(token->intern_val == keyword_false) left = ast_bool(token, 0); + else if(token->intern_val == keyword_cast){ + token_expect(TK_OpenParen); + Ast_Expr *expr = parse_expr(0); + token_expect(TK_Colon); + Ast_Expr *typespec = parse_expr(0); + token_expect(TK_CloseParen); + left = ast_expr_cast(token, expr, typespec); + } + else parsing_error(token, 
"Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str); + }break; + + case TK_OpenParen: { + if(token_is(TK_CloseParen) || (token_is(TK_Identifier) && token_is(TK_Colon, 1)) || token_is(TK_ThreeDots)) + left = parse_lambda(token); + else{ + left = parse_expr(0); + token_expect(TK_CloseParen); + } + }break; + default: parsing_error(token, "Unexpected token of kind: [%s] in expression", name(token->kind)); return 0; + } + + for(;;){ + token = token_get(); + + // lets say [+] is left:1, right:2 and we parse 2+3+4 + // We pass min_bp of 2 to the next recursion + // in recursion we check if left(1) > min_bp(2) + // it's not so we don't recurse - we break + // We do standard do the for loop instead + + Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind); + Binding_Power infix_bp = binding_power(Binding_Infix, token->kind); + + // @note: parse postfix expression + if(postfix_bp.left > min_bp){ + token_next(); + switch(token->kind){ + case TK_OpenBracket:{ + Ast_Expr *index = parse_expr(0); + token_expect(TK_CloseBracket); + left = ast_expr_index(token, left, index); + }break; + case TK_OpenParen:{ + left = parse_expr_call(left); + }break; + default:{ + assert(token->kind == TK_Increment || token->kind == TK_Decrement); + if(token->kind == TK_Increment) token->kind = TK_PostIncrement; + else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement; + left = ast_expr_unary(token, token->kind, left); + } + } + } + + // @note: parse infix expression + else if(infix_bp.left > min_bp){ + token = token_next(); + Ast_Expr *right = parse_expr(infix_bp.right); + left = ast_expr_binary(left, right, token); + } + + else break; + + } + return left; +} + +function Ast_Expr * +parse_assign_expr(){ + Ast_Expr *result = 0; + if(token_match(TK_Assign)) result = parse_expr(); + return result; +} + +function Ast_Struct * +parse_struct(Token *pos){ + Scratch scratch; + Array members = {scratch}; + Array members_const = {scratch}; + + 
token_match(OPEN_SCOPE); + do{ + Token *token = token_get(); + + Ast_Named *named = parse_named(false); + if(!named) parsing_error(token, "Failed to parse struct member"); + named->flags = set_flag(named->flags, AST_AGGREGATE_CHILD); + + if(named->kind == AST_CONST){ + members_const.add((Ast_Const *)named); + } + else { + assert(named->kind == AST_VAR); + members.add((Ast_Var *)named); + } + + }while(token_match(SAME_SCOPE)); + token_expect(CLOSE_SCOPE); + + Ast_Struct *result = ast_struct(pos, members, members_const); + return result; +} + +function Ast_Enum * +parse_enum(Token *pos){ + Scratch scratch; + Array members = {scratch}; + Ast_Expr *typespec = parse_optional_type(); + + token_match(OPEN_SCOPE); + do{ + Token *name = token_expect(TK_Identifier); + Ast_Expr *value = parse_assign_expr(); + Ast_Enum_Member *member = ast_enum_member(name, name->intern_val, value); + members.add(member); + }while(token_match(SAME_SCOPE)); + token_expect(CLOSE_SCOPE); + + Ast_Enum *result = ast_enum(pos, typespec, members); + return result; +} + +/* +Needs peeking only because I didn't want to duplicate code +for parsing statements and it makes code nicer. 
+Statements can have named syntax i := +*/ +function Ast_Named * +parse_named(B32 is_global){ + Ast_Named *result = 0; + if(is_global) { + token_match(SAME_SCOPE); + if(pctx->indent != 0){ + parsing_error(token_get(), "Top level declarations shouldn't be indented"); + } + } + + Ast_Flag flags = 0; + if(token_match(TK_FOREIGN)){ + flags = set_flag(flags, AST_FOREIGN); + } + + Token *tname = token_get(); + if(token_match(TK_Identifier, TK_DoubleColon)){ + // @note parse struct binding + Token *struct_pos = token_get(); + if(token_match_keyword(keyword_struct)){ + Ast_Struct *struct_val = parse_struct(struct_pos); + result = ast_const(tname, tname->intern_val, (Ast_Expr *)struct_val); + } + + else if(token_match_keyword(keyword_enum)){ + Ast_Enum *enum_val = parse_enum(struct_pos); + result = ast_const(tname, tname->intern_val, (Ast_Expr *)enum_val); + } + + // @note parse constant expression + else{ + Ast_Expr *expr = parse_expr(); + result = ast_const(tname, tname->intern_val, expr); + } + } + else if(token_match(TK_Identifier, TK_Colon)){ + Ast_Expr *typespec = parse_expr(); // @note: typed binding, name : typespec [= value] + Ast_Expr *expr = parse_assign_expr(); + result = ast_var(tname, typespec, tname->intern_val, expr); + } + + else if(token_match(TK_Identifier, TK_ColonAssign)){ + Ast_Expr *expr = parse_expr(); + result = ast_var(tname, 0, tname->intern_val, expr); + } + else if(is_global && tname->kind != TK_End){ + parsing_error(tname, "Unexpected token: [%s] when parsing a declaration", name(tname->kind)); + } + + if(result){ + result->flags = set_flag(result->flags, flags); + } + + return result; +} diff --git a/typechecking.cpp b/typechecking.cpp new file mode 100644 index 0000000..e320e12 --- /dev/null +++ b/typechecking.cpp @@ -0,0 +1,1150 @@ +#define CASE(kind,type) case AST_##kind: { Ast_##type *node = (Ast_##type *)ast; +#define BREAK() } break + +function void +type_error(Token *token, Ast_Resolved_Type *expected, Ast_Resolved_Type *actual, const char *str, ...){ + Scratch scratch; +
STRING_FMT(scratch, str, string); + + // @Note(Krzosa): Print nice error message + printf("\nType error :: %s :: ", string.str); + printf("Expected %s, got instead %s", docname(expected), docname(actual)); + if(token){ + printf(" :: %s:%d\n", token->file.str, (S32)token->line + 1); + + // @Note(Krzosa): Print error line + { + int i = 0; + while(token->line_begin[i]!='\n' && token->line_begin[i]!=0) i++; + printf("%.*s\n", i, token->line_begin); + + // @Note(Krzosa): Print error marker + int token_i = token->str - token->line_begin; + for(int i = 0; i < token_i-2; i++) printf(" "); + printf("^^^^^^\n"); + } + } + + __debugbreak(); +} + +//----------------------------------------------------------------------------- +// Evaluating constant expressions +//----------------------------------------------------------------------------- +function void +check_value_bounds(Token *pos, Value *a){ + if(!is_int(a->type)) return; + + Scratch scratch; + if(!bigint_fits_in_bits(&a->big_int_val, a->type->size*8, is_signed_int(a->type))){ + const char *string = bigint_to_error_string(scratch, &a->big_int_val, 10); + parsing_error(pos, "Value %s doesn't fit in type %s", string, docname(a->type)); + } +} + +function Value +convert_untyped_to_typed(Token *pos, Value a, Ast_Resolved_Type *new_type){ + assert(new_type); + if(a.type == 0) return a; + if(is_typed(a.type)) return a; + + if(is_int(a.type) && is_int(new_type)) + assert(a.type == untyped_int); + else if(is_int(a.type) && is_float(new_type)) + a.f64_val = bigint_as_float(&a.big_int_val); // @leak bigint + else if(is_int(a.type) && is_pointer(new_type)) + ; + else if(is_float(a.type) && is_float(new_type)) + ; // nothing to do + else if(is_bool(a.type) && is_bool(new_type)) + ; // nothing to do + else if(is_string(a.type) && is_string(new_type)) + ; // nothing to do + else parsing_error(pos, "Type mismatch when converting from %s to %s", docname(a.type), docname(new_type)); + + a.type = new_type; + check_value_bounds(pos, &a); + 
return a; +} + +function void +make_sure_types_are_compatible(Token *pos, Value *a, Value *b){ + if((is_pointer(a->type) && is_int(b->type)) || (is_pointer(b->type) && is_int(a->type))){ + return; + } + else if(is_pointer(a->type) && is_pointer(b->type)){ + goto fail; + } + else if(is_typed(a->type) && is_typed(b->type)){ + if(a->type != b->type){ + fail: parsing_error(pos, "Type mismatch in make_sure_types_are_compatible - left: %s right: %s", docname(a->type), docname(b->type)); + } + } + + if(is_untyped(a->type) && is_typed(b->type)){ + assert(is_typed(b->type)); + *a = convert_untyped_to_typed(pos, *a, b->type); + } + else if(is_typed(a->type) && is_untyped(b->type)){ + assert(is_typed(a->type)); + *b = convert_untyped_to_typed(pos, *b, a->type); + } + else if(is_int(a->type) && is_float(b->type)){ + *a = value_float(a->big_int_val); + } + else if(is_float(a->type) && is_int(b->type)){ + *b = value_float(b->big_int_val); + } +} + +function Value +compare_values(Token *pos, Token_Kind op, Value a, Value b, bool is_const){ + if(!(is_numeric(a.type) && is_numeric(b.type))) + parsing_error(pos, "Constant application of binary %s on values of type %s and %s is not allowed", name(op), docname(a.type), docname(b.type)); + + make_sure_types_are_compatible(pos, &a, &b); + + B32 result = 0; + if(is_const){ + switch(a.type->kind){ + CASE_INT:{ + CmpRes cmp = bigint_cmp(&a.big_int_val, &b.big_int_val); + switch(op){ + case TK_LesserThenOrEqual: result = (cmp == CMP_LT) || (cmp == CMP_EQ); break; + case TK_GreaterThenOrEqual: result = (cmp == CMP_GT) || (cmp == CMP_EQ); break; + case TK_GreaterThen: result = cmp == CMP_GT; break; + case TK_LesserThen: result = cmp == CMP_LT; break; + case TK_Equals: result = cmp == CMP_EQ; break; + case TK_NotEquals: result = cmp != CMP_EQ; break; + invalid_default_case; + } + }break; + CASE_BOOL:{ + switch(op){ + case TK_Equals: result = a.bool_val == b.bool_val; break; + case TK_NotEquals: result = a.bool_val != b.bool_val; break; + 
invalid_default_case; + } + }break; + CASE_FLOAT:{ + switch(op){ + case TK_LesserThenOrEqual: result = a.f64_val <= b.f64_val; break; + case TK_GreaterThenOrEqual: result = a.f64_val >= b.f64_val; break; + case TK_GreaterThen: result = a.f64_val > b.f64_val; break; + case TK_LesserThen: result = a.f64_val < b.f64_val; break; + case TK_Equals: result = a.f64_val == b.f64_val; break; + case TK_NotEquals: result = a.f64_val != b.f64_val; break; + invalid_default_case; + } + }break; + CASE_STRING:{ + invalid_codepath; + }break; + invalid_default_case; + } + } + + + return value_bool(result); +} + +function Value +eval_binary(Token *pos, Token_Kind op, Value a, Value b, bool is_const){ + if(token_is_compare(op)) + return compare_values(pos, op, a, b, is_const); + + if(!(is_numeric(a.type) && is_numeric(b.type))) + parsing_error(pos, "Constant application of binary %s on values of type %s and %s is not allowed", name(op), docname(a.type), docname(b.type)); + + make_sure_types_are_compatible(pos, &a, &b); + + Value result = {}; + result.type = a.type; + if(is_const){ + switch(a.type->kind){ + CASE_INT:{ + switch(op){ + case TK_BitXor: bigint_xor(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_BitAnd: bigint_and(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_BitOr: bigint_or(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_Add: bigint_add(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_Sub: bigint_sub(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_Mul: bigint_mul(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_Div: bigint_div_trunc(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_Mod: bigint_mod(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_LeftShift: bigint_shl(&result.big_int_val, &a.big_int_val, &b.big_int_val); break; + case TK_RightShift: bigint_shr(&result.big_int_val, &a.big_int_val, 
&b.big_int_val); break; + invalid_default_case; + } + }break; + CASE_BOOL:{ + switch(op){ + case TK_And: result.bool_val = a.bool_val && b.bool_val; break; + case TK_Or: result.bool_val = a.bool_val || b.bool_val; break; + case TK_BitAnd: result.bool_val = a.bool_val & b.bool_val; break; + case TK_BitOr: result.bool_val = a.bool_val | b.bool_val; break; + case TK_BitXor: result.bool_val = a.bool_val ^ b.bool_val; break; + invalid_default_case; + } + }break; + CASE_FLOAT:{ + switch(op){ + case TK_Add: result.f64_val = a.f64_val + b.f64_val; break; + case TK_Sub: result.f64_val = a.f64_val - b.f64_val; break; + case TK_Mul: result.f64_val = a.f64_val * b.f64_val; break; + case TK_Div: result.f64_val = a.f64_val / b.f64_val; break; + invalid_default_case; + } + }break; + CASE_STRING:{ + invalid_codepath; + }break; + invalid_default_case; + } + } + + return result; +} + +function S64 +digit_count(const Value *a){ + S64 digit_count = a->big_int_val.digit_count*64; + if(is_typed(a->type)){ + digit_count = a->type->size*8; + } + return digit_count; +} + +function void +eval_unary(Token *pos, Token_Kind op, Operand *operand){ + Value *a = &operand->value; + Ast_Resolved_Type *type = a->type; + if(!is_numeric(type)) + parsing_error(pos, "Unary [%s] cant be applied to value of type %s", name(op), docname(type)); + + if(op == TK_Not) + a->type = untyped_bool; + + if(op == TK_Increment || op == TK_Decrement || op == TK_PostIncrement || op == TK_PostDecrement) + if(!operand->is_lvalue) + parsing_error(pos, "Unary [%s] requires an assignable value(lvalue)", name(op)); + + if(!operand->is_const) + return; + + BigInt result = {}; + switch(op){ + case TK_Add:{} break; + case TK_Sub:{ + switch(type->kind){ + CASE_INT:{ + bigint_negate(&result, &a->big_int_val); + a->big_int_val = result; + }break; + CASE_FLOAT:{ + a->f64_val = -a->f64_val; + }break; + default:goto failure; + } + } break; + case TK_Neg:{ + switch(type->kind){ + CASE_SINT: case TYPE_UNTYPED_INT: + bigint_not(&result, 
&a->big_int_val, digit_count(a), 1); break; + CASE_UINT: + bigint_not(&result, &a->big_int_val, digit_count(a), 0); break; + default:goto failure; + } + a->big_int_val = result; + } break; + case TK_Not:{ + switch(type->kind){ + CASE_INT: { // @note: logical not of an integer is true only when the integer compares equal to zero + if(CMP_EQ == bigint_cmp_zero(&a->big_int_val)) + a->bool_val = 1; + else a->bool_val = 0; // @note: must be the else branch, an unconditional store here overwrote the 1 above + }break; + CASE_FLOAT: a->bool_val = !a->f64_val; break; + CASE_BOOL: a->bool_val = !a->bool_val; break; + default:goto failure; + } + } break; + default: failure: parsing_error(pos, "Constant application of unary %s on values of type %s is not allowed", name(op), docname(a->type)); + } +} + +function void +try_converting_untyped_to_typed(Operand *op){ + if(is_untyped(op->type)){ + switch(op->type->kind){ + case TYPE_UNTYPED_INT: op->type = type_s64; break; + case TYPE_UNTYPED_BOOL: op->type = type_bool; break; + case TYPE_UNTYPED_STRING: op->type = type_string; break; + case TYPE_UNTYPED_FLOAT: op->type = type_f64; break; + default: invalid_codepath; + } + } +} + +enum{ + TYPE_AND_EXPR_REQUIRED = 0, + TYPE_CAN_BE_NULL = 1, + EXPR_CAN_BE_NULL = 2 +}; + +function void +make_sure_value_is_compatible_with_type(Token *pos, Operand *expr, Ast_Resolved_Type *type, U64 debug_flag){ + if(type == expr->type){ + assert(type); + assert(expr->type); + return; + } + + if(!type){ + assert(is_flag_set(debug_flag, TYPE_CAN_BE_NULL)); + assert(expr->type); + try_converting_untyped_to_typed(expr); + } + else if(!expr->type){ + assert(is_flag_set(debug_flag, EXPR_CAN_BE_NULL)); + assert(type); + expr->type = type; + } + else if(is_untyped(expr->type)){ + expr->value = convert_untyped_to_typed(pos, expr->value, type); + } + else if(is_array(type) && type->arr.size == ARRAY_SIZE_INFERRED){ + if(type->arr.inferred_size_hash == expr->type->arr.inferred_size_hash) + expr->type = type; + } + + if(type && expr->type != type){ + parsing_error(pos, "Assigning but incompatible types, expression: %s expected var type: %s", docname(expr->type), docname(type)); + } + +
type_complete(expr->type); + check_value_bounds(pos, &expr->value); + assert(expr->type); +} + +#define rewrite_into_const(ast,T,s) _rewrite_into_const(ast,sizeof(T),s) +function void +_rewrite_into_const(Ast *node, U64 ast_size, Value value){ + auto ast = (Ast_Atom *)node; + assert(ast_size >= sizeof(Ast_Atom)); + ast->flags = set_flag(ast->flags, AST_ATOM); + ast->kind = AST_VALUE; + ast->value = value; +} + +function void +_rewrite_into_const(Ast *node, U64 ast_size, Sym *sym){ + _rewrite_into_const(node, ast_size, sym->value); +} + +function Ast_Resolved_Type * +resolve_typespec(Ast_Expr *ast, B32 ast_can_be_null){ + if(ast_can_be_null && ast == 0) + return 0; + + Operand resolved = resolve_expr(ast); + if(resolved.type != type_type) + parsing_error(ast->pos, "Expected [Type] got instead %s", type_names[resolved.type->kind]); + return resolved.type_val; +} + +function Operand +require_const_int(Ast_Expr *expr, B32 ast_can_be_null){ + Operand op = resolve_expr(expr); + + if(expr == 0 && ast_can_be_null) + return op; + else if(expr == 0) + parsing_error(expr->pos, "This field cannot be null"); + + if(!op.is_const) + parsing_error(expr->pos, "Expected a const value"); + if(!is_int(op.type)) + parsing_error(expr->pos, "Expected a constant integer got instead %s", docname(op.type)); + + return op; +} + +function Operand +resolve_and_require_bool(const char *error, Ast_Expr *expr, B32 ast_can_be_null = AST_CANT_BE_NULL){ + if(!expr){ + if(ast_can_be_null) + return {}; + else parsing_error(0, "Compiler error: Null expression"); + } + + Operand op = resolve_expr(expr); + if(!is_bool(op.type)){ + type_error(expr->pos, type_bool, op.type, "%s", error); + } + + return op; +} + +// @note: Ret is return value of function passed down the stack +// to check if type matches +function void +resolve_stmt(Ast *ast, Ast_Resolved_Type *ret){ + if(!ast) return; + + switch(ast->kind){ + CASE(RETURN, Return){ // @todo: need to check if all paths return a value + Operand op = 
resolve_expr(node->expr); + if(!op.type && ret != type_void) parsing_error(node->pos, "Return statement has no value but the function expects a return value of type %s", docname(ret)); // @note: op.type is null on this path, report the expected type instead of handing null to docname + op.value = convert_untyped_to_typed(node->pos, op.value, ret); + if(op.type && op.type != ret) parsing_error(node->pos, "Return statement has different type then returned value, expecting: %s got instead %s", docname(ret), docname(op.type)); + BREAK(); + } + + CASE(VAR, Var){ + Operand op = resolve_binding(node); + sym_var(node->name, op, node, INSERT_INTO_SCOPE); + BREAK(); + } + + CASE(CONST, Const){ + Operand op = resolve_binding(node); + sym_const(node->name, op, node, INSERT_INTO_SCOPE); + BREAK(); + } + + CASE(PASS, Pass){ + unused(node); + BREAK(); + } + + CASE(FOR, For){ + if(node->init && node->cond == 0 && node->iter == 0){ + if(!is_flag_set(node->init->flags, AST_STMT)){ + node->cond = node->init; + node->init = 0; + } + } + + S64 scope = scope_open(); + { + resolve_expr(node->init, ret); + resolve_and_require_bool("Conditional in a for loop condition", node->cond, AST_CAN_BE_NULL); + resolve_expr(node->iter, ret); + For(node->block->stmts) + resolve_stmt(it, ret); + } + scope_close(scope); + BREAK(); + } + + CASE(IF, If){ + For(node->ifs){ + resolve_stmt(it->init, ret); + S64 scope = scope_open(); + { + // @todo: maybe add else kind ??
and then make sure other then else are AST_CANT_BE_NULL + resolve_and_require_bool("Conditional in a if condition", it->expr, AST_CAN_BE_NULL); + For_It(it->block->stmts, jt) + resolve_stmt(jt, ret); + } + scope_close(scope); + } + BREAK(); + } + + default:{ + if(is_flag_set(ast->flags, AST_EXPR)){ + assert(is_flag_set(ast->flags, AST_STMT)); + resolve_expr((Ast_Expr *)ast); + } + else invalid_codepath; + } + } +} + +function Operand +resolve_lambda(Ast_Lambda *lambda, Sym *sym = 0){ + Scratch scratch; + Ast_Resolved_Type *lambda_type = 0; + Ast_Resolved_Type *ret_type = resolve_typespec(lambda->ret); + Array args = {scratch}; + For(lambda->args){ + Ast_Resolved_Type *type = + resolve_typespec(it->typespec, AST_CANT_BE_NULL); + Operand default_value = resolve_expr(it->default_value, type); + make_sure_value_is_compatible_with_type(it->pos, &default_value, type, EXPR_CAN_BE_NULL); + args.add(type); + } + + lambda_type = type_lambda(lambda, ret_type, args); + sym_type(lambda_type, lambda); + Operand result = operand_type(lambda_type); + + // @note: top level lambda needs to get marked as resolved + // so that the cyclic dependency wont trigger + if(sym){ + sym->type = lambda_type; + sym->state = SYM_RESOLVED; + } + + // @todo: We also need to make sure there is a return value when ret type is not void + // @note: then try resolving the block of lambda + if(lambda->block){ + S64 scope_index = scope_open(); + For(lambda->args){ + S64 i = lambda->args.get_index(&it); + Ast_Resolved_Type *type = args[i]; + sym_var(it->name, type, it, INSERT_INTO_SCOPE); + } + For(lambda->block->stmts){ + resolve_stmt(it, ret_type); + } + scope_close(scope_index); + + result = operand_lambda(lambda_type); + } + else if(is_flag_set(lambda->parent->flags, AST_FOREIGN)){ + result = operand_lambda(lambda_type); + } + + + return result; +} + +function Operand +field_access_builtin_string(Ast_Expr *right){ + if(right->kind == AST_BINARY) invalid_codepath; // @todo entire field access needs a 
rework + assert(right->kind == AST_IDENT); + + auto a = (Ast_Atom *)right; + if(a->intern_val == pctx->intern("len"_s)){ + return operand_lvalue(type_s64); + } + else if(a->intern_val == pctx->intern("str"_s)){ + return operand_lvalue(type_pointer(type_u8)); + } + else invalid_return; +} + +function Operand +field_access_builtin_array(Ast_Expr *right){ + if(right->kind == AST_BINARY) invalid_codepath; // @todo entire field access needs a rework + assert(right->kind == AST_IDENT); + + auto a = (Ast_Atom *)right; + if(a->intern_val == pctx->intern("len"_s)){ + return operand_lvalue(type_s64); + } + else invalid_return; +} + + +function Operand +resolve_expr(Ast_Expr *ast, Ast_Resolved_Type *expected_type, Sym *lambda_to_resolve){ + if(!ast) return {}; // @todo: add option for better error prevention + assert(is_flag_set(ast->flags, AST_EXPR)); + + switch(ast->kind){ + CASE(VALUE, Atom){ + return operand_const_rvalue(node->value); + BREAK(); + } + CASE(IDENT, Atom){ + Sym *sym = resolve_name(node->pos, node->intern_val); + + if(sym->kind == SYM_CONST && sym->type != type_type && sym->type->kind != TYPE_LAMBDA){ + rewrite_into_const(node, Ast_Atom, sym); + return operand(sym); + } + else if(sym->kind == SYM_VAR || sym->kind == SYM_CONST){ + sym_associate(node, sym); + return operand(sym); + } + + invalid_return; + BREAK(); + } + + // Typespec array [32]int + CASE(ARRAY, Array){ + // @todo: Arrays of inferred size [] + Operand type = resolve_expr(node->base); + if(type.type != type_type) parsing_error(node->pos, "Prefix array operator is only allowed on types"); + Operand expr = require_const_int(node->expr, AST_CAN_BE_NULL); + + Ast_Resolved_Type *resolved = type_array(type.type_val, node->expr ? 
1 : 0, bigint_as_unsigned(&expr.big_int_val)); + sym_type(resolved, node); + return operand_type(resolved); + BREAK(); + } + + CASE(INDEX, Index){ + Operand left = resolve_expr(node->expr); + Operand index = resolve_expr(node->index); + if(!is_int(index.type)){ + type_error(node->pos, untyped_int, index.type,"Trying to index the array with invalid type, expected int"); + } + if(!is_array(left.type) && !is_pointer(left.type)){ + parsing_error(node->pos, "Indexing variable that is not an [Array] or [Pointer], it's of type %s instead", docname(left.type)); + } + + sym_new_resolved(SYM_VAR, {}, left.value, node); + return operand_lvalue(left.type->arr.base); + BREAK(); + } + + CASE(LAMBDA, Lambda){ + return resolve_lambda(node); + BREAK(); + } + + CASE(CALL, Call){ + Operand name = resolve_expr(node->name); + Ast_Resolved_Type *type = name.type; + if(name.type == type_type){ + type = name.type_val; + if(expected_type && expected_type != type) + parsing_error(node->pos, "Variable type different from explicit compound type"); + if(type->kind == TYPE_LAMBDA) + parsing_error(node->pos, "Calling a lambda type"); + } + type_complete(type); + node->type = type; + + if(type->kind == TYPE_ARRAY){ + if(node->exprs.len > type->arr.size && type->arr.size != ARRAY_SIZE_INFERRED) + parsing_error(node->pos, "compound statement has too many items for this type"); + Ast_Resolved_Type *item_type = type->arr.base; + + For(node->exprs){ + Ast_Call_Item *i = (Ast_Call_Item *)it; + assert(i->kind == AST_CALL_ITEM); + if(i->name) parsing_error(i->pos, "Invalid indexing kind in a compound expression of type %s", type_names[type->kind]); + if(i->index){ + Operand index_op = require_const_int(i->index, AST_CANT_BE_NULL); + U64 index = bigint_as_unsigned(&index_op.big_int_val); + if(index > (type->arr.size - 1)) parsing_error(i->pos, "Invalid index in compound expression, larger then type can store"); + } + Operand expr = resolve_expr(i->item, item_type); + expr.value = 
convert_untyped_to_typed(i->pos, expr.value, item_type); + // @todo I don't think this detects when types are different + } + } + + else if(type->kind == TYPE_STRUCT){ + auto agg = (Ast_Struct *)type->ast; + + S64 default_iter = 0; + For_It(node->exprs, expr){ + if(expr->index) parsing_error(expr->index->pos, "Function call indexing is illegal"); + Ast_Atom *name = expr->name; + S64 expr_index = node->exprs.get_index(&expr); + + Ast_Named *found = 0; + Ast_Resolved_Member *found_type = 0; + if(name){ + assert(name->kind == AST_IDENT); + For_It(agg->members, member){ + if(member->name.str == name->intern_val.str){ + if(member->kind == AST_CONST) parsing_error(expr->pos, "Initializing a value that is a constant"); + found = member; + found_type = &type->agg.members[agg->members.get_index(&member)]; + break; + } + } + } + else if(expr_index == default_iter){ + S64 i = default_iter++; + found = agg->members[i]; + found_type = &type->agg.members[i]; + if(i >= agg->members.len) parsing_error(expr->pos, "Too many arguments in compound constructor"); + } + else parsing_error(expr->pos, "Positional argument after named or indexed argument"); + + if(!found) parsing_error(expr->pos, "Invalid argument in compound constructor"); + if(is_flag_set(found->flags, AST_ITEM_INCLUDED)) parsing_error(found->pos, "Item included multiple times in compound constructor"); + found->flags = set_flag(found->flags, AST_ITEM_INCLUDED); + + Operand op = resolve_expr(expr->item, found_type->type); + + op.value = convert_untyped_to_typed(node->pos, op.value, found_type->type); + if(found_type->type != op.type) parsing_error(expr->pos, "Invalid type of compound constructor item, expected %s got instead %s", type_names[found_type->type->kind], type_names[op.type->kind]); + } + + // @note: cleanup, required? 
+ For(agg->members) it->flags = unset_flag(it->flags, AST_ITEM_INCLUDED); + } + else if(type->kind == TYPE_LAMBDA){ + Scratch scratch; + Array items = {scratch}; + + S64 was_name_indexed = false; + S64 default_iter = 0; + auto lambda = (Ast_Lambda *)type->ast; + For(lambda->args){ + S64 i = lambda->args.get_index(&it); + Ast_Resolved_Type *resolved = type->func.args[i]; + Ast_Lambda_Arg *arg = it; + + // @note: match any in list of call items, if none matched then we have a problem + // there are three kinds of possible matches: indexed, named, default + Ast_Call_Item *item = 0; + For_It(node->exprs, expr){ + if(expr->index) parsing_error(expr->index->pos, "Function call indexing is illegal"); + Ast_Atom *name = expr->name; + + if(name){ + assert(name->kind == AST_IDENT); + was_name_indexed = true; + if(name->intern_val.str == arg->name.str) item = expr; + } + else if(node->exprs.get_index(&expr) == default_iter){ + default_iter++; + item = expr; + } + else if(node->exprs.get_index(&expr) > default_iter){ + parsing_error(expr->pos, "Positional argument after named argument"); + } + + if(item) + break; + } + + if(item){ + item->flags = set_flag(item->flags, AST_ITEM_INCLUDED); + Operand expr = resolve_expr(item->item); + make_sure_value_is_compatible_with_type(node->pos, &expr, resolved, TYPE_AND_EXPR_REQUIRED); + items.add(item); + } + else{ + if(arg->default_value){ + Ast_Call_Item *item_default = ast_call_item(arg->default_value->pos, 0, 0, arg->default_value); + items.add(item_default); + } + else parsing_error(arg->pos, "Required value in lambda call was not passed"); + } + } + + if(lambda->has_var_args){ + if(was_name_indexed) + parsing_error(lambda->pos, "Cant name index a lambda with var args"); + for(S64 i = lambda->args.len; i < node->exprs.len; i++){ + Ast_Call_Item *item = node->exprs.data[i]; + resolve_expr(item->item); + item->flags = set_flag(item->flags, AST_ITEM_INCLUDED); + items.add(item); + } + } + + // @note: check if all arguments are included 
and cleanup + For(node->exprs){ + if(!is_flag_set(it->flags, AST_ITEM_INCLUDED)) + parsing_error(it->pos, "Invalid argument to function call"); + else it->flags = unset_flag(it->flags, AST_ITEM_INCLUDED); + } + + node->exprs = items.tight_copy(pctx->perm); + type = type->func.ret; + } + else parsing_error(node->pos, "Invalid function call type"); + + return operand_rvalue(type); + BREAK(); + } + + CASE(CAST, Cast){ + Operand expr = resolve_expr(node->expr); + Ast_Resolved_Type *type = resolve_typespec(node->typespec); + Ast_Resolved_Type *original_type = expr.type; + + // @todo: cleanup, probably just want one big if + // @todo: factor this into a function for easier search + + switch(expr.type->kind){ + case TYPE_POINTER:{ + if(is_pointer(type)) + expr = operand_rvalue(type); + else goto failure; + } break; + CASE_UNTYPED: { + expr.value = convert_untyped_to_typed(node->pos, expr.value, type); + } break; + CASE_UINT: + CASE_SINT:{ + if(is_int(type)) + expr.type = type; + else if(is_float(type)){ + expr.value.type = type; + if(expr.is_const) expr.value.f64_val = bigint_as_float(&expr.big_int_val); // @leak + } else goto failure; + } break; + case TYPE_F32: case TYPE_F64: { + if(is_float(type)){ + expr.type = type; + } + else if(is_int(type)){ + if(expr.is_const) expr.value.big_int_val = bigint_s64((S64)expr.value.f64_val); // @todo: What to do here??? 
+ expr.type = type; + } else goto failure; + } break; + default: failure: parsing_error(node->pos, "Failed to cast from %s to %s", docname(expr.type), docname(type));; + } + + if(original_type != type) assert(expr.type == type); + if(expr.is_const) check_value_bounds(node->pos, &expr.value); + return expr; + BREAK(); + } + + CASE(UNARY, Unary){ + Operand value = resolve_expr(node->expr); + switch(node->op){ + case TK_Pointer:{ + if(value.type->kind == TYPE_POINTER){return operand_lvalue(value.type->base);} + else if(value.type->kind == TYPE_TYPE){ + Ast_Resolved_Type *type = type_pointer(value.type_val); + sym_type(type, node); + return operand_type(type); + } + else{ parsing_error(node->pos, "Dereferencing expression %s that is not a [Pointer] or [Type]", type_names[value.type->kind]); return {}; } + }break; + case TK_Dereference:{ + return operand_lvalue(type_pointer(value.type)); + }break; + default:{ + Operand op = resolve_expr(node->expr); + eval_unary(node->pos, node->op, &op); + if(op.is_const){ + rewrite_into_const(node, Ast_Unary, op.value); + return operand_const_rvalue(op.value); + } + return operand_rvalue(op.value.type); + }break; + } + + BREAK(); + } + + CASE(BINARY, Binary){ + Operand result = {}; + if(node->op == TK_ColonAssign){ + // @note: This is actually a statement so it doesn't need to return Operand + assert(is_flag_set(node->flags, AST_STMT)); + assert(node->left->kind == AST_IDENT); + + Operand right = resolve_expr(node->right); + make_sure_value_is_compatible_with_type(node->pos, &right, 0, TYPE_CAN_BE_NULL); + assert(right.type); + + auto atom = (Ast_Atom *)node->left; + sym_var(atom->intern_val, right, node, INSERT_INTO_SCOPE); + } + //----------------------------------------------------------------------------- + + //----------------------------------------------------------------------------- + else if(token_is_assign(node->op)){ + assert(is_flag_set(node->flags, AST_STMT)); + Operand left = resolve_expr(node->left); + 
if(!left.is_lvalue) parsing_error(node->pos, "Assigning to rvalue"); + Operand right = resolve_expr(node->right); + + right.value = convert_untyped_to_typed(node->pos, right.value, left.type); + if(left.type != right.type) parsing_error(node->pos, "Can't assign value when left is %s and right is %s", docname(left.type), docname(right.type)); + } + //----------------------------------------------------------------------------- + + //----------------------------------------------------------------------------- + else if(node->op == TK_Dot){ + B32 required_to_be_const = false; + // @note: resolve first chunk which involves querying global map + // second part requires searching through a struct + // resolve.x.y + Operand resolved_ident = resolve_expr(node->left); + Ast_Resolved_Type *type = resolved_ident.type; + if(type == type_type){ + type = resolved_ident.type_val; + required_to_be_const = true; + } + // @copy_paste + if(is_pointer(type)) type = type->base; + + sym_var({}, resolved_ident.type, node->left); + if(is_string(type) && !required_to_be_const){ + result = field_access_builtin_string(node->right); + } + else if(is_array(type) && !required_to_be_const){ + result = field_access_builtin_array(node->right); + } + else{ + type_complete(type); + if(!(is_struct(type) || is_enum(type))) parsing_error(node->pos, "Trying to access inside a value that is not a struct or enum"); + + // This happens only on binary nodes which further chain with dots and require lookups + // This part cant happen on enums + // x.resolve.y + Ast_Binary *binary = (Ast_Binary *)node->right; + for(;!is_ident(binary); binary=(Ast_Binary *)binary->right){ + assert(is_ident(binary->left)); + Ast_Atom *ident = (Ast_Atom *)binary->left; + assert(is_binary(binary)); + + Ast_Struct *agg = (Ast_Struct *)type->ast; + Ast *query = query_struct(agg, ident->intern_val); + if(query){ + Sym *sym = resolved_get(query); + if(required_to_be_const && sym->kind != SYM_CONST) parsing_error(ident->pos, 
"Required to be constant"); + type = sym->type; + // @copy_paste + if(type == type_type){ + required_to_be_const = true; + type = sym->type_val; + } + if(is_pointer(type)) type = type->base; + type_complete(type); + if(!(is_struct(type) || is_enum(type))) parsing_error(node->pos, "Trying to access inside a value that is not a struct or enum"); + sym_associate(ident, sym); + + } else parsing_error(ident->pos, "No such member in struct"); + } + + // Here we can resolve the last part, this doesnt need to be a struct + // x.y.resolve + // @copy_paste + Ast_Atom *ident = (Ast_Atom *)binary; + if(is_enum(type)){ + Ast_Enum *enu = (Ast_Enum *)type->ast; + Ast_Enum_Member *query = query_enum(enu, ident->intern_val); + if(query){ + Sym *resolved = resolved_get(query); + assert(resolved); + rewrite_into_const(node, Ast_Binary, resolved); + result = operand(resolved); + } + } + else if(is_struct(type)){ + Ast_Struct *agg = (Ast_Struct *)type->ast; + Ast *query = query_struct(agg, ident->intern_val); + if(query){ + Sym *sym = resolved_get(query); + result = operand(sym); + assert(sym); + if(sym->kind == SYM_CONST) rewrite_into_const(node, Ast_Binary, sym); + else sym_associate(ident, sym); + + } else parsing_error(ident->pos, "No such member in struct"); + } + else parsing_error(ident->pos, "Trying to [.] 
access a value that is not [Enum] or [Struct]"); + + if(result.is_const == false && required_to_be_const){ + invalid_codepath; + } + } + } + //----------------------------------------------------------------------------- + + //----------------------------------------------------------------------------- + else{ + Operand left = resolve_expr(node->left); + Operand right = resolve_expr(node->right); + B32 is_const = left.is_const && right.is_const; + Value value = eval_binary(node->pos, node->op, left.value, right.value, is_const); + if(is_const){ + rewrite_into_const(node, Ast_Binary, value); + result = operand_const_rvalue(value); + } + else result = operand_rvalue(value.type); + + } + //----------------------------------------------------------------------------- + + //----------------------------------------------------------------------------- + + return result; + BREAK(); + } + + invalid_default_case; + } + + invalid_return; +} + +function Operand +resolve_const(Ast_Expr *ast, Sym *sym){ + switch(ast->kind){ + CASE(LAMBDA, Lambda){ + return resolve_lambda(node, sym); + BREAK(); + } + + CASE(ENUM, Enum){ + Ast_Resolved_Type *type = type_enum(node); + S64 scope_index = scope_open(); + S64 value = 0; + For(node->members){ + Operand op = require_const_int(it->value, AST_CAN_BE_NULL); + if(op.type){ + value = bigint_as_signed(&op.big_int_val) + 1; + } + else{ + op.type = untyped_int; + bigint_init_signed(&op.big_int_val, value++); + } + + sym_const(it->name, op, it, INSERT_INTO_SCOPE); + } + scope_close(scope_index); + return operand_type(type); + BREAK(); + } + + CASE(STRUCT, Struct){ + Ast_Resolved_Type *type = type_struct(node); + return operand_type(type); + BREAK(); + } + + default: return resolve_expr(ast, 0, sym); + } +} + +function Operand +resolve_binding(Ast *ast, Sym *sym){ + switch(ast->kind){ + CASE(VAR, Var){ + Ast_Resolved_Type *type = resolve_typespec(node->typespec, AST_CAN_BE_NULL); + Operand expr = resolve_expr(node->expr, type); + 
assert(expr.type != 0 || type != 0); + make_sure_value_is_compatible_with_type(node->pos, &expr, type, EXPR_CAN_BE_NULL|TYPE_CAN_BE_NULL); + assert(expr.type); + return expr; + BREAK(); + } + + CASE(CONST, Const){ + Operand expr = resolve_const((Ast_Expr *)node->value, sym); + if(!expr.is_const) parsing_error(node->pos, "Value of constant variable is not a constant expression"); + assert(expr.type); + return expr; + BREAK(); + } + invalid_default_case; return {}; + } +} + +function void +resolve_sym(Sym *sym){ + if(sym->state == SYM_RESOLVED){ + return; + } + else if(sym->state == SYM_RESOLVING){ + parsing_error(sym->ast->pos, "Cyclic dependency"); + return; + } + assert(sym->state == SYM_NOT_RESOLVED); + assert(sym->ast->kind == AST_VAR || sym->ast->kind == AST_CONST); + + sym->state = SYM_RESOLVING; + { + Operand op = resolve_binding(sym->ast, sym); + sym->value = op.value; + } + sym->state = SYM_RESOLVED; + + pctx->resolving_package->ordered.add((Ast_Named *)sym->ast); +} + +function Sym * +resolve_name(Token *pos, Intern_String name){ + Sym *sym = sym_get(name); + if(!sym) parsing_error(pos, "Unidentified name [%s]", name.str); + resolve_sym(sym); + return sym; +} + +function void +resolve_package(Ast_Package *package){ + For(package->decls){ + resolve_name(it->pos, it->name); + if(ast_is_struct(it)){ + type_complete(const_get_struct(it)->type); + } + } +} + +function Ast_Package * +parse_file(){ + Scratch scratch; + + // + // @note: pop the first token with token_next() / token_expect() + // which always should be an indentation token, + // it updates the indent info on the parser, + // making sure that indentation on + // the first line is properly updated + // + Token *token = token_get(); + Arraydecls = {scratch}; + while(!token_is(TK_End)){ + token_expect(SAME_SCOPE); + Ast_Named *decl = parse_named(true); + if(!decl) break; + + Sym *sym = sym_new(SYM_VAR, decl->name, decl); + if(decl->kind == AST_CONST) { + sym->kind = SYM_CONST; + Ast_Struct *s = 
const_try_getting_struct(decl); + if(s){ + s->type = type_incomplete(s); + sym->type_val = s->type; + sym->type = type_type; + sym->state = SYM_RESOLVED; + } + } + else assert(decl->kind == AST_VAR); + + sym_insert(sym); + decls.add(decl); + } + Ast_Package *result = ast_package(token, token->file, decls); + return result; +} + +function void +test_types(){ + Scratch scratch; + Parse_Ctx ctx = {}; + parse_init(&ctx, scratch, scratch); + pctx = &ctx; + + Ast_Resolved_Type *array_type1 = type_array(type_s64, 1, 32); + Ast_Resolved_Type *array_type2 = type_array(type_s64, 1, 32); + Ast_Resolved_Type *array_type3 = type_array(type_s64, 1, 48); + assert(array_type1 == array_type2); + assert(array_type2 != array_type3); + Ast_Resolved_Type *pointer_type1 = type_pointer(type_s64); + Ast_Resolved_Type *pointer_type2 = type_pointer(type_s64); + assert(pointer_type2 == pointer_type1); + Ast_Resolved_Type *pointer_type3 = type_pointer(pointer_type1); + Ast_Resolved_Type *pointer_type4 = type_pointer(pointer_type2); + assert(pointer_type3 != pointer_type1); + assert(pointer_type3 == pointer_type4); + + Array types = {scratch}; + types.add(type_array(type_s64, 1, 32)); + Ast_Resolved_Type *func_type1 = type_lambda(0, types[0], types); + Ast_Resolved_Type *func_type2 = type_lambda(0, types[0], types); + assert(func_type1 == func_type2); + + Array types2 = {scratch}; + { + types2.add(type_array(type_s64, 1, 32)); + types2.add(type_s64); + } + types.add(type_s64); + Ast_Resolved_Type *func_type3 = type_lambda(0, types[0], types); + Ast_Resolved_Type *func_type4 = type_lambda(0, types[0], types2); + assert(func_type1 != func_type3); + assert(func_type3 == func_type4); +} \ No newline at end of file diff --git a/typechecking.h b/typechecking.h new file mode 100644 index 0000000..59d59e4 --- /dev/null +++ b/typechecking.h @@ -0,0 +1,419 @@ + +//----------------------------------------------------------------------------- +// Symbols 
+//----------------------------------------------------------------------------- +enum Sym_Kind{ + SYM_NONE, + SYM_CONST, + SYM_VAR, +}; + +enum Sym_State{ + SYM_NOT_RESOLVED, + SYM_RESOLVING, + SYM_RESOLVED, +}; + +struct Sym{ + Intern_String name; + Sym_Kind kind; + Sym_State state; + Ast *ast; + INLINE_VALUE_FIELDS; +}; + +struct Operand{ + INLINE_VALUE_FIELDS; + bool is_const: 1; + bool is_lvalue: 1; +}; + +enum{AST_CANT_BE_NULL = 0, AST_CAN_BE_NULL = 1}; +function Ast_Resolved_Type *resolve_typespec(Ast_Expr *ast, B32 ast_can_be_null = AST_CANT_BE_NULL); +function Sym *resolve_name(Token *pos, Intern_String name); +function Operand resolve_expr(Ast_Expr *ast, Ast_Resolved_Type *compound_required_type = 0, Sym *const_sym = 0); +function Operand resolve_binding(Ast *ast, Sym *sym = 0); +global Ast_Named empty_decl = {}; + +//----------------------------------------------------------------------------- +// Symbol constructors and utils +//----------------------------------------------------------------------------- +function void +sym_insert(Sym *sym){ + U64 hash = hash_string(sym->name.s); + Sym *is_sym = (Sym *)map_get(&pctx->syms, hash); + if(is_sym) parsing_error(sym->ast->pos, "Symbol with name: [%s] defined multiple times", sym->name.s.str); + if(pctx->scope > 0) pctx->local_syms.add(sym); + map_insert(&pctx->syms, hash, sym); +} + +function Sym * +sym_get(Intern_String name){ + Sym *result = (Sym *)map_get(&pctx->syms, hash_string(name.s)); + return result; +} + +function S64 +scope_open(){ + S64 local_sym_count = pctx->local_syms.len; + pctx->scope++; + return local_sym_count; +} + +function void +scope_close(S64 local_sym_count){ + pctx->scope--; + assert(pctx->scope >= 0); + for(S64 i = local_sym_count; i < pctx->local_syms.len; i++){ + Sym *it = pctx->local_syms.data[i]; + void *removed = map_remove(&pctx->syms, hash_string(it->name.s)); + assert(removed); + } + pctx->local_syms.len = local_sym_count; +} + +function void +sym_associate(Ast *ast, Sym 
*sym){ + assert(ast); + assert(sym); + map_insert(&pctx->resolved, ast, sym); +} + +function Sym * +sym_new(Sym_Kind kind, Intern_String name, Ast *ast, B32 associate = true){ + Sym *result = exp_alloc_type(pctx->perm, Sym, AF_ZeroMemory); + result->name = name; + result->kind = kind; + result->ast = ast; + if(associate) sym_associate(ast, result); + return result; +} + +function Sym * +sym_new_resolved(Sym_Kind kind, Intern_String name, Value value, Ast *ast, B32 associate = true){ + Sym *result = sym_new(kind, name, ast, associate); + result->state = SYM_RESOLVED; + result->value = value; + return result; +} + +const B32 INSERT_INTO_SCOPE = true; + +function Sym * +sym_var(Intern_String name, Ast_Resolved_Type *type, Ast *ast, B32 insert_into_scope = false){ + Value value; + value.type = type; + Sym *sym = sym_new_resolved(SYM_VAR, name, value, ast); + if(insert_into_scope) sym_insert(sym); + return sym; +} + +function Sym * +sym_var(Intern_String name, Operand op, Ast *ast, B32 insert_into_scope = false){ + Sym *sym = sym_new_resolved(SYM_VAR, name, op.value, ast); + if(insert_into_scope) sym_insert(sym); + return sym; +} + +function Sym * +sym_const(Intern_String name, Operand op, Ast *ast, B32 insert_into_scope = false){ + Sym *sym = sym_new_resolved(SYM_CONST, name, op.value, ast); + if(insert_into_scope) sym_insert(sym); + return sym; +} + +function Sym * +resolved_get(Ast *ast){ + Sym *result = (Sym *)map_get(&pctx->resolved, ast); + assert(result); + return result; +} + +function Ast_Resolved_Type * +resolved_type_get(Ast_Expr *ast){ + Sym *result = resolved_get(ast); + assert(result->type == type_type); + assert(result->type); + return result->type_val; +} + +function Sym * +sym_type(Ast_Resolved_Type *type, Ast *ast, Intern_String name = {}, B32 associate = true){ + Value value; + value.type = type_type; + value.type_val = type; + Sym *result = sym_new_resolved(SYM_CONST, name, value, ast, associate); + return result; +} + +function Sym * 
+sym_insert(Sym_Kind kind, Intern_String name, Value value, Ast *ast){ + Sym *sym = sym_new_resolved(kind, name, value, ast); + sym_insert(sym); + return sym; +} + +function void +sym_insert_builtin_type(String name, Ast_Resolved_Type *type){ + Intern_String string = intern_string(&pctx->interns, name); + Sym *sym = sym_type(type, &empty_decl, string, false); + sym_insert(sym); +} + +function void +sym_insert_builtins(){ + sym_insert_builtin_type("void"_s , type_void); + sym_insert_builtin_type("Bool"_s , type_bool); + sym_insert_builtin_type("String"_s, type_string); + sym_insert_builtin_type("S8"_s, type_s8); + sym_insert_builtin_type("S16"_s, type_s16); + sym_insert_builtin_type("S32"_s, type_s32); + sym_insert_builtin_type("S64"_s, type_s64); + sym_insert_builtin_type("U8"_s, type_u8); + sym_insert_builtin_type("U16"_s, type_u16); + sym_insert_builtin_type("U32"_s, type_u32); + sym_insert_builtin_type("U64"_s, type_u64); + sym_insert_builtin_type("F32"_s, type_f32); + sym_insert_builtin_type("F64"_s, type_f64); +} + +//----------------------------------------------------------------------------- +// Operands +//----------------------------------------------------------------------------- +function Operand +operand(Sym *sym){ + Operand result = {}; + result.type = sym->type; + result.is_const = sym->kind == SYM_CONST ? true : false; + result.is_lvalue= sym->kind == SYM_CONST ? 
false : true; // Cant assign to const values + result.value = sym->value; + return result; +} + +function Operand +operand_type(Ast_Resolved_Type *type){ + Operand result = {}; + result.type = type_type; + result.is_const = true; + result.is_lvalue = false; + result.type_val = type; + return result; +} + +function Operand +operand_int(BigInt big_int){ + Operand result = {}; + result.type = untyped_int; + result.big_int_val = bigint_copy(pctx->perm, &big_int); + result.is_const = true; + result.is_lvalue = false; + return result; +} + +function Operand +operand_str(Intern_String intern_val){ + Operand result = {}; + result.type = type_string; + result.intern_val = intern_val; + result.is_const = true; + result.is_lvalue = false; + return result; +} + +function Operand +operand_lambda(Ast_Resolved_Type *type){ + Operand result = {}; + result.type = type; + result.is_const = true; + result.is_lvalue = false; + return result; +} + +function Operand +operand_const_rvalue(Value value){ + Operand result = {}; + result.is_const = true; + result.value = value; + return result; +} + +function Operand +operand_lvalue(Ast_Resolved_Type *type){ + Operand result = {}; + result.type = type; + result.is_const = false; + result.is_lvalue = true; + return result; +} + +function Operand +operand_rvalue(Ast_Resolved_Type *type){ + Operand result = {}; + result.type = type; + result.is_const = false; + result.is_lvalue = false; + return result; +} + +//----------------------------------------------------------------------------- +// Hash consed types +//----------------------------------------------------------------------------- +function Ast_Resolved_Type * +type_new(Allocator *allocator, Ast_Resolved_Type_Kind kind, SizeU size, SizeU align){ + Ast_Resolved_Type *result = exp_alloc_type(allocator, Ast_Resolved_Type, AF_ZeroMemory); + result->kind = kind; + result->size = size; + result->align = align; + return result; +} + +function Ast_Resolved_Type * +type_copy(Allocator *a, 
Ast_Resolved_Type *type){ + Ast_Resolved_Type *result = exp_alloc_type(a, Ast_Resolved_Type); + memory_copy(result, type, sizeof(Ast_Resolved_Type)); + return result; +} + +function Ast_Resolved_Type * +type_pointer(Ast_Resolved_Type *base){ + Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, (void *)base); + if(!result){ + result = type_new(pctx->perm, TYPE_POINTER, pointer_size, pointer_align); + result->base = base; + map_insert(&pctx->type_map, base, result); + } + assert(result->kind == TYPE_POINTER); + return result; +} + +function Ast_Resolved_Type * +type_array(Ast_Resolved_Type *base, B32 size_present, S64 size){ + if(!size_present){ + size = ARRAY_SIZE_INFERRED; + } + + U64 hash_base = hash_ptr(base); + U64 hash = hash_mix(hash_base, hash_u64(size)); + Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash); + if(result){ + assert(result->kind == TYPE_ARRAY); + assert(result->arr.size == size); + assert(result->arr.base == base); + return result; + } + + result = type_new(pctx->perm, TYPE_ARRAY, pointer_size, pointer_align); + result->arr.base = base; + result->arr.size = size; + result->arr.inferred_size_hash = hash_mix(hash_base, hash_u64(ARRAY_SIZE_INFERRED)); + map_insert(&pctx->type_map, hash, result); + return result; +} + +function Ast_Resolved_Type * +type_lambda(Ast *ast, Ast_Resolved_Type *ret, Array args){ + U64 hash = hash_ptr(ret); + For(args) hash = hash_mix(hash, hash_ptr(it)); + Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash); + + if(result){ + assert(result->kind == TYPE_LAMBDA); + assert(result->func.ret == ret); + assert(result->func.args.len == args.len); + return result; + } + + result = type_new(pctx->perm, TYPE_LAMBDA, pointer_size, pointer_align); + result->ast = ast; + result->func.ret = ret; + result->func.args = args.tight_copy(pctx->perm); + map_insert(&pctx->type_map, hash, result); + + return result; +} + +function Ast_Resolved_Type * 
+type_enum(Ast_Enum *ast){ + Ast_Resolved_Type *type = resolve_typespec(ast->typespec, AST_CAN_BE_NULL); + if(!type){ + type = untyped_int; + } + + Ast_Resolved_Type *result = type_new(pctx->perm, TYPE_ENUM, type->size, type->align); + result->base = type; + result->ast = ast; + return result; +} + +/* +2022.05.31 - Global scope structs vs nested structs +Structs exist in 2 variants, the global scope structs are a bit different +then scoped structs. They startout incomplete and when some operation +requires the actual struct size, alignment, field access etc. then it +should call complete_type. It resolves all the children, calculates the +size and makes sure there are no cyclic dependencies. This is require for +correct behaviour of order independent structs. If someone just wants a pointer +to that struct we don't need to complete the type, we know how large a pointer is. +This allows us to have cyclic dependency that is a pointer. Cause we know how large pointer is. +*/ +function Ast_Resolved_Type * +type_incomplete(Ast *ast){ + Ast_Resolved_Type *result = type_new(pctx->perm, TYPE_INCOMPLETE, 0, 0); + result->ast = ast; + return result; +} + +function void +type_struct_complete(Ast_Resolved_Type *type, Ast_Struct *node){ + // @todo: compute size, alignement, offset !!! + // @note: resolve all the struct members first + type->kind = TYPE_COMPLETING; + Scratch scratch; + Array members = {scratch}; + For(node->members){ + Operand op = resolve_binding(it); + Intern_String name = ast_get_name(it); + sym_var(name, op, it); + members.add({op.type, name}); + } + type->agg.members = members.tight_copy(pctx->perm); + type->kind = TYPE_STRUCT; + + /* + @note: resolve constant members after the struct got resolved + this way we avoid a problem where we start resolving the function + and this function has parameter of type parent struct + which is being resolved right now, cyclic dependency happens. 
+ constants arent required to make struct work + */ + For(node->const_members){ + Operand op = resolve_binding(it); + Intern_String name = ast_get_name(it); + sym_const(name, op, it); + } +} + +function Ast_Resolved_Type * +type_struct(Ast_Struct *agg){ + Ast_Resolved_Type *result = type_new(pctx->perm, TYPE_STRUCT, 0, 0); + result->ast = agg; + type_struct_complete(result, agg); + return result; +} + +function void +type_complete(Ast_Resolved_Type *type){ + if(!type) { + return; + } + if(type->kind == TYPE_COMPLETING){ + parsing_error(type->ast->pos, "Cyclic type dependency"); + } + else if(type->kind != TYPE_INCOMPLETE){ + return; + } + + Ast_Struct *node = (Ast_Struct *)type->ast; + type_struct_complete(type, node); + pctx->resolving_package->ordered.add((Ast_Named *)node->parent); +}