From b945f3affdb7dc4599f9df4b29dc9069db4a570d Mon Sep 17 00:00:00 2001 From: Krzosa Karol Date: Wed, 25 May 2022 14:44:30 +0200 Subject: [PATCH] Lambdas, statements, typechecking lambdas --- cgenerate.cpp | 90 +++++++++++++++++++++++----------- test3.kl => globals.kl | 26 ++-------- lambdas.kl | 4 ++ new_ast.cpp | 16 +++++-- new_lex.cpp | 24 ++++------ new_parse.cpp | 73 +++++++++++++++++++--------- new_resolve.cpp | 106 +++++++++++++++++++++++++++++++++-------- new_type.cpp | 8 ++-- 8 files changed, 229 insertions(+), 118 deletions(-) rename test3.kl => globals.kl (85%) create mode 100644 lambdas.kl diff --git a/cgenerate.cpp b/cgenerate.cpp index 6064b66..421bd8f 100644 --- a/cgenerate.cpp +++ b/cgenerate.cpp @@ -5,7 +5,7 @@ global S32 global_indent; function void gen_indent(){ - for(S32 i = 0; i < global_indent; i++) gen(" "); + for(S32 i = 0; i < global_indent; i++) gen(" "); } // @todo: Gen complicated decl @@ -19,31 +19,31 @@ gen_indent(){ function void gen_simple_decl_prefix(Ast_Resolved_Type *ast){ - switch(ast->kind){ - case TYPE_Int: gen("int "); break; - case TYPE_Bool: gen("bool "); break; - case TYPE_Unsigned: gen("unsigned "); break; - case TYPE_String: gen("String "); break; - case TYPE_Void: gen("void "); break; - case TYPE_Pointer:{gen_simple_decl_prefix(ast->base); gen("*");} break; - case TYPE_Array: gen_simple_decl_prefix(ast->base); break; - case TYPE_Lambda:break; - invalid_default_case; + switch(ast->kind){ + case TYPE_Int: gen("int "); break; + case TYPE_Bool: gen("bool "); break; + case TYPE_Unsigned: gen("unsigned "); break; + case TYPE_String: gen("String "); break; + case TYPE_Void: gen("void "); break; + case TYPE_Pointer:{gen_simple_decl_prefix(ast->base); gen("*");} break; + case TYPE_Array: gen_simple_decl_prefix(ast->base); break; + case TYPE_Lambda:break; + invalid_default_case; } } function void gen_simple_decl_postfix(Ast_Resolved_Type *ast){ - switch(ast->kind){ - case TYPE_Int: break; - case TYPE_Bool: break; - case TYPE_Unsigned: break; - case TYPE_String: break; - case TYPE_Void: break; - case TYPE_Pointer: gen_simple_decl_postfix(ast->base); break; - case TYPE_Array: gen("[%d]", (int)ast->arr.size); gen_simple_decl_postfix(ast->arr.base); break; - case TYPE_Lambda:break; - invalid_default_case; +switch(ast->kind){ + case TYPE_Int: break; + case TYPE_Bool: break; + case TYPE_Unsigned: break; + case TYPE_String: break; + case TYPE_Void: break; + case TYPE_Pointer: gen_simple_decl_postfix(ast->base); break; + case TYPE_Array: gen("[%d]", (int)ast->arr.size); gen_simple_decl_postfix(ast->arr.base); break; + case TYPE_Lambda:break; + invalid_default_case; } } @@ -93,6 +93,15 @@ gen_expr(Ast_Expr *ast){ Ast_End(); } + Ast_Begin(AST_BINARY, Ast_Binary){ + gen("("); + gen_expr(node->left); + gen("%s", token_kind_string(node->op).str); + gen_expr(node->right); + gen(")"); + Ast_End(); + } + Ast_Begin(AST_UNARY, Ast_Unary){ switch(node->op){ case TK_Pointer: { @@ -157,13 +166,25 @@ gen_ast(Ast *ast){ switch(ast->kind){ Ast_Begin(AST_PACKAGE, Ast_Package){ - For(node->decls) gen_ast(*it); + For(node->decls) { + genln(""); + gen_ast(*it); + } + Ast_End(); + } + + Ast_Begin(AST_RETURN, Ast_Return){ + gen("return"); + if(node->expr){ + gen(" "); + gen_expr(node->expr); + } + gen(";"); Ast_End(); } Ast_Begin(AST_VAR, Ast_Decl){ - genln(""); - Sym *sym = sym_get(node->name); + Sym *sym = resolved_get(node); gen_simple_decl(sym->type, node->name); if(node->var.expr){ gen(" = "); @@ -179,7 +200,7 @@ gen_ast(Ast *ast){ if(sym->type->kind == TYPE_Lambda){ if(node->var.expr->kind == AST_LAMBDA){ Ast_Lambda *lambda = (Ast_Lambda *)node->var.expr; - genln(""); + gen("static "); gen_simple_decl(lambda->ret->resolved_type, node->name); gen("("); For(lambda->args){ @@ -188,9 +209,20 @@ gen_ast(Ast *ast){ if(it != (lambda->args.end() - 1)) gen(", "); } gen(")"); + + if(lambda->block) { + gen("{"); + global_indent++; + For(lambda->block->stmts) { + genln(""); + gen_ast(it[0]); + } + global_indent--; + genln("}"); + } + else gen(";"); } else{ - genln(""); gen_simple_decl(sym->type, node->name); gen(" = "); gen_expr(node->var.expr); @@ -198,10 +230,10 @@ gen_ast(Ast *ast){ } } else if(sym->type == type_int){ - genln("enum { %s = %lld };", node->name.str, sym->int_val); + gen("enum { %s = %lld };", node->name.str, sym->int_val); } else if(sym->type == type_string){ - genln("String %s = LIT(\"%s\");", node->name.str, sym->intern_val.str); + gen("String %s = LIT(\"%s\");", node->name.str, sym->intern_val.str); } else{ parsing_error(node->pos, "Unhandled type of constant expression"); @@ -217,7 +249,7 @@ gen_ast(Ast *ast){ function void test_gen(){ TEST_PARSER(); - String filename = "test3.kl"_s; + String filename = "globals.kl"_s; String file_content = os_read_file(scratch, filename); lex_restream(&ctx, file_content, filename); Ast_Package *result = parse_file(); diff --git a/test3.kl b/globals.kl similarity index 85% rename from test3.kl rename to globals.kl index 23fe12c..5782abc 100644 --- a/test3.kl +++ b/globals.kl @@ -1,27 +1,11 @@ -/* -Player :: struct - id : int - name: String -compound_of_struct: Player = { - id = 10, - name = "Guy", -} -second_compound_syntax := :Player{...} +returning_void :: (insert: int) + return -max_folding :: (a: int, b: int) { if a > b { return a; } return b; } - -max :: (a: int, b: int) - if a > b then return a - return b - -; - treated as new line -{ and } - treated as new line scope and end of new line scope -*/ - -arena_push :: (size: int) - return size + 10 +arena_push :: (size: int): int + result := size + 10 + return result //----------------------------------------------------------------------------- // Function types diff --git a/lambdas.kl b/lambdas.kl new file mode 100644 index 0000000..1516b53 --- /dev/null +++ b/lambdas.kl @@ -0,0 +1,4 @@ + +arena_push :: (size: int): int + result := size + 10 + return result \ No newline at end of file diff --git a/new_ast.cpp b/new_ast.cpp index fa6acac..de683b5 100644 --- a/new_ast.cpp +++ b/new_ast.cpp @@ -25,8 +25,11 @@ struct Parse_Ctx:Lexer{ Allocator *heap; U64 unique_ids; - Map global_syms; Map type_map; + + Map resolved; + Map syms; + S32 scope; Array local_syms; Token empty_token; @@ -40,8 +43,10 @@ struct Parse_Ctx:Lexer{ perm = perm_allocator; heap = heap_allocator; - global_syms = {heap}; - type_map = {heap}; + resolved = {heap}; + syms = {heap}; + type_map = {heap}; + local_syms = {heap}; lex_init(perm, heap, this); keyword_struct= intern("struct"_s); @@ -277,8 +282,9 @@ ast_expr_index(Token *pos, Ast_Expr *expr, Ast_Expr *index){ function Ast_Lambda * ast_lambda(Token *pos, Array params, Ast_Typespec *ret, Ast_Block *block){ AST_NEW(Lambda, AST_LAMBDA, pos); - result->args = params.tight_copy(pctx->perm); - result->ret = ret; + result->args = params.tight_copy(pctx->perm); + result->ret = ret; + result->block = block; if(!ret){ result->ret = ast_typespec_name(0, intern_void); } diff --git a/new_lex.cpp b/new_lex.cpp index e884961..d62a7c3 100644 --- a/new_lex.cpp +++ b/new_lex.cpp @@ -47,8 +47,6 @@ enum Token_Kind{ TK_Dot, TK_NewLine, - TK_NewUpScope, - TK_NewDownScope, TK_Colon, TK_Assign, @@ -86,6 +84,12 @@ enum Token_Kind{ TK_Pointer, TK_Dereference, + + // These are not produced by lexer + // but identified by parser + OPEN_SCOPE, + CLOSE_SCOPE, + SAME_SCOPE, }; struct Token{ @@ -525,17 +529,6 @@ lex_restream(Lexer *lexer, String istream, String file){ lexer->tokens.clear(); lexer->token_iter = 0; lex__stream(&lexer->interns, &lexer->tokens, &lexer->stream); - - S32 indent = 0; - For(lexer->tokens){ - if(it->kind == TK_NewLine){ - if(it->indent > indent) - it->kind = TK_NewUpScope; - if(it->indent < indent) - it->kind = TK_NewDownScope; - indent = it->indent; - } - } } function Lexer @@ -664,8 +657,9 @@ token_kind_string(Token_Kind kind){ case TK_Float: return "Float"_s; case TK_Integer: return "Int"_s; case TK_Keyword: return "Keyword"_s; - case TK_NewUpScope: return "New_Up_Scope"_s; - case TK_NewDownScope: return "New_Down_Scope"_s; + case CLOSE_SCOPE: return "Close_Scope"_s; + case OPEN_SCOPE: return "Open_Scope"_s; + case SAME_SCOPE: return "Same_Scope"_s; default: invalid_codepath; return ""_s; } } diff --git a/new_parse.cpp b/new_parse.cpp index eb5dce0..d9198f5 100644 --- a/new_parse.cpp +++ b/new_parse.cpp @@ -6,16 +6,13 @@ token_get(S64 i = 0){ return &pctx->empty_token; } Token *result = &pctx->tokens[i]; - if(result->kind == TK_NewLine){ - pctx->indent = result->indent; - } - return result; } function Token * token_next(){ Token *token = token_get(); + if(token->kind == TK_NewLine) pctx->indent = token->indent; pctx->token_iter++; return token; } @@ -79,14 +76,44 @@ parsing_error(Token *token, const char *str, ...){ function Token * token_expect(Token_Kind kind){ Token *token = token_get(); - if(token->kind == kind){ - token = token_next(); - return token; - } + if(token->kind == kind) return token_next(); parsing_error(token, "Expected token of kind: [%s], got instead token of kind: [%s]", token_kind_string(kind).str, token_kind_string(token->kind).str); return 0; } +function Token * +token_is_scope(Token_Kind scope){ + assert(scope == OPEN_SCOPE || scope == CLOSE_SCOPE || scope == SAME_SCOPE); + Token *token = token_get(); + if(token->kind == TK_NewLine){ + if (scope == OPEN_SCOPE && token->indent > pctx->indent) return token; + else if(scope == CLOSE_SCOPE && token->indent < pctx->indent) return token; + else if(scope == SAME_SCOPE && token->indent == pctx->indent) return token; + } + return 0; +} + +function Token * +token_match_scope(Token_Kind scope){ + Token *token = token_is_scope(scope); + if(token) return token_next(); + return 0; +} + +function Token * +token_expect_scope(Token_Kind scope){ + assert(scope == OPEN_SCOPE || scope == CLOSE_SCOPE || scope == SAME_SCOPE); + Token *token = token_get(); + if(token->kind == TK_NewLine){ + if (scope == OPEN_SCOPE && token->indent > pctx->indent) return token; + else if(scope == CLOSE_SCOPE && token->indent < pctx->indent) return token; + else if(scope == SAME_SCOPE && token->indent == pctx->indent) return token; + else parsing_error(token, "Expected a scope of kind [%s]", token_kind_string(scope)); + } + parsing_error(token, "Expected Scope[%s] got instead: [%s]", token_kind_string(scope).str, token_kind_string(token->kind).str); + return 0; +} + //----------------------------------------------------------------------------- // Expression parsing //----------------------------------------------------------------------------- @@ -165,10 +192,11 @@ parse_optional_type(){ return result; } +function Ast_Decl *parse_decl(B32); function Ast_Block * parse_block(){ Ast_Block *block = 0; - if(token_match(TK_NewUpScope)){ + if(token_match_scope(OPEN_SCOPE)){ Token *token_block = token_get(); Scratch scratch; @@ -177,19 +205,16 @@ parse_block(){ Token *token = token_get(); if(token_match_keyword(keyword_return)){ AST_NEW(Return, AST_RETURN, token); - result->expr = parse_expr(); + if(!token_is(TK_NewLine)) result->expr = parse_expr(); stmts.add(result); } else{ - // @todo - // Probably want to rewrite parse decls to allow for - // calling from other places, dont want to error messages - // to suffer though!!! - parsing_error(token, "Unexpected token while parsing statement"); + Ast_Decl *result = parse_decl(false); + if(result) stmts.add(result); + else parsing_error(token, "Unexpected token while parsing statement"); } - } while(token_match(TK_NewLine)); - token_expect(TK_NewDownScope); - + } while(token_match_scope(SAME_SCOPE)); + token_expect_scope(CLOSE_SCOPE); block = ast_block(token_block, stmts); } return block; @@ -390,10 +415,10 @@ parse_assign_expr(){ } function Ast_Decl * -parse_decl(){ +parse_decl(B32 is_global){ Ast_Decl *result = 0; if(token_is(TK_Identifier)){ - if(pctx->indent != 0) parsing_error(token_get(), "Top level declarations shouldn't be indented"); + if(is_global && pctx->indent != 0) parsing_error(token_get(), "Top level declarations shouldn't be indented"); Token *name = token_next(); if(token_match(TK_DoubleColon)){ // Constant Ast_Expr *expr = parse_expr(); @@ -414,8 +439,10 @@ parse_decl(){ } } else if(!token_is(TK_End)){ - Token *token = token_get(); - parsing_error(token, "Unexpected token: [%s] when parsing a declaration", token_kind_string(token->kind).str); + if(is_global){ + Token *token = token_get(); + parsing_error(token, "Unexpected token: [%s] when parsing a declaration", token_kind_string(token->kind).str); + } } return result; } @@ -427,7 +454,7 @@ parse_file(){ Arraydecls = {scratch}; while(!token_is(TK_End)){ while(token_match(TK_NewLine)); - Ast_Decl *decl = parse_decl(); + Ast_Decl *decl = parse_decl(true); if(!decl) break; decls.add(decl); } diff --git a/new_resolve.cpp b/new_resolve.cpp index bc749c4..91934e9 100644 --- a/new_resolve.cpp +++ b/new_resolve.cpp @@ -11,7 +11,7 @@ enum Sym_Kind{ struct Sym{ Intern_String name; Sym_Kind kind; - Ast_Decl *decl; + Ast *ast; Ast_Resolved_Type *type; union{ S64 int_val; @@ -33,27 +33,57 @@ global Ast_Decl empty_decl = {}; function void sym_insert(Sym *sym){ U64 hash = hash_string(sym->name.s); - Sym *is_sym = (Sym *)map_get_u64(&pctx->global_syms, hash); + Sym *is_sym = (Sym *)map_get(&pctx->syms, hash); if(is_sym){ - parsing_error(sym->decl->pos, "Symbol with name: [%s] defined multiple times", sym->name.s.str); + parsing_error(sym->ast->pos, "Symbol with name: [%s] defined multiple times", sym->name.s.str); + } + if(pctx->scope > 0){ + pctx->local_syms.add(sym); } - map_insert_u64(&pctx->global_syms, hash, sym); + map_insert(&pctx->syms, hash, sym); } function Sym * sym_get(Intern_String name){ - Sym *result = (Sym *)map_get_u64(&pctx->global_syms, hash_string(name.s)); + Sym *result = (Sym *)map_get(&pctx->syms, hash_string(name.s)); return result; } +function S64 +scope_push(){ + S64 local_sym_count = pctx->local_syms.len; + pctx->scope++; + return local_sym_count; +} + +function void +scope_pop(S64 local_sym_count){ + pctx->scope--; + assert(pctx->scope >= 0); + for(S64 i = local_sym_count; i < pctx->local_syms.len; i++){ + void *removed = map_remove(&pctx->syms, hash_string(pctx->local_syms.data[i]->name.s)); + assert(removed); + } + pctx->local_syms.len = local_sym_count; +} + function Sym * -sym_new(Sym_Kind kind, Intern_String name, Ast_Resolved_Type *type, Ast_Decl *decl){ +sym_new(Sym_Kind kind, Intern_String name, Ast_Resolved_Type *type, Ast *ast){ Sym *result = exp_alloc_type(pctx->perm, Sym); result->name = name; result->kind = kind; result->type = type; - result->decl = decl; + result->ast = ast; + assert(ast); + map_insert(&pctx->resolved, ast, result); + return result; +} + +function Sym * +resolved_get(Ast *ast){ + Sym *result = (Sym *)map_get(&pctx->resolved, ast); + assert(result); return result; } @@ -159,6 +189,40 @@ resolve_type_pair(Token *pos, Ast_Resolved_Type *a, Ast_Resolved_Type *b){ return result; } +function void +eval_var(Ast_Decl *node){ + Ast_Resolved_Type *type = eval_typespec(node->var.typespec); + Operand expr = node->var.expr ? eval_expr(node->var.expr, type) : Operand{}; + Ast_Resolved_Type *resolved_type = resolve_type_pair(node->pos, type, expr.type); + + Sym *sym = sym_new(SYM_Var, node->name, resolved_type, node); + sym_insert(sym); +} + +function void +eval_stmt(Ast *ast, Ast_Resolved_Type *ret){ + // @todo: need to check if all paths return a value + + switch(ast->kind){ + Ast_Begin(AST_RETURN, Ast_Return){ + Operand op = {}; + if(node->expr) op = eval_expr(node->expr); + if(!op.type && ret != type_void) parsing_error(node->pos, "Function expects a void return value but the returned value is [x]"); + if(op.type && op.type != ret) parsing_error(node->pos, "Return statement has different type then returned value"); + + Ast_End(); + } + + Ast_Begin(AST_VAR, Ast_Decl){ + eval_var(node); + Ast_End(); + } + + + invalid_default_case; + } +} + function Operand eval_expr(Ast_Expr *ast, Ast_Resolved_Type *expected_type){ switch(ast->kind){ @@ -226,13 +290,18 @@ eval_expr(Ast_Expr *ast, Ast_Resolved_Type *expected_type){ Ast_Begin(AST_LAMBDA, Ast_Lambda){ Ast_Resolved_Type *type = eval_typespec(ast_typespec_lambda(0, node)); - // @todo: typecheck the function - // enter scope - - // push local syms etc. - // Make sure return type is matching function return type - // quit scope - For(node->block->stmts){ - + // @todo: We also need to make sure there is a return value when ret type is not void + if(node->block){ + S64 scope_index = scope_push(); + For(node->args){ + Ast_Resolved_Type *type = eval_typespec(it[0]->typespec); + Sym *arg_sym = sym_new(SYM_Var, it[0]->name, type, it[0]); + sym_insert(arg_sym); + } + For(node->block->stmts){ + eval_stmt(it[0], node->ret->resolved_type); + } + scope_pop(scope_index); } return {type, true}; @@ -366,12 +435,7 @@ eval_decl(Ast *ast){ } Ast_Begin(AST_VAR, Ast_Decl){ - Ast_Resolved_Type *type = eval_typespec(node->var.typespec); - Operand expr = node->var.expr ? eval_expr(node->var.expr, type) : Operand{}; - Ast_Resolved_Type *resolved_type = resolve_type_pair(node->pos, type, expr.type); - - Sym *sym = sym_new(SYM_Var, node->name, resolved_type, node); - sym_insert(sym); + eval_var(node); Ast_End(); } @@ -397,7 +461,7 @@ eval_decl(Ast *ast){ function void test_resolve(){ TEST_PARSER(); - String filename = "test3.kl"_s; + String filename = "globals.kl"_s; String file_content = os_read_file(scratch, filename); lex_restream(&ctx, file_content, filename); Ast_Package *result = parse_file(); diff --git a/new_type.cpp b/new_type.cpp index dd8a220..0b719cc 100644 --- a/new_type.cpp +++ b/new_type.cpp @@ -95,7 +95,7 @@ type_pointer(Ast_Resolved_Type *base){ function Ast_Resolved_Type * type_array(Ast_Resolved_Type *base, SizeU size){ U64 hash = hash_mix(hash_ptr(base), hash_u64(size)); - Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get_u64(&pctx->type_map, hash); + Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash); if(result){ assert(result->kind == TYPE_Array); assert(result->arr.size == size); @@ -106,7 +106,7 @@ type_array(Ast_Resolved_Type *base, SizeU size){ result = type_new(pctx->perm, TYPE_Array, pointer_size, pointer_align); result->arr.base = base; result->arr.size = size; - map_insert_u64(&pctx->type_map, hash, result); + map_insert(&pctx->type_map, hash, result); return result; } @@ -114,7 +114,7 @@ function Ast_Resolved_Type * type_lambda(Ast_Resolved_Type *ret, Array args){ U64 hash = hash_ptr(ret); For(args) hash = hash_mix(hash, hash_ptr(*it)); - Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get_u64(&pctx->type_map, hash); + Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash); if(result){ assert(result->kind == TYPE_Lambda); @@ -126,7 +126,7 @@ type_lambda(Ast_Resolved_Type *ret, Array args){ result = type_new(pctx->perm, TYPE_Lambda, pointer_size, pointer_align); result->func.ret = ret; result->func.args = args.tight_copy(pctx->perm); - map_insert_u64(&pctx->type_map, hash, result); + map_insert(&pctx->type_map, hash, result); return result; }