diff --git a/compiler.h b/compiler.h new file mode 100644 index 0000000..f44010e --- /dev/null +++ b/compiler.h @@ -0,0 +1,226 @@ + +enum Token_Kind{ + TK_End, + + TK_Mul, + TK_Div, + TK_Mod, + TK_LeftShift, + TK_RightShift, + TK_FirstMul = TK_Mul, + TK_LastMul = TK_RightShift, + + TK_Add, + TK_Sub, + TK_FirstAdd = TK_Add, + TK_LastAdd = TK_Sub, + + TK_Equals, + TK_LesserThenOrEqual, + TK_GreaterThenOrEqual, + TK_LesserThen, + TK_GreaterThen, + TK_NotEquals, + TK_FirstCompare = TK_Equals, + TK_LastCompare = TK_NotEquals, + + TK_BitAnd, + TK_BitOr, + TK_BitXor, + TK_And, + TK_Or, + TK_FirstLogical = TK_BitAnd, + TK_LastLogical = TK_Or, + + TK_Neg, + TK_Not, + TK_OpenParen, + TK_CloseParen, + TK_OpenBrace, + TK_CloseBrace, + TK_OpenBracket, + TK_CloseBracket, + TK_Comma, + TK_Pound, + TK_Question, + TK_ThreeDots, + TK_Semicolon, + TK_Dot, + + TK_NewLine, + TK_Colon, + + TK_Assign, + TK_ColonAssign, + TK_DivAssign, + TK_MulAssign, + TK_ModAssign, + TK_SubAssign, + TK_AddAssign, + TK_AndAssign, + TK_OrAssign, + TK_XorAssign, + TK_LeftShiftAssign, + TK_RightShiftAssign, + TK_FirstAssign = TK_Assign, + TK_LastAssign = TK_RightShiftAssign, + + TK_DoubleColon, + TK_At, + TK_Decrement, + TK_Increment, + TK_PostDecrement, + TK_PostIncrement, + + TK_Arrow, + TK_ExprSizeof, + TK_DocComment, + TK_Comment, + TK_Identifier, + TK_UnicodeLit, + TK_StringLit, + TK_Error, + TK_Float, + TK_Integer, + TK_Keyword, + + TK_FOREIGN, + + TK_Pointer = TK_Mul, + TK_Dereference = TK_BitAnd, + + + OPEN_SCOPE = 128, + CLOSE_SCOPE, + SAME_SCOPE, +}; + +struct Token{ + Token_Kind kind; + union{ + String string; + struct{U8 *str; S64 len;}; + }; + + union { + U32 unicode; + BigInt int_val; + F64 f64_val; + String error_val; + Intern_String intern_val; + S64 indent; + }; + + String file; + S32 line; + U8 *line_begin; +}; + +struct Lex_Stream{ + String stream; + S64 iter; + + U8 *line_begin; + String file; + S32 line; + S32 inside_brace_paren; + Array indent_stack; +}; + +struct Lexer{ + Allocator *arena; + Lex_Stream stream; + Array tokens; + Intern_Table interns; + S64 token_iter; + + Intern_String intern(String string){ + return intern_string(&interns, string); + } +}; + +// Lexer::interns::map::allocator - array allocator, resizing +// Lexer::tokens - array allocator, resizing +// +// Parser::ast_arena - arena for asts +// Lexer::interns::string_allocator - arena for interns +// +Intern_String keyword_struct; +Intern_String keyword_union; +Intern_String keyword_return; +Intern_String keyword_if; +Intern_String keyword_else; +Intern_String keyword_true; +Intern_String keyword_false; +Intern_String keyword_for; +Intern_String keyword_pass; +Intern_String keyword_cast; +Intern_String keyword_enum; + +Intern_String intern_void; +Intern_String intern_foreign; + +struct Ast_Package; +struct Sym; +struct Parse_Ctx:Lexer{ + Allocator *perm; // Stores: AST, tokens, interns + Allocator *heap; + + U64 unique_ids; + Map type_map; + + Ast_Package *resolving_package; + Map resolved; + Map syms; + S32 scope; + Array local_syms; + + Token empty_token; + S64 indent; + + String_Builder gen; +}; + +//----------------------------------------------------------------------------- +// Constructors +//----------------------------------------------------------------------------- +thread_local Parse_Ctx *pctx; + +function void +lex_init(Allocator *token_string_arena, Allocator *map_allocator, Lexer *l){ + l->arena = token_string_arena; + l->tokens = array_make(token_string_arena, 1024*2); + l->interns= intern_table_make(token_string_arena, 
map_allocator, 1024); + + keyword_struct= l->intern("struct"_s); + keyword_union = l->intern("union"_s); + keyword_cast = l->intern("cast"_s); + keyword_true = l->intern("true"_s); + keyword_false = l->intern("false"_s); + keyword_return = l->intern("return"_s); + keyword_if = l->intern("if"_s); + keyword_pass = l->intern("pass"_s); + keyword_else = l->intern("else"_s); + keyword_for = l->intern("for"_s); + keyword_enum = intern_string(&l->interns, "enum"_s); + l->interns.first_keyword = keyword_struct.str; + l->interns.last_keyword = keyword_enum.str; + + intern_foreign = intern_string(&l->interns, "#foreign"_s); + intern_void = intern_string(&l->interns, "void"_s); +} + +function void +parse_init(Parse_Ctx *ctx, Allocator *perm_allocator, Allocator *heap_allocator){ + ctx->perm = perm_allocator; + ctx->heap = heap_allocator; + ctx->gen = {ctx->perm}; + ctx->resolved = {ctx->heap}; + ctx->syms = {ctx->heap}; + ctx->type_map = {ctx->heap}; + ctx->local_syms = {ctx->heap}; + bigint_allocator = ctx->perm; + + lex_init(ctx->perm, ctx->heap, ctx); + pctx = ctx; +} \ No newline at end of file diff --git a/lexer.cpp b/lexer.cpp new file mode 100644 index 0000000..63714a2 --- /dev/null +++ b/lexer.cpp @@ -0,0 +1,629 @@ +force_inline B32 token_is_assign(Token_Kind token){return token >= TK_FirstAssign && token <= TK_LastAssign;} +force_inline B32 token_is_assign(Token *token){return token_is_assign(token->kind);} +force_inline B32 token_is_compare(Token_Kind token){return token >= TK_FirstCompare && token <= TK_LastCompare;} +force_inline B32 token_is_compare(Token *token){return token_is_compare(token->kind);} + +function U8 +lexc(Lex_Stream *s){ + return s->stream.str[s->iter]; +} + +function U8 +lexci(Lex_Stream *s, S32 i){ + return s->stream.str[s->iter+i]; +} + +function U8 * +lexcp(Lex_Stream *s){ + return s->stream.str + s->iter; +} + +function B32 +lex_is_whitespace(U8 c){ + B32 result = c == ' ' || c == '\r'; + return result; +} + +function B32 +lex_is_alphabetic(U8 c){ + B32 result = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + return result; +} + +function B32 +lex_is_numeric(U8 c){ + B32 result = c >= '0' && c <= '9'; + return result; +} + +function B32 +lex_is_alphanumeric(U8 c){ + B32 result = lex_is_numeric(c) || lex_is_alphabetic(c); + return result; +} + +function void +lex_set_len(Lex_Stream *s, Token *token){ + assert(lexcp(s) >= token->str); + token->len = lexcp(s) - token->str; +} + +function void +lex_set_keywords(Lexer *lexer, Array keywords){ + Intern_String keyword = {}; + For(keywords){ + keyword = intern_string(&lexer->interns, it); + if(&it == keywords.begin()) + lexer->interns.first_keyword = keyword.str; + } + lexer->interns.last_keyword = keyword.str; +} + +function B32 +lex_is_keyword(Intern_Table *lexer, Intern_String keyword){ + B32 result = keyword.str >= lexer->first_keyword && keyword.str <= lexer->last_keyword; + return result; +} + +function void +token_error(Token *t, String error_val){ + t->kind = TK_Error; + t->error_val = error_val; +} + +function void +lex_parse_u64(Lexer *lexer, Token *t){ + Scratch scratch; + Set_BigInt_Allocator(scratch); + + t->kind = TK_Integer; + BigInt m = bigint_u64(1); // @leak, it accumulates and potentially needs allocation + BigInt val10 = bigint_u64(10); + BigInt result = bigint_u64(0); + + for(S64 i = t->len - 1; i >= 0; --i){ + BigInt val = bigint_u64(t->str[i] - '0'); // I dont think this is a leak, too small + BigInt new_val = bigint_mul(&val, &m); // @leak + result = bigint_add(&result, &new_val); // @leak + m = 
bigint_mul(&m, &val10); // @leak + } + + t->int_val = bigint_copy(lexer->arena, &result); +} + +function void +lex_parse_f64(Token *t){ + t->kind = TK_Float; + char buffer[128]; + S64 len = clamp_top((int)t->len, 126); + memory_copy(buffer, t->str, len); + buffer[len] = 0; + t->f64_val = strtod(buffer, 0); +} + +function void +lex_advance(Lex_Stream *s){ + if(s->iter >= s->stream.len){ + return; + } + else if(lexc(s) == '\n'){ + s->iter++; + s->line++; + s->line_begin = lexcp(s); + } + else{ + s->iter++; + } +} + +function void +lex_parse_string(Lex_Stream *s, Token *t, U8 c){ + for(;;){ + if(lexc(s) == '\\') lex_advance(s); + else if(lexc(s) == c) break; + else if(lexc(s) == 0){ + token_error(t, "Unterminated string, reached end of file"_s); + break; + } + lex_advance(s); + } + if(t->kind != TK_Error){ + lex_advance(s); + lex_set_len(s,t); + } +} + +function void +lex_parse_ident(Intern_Table *table, Lex_Stream *s, Token *t){ + while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_') + lex_advance(s); + lex_set_len(s,t); + t->intern_val = intern_string(table, t->string); +} + +#define CASE2(op, OpName, Assign) \ + case op: \ + if (lexc(s) == '=') { \ + lex_advance(s); \ + t.kind = Assign; \ + } else { \ + t.kind = OpName; \ + } \ + break +#define CASE3(op, OpName, Assign, Incr) \ + case op: \ + if (lexc(s) == '=') { \ + lex_advance(s); \ + t.kind = Assign; \ + } else if (lexc(s) == op) { \ + lex_advance(s); \ + t.kind = Incr; \ + } else { \ + t.kind = OpName; \ + } \ + break + +function Token +token_make(U8 *str, String file, int line, U8 *line_begin){ + Token t = {}; + t.str = str; + t.file = file; + t.line = line; + t.line_begin = line_begin; + return t; +} + +global Token token_null = {SAME_SCOPE}; + +function Token * +lex_last_indent_token(Lex_Stream *s){ + if(s->indent_stack.len > 0){ + return *s->indent_stack.last(); + } + return &token_null; +} + +function B32 +lex_is_scope(Token *t){ + B32 result = t->kind == OPEN_SCOPE || t->kind == CLOSE_SCOPE || t->kind == SAME_SCOPE; + return result; +} + +function void +lex_unwind_indent_stack(Token *t, Lex_Stream *s, Array *array){ + for(S64 i = s->indent_stack.len-1; i >= 0; i-=1){ + auto it = s->indent_stack.data[i]; + assert(lex_is_scope(it)); + if(it->indent == t->indent){ + t->kind = SAME_SCOPE; + array->add(*t); + break; + } + else if(it->indent < t->indent){ + token_error(t, "Bad indentation"_s); + array->add(*t); + break; + } + else{ + s->indent_stack.pop(); + t->kind = CLOSE_SCOPE; + array->add(*t); + } + } +} + +function void +lex__stream(Lexer *lexer, Lex_Stream *s){ + Intern_Table *table = &lexer->interns; + Array *array = &lexer->tokens; + + B32 beginning = true; + for(;;){ + if(lexc(s) == 0 || s->iter >= s->stream.len){ + Token t = token_make(lexcp(s), s->file, s->line, s->line_begin); + lex_unwind_indent_stack(&t, s, array); + break; + } + + // @note: the lexer is going to be a 2 stage process + // first we tokenize the indentation and then proceed to tokenize + // the good stuff + + // for blocks of stmts we parse till we cant find another new line + // of same scope. 
+ // parse_decl doesn't require preceding new line + // + // in that way new lines act as commas in function params + // seeing a comma means that there is a next thing to parse + // and it's easy to parse stuff using a do while loop + + // @note: first handle indentation + // mostly we want to merge multiple new lines + // but for down scopes we want to emit 2 new lines + // that will ease out parsing, one token to break out + // from a block parsing, second to allow continuation of surrounding scope + Token t = token_make(lexcp(s), s->file, s->line, s->line_begin); + B32 should_emit = beginning; + for(;;){ + switch(lexc(s)){ + case '\t': case ' ': lex_advance(s); t.indent++; break; + case '\r': lex_advance(s); break; + case '/': { + if(lexci(s,1) == '/'){ + lex_advance(s); lex_advance(s); + t.kind = TK_Comment; + for(;;){ + if(lexc(s) == '\n' || lexc(s) == 0) break; + lex_advance(s); + } + } + else if(lexci(s,1) == '*'){ + lex_advance(s); lex_advance(s); + t.kind = TK_Comment; + for(;;){ + if(lexc(s) == '*' && lexci(s,1) == '/'){ + lex_advance(s); lex_advance(s); + break; + } + else if(lexc(s) == 0){ + token_error(&t, "Unterminated block comment"_s); + break; + } + lex_advance(s); + } + } + else goto indent_loop_break; + } break; + + // @todo: add [;;] operator which adds new scope + // @todo: also need some way to detect indentation so that + // first of all we can check for consistency and second of + // all because we would know by how much to indent + // @todo: after detecting indentation 2 spaces would become 1 indent value + case ';' : { + Token semi = token_make(lexcp(s), s->file, s->line, s->line_begin); + Token *last = lex_last_indent_token(s); + semi.kind = SAME_SCOPE; + semi.indent = last->indent; + lex_advance(s); + array->add(semi); + } break; + + case '\n':{ + lex_advance(s); + should_emit = true; + t = token_make(lexcp(s), s->file, s->line, s->line_begin); + } break; + + default:{ + if(s->inside_brace_paren) should_emit = false; + if(should_emit){ + Token *last = lex_last_indent_token(s); + if(t.indent > last->indent){ + t.kind = OPEN_SCOPE; + array->add(t); + s->indent_stack.add(array->last()); + } + + else if(t.indent < last->indent){ + lex_unwind_indent_stack(&t, s, array); + } + else { + t.kind = SAME_SCOPE; + array->add(t); + } + } + + goto indent_loop_break; + } + } + } indent_loop_break: + beginning = false; + + // @note: handle the indented token + t = token_make(lexcp(s), s->file, s->line, s->line_begin); + lex_advance(s); + switch(*t.str){ + case 0 : break; + case '@': t.kind = TK_At; break; + case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break; + case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break; + case '{': s->inside_brace_paren++; t.kind = TK_OpenBrace; break; + case '}': s->inside_brace_paren--; t.kind = TK_CloseBrace; break; + case '[': s->inside_brace_paren++; t.kind = TK_OpenBracket; break; + case ']': s->inside_brace_paren--; t.kind = TK_CloseBracket; break; + case ',': t.kind = TK_Comma; break; + case '~': t.kind = TK_Neg; break; + case '?': t.kind = TK_Question; break; + case '^': t.kind = TK_BitXor; break; + CASE2('!', TK_Not, TK_NotEquals); + CASE2('=', TK_Assign, TK_Equals); + CASE2('*', TK_Mul, TK_MulAssign); + CASE2('%', TK_Mod, TK_ModAssign); + CASE3('+', TK_Add, TK_AddAssign, TK_Increment); + CASE3('&', TK_BitAnd, TK_AndAssign, TK_And); + CASE3('|', TK_BitOr, TK_OrAssign, TK_Or); + + case '#': { + lex_parse_ident(table, s, &t); + if(t.intern_val.str == intern_foreign.str){ + t.kind = TK_FOREIGN; + } + else 
token_error(&t, "Unrecognized #note"_s); + }break; + + case '.': { + if(lexc(s) == '.' && lexci(s,1) == '.') { + lex_advance(s); lex_advance(s); + t.kind = TK_ThreeDots; + } + else { + t.kind = TK_Dot; + } + } break; + + case '\'':{ + assert(s->stream.len >= s->iter); + UTF32_Result decode = utf8_to_utf32(lexcp(s), s->stream.len - s->iter); + if(!decode.error){ + for(S32 i = 0; i < decode.advance; i++) lex_advance(s); + t.unicode = decode.out_str; + t.kind = TK_UnicodeLit; + } + else{ + token_error(&t, "Invalid UTF8 sequence in unicode literal"_s); + } + } break; + + case '<': { + if (lexc(s) == '<') { + lex_advance(s); + if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_LeftShiftAssign; + } + else { + t.kind = TK_LeftShift; + } + } + else if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_LesserThenOrEqual; + } + else { + t.kind = TK_LesserThen; + } + } break; + + case '>': { + if (lexc(s) == '>') { + lex_advance(s); + if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_RightShiftAssign; + } + else { + t.kind = TK_RightShift; + } + } + else if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_GreaterThenOrEqual; + } + else { + t.kind = TK_GreaterThen; + } + } break; + + case ':': { + if (lexc(s) == ':') { + lex_advance(s); + t.kind = TK_DoubleColon; + } + else if(lexc(s) == '='){ + lex_advance(s); + t.kind = TK_ColonAssign; + } + else { + t.kind = TK_Colon; + } + } break; + + case '-':{ + if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_SubAssign; + } + else if (lexc(s) == '-') { + lex_advance(s); + t.kind = TK_Decrement; + } + else if (lexc(s) == '>') { + lex_advance(s); + t.kind = TK_Arrow; + } + else { + t.kind = TK_Sub; + } + } break; + + case '"': { + t.kind = TK_StringLit; + lex_parse_string(s,&t,'"'); + if(t.kind != TK_Error){ + t.str += 1; + t.len -= 2; + } + t.intern_val = intern_string(table, t.string); + } break; + + case '/': { + if(lexc(s) == '='){ + t.kind = TK_DivAssign; + lex_advance(s); + } + else { + t.kind = TK_Div; + } + } break; + + case '0':case '1':case '2':case '3':case '4': + case '5':case '6':case '7':case '8':case '9':{ + B32 found_dot = false; + for(;;){ + if(lex_is_numeric(lexc(s))) + ; + else if(lexc(s) == '.'){ + if(found_dot){ + token_error(&t, "Multiple '.' 
in float literal"_s); + goto end_of_switch; + } + found_dot = true; + } + else break; + + lex_advance(s); + } + lex_set_len(s, &t); + if(found_dot) lex_parse_f64(&t); + else lex_parse_u64(lexer, &t); + + } break; + + case 'A':case 'a':case 'M':case 'm':case 'B': + case 'b':case 'N':case 'n':case 'C':case 'c':case 'O': + case 'o':case 'D':case 'd':case 'P':case 'p':case 'E': + case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R': + case 'r':case 'G':case 'g':case 'S':case 's':case 'H': + case 'h':case 'T':case 't':case 'I':case 'i':case 'U': + case 'u':case 'J':case 'j':case 'V':case 'v':case 'K': + case 'k':case 'W':case 'w':case 'L':case 'X':case 'l': + case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': { + t.kind = TK_Identifier; + lex_parse_ident(table, s, &t); + if(lex_is_keyword(table, t.intern_val)){ + t.kind = TK_Keyword; + } + } break; + + default: { + token_error(&t, "Unknown token"_s); + } + }end_of_switch: + + if(t.len==0) + lex_set_len(s,&t); + + array->add(t); + } +#undef CASE2 +#undef CASE3 +} + +function Lexer +lex_make(Allocator *token_string_arena, Allocator *map_allocator){ + Lexer result = {}; + lex_init(token_string_arena, map_allocator, &result); + return result; +} + +function void +lex_restream(Lexer *lexer, String istream, String file){ + lexer->stream = {}; + lexer->stream.stream = istream; + lexer->stream.line_begin = istream.str; + lexer->stream.file = file; + + + lexer->tokens.clear(); + lexer->token_iter = 0; + Scratch scratch; + lexer->stream.indent_stack.allocator = scratch; + lexer->stream.indent_stack.add(&token_null); + lex__stream(lexer, &lexer->stream); +} + +function Lexer +lex_stream(Allocator *token_string_arena, Allocator *map_allocator, String istream, String file){ + Lexer result = lex_make(token_string_arena, map_allocator); + lex_restream(&result, istream, file); + return result; +} + +//----------------------------------------------------------------------------- +// Token metadata +//----------------------------------------------------------------------------- +function const char * +name(Token_Kind kind){ + switch(kind){ + case TK_End: return "End of stream"; + case TK_Mul: return "*"; + case TK_Div: return "/"; + case TK_Add: return "+"; + case TK_Sub: return "-"; + case TK_Mod: return "%"; + case TK_BitAnd: return "&"; + case TK_BitOr: return "|"; + case TK_BitXor: return "^"; + case TK_Neg: return "~"; + case TK_Not: return "!"; + case TK_OpenParen: return "("; + case TK_CloseParen: return ")"; + case TK_OpenBrace: return "{"; + case TK_CloseBrace: return "}"; + case TK_OpenBracket: return "["; + case TK_CloseBracket: return "]"; + case TK_ColonAssign: return ":="; + case TK_Comma: return ","; + case TK_Pound: return "#"; + case TK_Question: return "?"; + case TK_ThreeDots: return "..."; + case TK_Semicolon: return ";"; + case TK_Dot: return "."; + case TK_LesserThen: return "<"; + case TK_GreaterThen: return ">"; + case TK_Colon: return ":"; + case TK_Assign: return "="; + case TK_DivAssign: return "/="; + case TK_MulAssign: return "*="; + case TK_ModAssign: return "%="; + case TK_SubAssign: return "-="; + case TK_AddAssign: return "+="; + case TK_AndAssign: return "&="; + case TK_OrAssign: return "|="; + case TK_XorAssign: return "^="; + case TK_LeftShiftAssign: return "<<="; + case TK_RightShiftAssign: return ">>="; + case TK_DoubleColon: return "::"; + case TK_At: return "@"; + case TK_Decrement: return "--"; + case TK_Increment: return "++"; + case TK_PostDecrement: return "--"; + case TK_PostIncrement: return "++"; + case 
TK_LesserThenOrEqual: return "<="; + case TK_GreaterThenOrEqual: return ">="; + case TK_Equals: return "=="; + case TK_And: return "&&"; + case TK_Or: return "||"; + case TK_NotEquals: return "!="; + case TK_LeftShift: return "<<"; + case TK_RightShift: return ">>"; + case TK_Arrow: return "->"; + case TK_NewLine: return "New_Line"; + case TK_ExprSizeof: return "sizeof"; + case TK_DocComment: return "Doc_Comment"; + case TK_Comment: return "Comment"; + case TK_Identifier: return "Identifier"; + case TK_StringLit: return "String_Lit"; + case TK_UnicodeLit: return "Unicode_Lit"; + case TK_Error: return "Error"; + case TK_Float: return "Float"; + case TK_Integer: return "int"; + case TK_Keyword: return "Keyword"; + case TK_FOREIGN: return "#foreign"; + case CLOSE_SCOPE: return "Close_Scope"; + case OPEN_SCOPE: return "Open_Scope"; + case SAME_SCOPE: return "Same_Scope"; + default: invalid_codepath; return ""; + } +} diff --git a/main.cpp b/main.cpp index 54a37e2..6783600 100644 --- a/main.cpp +++ b/main.cpp @@ -77,11 +77,9 @@ Expr: @todo -[ ] - We need ++ -- operators [ ] - Passing down program to compile through command line [ ] - Switch [ ] - Arrays with size passed -[ ] - Some way to call foreign functions [ ] - Comma notation when declaring variables thing1, thing2: S32 [ ] - Array of inferred size @@ -107,9 +105,11 @@ Expr: [ ] - Rust like enum where you associate values(other structs) with keys [ ] - Compound that zeros values - .{} , Compound that assumes defaults from struct definition - {} [ ] - Inject stack traces into the program -[ ] - Rewrite constants to embed lambda, types, structs etc.? ??? +[ ] - Rewrite constants to embed lambda, types, structs etc.? ??? @donzo +[x] - We need ++ -- operators +[x] - Some way to call foreign functions [x] - We are parsing wrong here: (t.str=(&string_to_lex.str)[i]); [x] - Test new operators, add constant eval for them [x] - lvalue, rvalue concept so we cant assign value to some arbitrary weird expression diff --git a/new_ast.cpp b/new_ast.cpp index 0049b75..01fb9c1 100644 --- a/new_ast.cpp +++ b/new_ast.cpp @@ -126,7 +126,7 @@ struct Ast_If: Ast{ Array ifs; }; -struct Ast_Pass: Ast{}; // @todo +struct Ast_Pass: Ast{}; struct Ast_For: Ast{ Ast_Expr *init; @@ -145,6 +145,7 @@ struct Ast_Lambda : Ast_Expr { Array args; Ast_Expr *ret; Ast_Block *block; + B32 has_var_args; }; struct Ast_Array: Ast_Expr{ @@ -315,12 +316,13 @@ ast_expr_index(Token *pos, Ast_Expr *expr, Ast_Expr *index){ } function Ast_Lambda * -ast_lambda(Token *pos, Array params, Ast_Expr *ret, Ast_Block *block){ +ast_lambda(Token *pos, Array params, B32 has_var_args, Ast_Expr *ret, Ast_Block *block){ AST_NEW(Lambda, LAMBDA, pos, AST_EXPR); result->flags = AST_EXPR; result->args = params.tight_copy(pctx->perm); result->block = block; result->ret = ret; + result->has_var_args = has_var_args; if(!ret) result->ret = ast_ident(result->pos, intern_void); if(result->block) result->block->parent = result; diff --git a/new_parse.cpp b/new_parse.cpp index a466dc7..88c75ed 100644 --- a/new_parse.cpp +++ b/new_parse.cpp @@ -283,30 +283,38 @@ function Ast_Lambda * parse_lambda(Token *token){ Scratch scratch; + B32 has_var_args = false; Array params = {scratch}; if(!token_is(TK_CloseParen)){ for(;;){ - Token *name = token_expect(TK_Identifier); - token_expect(TK_Colon); - Ast_Expr *typespec = parse_expr(); + Token *name = token_get(); + if(token_match(TK_Identifier)){ + token_expect(TK_Colon); + Ast_Expr *typespec = parse_expr(); - Ast_Expr *default_value = 0; - if(token_match(TK_Assign)) { - 
default_value = parse_expr(); + Ast_Expr *default_value = 0; + if(token_match(TK_Assign)) { + default_value = parse_expr(); + } + + Ast_Lambda_Arg *param = ast_expr_lambda_arg(name, name->intern_val, typespec, default_value); + params.add(param); } - - Ast_Lambda_Arg *param = ast_expr_lambda_arg(name, name->intern_val, typespec, default_value); - params.add(param); - if(!token_match(TK_Comma)){ + else if(token_match(TK_ThreeDots)){ + has_var_args = true; break; } + else parsing_error(name, "Expected [Identifier] or [...] when parsing lambda arguments"); + + if(!token_match(TK_Comma)) + break; } } token_expect(TK_CloseParen); Ast_Expr *ret = parse_optional_type(); Ast_Block *block = token_is(OPEN_SCOPE) ? parse_block() : 0; - Ast_Lambda *result = ast_lambda(token, params, ret, block); + Ast_Lambda *result = ast_lambda(token, params, has_var_args, ret, block); return result; } @@ -421,8 +429,8 @@ parse_expr(S64 min_bp){ }break; case TK_OpenParen: { - if(token_is(TK_CloseParen)) left = parse_lambda(token); - else if(token_is(TK_Identifier) && token_is(TK_Colon, 1)) left = parse_lambda(token); + if(token_is(TK_CloseParen) || (token_is(TK_Identifier) && token_is(TK_Colon, 1)) || token_is(TK_ThreeDots)) + left = parse_lambda(token); else{ left = parse_expr(0); token_expect(TK_CloseParen); diff --git a/program.c b/program.c index 446f846..fbf2186 100644 --- a/program.c +++ b/program.c @@ -29,7 +29,7 @@ int main(){ entry(); } -void do_something(U32 val); +void printf(); typedef struct Token{ U8 *str; S64 len; @@ -42,7 +42,7 @@ void entry(){ String string_to_lex = LIT("Identifier 2425525 Not_Number"); Token token_array[32]; U32 token_count; - do_something(32); + printf(LIT("test"), 32); Token t; for(S64 i = 0;(i items = {scratch}; + S64 was_name_indexed = false; S64 default_iter = 0; auto lambda = (Ast_Lambda *)type->ast; For(lambda->args){ @@ -652,15 +653,19 @@ resolve_expr(Ast_Expr *ast, Ast_Resolved_Type *expected_type, Sym *lambda_to_res if(name){ assert(name->kind == AST_IDENT); + was_name_indexed = true; if(name->intern_val.str == arg->name.str) item = expr; } else if(node->exprs.get_index(&expr) == default_iter){ default_iter++; item = expr; } - else if(node->exprs.get_index(&expr) > default_iter) parsing_error(expr->pos, "Positional argument after named argument"); + else if(node->exprs.get_index(&expr) > default_iter){ + parsing_error(expr->pos, "Positional argument after named argument"); + } - if(item) break; + if(item) + break; } if(item){ @@ -680,9 +685,20 @@ resolve_expr(Ast_Expr *ast, Ast_Resolved_Type *expected_type, Sym *lambda_to_res } } + if(lambda->has_var_args){ + if(was_name_indexed) + parsing_error(lambda->pos, "Cant name index a lambda with var args"); + for(S64 i = lambda->args.len; i < node->exprs.len; i++){ + Ast_Call_Item *item = node->exprs.data[i]; + item->flags = set_flag(item->flags, AST_ITEM_INCLUDED); + items.add(item); + } + } + // @note: check if all arguments are included and cleanup For(node->exprs){ - if(!is_flag_set(it->flags, AST_ITEM_INCLUDED)) parsing_error(it->pos, "Invalid argument to function call"); + if(!is_flag_set(it->flags, AST_ITEM_INCLUDED)) + parsing_error(it->pos, "Invalid argument to function call"); else it->flags = unset_flag(it->flags, AST_ITEM_INCLUDED); }
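
The indentation handling in lex__stream (OPEN_SCOPE / SAME_SCOPE / CLOSE_SCOPE plus indent_stack and lex_unwind_indent_stack) is the least obvious part of the patch. Below is a minimal, self-contained sketch of the same stack discipline. It is not part of the patch: it uses std::vector and its own marker names in place of the project's Array/Token types, and it only models how per-line indentation maps to scope markers.

// Standalone sketch (not project code): models how the lexer's indent stack
// turns per-line indentation into OPEN/SAME/CLOSE scope markers, mirroring
// lex__stream's default case and lex_unwind_indent_stack.
#include <cstdio>
#include <vector>

enum Scope_Marker { OPEN_SCOPE_M, SAME_SCOPE_M, CLOSE_SCOPE_M, BAD_INDENT_M };

static std::vector<Scope_Marker> mark_scopes(const std::vector<int> &indents){
    std::vector<Scope_Marker> out;
    std::vector<int> stack = {0};              // like indent_stack seeded with token_null (indent 0)
    for(int indent : indents){
        if(indent > stack.back()){             // deeper than the enclosing scope: open one
            stack.push_back(indent);
            out.push_back(OPEN_SCOPE_M);
        }
        else if(indent == stack.back()){       // same depth: sibling statement
            out.push_back(SAME_SCOPE_M);
        }
        else{                                  // shallower: pop until a level matches
            while(stack.back() > indent){
                stack.pop_back();
                out.push_back(CLOSE_SCOPE_M);
            }
            if(stack.back() == indent) out.push_back(SAME_SCOPE_M);
            else                       out.push_back(BAD_INDENT_M); // "Bad indentation"
        }
    }
    while(stack.size() > 1){                   // unwind remaining scopes at end of stream
        stack.pop_back();
        out.push_back(CLOSE_SCOPE_M);
    }
    return out;
}

int main(){
    // indentation of four lines, e.g.:  if x:\n  y = 1\n  z = 2\nreturn
    for(Scope_Marker m : mark_scopes({0, 2, 2, 0}))
        printf("%d ", m);                      // prints: 1 0 1 2 1
    printf("\n");
    return 0;
}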
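Similarly, lex_parse_u64 walks an integer literal from its least significant digit to its most significant one, keeping a running power-of-ten multiplier so each digit is scaled independently. A hypothetical version of the same loop with plain 64-bit arithmetic (the real code uses BigInt so long literals don't overflow) looks like this:

// Standalone sketch of lex_parse_u64's digit loop using plain uint64_t;
// unlike the BigInt version, this overflows for values above 2^64 - 1.
#include <cstdint>
#include <cstdio>

static uint64_t parse_u64_digits(const char *str, int len){
    uint64_t m = 1, result = 0;                // m is the running power of ten
    for(int i = len - 1; i >= 0; --i){         // least significant digit first
        result += (uint64_t)(str[i] - '0') * m;
        m *= 10;
    }
    return result;
}

int main(){
    printf("%llu\n", (unsigned long long)parse_u64_digits("2425525", 7)); // 2425525
    return 0;
}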