diff --git a/lambdas.kl b/lambdas.kl index ff8a15d..cdfcc47 100644 --- a/lambdas.kl +++ b/lambdas.kl @@ -1,3 +1,19 @@ +/* +/*begin of file*/ thing // indent 2 == error + + +add_10 :: (size: int): int // scope 0 + add_20 :: (new_size: int): int // up scope 2 + return 20 // up scope 2 + // down scope +// down scope +// scope 0 +thing + + + + +*/ add_10 :: (size: int): int add_20 :: (new_size: int): int diff --git a/new_lex.cpp b/new_lex.cpp index ed7fc0d..92d1ff6 100644 --- a/new_lex.cpp +++ b/new_lex.cpp @@ -121,6 +121,7 @@ struct Lex_Stream{ S32 line; S32 inside_brace_paren; S32 last_valid_indent; + Array indent_stack; }; struct Lexer{ @@ -269,18 +270,112 @@ lex_parse_string(Lex_Stream *s, Token *t, U8 c){ } \ break +function Token +token_make(U8 *str, String file, int line, U8 *line_begin){ // returns a zero-initialized Token with only str/file/line/line_begin set + Token t = {}; + t.str = str; + t.file = file; + t.line = line; + t.line_begin = line_begin; + return t; +} + +global Token token_null = {SAME_SCOPE}; // fallback returned by lex_last_indent_token when the indent stack is empty + +function Token * +lex_last_indent_token(Lex_Stream *s){ // top of s->indent_stack, or &token_null if the stack is empty + if(s->indent_stack.len > 0){ + return *s->indent_stack.last(); + } + return &token_null; +} + +function B32 +token_is_scope(Token *t){ // true when t->kind is OPEN_SCOPE, CLOSE_SCOPE or SAME_SCOPE + B32 result = t->kind == OPEN_SCOPE || t->kind == CLOSE_SCOPE || t->kind == SAME_SCOPE; + return result; +} + function void lex__stream(Intern_Table *table, Array *array, Lex_Stream *s){ + B32 beginning = true; // lets the first token of the stream get a scope token without a preceding new line while(lexc(s)){ - while(lexc(s) == '\r') lex_advance(s); + if(s->iter >= s->stream.len) // End of stream + break; - Token t = {}; - t.str = lexcp(s); - t.file = s->file; - t.line = s->line; - t.line_begin = s->line_begin; + // @note: for now the lexer is going to be a 2 stage process + // first we tokenize the indentation and then proceed to tokenize + // the good stuff + + // for blocks of stmts we parse until we can't find another new line + // of the same scope. 
+ // parse_decl doesn't require a preceding new line + // + // in that way new lines act as commas in function params + // seeing a comma means that there is a next thing to parse + // and it's easy to parse stuff using a do while loop + + // @note: first handle indentation + // mostly we want to merge multiple new lines + // but for down scopes we want to emit 2 new lines + // that will simplify parsing: one token to break out + // of block parsing, a second to allow continuation of the surrounding scope + Token t = token_make(lexcp(s), s->file, s->line, s->line_begin); + B32 should_emit = beginning; + for(;;){ + switch(lexc(s)){ + case '\t': case ' ': lex_advance(s); t.indent++; break; // a tab and a space each count as one indent unit + case '\r': lex_advance(s); break; + case '\n':{ + lex_advance(s); + should_emit = true; + t = token_make(lexcp(s), s->file, s->line, s->line_begin); // fresh zeroed token: indent is counted from the last new line only + } break; + default:{ + if(s->inside_brace_paren) should_emit = false; // suppress scope tokens while inside_brace_paren is non-zero + if(should_emit){ + Token *last = lex_last_indent_token(s); + if(t.indent > last->indent){ + t.kind = OPEN_SCOPE; + array->add(t); + s->indent_stack.add(array->last()); // NOTE(review): stores a pointer into *array; confirm later array->add() calls cannot relocate the tokens + } + else if(t.indent < last->indent){ + For_Reverse(s->indent_stack){ + assert(token_is_scope(*it)); + if(it[0]->indent == t.indent){ + t.kind = SAME_SCOPE; + array->add(t); + break; + } + else if(it[0]->indent < t.indent){ + token_error(&t, "Bad indentation"_s); + array->add(t); + break; + } + else{ + s->indent_stack.pop(); // NOTE(review): pops while For_Reverse walks the same stack; confirm the macro tolerates this + t.kind = CLOSE_SCOPE; + array->add(t); + } + } + } + else { + t.kind = SAME_SCOPE; + array->add(t); // equal indent: the current scope continues + } + } + + goto indent_loop_break; + } + } + } indent_loop_break: + beginning = false; + + t = token_make(lexcp(s), s->file, s->line, s->line_begin); lex_advance(s); + // @note: handle the indented token switch(*t.str){ case 0 : break; case '@': t.kind = TK_At; break; @@ -302,29 +397,10 @@ lex__stream(Intern_Table *table, Array *array, Lex_Stream *s){ CASE3('+', TK_Add, TK_AddAssign, TK_Increment); CASE3('&', TK_BitAnd, TK_AndAssign, TK_And); CASE3('|', 
TK_BitOr, TK_OrAssign, TK_Or); -#undef CASE2 -#undef CASE3 case ';': { t.kind = TK_Semicolon; }break; - case '\r': case ' ' : s->stream.str -= 1; - case '\n': { - t.kind = TK_NewLine; - if(lexc(s) == '\r') - lex_advance(s); - - for(;;){ - if(lexc(s) == ' ') { - t.indent++; - // @Todo(Krzosa): Detect indentation method, file an error while methods are mixed - } - else if(lexc(s) == '\t') t.indent++; - else break; - lex_advance(s); - } - - }break; case '.': { if(lexc(s) == '.' && lexci(s,1) == '.') { lex_advance(s); lex_advance(s); @@ -489,21 +565,10 @@ lex__stream(Intern_Table *table, Array *array, Lex_Stream *s){ if(t.len==0) lex_set_len(s,&t); - B32 skip = 0; - if(t.kind == TK_NewLine){ - if(s->inside_brace_paren > 0) skip = 1; - if(array->len > 0 && array->last()->kind == TK_NewLine) array->pop(); - } - if(!skip){ - array->add(t); - } - - while(lex_is_whitespace(lexc(s))) - lex_advance(s); - - if(s->iter >= s->stream.len) // End of stream - break; + array->add(t); } +#undef CASE2 +#undef CASE3 } function void @@ -529,6 +594,9 @@ lex_restream(Lexer *lexer, String istream, String file){ lexer->tokens.clear(); lexer->token_iter = 0; + Scratch scratch; + lexer->stream.indent_stack.allocator = scratch; // NOTE(review): scratch is local to this function; confirm indent_stack is not used after return and is reset between restream calls + lexer->stream.indent_stack.add(&token_null); // seed the stack with the sentinel as its bottom entry lex__stream(&lexer->interns, &lexer->tokens, &lexer->stream); } @@ -542,7 +610,7 @@ lex_stream(Allocator *token_string_arena, Allocator *map_allocator, String istre function void lex_test(){ Scratch scratch; - String test = "Keyword //R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy" + String test = "Keyword //R\n 18446744073709551616\n {}\n)(@?&+-;....->,:::/**/\"Thing\" Thingy" "\"Test_Meme\"+=-===42524 4294967295 18446744073709551615" "for if while switch :="_s; @@ -559,7 +627,8 @@ lex_test(){ Array arr = lexer.tokens; Token_Kind kind[] = { - TK_Keyword, TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen, + SAME_SCOPE, + TK_Keyword, OPEN_SCOPE, TK_Error, OPEN_SCOPE, 
TK_OpenBrace,TK_CloseBrace,CLOSE_SCOPE, CLOSE_SCOPE, SAME_SCOPE, TK_CloseParen,TK_OpenParen, TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon, TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon, TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign, @@ -568,7 +637,7 @@ lex_test(){ TK_Colon, TK_Assign, TK_End }; String strs[] = { - "Keyword"_s, "\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s, + ""_s, "Keyword"_s, ""_s, "18446744073709551616"_s, ""_s, "{"_s,"}"_s, ""_s, ""_s, ""_s, ")"_s, "("_s, "@"_s,"?"_s,"&"_s,"+"_s,"-"_s,";"_s, "..."_s,"."_s,"->"_s,","_s,"::"_s,":"_s, "Thing"_s,"Thingy"_s,"Test_Meme"_s, "+="_s,"-="_s,