diff --git a/main.cpp b/main.cpp index 0dab845..ecb72dc 100644 --- a/main.cpp +++ b/main.cpp @@ -127,13 +127,15 @@ int main(){ result = compile_file("new_types.kl"_s); printf("%s", result.str); + +#if 0 result = compile_file("program.kl"_s); FILE *f = fopen("program.c", "w"); assert(f); fprintf(f, "%.*s", (int)result.len, result.str); fclose(f); - system("clang.exe program.c -g -o program.exe && program.exe"); +#endif __debugbreak(); } diff --git a/new_lex.cpp b/new_lex.cpp index df0bff1..c66df8b 100644 --- a/new_lex.cpp +++ b/new_lex.cpp @@ -77,8 +77,8 @@ enum Token_Kind{ TK_DocComment, TK_Comment, TK_Identifier, + TK_UnicodeLit, TK_StringLit, - TK_Character, TK_Error, TK_Float, TK_Integer, @@ -102,6 +102,7 @@ struct Token{ }; union { + U32 unicode; U64 int_val; F64 f64_val; String error_val; @@ -416,17 +417,6 @@ lex__stream(Intern_Table *table, Array *array, Lex_Stream *s){ t = token_make(lexcp(s), s->file, s->line, s->line_begin); } break; - // @todo: add open and close brace handling as OPEN_SCOPE CLOSE_SCOPE - // when it comes to compound statements it's going to check for scopes - // and then it's going to specialize and look for brace string - - // case '{': { - // s->inside_brace_paren++; t.kind = TK_OpenBrace; - // } break; - // case '}': { - // s->inside_brace_paren--; - // t.kind = CLOSE_SCOPE; - // } break; default:{ if(s->inside_brace_paren) should_emit = false; if(should_emit){ @@ -486,6 +476,18 @@ lex__stream(Intern_Table *table, Array *array, Lex_Stream *s){ } } break; + case '\'':{ + assert(s->stream.len >= s->iter); + UTF32_Result decode = utf8_to_utf32(lexcp(s), s->stream.len - s->iter); + if(!decode.error){ + for(S32 i = 0; i < decode.advance; i++) lex_advance(s); + t.unicode = decode.out_str; + t.kind = TK_UnicodeLit; + } + else{ + token_error(&t, "Invalid UTF8 sequence in unicode literal"_s); + } + } break; case '<': { if (lexc(s) == '<') { @@ -559,8 +561,6 @@ lex__stream(Intern_Table *table, Array *array, Lex_Stream *s){ } } break; - - case 
'\'':{not_implemented;} break; case '"': { t.kind = TK_StringLit; lex_parse_string(s,&t,'"'); @@ -738,7 +738,7 @@ name(Token_Kind kind){ case TK_Comment: return "Comment"; case TK_Identifier: return "Identifier"; case TK_StringLit: return "String_Lit"; - case TK_Character: return "Character"; + case TK_UnicodeLit: return "Unicode_Lit"; case TK_Error: return "Error"; case TK_Float: return "Float"; case TK_Integer: return "int"; diff --git a/new_parse.cpp b/new_parse.cpp index 3bd01a8..e1b991e 100644 --- a/new_parse.cpp +++ b/new_parse.cpp @@ -7,6 +7,9 @@ parsing_error(Token *token, const char *str, ...){ // @Note(Krzosa): Print nice error message printf("\nError: %s", string.str); if(token){ + if(token->kind == TK_Error){ + printf("Token Error: %.*s", (int)token->error_val.len, token->error_val.str); + } printf(" %s:%d\n", token->file.str, (S32)token->line + 1); // @Note(Krzosa): Print error line @@ -372,6 +375,7 @@ parse_expr(S64 min_bp){ case TK_StringLit : left = ast_str(token, token->intern_val); break; case TK_Identifier : left = ast_ident(token, token->intern_val); break; case TK_Integer : left = ast_int(token, token->int_val); break; + case TK_UnicodeLit : left = ast_int(token, token->unicode); break; case TK_Float : left = ast_float(token, token->f64_val); break; case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break; case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break; diff --git a/new_types.kl b/new_types.kl index a38fbd1..51ec422 100644 --- a/new_types.kl +++ b/new_types.kl @@ -28,6 +28,7 @@ binary_test :: () bit_and :: 3 & 1 bit_or :: 1 | 4 bit_xor :: 8 ^ 7 + character :: 'ó boolean_equals :: true == false boolean_var: Bool = boolean_equals