From d462892e143379675ba89b39d91ba3577ded7bff Mon Sep 17 00:00:00 2001 From: Krzosa Karol Date: Thu, 28 Apr 2022 13:49:32 +0200 Subject: [PATCH] Working on lexer first --- .gitignore | 5 + build.bat | 3 + ideas.cpp | 44 +++++++ lex.h | 164 ++++++++++++++++++++++++ main.c | 363 +++++++++++++++++++++++++++++++++++++++++++++++++++++ os.cpp | 27 ++++ types.h | 39 ++++++ 7 files changed, 645 insertions(+) create mode 100644 .gitignore create mode 100644 build.bat create mode 100644 ideas.cpp create mode 100644 lex.h create mode 100644 main.c create mode 100644 os.cpp create mode 100644 types.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b75f097 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.exe +*.ilk +*.pdb +*.txt +*.4c \ No newline at end of file diff --git a/build.bat b/build.bat new file mode 100644 index 0000000..8ed2f5f --- /dev/null +++ b/build.bat @@ -0,0 +1,3 @@ +@echo off + +clang main.c -fdiagnostics-absolute-paths -std=c99 -g -o main.exe -Wl,-subsystem:windows -Wl,user32.lib diff --git a/ideas.cpp b/ideas.cpp new file mode 100644 index 0000000..d61293c --- /dev/null +++ b/ideas.cpp @@ -0,0 +1,44 @@ +Builtin types: +B8,B16,B32,B64 +S8,S16,S32,S64 +U8,U16,U32,U64 +SizeI, SizeU +F32, F64 + +Decls: +S32 *var; +S32 (*func)(); +S32 *var[expr][expr]; +S32 **var; + +Global scope: +function S32 +do_thing(S32 a, U32 b){ + stmt_list +} + +function S32 +do_thing(S32 a, S32 b); + +typedef struct Thing Thing; +struct Thing{}; +typedef struct Thing{} Thing; + +typedef enum Thingy Thingy; +enum Thingy{}; +typedef enum Thingy{} Thingy; + +global S32 variable = expr | compound; + +// typedef S32 NewName; +// typedef S32 BaseFunctionType(S32 thing); +// typedef S32 (*FunctionPointer)(S32 thing); + +Local scope-(stmts): +S32 variable = expr; +variable = expr; +variable++; +return 0; +if(a){}elseif(b){}else{} + + diff --git a/lex.h b/lex.h new file mode 100644 index 0000000..f2eb87b --- /dev/null +++ b/lex.h @@ -0,0 +1,164 @@ +#pragma once + 
+typedef enum Token_Kind{ // Kind tag stored in every Token. meta("...") notes the spelling and expands to nothing (types.h).
+  meta("End of stream")TK_End,
+  meta("*")TK_Mul,
+  meta("/")TK_Div,
+  meta("+")TK_Add,
+  meta("-")TK_Sub,
+  meta("%")TK_Mod,
+  meta("&")TK_BitAnd,
+  meta("|")TK_BitOr,
+  meta("^")TK_BitXor,
+  meta("~")TK_Neg,
+  meta("!")TK_Not,
+  meta("(")TK_OpenParen,
+  meta(")")TK_CloseParen,
+  meta("{")TK_OpenBrace,
+  meta("}")TK_CloseBrace,
+  meta("[")TK_OpenBracket,
+  meta("]")TK_CloseBracket,
+  meta(",")TK_Comma,
+  meta("#")TK_Pound,
+  meta("?")TK_Question,
+  meta("...")TK_ThreeDots,
+  meta(";")TK_Semicolon,
+  meta(".")TK_Dot,
+  meta("<")TK_LesserThen,
+  meta(">")TK_GreaterThen,
+  meta(":")TK_Colon,
+  meta("=")TK_Assign,
+  meta("/=")TK_DivAssign,
+  meta("*=")TK_MulAssign,
+  meta("%=")TK_ModAssign,
+  meta("-=")TK_SubAssign,
+  meta("+=")TK_AddAssign,
+  meta("&=")TK_AndAssign,
+  meta("|=")TK_OrAssign,
+  meta("^=")TK_XorAssign,
+  meta("<<=")TK_LeftShiftAssign,
+  meta(">>=")TK_RightShiftAssign,
+  meta("::")TK_DoubleColon,
+  meta("@")TK_At,
+  meta("--")TK_Decrement,
+  meta("++")TK_Increment,
+  meta("--")TK_PostDecrement,
+  meta("++")TK_PostIncrement,
+  meta("<=")TK_LesserThenOrEqual,
+  meta(">=")TK_GreaterThenOrEqual,
+  meta("==")TK_Equals,
+  meta("&&")TK_And,
+  meta("||")TK_Or,
+  meta("!=")TK_NotEquals,
+  meta("<<")TK_LeftShift,
+  meta(">>")TK_RightShift,
+  meta("->")TK_Arrow,
+  meta("sizeof")TK_ExprSizeof,
+  TK_DocComment, // line comment opened with three slashes
+  TK_Comment, // line comment opened with two slashes, or a block comment
+  TK_Identifier,
+  TK_StringLit, // lit("...") form
+  TK_U8Lit, // plain "..." form
+  TK_Error, // error_val holds the message
+  TK_Float,
+  TK_Int, // int_val holds the value
+  TK_Keyword,
+}Token_Kind;
+// One lexed token: its kind, the source text it covers, an optional payload, and where it was found.
+typedef struct Token{
+  Token_Kind kind;
+  union{ // str/len and `string` name the same two fields (String is {U8 *str; S64 len;})
+    struct{
+      U8 *str;
+      S64 len;
+    };
+    String string;
+  };
+  union { // payload; which member is meaningful depends on `kind`
+    S64 int_val; // written for TK_Int
+    String error_val; // written for TK_Error
+  };
+  // Source position copied from the Lex_Stream by lex_token_seed() when the token starts.
+  String file;
+  S64 line;
+  U8 *line_begin;
+} Token;
+// Growable array of tokens: `len` entries in use out of `cap` allocated (grown by token_alloc()).
+typedef struct Tokens{
+  Token *tokens;
+  S64 len;
+  S64 cap;
+}Tokens;
+// Lexer cursor: `stream` is the next unread byte; lex_advance() bumps `line` and resets `line_begin` after each '\n'.
+typedef struct Lex_Stream{
+  U8 *stream;
+  U8 *line_begin;
+  String filename;
+  S64 line;
+}Lex_Stream;
+
+//-----------------------------------------------------------------------------
+// Token kind names
+//-----------------------------------------------------------------------------
+// Printable name of each Token_Kind, indexed by the enum value.
+// Keep one entry per enumerator: a missing designator leaves a zeroed String
+// (null `str`), and an enumerator larger than the last designated one lies
+// outside the array altogether.
+global String token_kind_string[] = {
+  [TK_End] = lit("End of stream"),
+  [TK_Error] = lit("Error"),
+  [TK_Comment] = lit("Comment"),
+  [TK_DocComment] = lit("DocComment"),
+  [TK_Identifier] = lit("Identifier"),
+  [TK_StringLit] = lit("StringLiteral"),
+  [TK_U8Lit] = lit("U8Literal"),
+  [TK_Float] = lit("Float"),
+  [TK_Int] = lit("Integer"),
+  [TK_Keyword] = lit("Keyword"),
+  [TK_Mul] = lit("*"),
+  [TK_Div] = lit("/"),
+  [TK_Add] = lit("+"),
+  [TK_Sub] = lit("-"),
+  [TK_Mod] = lit("%"),
+  [TK_BitAnd] = lit("&"),
+  [TK_BitOr] = lit("|"),
+  [TK_BitXor] = lit("^"),
+  [TK_Neg] = lit("~"),
+  [TK_Not] = lit("!"),
+  [TK_OpenParen] = lit("("),
+  [TK_CloseParen] = lit(")"),
+  [TK_OpenBrace] = lit("{"),
+  [TK_CloseBrace] = lit("}"),
+  [TK_OpenBracket] = lit("["),
+  [TK_CloseBracket] = lit("]"),
+  [TK_Comma] = lit(","),
+  [TK_Pound] = lit("#"),
+  [TK_Question] = lit("?"),
+  [TK_ThreeDots] = lit("..."),
+  [TK_Semicolon] = lit(";"),
+  [TK_Dot] = lit("."),
+  [TK_LesserThen] = lit("<"),
+  [TK_GreaterThen] = lit(">"),
+  [TK_Colon] = lit(":"),
+  [TK_Assign] = lit("="),
+  [TK_DivAssign] = lit("/="),
+  [TK_MulAssign] = lit("*="),
+  [TK_ModAssign] = lit("%="),
+  [TK_SubAssign] = lit("-="),
+  [TK_AddAssign] = lit("+="),
+  [TK_AndAssign] = lit("&="),
+  [TK_OrAssign] = lit("|="),
+  [TK_XorAssign] = lit("^="),
+  [TK_LeftShiftAssign] = lit("<<="),
+  [TK_RightShiftAssign] = lit(">>="),
+  [TK_DoubleColon] = lit("::"),
+  [TK_At] = lit("@"),
+  [TK_Decrement] = lit("--"),
+  [TK_Increment] = lit("++"),
+  [TK_PostDecrement] = lit("--"),
+  [TK_PostIncrement] = lit("++"),
+  [TK_LesserThenOrEqual] = lit("<="),
+  [TK_GreaterThenOrEqual] = lit(">="),
+  [TK_Equals] = lit("=="),
+  [TK_And] = lit("&&"),
+  [TK_Or] = lit("||"),
+  [TK_NotEquals] = lit("!="),
+  [TK_LeftShift] = lit("<<"),
+  [TK_RightShift] = lit(">>"),
+  [TK_Arrow] = lit("->"),
+  [TK_ExprSizeof] = lit("sizeof"),
+};
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..1b28e2c
--- /dev/null
+++ b/main.c
@@ -0,0 +1,363
@@
+#include "os.cpp"
+#include "lex.h"
+// Everything the lexer prints goes to this file (opened in os_main).
+global FILE *global_output_file;
+#define lex_print(...) fprintf(global_output_file, __VA_ARGS__)
+
+// Fills `size` bytes starting at `p` with zero.
+function void
+memory_zero(void *p, SizeU size){
+  U8 *bytes = p;
+  for(SizeU i = 0; i < size; i++)
+    bytes[i] = 0;
+}
+
+// Returns true when `a` and `b` have the same length and the same bytes.
+function B32
+string_compare(String a, String b){
+  if(a.len != b.len)
+    return false;
+  for(S64 i = 0; i < a.len; i++){
+    if(a.str[i] != b.str[i])
+      return false;
+  }
+  return true;
+}
+
+// Appends one zero-initialized token to `t` and returns a pointer to it.
+// The array starts at 1024 entries and doubles whenever it is full, so the
+// returned pointer is only valid until the next call that grows the array.
+// An allocation failure trips assert_msg; t->tokens is not overwritten first.
+function Token *
+token_alloc(Tokens *t){
+  if(t->cap == 0){
+    // cap == 0 marks a Tokens that has never allocated (lex_stream zeroes it).
+    Token *fresh = malloc(sizeof(*fresh) * 1024);
+    assert_msg(fresh, "Out of memory allocating the token array");
+    t->tokens = fresh;
+    t->cap = 1024;
+  }
+  else if(t->len >= t->cap){
+    // Grow through a temporary: realloc leaves the old block alive on failure,
+    // and assigning its result directly would lose the only pointer to it.
+    S64 new_cap = t->cap * 2;
+    Token *grown = realloc(t->tokens, sizeof(*grown) * new_cap);
+    assert_msg(grown, "Out of memory growing the token array");
+    t->tokens = grown;
+    t->cap = new_cap;
+  }
+
+  Token *result = t->tokens + t->len++;
+  memory_zero(result, sizeof(*result));
+  return result;
+}
+
+// Moves the cursor forward by one byte, but never past the terminating zero.
+// Stepping over '\n' also increments the line counter and records where the
+// new line begins.
+function void
+lex_advance(Lex_Stream *s){
+  if(*s->stream == '\n'){
+    s->stream++;
+    s->line++;
+    s->line_begin = s->stream;
+  }
+  else if(*s->stream == 0){
+    // Don't advance, end of stream
+  }
+  else{
+    s->stream++;
+  }
+}
+
+// Converts `len` decimal digits at `str` to a number. There is no validation
+// or overflow detection: non-digit bytes and values that do not fit in 64
+// bits yield a wrapped result.
+function U64
+parse_u64(U8 *str, S64 len){
+  U64 result = 0;
+  U64 m = 1; // weight of the current digit: 1, 10, 100, ...
+  for(S64 i = len - 1; i >= 0; --i){
+    U64 val = str[i] - '0';
+    result += val * m;
+    m *= 10;
+  }
+  return result;
+}
+
+// True for the bytes the lexer skips between tokens: newline, carriage
+// return, space and horizontal tab.
+function B32
+lex_is_whitespace(U8 c){
+  B32 result = c == '\n' || c == '\r' || c == ' ' || c == '\t';
+  return result;
+}
+
+// True for ASCII letters A-Z and a-z.
+function B32
+lex_is_alphabetic(U8 c){
+  B32 result = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+  return result;
+}
+
+// True for ASCII digits 0-9.
+function B32
+lex_is_numeric(U8 c){
+  B32 result = c >= '0' && c <= '9';
+  return result;
+}
+
+// True for ASCII letters and digits.
+function B32
+lex_is_alphanumeric(U8 c){
+  B32 result = lex_is_numeric(c) || lex_is_alphabetic(c);
+  return result;
+}
+
+// Sets token->len to the number of bytes between token->str and the current
+// stream position; asserts that at least one byte has been consumed.
+function void
+lex_set_len(Lex_Stream *s, Token *token){
+  assert(s->stream > token->str);
+  token->len = s->stream - token->str;
+}
+
+// Returns the byte at the cursor without consuming it.
+function U8
+lexc(Lex_Stream *s){
+  return *s->stream;
+}
+
+// Turns `t` into a TK_Error token carrying the message `error_val`.
+function void
+token_error(Token *t, String error_val){
+  t->kind = TK_Error;
+  t->error_val = error_val;
+}
+
+function
void
+lex_parse_string(Lex_Stream *s, Token *t, U8 c){
+  // Scans the rest of a string whose opening delimiter has already been
+  // consumed, up to and including the closing delimiter `c`. A backslash
+  // makes the following byte part of the string even when it equals `c`.
+  // On success t->len spans from t->str to the cursor; if the stream ends
+  // first, `t` becomes a TK_Error and its length is left untouched.
+  for(;;){
+    if(lexc(s) == '\\') lex_advance(s);
+    else if(lexc(s) == c) break;
+    else if(lexc(s) == 0){
+      token_error(t, lit("Unterminated string, reached end of file"));
+      break;
+    }
+    lex_advance(s);
+  }
+  if(t->kind != TK_Error){
+    lex_advance(s);
+    lex_set_len(s,t);
+  }
+}
+
+// Records where token `t` starts: text pointer, file name, line number and
+// the start of the current line, all taken from the stream's current state.
+function void
+lex_token_seed(Lex_Stream *s, Token *t){
+  t->str = s->stream;
+  t->file = s->filename;
+  t->line = s->line;
+  t->line_begin = s->line_begin;
+}
+
+// Appends a TK_Error token with message `error_val`, positioned at the
+// stream's current location.
+function void
+token_push_error(Lex_Stream *stream, Tokens *tokens, String error_val){
+  Token *token = token_alloc(tokens);
+  token->kind = TK_Error;
+  token->error_val = error_val;
+  lex_token_seed(stream, token);
+}
+
+// Tokenizes the zero-terminated text at s->stream, appending one Token per
+// lexeme to `tokens`. Whitespace is skipped; comments are kept as TK_Comment
+// or TK_DocComment tokens. Input that cannot be lexed is recorded as a
+// TK_Error token and scanning continues after it; the one exception is a
+// single quote, which still hits not_implemented.
+// Only comments, integers, identifiers and string literals get a length;
+// operator and punctuation tokens are identified by `kind` alone.
+function void
+lex_base(Lex_Stream *s, Tokens *tokens){
+  // CASE2: operator that is either alone (OpName) or followed by '=' (Assign).
+#define CASE2(op, OpName, Assign) \
+  case op: \
+    if (lexc(s) == '=') { \
+      lex_advance(s); \
+      t->kind = Assign; \
+    } else { \
+      t->kind = OpName; \
+    } \
+    break
+  // CASE3: like CASE2, plus the doubled form of the operator (Incr).
+#define CASE3(op, OpName, Assign, Incr) \
+  case op: \
+    if (lexc(s) == '=') { \
+      lex_advance(s); \
+      t->kind = Assign; \
+    } else if (lexc(s) == op) { \
+      lex_advance(s); \
+      t->kind = Incr; \
+    } else { \
+      t->kind = OpName; \
+    } \
+    break
+
+  while(*s->stream){
+    while(lex_is_whitespace(*s->stream))
+      lex_advance(s);
+    // Trailing whitespace leaves the cursor on the terminator; stop here so
+    // the zero byte is not turned into an "Unknown token" error.
+    if(*s->stream == 0)
+      break;
+
+    Token *t = token_alloc(tokens);
+    lex_token_seed(s, t);
+    lex_advance(s);
+    // The first byte is already consumed; lexc(s) below is the byte after it.
+    switch(*t->str) {
+      CASE2('!', TK_Not, TK_NotEquals);
+      CASE2('^', TK_BitXor, TK_XorAssign);
+      CASE2('=', TK_Assign, TK_Equals);
+      CASE2('*', TK_Mul, TK_MulAssign);
+      CASE2('%', TK_Mod, TK_ModAssign);
+      CASE3('+', TK_Add, TK_AddAssign, TK_Increment);
+      CASE3('&', TK_BitAnd, TK_AndAssign, TK_And);
+      CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
+#undef CASE2
+#undef CASE3
+      case '@': t->kind = TK_At; break;
+      case '(': t->kind = TK_OpenParen; break;
+      case ')': t->kind = TK_CloseParen; break;
+      case '{': t->kind = TK_OpenBrace; break;
+      case '}': t->kind = TK_CloseBrace; break;
+      case '[': t->kind = TK_OpenBracket; break;
+      case ']': t->kind = TK_CloseBracket; break;
+      case ',': t->kind = TK_Comma; break;
+      case '~': t->kind = TK_Neg; break;
+      case '?': t->kind = TK_Question; break;
+      case ';': t->kind = TK_Semicolon; break;
+      case '#': t->kind = TK_Pound; break;
+      case ':': {
+        if(lexc(s) == ':'){
+          lex_advance(s);
+          t->kind = TK_DoubleColon;
+        }
+        else t->kind = TK_Colon;
+      } break;
+      case '.': {
+        // s->stream[1] is only read when s->stream[0] is '.', i.e. not the terminator.
+        if(s->stream[0] == '.' && s->stream[1] == '.'){
+          lex_advance(s);
+          lex_advance(s);
+          t->kind = TK_ThreeDots;
+        }
+        else t->kind = TK_Dot;
+      } break;
+      case '<': {
+        if(lexc(s) == '='){
+          lex_advance(s);
+          t->kind = TK_LesserThenOrEqual;
+        }
+        else if(lexc(s) == '<'){
+          lex_advance(s);
+          if(lexc(s) == '='){
+            lex_advance(s);
+            t->kind = TK_LeftShiftAssign;
+          }
+          else t->kind = TK_LeftShift;
+        }
+        else t->kind = TK_LesserThen;
+      } break;
+      case '>': {
+        if(lexc(s) == '='){
+          lex_advance(s);
+          t->kind = TK_GreaterThenOrEqual;
+        }
+        else if(lexc(s) == '>'){
+          lex_advance(s);
+          if(lexc(s) == '='){
+            lex_advance(s);
+            t->kind = TK_RightShiftAssign;
+          }
+          else t->kind = TK_RightShift;
+        }
+        else t->kind = TK_GreaterThen;
+      } break;
+      case '-':{
+        if (lexc(s) == '=') {
+          lex_advance(s);
+          t->kind = TK_SubAssign;
+        }
+        else if (lexc(s) == '-') {
+          lex_advance(s);
+          t->kind = TK_Decrement;
+        }
+        else if (lexc(s) == '>') {
+          lex_advance(s);
+          t->kind = TK_Arrow;
+        }
+        else {
+          t->kind = TK_Sub;
+        }
+      } break;
+      case '\'':{not_implemented;} break;
+      case '"': {
+        t->kind = TK_U8Lit;
+        lex_parse_string(s,t,'"');
+        if(t->kind != TK_Error){
+          // Drop the surrounding quotes from the token text.
+          t->str += 1;
+          t->len -= 2;
+        }
+      } break;
+      case '/': {
+        if(lexc(s) == '/'){
+          lex_advance(s);
+          if(lexc(s) == '/'){
+            lex_advance(s);
+            t->kind = TK_DocComment;
+          }
+          else {
+            t->kind = TK_Comment;
+          }
+          // A line comment runs up to, but not including, the newline.
+          for(;;){
+            if(lexc(s) == '\n' || lexc(s) == 0) break;
+            lex_advance(s);
+          }
+          lex_set_len(s,t);
+        }
+        else if(lexc(s) == '*'){
+          lex_advance(s);
+          t->kind = TK_Comment;
+          for(;;){
+            if(s->stream[0] == '*' && s->stream[1] == '/'){
+              lex_advance(s);
+              lex_advance(s);
+              break;
+            }
+            else if(lexc(s) == 0){
+              token_error(t, lit("Unterminated block comment"));
+              break;
+            }
+            lex_advance(s);
+          }
+          lex_set_len(s,t);
+        }
+        else if(lexc(s) == '='){
+          lex_advance(s);
+          t->kind = TK_DivAssign;
+        }
+        else t->kind = TK_Div;
+      } break;
+      case '0':
+      case '1':case '2':case '3':
+      case '4':case '5':case '6':
+      case '7':case '8':case '9': {
+        t->kind = TK_Int;
+        while(lex_is_numeric(lexc(s)))
+          lex_advance(s);
+        lex_set_len(s, t);
+        t->int_val = parse_u64(t->str, t->len);
+      } break;
+      case 'l':{
+        // lit("...") is lexed as a single string-literal token; any other
+        // word starting with 'l' falls through to the identifier case.
+        if(s->stream[0] == 'i' && s->stream[1] == 't' && s->stream[2] == '(' && s->stream[3] == '"'){
+          t->kind = TK_StringLit;
+          lex_advance(s);lex_advance(s);lex_advance(s);lex_advance(s);
+          lex_parse_string(s,t,'"');
+
+          // Keep the unterminated-string error from lex_parse_string instead
+          // of replacing it with the missing-parenthesis message.
+          if(t->kind != TK_Error){
+            if(s->stream[0] == ')') {
+              // Strip the leading `lit("` and the closing quote from the text.
+              t->str += 5;
+              t->len -= 6;
+              lex_advance(s);
+            }
+            else token_error(t, lit("Unterminated string literal, missing closing parenthesis"));
+          }
+
+          break;
+        }
+      }
+      /* fallthrough */
+      case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':
+      case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case 'N':
+      case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':case 'U':
+      case 'V':case 'W':case 'X':case 'Y':case 'Z':
+      case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':
+      case 'h':case 'i':case 'j':case 'k':case 'm':case 'n':case 'o':
+      case 'p':case 'q':case 'r':case 's':case 't':case 'u':case 'v':
+      case 'w':case 'x':case 'y':case 'z':case '_': {
+        t->kind = TK_Identifier;
+        while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
+          lex_advance(s);
+        lex_set_len(s,t);
+      } break;
+      default: {
+        token_error(t, lit("Unknown token"));
+      } break;
+    }
+  }
+}
+
+// Lexes `in_stream` and returns the resulting token array. The text must be
+// zero-terminated: the lexer stops at the first zero byte and does not look
+// at in_stream.len. Line numbers start at 0. The caller owns the returned
+// tokens.tokens buffer and releases it with free().
+function Tokens
+lex_stream(String in_stream, String filename){
+  Lex_Stream stream = {in_stream.str, in_stream.str, filename, 0};
+  Tokens tokens = {0};
+  lex_base(&stream, &tokens);
+  return tokens;
+}
+
+// Writes the token count, then one line per token (kind name followed by the
+// token text), to the lexer output file.
+function void
+token_print(Tokens tokens){
+  lex_print("\n== Token count = %d\n", (S32)tokens.len);
+  for(Token *t = tokens.tokens; t != tokens.tokens + tokens.len; t++){
+    // A kind without a table entry (zeroed slot, or an index past the end of
+    // the table) must not reach "%s" as a null or out-of-bounds pointer.
+    const char *kind_name = "<unnamed kind>";
+    if((SizeU)t->kind < buff_cap(token_kind_string) && token_kind_string[t->kind].str)
+      kind_name = (const char *)token_kind_string[t->kind].str;
+    lex_print("%s %.*s\n", kind_name, (S32)t->len, t->str);
+  }
+}
+
+// Returns true when the text of token `t` equals `str`.
+function B32
+token_compare(Token *t, String str){
+  B32 result = string_compare(t->string, str);
+  return result;
+}
+
+// Smoke test: lexes three small inputs, prints the tokens and asserts on the
+// kinds, values and texts that are expected.
+function void
+lex_test(){
+  Tokens t;
+  t = lex_stream(lit("32523 42524 \"U8Literal\""), lit("test"));
+  token_print(t);
+  assert(t.len == 3);
+  assert(t.tokens[0].int_val == 32523);
+  assert(t.tokens[1].int_val == 42524);
+  assert(t.tokens[2].kind == TK_U8Lit);
+  assert(token_compare(t.tokens + 2, lit("U8Literal")));
+  free(t.tokens); // each lex_stream() result owns its own buffer
+
+  t = lex_stream(lit("_identifier Thing Thing2 lit(\"String_Test\")"), lit("test"));
+  token_print(t);
+  assert(t.tokens[0].kind == TK_Identifier);
+  assert(t.tokens[1].kind == TK_Identifier);
+  assert(t.tokens[2].kind == TK_Identifier);
+  assert(t.tokens[3].kind == TK_StringLit);
+  assert(token_compare(t.tokens, lit("_identifier")));
+  assert(token_compare(t.tokens+1, lit("Thing")));
+  assert(token_compare(t.tokens+2, lit("Thing2")));
+  assert(token_compare(t.tokens+3, lit("String_Test")));
+  free(t.tokens);
+
+  // Malformed input: lit(" without its closing parenthesis must yield an error first.
+  t = lex_stream(lit("lit(\"String_Test\"{})(324*=+=-/ *% // Comment \n Thing /*Thing*/ /*Error"), lit("test"));
+  assert(t.tokens[0].kind == TK_Error);
+  token_print(t);
+  free(t.tokens);
+}
+
+// Opens output.txt for the lexer output, runs the lexer test and closes the
+// file again. Always returns 0.
+function S32
+os_main(){
+  global_output_file = fopen("output.txt", "w");
+  assert_msg(global_output_file, "Failed to open output.txt");
+  lex_test();
+
+
+  fclose(global_output_file);
+  return 0;
+}
\ No newline at end of file
diff --git a/os.cpp b/os.cpp
new file mode 100644
index 0000000..80302c1
--- /dev/null
+++ b/os.cpp
@@ -0,0 +1,31 @@
+#define _CRT_SECURE_NO_WARNINGS
+#include <windows.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "types.h"
+
+function S32 os_main();
+
+// Window procedure: every message gets the default handling.
+LRESULT CALLBACK
+WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam){
+  return DefWindowProcW(hwnd, uMsg, wParam, lParam);
+}
+
+// Entry point: registers a window class, creates and shows one window, then
+// runs os_main() and returns its result. No message loop is run.
+int WINAPI
+WinMain(HINSTANCE hInstance, HINSTANCE a, LPSTR b, int nShowCmd){
+  const wchar_t *CLASS_NAME = L"Cool window class";
+  const wchar_t *WINDOW_NAME = L"Have a good day!";
+
+  WNDCLASSW wc = {0};
+  wc.lpfnWndProc = WindowProc;
+  wc.hInstance = hInstance;
+  wc.lpszClassName = CLASS_NAME;
+  RegisterClassW(&wc);
+
+  HWND window_handle = CreateWindowExW(0, CLASS_NAME, WINDOW_NAME, WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, 0, 0, hInstance, 0);
+  ShowWindow(window_handle, nShowCmd);
+  return os_main();
+}
\ No newline at end of file
diff --git a/types.h b/types.h
new file mode 100644
index 0000000..1d8f65b
--- /dev/null
+++ b/types.h
@@ -0,0 +1,39 @@
+#pragma once
+// Storage-class shorthands: both expand to `static`.
+#define global static
+#define function static
+// Assertions break into the debugger; assert_msg currently discards its message.
+#define assert(x) do{if(!(x)) __debugbreak();}while(0)
+#define assert_msg(x,msg) assert(x)
+#define not_implemented assert_msg(0, "Not implemented")
+#define invalid_codepath assert_msg(0, "Invalid codepath")
+// buff_cap: element count of an array. lit: String over a string literal, terminator excluded. meta: expands to nothing.
+#define buff_cap(x) (sizeof(x)/sizeof((x)[0]))
+#define lit(x) ((String){(U8*)(x),buff_cap(x)-1})
+#define meta(x)
+
+#include <stdint.h>
+typedef int8_t S8;
+typedef int16_t S16;
+typedef int32_t S32;
+typedef int64_t S64;
+typedef uint8_t U8;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef uint64_t U64;
+typedef S8 B8; // B8..B64: boolean-style aliases of the signed integers (B32 is what the lexer predicates return)
+typedef S16 B16;
+typedef S32 B32;
+typedef S64 B64;
+typedef uint64_t SizeU;
+typedef int64_t SizeI;
+typedef float F32;
+typedef double F64;
+// Internal linkage (`global` is `static`): safe to include this header from more than one translation unit.
+global const B32 true = 1;
+global const B32 false = 0;
+
+typedef struct String{ // Byte string as pointer + length; lit() builds one from a literal, terminator excluded
+  U8 *str;
+  S64 len;
+}String;