diff --git a/common.c b/common.c new file mode 100644 index 0000000..72c1691 --- /dev/null +++ b/common.c @@ -0,0 +1,57 @@ + +function S64 +clamp_top_s64(S64 val, S64 max){ + if(val>max)return max; + return val; +} + +function SizeU +get_align_offset(SizeU size, SizeU align){ + SizeU mask = align - 1; + SizeU val = size & mask; + if(val){ + val = align - val; + } + return val; +} + +function SizeU +align_up(SizeU size, SizeU align){ + SizeU result = size + get_align_offset(size, align); + return result; +} + +function U64 +hash_fnv(String string) { + U64 hash = (U64)14695981039346656037ULL; + for (U64 i = 0; i < string.len; i++) { + hash = hash ^ (U64)(string.str[i]); + hash = hash * (U64)1099511628211ULL; + } + return hash; +} + +function U64 +is_pow2(U64 x) { + assert(x != 0); + B32 result = (x & (x - 1llu)) == 0; + return result; +} + +function U64 +wrap_around_pow2(U64 x, U64 power_of_2) { + assert(is_pow2(power_of_2)); + U64 r = (((x)&((power_of_2)-1llu))); + return r; +} + +function B32 +string_compare(String a, String b){ + if(a.len != b.len) + return false; + for(S64 i = 0; i < a.len; i++){ + if(a.str[i] != b.str[i]) + return false; + } + return true; +} diff --git a/decl.c b/decl.c new file mode 100644 index 0000000..c8eea08 --- /dev/null +++ b/decl.c @@ -0,0 +1,50 @@ + +function Decl * +decl_new(Parser *p, Decl_Kind kind, Token *token, Intern_String name){ + Decl *result = arena_push_struct(&p->main_arena, Decl); + memory_zero(result, sizeof(Decl)); + + result->token = token; + result->kind = kind; + result->name = name; + return result; +} + +function Decl_Enum_Child * +decl_enum_child(Parser *p, Token *token, Expr *expr){ + Decl_Enum_Child *result = arena_push_struct(&p->main_arena, Decl_Enum_Child); + memory_zero(result, sizeof(Decl_Enum_Child)); + result->expr = expr; + result->token = token; + return result; +} + +function Decl * +decl_enum(Parser *p, Token *token, Intern_String name){ + Decl *result = decl_new(p, DK_Enum, token, name); + return 
result; +} + +function Decl * +decl_struct(Parser *p, Token *token, Intern_String name){ + Decl *result = decl_new(p, DK_Struct, token, name); + return result; +} +function Decl * +decl_union(Parser *p, Token *token, Intern_String name){ + Decl *result = decl_new(p, DK_Union, token, name); + return result; +} + +function void +decl_aggregate_push(Decl *a, Decl *b){ + SLLQueuePush(a->aggregate_val.first, a->aggregate_val.last, b); +} +function void +decl_function_push(Decl *a, Decl *b){ + SLLQueuePush(a->func_val.first, a->func_val.last, b); +} +function void +decl_enum_push(Decl *a, Decl_Enum_Child *b){ + SLLQueuePush(a->enum_val.first, a->enum_val.last, b); +} diff --git a/decl.h b/decl.h new file mode 100644 index 0000000..0096c74 --- /dev/null +++ b/decl.h @@ -0,0 +1,63 @@ +#pragma once +typedef struct Note Note; +typedef struct Decl Decl; +typedef struct Decl_Enum_Child Decl_Enum_Child; + +typedef enum Decl_Kind{ + DK_None, + DK_Variable, + DK_Typedef, + DK_Struct, + DK_Union, + DK_Enum, + DK_Function, +}Decl_Kind; + +struct Note{ + Intern_String string; + Token *token; + Expr *expr; + + Note *next; + Note *first; + Note *last; +}; + +struct Decl_Enum_Child{ + Decl_Enum_Child *next; + Token *token; // name + Expr *expr; +}; + +struct Decl{ + Decl_Kind kind; + Decl *next; + Intern_String name; + Token *token; + + Note *first_note; + Note *last_note; + union{ + struct{ + Decl_Enum_Child *first; + Decl_Enum_Child *last; + } enum_val; + struct{ + Decl *first; + Decl *last; + } aggregate_val; + struct{ + Decl *first; + Decl *last; + Type *return_type; + }func_val; + struct{ + Type *type; + Expr *expr; + }var_val; + struct{ + Type *type; + }typedef_val; + }; +}; + diff --git a/expr.c b/expr.c new file mode 100644 index 0000000..003fea5 --- /dev/null +++ b/expr.c @@ -0,0 +1,74 @@ + +function Expr * +expr_new(Parser *p, Expr_Kind kind, Token *token){ + Expr *expr = arena_push_struct(&p->main_arena, Expr); + memory_zero(expr, sizeof(Expr)); + expr->kind = kind; + 
expr->token = token; + return expr; +} + +function Expr * +expr_atom(Parser *p, Token *token){ + Expr *expr = expr_new(p, EK_Atom, token); + return expr; +} + +function Expr * +expr_unary(Parser *p, Token *op, Expr *exp){ + Expr *expr = expr_new(p, EK_Unary, op); + expr->unary.expr = exp; + return expr; +} + +function Expr * +expr_binary(Parser *p, Token *op, Expr *left, Expr *right){ + Expr *expr = expr_new(p, EK_Binary, op); + expr->binary.left = left; + expr->binary.right = right; + return expr; +} + +function Expr * +expr_ternary(Parser *p, Token *op, Expr *cond, Expr *on_true, Expr *on_false){ + Expr *expr = expr_new(p, EK_Ternary, op); + expr->ternary.cond = cond; + expr->ternary.on_true = on_true; + expr->ternary.on_false = on_false; + return expr; +} + +function Expr * +expr_call(Parser *p, Token *token, Expr *atom, Expr *list){ + Expr *expr = expr_new(p, EK_Call, token); + expr->call.atom = atom; + expr->call.list = list; + return expr; +} + +function Expr * +expr_index(Parser *p, Token *token, Expr *atom, Expr *index){ + Expr *expr = expr_new(p, EK_Index, token); + expr->index.atom = atom; + expr->index.index = index; + return expr; +} + +function Expr * +expr_cast(Parser *p, Token *token, Type *type, Expr *exp){ + Expr *expr = expr_new(p, EK_Cast, token); + expr->cast.type = type; + expr->cast.expr = exp; + return expr; +} + +function Expr * +expr_list(Parser *p, Token *token){ + Expr *expr = expr_new(p, EK_List, token); + return expr; +} + +function void +expr_list_push(Expr *list, Expr *expr){ + SLLQueuePush(list->list.first, list->list.last, expr); +} diff --git a/expr.h b/expr.h new file mode 100644 index 0000000..adc0dbf --- /dev/null +++ b/expr.h @@ -0,0 +1,50 @@ +#pragma once +typedef struct Expr Expr; + +typedef enum Expr_Kind{ + EK_None, + EK_Atom, + EK_Unary, + EK_Binary, + EK_Ternary, + EK_Cast, + EK_List, + EK_Call, + EK_Index, +} Expr_Kind; + +struct Expr { + Expr_Kind kind; + Token *token; + Expr *next; + union { + struct { + Type *type; + 
Expr* expr; + } cast; + struct { + Expr *first; + Expr *last; + } list; + struct { + Expr *atom; + Expr *list; + } call; + struct { + Expr *atom; + Expr *index; + } index; + struct { + Expr* expr; + } unary; + struct { + Expr* left; + Expr* right; + } binary; + struct { + Expr* cond; + Expr* on_true; + Expr* on_false; + } ternary; + }; +}; diff --git a/types.h b/lang.h similarity index 81% rename from types.h rename to lang.h index 1d8f65b..f3955e6 100644 --- a/types.h +++ b/lang.h @@ -32,8 +32,20 @@ typedef double F64; const B32 true = 1; const B32 false = 0; +#define kib(x) ((x)*1024llu) +#define mib(x) (kib(x)*1024llu) +#define gib(x) (mib(x)*1024llu) typedef struct String{ U8 *str; S64 len; }String; + +#define SLLQueuePush(f,l,n) do{\ +if((f)==0){\ +(f)=(l)=(n);\ +}\ +else{\ +(l)=(l)->next=(n);\ +} \ +}while(0) \ No newline at end of file diff --git a/lex.c b/lex.c new file mode 100644 index 0000000..183c73a --- /dev/null +++ b/lex.c @@ -0,0 +1,498 @@ +global Token token_end_of_stream = {}; + +function Token * +token_alloc(Tokens *t){ + if(t->cap == 0){ + t->cap = 1024; + t->tokens = malloc(sizeof(Token)*t->cap); + } + else if(t->len+1 > t->cap){ + t->cap *= 2; + t->tokens = realloc(t->tokens, sizeof(Token)*t->cap); + } + + Token *result = t->tokens + t->len++; + memory_zero(result, sizeof(*result)); + return result; +} + +function void +lex_advance(Lex_Stream *s){ + if(*s->stream == '\n'){ + s->stream++; + s->line++; + s->line_begin = s->stream; + } + else if(*s->stream == 0){ + // Don't advance, end of stream + } + else{ + s->stream++; + } +} + +function B32 +lex_is_whitespace(U8 c){ + B32 result = c == '\n' || c == '\r' || c == ' ' || c == '\r'; + return result; +} + +function B32 +lex_is_alphabetic(U8 c){ + B32 result = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + return result; +} + +function B32 +lex_is_numeric(U8 c){ + B32 result = c >= '0' && c <= '9'; + return result; +} + +function B32 +lex_is_alphanumeric(U8 c){ + B32 result = 
lex_is_numeric(c) || lex_is_alphabetic(c); + return result; +} + +function void +lex_set_len(Lex_Stream *s, Token *token){ + assert(s->stream > token->str); + token->len = s->stream - token->str; +} + +function U8 +lexc(Lex_Stream *s){ + return *s->stream; +} + +function void +token_error(Token *t, String error_val){ + t->kind = TK_Error; + t->error_val = error_val; +} + +function void +lex_parse_string(Lex_Stream *s, Token *t, U8 c){ + for(;;){ + if(lexc(s) == '\\') lex_advance(s); + else if(lexc(s) == c) break; + else if(lexc(s) == 0){ + token_error(t, lit("Unterminated string, reached end of file")); + break; + } + lex_advance(s); + } + if(t->kind != TK_Error){ + lex_advance(s); + lex_set_len(s,t); + } +} + +function void +lex_token_seed(Lex_Stream *s, Token *t){ + t->str = s->stream; + t->file = s->filename; + t->line = s->line; + t->line_begin = s->line_begin; +} + +function U64 +parse_u64(U8 *str, S64 len){ + U64 result = 0; + U64 m = 1; + for(S64 i = len - 1; i >= 0; --i){ + U64 val = str[i] - '0'; + U64 new_val = val * m; + assert_msg(result+new_val >= result, "Integer overflow!"); + result+=new_val; + m *= 10; + } + return result; +} + +function void +token_push_error(Lex_Stream *stream, Tokens *tokens, String error_val){ + Token *token = token_alloc(tokens); + token->kind = TK_Error; + token->error_val = error_val; + lex_token_seed(stream, token); +} + +function void +lex_base(Lex_Stream *s, Tokens *tokens){ + while(*s->stream){ + while(lex_is_whitespace(*s->stream)) + lex_advance(s); + + +#define CASE2(op, OpName, Assign) \ +case op: \ +if (lexc(s) == '=') { \ +lex_advance(s); \ +t->kind = Assign; \ +} else { \ +t->kind = OpName; \ +} \ +break +#define CASE3(op, OpName, Assign, Incr) \ +case op: \ +if (lexc(s) == '=') { \ +lex_advance(s); \ +t->kind = Assign; \ +} else if (lexc(s) == op) { \ +lex_advance(s); \ +t->kind = Incr; \ +} else { \ +t->kind = OpName; \ +} \ +break + + + Token *t = token_alloc(tokens); + lex_token_seed(s, t); + lex_advance(s); + 
switch(*t->str) { + CASE2('!', TK_Not, TK_NotEquals); + CASE2('^', TK_BitXor, TK_XorAssign); + CASE2('=', TK_Assign, TK_Equals); + CASE2('*', TK_Mul, TK_MulAssign); + CASE2('%', TK_Mod, TK_ModAssign); + CASE3('+', TK_Add, TK_AddAssign, TK_Increment); + CASE3('&', TK_BitAnd, TK_AndAssign, TK_And); + CASE3('|', TK_BitOr, TK_OrAssign, TK_Or); +#undef CASE2 +#undef CASE3 + + case '@': t->kind = TK_At; break; + case '(': t->kind = TK_OpenParen; break; + case ')': t->kind = TK_CloseParen; break; + case '{': t->kind = TK_OpenBrace; break; + case '}': t->kind = TK_CloseBrace; break; + case '[': t->kind = TK_OpenBracket; break; + case ']': t->kind = TK_CloseBracket; break; + case ',': t->kind = TK_Comma; break; + case '~': t->kind = TK_Neg; break; + case '?': t->kind = TK_Question; break; + case ';': t->kind = TK_Semicolon; break; + + case '#': { + t->kind = TK_Pound; + // @Todo(Krzosa): Some convenient way to recognize macros + } break; + + case '.': { + if(s->stream[0] == '.' && s->stream[1] == '.') { + lex_advance(s); + lex_advance(s); + t->kind = TK_ThreeDots; + } + else { + t->kind = TK_Dot; + } + } break; + + case '<': { + if (lexc(s) == '<') { + lex_advance(s); + if (lexc(s) == '=') { + lex_advance(s); + t->kind = TK_LeftShiftAssign; + } + else { + t->kind = TK_LeftShift; + } + } + else if (lexc(s) == '=') { + lex_advance(s); + t->kind = TK_LesserThenOrEqual; + } + else { + t->kind = TK_LesserThen; + } + } break; + + case '>': { + if (lexc(s) == '>') { + lex_advance(s); + if (lexc(s) == '=') { + lex_advance(s); + t->kind = TK_RightShiftAssign; + } + else { + t->kind = TK_RightShift; + } + } + else if (lexc(s) == '=') { + lex_advance(s); + t->kind = TK_GreaterThenOrEqual; + } + else { + t->kind = TK_GreaterThen; + } + } break; + + case ':': { + if (lexc(s) == ':') { + lex_advance(s); + t->kind = TK_DoubleColon; + } + else { + t->kind = TK_Colon; + } + } break; + + case '-':{ + if (lexc(s) == '=') { + lex_advance(s); + t->kind = TK_SubAssign; + } + else if (lexc(s) == 
'-') { + lex_advance(s); + t->kind = TK_Decrement; + } + else if (lexc(s) == '>') { + lex_advance(s); + t->kind = TK_Arrow; + } + else { + t->kind = TK_Sub; + } + } break; + + case '\'':{not_implemented;} break; + case '"': { + t->kind = TK_U8Lit; + lex_parse_string(s,t,'"'); + if(t->kind != TK_Error){ + t->str += 1; + t->len -= 2; + } + } break; + + case '/': { + if(lexc(s) == '='){ + t->kind = TK_DivAssign; + lex_advance(s); + } + else if(lexc(s) == '/'){ + lex_advance(s); + if(lexc(s) == '/'){ + lex_advance(s); + t->kind = TK_DocComment; + } + else { + t->kind = TK_Comment; + } + for(;;){ + if(lexc(s) == '\n' || lexc(s) == 0) break; + lex_advance(s); + } + lex_set_len(s,t); + } + else if(lexc(s) == '*'){ + lex_advance(s); + t->kind = TK_Comment; + for(;;){ + if(s->stream[0] == '*' && s->stream[1] == '/'){ + lex_advance(s); + lex_advance(s); + break; + } + else if(lexc(s) == 0){ + token_error(t, lit("Unterminated block comment")); + break; + } + lex_advance(s); + } + lex_set_len(s,t); + } + else t->kind = TK_Div; + } break; + + case '0': + case '1':case '2':case '3': + case '4':case '5':case '6': + case '7':case '8':case '9': { + t->kind = TK_Int; + while(lex_is_numeric(lexc(s))) + lex_advance(s); + lex_set_len(s, t); + t->int_val = parse_u64(t->str, t->len); + } break; + + case 'l':{ + if(s->stream[0] == 'i' && s->stream[1] == 't' && s->stream[2] == '(' && s->stream[3] == '"'){ + t->kind = TK_StringLit; + lex_advance(s);lex_advance(s);lex_advance(s);lex_advance(s); + lex_parse_string(s,t,'"'); + + if(s->stream[0] == ')') { + t->str += 5; + t->len -= 6; + lex_advance(s); + } + else token_error(t, lit("Unterminated string literal, missing closing parenthesis")); + + break; + } + }; + + case 'A':case 'a':case 'M':case 'm':case 'B': + case 'b':case 'N':case 'n':case 'C':case 'c':case 'O': + case 'o':case 'D':case 'd':case 'P':case 'p':case 'E': + case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R': + case 'r':case 'G':case 'g':case 'S':case 's':case 'H': + case 
'h':case 'T':case 't':case 'I':case 'i':case 'U': + case 'u':case 'J':case 'j':case 'V':case 'v':case 'K': + case 'k':case 'W':case 'w':case 'L':case 'X': + case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': { + t->kind = TK_Identifier; + while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_') + lex_advance(s); + lex_set_len(s,t); + } break; + default: { + token_error(t, lit("Unknown token")); + } break; + } + if(t->len==0){ + lex_set_len(s,t); + } + } + + // Token end of stream + Token *t = token_alloc(tokens); + *t = token_end_of_stream; + tokens->len -= 1; +} + +function Tokens +lex_stream(String in_stream, String filename){ + Lex_Stream stream = {in_stream.str, in_stream.str, filename, 0}; + Tokens tokens = {}; + lex_base(&stream, &tokens); + return tokens; +} + +function void +parser_lex_stream(Parser *p, String in_stream, String filename){ + Lex_Stream stream = {in_stream.str, in_stream.str, filename, 0}; + p->tokens.len = 0; + p->tokens.iter = 0; + lex_base(&stream, &p->tokens); + intern_tokens(p); +} + +//----------------------------------------------------------------------------- +// +//----------------------------------------------------------------------------- + +function B32 +token_compare(Token *t, String str){ + B32 result = string_compare(t->string, str); + return result; +} + +function B32 +token_is_comment(Token *token){ + B32 result = token->kind == TK_Comment || token->kind == TK_DocComment; + return result; +} + +function Token * +token_get(Parser *p){ + Token *token = p->tokens.tokens + p->tokens.iter; + return token; +} + +function B32 +intern_compare(Intern_String a, Intern_String b){ + B32 result = a.s.str == b.s.str; + return result; +} + +function Token * +token_is_keyword(Parser *p, Intern_String keyword){ + assert(intern_is_keyword(p, keyword)); + Token *t = token_get(p); + if(t->kind == TK_Keyword && intern_compare(t->intern_val, keyword)){ + return t; + } + return 0; +} + +function void +token_advance(Parser *p){ + do{ + 
p->tokens.iter = clamp_top_s64(p->tokens.iter + 1, p->tokens.len); + }while(token_is_comment(token_get(p))); +} + +function Token * +token_next(Parser *p){ + Token *token = token_get(p); + token_advance(p); + return token; +} + +function Token * +token_match(Parser *p, Token_Kind kind){ + Token *token = token_get(p); + if(token->kind == kind){ + return token_next(p); + } + return 0; +} + +function Token * +token_match_keyword(Parser *p, Intern_String keyword){ + assert(intern_is_keyword(p, keyword)); + Token *token = token_get(p); + if(token->kind == TK_Keyword && intern_compare(keyword, token->intern_val)){ + return token_next(p); + } + return 0; +} + +function Token * +token_expect(Parser *p, Token_Kind kind){ + Token *token = token_get(p); + if(token->kind == kind){ + return token_next(p); + } + + parser_push_error(p, token, + "Expected token of kind: %s, got instead token of kind: %s", + token_kind_string[kind].str, token_kind_string[token->kind].str); + return 0; +} + +function B32 +token_is(Parser *p, Token_Kind kind){ + B32 result = token_get(p)->kind == kind; + return result; +} + +function Token * +token_is_assignment(Parser *p){ + Token *t = token_get(p); + if(t->kind >= TK_Assign && t->kind <= TK_RightShiftAssign) + return t; + return 0; +} + +function Token * +token_peek(Parser *p, S64 count){ + S64 index = clamp_top_s64(p->tokens.iter + count, p->tokens.len); + Token *result = p->tokens.tokens + index; + return result; +} + +function Token * +token_peek_is(Parser *p, S64 count, Token_Kind kind){ + Token *token = token_peek(p, count); + if(token->kind == kind) + return token; + return 0; +} \ No newline at end of file diff --git a/lex.h b/lex.h index f2eb87b..8bc4d88 100644 --- a/lex.h +++ b/lex.h @@ -1,4 +1,7 @@ #pragma once +typedef struct Intern_String{ + String s; +}Intern_String; typedef enum Token_Kind{ meta("End of stream")TK_End, @@ -59,6 +62,7 @@ typedef enum Token_Kind{ TK_Identifier, TK_StringLit, TK_U8Lit, + TK_Character, TK_Error, TK_Float, 
TK_Int, @@ -77,6 +81,7 @@ typedef struct Token{ union { S64 int_val; String error_val; + Intern_String intern_val; }; String file; @@ -88,6 +93,7 @@ typedef struct Tokens{ Token *tokens; S64 len; S64 cap; + S64 iter; }Tokens; typedef struct Lex_Stream{ @@ -105,6 +111,8 @@ global String token_kind_string[] = { [TK_Error] = lit("Error"), [TK_Comment] = lit("Comment"), [TK_Identifier] = lit("Identifier"), + [TK_Keyword] = lit("Keyword"), + [TK_Character] = lit("Character"), [TK_StringLit] = lit("StringLiteral"), [TK_U8Lit] = lit("U8Literal"), [TK_Float] = lit("Float"), diff --git a/main.c b/main.c index 1b28e2c..a766d64 100644 --- a/main.c +++ b/main.c @@ -1,341 +1,41 @@ -#include "os.cpp" +#define _CRT_SECURE_NO_WARNINGS +#include +#include + +#include "lang.h" +#include "os.h" +#include "memory.h" #include "lex.h" -global FILE *global_output_file; -#define lex_print(...) fprintf(global_output_file, __VA_ARGS__) +#include "parser.h" +#include "type.h" +#include "expr.h" +#include "decl.h" -function void -memory_zero(void *p, SizeU size){ - U8 *pp = p; - for(SizeU i = 0; i < size; i++) - pp[i] = 0; -} - -function B32 -string_compare(String a, String b){ - if(a.len != b.len) - return false; - for(S64 i = 0; i < a.len; i++){ - if(a.str[i] != b.str[i]) - return false; - } - return true; -} - -function Token * -token_alloc(Tokens *t){ - if(t->cap == 0){ - t->cap = 1024; - t->tokens = malloc(sizeof(Token)*t->cap); - } - else if(t->len+1 > t->cap){ - t->cap *= 2; - t->tokens = realloc(t->tokens, sizeof(Token)*t->cap); - } - - Token *result = t->tokens + t->len++; - memory_zero(result, sizeof(*result)); - return result; -} - -function void -lex_advance(Lex_Stream *s){ - if(*s->stream == '\n'){ - s->stream++; - s->line++; - s->line_begin = s->stream; - } - else if(*s->stream == 0){ - // Don't advance, end of stream - } - else{ - s->stream++; - } -} - -function U64 -parse_u64(U8 *str, S64 len){ - U64 result = 0; - U64 m = 1; - for(S64 i = len - 1; i >= 0; --i){ - U64 val = 
str[i] - '0'; - result += val * m; - m *= 10; - } - return result; -} - -function B32 -lex_is_whitespace(U8 c){ - B32 result = c == '\n' || c == '\r' || c == ' ' || c == '\r'; - return result; -} - -function B32 -lex_is_alphabetic(U8 c){ - B32 result = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); - return result; -} - -function B32 -lex_is_numeric(U8 c){ - B32 result = c >= '0' && c <= '9'; - return result; -} - -function B32 -lex_is_alphanumeric(U8 c){ - B32 result = lex_is_numeric(c) || lex_is_alphabetic(c); - return result; -} - -function void -lex_set_len(Lex_Stream *s, Token *token){ - assert(s->stream > token->str); - token->len = s->stream - token->str; -} - -function U8 -lexc(Lex_Stream *s){ - return *s->stream; -} - -function void -token_error(Token *t, String error_val){ - t->kind = TK_Error; - t->error_val = error_val; -} - -function void -lex_parse_string(Lex_Stream *s, Token *t, U8 c){ - for(;;){ - if(lexc(s) == '\\') lex_advance(s); - else if(lexc(s) == c) break; - else if(lexc(s) == 0){ - token_error(t, lit("Unterminated string, reached end of file")); - break; - } - lex_advance(s); - } - if(t->kind != TK_Error){ - lex_advance(s); - lex_set_len(s,t); - } -} - -function void -lex_token_seed(Lex_Stream *s, Token *t){ - t->str = s->stream; - t->file = s->filename; - t->line = s->line; - t->line_begin = s->line_begin; -} - -function void -token_push_error(Lex_Stream *stream, Tokens *tokens, String error_val){ - Token *token = token_alloc(tokens); - token->kind = TK_Error; - token->error_val = error_val; - lex_token_seed(stream, token); -} - -function void -lex_base(Lex_Stream *s, Tokens *tokens){ - while(*s->stream){ - while(lex_is_whitespace(*s->stream)) - lex_advance(s); - - -#define CASE2(op, OpName, Assign) \ -case op: \ -if (lexc(s) == '=') { \ -lex_advance(s); \ -t->kind = Assign; \ -} else { \ -t->kind = OpName; \ -} \ -break -#define CASE3(op, OpName, Assign, Incr) \ -case op: \ -if (lexc(s) == '=') { \ -lex_advance(s); \ -t->kind = Assign; 
\ -} else if (lexc(s) == op) { \ -lex_advance(s); \ -t->kind = Incr; \ -} else { \ -t->kind = OpName; \ -} \ -break - - - Token *t = token_alloc(tokens); - lex_token_seed(s, t); - lex_advance(s); - switch(*t->str) { - CASE2('!', TK_Not, TK_NotEquals); - CASE2('^', TK_BitXor, TK_XorAssign); - CASE2('=', TK_Assign, TK_Equals); - CASE2('*', TK_Mul, TK_MulAssign); - CASE2('%', TK_Mod, TK_ModAssign); - CASE3('+', TK_Add, TK_AddAssign, TK_Increment); - CASE3('&', TK_BitAnd, TK_AndAssign, TK_And); - CASE3('|', TK_BitOr, TK_OrAssign, TK_Or); -#undef CASE2 -#undef CASE3 - case '@': t->kind = TK_At; break; - case '(': t->kind = TK_OpenParen; break; - case ')': t->kind = TK_CloseParen; break; - case '{': t->kind = TK_OpenBrace; break; - case '}': t->kind = TK_CloseBrace; break; - case '[': t->kind = TK_OpenBracket; break; - case ']': t->kind = TK_CloseBracket; break; - case ',': t->kind = TK_Comma; break; - case '~': t->kind = TK_Neg; break; - case '?': t->kind = TK_Question; break; - case ';': t->kind = TK_Semicolon; break; - case '-':{ - if (lexc(s) == '=') { - lex_advance(s); - t->kind = TK_SubAssign; - } - else if (lexc(s) == '-') { - lex_advance(s); - t->kind = TK_Decrement; - } - else if (lexc(s) == '>') { - lex_advance(s); - t->kind = TK_Arrow; - } - else { - t->kind = TK_Sub; - } - } break; - case '\'':{not_implemented;} break; - case '"': { - t->kind = TK_U8Lit; - lex_parse_string(s,t,'"'); - if(t->kind != TK_Error){ - t->str += 1; - t->len -= 2; - } - } break; - case '/': { - if(lexc(s) == '/'){ - lex_advance(s); - if(lexc(s) == '/'){ - lex_advance(s); - t->kind = TK_DocComment; - } - else { - t->kind = TK_Comment; - } - for(;;){ - if(lexc(s) == '\n' || lexc(s) == 0) break; - lex_advance(s); - } - lex_set_len(s,t); - } - else if(lexc(s) == '*'){ - lex_advance(s); - t->kind = TK_Comment; - for(;;){ - if(s->stream[0] == '*' && s->stream[1] == '/'){ - lex_advance(s); - lex_advance(s); - break; - } - else if(lexc(s) == 0){ - token_error(t, lit("Unterminated block 
comment")); - break; - } - lex_advance(s); - } - lex_set_len(s,t); - } - else t->kind = TK_Div; - } break; - case '0': - case '1':case '2':case '3': - case '4':case '5':case '6': - case '7':case '8':case '9': { - t->kind = TK_Int; - while(lex_is_numeric(lexc(s))) - lex_advance(s); - lex_set_len(s, t); - t->int_val = parse_u64(t->str, t->len); - } break; - case 'l':{ - if(s->stream[0] == 'i' && s->stream[1] == 't' && s->stream[2] == '(' && s->stream[3] == '"'){ - t->kind = TK_StringLit; - lex_advance(s);lex_advance(s);lex_advance(s);lex_advance(s); - lex_parse_string(s,t,'"'); - - if(s->stream[0] == ')') { - t->str += 5; - t->len -= 6; - lex_advance(s); - } - else token_error(t, lit("Unterminated string literal, missing closing parenthesis")); - - break; - } - }; - case 'A':case 'a':case 'M':case 'm':case 'B': - case 'b':case 'N':case 'n':case 'C':case 'c':case 'O': - case 'o':case 'D':case 'd':case 'P':case 'p':case 'E': - case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R': - case 'r':case 'G':case 'g':case 'S':case 's':case 'H': - case 'h':case 'T':case 't':case 'I':case 'i':case 'U': - case 'u':case 'J':case 'j':case 'V':case 'v':case 'K': - case 'k':case 'W':case 'w':case 'L':case 'X': - case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': { - t->kind = TK_Identifier; - while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_') - lex_advance(s); - lex_set_len(s,t); - } break; - default: { - token_error(t, lit("Unknown token")); - } break; - } - } -} - -function Tokens -lex_stream(String in_stream, String filename){ - Lex_Stream stream = {in_stream.str, in_stream.str, filename, 0}; - Tokens tokens = {}; - lex_base(&stream, &tokens); - return tokens; -} - -function void -token_print(Tokens tokens){ - lex_print("\n== Token count = %d\n", (S32)tokens.len); - for(Token *t = tokens.tokens; t != tokens.tokens + tokens.len; t++){ - lex_print("%s %.*s\n", token_kind_string[t->kind].str, (S32)t->len, t->str); - } -} - -function B32 -token_compare(Token *t, String str){ - 
B32 result = string_compare(t->string, str); - return result; -} +#include "common.c" +#include "memory.c" +#include "parser.c" +#include "os_win32.c" +#include "lex.c" +#include "expr.c" +#include "decl.c" +#include "type.c" +#include "parse_expr.c" +#include "parse_decl.c" +#include "print.c" function void lex_test(){ Tokens t; - t = lex_stream(lit("32523 42524 \"U8Literal\""), lit("test")); - token_print(t); + t = lex_stream(lit("3252342510 42524 \"U8Literal\""), lit("test")); + //tokens_print(t); assert(t.len == 3); - assert(t.tokens[0].int_val == 32523); + assert(t.tokens[0].int_val == 3252342510); assert(t.tokens[1].int_val == 42524); assert(t.tokens[2].kind == TK_U8Lit); assert(token_compare(t.tokens + 2, lit("U8Literal"))); t = lex_stream(lit("_identifier Thing Thing2 lit(\"String_Test\")"), lit("test")); - token_print(t); + //tokens_print(t); assert(t.tokens[0].kind == TK_Identifier); assert(t.tokens[1].kind == TK_Identifier); assert(t.tokens[2].kind == TK_Identifier); @@ -345,10 +45,68 @@ lex_test(){ assert(token_compare(t.tokens+2, lit("Thing2"))); assert(token_compare(t.tokens+3, lit("String_Test"))); - - t = lex_stream(lit("lit(\"String_Test\"{})(324*=+=-/ *% // Comment \n Thing /*Thing*/ /*Error"), lit("test")); + t = lex_stream(lit("lit(\"String_Test\"{})(324*=+=-/ *% // Comment \n" + "Thing /*Thing*/ += -= =- +/%^&*&&|| |>> << <<= >>=/*Error"), + lit("test")); assert(t.tokens[0].kind == TK_Error); - token_print(t); + //tokens_print(t); +} + +function void +parser_test(){ + Parser p = {}; + { + parser_init(&p); + Intern_String a = intern_string(&p, lit("Thing")); + Intern_String b = intern_string(&p, lit("Thing")); + assert(a.s.str == b.s.str); + } + + parser_lex_stream(&p, lit("S64 thing; S64 second_var = 10;"), lit("File")); + assert(token_match(&p, TK_Keyword)); + assert(token_match(&p, TK_Identifier)); + assert(token_match(&p, TK_Semicolon)); + assert(token_match(&p, TK_Keyword)); + assert(token_match(&p, TK_Identifier)); + assert(token_match(&p, 
TK_Assign)); + assert(token_match(&p, TK_Int)); + assert(token_match(&p, TK_Semicolon)); + assert(token_match(&p, TK_End)); + assert(token_match(&p, TK_End)); + assert(token_match(&p, TK_End)); + + String exprs[] = { + lit("(4+2*53)"), + lit("((4+2)*53"), + lit("++5"), + lit("5--"), // @Todo(Krzosa): + lit("-5"), + lit("(+5)"), + lit("(S64)5"), + lit("(S64)5+3"), + lit("534>43?435:42"), + lit("(534>43?435:42,234,(S64)42,Thing[10][2],Thing(1,2))"), + }; + for(S64 i = 0; i < buff_cap(exprs); i++){ + parser_lex_stream(&p, exprs[i], lit("File")); + Expr *expr = parse_expr(&p); + assert(expr); + expr_print(expr); + lex_print("\n"); + } + type_test(&p); + lex_new_line(); + + String decls[] = { + lit("enum Thing{ Thing_1, Thing_2 = 2}"), + }; + for(S64 i = 0; i < buff_cap(decls); i++){ + parser_lex_stream(&p, decls[i], lit("File")); + Decl *decl = parse_decl_global(&p); + assert(decl); + decl_print(decl); + lex_new_line(); + } } function S32 @@ -356,8 +114,28 @@ os_main(){ global_output_file = fopen("output.txt", "w"); assert_msg(global_output_file, "Failed to open output.txt"); lex_test(); + parser_test(); - + /* + String keywords[]={ + lit("S64"), + lit("U64"), + lit("void"), + lit("SizeU"), + lit("struct"), + lit("union"), + lit("function"), + lit("global"), + }; + + for(S64 i = 0; i < buff_cap(keywords); i++){ + lex_print("global Intern_String keyword_%s;\n", keywords[i].str); + } + for(S64 i = 0; i < buff_cap(keywords); i++){ + lex_print("keyword_%s = intern_string(p, lit(\"%s\"));\n", keywords[i].str, keywords[i].str); + } + */ fclose(global_output_file); + return 0; } \ No newline at end of file diff --git a/memory.c b/memory.c new file mode 100644 index 0000000..2718c6c --- /dev/null +++ b/memory.c @@ -0,0 +1,47 @@ +global const SizeU default_reserve_size = gib(4); +global const SizeU default_alignment = 8; +global const SizeU additional_commit_size = mib(1); + +function void +memory_copy(U8 *dst, U8 *src, SizeU size){ + for(SizeU i = 0; i < size; i++){ + dst[i] = 
src[i]; + } +} + +function void +memory_zero(void *p, SizeU size){ + U8 *pp = p; + for(SizeU i = 0; i < size; i++) + pp[i] = 0; +} + +function void +arena_init(Arena *a){ + a->memory = os_reserve(default_reserve_size); + a->alignment = default_alignment; +} + +function void * +arena_push_size(Arena *a, SizeU size){ + SizeU generous_size = size + a->alignment; + if(generous_size>a->memory.commit){ + if(a->memory.reserve == 0){ + arena_init(a); + } + os_commit(&a->memory, generous_size+additional_commit_size); + } + + a->len = align_up(a->len, a->alignment); + void *result = (U8*)a->memory.data + a->len; + a->len += size; + return result; +} + +function String +arena_push_string_copy(Arena *arena, String string){ + U8 *copy = arena_push_array(arena, U8, string.len+1); + memory_copy(copy, string.str, string.len); + copy[string.len] = 0; + return (String){copy, string.len}; +} diff --git a/memory.h b/memory.h new file mode 100644 index 0000000..182a221 --- /dev/null +++ b/memory.h @@ -0,0 +1,12 @@ + +typedef struct Arena{ + OS_Memory memory; + U64 len; + U64 alignment; +}Arena; + +function B32 string_compare(String a, String b); +function void *arena_push_size(Arena *a, SizeU size); +function String arena_push_string_copy(Arena *arena, String string); +#define arena_push_array(a,T,c) arena_push_size(a,sizeof(T)*(c)) +#define arena_push_struct(a,T) arena_push_array(a,T,1) \ No newline at end of file diff --git a/os.h b/os.h new file mode 100644 index 0000000..7c37253 --- /dev/null +++ b/os.h @@ -0,0 +1,9 @@ + +typedef struct OS_Memory{ + void *data; + SizeU commit; + SizeU reserve; +}OS_Memory; + +function OS_Memory os_reserve(SizeU size); +function void os_commit(OS_Memory *m, SizeU size); \ No newline at end of file diff --git a/os.cpp b/os_win32.c similarity index 53% rename from os.cpp rename to os_win32.c index 80302c1..516bc7c 100644 --- a/os.cpp +++ b/os_win32.c @@ -1,9 +1,5 @@ -#define _CRT_SECURE_NO_WARNINGS -#include -#include -#include "types.h" - function 
S32 os_main(); +const SizeU page_size = 4096; LRESULT CALLBACK WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam){ @@ -24,4 +20,22 @@ WinMain(HINSTANCE hInstance, HINSTANCE a, LPSTR b, int nShowCmd){ HWND window_handle = CreateWindowExW(0, CLASS_NAME, WINDOW_NAME, WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, 0, 0, hInstance, 0); ShowWindow(window_handle, nShowCmd); return os_main(); +} + +function OS_Memory +os_reserve(SizeU size){ + OS_Memory result = {0}; + SizeU adjusted_size = align_up(size, page_size); + result.data = VirtualAlloc(0, adjusted_size, MEM_RESERVE, PAGE_READWRITE); + assert_msg(result.data, "Failed to reserve virtual memory"); + result.reserve = adjusted_size; + return result; +} + +function void +os_commit(OS_Memory *m, SizeU size){ + SizeU commit = align_up(size, page_size); + void *p = VirtualAlloc((U8 *)m->data + m->commit, commit, MEM_COMMIT, PAGE_READWRITE); + assert_msg(p, "Failed to commit more memory"); + m->commit += commit; } \ No newline at end of file diff --git a/parse_decl.c b/parse_decl.c new file mode 100644 index 0000000..29307f6 --- /dev/null +++ b/parse_decl.c @@ -0,0 +1,60 @@ + +function Decl * +parse_decl_enum(Parser *p){ + Token *token = token_get(p); + Intern_String name = {}; + if(token_match(p, TK_Identifier)){ + name = token->intern_val; + } + + Decl *result = decl_enum(p, token, name); + if(token_match(p, TK_OpenBrace)){ + for(;;){ + Token *token = token_get(p); + if(token_match(p, TK_Identifier)){ + Expr *expr = 0; + if(token_match(p, TK_Assign)){ + expr = parse_expr(p); + } + Decl_Enum_Child *child = decl_enum_child(p, token, expr); + decl_enum_push(result, child); + } + else break; + + if(!token_match(p, TK_Comma)) + break; + } + token_expect(p, TK_CloseBrace); + } + else { + if(name.s.str == 0){ + parser_push_error(p, token, "Unnamed enum without body is illegal"); + } + } + + return result; +} + +function Decl * +parse_decl_global(Parser *p){ + Decl *result = 0; + 
if(token_match_keyword(p, keyword_enum)){ + result = parse_decl_enum(p); + } + else if(token_match_keyword(p, keyword_typedef)){ + + } + else if(token_match_keyword(p, keyword_union)){ + + } + else if(token_match_keyword(p, keyword_struct)){ + + } + else if(token_match_keyword(p, keyword_global)){ + + } + else if(token_match_keyword(p, keyword_function)){ + + } + return result; +} \ No newline at end of file diff --git a/parse_expr.c b/parse_expr.c new file mode 100644 index 0000000..842d964 --- /dev/null +++ b/parse_expr.c @@ -0,0 +1,281 @@ +function Expr* parse_expr(Parser* p); +function Expr* parse_list_expr(Parser* p); + +function Expr* +parse_atom_expr(Parser* p){ + Expr* result = 0; + if (token_is(p, TK_Identifier) || + token_is(p, TK_StringLit) || + token_is(p, TK_U8Lit) || + token_is(p, TK_Int)){ + result = expr_atom(p, token_next(p)); + } + else if (token_match(p, TK_OpenParen)){ + result = parse_list_expr(p); + token_expect(p, TK_CloseParen); + } + else { + parser_push_error(p, token_next(p), "Invalid expression token"); + } + return result; +} + +function Expr* +parse_postfix_expr(Parser* p){ + Expr* result = parse_atom_expr(p); + while (token_is(p, TK_Dot) + || token_is(p, TK_Arrow) + || token_is(p, TK_DoubleColon) + || token_is(p, TK_OpenParen) + || token_is(p, TK_OpenBracket) + || token_is(p, TK_Decrement) + || token_is(p, TK_Increment)){ + Token *op = token_get(p); + + if (token_match(p, TK_Arrow) + || token_match(p, TK_DoubleColon) + || token_match(p, TK_Dot)){ + Expr* r = parse_atom_expr(p); + result = expr_binary(p, op, result, r); + } + + else if (token_match(p, TK_OpenParen)){ + Expr* list = 0; + if (!token_match(p, TK_CloseParen)){ + list = parse_list_expr(p); + token_expect(p, TK_CloseParen); + } + result = expr_call(p, op, result, list); + } + + else if (token_match(p, TK_OpenBracket)){ + Expr* list = 0; + if (!token_match(p, TK_CloseBracket)){ + list = parse_list_expr(p); + token_match(p, TK_CloseBracket); + } + result = expr_index(p, op, 
result, list); + } + + else { + assert(op->kind == TK_Increment || op->kind == TK_Decrement); + token_next(p); /* the token kind is rewritten in place below to mark the postfix form */ + if (op->kind == TK_Increment) op->kind = TK_PostIncrement; + else if (op->kind == TK_Decrement) op->kind = TK_PostDecrement; + result = expr_unary(p, op, result); + } + + + } + return result; +} + +/* Parses prefix operators (the token kinds tested below), sizeof, and casts, recursing for the operand; anything else falls through to parse_postfix_expr. A parenthesis is treated as a cast when the token after it is a keyword or identifier that type_get resolves to something other than type_undefined. NOTE(review): type_get pushes a parser error when its lookup fails, so a parenthesised identifier that is not a type appears to record an error on this path - confirm. */ function +Expr* parse_unary_expr(Parser* p) { + Expr* result = 0; + if (token_is(p, TK_Sub) + || token_is(p, TK_Add) + || token_is(p, TK_Mul) + || token_is(p, TK_BitAnd) + || token_is(p, TK_Not) + || token_is(p, TK_Neg) + || token_is(p, TK_Increment) + || token_is(p, TK_Decrement)) { + Token *op = token_next(p); + result = parse_unary_expr(p); + result = expr_unary(p, op, result); + } + else if (token_is_keyword(p, keyword_sizeof)) { + Token *token = token_next(p); + result = parse_unary_expr(p); + result = expr_unary(p, token, result); + } + else if (token_is(p, TK_OpenParen)) { // cast requires lookahead + Token *token = token_peek(p, 1); + if (token->kind == TK_Keyword || token->kind == TK_Identifier) { + Type *type = type_get(p, token); + if(type != type_undefined){ + token_next(p); + token_next(p); + // @Todo(Krzosa): Parse pointer types + token_expect(p, TK_CloseParen); + result = parse_unary_expr(p); + result = expr_cast(p, token, type, result); + } + else { + result = parse_postfix_expr(p); + } + } + else { + result = parse_postfix_expr(p); + } + } + else { + result = parse_postfix_expr(p); + } + return result; +} + +/* Left-associative chain of * / % over unary expressions. */ function +Expr* parse_mul_expr(Parser* p) { + Expr* result = parse_unary_expr(p); + while (token_is(p, TK_Mul) + || token_is(p, TK_Div) + || token_is(p, TK_Mod)) { + Token *op = token_next(p); + Expr* r = parse_unary_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Left-associative chain of + and - over multiplicative expressions. */ function +Expr* parse_add_expr(Parser* p) { + Expr* result = parse_mul_expr(p); + while (token_is(p, TK_Add) + || token_is(p, TK_Sub)) { + Token *op = token_next(p); + Expr* right = parse_mul_expr(p); + result = expr_binary(p, op, result, right); + }
+ return result; +} + +/* Left-associative chain of shift operators over additive expressions. */ function +Expr* parse_shift_expr(Parser* p) { + Expr* result = parse_add_expr(p); + while (token_is(p, TK_RightShift) + || token_is(p, TK_LeftShift)) { + Token *op = token_next(p); + Expr* r = parse_add_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Left-associative chain of the four relational operators over shift expressions. */ function +Expr* parse_compare_expr(Parser* p) { + Expr* result = parse_shift_expr(p); + while (token_is(p, TK_LesserThen) + || token_is(p, TK_GreaterThen) + || token_is(p, TK_LesserThenOrEqual) + || token_is(p, TK_GreaterThenOrEqual)) { + Token *op = token_next(p); + Expr* r = parse_shift_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Left-associative chain of equality / inequality over relational expressions. */ function +Expr* parse_equality_expr(Parser* p) { + Expr* result = parse_compare_expr(p); + while (token_is(p, TK_Equals) + || token_is(p, TK_NotEquals)) { + Token *op = token_next(p); + Expr* r = parse_compare_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Left-associative chain of TK_BitAnd over equality expressions. */ function +Expr* parse_bit_and_expr(Parser* p) { + Expr* result = parse_equality_expr(p); + while (token_is(p, TK_BitAnd)) { + Token *op = token_next(p); + Expr* r = parse_equality_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Left-associative chain of TK_BitXor over bit-and expressions. */ function +Expr* parse_bit_xor_expr(Parser* p) { + Expr* result = parse_bit_and_expr(p); + while (token_is(p, TK_BitXor)) { + Token *op = token_next(p); + Expr* r = parse_bit_and_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Left-associative chain of TK_BitOr over bit-xor expressions. */ function +Expr* parse_bit_or_expr(Parser* p) { + Expr* result = parse_bit_xor_expr(p); + while (token_is(p, TK_BitOr)) { + Token *op = token_next(p); + Expr* r = parse_bit_xor_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Left-associative chain of TK_And over bit-or expressions. */ function +Expr* parse_and_expr(Parser* p) { + Expr* result = parse_bit_or_expr(p); + while (token_is(p, TK_And)) { + Token *op = token_next(p); + Expr* r = parse_bit_or_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +function +Expr*
parse_or_expr(Parser* p) { /* Left-associative chain of TK_Or over logical-and expressions. */ + Expr* result = parse_and_expr(p); + while (token_is(p, TK_Or)) { + Token *op = token_next(p); + Expr* r = parse_and_expr(p); + result = expr_binary(p, op, result, r); + } + return result; +} + +/* Parses `cond ? on_true : on_false`; both arms recurse into parse_ternary_expr. Without a `?` the condition expression is returned unchanged. */ function +Expr* parse_ternary_expr(Parser* p) { + Expr* result = parse_or_expr(p); + if (token_is(p, TK_Question)) { + Token *token = token_next(p); + Expr* on_true = parse_ternary_expr(p); + token_expect(p, TK_Colon); + Expr* on_false = parse_ternary_expr(p); + result = expr_ternary(p, token, result, on_true, on_false); + } + return result; +} + +/* Parses an assignment; the right-hand side recurses into parse_assign_expr, so chained assignments nest to the right. */ function +Expr* parse_assign_expr(Parser* p) { + Expr* result = parse_ternary_expr(p); + if (token_is_assignment(p)) { + Token *op = token_next(p); + Expr* right = parse_assign_expr(p); + result = expr_binary(p, op, result, right); + } + return result; +} + +/* Parses one or more comma-separated assignment expressions. A single expression is returned as is; with at least one comma the expressions are collected into a list node created at the first comma token. */ function +Expr* parse_list_expr(Parser* p) { + Expr* result = parse_assign_expr(p); + if (token_is(p, TK_Comma)) { + Expr *list = expr_list(p, token_get(p)); + expr_list_push(list, result); + result = list; + } + while (token_is(p, TK_Comma)) { + Token *token = token_next(p); /* consumes the comma; the local itself is not used afterwards */ + Expr* expr = parse_assign_expr(p); + expr_list_push(result, expr); + } + + return result; +} + +/* Entry point for a single expression: an assignment expression, without the comma list. */ function Expr* +parse_expr(Parser* p) { + return parse_assign_expr(p); +} diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..c947962 --- /dev/null +++ b/parser.c @@ -0,0 +1,214 @@ +/* Interned keyword strings, assigned in parser_init. */ global Intern_String keyword_s64; +global Intern_String keyword_u64; +global Intern_String keyword_void; +global Intern_String keyword_sizeu; +global Intern_String keyword_sizeof; + +global Intern_String keyword_enum; +global Intern_String keyword_typedef; +global Intern_String keyword_struct; +global Intern_String keyword_union; +global Intern_String keyword_function; +global Intern_String keyword_global; + +/* Allocates the intern and symbol tables, interns the keywords and registers the built-in types. */ function void +parser_init(Parser *p){ + p->interns_count = 4096; + p->interns = arena_push_array(&p->intern_table_arena, Intern_String, p->interns_count); +
memory_zero(p->interns, sizeof(Intern_String)*p->interns_count); + + p->symbols_count = 4096; + p->symbols = arena_push_array(&p->intern_table_arena, Intern_String, p->symbols_count); + memory_zero(p->symbols, sizeof(Intern_String)*p->symbols_count); + + keyword_s64 = intern_string(p, lit("S64")); + keyword_u64 = intern_string(p, lit("U64")); + keyword_void = intern_string(p, lit("void")); + keyword_sizeu = intern_string(p, lit("SizeU")); + keyword_sizeof = intern_string(p, lit("sizeof")); + keyword_struct = intern_string(p, lit("struct")); + keyword_enum = intern_string(p, lit("enum")); + keyword_typedef = intern_string(p, lit("typedef")); + keyword_union = intern_string(p, lit("union")); + keyword_function = intern_string(p, lit("function")); + keyword_global = intern_string(p, lit("global")); + p->first_keyword = keyword_s64.s.str; + p->last_keyword = keyword_global.s.str; + + type_insert(p, type_s64, keyword_s64); + type_insert(p, type_u64, keyword_u64); + type_insert(p, type_sizeu, keyword_sizeu); + type_insert(p, type_void, keyword_void); +} + +function B32 +intern_is_keyword(Parser *p, Intern_String intern){ + if(intern.s.str >= p->first_keyword && intern.s.str <= p->last_keyword) + return true; + return false; +} + +function void +parser_push_error(Parser *p, Token *token, char *str, ...){ + String string; + { + va_list args1, args2; + va_start(args1, str); + va_copy(args2, args1); + string.len = vsnprintf(0, 0, str, args2); + va_end(args2); + + string.str = arena_push_size(&p->main_arena, string.len + 1); + vsnprintf((char*)string.str, string.len + 1, str, args1); + va_end(args1); + } + + Parser_Error *error = arena_push_struct(&p->main_arena, Parser_Error); + error->message = string; + error->next = 0; + error->token = token; + SLLQueuePush(p->first_error, p->last_error, error); +} + +//----------------------------------------------------------------------------- +// +//----------------------------------------------------------------------------- +typedef 
struct Table_Index{ /* Probe state for the open-addressed tables: key hash, starting slot (index), current slot (iter) and table size. */ + U64 hash; + U64 index; + U64 iter; + U64 max_size; +}Table_Index; + +/* Starts a probe at hash % max_size. */ function Table_Index +table_index_from_hash(U64 hash, U64 max_size){ + Table_Index result = {0}; + result.hash = hash; + result.index = result.hash % max_size; + result.iter = result.index; + result.max_size = max_size; + return result; +} + +/* Starts a probe from the FNV hash of `string`. */ function Table_Index +table_index_from_string(String string, U64 max_size){ + U64 hash = hash_fnv(string); + Table_Index result = table_index_from_hash(hash, max_size); + return result; +} + +/* Steps iter to the next slot, wrapping at max_size (wrap_around_pow2 asserts it is a power of two). Returns true once the probe is back at its starting slot. */ function B32 +table_index_advance(Table_Index *index){ + index->iter = wrap_around_pow2(index->iter + 1, index->max_size); + B32 result = index->iter == index->index; + return result; +} + +/* Returns the interned copy of `string`, inserting a copy allocated from main_arena when it is not in the table yet. If the probe wraps the whole table without finding a free or matching slot, a zeroed Intern_String is returned. */ function Intern_String +intern_string(Parser *p, String string){ + Intern_String result = {0}; /* fix: `{}` is not a valid C11 initializer; `{0}` matches table_index_from_hash above */ + Table_Index index = table_index_from_string(string, p->interns_count); + for(;;){ + Intern_String *intern = p->interns + index.iter; + if(intern->s.str == 0){ + result.s = arena_push_string_copy(&p->main_arena, string); + *intern = result; + break; + } + else if(string_compare(intern->s, string)){ + result = *intern; + break; + } + + if (table_index_advance(&index)) + break; + + } + + return result; +} + +/* Interns the text of every identifier token and retags those whose interned pointer falls in the keyword range as TK_Keyword. */ function void +intern_tokens(Parser *p){ + for(S64 i = 0; i < p->tokens.len; i++){ + Token *t = p->tokens.tokens + i; + if(t->kind == TK_Identifier){ + t->intern_val = intern_string(p, t->string); + if(intern_is_keyword(p, t->intern_val)){ + t->kind = TK_Keyword; + } + } + } +} + +/* Stores `symbol` in the first free slot of its probe sequence. Meeting a slot with the same interned pointer hits invalid_codepath; if the probe wraps without a free slot the symbol is not stored. */ function void +symbol_insert(Parser *p, Symbol symbol){ + String string = symbol.string.s; + Table_Index index = table_index_from_string(string, p->symbols_count); + + for(;;){ + Symbol *slot = p->symbols + index.iter; + if(slot->string.s.str == 0){ + *slot = symbol; + break; + } + else if(slot->string.s.str == string.str){ + invalid_codepath; + break; + } + + if (table_index_advance(&index)) + break; + + } +} + +/* Looks a symbol up by interned-string pointer identity; returns 0 when the probe wraps without a match. */ function Symbol * +symbol_get(Parser *p, Intern_String string){ + Table_Index
index = table_index_from_string(string.s, p->symbols_count); + for(;;){ /* the loop does not stop at an empty slot, so a miss probes every slot before returning 0 */ + Symbol *slot = p->symbols + index.iter; + if(slot->string.s.str == string.s.str){ + return slot; + } + + if (table_index_advance(&index)) + break; + + } + return 0; +} + +/* Registers `type` in the symbol table under the interned name `string`. */ function void +type_insert(Parser *p, Type *type, Intern_String string){ + Symbol symbol = {.kind=SK_Type, .string=string, .type=type}; + symbol_insert(p, symbol); +} + +/* Resolves an identifier or keyword token to a Type. Every failure (wrong token kind, unknown symbol, symbol that is not a type) pushes a parser error and yields type_undefined, so the result is never 0. */ function Type * +type_get(Parser *p, Token *token){ + Type *result = 0; + if(token->kind == TK_Identifier || token->kind == TK_Keyword){ + Symbol *symbol = symbol_get(p, token->intern_val); + if(symbol){ + if(symbol->kind == SK_Type){ + result = symbol->type; + } + else { + parser_push_error(p, token, "Symbol is not a type"); + } + } + else{ + parser_push_error(p, token, "Undefined type"); + } + } + else { + parser_push_error(p, token, "Trying to lookup a type with token of wrong kind"); + } + + if(!result){ + result = type_undefined; + } + + return result; +} diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..6490fe0 --- /dev/null +++ b/parser.h @@ -0,0 +1,46 @@ +#pragma once +typedef struct Type Type; +typedef struct Parser_Error Parser_Error; + +typedef enum Symbol_Kind{ + SK_None, + SK_Type, +}Symbol_Kind; + +/* One symbol-table slot: kind, interned name and, for SK_Type, the type it names. A slot whose name pointer is 0 is treated as empty by symbol_insert. */ typedef struct Symbol{ + Symbol_Kind kind; + Intern_String string; + struct{ + Type *type; + }; +}Symbol; + +/* Node of the parser's singly linked error queue: formatted message plus the token it refers to. */ struct Parser_Error{ + Parser_Error *next; + String message; + Token *token; +}; + +/* Parser state: arenas, the symbol and intern tables with their slot counts, the keyword pointer range, the error queue and the token array. NOTE(review): symbol_table_arena is not referenced by the code in this part of the diff - confirm whether it is used elsewhere. */ typedef struct Parser{ + Arena main_arena; + Arena intern_table_arena; + Arena symbol_table_arena; + + Symbol *symbols; + S64 symbols_count; + + Intern_String *interns; + S64 interns_count; + + U8 *first_keyword; + U8 *last_keyword; + + Parser_Error *first_error; + Parser_Error *last_error; + + Tokens tokens; +}Parser; + +function Intern_String intern_string(Parser *p, String string); +function void type_insert(Parser *p, Type *type, Intern_String string); + diff --git a/print.c b/print.c new file mode 100644 index 0000000..e251952 ---
/dev/null +++ b/print.c @@ -0,0 +1,133 @@ + +/* Destination stream for every lex_print call; it is not assigned in this file. */ global FILE *global_output_file; +#define lex_print(...) fprintf(global_output_file, __VA_ARGS__) +#define lex_new_line() lex_print("\n") + +/* Prints the token count, then one line per token with its kind name and quoted text. */ function void +tokens_print(Tokens tokens){ + lex_print("\n== Token count = %d\n", (S32)tokens.len); + for(Token *t = tokens.tokens; t != tokens.tokens + tokens.len; t++){ + lex_print("%s \"%.*s\"\n", token_kind_string[t->kind].str, (S32)t->len, t->str); + } +} + +/* Prints the token's text (len bytes starting at str). */ function void +token_print(Token *token){ + lex_print("%.*s", (S32)token->len, token->str); +} + +function Type *type_pointer(Parser *p, Type *base); +function void expr_print(Expr *expr); + +/* Prints a type: the built-in kinds by name, a pointer as its base followed by `*`, an array as its base followed by `[size]`. Any other kind hits invalid_codepath. */ function void +type_print(Type *type){ + switch(type->kind) { + case TK_S64: case TK_U64: + case TK_SizeU: case TK_Void: { + lex_print("%s", type_kind_string[type->kind].str); + } break; + case TK_Pointer:{ + type_print(type->pointer); + lex_print("*"); + } break; + case TK_Array:{ + type_print(type->array.pointer); + lex_print("["); + expr_print(type->array.size); + lex_print("]"); + } break; + default: {invalid_codepath;} break; + } +} + +/* Builds a pointer to a pointer to S64 and prints it. */ function void +type_test(Parser *p){ + Type *t = type_pointer(p, type_s64); + t = type_pointer(p, t); + type_print(t); +} + +/* Prints an expression tree, wrapping binary, unary, ternary, list and cast nodes in parentheses. */ function void +expr_print(Expr *expr){ + switch(expr->kind) { + case EK_Atom: { + token_print(expr->token); + } break; + + case EK_Binary:{ + lex_print("("); + expr_print(expr->binary.left); + token_print(expr->token); + expr_print(expr->binary.right); + lex_print(")"); + } break; + case EK_Unary:{ + lex_print("("); + token_print(expr->token); + expr_print(expr->unary.expr); + lex_print(")"); + } break; + + case EK_Ternary:{ + lex_print("("); + expr_print(expr->ternary.cond); + lex_print("?"); + expr_print(expr->ternary.on_true); + lex_print(":"); + expr_print(expr->ternary.on_false); + lex_print(")"); + } break; + case EK_List:{ + lex_print("("); + for(Expr *n = expr->list.first; n; n=n->next){ + expr_print(n); + if(n!=expr->list.last) lex_print(","); /* no comma after the last element */ + } + lex_print(")"); +
}break; + + case EK_Cast:{ + lex_print("("); + lex_print("("); + type_print(expr->cast.type); + lex_print(")"); + expr_print(expr->cast.expr); + lex_print(")"); + } break; + + case EK_Index:{ + expr_print(expr->index.atom); + lex_print("["); + if(expr->index.index) expr_print(expr->index.index); /* fix: the parser stores 0 for an empty `[]`; expr_print dereferences its argument */ + lex_print("]"); + }break; + + case EK_Call:{ + expr_print(expr->call.atom); + if(expr->call.list) expr_print(expr->call.list); else lex_print("()"); /* fix: the parser stores 0 for a call without arguments; print an empty argument list instead of dereferencing it */ + }break; + default: {invalid_codepath;} break; + } +} + + +/* Prints a declaration. Only enums are handled: the name, then one `child` or `child = expr` line per enumerator; every other kind hits invalid_codepath. */ function void +decl_print(Decl *decl){ + switch(decl->kind) { + case DK_Enum: { + lex_print("enum %s{\n", decl->name.s.str ? (char *)decl->name.s.str : ""); /* fix: an unnamed enum with a body has a null name, which must not be passed to %s */ + for(Decl_Enum_Child *n = decl->enum_val.first; n; n=n->next){ + lex_print(" "); + token_print(n->token); + if(n->expr){ + lex_print(" = "); + expr_print(n->expr); + } + lex_print(","); + lex_new_line(); + } + lex_print("};\n"); + } break; + default: {invalid_codepath;} break; + } +} \ No newline at end of file diff --git a/type.c b/type.c new file mode 100644 index 0000000..05fb6d8 --- /dev/null +++ b/type.c @@ -0,0 +1,25 @@ + +/* Allocates a zeroed Type of the given kind from main_arena. */ function Type * +type_new(Parser *p, Type_Kind kind){ + Type *result = arena_push_struct(&p->main_arena, Type); + memory_zero(result, sizeof(Type)); + + result->kind = kind; + return result; +} + +/* Creates a pointer type to `base`; its size is sizeof(SizeU). */ function Type * +type_pointer(Parser *p, Type *base){ + Type *result = type_new(p, TK_Pointer); + result->size = sizeof(SizeU); + result->pointer = base; + return result; +} + +/* Creates an array type of `base` whose element count is the unevaluated expression `index`; the size field is left at 0. */ function Type * +type_array(Parser *p, Type *base, Expr *index){ + Type *result = type_new(p, TK_Array); + result->array.pointer = base; + result->array.size = index; + return result; +} \ No newline at end of file diff --git a/type.h b/type.h new file mode 100644 index 0000000..68142ee --- /dev/null +++ b/type.h @@ -0,0 +1,65 @@ +#pragma once +typedef struct Type Type; +typedef struct Expr Expr; + +typedef enum Type_Kind{ + TK_None, + TK_Void, + TK_S64, + TK_U64, + TK_SizeU, + + TK_Undefined, + TK_Function, + TK_Pointer, + TK_Array, + TK_Struct, + TK_Union, +} Type_Kind; + +struct Type{ + Type_Kind kind; + SizeU
size; + union{ /* payload by kind: `pointer` is the base of a pointer type, `array` holds the base type and the size expression */ + Type *pointer; + struct{ + Type *pointer; + Expr *size; + } array; + }; +}; + +//----------------------------------------------------------------------------- +// +//----------------------------------------------------------------------------- +/* Printable name per Type_Kind; TK_Function has no entry, so its slot is zero-initialised. */ global String type_kind_string[] = { + [TK_None] = lit("None"), + [TK_Void] = lit("void"), + [TK_S64] = lit("S64"), + [TK_U64] = lit("U64"), + [TK_SizeU] = lit("SizeU"), + [TK_Undefined] = lit("Undefined"), + [TK_Pointer] = lit("Pointer"), + [TK_Array] = lit("Array"), + [TK_Struct] = lit("Struct"), + [TK_Union] = lit("Union"), +}; + +/* One prototype Type per Type_Kind, indexed by the kind value; the second initialiser is the size field where one is given. */ global Type type_table[] = { + [TK_None] = {0}, + [TK_Void] = {TK_Void}, + [TK_S64] = {TK_S64, sizeof(S64)}, + [TK_U64] = {TK_U64, sizeof(U64)}, + [TK_SizeU] = {TK_SizeU, sizeof(SizeU)}, + [TK_Undefined] = {TK_Undefined}, + [TK_Pointer] = {TK_Pointer,sizeof(SizeU)}, + [TK_Array] = {TK_Array,sizeof(SizeU)}, + [TK_Struct] = {TK_Struct}, + [TK_Union] = {TK_Union}, +}; + +/* Shortcuts into type_table; the literal offsets equal the enumerator values TK_Void (1) through TK_Undefined (5) and must be kept in step with Type_Kind. */ global Type *type_void = type_table + 1; +global Type *type_s64 = type_table + 2; +global Type *type_u64 = type_table + 3; +global Type *type_sizeu = type_table + 4; +global Type *type_undefined = type_table + 5;