diff --git a/build.bat b/build.bat index 4b6ee53..a3b2358 100644 --- a/build.bat +++ b/build.bat @@ -3,5 +3,6 @@ rem clang generate.c -fdiagnostics-absolute-paths -std=c99 -g -o generate.exe -Wl,user32.lib rem generate.exe -clang main.cpp -Wall -Wno-unused-function -fdiagnostics-absolute-paths -g -o main.exe -Wl,user32.lib +clang main.cpp -Wall -Wno-unused-function -fno-exceptions -fdiagnostics-absolute-paths -g -o main.exe -Wl,user32.lib +rem gcc main.cpp rem cl main.c -std:c17 diff --git a/lex.c b/lex.c index bd7fb8e..cb334aa 100644 --- a/lex.c +++ b/lex.c @@ -257,12 +257,6 @@ lex_advance(Lex_Stream *s){ } } -function Token -token_int(U64 val){ - Token result = {.kind = TK_Int, .int_val=val}; - return result; -} - function void lex_parse_string(Lex_Stream *s, Token *t, U8 c){ for(;;){ diff --git a/main.cpp b/main.cpp index 0e9d9cd..e0e766a 100644 --- a/main.cpp +++ b/main.cpp @@ -29,7 +29,14 @@ typedef double F64; #define mib(x) (kib(x)*1024llu) #define gib(x) (mib(x)*1024llu) struct String{U8 *str;S64 len;}; +union Intern_String{ + String s; + struct{ U8 *str; S64 len; }; +}; +//----------------------------------------------------------------------------- +// Utilities +//----------------------------------------------------------------------------- function SizeU get_align_offset(SizeU size, SizeU align){ SizeU mask = align - 1; @@ -69,6 +76,18 @@ memory_zero(void *p, SizeU size){ pp[i] = 0; } +template +T max(T a, T b){ + if(a > b) return a; + return b; +} + +template +T min(T a, T b){ + if(a > b) return b; + return a; +} + template T clamp_top(T val, T max){ if(val > max) val = max; @@ -88,6 +107,53 @@ T clamp(T min, T val, T max){ return val; } +function U64 +hash_string(String string) { + U64 hash = (U64)14695981039346656037ULL; + for (U64 i = 0; i < string.len; i++) { + hash = hash ^ (U64)(string.str[i]); + hash = hash * (U64)1099511628211ULL; + } + return hash; +} + +function U64 +hash_u64(U64 x) { + x *= 0xff51afd7ed558ccd; + x ^= x >> 32; + return x; +} 
+ +function U64 +hash_ptr(const void *ptr) { + return hash_u64((uintptr_t)ptr); +} + +function U64 +hash_mix(U64 x, U64 y) { + x ^= y; + x *= 0xff51afd7ed558ccd; + x ^= x >> 32; + return x; +} + +function U64 +is_pow2(U64 x) { + assert(x != 0); + B32 result = (x & (x - 1llu)) == 0; + return result; +} + +function U64 +wrap_around_pow2(U64 x, U64 power_of_2) { + assert(is_pow2(power_of_2)); + U64 r = (((x)&((power_of_2)-1llu))); + return r; +} + +//----------------------------------------------------------------------------- +// OS Memory +//----------------------------------------------------------------------------- constexpr SizeU os_page_size = 4096; struct OS_Memory{ SizeU commit, reserve; @@ -136,8 +202,8 @@ os_decommit_pos(OS_Memory *m, SizeU pos){ SizeU adjusted_pos = clamp_top(aligned, m->commit); SizeU size_to_decommit = m->commit - adjusted_pos; if(size_to_decommit){ - U8 *imp_address = m->data + adjusted_pos; - BOOL result = VirtualFree(imp_address, size_to_decommit, MEM_DECOMMIT); + U8 *base_address = m->data + adjusted_pos; + BOOL result = VirtualFree(base_address, size_to_decommit, MEM_DECOMMIT); if(result){ m->commit -= size_to_decommit; return true; @@ -173,11 +239,17 @@ test_os_memory(){ assert(memory.data == 0); } -enum Allocation_Kind{Allocation_Alloc,Allocation_Resize,Allocation_FreeAll,Allocation_Free}; +//----------------------------------------------------------------------------- +// Base Allocator stuff +//----------------------------------------------------------------------------- +enum Allocation_Kind{Allocation_Alloc,Allocation_Resize,Allocation_FreeAll,Allocation_Free,Allocation_Destroy}; struct Allocator; typedef void *Allocator_Proc(Allocator*, Allocation_Kind, void *, SizeU); struct Allocator{Allocator_Proc *proc;}; +//----------------------------------------------------------------------------- +// Memory arenas +//----------------------------------------------------------------------------- global const SizeU default_reserve_size 
= gib(4); global const SizeU default_alignment = 8; global const SizeU additional_commit_size = mib(1); @@ -195,6 +267,11 @@ arena_pop_pos(Arena *arena, SizeU pos){ arena->len = pos; } +function void +arena_release(Arena *arena){ + os_release(&arena->memory); +} + function void arena_clear(Arena *arena){ arena_pop_pos(arena, 0); @@ -202,8 +279,8 @@ arena_clear(Arena *arena){ function void * arena_push_size(Arena *a, SizeU size){ - SizeU generous_size = size; - if(a->memory.commit+generous_size>a->memory.commit){ + SizeU generous_size = size + a->alignment; + if(a->len+generous_size>a->memory.commit){ if(a->memory.reserve == 0){ arena_init(a); } @@ -223,14 +300,21 @@ arena_allocator_proc(Allocator *a, Allocation_Kind kind, void *old_pointer, Size Arena *arena = (Arena *)a; switch(kind){ case Allocation_Alloc: return arena_push_size(arena, size); - case Allocation_Resize: return arena_push_size(arena, size); + case Allocation_Resize:{ + void *result = arena_push_size(arena, size); + memory_copy(result, old_pointer, size); + return result; + } case Allocation_Free : invalid_codepath; return 0; case Allocation_FreeAll: arena_clear(arena); return 0; + case Allocation_Destroy: arena_release(arena); return 0; } + invalid_codepath; + return 0; } force_inline void * -big_personal_arena_allocator_proc(Allocator *a, Allocation_Kind kind, void *old_pointer, SizeU size){ +personal_arena_allocator_proc(Allocator *a, Allocation_Kind kind, void *old_pointer, SizeU size){ Arena *arena = (Arena *)a; arena->alignment = 1; return arena_allocator_proc(a, kind, old_pointer, size); @@ -243,6 +327,17 @@ arena_init(Arena *a){ if(!a->proc) a->proc = arena_allocator_proc; } +function Arena +arena_make_personal(){ + Arena arena = {}; + arena.proc = personal_arena_allocator_proc; + arena_init(&arena); + return arena; +} + +//----------------------------------------------------------------------------- +// OS Heap allocator 
+//----------------------------------------------------------------------------- struct OS_Heap:Allocator{ HANDLE handle; }; @@ -252,8 +347,14 @@ os_heap_allocator_proc(Allocator *a, Allocation_Kind kind, void *old_pointer, Si OS_Heap *heap = (OS_Heap *)a; switch(kind){ case Allocation_FreeAll:{ + invalid_codepath; + return 0; + } + case Allocation_Destroy:{ BOOL result = HeapDestroy(heap->handle); assert(result != 0); + heap->handle = 0; + heap->proc = 0; return 0; } case Allocation_Free:{ @@ -271,7 +372,9 @@ os_heap_allocator_proc(Allocator *a, Allocation_Kind kind, void *old_pointer, Si assert(result); return result; } + default: invalid_codepath; } + return 0; } function OS_Heap // max_size == 0 == growing heap @@ -283,9 +386,16 @@ win32_os_heap_create(B32 multithreaded, SizeU initial_size, SizeU max_size){ return result; } +enum Log_Kind{Log_Kind_Normal, Log_Kind_Error}; +typedef void Log_Proc(Log_Kind kind, String string, char *file, int line); +//----------------------------------------------------------------------------- +// Thread Context +//----------------------------------------------------------------------------- struct Thread_Ctx{ - Arena scratch[2]; + Arena scratch[2]; Allocator *implicit_allocator; + void *ctx; + Log_Proc *log_proc; }; thread_local Thread_Ctx thread_ctx; global Arena pernament_arena; @@ -323,12 +433,26 @@ struct Scoped_Allocator{ } }; -#define exp_alloc_array(a, T, size) (T *)exp_alloc(a, sizeof(T)*(size)) -#define exp_alloc_type(a, T) exp_alloc_array(a, T, 1) -#define exp_resize_array(a, p, T, size) expr_resize(a, p, sizeof(T)*(size)) +#define Get_Ctx(T) T *ctx = (T *)thread_ctx.ctx +#define Set_Ctx(ctx) Scoped_Ctx scoped_ctx_##__LINE__((void *)ctx) +struct Scoped_Ctx{ + void *prev_ctx; + Scoped_Ctx(void *in_ctx){ + prev_ctx = thread_ctx.ctx; + thread_ctx.ctx = in_ctx; + } + ~Scoped_Ctx(){thread_ctx.ctx = prev_ctx;} +}; + +enum Alloc_Flag{AF_None,AF_ZeroMemory}; +#define exp_alloc_array(a, T, size,...) 
(T *)exp_alloc(a, sizeof(T)*(size), ## __VA_ARGS__)
+#define exp_alloc_type(a, T, ...) exp_alloc_array(a, T, 1, ## __VA_ARGS__)
+#define exp_resize_array(a, p, T, size, ...) (T *)exp_resize(a, p, sizeof(T)*(size),## __VA_ARGS__)
 force_inline void *
-exp_alloc(Allocator *a, SizeU size){
-    return a->proc(a, Allocation_Alloc, 0, size);
+exp_alloc(Allocator *a, SizeU size, Alloc_Flag flag = AF_None){
+    // Forwards an Allocation_Alloc request to the allocator's proc.
+    void *result = a->proc(a, Allocation_Alloc, 0, size);
+    // Zero only when requested and only when the allocator returned memory
+    if(result && (flag & AF_ZeroMemory)) memory_zero(result, size);
+    return result;
 }
 force_inline void *
 exp_resize(Allocator *a, void *pointer, SizeU size){
@@ -342,13 +466,17 @@ force_inline void
 exp_free_all(Allocator *a){
     a->proc(a, Allocation_FreeAll, 0, 0);
 }
+// Sends Allocation_Destroy to the allocator's proc.
+force_inline void
+exp_destroy(Allocator *a){
+    a->proc(a, Allocation_Destroy, 0, 0);
+}
-#define imp_alloc_array(T,size) (T *)imp_alloc(sizeof(T) * (size))
-#define imp_alloc_type (T) imp_alloc_array(T,1)
-#define imp_resize_array(p, T,size) (T *)imp_resize(p, sizeof(T) * (size))
+#define imp_alloc_array(T,size,...) (T *)imp_alloc(sizeof(T) * (size),##__VA_ARGS__)
+// No space between the macro name and '(': with a space this is an object-like
+// macro whose replacement text begins with "(T,...)".
+#define imp_alloc_type(T,...) imp_alloc_array(T,1,## __VA_ARGS__)
+#define imp_resize_array(p, T,size, ...)
(T *)imp_resize(p, sizeof(T) * (size),##__VA_ARGS__) force_inline void * -imp_alloc(SizeU size){ - return exp_alloc(thread_ctx.implicit_allocator, size); +imp_alloc(SizeU size, Alloc_Flag flag=AF_None){ + return exp_alloc(thread_ctx.implicit_allocator, size, flag); } force_inline void * imp_resize(void *pointer, SizeU size){ @@ -362,6 +490,16 @@ force_inline void imp_free_all(){ exp_free_all(thread_ctx.implicit_allocator); } +force_inline void +imp_destroy(){ + exp_destroy(thread_ctx.implicit_allocator); +} + +force_inline Allocator * +imp_get(){ + assert(thread_ctx.implicit_allocator); + return thread_ctx.implicit_allocator; +} function void thread_ctx_init(){ @@ -370,6 +508,54 @@ thread_ctx_init(){ arena_init(&pernament_arena); os_process_heap.proc = os_heap_allocator_proc; os_process_heap.handle = GetProcessHeap(); + thread_ctx.implicit_allocator = &os_process_heap; +} + +function String +string_copy(Allocator *a, String string){ + U8 *copy = exp_alloc_array(a, U8, string.len+1); + memory_copy(copy, string.str, string.len); + copy[string.len] = 0; + return (String){copy, string.len}; +} + +#include +function String +string_fmtv(Allocator *a, const char *str, va_list args1) { + va_list args2; + va_copy(args2, args1); + S64 len = vsnprintf(0, 0, str, args2); + va_end(args2); + + char *result = exp_alloc_array(a, char, len + 1); + vsnprintf(result, len + 1, str, args1); + + String res = {(U8 *)result, len}; + return res; +} + +#define STRING_FMT(alloc, str, result) \ +va_list args1; \ +va_start(args1, str); \ +String result = string_fmtv(alloc, str, args1); \ +va_end(args1) + +function String +string_fmt(Allocator *a, const char *str, ...) { + STRING_FMT(a, str, result); + return result; +} + +#define log(...) handle_log_message(Log_Kind_Normal, __LINE__, __FILE__, ## __VA_ARGS__) +#define log_error(...) 
handle_log_message(Log_Kind_Error, __LINE__, __FILE__, ## __VA_ARGS__) +function void +handle_log_message(Log_Kind kind, int line, const char *file, const char *str, ...){ + Set_Backup_Scratch(); + STRING_FMT(imp_get(), str, message); + if(thread_ctx.log_proc) thread_ctx.log_proc(kind, message, (char *)file, line); + else{ + printf("%s", message.str); + } } function void @@ -382,7 +568,7 @@ test_heap_allocator(){ result[1023] = 1; result = exp_alloc_type(&heap, U8); *result = 0; - imp_free_all(); + imp_destroy(); assert(thread_ctx.implicit_allocator == &heap); { @@ -393,8 +579,498 @@ test_heap_allocator(){ assert(thread_ctx.implicit_allocator == &heap); } -int main(){ - test_heap_allocator(); - thread_ctx_init(); - +struct Test_Context{ + int value; +}; + +function void +test_custom_context_2(){ + Get_Ctx(Test_Context); + ctx->value += 10; +} + +function void +test_custom_context_1(){ + Test_Context context = {}; + Set_Ctx(&context); + Get_Ctx(Test_Context); + ctx->value = 10; + test_custom_context_2(); + test_custom_context_2(); + test_custom_context_2(); + assert(ctx->value == 40); + assert(thread_ctx.ctx == &context); +} + +function void +test_custom_context(){ + assert(thread_ctx.ctx == 0); + test_custom_context_1(); + assert(thread_ctx.ctx == 0); +} + + +//----------------------------------------------------------------------------- +// Defer +// http://www.gingerbill.org/article/2015/08/19/defer-in-cpp/ +//----------------------------------------------------------------------------- +template +struct Defer_Scope { + F f; + Defer_Scope(F f) : f(f) {} + ~Defer_Scope() { f(); } +}; + +template +Defer_Scope defer_func(F f) { + return Defer_Scope(f); +} +#define DEFER_1(x, y) x##y +#define DEFER_2(x, y) DEFER_1(x, y) +#define DEFER_3(x) DEFER_2(x, __COUNTER__) +#define defer(code) auto DEFER_3(_defer_) = defer_func([&](){code;}) + +//----------------------------------------------------------------------------- +// Array 
+//----------------------------------------------------------------------------- +template +struct Array{ + T *data; + S64 cap; + S64 len; + Allocator *allocator; + + T *begin(){ return data; } + T *end (){ return data + len; } + T &operator[](S64 i){ return data[i]; } +}; +#define For(array,it,i) for(SizeU i = 0; i < array.len; i++) for(auto *it = &array[i]; it; it = 0) +#define IterList(list,it) for(auto *it = list->first; it; it=it->next) + +template +void array_init(Array *a, S64 size){ + if(!a->allocator) a->allocator = thread_ctx.implicit_allocator; + a->data = exp_alloc_array(a->allocator, T, size); + a->cap = size; +} + +template +void array_grow(Array *a, S64 required_size){ + if(a->cap == 0){ + S64 cap = max(required_size*2, (S64)16); + array_init(a, cap); + } + else if(a->len + required_size > a->cap){ + S64 cap = (a->len + required_size)*2; + a->data = exp_resize_array(a->allocator, a->data, T, cap); + a->cap = cap; + } +} + +template +Array array_make(S64 size){ + Array result = {}; + array_init(&result, size); + return result; +} + +template +T *array_alloc(Array *a, S64 count){ + array_grow(a, count); + T *result = a->data + a->len; + a->len += count; + return result; +} + +template +void array_push(Array *a, T &item){ + array_grow(a, 1); + a->data[a->len++] = item; +} + +template +T array_pop_get(Array *a){ + assert(a->len > 0); + return a->data[--a->len]; +} + +template +void array_pop(Array *a){ + assert(a->len > 0); + --a->len; +} + +template +void array_clear(Array *array){ + array->len = 0; +} + +function void +test_array(){ + Set_Scratch(); + Array array = {}; + int size = 1000; + for(int i = 0; i < size; i++){ + array_push(&array, i); + } + For(array, it, i){ + assert(*it == i); + } + + Arena arena = arena_make_personal(); + Array array2 = {}; + array2.allocator = &arena; + for(int i = 0; i < size; i++){ + array_push(&array2, i); + } + For(array2, iterator, count){ + assert(*iterator == count); + } + for(int i = 999; i > 950; i--){ + 
assert(array_pop_get(&array) == i);
+    }
+    for(int i = 0; i < 10; i++){
+        array_pop(&array2);
+    }
+    exp_destroy(&arena);
+    assert(arena.memory.data == 0);
+    assert(thread_ctx.scratch->memory.data != 0);
+    assert(thread_ctx.scratch == thread_ctx.implicit_allocator);
+}
+
+//-----------------------------------------------------------------------------
+// Map
+//-----------------------------------------------------------------------------
+// One slot of the open-addressing table; key == 0 marks an empty slot.
+struct Map_Key_Val{
+    U64 key;
+    void *value;
+};
+
+struct Map{
+    Map_Key_Val *data;
+    S64 len;
+    S64 cap;
+    Allocator *allocator;
+};
+function void map_insert_u64(Map *map, U64 key, void *val);
+
+// Rebuilds the table with new_size slots (at least 16, must be a power of two
+// and larger than the current capacity) and re-inserts every occupied slot.
+function void
+map_grow(Map *map, S64 new_size){
+    new_size = max((S64)16, new_size);
+    assert(new_size > map->cap);
+    assert(is_pow2(new_size));
+    if(!map->allocator) map->allocator = imp_get();
+
+    Map new_map = {};
+    new_map.data = exp_alloc_array(map->allocator, Map_Key_Val, new_size, AF_ZeroMemory);
+    new_map.cap = new_size;
+    new_map.allocator = map->allocator;
+
+    for(S64 i = 0; i < map->cap; i++){
+        if(map->data[i].key){
+            map_insert_u64(&new_map, map->data[i].key, map->data[i].value);
+        }
+    }
+    // The old table came from map->allocator, so it has to be returned to that
+    // allocator; CRT free() is only valid for malloc'ed memory.
+    // NOTE(review): arena_allocator_proc treats Allocation_Free as
+    // invalid_codepath, so growing an arena-backed map past its first table
+    // will hit that path - confirm the intended policy for arenas.
+    if(map->data) map->allocator->proc(map->allocator, Allocation_Free, map->data, 0);
+    *map = new_map;
+}
+
+// Inserts or overwrites the value stored under key. Key 0 is stored as key 1
+// because 0 marks an empty slot; val must be non-null.
+function void
+map_insert_u64(Map *map, U64 key, void *val){
+    assert(val);
+    if(key == 0) key++;
+    // Grow before the table gets more than half full
+    if((2*map->len) + 1 > map->cap){
+        map_grow(map, 2*map->cap);
+    }
+    U64 hash = hash_u64(key);
+    U64 index = wrap_around_pow2(hash, map->cap);
+    U64 i = index;
+    for(;;){
+        if(map->data[i].key == 0){
+            map->len++;
+            map->data[i].key = key;
+            map->data[i].value = val;
+            return;
+        }
+        else if(map->data[i].key == key){
+            map->data[i].value = val;
+            return;
+        }
+
+        i = wrap_around_pow2(i+1, map->cap);
+        // Back at the starting slot: every slot was visited. (The wrapped
+        // index is always < cap, so comparing against cap could never fire.)
+        if(i == index){
+            return;
+        }
+    }
+}
+
+function void *
+map_get_u64(Map *map, U64 key){
+    if(map->len == 0) return 0;
+    if(key == 0) key++;
+    U64 hash = hash_u64(key);
+    U64 index = wrap_around_pow2(hash, map->cap);
+    U64 i = index;
+    for(;;){
+        if(map->data[i].key == key){
+            return
map->data[i].value; + } + else if(map->data[i].key == 0){ + return 0; + } + + i = wrap_around_pow2(i+1, map->cap); + if(i == map->cap){ + return 0; + } + } +} + +function void * +map_get(Map *map, void *pointer){ + return map_get_u64(map, (U64)pointer); +} + +function void +map_insert(Map *map, void *key, void *value){ + map_insert_u64(map, (U64)key, value); +} + +function void +map_test(){ + Map map = {0}; + const SizeU size = 1025; + for(SizeU i = 1; i < size; i++){ + map_insert_u64(&map, i, (void *)i); + } + for(SizeU i = 1; i < size; i++){ + SizeU val = (SizeU)map_get_u64(&map, i); + assert(val == i); + } +} + +//----------------------------------------------------------------------------- +// Bucket Array +//----------------------------------------------------------------------------- +template +struct Bucket_Array{ + struct Bucket{ + Bucket *next; + S64 len, cap; + T data[0]; + }; + + Allocator *allocator; + Bucket *first; + Bucket *last; + + Bucket *iter; + S64 iter_len; +}; + +function void +test_bucket_array(){ + Bucket_Array arr = {}; +} + +//----------------------------------------------------------------------------- +// Linked lists +//----------------------------------------------------------------------------- +#define SLLQueuePushMod(f,l,n,next) do{\ +if((f)==0){\ +(f)=(l)=(n);\ +}\ +else{\ +(l)=(l)->next=(n);\ +} \ +}while(0) +#define SLLQueuePush(f,l,n) SLLQueuePushMod(f,l,n,next) + +#define SLLStackPush(l,n) do{\ +(n)->next = (l);\ +(l) = (n);\ +}while(0) + +#define SLLStackPop(l,n) do{\ +if(l){\ +(n) = (l);\ +(l) = (l)->next;\ +(n)->next = 0;\ +}\ +}while(0) + +//----------------------------------------------------------------------------- +// String builder +//----------------------------------------------------------------------------- +#include +struct String_Builder_Block{ + String_Builder_Block *next; + S64 cap; + S64 len; + U8 data[0]; +}; + +struct String_Builder{ + String_Builder_Block *first; + String_Builder_Block *last; + Allocator 
*allocator;
+};
+
+// Allocates a block with room for `size` bytes of text from the builder's
+// allocator (defaulting to the implicit allocator) and appends it to the list.
+function void
+string_builder_push_block(String_Builder *b, SizeU size){
+    if(!b->allocator) b->allocator = imp_get();
+    String_Builder_Block *block = (String_Builder_Block *)exp_alloc(b->allocator, sizeof(String_Builder_Block) + size);
+    memory_zero(block, sizeof(String_Builder_Block));
+    if(size) block->data[0] = 0; // Also clear first byte of character data
+    block->cap = size;
+    SLLQueuePush(b->first, b->last, block);
+}
+
+function void
+string_builder_init(String_Builder *b, SizeU size = 4096){
+    if(!b->allocator) b->allocator = imp_get();
+    string_builder_push_block(b, size);
+}
+
+// printf-style append. Formats into the free space of the last block; when the
+// text does not fit, a larger block is pushed and the format is retried there.
+function void
+appendf(String_Builder *b, const char *str, ...){
+    if(b->first == 0){
+        string_builder_init(b);
+    }
+    va_list args, args2;
+    va_start(args, str); defer(va_end(args));
+    retry:{
+        String_Builder_Block *block = b->last;
+        int block_size = block->cap - block->len;
+        char *write_address = (char *)block->data + block->len;
+
+        va_copy(args2, args); defer(va_end(args2));
+        int written = vsnprintf(write_address, block_size, str, args2);
+
+        // Negative means a formatting error; do not touch block->len
+        if(written < 0){
+            return;
+        }
+        // vsnprintf returns the length without the terminator, so the output
+        // was truncated whenever written >= block_size (not only >).
+        if(written >= block_size){
+            int new_block_size = max(4096, (written+1)*2);
+            string_builder_push_block(b, new_block_size);
+            goto retry;
+        }
+        block->len += written;
+    }
+}
+
+// Returns the builder's content as one null terminated string. A single block
+// is returned in place; several blocks are copied into a fresh allocation.
+function String
+string_flatten(String_Builder *b){
+    // Nothing was ever appended
+    if(b->first == 0){
+        String empty = {};
+        return empty;
+    }
+    // @Note(Krzosa): Only single block, no need to flatten, vsnprintf null terminates too
+    if(b->first == b->last){
+        String result = {b->first->data, b->first->len};
+        return result;
+    }
+
+    // @Note(Krzosa): Compute size to allocate
+    S64 size = 1;
+    IterList(b, it){
+        size += it->len;
+    }
+    String result = {};
+    result.str = (U8 *)exp_alloc(b->allocator, size);
+
+    // @Note(Krzosa): Copy the content of each block into the string
+    IterList(b, it){
+        memory_copy(result.str + result.len, it->data, it->len);
+        result.len += it->len;
+    }
+    result.str[result.len] = 0;
+    return result;
+}
+
+// True when both strings have the same length and the same bytes.
+function B32
+string_compare(String a, String b){
+    if(a.len != b.len)
+        return false;
+    for(S64 i = 0; i < a.len; i++){
+        if(a.str[i] != b.str[i])
+            return false;
+    }
+    return true;
+}
+
+function U8
+char_to_lower(U8 c){
+    if(c >= 'A' && c <= 'Z')
+        c += 32;
+    return c;
+}
+
+function U8
+char_to_upper(U8 c){
+    if(c >= 'a' && c <= 'z')
+        c -= 32;
+    return c;
+}
+
+force_inline String
+operator""_s(const char *str, size_t size){
+    return String{(U8 *)str, (S64)size};
+}
+
+function void
+test_string_builder(){
+    Set_Scratch();
+    String_Builder sb = {};
+    string_builder_init(&sb, 4);
+    appendf(&sb, "Thing, %d", 242252);
+    String f = string_flatten(&sb);
+    assert(string_compare(f, "Thing, 242252"_s));
+    appendf(&sb, "-%f %f %f", 23.0, 42.29, 2925.2);
+    f = string_flatten(&sb);
+}
+
+//-----------------------------------------------------------------------------
+// String intern
+//-----------------------------------------------------------------------------
+struct Intern_Table{
+    Allocator *string_allocator;
+    Map map;
+};
+
+// Returns the unique stored copy of `string`, creating it on first use.
+// Storage layout per string: [S64 length][characters][0]; the map value points
+// at the characters, so the length sits directly in front of them.
+function Intern_String
+intern_string(Intern_Table *t, String string){
+    if(!t->string_allocator) t->string_allocator = imp_get();
+    U64 hash = hash_string(string);
+    for(;;){
+        U8 *slot = (U8 *)map_get_u64(&t->map, hash);
+        if(!slot) break;
+        // Read the whole S64 length; dereferencing the U8 pointer would
+        // fetch a single byte only.
+        String found = {slot, ((S64 *)slot)[-1]};
+        if(string_compare(found, string)){
+            Intern_String result = {{found.str, found.len}};
+            return result;
+        }
+        // A different string landed on this key: probe the next key. Lookups
+        // and inserts follow the same sequence, so this stays consistent.
+        hash++;
+    }
+
+    S64 *len_address = (S64 *)exp_alloc(t->string_allocator, string.len+1+sizeof(S64));
+    *len_address = string.len;
+
+    U8 *string_address = (U8 *)(len_address + 1);
+    memory_copy(string_address, string.str, string.len);
+    string_address[string.len] = 0;
+
+    map_insert_u64(&t->map, hash, string_address);
+    Intern_String result = {{string_address, *len_address}};
+
+    return result;
+}
+
+function void
+test_intern_table(){ Set_Scratch();
+    Intern_Table table = {};
+    Intern_String intern1 = intern_string(&table, "Thing"_s);
+    Intern_String intern2 = intern_string(&table, "Thing"_s);
+    Intern_String intern3 = intern_string(&table, "Not Thing"_s);
+    assert(intern1.str == intern2.str);
+    assert(intern3.str != intern2.str);
+}
+
+#include "new_lex.cpp"
+int main(){
+    test_custom_context();
+    test_heap_allocator();
+    test_os_memory();
+    
thread_ctx_init(); + test_array(); + map_test(); + test_string_builder(); + test_intern_table(); + lex_test(); } diff --git a/new_lex.cpp b/new_lex.cpp new file mode 100644 index 0000000..7c7dac2 --- /dev/null +++ b/new_lex.cpp @@ -0,0 +1,581 @@ + +enum Token_Kind{ + TK_End, + + TK_Mul, + TK_Div, + TK_Mod, + TK_LeftShift, + TK_RightShift, + TK_FirstMul = TK_Mul, + TK_LastMul = TK_RightShift, + + TK_Add, + TK_Sub, + TK_FirstAdd = TK_Add, + TK_LastAdd = TK_Sub, + + TK_Equals, + TK_LesserThenOrEqual, + TK_GreaterThenOrEqual, + TK_LesserThen, + TK_GreaterThen, + TK_NotEquals, + TK_FirstCompare = TK_Equals, + TK_LastCompare = TK_NotEquals, + + TK_BitAnd, + TK_BitOr, + TK_BitXor, + TK_And, + TK_Or, + TK_FirstLogical = TK_BitAnd, + TK_LastLogical = TK_Or, + + TK_Neg, + TK_Not, + TK_OpenParen, + TK_CloseParen, + TK_OpenBrace, + TK_CloseBrace, + TK_OpenBracket, + TK_CloseBracket, + TK_Comma, + TK_Pound, + TK_Question, + TK_ThreeDots, + TK_Semicolon, + TK_Dot, + + TK_NewLine, + TK_Colon, + + TK_Assign, + TK_ColonAssign, + TK_DivAssign, + TK_MulAssign, + TK_ModAssign, + TK_SubAssign, + TK_AddAssign, + TK_AndAssign, + TK_OrAssign, + TK_XorAssign, + TK_LeftShiftAssign, + TK_RightShiftAssign, + TK_FirstAssign = TK_Assign, + TK_LastAssign = TK_RightShiftAssign, + + TK_DoubleColon, + TK_At, + TK_Decrement, + TK_Increment, + TK_PostDecrement, + TK_PostIncrement, + + TK_Arrow, + TK_ExprSizeof, + TK_DocComment, + TK_Comment, + TK_Identifier, + TK_StringLit, + TK_Character, + TK_Error, + TK_Float, + TK_Int, + TK_Keyword, +}; + +struct Token{ + Token_Kind kind; + union{ + String string; + struct{U8 *str; S64 len;}; + }; + + union { + U64 int_val; + F64 float_val; + String error_val; + Intern_String intern_val; + S64 indent; + }; + + String file; + S32 line; + U8 *line_begin; +}; + +struct Lex_Stream{ + String stream; + S64 iter; + + U8 *line_begin; + String file; + S32 line; +}; + +function U8 +lexc(Lex_Stream *s){ + return s->stream.str[s->iter]; +} + +function U8 +lexci(Lex_Stream 
*s, S32 i){
+    return s->stream.str[s->iter+i];
+}
+
+function U8 *
+lexcp(Lex_Stream *s){
+    return s->stream.str + s->iter;
+}
+
+// Whitespace that is skipped between tokens. '\n' is not part of this set; it
+// is lexed as its own TK_NewLine token.
+function B32
+lex_is_whitespace(U8 c){
+    B32 result = c == '\t' || c == ' ' || c == '\r';
+    return result;
+}
+
+function B32
+lex_is_alphabetic(U8 c){
+    B32 result = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+    return result;
+}
+
+function B32
+lex_is_numeric(U8 c){
+    B32 result = c >= '0' && c <= '9';
+    return result;
+}
+
+function B32
+lex_is_alphanumeric(U8 c){
+    B32 result = lex_is_numeric(c) || lex_is_alphabetic(c);
+    return result;
+}
+
+// Sets token->len to the distance from the token start to the stream cursor.
+function void
+lex_set_len(Lex_Stream *s, Token *token){
+    assert(lexcp(s) >= token->str);
+    token->len = lexcp(s) - token->str;
+}
+
+function void
+token_error(Token *t, String error_val){
+    t->kind = TK_Error;
+    t->error_val = error_val;
+}
+
+// Converts the decimal digits t->str[0..len) into t->int_val. When the value
+// does not fit in 64 bits the token is turned into TK_Error instead.
+function void
+lex_parse_u64(Token *t){
+    const U64 u64_max = ~(U64)0;
+    U64 result = 0;
+    for(S64 i = 0; i < t->len; i++){
+        U64 digit = t->str[i] - '0';
+        // Checked before the arithmetic so neither the multiply nor the add
+        // can wrap, no matter how many digits the literal has.
+        if(result > (u64_max - digit) / 10){
+            token_error(t, "Integer overflow"_s);
+            return;
+        }
+        result = result*10 + digit;
+    }
+    t->int_val = result;
+}
+
+// Moves the cursor one byte forward (never past stream.len) and keeps the
+// line counter and line_begin up to date when a '\n' is consumed.
+function void
+lex_advance(Lex_Stream *s){
+    if(s->iter >= s->stream.len){
+        return;
+    }
+    else if(lexc(s) == '\n'){
+        s->iter++;
+        s->line++;
+        s->line_begin = lexcp(s);
+    }
+    else{
+        s->iter++;
+    }
+}
+
+// Consumes up to and including the closing delimiter c; a backslash skips the
+// character after it. Reaching a 0 byte first turns the token into TK_Error.
+function void
+lex_parse_string(Lex_Stream *s, Token *t, U8 c){
+    for(;;){
+        if(lexc(s) == '\\') lex_advance(s);
+        else if(lexc(s) == c) break;
+        else if(lexc(s) == 0){
+            token_error(t, "Unterminated string, reached end of file"_s);
+            break;
+        }
+        lex_advance(s);
+    }
+    if(t->kind != TK_Error){
+        lex_advance(s);
+        lex_set_len(s,t);
+    }
+}
+
+#define CASE2(op, OpName, Assign) \
+case op: \
+    if (lexc(s) == '=') { \
+        lex_advance(s); \
+        t.kind = Assign; \
+    } else { \
+        t.kind = OpName; \
+    } \
+    break
+#define CASE3(op, OpName, Assign, Incr) \
+case op: \
+    if (lexc(s) == '=') { \
+        lex_advance(s); \
+        t.kind = Assign; \
+    } else if (lexc(s) == 
op) { \ +lex_advance(s); \ +t.kind = Incr; \ +} else { \ +t.kind = OpName; \ +} \ +break + +function void +lex__stream(Array *array, Lex_Stream *s){ + while(lexc(s)){ + while(lex_is_whitespace(lexc(s))) + lex_advance(s); + + Token t = {}; + t.str = lexcp(s); + t.file = s->file; + t.line = s->line; + t.line_begin = s->line_begin; + lex_advance(s); + + switch(*t.str){ + case 0: break; + case '@': t.kind = TK_At; break; + case '(': t.kind = TK_OpenParen; break; + case ')': t.kind = TK_CloseParen; break; + case '{': t.kind = TK_OpenBrace; break; + case '}': t.kind = TK_CloseBrace; break; + case '[': t.kind = TK_OpenBracket; break; + case ']': t.kind = TK_CloseBracket; break; + case ',': t.kind = TK_Comma; break; + case '~': t.kind = TK_Neg; break; + case '?': t.kind = TK_Question; break; + case ';': t.kind = TK_Semicolon; break; + case '#': t.kind = TK_Pound; break; + CASE2('!', TK_Not, TK_NotEquals); + CASE2('^', TK_BitXor, TK_XorAssign); + CASE2('=', TK_Assign, TK_Equals); + CASE2('*', TK_Mul, TK_MulAssign); + CASE2('%', TK_Mod, TK_ModAssign); + CASE3('+', TK_Add, TK_AddAssign, TK_Increment); + CASE3('&', TK_BitAnd, TK_AndAssign, TK_And); + CASE3('|', TK_BitOr, TK_OrAssign, TK_Or); +#undef CASE2 +#undef CASE3 + case '\n': { + t.kind = TK_NewLine; + if(lexc(s) == '\r') + lex_advance(s); + + for(;;){ + if(lexc(s) == ' ') t.indent++; + else if(lexc(s) == '\t') t.indent += 2; + else break; + lex_advance(s); + } + + }break; + case '.': { + if(lexc(s) == '.' 
&& lexci(s,1) == '.') { + lex_advance(s); lex_advance(s); + t.kind = TK_ThreeDots; + } + else { + t.kind = TK_Dot; + } + } break; + + + case '<': { + if (lexc(s) == '<') { + lex_advance(s); + if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_LeftShiftAssign; + } + else { + t.kind = TK_LeftShift; + } + } + else if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_LesserThenOrEqual; + } + else { + t.kind = TK_LesserThen; + } + } break; + + case '>': { + if (lexc(s) == '>') { + lex_advance(s); + if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_RightShiftAssign; + } + else { + t.kind = TK_RightShift; + } + } + else if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_GreaterThenOrEqual; + } + else { + t.kind = TK_GreaterThen; + } + } break; + + case ':': { + if (lexc(s) == ':') { + lex_advance(s); + t.kind = TK_DoubleColon; + } + else if(lexc(s) == '='){ + lex_advance(s); + t.kind = TK_ColonAssign; + } + else { + t.kind = TK_Colon; + } + } break; + + case '-':{ + if (lexc(s) == '=') { + lex_advance(s); + t.kind = TK_SubAssign; + } + else if (lexc(s) == '-') { + lex_advance(s); + t.kind = TK_Decrement; + } + else if (lexc(s) == '>') { + lex_advance(s); + t.kind = TK_Arrow; + } + else { + t.kind = TK_Sub; + } + } break; + + + case '\'':{not_implemented;} break; + case '"': { + t.kind = TK_StringLit; + lex_parse_string(s,&t,'"'); + if(t.kind != TK_Error){ + t.str += 1; + t.len -= 2; + } + //t.intern_val = intern_string(&array->interns, t.string); + } break; + + case '/': { + if(lexc(s) == '='){ + t.kind = TK_DivAssign; + lex_advance(s); + } + else if(lexc(s) == '/'){ + lex_advance(s); + t.kind = TK_Comment; + for(;;){ + if(lexc(s) == '\n' || lexc(s) == 0) break; + lex_advance(s); + } + continue; + } + else if(lexc(s) == '*'){ + lex_advance(s); + t.kind = TK_Comment; + for(;;){ + if(lexc(s) == '*' && lexci(s,1) == '/'){ + lex_advance(s); + lex_advance(s); + break; + } + else if(lexc(s) == 0){ + token_error(&t, "Unterminated block comment"_s); + goto 
skip_continue; + } + lex_advance(s); + } + continue; + skip_continue:; + } + else { + t.kind = TK_Div; + } + } break; + + case '0':case '1':case '2':case '3':case '4': + case '5':case '6':case '7':case '8':case '9':{ + t.kind = TK_Int; + while(lex_is_numeric(lexc(s))) + lex_advance(s); + lex_set_len(s, &t); + lex_parse_u64(&t); + } break; + + case 'A':case 'a':case 'M':case 'm':case 'B': + case 'b':case 'N':case 'n':case 'C':case 'c':case 'O': + case 'o':case 'D':case 'd':case 'P':case 'p':case 'E': + case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R': + case 'r':case 'G':case 'g':case 'S':case 's':case 'H': + case 'h':case 'T':case 't':case 'I':case 'i':case 'U': + case 'u':case 'J':case 'j':case 'V':case 'v':case 'K': + case 'k':case 'W':case 'w':case 'L':case 'X':case 'l': + case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': { + t.kind = TK_Identifier; + while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_') + lex_advance(s); + lex_set_len(s,&t); + //t.intern_val = intern_string(&array->interns, t.string); + //if(lex_is_keyword(t.intern_val)){ + //t.kind = TK_Keyword; + //} + } break; + + default: { + token_error(&t, "Unknown token"_s); + } + } + + if(t.len==0) + lex_set_len(s,&t); + + array_push(array, t); + } +} + +function Array +lex_stream(String istream, String file){ + Lex_Stream stream = {istream, 0, istream.str, file, 0}; + Array tokens = array_make(1024); + lex__stream(&tokens, &stream); + return tokens; +} + +function void +lex_test(){ + Set_Scratch(); + String test = "//R\n 18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\" Thingy" + "\"Test_Meme\"+=-===42524 4294967295 18446744073709551615" + "for if while switch :="_s; + Array array = lex_stream(test, "Test1"_s); + + Token_Kind kind[] = { + TK_NewLine, TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen, + TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon, + TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon, + TK_StringLit, TK_Identifier, TK_StringLit, 
TK_AddAssign, TK_SubAssign,
+        TK_Equals, TK_Int, TK_Int, TK_Int,
+        TK_Identifier, TK_Identifier, TK_Identifier, TK_Identifier,
+        // TK_Keyword, TK_Keyword, TK_Keyword, TK_Keyword,
+        TK_ColonAssign, TK_End
+    };
+    String strs[] = {
+        "\n "_s, "18446744073709551616"_s,"{"_s,"}"_s,")"_s,"("_s,
+        "@"_s,"?"_s,"&"_s,"+"_s,"-"_s,";"_s,
+        "..."_s,"."_s,"->"_s,","_s,"::"_s,":"_s,
+        "Thing"_s,"Thingy"_s,"Test_Meme"_s, "+="_s,"-="_s,
+        "=="_s,"42524"_s,"4294967295"_s,"18446744073709551615"_s,
+        "for"_s, "if"_s, "while"_s, "switch"_s, ":="_s, ""_s,
+    };
+    U64 vals[] = {
+        42524, 4294967295, 18446744073709551615llu
+    };
+
+    int ui = 0;
+    For(array, t, i){
+        assert(t->kind == kind[i]);
+        assert(string_compare(t->string, strs[i]));
+        if(t->kind == TK_Int){
+            assert(t->int_val == vals[ui++]);
+        }
+    }
+
+}
+
+//-----------------------------------------------------------------------------
+// Token metadata
+//-----------------------------------------------------------------------------
+// Returns a printable name for a token kind: the literal spelling for
+// operators and punctuation, a descriptive word for the remaining kinds.
+function String
+token_kind_string(Token_Kind kind){
+    switch(kind){
+        case TK_End: return "End of stream"_s;
+        case TK_Mul: return "*"_s;
+        case TK_Div: return "/"_s;
+        case TK_Add: return "+"_s;
+        case TK_Sub: return "-"_s;
+        case TK_Mod: return "%"_s;
+        case TK_BitAnd: return "&"_s;
+        case TK_BitOr: return "|"_s;
+        case TK_BitXor: return "^"_s;
+        case TK_Neg: return "~"_s;
+        case TK_Not: return "!"_s;
+        case TK_OpenParen: return "("_s;
+        case TK_CloseParen: return ")"_s;
+        case TK_OpenBrace: return "{"_s;
+        case TK_CloseBrace: return "}"_s;
+        case TK_OpenBracket: return "["_s;
+        case TK_CloseBracket: return "]"_s;
+        case TK_Comma: return ","_s;
+        case TK_Pound: return "#"_s;
+        case TK_Question: return "?"_s;
+        case TK_ThreeDots: return "..."_s;
+        case TK_Semicolon: return ";"_s;
+        case TK_Dot: return "."_s;
+        case TK_NewLine: return "NewLine"_s;
+        case TK_LesserThen: return "<"_s;
+        case TK_GreaterThen: return ">"_s;
+        case TK_Colon: return ":"_s;
+        case TK_Assign: return "="_s;
+        case TK_ColonAssign: return ":="_s;
+        case TK_DivAssign: return "/="_s;
+        case TK_MulAssign: return "*="_s;
+        case TK_ModAssign: return "%="_s;
+        case TK_SubAssign: return "-="_s;
+        case TK_AddAssign: return "+="_s;
+        case TK_AndAssign: return "&="_s;
+        case TK_OrAssign: return "|="_s;
+        case TK_XorAssign: return "^="_s;
+        case TK_LeftShiftAssign: return "<<="_s;
+        case TK_RightShiftAssign: return ">>="_s;
+        case TK_DoubleColon: return "::"_s;
+        case TK_At: return "@"_s;
+        case TK_Decrement: return "--"_s;
+        case TK_Increment: return "++"_s;
+        case TK_PostDecrement: return "--"_s;
+        case TK_PostIncrement: return "++"_s;
+        case TK_LesserThenOrEqual: return "<="_s;
+        case TK_GreaterThenOrEqual: return ">="_s;
+        case TK_Equals: return "=="_s;
+        case TK_And: return "&&"_s;
+        case TK_Or: return "||"_s;
+        case TK_NotEquals: return "!="_s;
+        case TK_LeftShift: return "<<"_s;
+        case TK_RightShift: return ">>"_s;
+        case TK_Arrow: return "->"_s;
+        case TK_ExprSizeof: return "sizeof"_s;
+        case TK_DocComment: return "DocComment"_s;
+        case TK_Comment: return "Comment"_s;
+        case TK_Identifier: return "Identifier"_s;
+        case TK_StringLit: return "StringLit"_s;
+        case TK_Character: return "Character"_s;
+        case TK_Error: return "Error"_s;
+        case TK_Float: return "Float"_s;
+        case TK_Int: return "Int"_s;
+        case TK_Keyword: return "Keyword"_s;
+        default: invalid_codepath; return ""_s;
+    }
+}