Working on lexer first

This commit is contained in:
Krzosa Karol
2022-04-28 13:49:32 +02:00
commit d462892e14
7 changed files with 645 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
*.exe
*.ilk
*.pdb
*.txt
*.4c

3
build.bat Normal file
View File

@@ -0,0 +1,3 @@
@echo off
rem Compiles main.c (which #includes os.cpp and lex.h) as C99 with debug info into main.exe.
rem Linker flags: Windows GUI subsystem, linked against user32.lib.
clang main.c -fdiagnostics-absolute-paths -std=c99 -g -o main.exe -Wl,-subsystem:windows -Wl,user32.lib

44
ideas.cpp Normal file
View File

@@ -0,0 +1,44 @@
Builtin types:
B8,B16,B32,B64
S8,S16,S32,S64
U8,U16,U32,U64
SizeI, SizeU
F32, F64
Decls:
S32 *var;
S32 (*func)();
S32 *var[expr][expr];
S32 **var;
Global scope:
function S32
do_thing(S32 a, U32 b){
stmt_list
}
function S32
do_thing(S32 a, S32 b);
typedef struct Thing Thing;
struct Thing{};
typedef struct Thing{} Thing;
typedef enum Thingy Thingy;
enum Thingy{};
typedef enum Thingy{} Thingy;
global S32 variable = expr | compound;
// typedef S32 NewName;
// typedef S32 BaseFunctionType(S32 thing);
// typedef S32 (*FunctionPointer)(S32 thing);
Local scope-(stmts):
S32 variable = expr;
variable = expr;
variable++;
return 0;
if(a){}elseif(b){}else{}

164
lex.h Normal file
View File

@@ -0,0 +1,164 @@
#pragma once
// Every kind of token the lexer can hand out.
// The meta("...") prefix records the source spelling of a token; the `meta`
// macro expands to nothing, so it has no effect on the compiled enum.
typedef enum Token_Kind{
meta("End of stream")TK_End,
// Operators and punctuation.
meta("*")TK_Mul,
meta("/")TK_Div,
meta("+")TK_Add,
meta("-")TK_Sub,
meta("%")TK_Mod,
meta("&")TK_BitAnd,
meta("|")TK_BitOr,
meta("^")TK_BitXor,
meta("~")TK_Neg,
meta("!")TK_Not,
meta("(")TK_OpenParen,
meta(")")TK_CloseParen,
meta("{")TK_OpenBrace,
meta("}")TK_CloseBrace,
meta("[")TK_OpenBracket,
meta("]")TK_CloseBracket,
meta(",")TK_Comma,
meta("#")TK_Pound,
meta("?")TK_Question,
meta("...")TK_ThreeDots,
meta(";")TK_Semicolon,
meta(".")TK_Dot,
meta("<")TK_LesserThen,
meta(">")TK_GreaterThen,
meta(":")TK_Colon,
// Assignment forms.
meta("=")TK_Assign,
meta("/=")TK_DivAssign,
meta("*=")TK_MulAssign,
meta("%=")TK_ModAssign,
meta("-=")TK_SubAssign,
meta("+=")TK_AddAssign,
meta("&=")TK_AndAssign,
meta("|=")TK_OrAssign,
meta("^=")TK_XorAssign,
meta("<<=")TK_LeftShiftAssign,
meta(">>=")TK_RightShiftAssign,
meta("::")TK_DoubleColon,
meta("@")TK_At,
meta("--")TK_Decrement,
meta("++")TK_Increment,
// Same spelling as TK_Decrement / TK_Increment; lex_base in main.c never
// emits these two kinds itself.
meta("--")TK_PostDecrement,
meta("++")TK_PostIncrement,
meta("<=")TK_LesserThenOrEqual,
meta(">=")TK_GreaterThenOrEqual,
meta("==")TK_Equals,
meta("&&")TK_And,
meta("||")TK_Or,
meta("!=")TK_NotEquals,
meta("<<")TK_LeftShift,
meta(">>")TK_RightShift,
meta("->")TK_Arrow,
meta("sizeof")TK_ExprSizeof,
// Kinds without a fixed spelling: comments, names, literals and errors.
TK_DocComment,
TK_Comment,
TK_Identifier,
TK_StringLit,
TK_U8Lit,
TK_Error,
TK_Float,
TK_Int,
TK_Keyword,
}Token_Kind;
// A single lexed token together with the source position it came from.
typedef struct Token{
Token_Kind kind;
// Token text. The anonymous struct {str, len} overlays `string`, so the text
// can be accessed either field-by-field or as a String value.
union{
struct{
U8 *str;
S64 len;
};
String string;
};
// Payload: int_val is written for TK_Int tokens, error_val for TK_Error tokens.
union {
S64 int_val;
String error_val;
};
// Source location, filled in by lex_token_seed.
String file;
S64 line;
U8 *line_begin;
} Token;
// Growable array of tokens; grown by token_alloc.
typedef struct Tokens{
Token *tokens;
// Number of tokens in use.
S64 len;
// Number of tokens the allocation can hold.
S64 cap;
}Tokens;
// Cursor over the text being lexed.
typedef struct Lex_Stream{
// Current read position; lexing stops at a 0 byte.
U8 *stream;
// First byte after the most recently consumed '\n' (see lex_advance).
U8 *line_begin;
String filename;
// Count of '\n' bytes consumed so far.
S64 line;
}Lex_Stream;
//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
// Display name for each Token_Kind, indexed by the enum value.
// Every enumerator needs an entry here: a missing one leaves a zeroed String
// (NULL str), and a missing entry for the last enumerator makes the array too
// short to be indexed by it.
global String token_kind_string[] = {
  [TK_End] = lit("End of stream"),
  [TK_Error] = lit("Error"),
  [TK_DocComment] = lit("DocComment"),
  [TK_Comment] = lit("Comment"),
  [TK_Identifier] = lit("Identifier"),
  [TK_StringLit] = lit("StringLiteral"),
  [TK_U8Lit] = lit("U8Literal"),
  [TK_Float] = lit("Float"),
  [TK_Int] = lit("Integer"),
  [TK_Keyword] = lit("Keyword"),
  [TK_Mul] = lit("*"),
  [TK_Div] = lit("/"),
  [TK_Add] = lit("+"),
  [TK_Sub] = lit("-"),
  [TK_Mod] = lit("%"),
  [TK_BitAnd] = lit("&"),
  [TK_BitOr] = lit("|"),
  [TK_BitXor] = lit("^"),
  [TK_Neg] = lit("~"),
  [TK_Not] = lit("!"),
  [TK_OpenParen] = lit("("),
  [TK_CloseParen] = lit(")"),
  [TK_OpenBrace] = lit("{"),
  [TK_CloseBrace] = lit("}"),
  [TK_OpenBracket] = lit("["),
  [TK_CloseBracket] = lit("]"),
  [TK_Comma] = lit(","),
  [TK_Pound] = lit("#"),
  [TK_Question] = lit("?"),
  [TK_ThreeDots] = lit("..."),
  [TK_Semicolon] = lit(";"),
  [TK_Dot] = lit("."),
  [TK_LesserThen] = lit("<"),
  [TK_GreaterThen] = lit(">"),
  [TK_Colon] = lit(":"),
  [TK_Assign] = lit("="),
  [TK_DivAssign] = lit("/="),
  [TK_MulAssign] = lit("*="),
  [TK_ModAssign] = lit("%="),
  [TK_SubAssign] = lit("-="),
  [TK_AddAssign] = lit("+="),
  [TK_AndAssign] = lit("&="),
  [TK_OrAssign] = lit("|="),
  [TK_XorAssign] = lit("^="),
  [TK_LeftShiftAssign] = lit("<<="),
  [TK_RightShiftAssign] = lit(">>="),
  [TK_DoubleColon] = lit("::"),
  [TK_At] = lit("@"),
  [TK_Decrement] = lit("--"),
  [TK_Increment] = lit("++"),
  [TK_PostDecrement] = lit("--"),
  [TK_PostIncrement] = lit("++"),
  [TK_LesserThenOrEqual] = lit("<="),
  [TK_GreaterThenOrEqual] = lit(">="),
  [TK_Equals] = lit("=="),
  [TK_And] = lit("&&"),
  [TK_Or] = lit("||"),
  [TK_NotEquals] = lit("!="),
  [TK_LeftShift] = lit("<<"),
  [TK_RightShift] = lit(">>"),
  [TK_Arrow] = lit("->"),
  [TK_ExprSizeof] = lit("sizeof"),
};

363
main.c Normal file
View File

@@ -0,0 +1,363 @@
#include "os.cpp"
#include "lex.h"
// Stream that receives all lexer debug output; assigned in os_main before
// lex_test runs.
global FILE *global_output_file;
// printf-style helper that writes to global_output_file.
#define lex_print(...) fprintf(global_output_file, __VA_ARGS__)
// Writes zero to each of the `size` bytes starting at `p`.
function void
memory_zero(void *p, SizeU size){
  U8 *cursor = p;
  for(SizeU remaining = size; remaining > 0; remaining--){
    *cursor++ = 0;
  }
}
// Returns true when `a` and `b` have the same length and identical bytes,
// false otherwise.
function B32
string_compare(String a, String b){
  B32 equal = (a.len == b.len);
  // The loop stops at the first mismatching byte.
  for(S64 idx = 0; equal && idx < a.len; idx++){
    equal = (a.str[idx] == b.str[idx]);
  }
  return equal;
}
// Appends one zero-initialized token to `t` and returns a pointer to it.
// The array starts at 1024 entries and doubles whenever it is full.
// The returned pointer is only valid until the next call, because growing the
// array may move it.
// Returns 0 (after tripping assert_msg) when the allocation fails; in that
// case `t` is left untouched, so the tokens collected so far stay valid.
function Token *
token_alloc(Tokens *t){
  if(t->cap == 0 || t->len + 1 > t->cap){
    S64 new_cap = (t->cap == 0) ? 1024 : t->cap * 2;
    SizeU new_size = sizeof(Token) * (SizeU)new_cap;
    // Grow through a temporary so a failed realloc does not lose the old buffer.
    Token *grown = (t->cap == 0) ? malloc(new_size) : realloc(t->tokens, new_size);
    assert_msg(grown, "Out of memory while growing the token array");
    if(!grown)
      return 0;
    t->tokens = grown;
    t->cap = new_cap;
  }
  Token *result = t->tokens + t->len++;
  memory_zero(result, sizeof(*result));
  return result;
}
// Moves the stream cursor forward by one byte, keeping the line counter and
// line_begin up to date. Does nothing when the cursor is on the 0 terminator.
function void
lex_advance(Lex_Stream *s){
  U8 current = *s->stream;
  if(current == 0){
    return; // End of stream, stay put
  }
  s->stream++;
  if(current == '\n'){
    s->line++;
    s->line_begin = s->stream;
  }
}
// Converts the `len` bytes at `str` to a number, treating each byte as a
// decimal digit (byte - '0'). No validation is done and overflow wraps.
function U64
parse_u64(U8 *str, S64 len){
  U64 value = 0;
  // Most significant digit first: multiply what we have by ten, add the digit.
  for(S64 pos = 0; pos < len; pos++){
    U64 digit = str[pos] - '0';
    value = value * 10 + digit;
  }
  return value;
}
// Returns non-zero when `c` is a byte the lexer skips between tokens:
// space, tab, carriage return or newline.
function B32
lex_is_whitespace(U8 c){
  // '\t' was missing (and '\r' was listed twice), which made tabs lex as
  // unknown tokens.
  B32 result = c == '\n' || c == '\r' || c == ' ' || c == '\t';
  return result;
}
// Returns non-zero when `c` is an ASCII letter.
function B32
lex_is_alphabetic(U8 c){
  B32 is_upper = ('A' <= c && c <= 'Z');
  B32 is_lower = ('a' <= c && c <= 'z');
  return is_upper || is_lower;
}
// Returns non-zero when `c` is an ASCII decimal digit.
function B32
lex_is_numeric(U8 c){
  if(c < '0'){
    return 0;
  }
  return c <= '9';
}
// Returns non-zero when `c` is an ASCII digit or letter.
function B32
lex_is_alphanumeric(U8 c){
  if(lex_is_numeric(c)){
    return 1;
  }
  return lex_is_alphabetic(c) ? 1 : 0;
}
// Sets the token's length to the distance from its first byte to the current
// stream position. The stream must already be past the token start.
function void
lex_set_len(Lex_Stream *s, Token *token){
  U8 *token_end = s->stream;
  assert(token_end > token->str);
  token->len = token_end - token->str;
}
// Returns the byte under the stream cursor without consuming it.
function U8
lexc(Lex_Stream *s){
  U8 current = s->stream[0];
  return current;
}
// Turns `t` into an error token carrying the message `error_val`.
function void
token_error(Token *t, String error_val){
  t->error_val = error_val;
  t->kind = TK_Error;
}
// Consumes the body of a quoted literal up to and including the closing
// delimiter `c`; the opening delimiter must already be consumed.
// A backslash skips the byte that follows it, so an escaped delimiter does not
// end the literal.
// On success the token length is set to cover everything from t->str to just
// past the closing delimiter. If the stream ends first, `t` becomes an error
// token and its length is left unchanged.
function void
lex_parse_string(Lex_Stream *s, Token *t, U8 c){
  for(;;){
    U8 current = lexc(s);
    if(current == '\\'){
      lex_advance(s); // the backslash itself
      lex_advance(s); // the escaped byte
      continue;
    }
    if(current == c){
      break;
    }
    if(current == 0){
      token_error(t, lit("Unterminated string, reached end of file"));
      break;
    }
    lex_advance(s);
  }
  if(t->kind == TK_Error){
    return;
  }
  lex_advance(s); // step over the closing delimiter
  lex_set_len(s,t);
}
// Stamps `t` with the stream's current position: the token text starts at the
// cursor, and file, line and line start are copied from the stream.
function void
lex_token_seed(Lex_Stream *s, Token *t){
  t->file = s->filename;
  t->line = s->line;
  t->line_begin = s->line_begin;
  t->str = s->stream;
}
// Appends a new error token with message `error_val`, positioned at the
// stream's current location.
function void
token_push_error(Lex_Stream *stream, Tokens *tokens, String error_val){
  Token *error_token = token_alloc(tokens);
  lex_token_seed(stream, error_token);
  token_error(error_token, error_val);
}
// Tokenizes the 0-terminated text under `s`, appending one Token per lexeme to
// `tokens` until the terminator is reached.
//
// Produced kinds:
//  - operators and punctuation handled by the switch below,
//  - TK_Comment / TK_DocComment for "//", "///" and "/* */" comments,
//  - TK_Int for runs of decimal digits (value stored in int_val),
//  - TK_U8Lit for "..." literals and TK_StringLit for lit("...") literals,
//    both with str/len trimmed to the text between the quotes,
//  - TK_Identifier for names made of letters, digits and '_',
//  - TK_Error (message in error_val) for anything else or for unterminated
//    literals and block comments.
//
// Input that ends in whitespace no longer produces a trailing
// "Unknown token" error: the end of stream is re-checked after whitespace is
// skipped.
function void
lex_base(Lex_Stream *s, Tokens *tokens){
  // CASE2: operator `op`, optionally followed by '=' (for example "*" / "*=").
#define CASE2(op, OpName, Assign) \
  case op: \
    if (lexc(s) == '=') { \
      lex_advance(s); \
      t->kind = Assign; \
    } else { \
      t->kind = OpName; \
    } \
    break
  // CASE3: like CASE2, plus the doubled form (for example "+" / "+=" / "++").
#define CASE3(op, OpName, Assign, Incr) \
  case op: \
    if (lexc(s) == '=') { \
      lex_advance(s); \
      t->kind = Assign; \
    } else if (lexc(s) == op) { \
      lex_advance(s); \
      t->kind = Incr; \
    } else { \
      t->kind = OpName; \
    } \
    break

  while(*s->stream){
    while(lex_is_whitespace(*s->stream))
      lex_advance(s);
    // Only whitespace was left: stop instead of lexing the terminator.
    if(*s->stream == 0)
      break;

    Token *t = token_alloc(tokens);
    lex_token_seed(s, t);
    // The first byte is consumed here; the switch inspects it through t->str
    // while lexc(s) already looks at the byte after it.
    lex_advance(s);
    switch(*t->str) {
      CASE2('!', TK_Not, TK_NotEquals);
      CASE2('^', TK_BitXor, TK_XorAssign);
      CASE2('=', TK_Assign, TK_Equals);
      CASE2('*', TK_Mul, TK_MulAssign);
      CASE2('%', TK_Mod, TK_ModAssign);
      CASE3('+', TK_Add, TK_AddAssign, TK_Increment);
      CASE3('&', TK_BitAnd, TK_AndAssign, TK_And);
      CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
      case '@': t->kind = TK_At; break;
      case '(': t->kind = TK_OpenParen; break;
      case ')': t->kind = TK_CloseParen; break;
      case '{': t->kind = TK_OpenBrace; break;
      case '}': t->kind = TK_CloseBrace; break;
      case '[': t->kind = TK_OpenBracket; break;
      case ']': t->kind = TK_CloseBracket; break;
      case ',': t->kind = TK_Comma; break;
      case '~': t->kind = TK_Neg; break;
      case '?': t->kind = TK_Question; break;
      case ';': t->kind = TK_Semicolon; break;
      case '-':{
        if (lexc(s) == '=') {
          lex_advance(s);
          t->kind = TK_SubAssign;
        }
        else if (lexc(s) == '-') {
          lex_advance(s);
          t->kind = TK_Decrement;
        }
        else if (lexc(s) == '>') {
          lex_advance(s);
          t->kind = TK_Arrow;
        }
        else {
          t->kind = TK_Sub;
        }
      } break;
      case '\'':{not_implemented;} break;
      case '"': {
        t->kind = TK_U8Lit;
        lex_parse_string(s,t,'"');
        if(t->kind != TK_Error){
          // Drop the surrounding quotes from the token text.
          t->str += 1;
          t->len -= 2;
        }
      } break;
      case '/': {
        if(lexc(s) == '/'){
          lex_advance(s);
          if(lexc(s) == '/'){
            lex_advance(s);
            t->kind = TK_DocComment;
          }
          else {
            t->kind = TK_Comment;
          }
          // A line comment runs up to, but not including, the newline.
          for(;;){
            if(lexc(s) == '\n' || lexc(s) == 0) break;
            lex_advance(s);
          }
          lex_set_len(s,t);
        }
        else if(lexc(s) == '*'){
          lex_advance(s);
          t->kind = TK_Comment;
          for(;;){
            if(s->stream[0] == '*' && s->stream[1] == '/'){
              lex_advance(s);
              lex_advance(s);
              break;
            }
            else if(lexc(s) == 0){
              token_error(t, lit("Unterminated block comment"));
              break;
            }
            lex_advance(s);
          }
          lex_set_len(s,t);
        }
        else t->kind = TK_Div;
      } break;
      case '0':
      case '1':case '2':case '3':
      case '4':case '5':case '6':
      case '7':case '8':case '9': {
        t->kind = TK_Int;
        while(lex_is_numeric(lexc(s)))
          lex_advance(s);
        lex_set_len(s, t);
        t->int_val = parse_u64(t->str, t->len);
      } break;
      case 'l':{
        // `lit("` opens a string literal; any other word starting with 'l'
        // is an ordinary identifier.
        if(s->stream[0] == 'i' && s->stream[1] == 't' && s->stream[2] == '(' && s->stream[3] == '"'){
          t->kind = TK_StringLit;
          lex_advance(s);lex_advance(s);lex_advance(s);lex_advance(s);
          lex_parse_string(s,t,'"');
          if(s->stream[0] == ')') {
            // Trim `lit("` from the front and the closing quote from the back.
            t->str += 5;
            t->len -= 6;
            lex_advance(s);
          }
          else token_error(t, lit("Unterminated string literal, missing closing parenthesis"));
          break;
        }
      }
      // fallthrough: not a lit("...") literal, lex it as an identifier
      case 'A':case 'a':case 'M':case 'm':case 'B':
      case 'b':case 'N':case 'n':case 'C':case 'c':case 'O':
      case 'o':case 'D':case 'd':case 'P':case 'p':case 'E':
      case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R':
      case 'r':case 'G':case 'g':case 'S':case 's':case 'H':
      case 'h':case 'T':case 't':case 'I':case 'i':case 'U':
      case 'u':case 'J':case 'j':case 'V':case 'v':case 'K':
      case 'k':case 'W':case 'w':case 'L':case 'X':
      case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': {
        t->kind = TK_Identifier;
        while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
          lex_advance(s);
        lex_set_len(s,t);
      } break;
      default: {
        token_error(t, lit("Unknown token"));
      } break;
    }
  }
#undef CASE2
#undef CASE3
}
// Lexes the whole of `in_stream` and returns the resulting token array.
// `in_stream.str` must be 0-terminated, since lexing stops at the first 0 byte.
// `filename` is copied into every token; line numbers start at 0.
// The returned Tokens.tokens buffer is heap-allocated by token_alloc.
function Tokens
lex_stream(String in_stream, String filename){
  // `{0}` instead of `{}`: the empty initializer is not valid C before C23.
  Lex_Stream stream = {0};
  stream.stream = in_stream.str;
  stream.line_begin = in_stream.str;
  stream.filename = filename;
  stream.line = 0;

  Tokens tokens = {0};
  lex_base(&stream, &tokens);
  return tokens;
}
// Writes the token count, then one "<kind name> <token text>" line per token,
// to the lexer output stream.
function void
token_print(Tokens tokens){
  lex_print("\n== Token count = %d\n", (S32)tokens.len);
  for(Token *t = tokens.tokens; t != tokens.tokens + tokens.len; t++){
    // Kinds without a table entry have a NULL name (or lie past the end of the
    // table); fall back to a placeholder rather than handing NULL to "%s".
    String kind_name = lit("<unnamed token kind>");
    if((SizeU)t->kind < buff_cap(token_kind_string) && token_kind_string[t->kind].str){
      kind_name = token_kind_string[t->kind];
    }
    lex_print("%s %.*s\n", (char *)kind_name.str, (S32)t->len, (char *)t->str);
  }
}
// Returns true when the token's text is byte-for-byte equal to `str`.
function B32
token_compare(Token *t, String str){
  return string_compare(t->string, str);
}
// Smoke test for the lexer: lexes three small inputs, prints the tokens and
// asserts on the kinds and text that are expected.
// Each token buffer is released before the next input is lexed, so the test
// no longer leaks.
function void
lex_test(){
  Tokens t;

  // Integers and a plain quoted literal.
  t = lex_stream(lit("32523 42524 \"U8Literal\""), lit("test"));
  token_print(t);
  assert(t.len == 3);
  assert(t.tokens[0].int_val == 32523);
  assert(t.tokens[1].int_val == 42524);
  assert(t.tokens[2].kind == TK_U8Lit);
  assert(token_compare(t.tokens + 2, lit("U8Literal")));
  free(t.tokens);

  // Identifiers and a lit("...") string literal.
  t = lex_stream(lit("_identifier Thing Thing2 lit(\"String_Test\")"), lit("test"));
  token_print(t);
  assert(t.tokens[0].kind == TK_Identifier);
  assert(t.tokens[1].kind == TK_Identifier);
  assert(t.tokens[2].kind == TK_Identifier);
  assert(t.tokens[3].kind == TK_StringLit);
  assert(token_compare(t.tokens, lit("_identifier")));
  assert(token_compare(t.tokens+1, lit("Thing")));
  assert(token_compare(t.tokens+2, lit("Thing2")));
  assert(token_compare(t.tokens+3, lit("String_Test")));
  free(t.tokens);

  // lit("...") without its closing parenthesis must come out as an error.
  t = lex_stream(lit("lit(\"String_Test\"{})(324*=+=-/ *% // Comment \n Thing /*Thing*/ /*Error"), lit("test"));
  assert(t.tokens[0].kind == TK_Error);
  token_print(t);
  free(t.tokens);
}
// Program entry point called from WinMain: opens output.txt, runs the lexer
// test with its output directed there, and closes the file.
// Returns 0 on success, 1 when output.txt cannot be opened or closed cleanly.
function S32
os_main(){
  global_output_file = fopen("output.txt", "w");
  assert_msg(global_output_file, "Failed to open output.txt");
  if(!global_output_file){
    // Without the file every lex_print would write through a NULL stream.
    return 1;
  }
  lex_test();
  // fclose flushes buffered output, so a failure here means lost data.
  S32 close_failed = (fclose(global_output_file) != 0);
  global_output_file = 0;
  return close_failed ? 1 : 0;
}

27
os.cpp Normal file
View File

@@ -0,0 +1,27 @@
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <windows.h>
#include "types.h"
// Forward declaration: WinMain below calls os_main and returns its result.
// The definition lives in the file that #includes this one.
function S32 os_main();
// Window procedure for the window created in WinMain: every message is passed
// straight to the default handler.
LRESULT CALLBACK
WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam){
  LRESULT result = DefWindowProcW(hwnd, uMsg, wParam, lParam);
  return result;
}
int
WinMain(HINSTANCE hInstance, HINSTANCE a, LPSTR b, int nShowCmd){
wchar_t *CLASS_NAME = L"Cool window class";
wchar_t *WINDOW_NAME = L"Have a good day!";
WNDCLASSW wc = { };
wc.lpfnWndProc = WindowProc;
wc.hInstance = hInstance;
wc.lpszClassName = CLASS_NAME;
RegisterClassW(&wc);
HWND window_handle = CreateWindowExW(0, CLASS_NAME, WINDOW_NAME, WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT, 0, 0, hInstance, 0);
ShowWindow(window_handle, nShowCmd);
return os_main();
}

39
types.h Normal file
View File

@@ -0,0 +1,39 @@
#pragma once
// `global` and `function` both expand to `static`, giving file-scope variables
// and functions internal linkage.
#define global static
#define function static
// Triggers a debugger break when `x` is false. Not tied to NDEBUG: the check
// is always compiled in.
#define assert(x) do{if(!(x)) __debugbreak();}while(0)
// Same as assert; `msg` is accepted for readability but not used.
#define assert_msg(x,msg) assert(x)
#define not_implemented assert_msg(0, "Not implemented")
#define invalid_codepath assert_msg(0, "Invalid codepath")
// Element count of an array. Only meaningful on real arrays, not on pointers.
#define buff_cap(x) (sizeof(x)/sizeof((x)[0]))
// Builds a String from a string literal; the length excludes the trailing 0.
#define lit(x) ((String){(U8*)x,buff_cap(x)-1})
// Annotation marker that expands to nothing.
#define meta(x)
#include <stdint.h>
// Short names for the fixed-width integer types: S = signed, U = unsigned.
typedef int8_t S8;
typedef int16_t S16;
typedef int32_t S32;
typedef int64_t S64;
typedef uint8_t U8;
typedef uint16_t U16;
typedef uint32_t U32;
typedef uint64_t U64;
// Boolean types of various widths; each is an alias of the signed integer of
// the same size.
typedef S8 B8;
typedef S16 B16;
typedef S32 B32;
typedef S64 B64;
// 64-bit size types (unsigned and signed).
typedef uint64_t SizeU;
typedef int64_t SizeI;
typedef float F32;
typedef double F64;
// Boolean constants. Declared as enumerators rather than `const B32` objects:
// an object defined in a header gets external linkage and would be defined
// once per translation unit that includes it, and enumerators are also usable
// in constant expressions. Their type is int, which B32 values accept as is.
enum { false = 0, true = 1 };
// Pointer-plus-length string. The lit() macro builds one from a string literal
// with `len` excluding the terminating 0.
typedef struct String{
U8 *str;
S64 len;
}String;