Delete lex.c

This commit is contained in:
Krzosa Karol
2022-06-03 23:16:15 +02:00
parent a324a5abf0
commit 4910671ab9

642
lex.c
View File

@@ -1,642 +0,0 @@
// Interned keyword strings, listed in the order init_default_keywords()
// interns them.
global Intern_String keyword_if;
global Intern_String keyword_cast;
global Intern_String keyword_for;
global Intern_String keyword_else;
global Intern_String keyword_defer;
global Intern_String keyword_do;
global Intern_String keyword_size_type;
global Intern_String keyword_size_expr;
global Intern_String keyword_typeof;
global Intern_String keyword_const;
global Intern_String keyword_while;
global Intern_String keyword_return;
global Intern_String keyword_switch;
global Intern_String keyword_typedef;
global Intern_String keyword_case;
global Intern_String keyword_struct;
global Intern_String keyword_enum;
global Intern_String keyword_union;

// Text pointers of the first ("if") and last ("union") interned keyword;
// lex_is_keyword() tests an interned string against this inclusive range.
global U8 *first_keyword;
global U8 *last_keyword;

// Interned names that are set up alongside the keywords but after
// last_keyword has been chosen, so they are not classified as keywords.
global Intern_String intern_char;
global Intern_String intern_void;
global Intern_String intern_int;
// Interns every language keyword plus a few built-in type names into `t`
// and stores the results in the file-level globals.
function void
init_default_keywords(Intern_Table *t){
    // Keywords are interned one after another, "if" first and "union" last.
    // lex_is_keyword() checks a pointer range bounded by those two strings,
    // so the order of these calls must be kept.
    // NOTE(review): presumably relies on the intern table handing out
    // monotonically increasing addresses for new strings - confirm.
    keyword_if        = intern_string(t, lit("if"));
    keyword_cast      = intern_string(t, lit("cast"));
    keyword_for       = intern_string(t, lit("for"));
    keyword_else      = intern_string(t, lit("else"));
    keyword_defer     = intern_string(t, lit("defer"));
    keyword_do        = intern_string(t, lit("do"));
    keyword_size_type = intern_string(t, lit("size_type"));
    keyword_size_expr = intern_string(t, lit("size_expr"));
    keyword_typeof    = intern_string(t, lit("typeof"));
    keyword_const     = intern_string(t, lit("const"));
    keyword_while     = intern_string(t, lit("while"));
    keyword_return    = intern_string(t, lit("return"));
    keyword_switch    = intern_string(t, lit("switch"));
    keyword_typedef   = intern_string(t, lit("typedef"));
    keyword_case      = intern_string(t, lit("case"));
    keyword_struct    = intern_string(t, lit("struct"));
    keyword_enum      = intern_string(t, lit("enum"));
    keyword_union     = intern_string(t, lit("union"));

    // Record the inclusive keyword range.
    first_keyword = keyword_if.s.str;
    last_keyword  = keyword_union.s.str;

    // Interned after the range is closed, so these are not keywords.
    intern_char = intern_string(t, lit("char"));
    intern_void = intern_string(t, lit("void"));
    intern_int  = intern_string(t, lit("int"));
}
// Returns 1 when the interned string's text pointer lies inside the
// inclusive [first_keyword, last_keyword] range, 0 otherwise.
function B32
lex_is_keyword(Intern_String str){
    U8 *text = str.s.str;
    if(text < first_keyword) return 0;
    if(text > last_keyword) return 0;
    return 1;
}
// Every kind of token the lexer can tag a Token with.
//
// Enumerator order is significant: each TK_First*/TK_Last* pair aliases the
// first and last enumerator of the group listed just before it, giving an
// inclusive range. Because every group ends with its TK_Last* alias, the
// enumerator that follows continues from TK_Last* + 1, so the aliases do not
// leave gaps. token_kind_string is indexed by these values.
typedef enum Token_Kind{
// Value 0, so a zero-initialized Token has this kind.
TK_End,
// Multiplicative group (includes the shifts).
TK_Mul,
TK_Div,
TK_Mod,
TK_LeftShift,
TK_RightShift,
TK_FirstMul = TK_Mul,
TK_LastMul = TK_RightShift,
// Additive group.
TK_Add,
TK_Sub,
TK_FirstAdd = TK_Add,
TK_LastAdd = TK_Sub,
// Comparison group.
TK_Equals,
TK_LesserThenOrEqual,
TK_GreaterThenOrEqual,
TK_LesserThen,
TK_GreaterThen,
TK_NotEquals,
TK_FirstCompare = TK_Equals,
TK_LastCompare = TK_NotEquals,
// Bitwise/logical group. TK_Pointer is the kind assigned to '^'.
TK_BitAnd,
TK_BitOr,
TK_Pointer,
TK_And,
TK_Or,
TK_FirstLogical = TK_BitAnd,
TK_LastLogical = TK_Or,
// TK_Neg is '~', TK_Not is '!'.
TK_Neg,
TK_Not,
// Brackets and punctuation.
TK_OpenParen,
TK_CloseParen,
TK_OpenBrace,
TK_CloseBrace,
TK_OpenBracket,
TK_CloseBracket,
TK_Comma,
TK_Pound,
TK_Question,
TK_ThreeDots,
TK_Semicolon,
TK_Dot,
TK_Colon,
// Assignment group.
TK_Assign,
TK_ColonAssign,
TK_DivAssign,
TK_MulAssign,
TK_ModAssign,
TK_SubAssign,
TK_AddAssign,
TK_AndAssign,
TK_OrAssign,
TK_XorAssign,
TK_LeftShiftAssign,
TK_RightShiftAssign,
TK_FirstAssign = TK_Assign,
TK_LastAssign = TK_RightShiftAssign,
TK_DoubleColon,
TK_At,
TK_Decrement,
TK_Increment,
// NOTE(review): the post-fix kinds and TK_ExprSizeof are not assigned by
// lex__stream; presumably set by a later stage - confirm.
TK_PostDecrement,
TK_PostIncrement,
TK_Arrow,
TK_ExprSizeof,
TK_DocComment,
TK_Comment,
// Tokens that carry a payload in Token's value union.
TK_Identifier,
TK_StringLit,
TK_Character,
TK_Error,
TK_Float,
TK_Integer,
TK_Keyword,
}Token_Kind;
// A single lexed token: its kind, the source text it covers, an optional
// payload, and the source position where it started.
typedef struct Token{
    Token_Kind kind;

    // Source text of the token, reachable either as a String or through the
    // raw str/len pair that overlays it.
    union{
        String string;
        struct{
            U8 *str;
            S64 len;
        };
    };

    // Payload. The members share storage, so only one is meaningful at a
    // time: int_val is written by lex_parse_u64, error_val by token_error,
    // intern_val for identifiers, keywords and string literals.
    union {
        U64 int_val;
        F64 float_val;
        String error_val;
        Intern_String intern_val;
    };

    // Position info copied from the Lex_Stream when the token was started.
    String file;
    S32 line;
    U8 *line_begin;
}Token;
#include "token_array.c"
// Cursor state for lexing one source string.
// Field order matters: lex_add_stream initializes this struct positionally.
typedef struct Lex_Stream{
    String stream;     // text being lexed
    S64    iter;       // index of the current character within stream
    U8    *line_begin; // pointer to the first character of the current line
    String file;       // copied into every token's `file`
    S32    line;       // incremented each time a '\n' is consumed
}Lex_Stream;
// Returns the character at the stream's current position.
function U8
lexc(Lex_Stream *s){
    U8 *cursor = s->stream.str + s->iter;
    return *cursor;
}
// Returns the character `i` positions away from the current one (lookahead).
// No bounds check is performed here.
function U8
lexci(Lex_Stream *s, S32 i){
    S64 index = s->iter + i;
    return s->stream.str[index];
}
// Returns a pointer to the stream's current character.
function U8 *
lexcp(Lex_Stream *s){
    return &s->stream.str[s->iter];
}
// Returns non-zero when `c` is a character the lexer skips between tokens:
// space, horizontal tab, line feed or carriage return.
//
// The previous version tested '\r' twice and never '\t', so a tab in the
// source fell through to the "Unknown token" error path.
function B32
lex_is_whitespace(U8 c){
    B32 result = c == ' ' || c == '\t' || c == '\n' || c == '\r';
    return result;
}
// Returns 1 for ASCII letters A-Z and a-z, 0 for anything else.
function B32
lex_is_alphabetic(U8 c){
    B32 is_upper = ('A' <= c) && (c <= 'Z');
    B32 is_lower = ('a' <= c) && (c <= 'z');
    return is_upper || is_lower;
}
// Returns 1 for ASCII decimal digits 0-9, 0 for anything else.
function B32
lex_is_numeric(U8 c){
    if(c < '0') return 0;
    if(c > '9') return 0;
    return 1;
}
// Returns 1 when `c` is an ASCII digit or an ASCII letter, 0 otherwise.
function B32
lex_is_alphanumeric(U8 c){
    if(lex_is_numeric(c)) return 1;
    return lex_is_alphabetic(c) ? 1 : 0;
}
// Sets token->len to the distance from the token's first character to the
// stream's current position. The cursor must not be before the token start.
function void
lex_set_len(Lex_Stream *s, Token *token){
    U8 *cursor = lexcp(s);
    assert(cursor >= token->str);
    token->len = cursor - token->str;
}
// Turns `t` into an error token carrying the message `error_val`.
// error_val shares storage with the other payload members of Token, so any
// previously stored payload is replaced.
function void
token_error(Token *t, String error_val){
    t->error_val = error_val;
    t->kind = TK_Error;
}
// Parses the token's text (t->str, t->len) as an unsigned decimal number and
// stores it in t->int_val. The caller is expected to have verified that the
// text consists only of the characters '0'..'9'.
//
// If the value does not fit in a U64 the token is turned into a TK_Error
// token with the message "Integer overflow" and int_val is not written.
//
// The check is done before each step so that no wrapped intermediate value
// is ever produced. The previous right-to-left version only detected wrap in
// the final addition; the products `val * m` and `m *= 10` could wrap
// silently, so some out-of-range literals (for example
// "28446744073709551616") were accepted with a wrong value.
function void
lex_parse_u64(Token *t){
    const U64 max_value = ~(U64)0;
    U64 result = 0;
    for(S64 i = 0; i < t->len; ++i){
        U64 digit = (U64)(t->str[i] - '0');
        // result*10 + digit <= max_value  <=>  result <= (max_value - digit)/10
        if(result > (max_value - digit) / 10){
            token_error(t, lit("Integer overflow"));
            return;
        }
        result = result * 10 + digit;
    }
    t->int_val = result;
}
// Moves the stream one character forward, unless the cursor has already
// reached stream.len. Consuming a '\n' also bumps the line counter and
// records the start of the next line.
function void
lex_advance(Lex_Stream *s){
    if(s->iter >= s->stream.len) return;

    B32 consumed_newline = (lexc(s) == '\n');
    s->iter += 1;
    if(consumed_newline){
        s->line += 1;
        s->line_begin = lexcp(s);
    }
}
// Scans forward to the closing delimiter `c` of a quoted literal whose
// opening delimiter has already been consumed.
//
// A backslash causes the character after it to be skipped without being
// inspected. Reaching a 0 byte first turns `t` into an error token. On
// success the closing delimiter is consumed and t->len is set to cover the
// whole literal, delimiters included.
function void
lex_parse_string(Lex_Stream *s, Token *t, U8 c){
    for(;;){
        U8 current = lexc(s);
        if(current == '\\'){
            // Step over the backslash; the advance at the bottom of the loop
            // then steps over the escaped character.
            lex_advance(s);
        }
        else if(current == c){
            break;
        }
        else if(current == 0){
            token_error(t, lit("Unterminated string, reached end of file"));
            break;
        }
        lex_advance(s);
    }

    if(t->kind == TK_Error){
        return;
    }
    lex_advance(s);
    lex_set_len(s,t);
}
// CASE2 expands to a complete `case op:` for an operator that has a plain
// form and a form followed by '='.
// It reads the next character through `lexc(s)` and writes `t.kind`, so it
// must be used where `s` (Lex_Stream *) and `t` (Token) are in scope.
// If the next character is '=', it is consumed and the kind is `Assign`;
// otherwise the kind is `OpName`. The expansion ends in `break` with no
// semicolon; the use site supplies it.
#define CASE2(op, OpName, Assign) \
case op: \
if (lexc(s) == '=') { \
lex_advance(s); \
t.kind = Assign; \
} else { \
t.kind = OpName; \
} \
break
// CASE3 is CASE2 plus a doubled-operator form: when the next character is
// `op` itself (e.g. "++", "&&"), it is consumed and the kind is `Incr`.
// The '=' form is checked first.
#define CASE3(op, OpName, Assign, Incr) \
case op: \
if (lexc(s) == '=') { \
lex_advance(s); \
t.kind = Assign; \
} else if (lexc(s) == op) { \
lex_advance(s); \
t.kind = Incr; \
} else { \
t.kind = OpName; \
} \
break
// Lexes stream `s` from its current position and pushes the resulting
// tokens onto `array`.
//
// - Whitespace between tokens is skipped.
// - Line comments and terminated block comments are dropped (no token is
//   pushed for them).
// - An unterminated block comment, an unterminated string literal and any
//   unrecognized character each produce a TK_Error token.
// - Identifiers and string literals are interned into array->interns;
//   identifiers that fall into the keyword range become TK_Keyword.
// - A token whose length was not set by its case gets the length from its
//   first character up to the cursor.
//
// The loop stops when the current character is 0.
// NOTE(review): this assumes the stream text is followed by a 0 byte -
// confirm at the call sites.
//
// Fix relative to the previous version: a string literal is interned only
// when it was parsed successfully. intern_val and error_val share storage in
// Token, so interning after lex_parse_string had reported an error replaced
// the error message.
function void
lex__stream(Token_Array *array, Lex_Stream *s){
    while(lexc(s)){
        while(lex_is_whitespace(lexc(s)))
            lex_advance(s);

        // Start a new token at the cursor and consume its first character;
        // the switch below dispatches on that character.
        Token t = {0};
        t.str = lexcp(s);
        t.file = s->file;
        t.line = s->line;
        t.line_begin = s->line_begin;
        lex_advance(s);

        switch(*t.str){
            // Only trailing whitespace was left; t.kind stays TK_End.
            case 0: break;
            case '@': t.kind = TK_At; break;
            case '(': t.kind = TK_OpenParen; break;
            case ')': t.kind = TK_CloseParen; break;
            case '{': t.kind = TK_OpenBrace; break;
            case '}': t.kind = TK_CloseBrace; break;
            case '[': t.kind = TK_OpenBracket; break;
            case ']': t.kind = TK_CloseBracket; break;
            case ',': t.kind = TK_Comma; break;
            case '~': t.kind = TK_Neg; break;
            case '?': t.kind = TK_Question; break;
            case ';': t.kind = TK_Semicolon; break;
            case '#': t.kind = TK_Pound; break;
            CASE2('!', TK_Not, TK_NotEquals);
            CASE2('^', TK_Pointer, TK_XorAssign);
            CASE2('=', TK_Assign, TK_Equals);
            CASE2('*', TK_Mul, TK_MulAssign);
            CASE2('%', TK_Mod, TK_ModAssign);
            CASE3('+', TK_Add, TK_AddAssign, TK_Increment);
            CASE3('&', TK_BitAnd, TK_AndAssign, TK_And);
            CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
#undef CASE2
#undef CASE3
            case '.': {
                if(lexc(s) == '.' && lexci(s,1) == '.') {
                    lex_advance(s); lex_advance(s);
                    t.kind = TK_ThreeDots;
                }
                else {
                    t.kind = TK_Dot;
                }
            } break;
            case '<': {
                if (lexc(s) == '<') {
                    lex_advance(s);
                    if (lexc(s) == '=') {
                        lex_advance(s);
                        t.kind = TK_LeftShiftAssign;
                    }
                    else {
                        t.kind = TK_LeftShift;
                    }
                }
                else if (lexc(s) == '=') {
                    lex_advance(s);
                    t.kind = TK_LesserThenOrEqual;
                }
                else {
                    t.kind = TK_LesserThen;
                }
            } break;
            case '>': {
                if (lexc(s) == '>') {
                    lex_advance(s);
                    if (lexc(s) == '=') {
                        lex_advance(s);
                        t.kind = TK_RightShiftAssign;
                    }
                    else {
                        t.kind = TK_RightShift;
                    }
                }
                else if (lexc(s) == '=') {
                    lex_advance(s);
                    t.kind = TK_GreaterThenOrEqual;
                }
                else {
                    t.kind = TK_GreaterThen;
                }
            } break;
            case ':': {
                if (lexc(s) == ':') {
                    lex_advance(s);
                    t.kind = TK_DoubleColon;
                }
                else if(lexc(s) == '='){
                    lex_advance(s);
                    t.kind = TK_ColonAssign;
                }
                else {
                    t.kind = TK_Colon;
                }
            } break;
            case '-':{
                if (lexc(s) == '=') {
                    lex_advance(s);
                    t.kind = TK_SubAssign;
                }
                else if (lexc(s) == '-') {
                    lex_advance(s);
                    t.kind = TK_Decrement;
                }
                else if (lexc(s) == '>') {
                    lex_advance(s);
                    t.kind = TK_Arrow;
                }
                else {
                    t.kind = TK_Sub;
                }
            } break;
            case '\'':{not_implemented;} break;
            case '"': {
                t.kind = TK_StringLit;
                lex_parse_string(s,&t,'"');
                if(t.kind != TK_Error){
                    // Strip the surrounding quotes, then intern the contents.
                    t.str += 1;
                    t.len -= 2;
                    t.intern_val = intern_string(&array->interns, t.string);
                }
                // On error the payload keeps the message set by token_error.
            } break;
            case '/': {
                if(lexc(s) == '='){
                    t.kind = TK_DivAssign;
                    lex_advance(s);
                }
                else if(lexc(s) == '/'){
                    // Line comment: skip to end of line and drop the token.
                    lex_advance(s);
                    t.kind = TK_Comment;
                    for(;;){
                        if(lexc(s) == '\n' || lexc(s) == 0) break;
                        lex_advance(s);
                    }
                    continue;
                }
                else if(lexc(s) == '*'){
                    // Block comment: dropped when terminated, otherwise the
                    // token becomes an error and is pushed below.
                    lex_advance(s);
                    t.kind = TK_Comment;
                    for(;;){
                        if(lexc(s) == '*' && lexci(s,1) == '/'){
                            lex_advance(s);
                            lex_advance(s);
                            break;
                        }
                        else if(lexc(s) == 0){
                            token_error(&t, lit("Unterminated block comment"));
                            goto skip_continue;
                        }
                        lex_advance(s);
                    }
                    continue;
                    skip_continue:;
                }
                else {
                    t.kind = TK_Div;
                }
            } break;
            case '0':case '1':case '2':case '3':case '4':
            case '5':case '6':case '7':case '8':case '9':{
                t.kind = TK_Integer;
                while(lex_is_numeric(lexc(s)))
                    lex_advance(s);
                lex_set_len(s, &t);
                lex_parse_u64(&t);
            } break;
            case 'A':case 'a':case 'M':case 'm':case 'B':
            case 'b':case 'N':case 'n':case 'C':case 'c':case 'O':
            case 'o':case 'D':case 'd':case 'P':case 'p':case 'E':
            case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R':
            case 'r':case 'G':case 'g':case 'S':case 's':case 'H':
            case 'h':case 'T':case 't':case 'I':case 'i':case 'U':
            case 'u':case 'J':case 'j':case 'V':case 'v':case 'K':
            case 'k':case 'W':case 'w':case 'L':case 'X':case 'l':
            case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': {
                t.kind = TK_Identifier;
                while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
                    lex_advance(s);
                lex_set_len(s,&t);
                t.intern_val = intern_string(&array->interns, t.string);
                if(lex_is_keyword(t.intern_val)){
                    t.kind = TK_Keyword;
                }
            } break;
            default: {
                token_error(&t, lit("Unknown token"));
            }
        }

        // Cases that did not compute a length get the span consumed so far.
        if(t.len==0)
            lex_set_len(s,&t);
        token_array_push(array, &t);
    }
}
// Lexes `stream` from its beginning and appends the tokens to `array`.
// `file` is copied into every token; line counting starts at 0.
function void
lex_add_stream(Token_Array *array, String stream, String file){
    Lex_Stream state = {0};
    state.stream     = stream;
    state.iter       = 0;
    state.line_begin = stream.str;
    state.file       = file;
    state.line       = 0;
    lex__stream(array, &state);
}
// Creates a token array on `arena` and interns the default keywords into
// the array's intern table.
function Token_Array
lex_make_token_array(Arena *arena){
    Token_Array result = token_array_make(arena);
    init_default_keywords(&result.interns);
    return result;
}
// Convenience entry point: makes a fresh token array on `arena`, lexes
// `stream` into it and returns it. `file` is recorded in each token.
function Token_Array
lex_stream(Arena *arena, String stream, String file){
    Token_Array tokens = lex_make_token_array(arena);
    lex_add_stream(&tokens, stream, file);
    return tokens;
}
// Reuses an existing token array for a new input: calls token_array_reset
// on it and then lexes `stream` into it from the start.
// NOTE(review): presumably token_array_reset discards the previous tokens
// and keeps the intern table (keywords are not re-interned here) - confirm
// in token_array.c.
function void
lex_restream(Token_Array *array, String stream, String file){
token_array_reset(array);
lex_add_stream(array, stream, file);
}
// Self-test for the lexer: lexes a fixed input and checks every token's
// kind and text, and for integer tokens its parsed value, against the
// expectation tables below.
//
// The index increments are kept outside assert() so the loop does not
// change meaning when assertions are compiled out, and each table index is
// bounds-checked before use so a lexer that emits extra tokens fails an
// assertion instead of reading past the tables.
function void
lex_test(){
    Arena *scratch = arena_begin_scratch();
    String test = lit("18446744073709551616{})(@?&+-;....->,:::/**/\"Thing\"//R\n Thingy"
                      "\"Test_Meme\"+=-===42524 4294967295 18446744073709551615"
                      "for if while switch :=");
    Token_Array array = lex_stream(scratch, test, lit("Test1"));

    Token_Kind kind[] = {
        TK_Error,TK_OpenBrace,TK_CloseBrace,TK_CloseParen,TK_OpenParen,
        TK_At,TK_Question,TK_BitAnd,TK_Add,TK_Sub,TK_Semicolon,
        TK_ThreeDots, TK_Dot, TK_Arrow, TK_Comma, TK_DoubleColon, TK_Colon,
        TK_StringLit, TK_Identifier, TK_StringLit, TK_AddAssign, TK_SubAssign,
        TK_Equals, TK_Integer, TK_Integer, TK_Integer, TK_Keyword, TK_Keyword,
        TK_Keyword, TK_Keyword, TK_ColonAssign, TK_End
    };
    String strs[] = {
        lit("18446744073709551616"),lit("{"),lit("}"),lit(")"),lit("("),
        lit("@"),lit("?"),lit("&"),lit("+"),lit("-"),lit(";"),
        lit("..."),lit("."),lit("->"),lit(","),lit("::"),lit(":"),
        lit("Thing"),lit("Thingy"),lit("Test_Meme"), lit("+="),lit("-="),
        lit("=="),lit("42524"),lit("4294967295"),lit("18446744073709551615"),
        lit("for"), lit("if"), lit("while"), lit("switch"), lit(":="), lit(""),
    };
    U64 vals[] = {
        42524, 4294967295, 18446744073709551615llu
    };

    int i = 0;  // index into kind[] / strs[]
    int ui = 0; // index into vals[], advanced only on integer tokens
    for(Token *t = token_array_iter_begin(&array); t->kind != TK_End; t = token_array_iter_next(&array)){
        assert(i < (int)(sizeof(kind)/sizeof(kind[0])));
        assert(t->kind == kind[i]);
        assert(string_compare(t->string, strs[i]));
        if(t->kind == TK_Integer){
            assert(ui < (int)(sizeof(vals)/sizeof(vals[0])));
            assert(t->int_val == vals[ui]);
            ui += 1;
        }
        i += 1;
    }
    arena_end_scratch();
}
//-----------------------------------------------------------------------------
// Token metadata
//-----------------------------------------------------------------------------
global const char *token_kind_string[] = {
[TK_End] = "End of stream",
[TK_Mul] = "*",
[TK_Div] = "/",
[TK_Add] = "+",
[TK_Sub] = "-",
[TK_Mod] = "%",
[TK_BitAnd] = "&",
[TK_BitOr] = "|",
[TK_Pointer] = "^",
[TK_Neg] = "~",
[TK_Not] = "!",
[TK_OpenParen] = "(",
[TK_CloseParen] = " ",
[TK_OpenBrace] = "{",
[TK_CloseBrace] = "}",
[TK_OpenBracket] = "[",
[TK_CloseBracket] = "]",
[TK_Comma] = ",",
[TK_Pound] = "#",
[TK_Question] = "?",
[TK_ThreeDots] = "...",
[TK_Semicolon] = ";",
[TK_Dot] = ".",
[TK_LesserThen] = "<",
[TK_GreaterThen] = ">",
[TK_Colon] = ":",
[TK_Assign] = "=",
[TK_ColonAssign] = ":=",
[TK_DivAssign] = "/=",
[TK_MulAssign] = "*=",
[TK_ModAssign] = "%=",
[TK_SubAssign] = "-=",
[TK_AddAssign] = "+=",
[TK_AndAssign] = "&=",
[TK_OrAssign] = "|=",
[TK_XorAssign] = "^=",
[TK_LeftShiftAssign] = "<<=",
[TK_RightShiftAssign] = ">>=",
[TK_DoubleColon] = "::",
[TK_At] = "@",
[TK_Decrement] = "--",
[TK_Increment] = "++",
[TK_PostDecrement] = "--",
[TK_PostIncrement] = "++",
[TK_LesserThenOrEqual] = "<=",
[TK_GreaterThenOrEqual] = ">=",
[TK_Equals] = "==",
[TK_And] = "&&",
[TK_Or] = "||",
[TK_NotEquals] = "!=",
[TK_LeftShift] = "<<",
[TK_RightShift] = ">>",
[TK_Arrow] = "->",
[TK_ExprSizeof] = "sizeof",
[TK_DocComment] = "DocComment",
[TK_Comment] = "Comment",
[TK_Identifier] = "Identifier",
[TK_StringLit] = "StringLit",
[TK_Character] = "Character",
[TK_Error] = "Error",
[TK_Float] = "Float",
[TK_Integer] = "int",
[TK_Keyword] = "Keyword",
};