Big renames
This commit is contained in:
637
lexer.cpp
637
lexer.cpp
@@ -1,637 +0,0 @@
|
||||
// True iff `token` is an assignment operator; relies on the assignment
// kinds being contiguous in the TK_FirstAssign..TK_LastAssign range.
force_inline B32
token_is_assign(Token_Kind token){
    B32 in_range = (TK_FirstAssign <= token) && (token <= TK_LastAssign);
    return in_range;
}
|
||||
// Pointer overload: classify the token by its kind field.
force_inline B32
token_is_assign(Token *token){
    return token_is_assign(token->kind);
}
|
||||
// True iff `token` is a comparison operator; relies on the comparison
// kinds being contiguous in the TK_FirstCompare..TK_LastCompare range.
force_inline B32
token_is_compare(Token_Kind token){
    B32 in_range = (TK_FirstCompare <= token) && (token <= TK_LastCompare);
    return in_range;
}
|
||||
// Pointer overload: classify the token by its kind field.
force_inline B32
token_is_compare(Token *token){
    return token_is_compare(token->kind);
}
|
||||
// Shared sentinel token, returned by lex_last_indent_token when the indent
// stack is empty. Initialized with SAME_SCOPE (presumably the kind field is
// first in Token — TODO confirm member order); everything else is zero.
global Token token_null = {SAME_SCOPE};
|
||||
|
||||
// Byte currently under the stream cursor.
function U8
lexc(Lex_Stream *s){
    U8 result = s->stream.str[s->iter];
    return result;
}
|
||||
|
||||
// Byte at signed offset `i` from the stream cursor (lookahead helper).
function U8
lexci(Lex_Stream *s, S32 i){
    U8 result = s->stream.str[s->iter + i];
    return result;
}
|
||||
|
||||
// Pointer to the byte currently under the stream cursor.
function U8 *
lexcp(Lex_Stream *s){
    U8 *result = s->stream.str + s->iter;
    return result;
}
|
||||
|
||||
// Only space and carriage return count here; '\t' and '\n' are significant
// to the indentation pass in lex__stream and are handled there.
function B32
lex_is_whitespace(U8 c){
    return (c == ' ') || (c == '\r');
}
|
||||
|
||||
// ASCII letter test (no locale, no Unicode).
function B32
lex_is_alphabetic(U8 c){
    B32 upper = (c >= 'A') && (c <= 'Z');
    B32 lower = (c >= 'a') && (c <= 'z');
    return upper || lower;
}
|
||||
|
||||
// ASCII decimal digit test.
function B32
lex_is_numeric(U8 c){
    return ('0' <= c) && (c <= '9');
}
|
||||
|
||||
// ASCII letter or digit.
function B32
lex_is_alphanumeric(U8 c){
    return lex_is_alphabetic(c) || lex_is_numeric(c);
}
|
||||
|
||||
// Finalize token->len as the distance from the token's start byte to the
// current stream cursor.
function void
lex_set_len(Lex_Stream *s, Token *token){
    U8 *end = lexcp(s);
    assert(end >= token->str);
    token->len = end - token->str;
}
|
||||
|
||||
// Interns every keyword and records the [first_keyword, last_keyword]
// intern-pointer range so lex_is_keyword can classify an identifier with
// two pointer comparisons. Assumes the intern table hands back pointers in
// a contiguous/ordered fashion for consecutive interns.
function void
lex_set_keywords(Lexer *lexer, Array<String> keywords){
    // Fix: with an empty array the original wrote last_keyword from a
    // zero-initialized Intern_String, corrupting the keyword range.
    if(keywords.len <= 0) return;
    Intern_String keyword = {};
    For(keywords){
        keyword = intern_string(&lexer->interns, it);
        if(&it == keywords.begin())
            lexer->interns.first_keyword = keyword.str;
    }
    lexer->interns.last_keyword = keyword.str;
}
|
||||
|
||||
// A string is a keyword iff its interned pointer lies inside the range
// recorded by lex_set_keywords — a pointer-range test, no string compare.
function B32
lex_is_keyword(Intern_Table *lexer, Intern_String keyword){
    return (keyword.str >= lexer->first_keyword) &&
           (keyword.str <= lexer->last_keyword);
}
|
||||
|
||||
// Turn `t` into an error token carrying `error_val` as its message.
function void
token_error(Token *t, String error_val){
    t->error_val = error_val;
    t->kind = TK_Error;
}
|
||||
|
||||
// Parse the token's decimal digit text into an arbitrary-precision integer.
// Digits are consumed least-significant first, accumulating digit * 10^k.
// All intermediate BigInts are allocated from a function-local Scratch
// (installed via Set_BigInt_Allocator); only the final value is copied
// into the lexer's arena, so the noted @leaks die with the scratch.
function void
lex_parse_u64(Lexer *lexer, Token *t){
    Scratch scratch;
    Set_BigInt_Allocator(scratch);

    t->kind = TK_Integer;
    BigInt m = bigint_u64(1); // running power of ten // @leak, it accumulates and potentially needs allocation
    BigInt val10 = bigint_u64(10);
    BigInt result = bigint_u64(0);

    for(S64 i = t->len - 1; i >= 0; --i){
        BigInt val = bigint_u64(t->str[i] - '0'); // I dont think this is a leak, too small
        BigInt new_val = bigint_mul(&val, &m); // @leak
        result = bigint_add(&result, &new_val); // @leak
        m = bigint_mul(&m, &val10); // @leak
    }

    // Persist the result beyond the scratch lifetime.
    t->int_val = bigint_copy(lexer->arena, &result);
}
|
||||
|
||||
// Parse the token text as a float via strtod. The text is a slice of the
// source buffer and not NUL-terminated, so it is copied into a bounded
// local buffer first; oversized literals are silently truncated.
function void
lex_parse_f64(Token *t){
    t->kind = TK_Float;
    char buffer[128];
    // Fix: derive the clamp from the buffer size (127 usable bytes + NUL)
    // instead of the hard-coded 126, which wasted a byte and would silently
    // drift if the buffer were resized.
    S64 len = clamp_top((int)t->len, (int)sizeof(buffer) - 1);
    memory_copy(buffer, t->str, len);
    buffer[len] = 0;
    t->f64_val = strtod(buffer, 0);
}
|
||||
|
||||
// Step the cursor one byte forward, maintaining the line counter and the
// line-start pointer used for diagnostics. No-op at end of stream.
function void
lex_advance(Lex_Stream *s){
    if(s->iter >= s->stream.len) return; // already at the end; stay put
    U8 c = lexc(s);
    s->iter++;
    if(c == '\n'){
        s->line++;
        s->line_begin = lexcp(s); // first byte of the new line
    }
}
|
||||
|
||||
// Consume characters until an unescaped closing quote `c`. On success the
// cursor ends just past the closing quote and the token's length (still
// including both quote characters — the caller trims them) is recorded.
// Hitting NUL first marks the token as an error and leaves len untouched.
function void
lex_parse_string(Lex_Stream *s, Token *t, U8 c){
    for(;;){
        // A backslash skips itself here; the escaped character is then
        // consumed by the shared advance at the bottom of the loop.
        if(lexc(s) == '\\') lex_advance(s);
        else if(lexc(s) == c) break;
        else if(lexc(s) == 0){
            token_error(t, "Unterminated string, reached end of file"_s);
            break;
        }
        lex_advance(s);
    }
    if(t->kind != TK_Error){
        lex_advance(s); // step past the closing quote
        lex_set_len(s,t);
    }
}
|
||||
|
||||
// Consume an identifier tail ([A-Za-z0-9_]*), record its length, and intern
// the resulting text into the table.
function void
lex_parse_ident(Intern_Table *table, Lex_Stream *s, Token *t){
    for(;;){
        U8 c = lexc(s);
        if(!(lex_is_alphanumeric(c) || c == '_')) break;
        lex_advance(s);
    }
    lex_set_len(s, t);
    t->intern_val = intern_string(table, t->string);
}
|
||||
|
||||
// Switch-case helper for two-way operators: `op` alone yields OpName,
// `op=` yields the compound-assignment kind. Expects `s` (Lex_Stream *)
// and `t` (Token) in the enclosing scope; the leading char was consumed.
#define CASE2(op, OpName, Assign) \
    case op: \
        if (lexc(s) == '=') { \
            lex_advance(s); \
            t.kind = Assign; \
        } else { \
            t.kind = OpName; \
        } \
        break
|
||||
// Switch-case helper for three-way operators: `op` -> OpName, `op=` ->
// compound assignment, doubled `op` (e.g. "++", "&&") -> Incr. Same
// environment expectations as CASE2.
#define CASE3(op, OpName, Assign, Incr) \
    case op: \
        if (lexc(s) == '=') { \
            lex_advance(s); \
            t.kind = Assign; \
        } else if (lexc(s) == op) { \
            lex_advance(s); \
            t.kind = Incr; \
        } else { \
            t.kind = OpName; \
        } \
        break
|
||||
|
||||
// Build a zero-initialized token anchored at `str`, stamped with source
// location metadata and a fresh monotonically-increasing debug id.
function Token
token_make(Lexer *lexer, U8 *str, String file, int line, U8 *line_begin){
    Token t = {};
    t.di = lexer->token_debug_ids++;
    t.str = str;
    t.file = file;
    t.line = line;
    t.line_begin = line_begin;
    return t;
}
|
||||
|
||||
// Convenience overload: anchor the token at the lexer's current cursor.
function Token
token_make(Lexer *lexer){
    Lex_Stream *s = &lexer->stream;
    return token_make(lexer, lexcp(s), s->file, s->line, s->line_begin);
}
|
||||
|
||||
// Top of the indentation stack, or the shared null token when empty.
function Token *
lex_last_indent_token(Lex_Stream *s){
    Token *result = &token_null;
    if(s->indent_stack.len > 0){
        result = *s->indent_stack.last();
    }
    return result;
}
|
||||
|
||||
// True iff the token is one of the synthetic indentation-scope markers.
function B32
lex_is_scope(Token *t){
    switch(t->kind){
        case OPEN_SCOPE:
        case CLOSE_SCOPE:
        case SAME_SCOPE: return true;
        default:         return false;
    }
}
|
||||
|
||||
// Pop indentation scopes until the stack level matches t->indent, emitting
// one CLOSE_SCOPE token per popped level and a terminating SAME_SCOPE token
// (or an error token if t->indent falls between two stack levels).
function void
lex_unwind_indent_stack(Token *t, Lex_Stream *s, Array<Token> *array){
    for(S64 i = s->indent_stack.len-1; i >= 0; i-=1){
        auto it = s->indent_stack.data[i];
        assert(lex_is_scope(it));
        if(it->indent == t->indent){
            // Landed exactly on an enclosing scope: continue it.
            t->kind = SAME_SCOPE;
            array->add(*t);
            break;
        }
        else if(it->indent < t->indent){
            // New indent is deeper than this level but didn't match any
            // shallower one above -> inconsistent indentation.
            token_error(t, "Bad indentation"_s);
            array->add(*t);
            break;
        }
        else{
            // Stack level is deeper than the target: close it and keep going.
            s->indent_stack.pop();
            t->kind = CLOSE_SCOPE;
            array->add(*t);
        }
    }
}
|
||||
|
||||
// Main tokenization loop. Each iteration runs two stages:
//   1. an indentation pass that eats whitespace/comments/newlines and emits
//      synthetic OPEN_SCOPE / SAME_SCOPE / CLOSE_SCOPE tokens by comparing
//      the measured indent against the indent stack;
//   2. a character switch that lexes one real token.
// The loop ends by unwinding the whole indent stack at end of stream.
function void
lex__stream(Lexer *lexer){
    Intern_Table *table = &lexer->interns;
    Array<Token> *array = &lexer->tokens;
    Lex_Stream *s = &lexer->stream;

    B32 beginning = true;
    for(;;){
        if(lexc(s) == 0 || s->iter >= s->stream.len){
            end_of_stream:
            Token t = token_make(lexer);
            lex_unwind_indent_stack(&t, s, array);
            break;
        }

        // @note: the lexer is going to be a 2 stage process
        // first we tokenize the indentation and then proceed to tokenize
        // the good stuff

        // for blocks of stmts we parse till we cant find another new line
        // of same scope.
        // parse_decl doesn't require preceding new line
        //
        // in that way new lines act as commas in function params
        // seeing a comma means that there is a next thing to parse
        // and it's easy to parse stuff using a do while loop

        // @note: first handle indentation
        // mostly we want to merge multiple new lines
        // but for down scopes we want to emit 2 new lines
        // that will ease out parsing, one token to break out
        // from a block parsing, second to allow continuation of surrounding scope
        Token t = token_make(lexer);
        B32 should_emit = beginning; // emit a scope token for the very first token too
        for(;;){
            switch(lexc(s)){
                case 0 : goto end_of_stream; break;
                case '\t': case ' ': lex_advance(s); t.indent++; break; // tab and space both count as one indent unit
                case '\r': lex_advance(s); break;
                case '/': {
                    if(lexci(s,1) == '/'){
                        // Line comment: consume to end of line (newline handled next iteration).
                        lex_advance(s); lex_advance(s);
                        t.kind = TK_Comment;
                        for(;;){
                            if(lexc(s) == '\n' || lexc(s) == 0) break;
                            lex_advance(s);
                        }
                    }
                    else if(lexci(s,1) == '*'){
                        // Block comment: consume to the matching */ (non-nesting).
                        lex_advance(s); lex_advance(s);
                        t.kind = TK_Comment;
                        for(;;){
                            if(lexc(s) == '*' && lexci(s,1) == '/'){
                                lex_advance(s); lex_advance(s);
                                break;
                            }
                            else if(lexc(s) == 0){
                                token_error(&t, "Unterminated block comment"_s);
                                break;
                            }
                            lex_advance(s);
                        }
                    }
                    else goto indent_loop_break; // a real '/' operator; stage 2 lexes it
                } break;

                // @todo: add [;;] operator which adds new scope
                // @todo: also need some way to detect indentation so that
                // first of all we can check for consistency and second of
                // all because we would know by how much to indent
                // @todo: after detecting indentation 2 spaces would become 1 indent value
                case ';' : {
                    // Explicit statement separator: same-scope marker at the current level.
                    Token semi = token_make(lexer);
                    Token *last = lex_last_indent_token(s);
                    semi.kind = SAME_SCOPE;
                    semi.indent = last->indent;
                    lex_advance(s);
                    array->add(semi);
                } break;

                case '\n':{
                    // Restart indent measurement on a fresh line; merges blank lines.
                    lex_advance(s);
                    should_emit = true;
                    t = token_make(lexer);
                } break;

                default:{
                    // Inside (), {}, [] line breaks are not significant.
                    if(s->inside_brace_paren) should_emit = false;
                    if(should_emit){
                        Token *last = lex_last_indent_token(s);
                        if(t.indent > last->indent){
                            t.kind = OPEN_SCOPE;
                            array->add(t);
                            s->indent_stack.add(array->last());
                        }

                        else if(t.indent < last->indent){
                            lex_unwind_indent_stack(&t, s, array);
                        }
                        else {
                            t.kind = SAME_SCOPE;
                            array->add(t);
                        }
                    }

                    goto indent_loop_break;
                }
            }
        } indent_loop_break:
        beginning = false;

        // @note: handle the indented token
        t = token_make(lexer);
        lex_advance(s); // first char is available via *t.str; cursor now points past it
        switch(*t.str){
            case 0 : goto end_of_stream; break;
            case '@': t.kind = TK_At; break;
            case '(': s->inside_brace_paren++; t.kind = TK_OpenParen; break;
            case ')': s->inside_brace_paren--; t.kind = TK_CloseParen; break;
            case '{': s->inside_brace_paren++; t.kind = TK_OpenBrace; break;
            case '}': s->inside_brace_paren--; t.kind = TK_CloseBrace; break;
            case '[': s->inside_brace_paren++; t.kind = TK_OpenBracket; break;
            case ']': s->inside_brace_paren--; t.kind = TK_CloseBracket; break;
            case ',': t.kind = TK_Comma; break;
            case '~': t.kind = TK_Neg; break;
            case '?': t.kind = TK_Question; break;
            case '^': t.kind = TK_BitXor; break;
            CASE2('!', TK_Not, TK_NotEquals);
            CASE2('=', TK_Assign, TK_Equals);
            CASE2('*', TK_Mul, TK_MulAssign);
            CASE2('%', TK_Mod, TK_ModAssign);
            CASE3('+', TK_Add, TK_AddAssign, TK_Increment);
            CASE3('&', TK_BitAnd, TK_AndAssign, TK_And);
            CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);

            case '#': {
                // Directive: only #foreign is recognized at the moment.
                lex_parse_ident(table, s, &t);
                if(t.intern_val.str == intern_foreign.str){
                    t.kind = TK_FOREIGN;
                }
                else token_error(&t, "Unrecognized #note"_s);
            }break;

            case '.': {
                if(lexc(s) == '.' && lexci(s,1) == '.') {
                    lex_advance(s); lex_advance(s);
                    t.kind = TK_ThreeDots;
                }
                else {
                    t.kind = TK_Dot;
                }
            } break;

            case '\'':{
                // Unicode literal: decode one UTF-8 codepoint after the quote.
                // NOTE(review): no closing quote is checked or consumed here —
                // confirm whether that is intended by the grammar.
                assert(s->stream.len >= s->iter);
                UTF32_Result decode = utf8_to_utf32(lexcp(s), s->stream.len - s->iter);
                if(!decode.error){
                    for(S32 i = 0; i < decode.advance; i++) lex_advance(s);
                    t.unicode = decode.out_str;
                    t.kind = TK_UnicodeLit;
                }
                else{
                    token_error(&t, "Invalid UTF8 sequence in unicode literal"_s);
                }
            } break;

            case '<': {
                if (lexc(s) == '<') {
                    lex_advance(s);
                    if (lexc(s) == '=') {
                        lex_advance(s);
                        t.kind = TK_LeftShiftAssign;
                    }
                    else {
                        t.kind = TK_LeftShift;
                    }
                }
                else if (lexc(s) == '=') {
                    lex_advance(s);
                    t.kind = TK_LesserThenOrEqual;
                }
                else {
                    t.kind = TK_LesserThen;
                }
            } break;

            case '>': {
                if (lexc(s) == '>') {
                    lex_advance(s);
                    if (lexc(s) == '=') {
                        lex_advance(s);
                        t.kind = TK_RightShiftAssign;
                    }
                    else {
                        t.kind = TK_RightShift;
                    }
                }
                else if (lexc(s) == '=') {
                    lex_advance(s);
                    t.kind = TK_GreaterThenOrEqual;
                }
                else {
                    t.kind = TK_GreaterThen;
                }
            } break;

            case ':': {
                if (lexc(s) == ':') {
                    lex_advance(s);
                    t.kind = TK_DoubleColon;
                }
                else if(lexc(s) == '='){
                    lex_advance(s);
                    t.kind = TK_ColonAssign;
                }
                else {
                    t.kind = TK_Colon;
                }
            } break;

            case '-':{
                if (lexc(s) == '=') {
                    lex_advance(s);
                    t.kind = TK_SubAssign;
                }
                else if (lexc(s) == '-') {
                    lex_advance(s);
                    t.kind = TK_Decrement;
                }
                else if (lexc(s) == '>') {
                    lex_advance(s);
                    t.kind = TK_Arrow;
                }
                else {
                    t.kind = TK_Sub;
                }
            } break;

            case '"': {
                t.kind = TK_StringLit;
                lex_parse_string(s,&t,'"');
                if(t.kind != TK_Error){
                    // Trim the surrounding quote characters off the token text.
                    t.str += 1;
                    t.len -= 2;
                }
                t.intern_val = intern_string(table, t.string);
            } break;

            case '/': {
                // Comments were already consumed by the indent pass, so this
                // is always division here.
                if(lexc(s) == '='){
                    t.kind = TK_DivAssign;
                    lex_advance(s);
                }
                else {
                    t.kind = TK_Div;
                }
            } break;

            case '0':case '1':case '2':case '3':case '4':
            case '5':case '6':case '7':case '8':case '9':{
                // Number literal: scan digits, at most one '.', then parse as
                // float or big integer.
                B32 found_dot = false;
                for(;;){
                    if(lex_is_numeric(lexc(s)))
                        ;
                    else if(lexc(s) == '.'){
                        if(found_dot){
                            token_error(&t, "Multiple '.' in float literal"_s);
                            goto end_of_switch;
                        }
                        found_dot = true;
                    }
                    else break;

                    lex_advance(s);
                }
                lex_set_len(s, &t);
                if(found_dot) lex_parse_f64(&t);
                else lex_parse_u64(lexer, &t);

            } break;

            case 'A':case 'a':case 'M':case 'm':case 'B':
            case 'b':case 'N':case 'n':case 'C':case 'c':case 'O':
            case 'o':case 'D':case 'd':case 'P':case 'p':case 'E':
            case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R':
            case 'r':case 'G':case 'g':case 'S':case 's':case 'H':
            case 'h':case 'T':case 't':case 'I':case 'i':case 'U':
            case 'u':case 'J':case 'j':case 'V':case 'v':case 'K':
            case 'k':case 'W':case 'w':case 'L':case 'X':case 'l':
            case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': {
                t.kind = TK_Identifier;
                lex_parse_ident(table, s, &t);
                // Keywords are identifiers whose interned pointer falls in the
                // registered keyword range.
                if(lex_is_keyword(table, t.intern_val)){
                    t.kind = TK_Keyword;
                }
            } break;

            default: {
                token_error(&t, "Unknown token"_s);
            }
        }end_of_switch:

        // Cases that didn't measure their own length get it from the cursor.
        if(t.len==0)
            lex_set_len(s,&t);

        array->add(t);
    }
#undef CASE2
#undef CASE3
}
|
||||
|
||||
// Construct and initialize a fresh Lexer.
function Lexer
lex_make(Allocator *token_string_arena, Allocator *map_allocator){
    Lexer lexer = {};
    lex_init(token_string_arena, map_allocator, &lexer);
    return lexer;
}
|
||||
|
||||
// Reset the lexer onto a new input string and tokenize it from scratch.
// Previously produced tokens are discarded.
// NOTE(review): the indent stack is allocated from a function-local Scratch;
// it is only used inside lex__stream (called before the scratch dies), but
// confirm nothing retains lexer->stream.indent_stack after this returns.
function void
lex_restream(Lexer *lexer, String istream, String file){
    lexer->stream = {};
    lexer->stream.stream = istream;
    lexer->stream.line_begin = istream.str;
    lexer->stream.file = file;


    lexer->tokens.clear();
    lexer->token_iter = 0;
    Scratch scratch;
    lexer->stream.indent_stack.allocator = scratch;
    // Seed the stack with the null token (indent 0, SAME_SCOPE).
    lexer->stream.indent_stack.add(&token_null);
    lex__stream(lexer);
}
|
||||
|
||||
// One-shot helper: build a lexer and immediately tokenize `istream`.
function Lexer
lex_stream(Allocator *token_string_arena, Allocator *map_allocator, String istream, String file){
    Lexer lexer = lex_make(token_string_arena, map_allocator);
    lex_restream(&lexer, istream, file);
    return lexer;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Token metadata
|
||||
//-----------------------------------------------------------------------------
|
||||
// Human-readable spelling of a token kind, for diagnostics and debug dumps.
function const char *
name(Token_Kind kind){
    switch(kind){
        case TK_End: return "End of stream";
        case TK_Mul: return "*";
        case TK_Div: return "/";
        case TK_Add: return "+";
        case TK_Sub: return "-";
        case TK_Mod: return "%";
        case TK_BitAnd: return "&";
        case TK_BitOr: return "|";
        case TK_BitXor: return "^";
        case TK_Neg: return "~";
        case TK_Not: return "!";
        case TK_OpenParen: return "(";
        case TK_CloseParen: return ")";
        case TK_OpenBrace: return "{";
        case TK_CloseBrace: return "}";
        case TK_OpenBracket: return "[";
        case TK_CloseBracket: return "]";
        case TK_ColonAssign: return ":=";
        case TK_Comma: return ",";
        case TK_Pound: return "#";
        case TK_Question: return "?";
        case TK_ThreeDots: return "...";
        case TK_Semicolon: return ";";
        case TK_Dot: return ".";
        case TK_LesserThen: return "<";
        case TK_GreaterThen: return ">";
        case TK_Colon: return ":";
        case TK_Assign: return "=";
        case TK_DivAssign: return "/=";
        case TK_MulAssign: return "*=";
        case TK_ModAssign: return "%=";
        case TK_SubAssign: return "-=";
        case TK_AddAssign: return "+=";
        case TK_AndAssign: return "&=";
        case TK_OrAssign: return "|=";
        case TK_XorAssign: return "^=";
        case TK_LeftShiftAssign: return "<<=";
        case TK_RightShiftAssign: return ">>=";
        case TK_DoubleColon: return "::";
        case TK_At: return "@";
        // Pre- and post- inc/dec intentionally share the same spelling.
        case TK_Decrement: return "--";
        case TK_Increment: return "++";
        case TK_PostDecrement: return "--";
        case TK_PostIncrement: return "++";
        case TK_LesserThenOrEqual: return "<=";
        case TK_GreaterThenOrEqual: return ">=";
        case TK_Equals: return "==";
        case TK_And: return "&&";
        case TK_Or: return "||";
        case TK_NotEquals: return "!=";
        case TK_LeftShift: return "<<";
        case TK_RightShift: return ">>";
        case TK_Arrow: return "->";
        case TK_NewLine: return "New_Line";
        case TK_ExprSizeof: return "sizeof";
        case TK_DocComment: return "Doc_Comment";
        case TK_Comment: return "Comment";
        case TK_Identifier: return "Identifier";
        case TK_StringLit: return "String_Lit";
        case TK_UnicodeLit: return "Unicode_Lit";
        case TK_Error: return "Error";
        case TK_Float: return "Float";
        case TK_Integer: return "int";
        case TK_Keyword: return "Keyword";
        case TK_FOREIGN: return "#foreign";
        // Synthetic indentation tokens.
        case CLOSE_SCOPE: return "Close_Scope";
        case OPEN_SCOPE: return "Open_Scope";
        case SAME_SCOPE: return "Same_Scope";
        default: invalid_codepath; return "<Undefined>";
    }
}
|
||||
11
main.cpp
11
main.cpp
@@ -158,13 +158,12 @@ Expr:
|
||||
#include "base_unicode.cpp"
|
||||
#include "big_int_c3.cpp"
|
||||
#include "compiler.h"
|
||||
#include "lexer.cpp"
|
||||
#include "lexing.cpp"
|
||||
#include "types.h"
|
||||
// #include "big_int.cpp"
|
||||
#include "new_ast.cpp"
|
||||
#include "new_parse.cpp"
|
||||
#include "typecheck.h"
|
||||
#include "typecheck.cpp"
|
||||
#include "ast.cpp"
|
||||
#include "parsing.cpp"
|
||||
#include "typechecking.h"
|
||||
#include "typechecking.cpp"
|
||||
#include "ccodegen.cpp"
|
||||
|
||||
int main(int argument_count, char **arguments){
|
||||
|
||||
628
new_ast.cpp
628
new_ast.cpp
@@ -1,628 +0,0 @@
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// AST
|
||||
//-----------------------------------------------------------------------------
|
||||
// Discriminator for every concrete Ast node type.
enum Ast_Kind: U32{
    AST_NONE,

    AST_PACKAGE,

    // Expressions.
    AST_VALUE,
    AST_CAST,
    AST_IDENT,
    AST_INDEX,
    AST_UNARY,
    AST_BINARY,
    AST_CALL_ITEM,
    AST_CALL,

    // Typespecs, statements, and bindings.
    AST_POINTER,
    AST_ARRAY,
    AST_FOR,
    AST_IF,
    AST_IF_NODE,
    AST_RETURN,
    AST_BLOCK,
    AST_PASS,
    AST_LAMBDA,
    AST_LAMBDA_ARG,
    AST_ENUM,
    AST_ENUM_MEMBER,
    AST_STRUCT,
    AST_CONST,
    AST_VAR,
};
|
||||
|
||||
// Orthogonal classification bits carried on every node alongside its kind.
typedef U32 Ast_Flag;
enum{
    AST_EXPR = 1,
    AST_STMT = 2,
    AST_BINDING = 4,
    AST_AGGREGATE = 8,
    AST_AGGREGATE_CHILD = 16,
    AST_ITEM_INCLUDED = 32,
    AST_ATOM = 64,
    AST_FOREIGN = 128,
};
|
||||
|
||||
// Base of every AST node: identity, source position, discriminator,
// parent link, and classification flags.
struct Ast{
    U64 id;      // unique per node, assigned by AST_NEW
    Token *pos;  // token the node was parsed from

    Ast_Kind kind;
    Ast *parent; // set by the constructor of the owning node
    Ast_Flag flags;
};
|
||||
|
||||
struct Ast_Resolved_Type;
struct Ast_Expr:Ast{};

// Fields shared by every constant value; kept as a macro so they can exist
// both as the standalone Value struct and inlined inside Ast_Atom (where
// the anonymous-struct overlay lets members be accessed directly).
#define VALUE_FIELDS \
    Ast_Resolved_Type *type; \
    union{ \
        bool bool_val; \
        F64 f64_val; \
        Intern_String intern_val; \
        BigInt big_int_val;\
        Ast_Resolved_Type *type_val; \
    };
#define INLINE_VALUE_FIELDS union{Value value; struct{VALUE_FIELDS};}
struct Value{VALUE_FIELDS};
// BigInt big_int_val;

// Leaf expression: a literal constant or an identifier.
struct Ast_Atom: Ast_Expr{
    INLINE_VALUE_FIELDS;
};
|
||||
|
||||
// One argument of a call: optionally named and/or indexed.
struct Ast_Call_Item: Ast_Expr{
    Ast_Atom *name; // index | name
    Ast_Expr *index;
    Ast_Expr *item;
};

struct Ast_Call: Ast_Expr{
    Ast_Resolved_Type *type; // @todo: to map
    Ast_Expr *name;
    Array<Ast_Call_Item *> exprs;
};

struct Ast_Unary: Ast_Expr{
    Token_Kind op;
    Ast_Expr *expr;
    U64 padding[3]; // For folding constants into atoms
};

struct Ast_Cast: Ast_Expr{
    Ast_Expr *expr;
    Ast_Expr *typespec;
};

struct Ast_Index: Ast_Expr{
    Ast_Expr *expr;
    Ast_Expr *index;
};

struct Ast_Binary: Ast_Expr{
    Token_Kind op;
    Ast_Expr *left;
    Ast_Expr *right;
};

struct Ast_Block : Ast {
    Array<Ast *> stmts;
};

struct Ast_Return: Ast{
    Ast_Expr *expr; // null for a bare return (see ast_return)
};

// One arm of an if / else-if chain.
struct Ast_If_Node: Ast{
    Ast_Expr *expr ;
    Ast_Block *block;
    Ast_Binary*init;
};

struct Ast_If: Ast{
    Array<Ast_If_Node *> ifs;
};

// Empty statement (placeholder body).
struct Ast_Pass: Ast{};
|
||||
|
||||
// C-style for: init/cond/iter may each be null (see ast_for).
struct Ast_For: Ast{
    Ast_Expr *init;
    Ast_Expr *cond;
    Ast_Expr *iter;
    Ast_Block *block;
};

struct Ast_Lambda_Arg: Ast_Expr{
    Intern_String name;
    Ast_Expr *typespec;
    Ast_Expr *default_value; // may be null
};

struct Ast_Lambda : Ast_Expr {
    Array<Ast_Lambda_Arg *> args;
    Ast_Expr *ret;   // defaulted to `void` ident when omitted (see ast_lambda)
    Ast_Block *block;
    B32 has_var_args;
};

struct Ast_Array: Ast_Expr{
    Ast_Expr *base;
    Ast_Expr *expr;
};

// Base for anything bound to a name.
struct Ast_Named:Ast{
    Intern_String name;
};

struct Ast_Var: Ast_Named{
    Ast_Expr *typespec;
    Ast_Expr *expr;
};

struct Ast_Const;
struct Ast_Resolved_Type;
struct Ast_Struct: Ast{
    // Required to be Ast_Struct or Ast_Var or Ast_Const
    Array<Ast_Var *> members;
    Array<Ast_Const *> const_members;
    Ast_Resolved_Type *type;
};

struct Ast_Enum_Member: Ast{
    Intern_String name;
    Ast_Expr *value; // explicit value, or null
};

struct Ast_Enum: Ast{
    Ast_Expr *typespec;
    Array<Ast_Enum_Member *> members;
};

// Constant binding; the union payload is discriminated by ast->kind.
struct Ast_Const: Ast_Named{
    union{
        Ast *ast;
        Ast_Expr *value;
        Ast_Struct *agg;
        Ast_Enum *enu;
    };
};

struct Ast_Package:Ast{
    Intern_String name;
    Array<Ast_Named *> decls;
    Array<Ast_Named *> ordered; // decls in dependency/typecheck order
};
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// AST Constructors beginning with expressions
|
||||
//-----------------------------------------------------------------------------
|
||||
// Allocate and minimally initialize an Ast_##T from the permanent arena.
// Declares a local `result` that the enclosing constructor continues to
// fill in; relies on a `pctx` parse context being in scope.
#define AST_NEW(T,ikind,ipos,iflags) \
    Ast_##T *result = exp_alloc_type(pctx->perm, Ast_##T, AF_ZeroMemory);\
    result->flags = iflags; \
    result->kind = AST_##ikind; \
    result->pos = ipos; \
    result->id = ++pctx->unique_ids
|
||||
|
||||
// String literal atom (untyped string until typecheck).
function Ast_Atom *
ast_str(Token *pos, Intern_String string){
    AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
    result->type = untyped_string;
    result->intern_val = string;
    return result;
}

// Identifier atom; its type is resolved during typechecking.
function Ast_Atom *
ast_ident(Token *pos, Intern_String string){
    AST_NEW(Atom, IDENT, pos, AST_EXPR | AST_ATOM);
    result->intern_val = string;
    return result;
}

// Boolean literal atom.
function Ast_Atom *
ast_bool(Token *pos, B32 bool_val){
    AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
    result->bool_val = bool_val;
    result->type = untyped_bool;
    return result;
}

// Float literal atom.
function Ast_Atom *
ast_float(Token *pos, F64 value){
    AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
    result->type = untyped_float;
    result->f64_val = value;
    return result;
}

// Integer literal atom; the BigInt is copied into the permanent arena so
// the caller's value may live on a scratch allocator.
function Ast_Atom *
ast_int(Token *pos, BigInt val){
    AST_NEW(Atom, VALUE, pos, AST_EXPR | AST_ATOM);
    result->type = untyped_int;
    result->big_int_val = bigint_copy(pctx->perm, &val);
    return result;
}

// Convenience overload for machine-word integer literals.
function Ast_Atom *
ast_int(Token *pos, U64 value){
    return ast_int(pos, bigint_u64(value));
}
|
||||
|
||||
// Binary expression node; `right` may be null (caller's responsibility to
// diagnose) — only non-null children get their parent link set.
function Ast_Expr *
ast_expr_binary(Ast_Expr *left, Ast_Expr *right, Token *op){
    AST_NEW(Binary, BINARY, op, AST_EXPR);
    result->op = op->kind;
    result->left = left;
    result->right = right;
    result->left->parent = result;
    if(result->right) result->right->parent = result;
    return result;
}

// Call expression; argument array is tight-copied into the permanent arena.
function Ast_Call *
ast_call(Token *pos, Ast_Expr *name, Array<Ast_Call_Item *> exprs){
    AST_NEW(Call, CALL, pos, AST_EXPR);
    result->name = name;
    result->exprs = exprs.tight_copy(pctx->perm);
    if(result->name) result->name->parent = result;
    For(result->exprs) it->parent = result;
    return result;
}

// Single call argument; `name`/`index` are optional, `item` is required.
function Ast_Call_Item *
ast_call_item(Token *pos, Ast_Expr *index, Ast_Atom *name, Ast_Expr *item){
    AST_NEW(Call_Item, CALL_ITEM, pos, AST_EXPR);
    result->name = name;
    result->index = index;
    result->item = item;
    if(result->name) result->name->parent = result;
    if(result->index) result->index->parent = result;
    item->parent = result;
    return result;
}
|
||||
|
||||
// Cast expression; both children are required.
function Ast_Expr *
ast_expr_cast(Token *pos, Ast_Expr *expr, Ast_Expr *typespec){
    AST_NEW(Cast, CAST, pos, AST_EXPR);
    result->flags = AST_EXPR; // redundant: AST_NEW already set this
    result->expr = expr;
    result->typespec = typespec;
    expr->parent = result;
    typespec->parent = result;
    return result;
}

// Unary expression; operand is required.
function Ast_Expr *
ast_expr_unary(Token *pos, Token_Kind op, Ast_Expr *expr){
    AST_NEW(Unary, UNARY, pos, AST_EXPR);
    result->flags = AST_EXPR; // redundant: AST_NEW already set this
    result->expr = expr;
    result->op = op;
    expr->parent = result;
    return result;
}

// Indexing expression `expr[index]`; both children are required.
function Ast_Expr *
ast_expr_index(Token *pos, Ast_Expr *expr, Ast_Expr *index){
    AST_NEW(Index, INDEX, pos, AST_EXPR);
    result->flags = AST_EXPR; // redundant: AST_NEW already set this
    result->expr = expr;
    result->index = index;
    expr->parent = result;
    index->parent = result;
    return result;
}
|
||||
|
||||
// Lambda/function literal. A missing return typespec defaults to the `void`
// identifier; `block` may be null (e.g. foreign declarations — confirm).
function Ast_Lambda *
ast_lambda(Token *pos, Array<Ast_Lambda_Arg *> params, B32 has_var_args, Ast_Expr *ret, Ast_Block *block){
    AST_NEW(Lambda, LAMBDA, pos, AST_EXPR);
    result->flags = AST_EXPR; // redundant: AST_NEW already set this
    result->args = params.tight_copy(pctx->perm);
    result->block = block;
    result->ret = ret;
    result->has_var_args = has_var_args;
    if(!ret) result->ret = ast_ident(result->pos, intern_void);

    if(result->block) result->block->parent = result;
    result->ret->parent = result;
    For(result->args) it->parent = result;
    return result;
}

// One lambda parameter; typespec required, default value optional.
function Ast_Lambda_Arg *
ast_expr_lambda_arg(Token *pos, Intern_String name, Ast_Expr *typespec, Ast_Expr *default_value){
    AST_NEW(Lambda_Arg, LAMBDA_ARG, pos, AST_EXPR);
    result->flags = AST_EXPR; // redundant: AST_NEW already set this
    result->name = name;
    result->typespec = typespec;
    result->default_value = default_value;
    result->typespec->parent = result;
    if(result->default_value) result->default_value->parent = result;
    return result;
}

// Statement block; statements are tight-copied into the permanent arena.
function Ast_Block *
ast_block(Token *pos, Array<Ast *> stmts){
    AST_NEW(Block, BLOCK, pos, AST_STMT);
    result->stmts = stmts.tight_copy(pctx->perm);
    For(result->stmts) it->parent = result;
    return result;
}
|
||||
|
||||
// If statement: an ordered chain of if / else-if arms.
function Ast_If *
ast_if(Token *pos, Array<Ast_If_Node *> ifs){
    AST_NEW(If, IF, pos, AST_STMT);
    result->ifs = ifs.tight_copy(pctx->perm);
    For(result->ifs) it->parent = result;
    return result;
}

// For statement; init/cond/iter are each optional, block is required.
function Ast_For *
ast_for(Token *pos, Ast_Expr *init, Ast_Expr *cond, Ast_Expr *iter, Ast_Block *block){
    AST_NEW(For, FOR, pos, AST_STMT);
    result->init = init;
    result->cond = cond;
    result->iter = iter;
    result->block = block;
    if(result->init) result->init->parent = result;
    if(result->cond) result->cond->parent = result;
    if(result->iter) result->iter->parent = result;
    result->block->parent = result;
    return result;
}

// Empty statement.
function Ast_Pass *
ast_pass(Token *pos){
    AST_NEW(Pass, PASS, pos, AST_STMT);
    return result;
}

// Return statement; `expr` may be null for a bare return.
function Ast_Return *
ast_return(Token *pos, Ast_Expr *expr){
    AST_NEW(Return, RETURN, pos, AST_STMT);
    if(expr){
        assert(is_flag_set(expr->flags, AST_EXPR));
        result->expr = expr;
        result->expr->parent = result;
    }
    return result;
}

// One if/else-if arm; `init` (when present) must be a binary expression.
// An else arm presumably has a null `expr` — confirm against the parser.
function Ast_If_Node *
ast_if_node(Token *pos, Ast_Expr *init, Ast_Expr *expr, Ast_Block *block){
    AST_NEW(If_Node, IF_NODE, pos, AST_STMT);
    result->block = block;
    result->expr = expr;
    result->init = (Ast_Binary *)init;
    if(result->block) result->block->parent = result;
    if(result->expr) result->expr->parent = result;
    if(result->init) {
        assert(init->kind == AST_BINARY);
        result->init->parent = result;
    }
    return result;
}

// Array typespec node; note `base` is not set here — filled by the caller.
function Ast_Array *
ast_array(Token *pos, Ast_Expr *expr){
    AST_NEW(Array, ARRAY, pos, AST_EXPR);
    result->expr = expr;
    if(result->expr) result->expr->parent = result;
    return result;
}
|
||||
|
||||
function Ast_Enum_Member *
|
||||
ast_enum_member(Token *pos, Intern_String name, Ast_Expr *default_value){
|
||||
AST_NEW(Enum_Member, ENUM_MEMBER, pos, AST_AGGREGATE_CHILD);
|
||||
result->name = name;
|
||||
result->value = default_value;
|
||||
if(result->value) result->value->parent = result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Enum aggregate; members are copied into permanent storage and reparented.
function Ast_Enum *
ast_enum(Token *pos, Ast_Expr *typespec, Array<Ast_Enum_Member *> members){
    AST_NEW(Enum, ENUM, pos, AST_AGGREGATE);
    result->typespec = typespec;
    if(typespec) typespec->parent = result;
    result->members = members.tight_copy(pctx->perm);
    For(result->members) it->parent = result;
    return result;
}
|
||||
|
||||
// Struct aggregate; variable and constant members are kept in separate
// lists, copied into permanent storage, validated and reparented.
function Ast_Struct *
ast_struct(Token *pos, Array<Ast_Var *> members, Array<Ast_Const *> const_members){
    AST_NEW(Struct, STRUCT, pos, AST_AGGREGATE);
    result->members       = members.tight_copy(pctx->perm);
    result->const_members = const_members.tight_copy(pctx->perm);
    For(result->members){
        assert(is_flag_set(it->flags, AST_BINDING));
        assert(it->kind == AST_VAR);
        it->parent = result;
    }
    For(result->const_members){
        assert(is_flag_set(it->flags, AST_BINDING));
        assert(it->kind == AST_CONST);
        it->parent = result;
    }
    return result;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Declarations
|
||||
//-----------------------------------------------------------------------------
|
||||
// Variable binding: name, optional typespec, optional initializer.
function Ast_Var *
ast_var(Token *pos, Ast_Expr *typespec, Intern_String name, Ast_Expr *expr){
    AST_NEW(Var, VAR, pos, AST_BINDING);
    result->name     = name;
    result->typespec = typespec;
    result->expr     = expr;
    if(typespec) typespec->parent = result;
    if(expr)     expr->parent     = result;
    return result;
}
|
||||
|
||||
// Constant binding (name :: value); 'value' is required and must be either
// an aggregate (struct/enum) or an expression.
function Ast_Const *
ast_const(Token *pos, Intern_String name, Ast_Expr *value){
    assert(is_flag_set(value->flags, AST_AGGREGATE) || is_flag_set(value->flags, AST_EXPR) );
    AST_NEW(Const, CONST, pos, AST_BINDING);
    result->name  = name;
    result->value = value;
    value->parent = result;
    return result;
}
|
||||
|
||||
// Top-level package node: interns the name, copies the declaration list and
// pre-sizes the 'ordered' array used later for resolution order.
function Ast_Package *
ast_package(Token *pos, String name, Array<Ast_Named *> decls){
    AST_NEW(Package, PACKAGE, pos, 0);
    result->name    = intern_string(&pctx->interns, name);
    result->decls   = decls.tight_copy(pctx->perm);
    result->ordered = array_make<Ast_Named *>(pctx->perm, decls.len);
    For(result->decls){
        it->parent = result;
    }
    return result;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Value
|
||||
//-----------------------------------------------------------------------------
|
||||
// Wraps a boolean into an untyped-bool constant Value.
function Value
value_bool(B32 v){
    Value value;
    value.type     = untyped_bool;
    value.bool_val = v;
    return value;
}
|
||||
|
||||
// Wraps a big integer into an untyped-int constant Value.
function Value
value_int(BigInt b){
    Value value;
    value.type        = untyped_int;
    value.big_int_val = b;
    return value;
}
|
||||
|
||||
// Wraps a machine integer into an untyped-int constant Value.
function Value
value_int(S64 s64){
    Value value;
    value.type = untyped_int;
    bigint_init_signed(&value.big_int_val, s64);
    return value;
}
|
||||
|
||||
// Wraps a double into an untyped-float constant Value.
function Value
value_float(F64 b){
    Value value;
    value.type    = untyped_float;
    value.f64_val = b;
    return value;
}
|
||||
|
||||
// Converts a big integer to an untyped-float constant Value.
function Value
value_float(BigInt a){
    Value value;
    value.type    = untyped_float;
    value.f64_val = bigint_as_float(&a);
    return value;
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Utilities
|
||||
//-----------------------------------------------------------------------------
|
||||
// If 'ast' is a constant bound to a struct, returns that struct, else null.
function Ast_Struct *
const_try_getting_struct(Ast *ast){
    assert(ast->kind == AST_CONST);
    Ast_Expr *bound = ((Ast_Const *)ast)->value;
    return (bound->kind == AST_STRUCT) ? (Ast_Struct *)bound : 0;
}
|
||||
|
||||
// Like const_try_getting_struct but asserts the struct is present.
function Ast_Struct *
const_get_struct(Ast *ast){
    Ast_Struct *s = const_try_getting_struct(ast);
    assert(s);
    return s;
}
|
||||
|
||||
// If 'ast' is a constant bound to a lambda, returns that lambda, else null.
function Ast_Lambda *
const_try_getting_lambda(Ast *ast){
    assert(ast->kind == AST_CONST);
    Ast_Expr *bound = ((Ast_Const *)ast)->value;
    return (bound->kind == AST_LAMBDA) ? (Ast_Lambda *)bound : 0;
}
|
||||
|
||||
// Like const_try_getting_lambda but asserts the lambda is present.
function Ast_Lambda *
const_get_lambda(Ast *ast){
    Ast_Lambda *l = const_try_getting_lambda(ast);
    assert(l);
    return l;
}
|
||||
|
||||
// Returns the bound name of a binding node (var/const).
function Intern_String
ast_get_name(Ast *ast){
    assert(is_flag_set(ast->flags, AST_BINDING));
    return ((Ast_Named *)ast)->name;
}
|
||||
|
||||
// True when 'ast' is a constant whose bound value is a struct.
function B32
ast_is_struct(Ast *ast){
    if(ast->kind == AST_CONST){
        auto a = (Ast_Const *)ast;
        // Ast_Const stores its bound expression in 'value' (see ast_const and
        // const_try_getting_struct); 'agg' was a stale field reference left
        // over from the rename.
        B32 result = a->value->kind == AST_STRUCT;
        return result;
    }
    return false;
}
|
||||
|
||||
// True when the node is an identifier expression.
function B32
is_ident(Ast *ast){
    return ast->kind == AST_IDENT;
}
|
||||
|
||||
// True when the node is a binary expression.
function B32
is_binary(Ast *ast){
    return ast->kind == AST_BINARY;
}
|
||||
|
||||
// True when the node carries the atom flag (literals, identifiers, ...).
function B32
is_atom(Ast *ast){
    return is_flag_set(ast->flags, AST_ATOM);
}
|
||||
|
||||
// Looks up a member by name in a struct: variable members first, then
// constant members. Returns null when nothing matches.
function Ast *
query_struct(Ast_Struct *agg, Intern_String string){
    For(agg->members){
        if(it->name == string) return it;
    }
    For(agg->const_members){
        if(it->name == string) return it;
    }
    return 0;
}
|
||||
|
||||
// Looks up an enum member by name; returns null when nothing matches.
function Ast_Enum_Member *
query_enum(Ast_Enum *enu, Intern_String string){
    For(enu->members){
        if(it->name == string) return it;
    }
    return 0;
}
|
||||
607
new_parse.cpp
607
new_parse.cpp
@@ -1,607 +0,0 @@
|
||||
|
||||
// Reports a parse error: prints a formatted message, the file:line of the
// offending token, the source line it sits on and a caret marker under it,
// then breaks into the debugger. 'str' is a printf-style format consumed by
// the STRING_FMT macro into the local 'string'.
function void
parsing_error(Token *token, const char *str, ...){
    Scratch scratch;
    STRING_FMT(scratch, str, string);

    // @Note(Krzosa): Print nice error message
    printf("\nError :: %s", string.str);
    if(token){
        if(token->kind == TK_Error){
            printf("Token Error: %.*s", (int)token->error_val.len, token->error_val.str);
        }
        // +1: token->line is zero-based internally, shown one-based
        printf(" :: %s:%d\n", token->file.str, (S32)token->line + 1);

        // @Note(Krzosa): Print error line
        {
            // Measure the line up to newline or end-of-buffer
            int i = 0;
            while(token->line_begin[i]!='\n' && token->line_begin[i]!=0) i++;
            printf("%.*s\n", i, token->line_begin);

            // @Note(Krzosa): Print error marker
            // token_i = column of the token within its line; the -2 shifts
            // the ^^^^^^ marker left, presumably to center it under the
            // token — TODO confirm the offset is intentional.
            int token_i = token->str - token->line_begin;
            for(int i = 0; i < token_i-2; i++) printf(" ");
            printf("^^^^^^\n");
        }
    }

    __debugbreak();
}
|
||||
|
||||
// Peeks at the token 'i' positions ahead of the cursor without consuming it.
// Out-of-range lookups return the shared empty token instead of null.
function Token *
token_get(S64 i = 0){
    S64 index = pctx->token_iter + i;
    if(index >= pctx->tokens.len) return &pctx->empty_token;
    return &pctx->tokens[index];
}
|
||||
|
||||
// Returns the current token when it is a scope token, else null.
function Token *
token_is_scope(){
    Token *t = token_get();
    return lex_is_scope(t) ? t : 0;
}
|
||||
|
||||
// Consumes and returns the current token; scope tokens also update the
// parser's current indentation level.
function Token *
token_next(){
    Token *t = token_get();
    if(lex_is_scope(t)) pctx->indent = t->indent;
    pctx->token_iter += 1;
    return t;
}
|
||||
|
||||
// Returns the token 'lookahead' positions ahead when it has the given kind,
// else null. Does not consume anything.
function Token *
token_is(Token_Kind kind, S64 lookahead = 0){
    Token *t = token_get(lookahead);
    return (t->kind == kind) ? t : 0;
}
|
||||
|
||||
// Returns the token 'lookahead' positions ahead when it is the given
// keyword, else null. Keywords are interned, so pointer comparison suffices.
function Token *
token_is_keyword(Intern_String keyword, S64 lookahead = 0){
    Token *t = token_get(lookahead);
    if(t->kind == TK_Keyword && t->intern_val.str == keyword.str) return t;
    return 0;
}
|
||||
|
||||
// Consumes and returns the current token when it has the given kind,
// else leaves the cursor alone and returns null.
function Token *
token_match(Token_Kind kind){
    if(token_get()->kind != kind) return 0;
    return token_next();
}
|
||||
|
||||
// Consumes two tokens when the next two have kinds 'a' then 'b'; returns
// the first of the pair, or null without consuming anything.
function Token *
token_match(Token_Kind a, Token_Kind b){
    Token *first = token_get();
    if(first->kind != a || token_get(1)->kind != b) return 0;
    token_next();
    token_next();
    return first;
}
|
||||
|
||||
// Consumes and returns the current token when it is the given keyword,
// else returns null without consuming.
function Token *
token_match_keyword(Intern_String string){
    Token *t = token_get();
    if(t->kind == TK_Keyword && t->intern_val.str == string.str){
        return token_next();
    }
    return 0;
}
|
||||
|
||||
// Consumes and returns the current token when it has the given kind;
// otherwise reports a parse error and returns null.
function Token *
token_expect(Token_Kind kind){
    Token *t = token_get();
    if(t->kind != kind){
        parsing_error(t, "Expected token of kind: [%s], got instead token of kind: [%s]", name(kind), name(t->kind));
        return 0;
    }
    return token_next();
}
|
||||
|
||||
function Ast_Expr *parse_expr(S64 minbp = 0);
|
||||
|
||||
// After parsing 'expr', turns it into an assignment statement when the next
// token is an assignment operator (=, :=, +=, ...); otherwise returns
// 'expr' unchanged. [:=] additionally requires the left side to be an
// identifier.
function Ast_Expr *
parse_init_stmt(Ast_Expr *expr){
    Token *token = token_get();
    if(token->kind == TK_ColonAssign && expr->kind != AST_IDENT)
        parsing_error(expr->pos, "Binding with [:=] to something that is not an identifier");

    if(!token_is_assign(token)) return expr;

    token_next();
    Ast_Expr *value = parse_expr();
    Ast_Expr *stmt  = ast_expr_binary((Ast_Atom *)expr, value, token);
    stmt->flags = set_flag(stmt->flags, AST_STMT);
    return stmt;
}
|
||||
|
||||
// Parses a call argument list after the '(' has been consumed: a comma
// separated list of items, each optionally indexed ([i] = expr) or named
// (name = expr), terminated by ')'. 'left' is the callee expression.
function Ast_Call *
parse_expr_call(Ast_Expr *left){
    Scratch scratch;
    Token *pos = token_get();
    Array<Ast_Call_Item *> exprs = {scratch};

    while(!token_is(TK_CloseParen)){
        Token *token = token_get();
        Ast_Expr *index = 0;
        Ast_Atom *name = 0;
        // Indexed argument: [expr] = value
        if(token_match(TK_OpenBracket)){
            index = parse_expr();
            token_expect(TK_CloseBracket);
            token_expect(TK_Assign);
        }

        Ast_Expr *item = parse_expr();
        // Named argument: name = value (only when not already indexed)
        if(!index && token_match(TK_Assign)){
            assert(is_flag_set(item->flags, AST_ATOM));
            name = (Ast_Atom *)item;
            item = parse_expr();
        }

        Ast_Call_Item *item_comp = ast_call_item(token, index, name, item);
        exprs.add(item_comp);

        if(!token_match(TK_Comma)){
            break;
        }
    }
    token_expect(TK_CloseParen);

    Ast_Call *result = ast_call(pos, left, exprs);
    return result;
}
|
||||
|
||||
// Parses ': typespec' when present; returns null when there is no colon.
function Ast_Expr *
parse_optional_type(){
    if(token_match(TK_Colon)) return parse_expr();
    return 0;
}
|
||||
|
||||
function Ast_Named *parse_named(B32);
|
||||
// Parses an indentation-delimited block of statements: return / pass / for /
// if-else chains, falling back to named declarations and expression
// statements. Statements are separated by SAME_SCOPE tokens; the block is
// bracketed by OPEN_SCOPE / CLOSE_SCOPE. Returns null when OPEN_SCOPE is
// missing (token_expect already reported the error).
function Ast_Block *
parse_block(){
    Ast_Block *block = 0;

    if(token_expect(OPEN_SCOPE)){ // @todo: Fix error message here, it doesn't show proper token context
        Token *token_block = token_get();

        Scratch scratch;
        Array<Ast *> stmts = {scratch};
        do{
            Token *token = token_get();
            // return [expr]
            if(token_match_keyword(keyword_return)){
                Ast_Expr *expr = 0;
                if(!token_is_scope()) expr = parse_expr();
                stmts.add(ast_return(token, expr));
            }

            // pass (explicit no-op)
            else if(token_match_keyword(keyword_pass)){
                stmts.add(ast_pass(token));
            }

            // for [init][, cond[, iter]] block — all three clauses optional
            else if(token_match_keyword(keyword_for)){
                Ast_Expr *init = 0;
                Ast_Expr *cond = 0;
                Ast_Expr *iter = 0;

                if(!token_is(OPEN_SCOPE)){
                    if(!token_is(TK_Comma)){
                        Ast_Expr *expr_first = parse_expr();
                        init = parse_init_stmt(expr_first);
                    }

                    if(token_match(TK_Comma)){
                        if(!token_is(TK_Comma)) cond = parse_expr();
                        if(token_match(TK_Comma)){
                            iter = parse_expr();
                            iter = parse_init_stmt(iter);
                        }
                    }
                }

                Ast_Block *for_block = parse_block();
                stmts.add(ast_for(token, init, cond, iter, for_block));
            }

            // if [init,] cond block { else if ... } [ else block ]
            else if(token_match_keyword(keyword_if)){
                Array<Ast_If_Node *> if_nodes = {scratch};
                Ast_Expr *expr = parse_expr();
                Ast_Expr *init_val = parse_init_stmt(expr);
                // If parse_init_stmt produced an assignment, it was the init
                // clause; the real condition follows after a comma.
                if(init_val != expr){
                    if(token_match(TK_Comma)) expr = parse_expr();
                    else expr = 0;
                }
                if(init_val == expr) init_val = 0;

                Ast_Block *if_block = parse_block();
                Ast_If_Node *if_node = ast_if_node(token, init_val, expr, if_block);
                if_nodes.add(if_node);

                // Collect 'else if' arms and an optional trailing 'else'
                while(token_is(SAME_SCOPE) && token_is_keyword(keyword_else, 1)){
                    token_next();
                    token = token_next();
                    if(token_match_keyword(keyword_if)){
                        Ast_Expr *expr = parse_expr();
                        Ast_Block *else_if_block = parse_block();
                        Ast_If_Node *if_node = ast_if_node(token, 0, expr, else_if_block);
                        if_nodes.add(if_node);
                    }
                    else{
                        // Final 'else' arm: no condition, stop collecting
                        Ast_Block *else_block = parse_block();
                        Ast_If_Node *if_node = ast_if_node(token, 0, 0, else_block);
                        if_nodes.add(if_node);
                        break;
                    }
                }
                Ast_If *result_if = ast_if(token, if_nodes);
                stmts.add(result_if);

            }
            // Fallback: named declaration, then plain expression statement
            else{
                Ast *result = parse_named(false);
                if(!result){
                    result = parse_expr();
                    result = parse_init_stmt((Ast_Expr *)result);
                }

                if(result) {
                    result->flags = set_flag(result->flags, AST_STMT);
                    stmts.add(result);
                }
                else {
                    parsing_error(token, "Unexpected token [%s] while parsing statement", name(token->kind));
                }

            }
        } while(token_match(SAME_SCOPE));
        token_expect(CLOSE_SCOPE);
        block = ast_block(token_block, stmts);
    }
    return block;
}
|
||||
|
||||
// Parses a lambda after its '(' has been consumed ('token' is that paren):
// parameter list 'name: type [= default]' with optional trailing '...'
// varargs, a ')' , an optional ': return-type', and an optional body block.
function Ast_Lambda *
parse_lambda(Token *token){
    Scratch scratch;

    B32 has_var_args = false;
    Array<Ast_Lambda_Arg *> params = {scratch};
    if(!token_is(TK_CloseParen)){
        for(;;){
            Token *name = token_get();
            if(token_match(TK_Identifier)){
                token_expect(TK_Colon);
                Ast_Expr *typespec = parse_expr();

                // Optional default value
                Ast_Expr *default_value = 0;
                if(token_match(TK_Assign)) {
                    default_value = parse_expr();
                }

                Ast_Lambda_Arg *param = ast_expr_lambda_arg(name, name->intern_val, typespec, default_value);
                params.add(param);
            }
            // '...' marks varargs and must be the last parameter
            else if(token_match(TK_ThreeDots)){
                has_var_args = true;
                break;
            }
            else parsing_error(name, "Expected [Identifier] or [...] when parsing lambda arguments");

            if(!token_match(TK_Comma))
                break;
        }
    }
    token_expect(TK_CloseParen);

    Ast_Expr *ret = parse_optional_type();
    // No block => lambda typespec / declaration without a body
    Ast_Block *block = token_is(OPEN_SCOPE) ? parse_block() : 0;
    Ast_Lambda *result = ast_lambda(token, params, has_var_args, ret, block);
    return result;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Pratt expression parser
|
||||
// Based on this really good article: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
|
||||
//-----------------------------------------------------------------------------
|
||||
// Left/right binding power of an operator; higher binds tighter.
struct Binding_Power{S64 left;S64 right;};
// Position of an operator relative to its operand(s).
enum Binding{Binding_Prefix,Binding_Infix,Binding_Postfix};
|
||||
|
||||
// Precedence table for the Pratt parser. Returns the {left,right} binding
// powers of 'kind' in the requested position. -1 means "not an operator
// here"; -2 marks the side with no operand (prefix ops have no left,
// postfix ops have no right).
function Binding_Power
binding_power(Binding binding, Token_Kind kind){
    if(binding == Binding_Prefix) goto Prefix;
    if(binding == Binding_Infix) goto Infix;
    if(binding == Binding_Postfix) goto Postfix;
    else invalid_codepath;

    Prefix: switch(kind){
        case TK_OpenBracket:
        // array typespec prefix: binds tighter than other prefix ops
        return {-2, 22};
        case TK_Increment:
        case TK_Decrement:
        case TK_Pointer:
        case TK_Dereference:
        case TK_Keyword:
        case TK_OpenParen:
        case TK_Sub:
        case TK_Add:
        case TK_Neg:
        case TK_Not:
        return{-2, 20};
        default: return {-1, -1};
    }
    Infix: switch(kind){
        case TK_Or:
        return {9,10};
        case TK_And:
        return {11,12};
        case TK_Equals:
        case TK_NotEquals:
        case TK_GreaterThen:
        case TK_GreaterThenOrEqual:
        case TK_LesserThen:
        case TK_LesserThenOrEqual:
        return {13,14};
        case TK_Sub:
        case TK_Add:
        case TK_BitOr:
        case TK_BitXor:
        return {15,16};
        case TK_RightShift:
        case TK_LeftShift:
        case TK_BitAnd:
        case TK_Mul:
        case TK_Div:
        case TK_Mod:
        return {17,18};
        case TK_Dot:
        // right < left: member-access chains nest on the right side
        return {24,23};
        default: return {};
    }
    Postfix: switch(kind){
        case TK_Increment:
        case TK_Decrement:
        case TK_OpenBracket:
        case TK_OpenParen:
        return {21, -2};
        default: return{-1,-1};
    }
}
|
||||
|
||||
// Pratt expression parser. Parses one prefix/atom expression, then loops
// folding postfix operators (index, call, ++/--) and infix operators while
// their left binding power exceeds 'min_bp'. Based on:
// https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
function Ast_Expr *
parse_expr(S64 min_bp){
    Ast_Expr *left = 0;
    Token *token = token_next();
    Binding_Power prefix_bp = binding_power(Binding_Prefix, token->kind);

    // @note: parse prefix expression
    switch(token->kind){
        case TK_StringLit : left = ast_str(token, token->intern_val); break;
        case TK_Identifier : left = ast_ident(token, token->intern_val); break;
        case TK_Integer : left = ast_int(token, token->int_val); break;
        case TK_UnicodeLit : left = ast_int(token, token->unicode); break;
        case TK_Float : left = ast_float(token, token->f64_val); break;
        case TK_Pointer : left = ast_expr_unary(token, TK_Pointer, parse_expr(prefix_bp.right)); break;
        case TK_Dereference: left = ast_expr_unary(token, TK_Dereference, parse_expr(prefix_bp.right)); break;
        case TK_Sub : left = ast_expr_unary(token, TK_Sub, parse_expr(prefix_bp.right)); break;
        case TK_Add : left = ast_expr_unary(token, TK_Add, parse_expr(prefix_bp.right)); break;
        case TK_Not : left = ast_expr_unary(token, TK_Not, parse_expr(prefix_bp.right)); break;
        case TK_Neg : left = ast_expr_unary(token, TK_Neg, parse_expr(prefix_bp.right)); break;
        case TK_Increment : left = ast_expr_unary(token, TK_Increment, parse_expr(prefix_bp.right)); break;
        case TK_Decrement : left = ast_expr_unary(token, TK_Decrement, parse_expr(prefix_bp.right)); break;

        // '[count] Base' array typespec
        case TK_OpenBracket: {
            Ast_Expr *expr = 0;
            if(!token_is(TK_CloseBracket))
                expr = parse_expr(0);

            Ast_Array *result = ast_array(token, expr);
            token_expect(TK_CloseBracket);
            result->base = parse_expr(prefix_bp.right);
            left = result;
        }break;

        // true / false / cast(expr: type)
        case TK_Keyword: {
            if(token->intern_val == keyword_true) left = ast_bool(token, 1);
            else if(token->intern_val == keyword_false) left = ast_bool(token, 0);
            else if(token->intern_val == keyword_cast){
                token_expect(TK_OpenParen);
                Ast_Expr *expr = parse_expr(0);
                token_expect(TK_Colon);
                Ast_Expr *typespec = parse_expr(0);
                token_expect(TK_CloseParen);
                left = ast_expr_cast(token, expr, typespec);
            }
            else parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
        }break;

        // '(': lambda when it looks like a parameter list, else grouping
        case TK_OpenParen: {
            if(token_is(TK_CloseParen) || (token_is(TK_Identifier) && token_is(TK_Colon, 1)) || token_is(TK_ThreeDots))
                left = parse_lambda(token);
            else{
                left = parse_expr(0);
                token_expect(TK_CloseParen);
            }
        }break;
        default: parsing_error(token, "Unexpected token of kind: [%s] in expression", name(token->kind)); return 0;
    }

    for(;;){
        token = token_get();

        // lets say [+] is left:1, right:2 and we parse 2+3+4
        // We pass min_bp of 2 to the next recursion
        // in recursion we check if left(1) > min_bp(2)
        // it's not so we don't recurse - we break
        // We do standard do the for loop instead

        Binding_Power postfix_bp = binding_power(Binding_Postfix, token->kind);
        Binding_Power infix_bp = binding_power(Binding_Infix, token->kind);

        // @note: parse postfix expression
        if(postfix_bp.left > min_bp){
            token_next();
            switch(token->kind){
                case TK_OpenBracket:{
                    Ast_Expr *index = parse_expr(0);
                    token_expect(TK_CloseBracket);
                    left = ast_expr_index(token, left, index);
                }break;
                case TK_OpenParen:{
                    left = parse_expr_call(left);
                }break;
                default:{
                    // Rewrite the token kind in place so later passes can
                    // tell postfix ++/-- apart from the prefix forms.
                    assert(token->kind == TK_Increment || token->kind == TK_Decrement);
                    if(token->kind == TK_Increment) token->kind = TK_PostIncrement;
                    else if(token->kind == TK_Decrement) token->kind = TK_PostDecrement;
                    left = ast_expr_unary(token, token->kind, left);
                }
            }
        }

        // @note: parse infix expression
        else if(infix_bp.left > min_bp){
            token = token_next();
            Ast_Expr *right = parse_expr(infix_bp.right);
            left = ast_expr_binary(left, right, token);
        }

        else break;

    }
    return left;
}
|
||||
|
||||
// Parses '= expr' when present; returns null when there is no '='.
function Ast_Expr *
parse_assign_expr(){
    if(token_match(TK_Assign)) return parse_expr();
    return 0;
}
|
||||
|
||||
// Parses a struct body after the 'struct' keyword: a scope of named
// declarations, each either a variable member or a constant member.
function Ast_Struct *
parse_struct(Token *pos){
    Scratch scratch;
    Array<Ast_Var *> members = {scratch};
    Array<Ast_Const *> members_const = {scratch};

    token_match(OPEN_SCOPE);
    do{
        Token *token = token_get();

        Ast_Named *named = parse_named(false);
        if(!named) parsing_error(token, "Failed to parse struct member");
        named->flags = set_flag(named->flags, AST_AGGREGATE_CHILD);

        // Constants and variables live in separate member lists
        if(named->kind == AST_CONST){
            members_const.add((Ast_Const *)named);
        }
        else {
            assert(named->kind == AST_VAR);
            members.add((Ast_Var *)named);
        }

    }while(token_match(SAME_SCOPE));
    token_expect(CLOSE_SCOPE);

    Ast_Struct *result = ast_struct(pos, members, members_const);
    return result;
}
|
||||
|
||||
// Parses an enum body after the 'enum' keyword: an optional ': typespec',
// then a scope of 'Name [= value]' members.
function Ast_Enum *
parse_enum(Token *pos){
    Scratch scratch;
    Array<Ast_Enum_Member *> members = {scratch};
    Ast_Expr *typespec = parse_optional_type();

    token_match(OPEN_SCOPE);
    do{
        Token *name = token_expect(TK_Identifier);
        Ast_Expr *value = parse_assign_expr();
        Ast_Enum_Member *member = ast_enum_member(name, name->intern_val, value);
        members.add(member);
    }while(token_match(SAME_SCOPE));
    token_expect(CLOSE_SCOPE);

    Ast_Enum *result = ast_enum(pos, typespec, members);
    return result;
}
|
||||
|
||||
/*
    parse_named relies on token peeking only so the statement-parsing code
    does not have to be duplicated; it keeps the code nicer. Statements can
    use the named syntax as well, e.g. `i := expr`.
*/
|
||||
// Parses a named declaration when one starts at the cursor:
//   name :: struct {...} | name :: enum {...} | name :: <const expr>
//   name : type [= expr] | name := expr
// An optional leading 'foreign' sets AST_FOREIGN. Returns null when the
// cursor is not at a named declaration (callers fall back to parsing a
// plain statement/expression). 'is_global' additionally enforces that
// top-level declarations are unindented.
function Ast_Named *
parse_named(B32 is_global){
    Ast_Named *result = 0;
    if(is_global) {
        token_match(SAME_SCOPE);
        if(pctx->indent != 0){
            parsing_error(token_get(), "Top level declarations shouldn't be indented");
        }
    }

    Ast_Flag flags = 0;
    if(token_match(TK_FOREIGN)){
        flags = set_flag(flags, AST_FOREIGN);
    }

    Token *tname = token_get();
    if(token_match(TK_Identifier, TK_DoubleColon)){
        // @note parse struct binding
        Token *struct_pos = token_get();
        if(token_match_keyword(keyword_struct)){
            Ast_Struct *struct_val = parse_struct(struct_pos);
            result = ast_const(tname, tname->intern_val, (Ast_Expr *)struct_val);
        }

        else if(token_match_keyword(keyword_enum)){
            Ast_Enum *enum_val = parse_enum(struct_pos);
            result = ast_const(tname, tname->intern_val, (Ast_Expr *)enum_val);
        }

        // @note parse constant expression
        else{
            Ast_Expr *expr = parse_expr();
            result = ast_const(tname, tname->intern_val, expr);
        }
    }
    else if(token_match(TK_Identifier, TK_Colon)){
        // Typed variable with optional initializer.
        // Fixed: was `Ast_Expr *typespec = typespec = parse_expr();` — a
        // double-assignment typo that initialized the variable through
        // itself.
        Ast_Expr *typespec = parse_expr();
        Ast_Expr *expr = parse_assign_expr();
        result = ast_var(tname, typespec, tname->intern_val, expr);
    }

    else if(token_match(TK_Identifier, TK_ColonAssign)){
        // Type-inferred variable
        Ast_Expr *expr = parse_expr();
        result = ast_var(tname, 0, tname->intern_val, expr);
    }
    else if(is_global && tname->kind != TK_End){
        parsing_error(tname, "Unexpected token: [%s] when parsing a declaration", name(tname->kind));
    }

    if(result){
        result->flags = set_flag(result->flags, flags);
    }

    return result;
}
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
1150
typecheck.cpp
1150
typecheck.cpp
File diff suppressed because it is too large
Load Diff
419
typecheck.h
419
typecheck.h
@@ -1,419 +0,0 @@
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Symbols
|
||||
//-----------------------------------------------------------------------------
|
||||
// Kind of entity a symbol names.
enum Sym_Kind{
    SYM_NONE,
    SYM_CONST,
    SYM_VAR,
};
|
||||
|
||||
// Resolution progress of a symbol. SYM_RESOLVING marks in-flight work
// (presumably used to detect cyclic references — TODO confirm).
enum Sym_State{
    SYM_NOT_RESOLVED,
    SYM_RESOLVING,
    SYM_RESOLVED,
};
|
||||
|
||||
// A named entity known to the typechecker.
struct Sym{
    Intern_String name;
    Sym_Kind kind;
    Sym_State state;
    Ast *ast;               // declaration this symbol was created from
    INLINE_VALUE_FIELDS;    // inlined Value fields (type + constant payload)
};
|
||||
|
||||
// Result of type-checking an expression.
struct Operand{
    INLINE_VALUE_FIELDS;    // inlined Value fields (type + constant payload)
    bool is_const: 1;       // value known at compile time
    bool is_lvalue: 1;      // assignable location
};
|
||||
|
||||
// Whether resolve_typespec accepts a null ast argument.
enum{AST_CANT_BE_NULL = 0, AST_CAN_BE_NULL = 1};
function Ast_Resolved_Type *resolve_typespec(Ast_Expr *ast, B32 ast_can_be_null = AST_CANT_BE_NULL);
function Sym *resolve_name(Token *pos, Intern_String name);
function Operand resolve_expr(Ast_Expr *ast, Ast_Resolved_Type *compound_required_type = 0, Sym *const_sym = 0);
function Operand resolve_binding(Ast *ast, Sym *sym = 0);
// Placeholder AST for builtin symbols that have no source declaration.
global Ast_Named empty_decl = {};
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Symbol constructors and utils
|
||||
//-----------------------------------------------------------------------------
|
||||
// Inserts a symbol into the symbol table keyed by its hashed name.
// Duplicate names are a hard error; symbols created inside a local scope
// are also tracked so scope_close can remove them.
function void
sym_insert(Sym *sym){
    U64 hash = hash_string(sym->name.s);
    if(map_get(&pctx->syms, hash)){
        parsing_error(sym->ast->pos, "Symbol with name: [%s] defined multiple times", sym->name.s.str);
    }
    if(pctx->scope > 0) pctx->local_syms.add(sym);
    map_insert(&pctx->syms, hash, sym);
}
|
||||
|
||||
// Looks up a symbol by name; returns null when it is not in scope.
function Sym *
sym_get(Intern_String name){
    return (Sym *)map_get(&pctx->syms, hash_string(name.s));
}
|
||||
|
||||
// Enters a new scope; returns the local-symbol watermark that must be
// handed back to scope_close.
function S64
scope_open(){
    pctx->scope++;
    return pctx->local_syms.len;
}
|
||||
|
||||
// Leaves the current scope: removes every symbol added since the matching
// scope_open from the symbol table and trims the local-symbol list.
function void
scope_close(S64 local_sym_count){
    pctx->scope--;
    assert(pctx->scope >= 0);
    for(S64 idx = local_sym_count; idx < pctx->local_syms.len; idx++){
        Sym *sym = pctx->local_syms.data[idx];
        void *removed = map_remove(&pctx->syms, hash_string(sym->name.s));
        assert(removed);
    }
    pctx->local_syms.len = local_sym_count;
}
|
||||
|
||||
// Records which symbol an AST node resolved to.
function void
sym_associate(Ast *ast, Sym *sym){
    assert(ast && sym);
    map_insert(&pctx->resolved, ast, sym);
}
|
||||
|
||||
// Allocates a zeroed symbol and, unless told otherwise, associates it with
// its declaring AST node.
function Sym *
sym_new(Sym_Kind kind, Intern_String name, Ast *ast, B32 associate = true){
    Sym *sym = exp_alloc_type(pctx->perm, Sym, AF_ZeroMemory);
    sym->kind = kind;
    sym->name = name;
    sym->ast  = ast;
    if(associate) sym_associate(ast, sym);
    return sym;
}
|
||||
|
||||
// Like sym_new, but the symbol starts out fully resolved with 'value'.
function Sym *
sym_new_resolved(Sym_Kind kind, Intern_String name, Value value, Ast *ast, B32 associate = true){
    Sym *sym = sym_new(kind, name, ast, associate);
    sym->value = value;
    sym->state = SYM_RESOLVED;
    return sym;
}
|
||||
|
||||
const B32 INSERT_INTO_SCOPE = true;
|
||||
|
||||
// Creates a resolved variable symbol of the given type; optionally also
// inserts it into the current scope.
function Sym *
sym_var(Intern_String name, Ast_Resolved_Type *type, Ast *ast, B32 insert_into_scope = false){
    Value v;
    v.type = type;
    Sym *sym = sym_new_resolved(SYM_VAR, name, v, ast);
    if(insert_into_scope) sym_insert(sym);
    return sym;
}
|
||||
|
||||
// Creates a resolved variable symbol from an operand's value; optionally
// also inserts it into the current scope.
function Sym *
sym_var(Intern_String name, Operand op, Ast *ast, B32 insert_into_scope = false){
    Sym *sym = sym_new_resolved(SYM_VAR, name, op.value, ast);
    if(insert_into_scope) sym_insert(sym);
    return sym;
}
|
||||
|
||||
// Creates a resolved constant symbol from an operand's value; optionally
// also inserts it into the current scope.
function Sym *
sym_const(Intern_String name, Operand op, Ast *ast, B32 insert_into_scope = false){
    Sym *sym = sym_new_resolved(SYM_CONST, name, op.value, ast);
    if(insert_into_scope) sym_insert(sym);
    return sym;
}
|
||||
|
||||
// Fetches the symbol previously associated with 'ast'; must exist.
function Sym *
resolved_get(Ast *ast){
    Sym *sym = (Sym *)map_get(&pctx->resolved, ast);
    assert(sym);
    return sym;
}
|
||||
|
||||
// Fetches the resolved type a typespec expression evaluated to; the symbol
// must be of type 'type'.
function Ast_Resolved_Type *
resolved_type_get(Ast_Expr *ast){
    Sym *sym = resolved_get(ast);
    assert(sym->type == type_type);
    // NOTE(review): this null-check is redundant after the equality assert
    // above (assuming type_type is non-null); kept to preserve behavior.
    assert(sym->type);
    return sym->type_val;
}
|
||||
|
||||
// Creates a resolved constant symbol holding a type value.
function Sym *
sym_type(Ast_Resolved_Type *type, Ast *ast, Intern_String name = {}, B32 associate = true){
    Value v;
    v.type     = type_type;
    v.type_val = type;
    return sym_new_resolved(SYM_CONST, name, v, ast, associate);
}
|
||||
|
||||
// Convenience: creates a resolved symbol and immediately inserts it.
function Sym *
sym_insert(Sym_Kind kind, Intern_String name, Value value, Ast *ast){
    Sym *sym = sym_new_resolved(kind, name, value, ast);
    sym_insert(sym);
    return sym;
}
|
||||
|
||||
// Interns 'name' and registers it as a builtin type symbol (no AST
// association, backed by the shared empty_decl placeholder).
function void
sym_insert_builtin_type(String name, Ast_Resolved_Type *type){
    Intern_String interned = intern_string(&pctx->interns, name);
    sym_insert(sym_type(type, &empty_decl, interned, false));
}
|
||||
|
||||
// Registers all builtin primitive types in the global symbol table.
function void
sym_insert_builtins(){
    sym_insert_builtin_type("void"_s , type_void);
    sym_insert_builtin_type("Bool"_s , type_bool);
    sym_insert_builtin_type("String"_s, type_string);
    sym_insert_builtin_type("S8"_s, type_s8);
    sym_insert_builtin_type("S16"_s, type_s16);
    sym_insert_builtin_type("S32"_s, type_s32);
    sym_insert_builtin_type("S64"_s, type_s64);
    sym_insert_builtin_type("U8"_s, type_u8);
    sym_insert_builtin_type("U16"_s, type_u16);
    sym_insert_builtin_type("U32"_s, type_u32);
    sym_insert_builtin_type("U64"_s, type_u64);
    sym_insert_builtin_type("F32"_s, type_f32);
    sym_insert_builtin_type("F64"_s, type_f64);
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Operands
|
||||
//-----------------------------------------------------------------------------
|
||||
// Builds an operand view of a symbol. Constants are compile-time values and
// therefore not assignable; variables are lvalues.
function Operand
operand(Sym *sym){
    B32 constant = (sym->kind == SYM_CONST);
    Operand result = {};
    result.type      = sym->type;
    result.value     = sym->value;
    result.is_const  = constant ? true : false;
    result.is_lvalue = constant ? false : true; // Cant assign to const values
    return result;
}
|
||||
|
||||
function Operand
operand_type(Ast_Resolved_Type *type){
    // Constant, non-assignable operand whose value is the type itself
    // (an expression of type `type_type`).
    Operand op = {};
    op.is_const  = true;
    op.is_lvalue = false;
    op.type      = type_type;
    op.type_val  = type;
    return op;
}
|
||||
|
||||
function Operand
operand_int(BigInt big_int){
    // Untyped integer constant. The big int is copied into `pctx->perm`
    // so the operand outlives the caller's temporary.
    Operand op = {};
    op.is_const    = true;
    op.is_lvalue   = false;
    op.type        = untyped_int;
    op.big_int_val = bigint_copy(pctx->perm, &big_int);
    return op;
}
|
||||
|
||||
function Operand
operand_str(Intern_String intern_val){
    // String-literal constant operand carrying the interned payload.
    Operand op = {};
    op.is_const   = true;
    op.is_lvalue  = false;
    op.type       = type_string;
    op.intern_val = intern_val;
    return op;
}
|
||||
|
||||
function Operand
operand_lambda(Ast_Resolved_Type *type){
    // Operand for a lambda value: constant and not assignable.
    Operand op = {};
    op.is_const  = true;
    op.is_lvalue = false;
    op.type      = type;
    return op;
}
|
||||
|
||||
function Operand
operand_const_rvalue(Value value){
    // Constant rvalue carrying a precomputed value.
    // NOTE(review): op.type is left zero-initialized here — presumably the
    // caller fills it in afterwards; confirm against call sites.
    Operand op = {};
    op.is_const = true;
    op.value    = value;
    return op;
}
|
||||
|
||||
function Operand
operand_lvalue(Ast_Resolved_Type *type){
    // Assignable, non-constant operand of the given type.
    Operand op = {};
    op.is_const  = false;
    op.is_lvalue = true;
    op.type      = type;
    return op;
}
|
||||
|
||||
function Operand
operand_rvalue(Ast_Resolved_Type *type){
    // Non-constant temporary of the given type; not assignable.
    Operand op = {};
    op.is_const  = false;
    op.is_lvalue = false;
    op.type      = type;
    return op;
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Hash consed types
|
||||
//-----------------------------------------------------------------------------
|
||||
function Ast_Resolved_Type *
type_new(Allocator *allocator, Ast_Resolved_Type_Kind kind, SizeU size, SizeU align){
    // Allocate a zeroed type node and fill in the common header fields.
    Ast_Resolved_Type *type = exp_alloc_type(allocator, Ast_Resolved_Type, AF_ZeroMemory);
    type->kind  = kind;
    type->size  = size;
    type->align = align;
    return type;
}
|
||||
|
||||
function Ast_Resolved_Type *
type_copy(Allocator *a, Ast_Resolved_Type *type){
    // Shallow copy: nested pointers (base, member arrays, ast, ...) still
    // alias the source type.
    Ast_Resolved_Type *copy = exp_alloc_type(a, Ast_Resolved_Type);
    memory_copy(copy, type, sizeof(*copy));
    return copy;
}
|
||||
|
||||
function Ast_Resolved_Type *
type_pointer(Ast_Resolved_Type *base){
    // Hash-consed pointer type: exactly one node per base type, keyed by the
    // base type's pointer identity in the global type map.
    Ast_Resolved_Type *cached = (Ast_Resolved_Type *)map_get(&pctx->type_map, (void *)base);
    if(cached == 0){
        cached = type_new(pctx->perm, TYPE_POINTER, pointer_size, pointer_align);
        cached->base = base;
        map_insert(&pctx->type_map, base, cached);
    }
    assert(cached->kind == TYPE_POINTER);
    return cached;
}
|
||||
|
||||
function Ast_Resolved_Type *
type_array(Ast_Resolved_Type *base, B32 size_present, S64 size){
    // Hash-consed array type. Arrays declared without an explicit size are
    // canonicalized under the ARRAY_SIZE_INFERRED sentinel so all of them
    // share one node per base type.
    if(!size_present){
        size = ARRAY_SIZE_INFERRED;
    }

    // Map key mixes the base type's pointer identity with the element count.
    // NOTE(review): this shares pctx->type_map with pointer/lambda types and
    // keys by raw hash — presumably cross-kind collisions can't occur (the
    // asserts below would fire); verify.
    U64 hash_base = hash_ptr(base);
    U64 hash = hash_mix(hash_base, hash_u64(size));
    Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash);
    if(result){
        // Cache hit: sanity-check we got back the array type we hashed for.
        assert(result->kind == TYPE_ARRAY);
        assert(result->arr.size == size);
        assert(result->arr.base == base);
        return result;
    }

    // Cache miss: build and memoize a new array type node.
    result = type_new(pctx->perm, TYPE_ARRAY, pointer_size, pointer_align);
    result->arr.base = base;
    result->arr.size = size;
    // Precomputed key of this array's inferred-size variant, so it can be
    // looked up later without re-hashing the base.
    result->arr.inferred_size_hash = hash_mix(hash_base, hash_u64(ARRAY_SIZE_INFERRED));
    map_insert(&pctx->type_map, hash, result);
    return result;
}
|
||||
|
||||
function Ast_Resolved_Type *
type_lambda(Ast *ast, Ast_Resolved_Type *ret, Array<Ast_Resolved_Type *> args){
    // Hash-consed function (lambda) type: keyed by the return type and every
    // argument type's pointer identity, mixed in argument order.
    U64 hash = hash_ptr(ret);
    For(args) hash = hash_mix(hash, hash_ptr(it));
    Ast_Resolved_Type *result = (Ast_Resolved_Type *)map_get(&pctx->type_map, hash);

    if(result){
        // Cache hit. NOTE(review): only return type and argument COUNT are
        // asserted — a hash collision between same-arity lambdas with
        // different argument types would slip past these checks; confirm
        // that's acceptable.
        assert(result->kind == TYPE_LAMBDA);
        assert(result->func.ret == ret);
        assert(result->func.args.len == args.len);
        return result;
    }

    // Cache miss: build and memoize a new lambda type node.
    result = type_new(pctx->perm, TYPE_LAMBDA, pointer_size, pointer_align);
    result->ast = ast;
    result->func.ret = ret;
    // Copy the argument array into permanent storage; the caller's array may
    // live in shorter-lived (e.g. scratch) memory.
    result->func.args = args.tight_copy(pctx->perm);
    map_insert(&pctx->type_map, hash, result);

    return result;
}
|
||||
|
||||
function Ast_Resolved_Type *
type_enum(Ast_Enum *ast){
    // Build an enum type. The underlying type comes from the enum's typespec
    // when present, otherwise it defaults to untyped int; the enum inherits
    // that type's size and alignment.
    Ast_Resolved_Type *underlying = resolve_typespec(ast->typespec, AST_CAN_BE_NULL);
    if(underlying == 0){
        underlying = untyped_int;
    }

    Ast_Resolved_Type *result = type_new(pctx->perm, TYPE_ENUM, underlying->size, underlying->align);
    result->ast  = ast;
    result->base = underlying;
    return result;
}
|
||||
|
||||
/*
    2022.05.31 - Global scope structs vs nested structs

    Structs exist in two variants; global-scope structs behave a bit
    differently than scoped structs. They start out incomplete, and when some
    operation requires the actual struct size, alignment, field access, etc.,
    it should call complete_type. That resolves all the children, calculates
    the size, and makes sure there are no cyclic dependencies. This is
    required for the correct behaviour of order-independent structs. If
    someone just wants a pointer to the struct, we don't need to complete the
    type, because we always know how large a pointer is. This is what allows
    cyclic dependencies through pointers.
*/
|
||||
function Ast_Resolved_Type *
type_incomplete(Ast *ast){
    // Placeholder node for an order-independent (global-scope) struct;
    // size/align stay 0 until the type is actually completed.
    Ast_Resolved_Type *type = type_new(pctx->perm, TYPE_INCOMPLETE, 0, 0);
    type->ast = ast;
    return type;
}
|
||||
|
||||
function void
type_struct_complete(Ast_Resolved_Type *type, Ast_Struct *node){
    // Resolve a struct's members and transition the node TYPE_COMPLETING ->
    // TYPE_STRUCT. Marking it COMPLETING first is what lets type_complete
    // detect cyclic member dependencies.
    // @todo: compute size, alignement, offset !!!
    // @note: resolve all the struct members first
    type->kind = TYPE_COMPLETING;
    Scratch scratch;
    Array<Ast_Resolved_Member> members = {scratch};
    For(node->members){
        // Each member becomes both a symbol and an entry in the member list.
        Operand op = resolve_binding(it);
        Intern_String name = ast_get_name(it);
        sym_var(name, op, it);
        members.add({op.type, name});
    }
    // Persist the member list; `members` itself lives in scratch memory.
    type->agg.members = members.tight_copy(pctx->perm);
    type->kind = TYPE_STRUCT;

    /*
        @note: resolve constant members after the struct got resolved.
        This way we avoid a problem where we start resolving a function
        that has a parameter of the parent struct's type — the struct
        being resolved right now — which would be a cyclic dependency.
        Constants aren't required to make the struct itself work.
    */
    For(node->const_members){
        Operand op = resolve_binding(it);
        Intern_String name = ast_get_name(it);
        sym_const(name, op, it);
    }
}
|
||||
|
||||
function Ast_Resolved_Type *
type_struct(Ast_Struct *agg){
    // Create a struct type node and resolve its members eagerly (unlike
    // global-scope structs, which start out TYPE_INCOMPLETE).
    Ast_Resolved_Type *type = type_new(pctx->perm, TYPE_STRUCT, 0, 0);
    type->ast = agg;
    type_struct_complete(type, agg);
    return type;
}
|
||||
|
||||
function void
type_complete(Ast_Resolved_Type *type){
    // Force an incomplete (order-independent) struct type to be fully
    // resolved; no-op for null or already-complete types.
    if(!type) return;

    if(type->kind == TYPE_COMPLETING){
        // Completion re-entered itself: a member depends (non-pointer) on
        // the struct currently being completed.
        parsing_error(type->ast->pos, "Cyclic type dependency");
    }else if(type->kind != TYPE_INCOMPLETE){
        return;
    }

    Ast_Struct *node = (Ast_Struct *)type->ast;
    type_struct_complete(type, node);
    // Record the declaration in dependency-resolved order for later passes.
    pctx->resolving_package->ordered.add((Ast_Named *)node->parent);
}
|
||||
Reference in New Issue
Block a user