Files
corelang/new_parse.cpp
2022-06-01 13:23:21 +02:00

550 lines
15 KiB
C++

// Reports a parse error. Prints the formatted message and, when a token is
// supplied, the token's file and (line + 1), the source line the token sits
// on and a "^^^^^^" marker underneath it. Always ends with __debugbreak().
//
// token - token the error refers to; may be 0, in which case only the
//         message is printed
// str   - printf style format string followed by its arguments
function void
parsing_error(Token *token, const char *str, ...){
  Scratch scratch;
  STRING_FMT(scratch, str, string);

  printf("\nError: %s", string.str);
  if(token){
    printf(" %s:%d\n", token->file.str, (S32)token->line + 1);

    // Measure the offending line: it ends at a newline or at the terminator
    int line_len = 0;
    for(;;){
      if(token->line_begin[line_len] == '\n') break;
      if(token->line_begin[line_len] == 0) break;
      line_len++;
    }
    printf("%.*s\n", line_len, token->line_begin);

    // The marker starts two columns before the token, nothing is padded
    // when the token is closer than that to the start of the line
    int column = token->str - token->line_begin;
    int padding = column - 2;
    while(padding > 0){
      printf(" ");
      padding--;
    }
    printf("^^^^^^\n");
  }
  __debugbreak();
}
// Peeks at the token `i` positions away from the parser cursor
// (pctx->token_iter) without consuming anything. When the resulting index is
// at or past the end of the token array, the context's empty_token is
// returned instead, so the result is never a null pointer.
function Token *
token_get(S64 i = 0){
  S64 index = pctx->token_iter + i;
  if(index < pctx->tokens.len){
    return &pctx->tokens[index];
  }
  return &pctx->empty_token;
}
// Returns the current token when lex_is_scope() accepts it, 0 otherwise.
// Nothing is consumed.
function Token *
token_is_scope(){
  Token *current = token_get();
  return lex_is_scope(current) ? current : 0;
}
// Consumes the current token and returns it. When the token is a scope token
// (per lex_is_scope) its indent is stored in pctx->indent before the cursor
// advances.
function Token *
token_next(){
  Token *current = token_get();
  if(lex_is_scope(current)){
    pctx->indent = current->indent;
  }
  pctx->token_iter += 1;
  return current;
}
// Checks, without consuming, whether the token `lookahead` positions ahead
// has the given kind. Returns that token on a match, 0 otherwise.
function Token *
token_is(Token_Kind kind, S64 lookahead = 0){
  Token *peeked = token_get(lookahead);
  return peeked->kind == kind ? peeked : 0;
}
// Checks, without consuming, whether the token `lookahead` positions ahead is
// a TK_Keyword whose interned string pointer equals keyword.str.
// Returns that token on a match, 0 otherwise.
function Token *
token_is_keyword(Intern_String keyword, S64 lookahead = 0){
  Token *peeked = token_get(lookahead);
  if(peeked->kind != TK_Keyword) return 0;
  if(peeked->intern_val.str != keyword.str) return 0;
  return peeked;
}
// Consumes and returns the current token when it has the given kind.
// Returns 0 and leaves the cursor untouched otherwise.
function Token *
token_match(Token_Kind kind){
  if(!token_is(kind)) return 0;
  return token_next();
}
// Two token variant: when the current token has kind `a` and the one after it
// has kind `b`, both are consumed and the first one is returned.
// Returns 0 and consumes nothing otherwise.
function Token *
token_match(Token_Kind a, Token_Kind b){
  Token *first = token_get();
  if(first->kind != a) return 0;
  if(token_get(1)->kind != b) return 0;
  token_next();
  token_next();
  return first;
}
// Consumes and returns the current token when it is the given keyword
// (TK_Keyword with a matching interned string pointer).
// Returns 0 and consumes nothing otherwise.
function Token *
token_match_keyword(Intern_String string){
  if(!token_is_keyword(string)) return 0;
  return token_next();
}
// Like token_match(kind), but a mismatch is an error: parsing_error() is
// called with the unexpected token and 0 is returned.
function Token *
token_expect(Token_Kind kind){
  Token *matched = token_match(kind);
  if(matched) return matched;

  Token *got = token_get();
  parsing_error(got, "Expected token of kind: [%s], got instead token of kind: [%s]", token_kind_string(kind).str, token_kind_string(got->kind).str);
  return 0;
}
//-----------------------------------------------------------------------------
// Expression parsing
//-----------------------------------------------------------------------------
/*
add = [+-]
mul = [/%*]
compare = == | != | >= | > | <= | <
logical = [&|^] | && | ||
unary = [&*-!~+] | ++ | --
atom_expr = Int
| Float
| String
| Identifier
| 'cast' '(' expr ':' typespec ')'
| 'size_type' '(' typespec ')'
| 'size_expr' '(' expr ')'
| '{' call_expr '}'
| '(' expr ')'
| '(' ':' typespec ')' '{' call_expr '}'
postfix_expr = atom_expr ('[' expr ']' | '.' Identifier | ++ | -- | '(' expr_list ')')*
unary_expr = unary ? unary_expr : atom_expr
mul_expr = atom_expr (mul atom_expr)*
add_expr = mul_expr (add mul_expr)*
logical_expr = add_expr (logical add_expr)*
compare_expr = logical_expr (compare logical_expr)*
ternary_expr = compare_expr ('?' ternary_expr ':' ternary_expr)?
expr = logical_expr
Compound literals
- (:[23]*Type){}
- Type{}
- { }
*/
// Forward declaration: the statement and call parsers below use parse_expr
// before its definition. rbp is the binding power threshold that parse_expr
// compares against postfix_binding_power() / left_binding_power().
function Ast_Expr *parse_expr(S64 rbp = 0);
// Turns an already parsed expression into an assignment statement when the
// next token is an assignment operator (per token_is_assign).
//
// expr - left hand side that was parsed by the caller
//
// Returns a binary node (left = expr, right = the parsed value, op = the
// assignment token) flagged with AST_STMT, or `expr` unchanged when no
// assignment follows. Using [:=] on something that is not an AST_IDENT is
// reported through parsing_error().
function Ast_Expr *
parse_init_stmt(Ast_Expr *expr){
  Token *op = token_get();
  if(op->kind == TK_ColonAssign && expr->kind != AST_IDENT){
    parsing_error(expr->pos, "Binding with [:=] to something that is not an identifier");
  }
  if(!token_is_assign(op)) return expr;

  token_next();
  Ast_Expr *value = parse_expr();
  Ast_Expr *stmt = ast_expr_binary((Ast_Atom *)expr, value, op);
  stmt->flags = set_flag(stmt->flags, AST_STMT);
  return stmt;
}
// Parses the argument list of a call; the opening paren has already been
// consumed by the caller. Each argument takes one of the forms
//   expr            - plain value
//   name = expr     - named item, `name` has to carry the AST_ATOM flag
//   [index] = expr  - indexed item
// Arguments are separated by commas and the list ends at ')'.
//
// left - expression being called
function Ast_Call *
parse_expr_call(Ast_Expr *left){
  Scratch scratch;
  Token *pos = token_get();
  Array<Ast_Call_Item *> exprs = {scratch};

  bool more_items = !token_is(TK_CloseParen);
  while(more_items){
    Token *item_pos = token_get();

    // Optional "[index] =" prefix
    Ast_Expr *index = 0;
    if(token_match(TK_OpenBracket)){
      index = parse_expr();
      token_expect(TK_CloseBracket);
      token_expect(TK_Assign);
    }

    // Without an index, "a = b" means the first expression was the name
    Ast_Expr *value = parse_expr();
    Ast_Atom *name = 0;
    if(!index && token_match(TK_Assign)){
      assert(is_flag_set(value->flags, AST_ATOM));
      name = (Ast_Atom *)value;
      value = parse_expr();
    }

    exprs.add(ast_call_item(item_pos, index, name, value));
    more_items = token_match(TK_Comma) && !token_is(TK_CloseParen);
  }

  token_expect(TK_CloseParen);
  return ast_call(pos, left, exprs);
}
// Parses an optional ": type" suffix. Returns the expression that follows the
// colon, or 0 (consuming nothing) when there is no colon.
function Ast_Expr *
parse_optional_type(){
  if(!token_match(TK_Colon)) return 0;
  return parse_expr();
}
// Forward declaration: parse_block and parse_struct call parse_named before
// its definition. The B32 parameter is named is_global in the definition.
function Ast_Named *parse_named(B32);
// Parses a block of statements delimited by scope tokens.
//
// Returns 0 and consumes nothing when the next token is not OPEN_SCOPE.
// Otherwise statements separated by SAME_SCOPE are parsed until CLOSE_SCOPE
// and collected into an Ast_Block. Recognized statements:
//   return [expr]
//   pass
//   for init [, cond [, iter]]  followed by a block
//   if [init ,] cond  followed by a block and any number of else if / else
//   a named declaration (see parse_named)
//   an expression, optionally followed by an assignment
function Ast_Block *
parse_block(){
  Ast_Block *block = 0;
  if(token_match(OPEN_SCOPE)){
    Token *token_block = token_get();
    Scratch scratch;
    Array<Ast *> stmts = {scratch};
    do{
      // First token of the statement, used as its position
      Token *token = token_get();
      if(token_match_keyword(keyword_return)){
        // A scope token right after `return` means there is no value
        Ast_Expr *expr = 0;
        if(!token_is_scope()) expr = parse_expr();
        stmts.add(ast_return(token, expr));
      }
      else if(token_match_keyword(keyword_pass)){
        stmts.add(ast_pass(token));
      }
      else if(token_match_keyword(keyword_for)){
        Ast_Expr *expr_first = parse_expr();
        Ast_Expr *init = parse_init_stmt(expr_first);
        Ast_Expr *cond = 0;
        Ast_Expr *iter = 0;
        if(token_match(TK_Comma)){
          cond = parse_expr();
          if(token_match(TK_Comma)){
            iter = parse_expr();
            iter = parse_init_stmt(iter);
          }
        }
        Ast_Block *for_block = parse_block();
        stmts.add(ast_for(token, init, cond, iter, for_block));
      }
      else if(token_match_keyword(keyword_if)){
        Array<Ast_If_Node *> if_nodes = {scratch};
        Ast_Expr *expr = parse_expr();
        Ast_Expr *init_val = parse_init_stmt(expr);
        // parse_init_stmt hands back a different node only when it parsed
        // an assignment, in that case the condition follows after a comma
        if(init_val != expr){
          if(token_match(TK_Comma)) expr = parse_expr();
          else expr = 0;
        }
        Ast_Block *if_block = parse_block();
        if_nodes.add(ast_if_node(token, init_val, expr, if_block));

        while(token_is(SAME_SCOPE) && token_is_keyword(keyword_else, 1)){
          token_next();
          // Kept separate from `token` so the Ast_If below is positioned at
          // the `if` keyword and not at the last `else`
          Token *else_token = token_next();
          if(token_match_keyword(keyword_if)){
            Ast_Expr *else_if_cond = parse_expr();
            Ast_Block *else_if_block = parse_block();
            if_nodes.add(ast_if_node(else_token, 0, else_if_cond, else_if_block));
          }
          else{
            // A plain else ends the chain
            Ast_Block *else_block = parse_block();
            if_nodes.add(ast_if_node(else_token, 0, 0, else_block));
            break;
          }
        }
        Ast_If *result_if = ast_if(token, if_nodes);
        stmts.add(result_if);
      }
      else{
        // Declaration first, otherwise an expression or assignment
        Ast *result = parse_named(false);
        if(!result){
          result = parse_expr();
          result = parse_init_stmt((Ast_Expr *)result);
        }
        if(result) stmts.add(result);
        else parsing_error(token, "Unexpected token [%s] while parsing statement", token_kind_string(token->kind).str);
      }
    } while(token_match(SAME_SCOPE));
    token_expect(CLOSE_SCOPE);
    block = ast_block(token_block, stmts);
  }
  return block;
}
// Parses a lambda after its opening paren has been consumed:
//   [name ':' type ['=' default] {',' ...}] ')' [':' return_type] [block]
//
// token       - position token stored in the resulting node
// is_typespec - when true no body is parsed and the block is left as 0
function Ast_Lambda *
parse_lambda(Token *token, B32 is_typespec = false){
  Scratch scratch;
  Array<Ast_Lambda_Arg *> params = {scratch};

  bool more_params = !token_is(TK_CloseParen);
  while(more_params){
    Token *name = token_expect(TK_Identifier);
    token_expect(TK_Colon);
    Ast_Expr *typespec = parse_expr();
    Ast_Expr *default_value = token_match(TK_Assign) ? parse_expr() : 0;
    params.add(ast_expr_lambda_arg(name, name->intern_val, typespec, default_value));
    more_params = token_match(TK_Comma) != 0;
  }
  token_expect(TK_CloseParen);

  Ast_Expr *ret = parse_optional_type();
  Ast_Block *block = 0;
  if(!is_typespec) block = parse_block();
  return ast_lambda(token, params, ret, block);
}
// Prefix part of the expression parser: builds the node for a token that
// starts an expression. `token` has already been consumed by the caller.
//
// Handles string / identifier / integer atoms, the TK_Pointer and
// TK_Dereference prefix operators, '[' expr ']' base, the cast keyword,
// lambdas and parenthesized expressions. Any other token is reported through
// parsing_error() and 0 is returned.
function Ast_Expr *
null_denotation(Token *token){
  switch(token->kind){
    case TK_StringLit:   return ast_str(token, token->intern_val);
    case TK_Identifier:  return ast_ident(token, token->intern_val);
    case TK_Integer:     return ast_int(token, token->int_val);
    case TK_Pointer:     return ast_expr_unary(token, TK_Pointer, parse_expr());
    case TK_Dereference: return ast_expr_unary(token, TK_Dereference, parse_expr());

    case TK_OpenBracket: {
      // '[' expr ']' followed by the base expression
      Ast_Array *array = ast_array(token, parse_expr());
      token_expect(TK_CloseBracket);
      array->base = parse_expr(1);
      return array;
    }

    case TK_Keyword: {
      // cast is the only keyword accepted in expression position
      if(!(token->intern_val == keyword_cast)){
        parsing_error(token, "Unexpected keyword: [%s], expected keyword [cast]", token->intern_val.str);
        return 0;
      }
      token_expect(TK_OpenParen);
      Ast_Expr *expr = parse_expr();
      token_expect(TK_Colon);
      Ast_Expr *typespec = parse_expr();
      token_expect(TK_CloseParen);
      return ast_expr_cast(token, expr, typespec);
    }

    case TK_OpenParen: {
      // "()" and "(name :" start a lambda, anything else is a grouping
      if(token_is(TK_CloseParen) || (token_is(TK_Identifier) && token_is(TK_Colon, 1))){
        return parse_lambda(token);
      }
      Ast_Expr *inner = parse_expr();
      token_expect(TK_CloseParen);
      return inner;
    }

    default: break;
  }

  parsing_error(token, "Unexpected token of kind: [%s] in expression", token_kind_string(token->kind).str);
  return 0;
}
// Binding power of a binary operator: 1 for TK_Add / TK_Sub, 2 for
// TK_Mul / TK_Div and 0 for every other token kind.
function S64
left_binding_power(Token_Kind kind){
  if(kind == TK_Sub || kind == TK_Add) return 1;
  if(kind == TK_Mul || kind == TK_Div) return 2;
  return 0;
}
// Infix part of the expression parser: `op` has been consumed and `left` is
// the operand parsed so far. The right operand is parsed first, then a binary
// node is built for + - * /. Any other operator is reported through
// parsing_error() and 0 is returned.
function Ast_Expr *
left_denotation(Token *op, Ast_Expr *left){
  enum{ Left_Associative, Right_Associative };
  S64 assoc = Left_Associative;
  S64 right_power = left_binding_power(op->kind) - assoc;
  Ast_Expr *right = parse_expr(right_power);

  bool is_binary = op->kind == TK_Add || op->kind == TK_Mul || op->kind == TK_Sub || op->kind == TK_Div;
  if(is_binary){
    return ast_expr_binary(left, right, op);
  }
  parsing_error(op, "Unexpected token of kind: [%s] in expression", token_kind_string(op->kind).str);
  return 0;
}
// Binding power of a postfix operator: 1 for '.', '--', '++', '[' and '(',
// 0 for every other token kind.
function S64
postfix_binding_power(Token_Kind kind){
  bool is_postfix = kind == TK_Dot
                 || kind == TK_Decrement
                 || kind == TK_Increment
                 || kind == TK_OpenBracket
                 || kind == TK_OpenParen;
  return is_postfix ? 1 : 0;
}
// Expression parser entry point.
//
// Consumes one token and builds the prefix node through null_denotation(),
// then keeps extending `left` while the next token is either a postfix
// operator or a binary operator whose binding power is greater than rbp.
//
// rbp - binding power threshold; operators that do not exceed it end the
//       expression
function Ast_Expr *
parse_expr(S64 rbp){
  Ast_Expr *left = null_denotation(token_next());
  for(;;){
    Token *op = token_get();

    // Postfix operators
    if(postfix_binding_power(op->kind) > rbp){
      token_next();
      if(op->kind == TK_Dot){
        // The right side is parsed as a full expression, the node built so
        // far stays in the left slot
        Ast_Expr *right = parse_expr();
        left = ast_expr_binary(left, right, op);
      }
      else if(op->kind == TK_OpenBracket){
        Ast_Expr *index = parse_expr();
        left = ast_expr_index(op, left, index);
        token_expect(TK_CloseBracket);
      }
      else if(op->kind == TK_OpenParen){
        left = parse_expr_call(left);
      }
      else{
        // ++ / -- in postfix position: the token kind is rewritten in place
        // to its post variant before the unary node is built
        if(op->kind == TK_Increment) op->kind = TK_PostIncrement;
        else if(op->kind == TK_Decrement) op->kind = TK_PostDecrement;
        left = ast_expr_unary(op, op->kind, left);
      }
      continue;
    }

    // Binary operators
    if(rbp < left_binding_power(op->kind)){
      left = left_denotation(token_next(), left);
      continue;
    }

    break;
  }
  return left;
}
// Parses an optional "= expr" suffix. Returns the expression that follows the
// TK_Assign token, or 0 (consuming nothing) when there is none.
function Ast_Expr *
parse_assign_expr(){
  if(!token_match(TK_Assign)) return 0;
  return parse_expr();
}
// Parses the body of a struct: members separated by SAME_SCOPE, terminated
// by CLOSE_SCOPE. Every member is parsed with parse_named(false), flagged
// with AST_AGGREGATE_CHILD and sorted into constants (AST_CONST) and
// variables (AST_VAR).
//
// pos - position token stored in the resulting node
function Ast_Struct *
parse_struct(Token *pos){
  Scratch scratch;
  Array<Ast_Var *> members = {scratch};
  Array<Ast_Const *> members_const = {scratch};
  token_match(OPEN_SCOPE);
  do{
    Token *token = token_get();
    Ast_Named *named = parse_named(false);
    // The null check has to come before any access to `named`, otherwise
    // a failed member parse crashes instead of reporting the error
    if(!named){
      parsing_error(token, "Failed to parse struct member");
      continue; // in a do-while this jumps to the SAME_SCOPE check below
    }
    named->flags = set_flag(named->flags, AST_AGGREGATE_CHILD);
    if(named->kind == AST_CONST){
      members_const.add((Ast_Const *)named);
    } else {
      assert(named->kind == AST_VAR);
      members.add((Ast_Var *)named);
    }
  }while(token_match(SAME_SCOPE));
  token_expect(CLOSE_SCOPE);
  Ast_Struct *result = ast_struct(pos, members, members_const);
  return result;
}
// Parses an enum: an optional ": type" followed by a scope with one member
// per line, each member being an identifier with an optional "= value".
//
// pos - position token stored in the resulting node
function Ast_Enum *
parse_enum(Token *pos){
  Scratch scratch;
  Array<Ast_Enum_Member *> members = {scratch};
  Ast_Expr *typespec = parse_optional_type();
  token_match(OPEN_SCOPE);
  for(;;){
    Token *name = token_expect(TK_Identifier);
    Ast_Expr *value = parse_assign_expr();
    members.add(ast_enum_member(name, name->intern_val, value));
    if(!token_match(SAME_SCOPE)) break;
  }
  token_expect(CLOSE_SCOPE);
  return ast_enum(pos, typespec, members);
}
/*
Needs peeking only because I didn't want to duplicate code
for parsing statements and it makes code nicer.
Statements can have named syntax i :=
*/
// Parses a named declaration, recognized by a two token lookahead:
//   name :: struct ... | name :: enum ... | name :: expr   -> constant
//   name : type [= expr]                                   -> typed variable
//   name := expr                                           -> inferred variable
//
// is_global - true for top level declarations: a leading SAME_SCOPE token is
//             skipped, indentation is rejected, and a token that starts no
//             declaration (other than TK_End) is reported as an error
//
// Returns the declaration, or 0 when the upcoming tokens do not start one;
// in that case nothing but the optional SAME_SCOPE has been consumed.
function Ast_Named *
parse_named(B32 is_global){
  Ast_Named *result = 0;
  if(is_global) {
    token_match(SAME_SCOPE);
    if(pctx->indent != 0){
      parsing_error(token_get(), "Top level declarations shouldn't be indented");
    }
  }
  Token *name = token_get();
  if(token_match(TK_Identifier, TK_DoubleColon)){
    // @note parse struct binding
    Token *struct_pos = token_get();
    if(token_match_keyword(keyword_struct)){
      Ast_Struct *struct_val = parse_struct(struct_pos);
      result = ast_const(name, name->intern_val, (Ast_Expr *)struct_val);
    }
    else if(token_match_keyword(keyword_enum)){
      Ast_Enum *enum_val = parse_enum(struct_pos);
      result = ast_const(name, name->intern_val, (Ast_Expr *)enum_val);
    }
    // @note parse constant expression
    else{
      Ast_Expr *expr = parse_expr();
      result = ast_const(name, name->intern_val, expr);
    }
  }
  else if(token_match(TK_Identifier, TK_Colon)){
    Ast_Expr *typespec = parse_expr();
    Ast_Expr *expr = parse_assign_expr();
    result = ast_var(name, typespec, name->intern_val, expr);
  }
  else if(token_match(TK_Identifier, TK_ColonAssign)){
    Ast_Expr *expr = parse_expr();
    result = ast_var(name, 0, name->intern_val, expr);
  }
  else if(is_global && name->kind != TK_End){
    parsing_error(name, "Unexpected token: [%s] when parsing a declaration", token_kind_string(name->kind).str);
  }
  return result;
}