typedef struct Parser_Error Parser_Error; struct Parser_Error{ Parser_Error *next; String message; Token *token; }; typedef struct Parser{ Token_Array tokens; Arena *arena; Parser_Error *first_error; Parser_Error *last_error; }Parser; function void parser_push_error(Parser *p, Token *token, char *str, ...){ String string; { va_list args1, args2; va_start(args1, str); va_copy(args2, args1); string.len = vsnprintf(0, 0, str, args2); va_end(args2); string.str = arena_push_size(p->arena, string.len + 1); vsnprintf((char*)string.str, string.len + 1, str, args1); va_end(args1); } // @Note(Krzosa): Print nice error message { printf("Error: %s %s:%d\n", string.str, token->file.str, (S32)token->line); // @Note(Krzosa): Print error line { int i = 0; while(token->line_begin[i]!='\n' && token->line_begin[i]!=0) i++; printf("%.*s\n", i, token->line_begin); // @Note(Krzosa): Print error marker int token_i = token->str - token->line_begin; for(int i = 0; i < token_i-2; i++) printf(" "); printf("^^^^^^\n"); } } Parser_Error *error = arena_push_struct(p->arena, Parser_Error); error->message = string; error->next = 0; error->token = token; SLLQueuePush(p->first_error, p->last_error, error); __debugbreak(); } //----------------------------------------------------------------------------- // Parsing helpers //----------------------------------------------------------------------------- function Token * token_get(Parser *p){ Token *result = token_array_iter_peek(&p->tokens, 0); return result; } function Token * token_peek(Parser *p, S64 i){ Token *result = token_array_iter_peek(&p->tokens, i); return result; } function Token * token_peek_is(Parser *p, Token_Kind kind, S64 i){ Token *result = token_peek(p, i); if(result->kind == kind) return result; return 0; } function Token * token_is(Parser *p, Token_Kind kind){ Token *result = token_get(p); if(result->kind == kind) return result; return 0; } function Token * token_next(Parser *p){ Token *result = token_array_iter_next(&p->tokens); return result; } function Token * token_match(Parser *p, Token_Kind kind){ Token *token = token_get(p); if(token->kind == kind){ token = token_next(p); return token; } return 0; } function Token * token_match_keyword(Parser *p, Intern_String string){ Token *token = token_get(p); if(token->kind == TK_Keyword){ if(intern_compare(token->intern_val, string)){ token = token_next(p); return token; } } return 0; } function Token * token_expect(Parser *p, Token_Kind kind){ Token *token = token_get(p); if(token->kind == kind){ token = token_next(p); return token; } parser_push_error(p, token, "Expected token of kind: %s, got instead token of kind: %s.", token_kind_string[kind], token_kind_string[token->kind]); return 0; } function Typespec *parse_typespec(Parser *p); function Expr *parse_expr(Parser *p); //----------------------------------------------------------------------------- // Expression parsing //----------------------------------------------------------------------------- /* add = [+-] mul = [/%*] compare = == | != | >= | > | <= | < logical = [&|^] | && | || unary = [&*-!~+] | ++ | -- atom_expr = Int | Float | String | Identifier | 'cast' '(' typespec ',' expr ')' | 'size_type' '(' typespec ')' | 'size_expr' '(' expr ')' | '{' compound_expr '}' | '(' expr ')' | '(' ':' typespec ')' '{' compound_expr '}' postfix_expr = atom_expr ('[' expr ']' | '.' Identifier | ++ | -- | '(' expr_list ')')* unary_expr = unary ? unary_expr : atom_expr mul_expr = atom_expr (mul atom_expr)* add_expr = mul_expr (add mul_expr)* logical_expr = add_expr (logical add_expr)* compare_expr = logical_expr (compare logical_expr)* ternary_expr = compare_expr ('?' ternary_expr ':' ternary_expr)? expr = logical_expr Compound literals - (:[23]*Type){} - Type{} - { } */ function Expr_Compound_Field * parse_expr_compound_field(Parser *p){ Token *token = token_get(p); Expr_Compound_Field *result = 0; if(token_match(p, TK_OpenBracket)){ Expr *index = parse_expr(p); token_expect(p, TK_CloseBracket); token_expect(p, TK_Assign); Expr *expr = parse_expr(p); result = expr_compound_index(p->arena, token, index, expr); } else{ Expr *expr = parse_expr(p); if((token = token_match(p, TK_Assign))){ if(expr->kind != EK_Identifier){ parser_push_error(p, token, "Failed to parse compound literal, required identifier as left value"); } result = expr_compound_named(p->arena, token, expr->intern_val, parse_expr(p)); } else{ result = expr_compound_default(p->arena, token, expr); } } return result; } function Expr * parse_expr_compound(Parser *p, Typespec *typespec){ Token *token = token_expect(p, TK_OpenBrace); Expr *expr = expr_compound(p->arena, token, typespec); while(!token_is(p, TK_CloseBrace)){ Expr_Compound_Field *field = parse_expr_compound_field(p); expr_compound_push(expr, field); if(!token_match(p, TK_Comma)){ break; } } token_expect(p, TK_CloseBrace); return expr; } function Expr * parse_expr_atom(Parser *p){ Expr *result = 0; Token *token = token_get(p); if(token_match(p, TK_StringLit)){ result = expr_str(p->arena, token); } else if(token_match(p, TK_Identifier)){ if(token_is(p, TK_OpenBrace)){ Typespec *typespec = typespec_name(p->arena, token, token->intern_val); result = parse_expr_compound(p, typespec); } else{ result = expr_identifier(p->arena, token); } } else if(token_match(p, TK_Int)){ result = expr_int(p->arena, token); } else if(token_is(p, TK_OpenBrace)){ result = parse_expr_compound(p, 0); } else if(token_match(p, TK_OpenParen)){ if(token_match(p, TK_Colon)){ Typespec *typespec = parse_typespec(p); token_expect(p, TK_CloseParen); result = parse_expr_compound(p, typespec); } else{ Expr *expr = parse_expr(p); token_expect(p, TK_CloseParen); result = expr_paren(p->arena, token, expr); } } else if(token_match_keyword(p, keyword_cast)){ token_expect(p, TK_OpenParen); Typespec *typespec = parse_typespec(p); token_expect(p, TK_Comma); Expr *expr = parse_expr(p); token_expect(p, TK_CloseParen); result = expr_cast(p->arena, token, typespec, expr); } else{ parser_push_error(p, token, "Failed to parse expression"); } return result; } function B32 token_is_postfix(Parser *p){ Token *token = token_get(p); B32 result = token->kind == TK_OpenBracket || token->kind == TK_OpenParen || token->kind == TK_Dot || token->kind == TK_Increment || token->kind == TK_Decrement; return result; } function Expr_Compound_Field * parse_expr_function_argument(Parser *p){ Token *token = token_get(p); Expr_Compound_Field *result = 0; Expr *expr1 = parse_expr(p); if(token_match(p, TK_Assign)){ if(expr1->kind != EK_Identifier){ parser_push_error(p, token, "Failed to parse named function argument, required identifier as left value"); } Expr *expr2 = parse_expr(p); result = expr_compound_named(p->arena, token, expr1->intern_val, expr2); } else{ result = expr_compound_default(p->arena, token, expr1); } return result; } function Expr * parse_expr_postfix(Parser *p){ Expr *left = parse_expr_atom(p); while(token_is_postfix(p)){ Token *token = 0; if((token = token_match(p, TK_OpenBracket))){ Expr *size = parse_expr(p); token_expect(p, TK_CloseBracket); left = expr_index(p->arena, token, left, size); } else if((token = token_match(p, TK_OpenParen))){ left = expr_call(p->arena, token, left); if(!token_is(p, TK_CloseParen)){ do { Expr_Compound_Field *field = parse_expr_function_argument(p); expr_call_push(left, field); } while(token_match(p, TK_Comma)); } token_expect(p, TK_CloseParen); } else if(token_match(p, TK_Dot)){ token = token_expect(p, TK_Identifier); left = expr_field(p->arena, token, left); } else{ token = token_next(p); assert(token->kind == TK_Increment || token->kind == TK_Decrement); left = expr_postfix_unary(p->arena, token, left); } } return left; } function B32 token_is_unary(Parser *p){ Token *token = token_get(p); B32 result = token->kind == TK_Add || token->kind == TK_Increment || token->kind == TK_Decrement || token->kind == TK_Sub || token->kind == TK_Mul || token->kind == TK_BitAnd || token->kind == TK_Neg || token->kind == TK_Not; return result; } function Expr * parse_expr_unary(Parser *p){ if(token_is_unary(p)){ Token *op = token_next(p); Expr *right = parse_expr_unary(p); Expr *result = expr_unary(p->arena, op, right); return result; } else{ return parse_expr_postfix(p); } } function B32 token_is_mul(Parser *p){ Token *token = token_get(p); B32 result = token->kind >= TK_FirstMul && token->kind <= TK_LastMul; return result; } function Expr * parse_expr_mul(Parser *p){ Expr *left = parse_expr_unary(p); while(token_is_mul(p)){ Token *op = token_next(p); Expr *right = parse_expr_unary(p); left = expr_binary(p->arena, op, left, right); } return left; } function B32 token_is_add(Parser *p){ Token *token = token_get(p); B32 result = token->kind >= TK_FirstAdd && token->kind <= TK_LastAdd; return result; } function Expr * parse_expr_add(Parser *p){ Expr *left = parse_expr_mul(p); while(token_is_add(p)){ Token *op = token_next(p); Expr *right = parse_expr_mul(p); left = expr_binary(p->arena, op, left, right); } return left; } function B32 token_is_logical(Parser *p){ Token *token = token_get(p); B32 result = token->kind >= TK_FirstLogical && token->kind <= TK_LastLogical; return result; } function Expr * parse_expr_logical(Parser *p){ Expr *left = parse_expr_add(p); while(token_is_logical(p)){ Token *op = token_next(p); Expr *right = parse_expr_add(p); left = expr_binary(p->arena, op, left, right); } return left; } function B32 token_is_compare(Parser *p){ Token *token = token_get(p); B32 result = token->kind >= TK_FirstCompare && token->kind <= TK_LastCompare; return result; } function Expr * parse_expr_compare(Parser *p){ Expr *left = parse_expr_logical(p); while(token_is_compare(p)){ Token *op = token_next(p); Expr *right = parse_expr_logical(p); left = expr_binary(p->arena, op, left, right); } return left; } function Expr * parse_expr_ternary(Parser *p){ Expr *cond = parse_expr_compare(p); Token *token = 0; if((token = token_match(p, TK_Question))){ Expr *on_true = parse_expr_ternary(p); token_expect(p, TK_Colon); Expr *on_false = parse_expr_ternary(p); Expr *result = expr_ternary(p->arena, token, cond, on_true, on_false); return result; } return cond; } function Expr * parse_expr(Parser *p){ return parse_expr_ternary(p); } //----------------------------------------------------------------------------- // Type specifier parsing //----------------------------------------------------------------------------- /* base_type = NAME | '(' type_list? ')' type? type = ('*' | '[' expr ']')* base_type Examples: [32]*U32 - Array of 32 pointers to U32 **CustomDataType - Pointer to pointer of CustomDataType (*U32, S64) **S64 - Function pointer (CoolType: optional, S32) - Implicit void return value */ function Typespec * parse_typespec_function(Parser *p, Token *token){ Typespec *result = typespec_function(p->arena, token, 0); if(!token_is(p, TK_CloseParen)) for(;;) { // Optional name Token *name = 0; if((token = token_is(p, TK_Identifier))){ if(token_peek_is(p, TK_Colon, 1)){ token_next(p); token_next(p); name = token; } } // Parse type Typespec *arg = parse_typespec(p); if(name) arg = typespec_named_argument(p->arena, name, arg, name->intern_val); typespec_function_push(result, arg); if(!token_match(p, TK_Comma)){ break; } } token_expect(p, TK_CloseParen); if(token_is(p, TK_Identifier) || token_is(p, TK_OpenParen) || token_is(p, TK_Mul) || token_is(p, TK_OpenBracket)) result->func.ret = parse_typespec(p); else result->func.ret = typespec_name(p->arena, token_get(p), intern_void); return result; } // [10]*int - Array of 10 pointers to ints function Typespec * parse_typespec_recurse(Parser *p){ Token *token = token_get(p); if(token_match(p, TK_Mul)){ Typespec *result = parse_typespec_recurse(p); result = typespec_pointer(p->arena, token, result); return result; } else if(token_match(p, TK_OpenBracket)){ Expr *expr = parse_expr(p); token_expect(p, TK_CloseBracket); Typespec *result = parse_typespec_recurse(p); result = typespec_array(p->arena, token, result, expr); return result; } else if(token_match(p, TK_OpenParen)){ Typespec *result = parse_typespec_function(p, token); return result; } else if(token_match(p, TK_Identifier)){ Typespec *result = typespec_name(p->arena, token, token->intern_val); return result; } else{ parser_push_error(p, token, "Failed to parse type, unexpected token"); return 0; } } function Typespec * parse_typespec(Parser *p){ Typespec *result = parse_typespec_recurse(p); return result; } //----------------------------------------------------------------------------- // Parsing decls //----------------------------------------------------------------------------- /* name::(param:U32)*U32{} name::struct{} name::union{} name::enum{} name::typedef = name2; name::const = 4254; */ function Decl * parse_enum(Parser *p, Token *name){ Typespec *typespec = 0; if(token_match(p, TK_Colon)){ typespec = parse_typespec(p); } else{ typespec = typespec_name(p->arena, token_get(p), intern_int); } Decl *result = decl_enum(p->arena, name, name->intern_val, typespec); token_expect(p, TK_OpenBrace); do{ Token *val = token_expect(p, TK_Identifier); Expr *expr = 0; if(token_match(p, TK_Assign)){ expr = parse_expr(p); } decl_enum_push(p->arena, result, val, val->intern_val, expr, 0); } while(token_is(p, TK_Comma)); token_expect(p, TK_CloseBrace); return result; } function Decl * parse_decl(Parser *p){ Decl *result = 0; Token *name = 0; if((name = token_match(p, TK_Identifier))){ if(token_match(p, TK_DoubleColon)){ Token *token = 0; if((token = token_match_keyword(p, keyword_enum))){ result = parse_enum(p, name); } else if((token = token_match_keyword(p, keyword_union))){ // Union } else if((token = token_match_keyword(p, keyword_struct))){ // Struct } else if((token = token_match_keyword(p, keyword_const))){ // Const value } else if((token = token_match(p, TK_OpenParen))){ // Function } else{ parser_push_error(p, token_get(p), "Expected token of kind todo:decl_tokens"); } } else{ parser_push_error(p, token_get(p), "Expected token of kind '::'"); } } return result; } //----------------------------------------------------------------------------- // Test code //----------------------------------------------------------------------------- function void expr_print(Expr *expr); function B32 typespec_print(Typespec *spec); function void token_print(Token *token){ printf("%.*s", (S32)token->len, token->str); } function void expr_compound_print(Expr_Compound_Field *field){ switch(field->kind){ case COMPOUND_Default: { expr_print(field->init); }break; case COMPOUND_Named: { printf("[%s] = ", field->name.s.str); expr_print(field->init); }break; case COMPOUND_Index: { printf("["); expr_print(field->index); printf("] = "); expr_print(field->init); }break; default: invalid_codepath; } } function void expr_print(Expr *expr){ switch(expr->kind) { case EK_Int:case EK_String:case EK_Identifier: { token_print(expr->token); } break; case EK_SizeExpr:{ printf("size_expr("); expr_print(expr->size_expr.expr); printf(")"); }break; case EK_Compound:{ if(expr->compound.typespec){ printf("("); typespec_print(expr->compound.typespec); printf(")"); } printf("{"); for(Expr_Compound_Field *n = expr->compound.first; n; n=n->next){ expr_compound_print(n); if(n!=expr->compound.last) printf(","); } printf("}"); } break; case EK_SizeType:{ printf("size_type("); printf(")"); }break; case EK_Paren:{ printf("("); expr_print(expr->paren.expr); printf(")"); } break; case EK_Field:{ expr_print(expr->field.expr); printf(".%s", expr->field.name.s.str); } break; case EK_Binary:{ printf("("); expr_print(expr->binary.left); token_print(expr->token); expr_print(expr->binary.right); printf(")"); } break; case EK_PostfixUnary:{ printf("("); expr_print(expr->unary.expr); token_print(expr->token); printf(")"); } break; case EK_Unary:{ printf("("); token_print(expr->token); expr_print(expr->unary.expr); printf(")"); } break; case EK_Ternary:{ printf("("); expr_print(expr->ternary.cond); printf("?"); expr_print(expr->ternary.on_true); printf(":"); expr_print(expr->ternary.on_false); printf(")"); } break; case EK_Cast:{ printf("("); printf("("); typespec_print(expr->cast.typespec); printf(")"); expr_print(expr->cast.expr); printf(")"); } break; case EK_Index:{ expr_print(expr->index.atom); printf("["); expr_print(expr->index.index); printf("]"); }break; case EK_Call:{ expr_print(expr->call.atom); printf("("); for(Expr_Compound_Field *n = expr->call.first; n; n=n->next){ expr_compound_print(n); if(n!=expr->call.last) printf(","); } printf(")"); }break; default: {invalid_codepath;} break; } } function B32 typespec_print(Typespec *spec){ switch(spec->kind) { case TS_Name: { printf("%s", spec->name.s.str); } break; case TS_NamedArgument: { printf("%s: ", spec->named.name.s.str); typespec_print(spec->named.base); }break; case TS_Pointer: { typespec_print(spec->base); printf("*"); } break; case TS_Array: { typespec_print(spec->arr.base); printf("["); expr_print(spec->arr.size); printf("]"); } break; case TS_Function: { printf("("); for(Typespec *n = spec->func.first; n; n=n->next){ typespec_print(n); if(n!=spec->func.last) printf(", "); } printf(")"); typespec_print(spec->func.ret); } break; default: {invalid_codepath;} break; } return true; } function S64 eval_expr(Expr *expr){ switch(expr->kind){ case EK_Int: return expr->int_val; break; case EK_Unary:{ S64 left = eval_expr(expr->unary.expr); switch(expr->unary.op){ case TK_Not: return !left; break; case TK_Neg: return ~left; break; case TK_Sub: return -left; break; case TK_Add: return +left; break; default: invalid_codepath; } } break; case EK_Ternary:{ S64 cond = eval_expr(expr->ternary.cond); if(cond) return eval_expr(expr->ternary.on_true); else return eval_expr(expr->ternary.on_false); } break; case EK_Paren: return eval_expr(expr->paren.expr); break; case EK_Binary: { S64 left = eval_expr(expr->binary.left); S64 right = eval_expr(expr->binary.right); switch(expr->binary.op){ case TK_Add: return left + right; break; case TK_Sub: return left - right; break; case TK_Mul: return left * right; break; case TK_Div: return left / right; break; case TK_Mod: return left % right; break; case TK_Equals: return left == right; break; case TK_NotEquals: return left != right; break; case TK_GreaterThenOrEqual: return left >= right; break; case TK_LesserThenOrEqual: return left <= right; break; case TK_GreaterThen: return left > right; break; case TK_LesserThen: return left < right; break; case TK_BitAnd: return left & right; break; case TK_BitOr: return left | right; break; case TK_BitXor: return left ^ right; break; case TK_And: return left && right; break; case TK_Or: return left || right; break; case TK_LeftShift: return left << right; break; case TK_RightShift: return left >> right; break; default: invalid_codepath; } } break; default: invalid_codepath; } return 0; } function Parser parser_make(Arena *arena){ Parser result = { .tokens = lex_make_token_array(arena), .arena = arena, }; return result; } function void parser_restream(Parser *p, String stream, String file){ lex_restream(&p->tokens, stream, file); } function Parser parser_make_stream(Arena *arena, String stream, String file){ Parser parser = parser_make(arena); lex_restream(&parser.tokens, stream, file); return parser; } function void parser_add_stream(Parser *p, String string, String file){ lex_add_stream(&p->tokens, string, file); } function void parse_test_expr(){ Arena *scratch = arena_begin_scratch(); String test_case = lit("32+52-242*2/424%5-23" " 1<<5>>6<<2 " " 5*(4/3)*(2+5) " " 0&1 == 1&0 " " 1&&5*3 " " 1&&5||0 " " 1>5>=5==0 " " 1>5 ? 1 : 2 " " !!!!!1 " " ~~1 + -!2 " " 1 + ++Thing[12]++ + ++Thing[12].expr +" ); Parser parser = parser_make_stream(scratch, test_case, lit("Big_Expr")); Parser *p = &parser; S64 t = 5; S64 test_val[] = { (32+52-242*2/424%5-23), (((1<<5)>>6)<<2), 5*(4/3)*(2+5), (0&1) == (1&0), 1&&(t*3), (1&&t)||0, 1>t>=t==0, 1>t ? 1 : 2, !!!!!1, ~~1 + -!2, }; for(int i = 0; i < buff_cap(test_val); i++){ Expr *expr = parse_expr(p); S64 val = eval_expr(expr); assert(val == test_val[i]); } String exprs[] = { lit("cast([12](thing: U32, qwe: *U32) [32]Result, (123+234))"), lit("cast((thing: U32, qwe: *U32), (123+234))"), lit("(:(U32,U32)){Thing=10}"), lit("--Not_Thing[156](Thing) + test_func(asd=func1, af=func2, gg=func3)"), lit("(:[23]*Type){Thing=10}"), lit("cast(**Data,{Thing=10})"), lit("(:[64]S64){1,2,3,4,5}"), lit("Data_Type{1,2,3,4,5}"), }; for(SizeU i = 0; i < buff_cap(exprs); i++){ parser_restream(p, exprs[i], lit("Test_Exprs")); Expr *expr = parse_expr(p); expr_print(expr); printf("\n"); } arena_end_scratch(); } function void parse_test_decls(){ } function void parse_test(){ parse_test_expr(); parse_test_decls(); }