New syntax that's easier to parse; parsing doesn't need variable lookup

This commit is contained in:
Krzosa Karol
2022-05-03 11:31:21 +02:00
parent 3c376bbe30
commit 8c04044ea2
12 changed files with 1216 additions and 244 deletions

View File

@@ -1,20 +1,88 @@
#if 0
function Decl *parse_decl(Parser *p);
function Decl *parse_struct(Parser *p, Token *name, Decl_Kind kind);
function Typespec *parse_type(Parser *p);
function Token *
parse__get_name(Parser *p){
Token *name = token_next(p);
token_next(p);
token_next(p);
return name;
// Parses an inline struct/union type.
//
// 'token' is the keyword token that introduced the type; its interned value is
// compared against keyword_struct to choose between DECL_Struct and DECL_Union.
// The body is parsed through parse_struct, the resulting declaration's name is
// reset to an empty Intern_String, and the declaration is wrapped in a struct
// typespec.
function Typespec *
parse_type_struct(Parser *p, Token *token){
  Decl_Kind kind = DECL_Union;
  if(intern_compare(token->intern_val, keyword_struct)){
    kind = DECL_Struct;
  }

  Decl *body = parse_struct(p, token, kind);
  body->name = (Intern_String){};
  return typespec_struct(p, token, body);
}
// Parses the remainder of a function type; 'token' is passed through to
// typespec_function as the token of the new typespec.
//
// Argument list: unless the current token is ')', the loop parses a type
// whenever an identifier is seen and otherwise consumes a ',' - it stops at the
// first token that is neither. A ')' is then expected.
//
// Return type: parsed when an identifier follows the ')'; otherwise a named
// typespec for intern_void is used.
function Typespec *
parse_type_function(Parser *p, Token *token){
  Typespec *func = typespec_function(p, token, 0);

  if(!token_is(p, TK_CloseParen)){
    for(;;){
      if(token_is(p, TK_Identifier)){
        Typespec *arg_type = parse_type(p);
        typespec_function_push(func, arg_type);
        continue;
      }
      if(!token_match(p, TK_Comma)){
        break;
      }
    }
  }
  token_expect(p, TK_CloseParen);

  Typespec *ret = 0;
  if(token_is(p, TK_Identifier)){
    ret = parse_type(p);
  }
  else{
    ret = typespec_name(p, token_get(p), intern_void);
  }
  func->function_spec.ret = ret;
  return func;
}
// Parses a type specifier: a base type followed by any number of postfix
// modifiers.
//
// Base type, chosen by the current token:
//   - identifier           -> named typespec
//   - struct/union keyword -> parse_type_struct
//   - '('                  -> parse_type_function
// Postfix modifiers, applied in the order they appear:
//   - '*'          -> pointer to the type parsed so far
//   - '[' expr ']' -> array of the type parsed so far, sized by expr
// A '@' after the type is consumed and ends the modifier loop.
//
// Returns the parsed Typespec, or 0 after pushing an error when the current
// token cannot start a type.
function Typespec *
parse_type(Parser *p){
  Token *token = 0;
  Typespec *result = 0;
  if((token = token_match(p, TK_Identifier))){
    result = typespec_name(p, token, token->intern_val);
  }
  else if((token = token_match_keyword(p, keyword_struct)) || (token = token_match_keyword(p, keyword_union))){
    result = parse_type_struct(p, token);
  }
  else if((token = token_match(p, TK_OpenParen))){
    result = parse_type_function(p, token);
  }
  else{
    // Every match above failed, so 'token' is 0 at this point. Report the
    // error at the current token instead of handing a null token to the
    // error reporter.
    parser_push_error(p, token_get(p), "Failed to parse type, unexpected token");
    return 0;
  }

  for(;;){
    if((token = token_match(p, TK_Mul))){
      result = typespec_pointer(p, token, result);
    }
    else if((token = token_match(p, TK_OpenBracket))){
      Expr *expr = parse_expr(p);
      result = typespec_array(p, token, result, expr);
      token_expect(p, TK_CloseBracket);
    }
    else if(token_match(p, TK_At)){
      // NOTE(review): the '@' is consumed here, so a following note would no
      // longer start with TK_At - confirm this is intended.
      break;
    }
    else break;
  }
  return result;
}
// Parses an optional "= expr" suffix.
// Returns the parsed expression when a TK_Assign token is consumed, 0 otherwise.
function Expr *
parse_expr_assignment(Parser *p){
  if(!token_match(p, TK_Assign)){
    return 0;
  }
  return parse_expr(p);
}
function void
parse_note_list(Parser *ctx, AST_Node *parent) {
parse_note_list(Parser *ctx, Note *parent) {
if(token_match(ctx, TK_OpenParen)) {
do {
Token *name = token_expect(ctx, TK_Identifier);
AST_Node *current = ast_note(ctx, name, name->intern_val, 0);
ast_node_push_note(parent, current);
Note *current = note_push_new(ctx, parent, name, name->intern_val, 0);
parse_note_list(ctx, current);
if(token_match(ctx, TK_Assign)) {
current->expr = parse_expr(ctx);
@@ -25,11 +93,10 @@ parse_note_list(Parser *ctx, AST_Node *parent) {
}
function void
parse__notes(Parser *ctx, AST_Node *result) {
parse__notes(Parser *ctx, Note *result) {
while(token_match(ctx, TK_At)) {
Token *name = token_expect(ctx, TK_Identifier);
AST_Node *current = ast_note(ctx, name, name->intern_val, 0);
ast_node_push_note(result, current);
Note *current = note_push_new(ctx, result, name, name->intern_val, 0);
parse_note_list(ctx, current);
if(token_match(ctx, TK_Assign)) {
current->expr = parse_expr(ctx);
@@ -37,185 +104,316 @@ parse__notes(Parser *ctx, AST_Node *result) {
}
}
function AST_Node
function Note
parse_notes(Parser *p){
AST_Node result = {};
Note result = {};
parse__notes(p, &result);
return result;
}
function AST_Node *
function Decl *
parse_enum(Parser *p, Token *name){
AST_Node *result = 0;
if(token_expect(p, TK_OpenBrace)){
result = ast_enum(p, name, name->intern_val);
do{
AST_Node notes = parse_notes(p);
Token *token = token_match(p, TK_Identifier);
if(token){
Expr *expr = 0;
if(token_match(p, TK_Assign)){
expr = parse_expr(p);
}
AST_Node *child = ast_enum_child(p, token, token->intern_val, expr);
ast_node_pass_note_list(child,&notes);
ast_node_push_child(result, child);
}
else{
break;
}
} while(token_match(p, TK_Comma));
token_expect(p, TK_CloseBrace);
}
return result;
}
function AST_Node *
parse_variable(Parser *p, Token *name){
AST_Node *result = 0;
Token *type_token = token_expect(p, TK_Identifier);
if(type_token){
AST_Node *type = symbol_require_type(p, type_token);
Token *star;
while((star = token_match(p, TK_Mul))){
type = ast_type_pointer(p, star, type);
}
while((star = token_match(p, TK_OpenBracket))){
Expr *expr = parse_expr(p);
type = ast_type_array(p, star, type, expr);
token_expect(p, TK_CloseBracket);
}
Expr *expr = 0;
if(token_match(p, TK_Assign)){
expr = parse_expr(p);
}
result = ast_variable(p, name, name->intern_val, type, expr);
}
return result;
}
function AST_Node *
parse_struct(Parser *p, Token *name, AST_Kind kind, B32 is_global){
AST_Node *result = 0;
if(token_expect(p, TK_OpenBrace)){
result = ast_struct(p, name, name?name->intern_val:(Intern_String){}, kind);
if(is_global) symbol_register(p, result);
for(;;){
AST_Node notes = parse_notes(p);
AST_Node *mem = 0;
Token *mem_name = token_match(p, TK_Identifier);
if(mem_name){
if(token_expect(p, TK_Colon)){
if(token_match_keyword(p, keyword_struct)){
mem = parse_struct(p, mem_name, AK_Struct, false);
}
else if(token_match_keyword(p, keyword_union)){
mem = parse_struct(p, mem_name, AK_Union, false);
}
//else if(token_match_keyword(p, keyword_enum)){
//mem = parse_enum(p, mem_name);
//}
else if(token_is(p, TK_Identifier)){
mem = parse_variable(p, mem_name);
token_expect(p, TK_Semicolon);
}
else parser_push_error(p, mem_name, "Unrecognized token while parsing struct");
if(mem){
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
}
else{
break;
}
}
else if(token_match_keyword(p, keyword_union) &&
token_match(p, TK_Colon)){
mem = parse_struct(p, 0, AK_Union, false);
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
else if(token_match_keyword(p, keyword_struct) &&
token_match(p, TK_Colon)){
mem = parse_struct(p, 0, AK_Struct, false);
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
else if(token_expect(p, TK_CloseBrace)){
break;
}
else{
break;
}
}
}
return result;
}
function AST_Node *
parse_typedef(Parser *p, Token *name){
Token *type_token = token_expect(p, TK_Identifier);
AST_Node *type = symbol_require_type(p, type_token);
AST_Node *result = ast_typedef(p, name, name->intern_val, type);
token_expect(p, TK_Semicolon);
return result;
}
function AST_Node_List *
parse(Parser *p){
AST_Node_List *result = ast_node_new(p, AK_List, token_get(p), intern_empty);
Typespec *type = 0;
if(token_match(p, TK_Colon)) type = parse_type(p);
else type = typespec_name(p, token_get(p), intern_s64);
for(;;){
AST_Node *node = 0;
AST_Node notes = parse_notes(p);
if(token_is(p, TK_End)){
Decl *result = decl_enum(p, name, name->intern_val, type);
token_expect(p, TK_OpenBrace);
do{
Note notes = parse_notes(p);
Token *token = token_match(p, TK_Identifier);
if(token){
Expr *expr = parse_expr_assignment(p);
decl_enum_push(p, result, token, token->intern_val, expr, &notes);
} else break;
} while(token_match(p, TK_Comma));
token_expect(p, TK_CloseBrace);
return result;
}
function Decl *
parse_struct(Parser *p, Token *name, Decl_Kind kind){
Decl *result = decl_struct(p, kind, name, name->intern_val);
token_expect(p, TK_OpenBrace);
while(!token_is(p, TK_CloseBrace)){
Decl *decl = 0;
if((decl = parse_decl(p))){
decl_struct_push(result, decl);
}
else {
parser_push_error(p, token_get(p), "Unexpected token while parsing struct");
break;
}
else if(token_is(p, TK_Error)){
break;
}
else if(token_is(p, TK_Identifier) && // Peeking to be more error averse
token_peek_is(p, 1, TK_Colon)){
}
token_expect(p, TK_CloseBrace);
return result;
}
// Parses the part of a variable declaration that follows its name:
// a type, an optional "= expr" initializer, and a terminating ';'.
// 'name' supplies both the token and the interned name of the declaration.
function Decl *
parse_variable(Parser *p, Token *name){
  Typespec *var_type = parse_type(p);
  Expr *initializer = parse_expr_assignment(p);
  token_expect(p, TK_Semicolon);

  Decl *variable = decl_variable(p, name, name->intern_val, var_type, initializer);
  return variable;
}
// Parses the aliased type of a typedef and builds the typedef declaration.
// 'name' supplies both the token and the interned name of the declaration.
function Decl *
parse_typedef(Parser *p, Token *name){
  Typespec *aliased = parse_type(p);
  Decl *result = decl_typedef(p, name, name->intern_val, aliased);
  return result;
}
// Takes the next token as the name and then advances past 'count' further
// tokens. Returns the name token, i.e. the first token_next result.
function Token *
parse_get_token_name(Parser *p, S32 count){
  Token *name = token_next(p);
  S32 remaining = count;
  while(remaining > 0){
    token_next(p);
    remaining -= 1;
  }
  return name;
}
function Decl *
parse_decl(Parser *p){
Decl *result = 0;
Note notes = parse_notes(p);
if(token_is(p, TK_Identifier)){
if(token_peek_is(p, 1, TK_DoubleColon)){
if(token_peek_is_keyword(p, 2, keyword_struct)){
node = parse_struct(p, parse__get_name(p), AK_Struct, true);
symbol_register(p, node);
result = parse_struct(p, parse_get_token_name(p,2), DECL_Struct);
}
else if(token_peek_is_keyword(p, 2, keyword_union)){
node = parse_struct(p, parse__get_name(p), AK_Union, true);
symbol_register(p, node);
result = parse_struct(p, parse_get_token_name(p,2), DECL_Union);
}
else if(token_peek_is_keyword(p, 2, keyword_enum)){
node = parse_enum(p, parse__get_name(p));
symbol_register(p, node);
result = parse_enum(p, parse_get_token_name(p,2));
}
else if(token_peek_is_keyword(p, 2, keyword_typedef)){
node = parse_typedef(p, parse__get_name(p));
symbol_register(p, node);
result = parse_typedef(p, parse_get_token_name(p,2));
}
}
else if(token_peek_is(p, 1, TK_Colon)){
if(token_peek_is(p, 2, TK_Identifier) ||
token_peek_is_keyword(p, 2, keyword_union) ||
token_peek_is_keyword(p, 2, keyword_struct) ||
token_peek_is(p, 2, TK_OpenParen)){
result = parse_variable(p, parse_get_token_name(p,1));
}
}
}
if(result){
result->first_note = notes.first;
result->last_note = notes.last;
}
else if(notes.first != 0){
parser_push_error(p, token_get(p), "Detected notes that are not attached to anything");
}
return result;
}
// Top-level entry point: collects declarations into a DECL_List until TK_End.
//
// Each iteration tries parse_decl. A successful declaration is appended to the
// list; when parse_decl yields 0, one token is skipped via token_next before
// trying again.
function Decl *
parse(Parser *p){
  Decl *list = decl_new(p, DECL_List, token_get(p), (Intern_String){});
  while(!token_is(p, TK_End)){
    Decl *decl = parse_decl(p);
    if(decl){
      decl_list_push(list, decl);
    }
    else{
      token_next(p);
    }
  }
  return list;
}
/*
function Token *
parse__get_name(Parser *p){
Token *name = token_next(p);
token_next(p);
token_next(p);
return name;
}
function Note *
parse_enum(Parser *p, Token *name){
Note *result = 0;
if(token_expect(p, TK_OpenBrace)){
result = ast_enum(p, name, name->intern_val);
do{
Note notes = parse_notes(p);
Token *token = token_match(p, TK_Identifier);
if(token){
Expr *expr = 0;
if(token_match(p, TK_Assign)){
expr = parse_expr(p);
}
Note *child = ast_enum_child(p, token, token->intern_val, expr);
ast_node_pass_note_list(child,&notes);
ast_node_push_child(result, child);
}
else{
break;
}
} while(token_match(p, TK_Comma));
token_expect(p, TK_CloseBrace);
}
return result;
}
function Note *
parse_variable(Parser *p, Token *name){
Note *result = 0;
Token *type_token = token_expect(p, TK_Identifier);
if(type_token){
Note *type = symbol_require_type(p, type_token);
Token *star;
while((star = token_match(p, TK_Mul))){
type = ast_type_pointer(p, star, type);
}
while((star = token_match(p, TK_OpenBracket))){
Expr *expr = parse_expr(p);
type = ast_type_array(p, star, type, expr);
token_expect(p, TK_CloseBracket);
}
Expr *expr = 0;
if(token_match(p, TK_Assign)){
expr = parse_expr(p);
}
result = ast_variable(p, name, name->intern_val, type, expr);
}
return result;
}
function Note *
parse_struct(Parser *p, Token *name, AST_Kind kind, B32 is_global){
Note *result = 0;
if(token_expect(p, TK_OpenBrace)){
result = ast_struct(p, name, name?name->intern_val:(Intern_String){}, kind);
if(is_global) symbol_register(p, result);
for(;;){
Note notes = parse_notes(p);
Note *mem = 0;
Token *mem_name = token_match(p, TK_Identifier);
if(mem_name){
if(token_expect(p, TK_Colon)){
if(token_match_keyword(p, keyword_struct)){
mem = parse_struct(p, mem_name, AK_Struct, false);
}
else if(token_match_keyword(p, keyword_union)){
mem = parse_struct(p, mem_name, AK_Union, false);
}
//else if(token_match_keyword(p, keyword_enum)){
//mem = parse_enum(p, mem_name);
//}
else if(token_is(p, TK_Identifier)){
mem = parse_variable(p, mem_name);
token_expect(p, TK_Semicolon);
}
else parser_push_error(p, mem_name, "Unrecognized token while parsing struct");
if(mem){
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
}
else{
token_next(p);
break;
}
}
else if(token_match_keyword(p, keyword_union) &&
token_match(p, TK_Colon)){
mem = parse_struct(p, 0, AK_Union, false);
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
else if(token_match_keyword(p, keyword_struct) &&
token_match(p, TK_Colon)){
mem = parse_struct(p, 0, AK_Struct, false);
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
else if(token_expect(p, TK_CloseBrace)){
break;
}
else{
break;
}
}
}
return result;
}
function Note *
parse_typedef(Parser *p, Token *name){
Token *type_token = token_expect(p, TK_Identifier);
Note *type = symbol_require_type(p, type_token);
Note *result = ast_typedef(p, name, name->intern_val, type);
token_expect(p, TK_Semicolon);
return result;
}
function Note_List *
parse(Parser *p){
Note_List *result = ast_node_new(p, AK_List, token_get(p), intern_empty);
for(;;){
Note *node = 0;
Note notes = parse_notes(p);
if(token_is(p, TK_End)){
break;
}
else if(token_is(p, TK_Error)){
break;
}
else if(token_is(p, TK_Identifier) && // Peeking to be more error averse
token_peek_is(p, 1, TK_Colon)){
if(token_peek_is_keyword(p, 2, keyword_struct)){
node = parse_struct(p, parse__get_name(p), AK_Struct, true);
symbol_register(p, node);
}
else if(token_peek_is_keyword(p, 2, keyword_union)){
node = parse_struct(p, parse__get_name(p), AK_Union, true);
symbol_register(p, node);
}
else if(token_peek_is_keyword(p, 2, keyword_enum)){
node = parse_enum(p, parse__get_name(p));
symbol_register(p, node);
}
else if(token_peek_is_keyword(p, 2, keyword_typedef)){
node = parse_typedef(p, parse__get_name(p));
symbol_register(p, node);
}
else{
token_next(p);
}
if(node){
ast_node_pass_note_list(node, &notes);
ast_node_push_child(result, node);
}
else if(notes.first_note != 0){
parser_push_error(p, token_get(p), "Warning: notes got lost");
}
}
else{
token_next(p);
}
return result;
if(node){
ast_node_pass_note_list(node, &notes);
ast_node_push_child(result, node);
}
else if(notes.first_note != 0){
parser_push_error(p, token_get(p), "Warning: notes got lost");
}
}
#endif
return result;
}
*/