New approach, new lexer

This commit is contained in:
Krzosa Karol
2022-05-06 10:13:16 +02:00
parent 557dde1936
commit e3b5e9b33a
33 changed files with 3331 additions and 784 deletions

View File

@@ -7,6 +7,15 @@ parse_type_function(Parser *p, Token *token){
Typespec *result = typespec_function(p, token, 0);
if(!token_is(p, TK_CloseParen))
for(;;) {
// Optional name
if(token_is(p, TK_Identifier)){
if(token_peek_is(p, 1, TK_Colon)){
token_next(p);
token_next(p);
}
}
// Parse type
if(token_is(p, TK_Identifier)){
Typespec *arg = parse_type(p);
typespec_function_push(result, arg);
@@ -16,18 +25,16 @@ parse_type_function(Parser *p, Token *token){
}
}
token_expect(p, TK_CloseParen);
if(token_is(p, TK_Identifier)){
if(token_is(p, TK_Identifier))
result->function_spec.ret = parse_type(p);
}
else{
else
result->function_spec.ret = typespec_name(p, token_get(p), intern_void);
}
return result;
}
function Typespec *
parse_type(Parser *p){
// Parse as function type or normal
Token *token = 0;
Typespec *result = 0;
@@ -58,7 +65,6 @@ parse_type(Parser *p){
else break;
}
return result;
}
@@ -72,35 +78,39 @@ parse_expr_assignment(Parser *p){
}
// Parses an optional parenthesised list of notes and attaches each one to
// `parent`. If the next token is not '(' nothing is consumed beyond the
// failed match. Inside the parentheses each entry is an identifier, followed
// by an optional nested note list (recursive call) and an optional
// `= expression` stored in the note's `expr` field; entries are separated by
// commas and the list must end with ')'.
// NOTE(review): this span holds two copies of most lines, one naming the
// parser `ctx` and one naming it `p`. It looks like the before/after sides of
// a diff whose +/- markers were stripped - confirm against the real file.
// Only the `p` copy accepts an empty "()" and returns early.
function void
parse_note_list(Parser *ctx, Note *parent) {
if(token_match(ctx, TK_OpenParen)) {
parse_note_list(Parser *p, Note *parent) {
if(token_match(p, TK_OpenParen)) {
// Empty list "()": nothing to attach.
if(token_match(p, TK_CloseParen)){
return;
}
do {
Token *name = token_expect(ctx, TK_Identifier);
Note *current = note_push_new(ctx, parent, name, name->intern_val, 0);
parse_note_list(ctx, current);
if(token_match(ctx, TK_Assign)) {
current->expr = parse_expr(ctx);
Token *name = token_expect(p, TK_Identifier);
Note *current = note_push_new(p, parent, name, name->intern_val, 0);
// A note may carry its own nested argument list.
parse_note_list(p, current);
if(token_match(p, TK_Assign)) {
current->expr = parse_expr(p);
}
} while(token_match(ctx, TK_Comma));
token_expect(ctx, TK_CloseParen);
} while(token_match(p, TK_Comma));
token_expect(p, TK_CloseParen);
}
}
// Consumes zero or more notes introduced by a TK_At token and pushes each
// one under `result` via note_push_new. Every note is an identifier with an
// optional parenthesised argument list (parse_note_list) and an optional
// `= expression` stored in the note's `expr` field.
// NOTE(review): this span holds two copies of most lines, one naming the
// parser `ctx` and one naming it `p`. It looks like the before/after sides of
// a diff whose +/- markers were stripped - confirm against the real file.
function void
parse__notes(Parser *ctx, Note *result) {
while(token_match(ctx, TK_At)) {
Token *name = token_expect(ctx, TK_Identifier);
Note *current = note_push_new(ctx, result, name, name->intern_val, 0);
parse_note_list(ctx, current);
if(token_match(ctx, TK_Assign)) {
current->expr = parse_expr(ctx);
parse__notes(Parser *p, Note *result) {
while(token_match(p, TK_At)) {
Token *name = token_expect(p, TK_Identifier);
Note *current = note_push_new(p, result, name, name->intern_val, 0);
parse_note_list(p, current);
if(token_match(p, TK_Assign)) {
current->expr = parse_expr(p);
}
// A semicolon after a note is optional; the match result is ignored.
token_match(p, TK_Semicolon);
}
}
// Parses any notes at the current position and returns them by value in a
// zero-initialised Note that parse__notes fills in as the parent of the
// parsed notes.
// NOTE(review): two declarations of `result` appear here (`= {}` and
// `= {0}`). This looks like the old and new side of a diff whose markers were
// stripped; only one can exist in the real file. `{0}` is the form accepted
// by C compilers before C23.
function Note
parse_notes(Parser *p){
Note result = {};
Note result = {0};
parse__notes(p, &result);
return result;
}
@@ -172,7 +182,6 @@ function Decl *
parse_variable(Parser *p, Token *name){
Typespec *type = parse_type(p);
Expr *expr = parse_expr_assignment(p);
token_expect(p, TK_Semicolon);
return decl_variable(p, name, name->intern_val, type, expr);
}
@@ -182,14 +191,77 @@ parse_typedef(Parser *p, Token *name){
return decl_typedef(p, name, name->intern_val, type);
}
/*
function Stmt *
parse_stmt(Parser *p);
// Parses a brace-delimited block: expects '{', then collects statements
// produced by parse_stmt into a statement list until a '}' is matched.
// Returns the list node, which is created from the opening-brace token.
// NOTE(review): the loop only exits when TK_CloseBrace is matched. How it
// behaves when the input ends before '}' depends on parse_stmt and
// token_match - confirm it cannot spin at end of stream.
function Stmt *
parse_stmt_list(Parser *p){
    Token *open_brace = token_expect(p, TK_OpenBrace);
    Stmt *block = stmt_list(p, open_brace);
    for(;;){
        if(token_match(p, TK_CloseBrace)){
            break;
        }
        Stmt *child = parse_stmt(p);
        stmt_push(block, child);
    }
    return block;
}
// Parses a single statement. Alternatives are tried in this order:
//   1. a declaration (parse_decl returns non-null)
//   2. `return <expr> ;`
//   3. `if <expr> { ... }` followed by any number of `else if` / `else`
//   4. a nested `{ ... }` block
//   5. an expression statement `<expr> ;`
// Every node is created with the token that was current on entry.
function Stmt *
parse_stmt(Parser *p){
    Token *start = token_get(p);

    Decl *decl = parse_decl(p);
    if(decl){
        return stmt_decl(p, start, decl);
    }

    if(token_match_keyword(p, keyword_return)){
        Expr *value = parse_expr(p);
        Stmt *ret = stmt_return(p, start, value);
        token_expect(p, TK_Semicolon);
        return ret;
    }

    if(token_match_keyword(p, keyword_if)){
        Expr *cond = parse_expr(p);
        Stmt *body = parse_stmt_list(p);
        Stmt *first = stmt_if(p, start, body, cond);
        // The else-if / else arms are chained through `next`; `tail` is the
        // most recently appended arm.
        Stmt *tail = first;
        while(token_match_keyword(p, keyword_else)){
            if(!token_match_keyword(p, keyword_if)){
                // Plain `else`: no condition, and it terminates the chain.
                body = parse_stmt_list(p);
                tail->next = stmt_if(p, start, body, 0);
                tail = tail->next;
                break;
            }
            cond = parse_expr(p);
            body = parse_stmt_list(p);
            tail->next = stmt_if(p, start, body, cond);
            tail = tail->next;
        }
        return first;
    }

    if(token_is(p, TK_OpenBrace)){
        return parse_stmt_list(p);
    }

    // Fallback: expression statement.
    Expr *expr = parse_expr(p);
    token_expect(p, TK_Semicolon);
    return stmt_expr(p, start, expr);
}
function Decl *
parse_function(Parser *p, Token *name){
Decl *result = decl_function(p, name, name->intern_val, 0);
if(!token_is(p, TK_CloseParen)){
for(;;) {
if((name = token_match(p, TK_Identifier)))
decl_function_push(p, result, name, name->intern_val, parse_type(p));
if(token_peek_is(p, 1, TK_Colon)){
if(token_peek_is(p, 2, TK_Identifier) ||
token_peek_is(p, 2, TK_OpenParen)){
Token *name = parse_get_token_name(p,1);
Typespec *type = parse_type(p);
decl_function_push(p, result, name, name->intern_val, type);
}
}
else if(!token_match(p, TK_Comma))
break;
}
@@ -198,13 +270,12 @@ parse_function(Parser *p, Token *name){
if(token_is(p, TK_Identifier))
result->function_decl.ret = parse_type(p);
else
result->function_decl.ret = typespec_name(p, token_get(p), intern_void);
token_expect(p, TK_OpenBrace);
parse_stmt_list(p, result);
token_expect(p, TK_CloseBrace);
result->function_decl.body = parse_stmt_list(p);
return result;
}
*/
function Decl *
parse_decl(Parser *p){
@@ -225,18 +296,21 @@ parse_decl(Parser *p){
else if(token_peek_is_keyword(p, 2, keyword_typedef)){
result = parse_typedef(p, parse_get_token_name(p,2));
}
else if(token_peek_is(p, 2, TK_OpenParen)){
result = parse_function(p, parse_get_token_name(p,2));
}
}
else if(token_peek_is(p, 1, TK_Colon)){
if(token_peek_is(p, 2, TK_Identifier) ||
token_peek_is(p, 2, TK_OpenParen)){
result = parse_variable(p, parse_get_token_name(p,1));
token_expect(p, TK_Semicolon);
}
}
}
if(result){
result->first_note = notes.first;
result->last_note = notes.last;
decl_pass_notes(result, &notes);
}
else if(notes.first != 0){
parser_push_error(p, token_get(p), "Detected notes that are not attached to anything");
@@ -247,7 +321,7 @@ parse_decl(Parser *p){
function Decl *
parse(Parser *p){
Decl *result = decl_new(p, DECL_List, token_get(p), (Intern_String){});
Decl *result = decl_new(p, DECL_List, token_get(p), (Intern_String){0});
for(;;){
Decl *decl = 0;
if(token_is(p, TK_End)) {
@@ -265,188 +339,55 @@ parse(Parser *p){
return result;
}
/*
// Advances the parser by three tokens and returns the first one.
// The callers visible in this file use it after peeking the sequence
// `Identifier ':' <keyword>`, so the returned token is the declared name and
// the two skipped tokens are the colon and the keyword.
function Token *
parse__get_name(Parser *p){
    Token *first = token_next(p);
    for(int skipped = 0; skipped < 2; skipped++){
        token_next(p);
    }
    return first;
}
// Parses an enum body `{ [notes] Name [= expr], ... }` into an enum node
// named after `name`. Returns 0 when the opening '{' is missing.
// Notes parsed in front of a member are handed to that member. The member
// list stops at the first position that is not an identifier or is not
// followed by a comma, after which '}' is expected.
function Note *
parse_enum(Parser *p, Token *name){
    if(!token_expect(p, TK_OpenBrace)){
        return 0;
    }

    Note *enum_node = ast_enum(p, name, name->intern_val);
    for(;;){
        Note notes = parse_notes(p);
        Token *member = token_match(p, TK_Identifier);
        if(!member){
            break;
        }

        // Optional explicit value.
        Expr *value = 0;
        if(token_match(p, TK_Assign)){
            value = parse_expr(p);
        }

        Note *child = ast_enum_child(p, member, member->intern_val, value);
        ast_node_pass_note_list(child, &notes);
        ast_node_push_child(enum_node, child);

        if(!token_match(p, TK_Comma)){
            break;
        }
    }
    token_expect(p, TK_CloseBrace);
    return enum_node;
}
// Parses the part of a variable declaration that follows the name:
//   TypeName {'*'} {'[' expr ']'} ['=' expr]
// The type identifier is resolved through symbol_require_type, then wrapped
// in one pointer type per '*' and one array type per '[expr]'.
// Returns the variable node, or 0 when no type identifier is present.
// The terminating ';' is not consumed here.
function Note *
parse_variable(Parser *p, Token *name){
    Token *type_name = token_expect(p, TK_Identifier);
    if(!type_name){
        return 0;
    }

    Note *type = symbol_require_type(p, type_name);

    // Pointer suffixes.
    for(Token *star = token_match(p, TK_Mul); star; star = token_match(p, TK_Mul)){
        type = ast_type_pointer(p, star, type);
    }

    // Array suffixes, each with a size expression.
    for(Token *bracket = token_match(p, TK_OpenBracket); bracket; bracket = token_match(p, TK_OpenBracket)){
        Expr *length = parse_expr(p);
        type = ast_type_array(p, bracket, type, length);
        token_expect(p, TK_CloseBracket);
    }

    // Optional initialiser.
    Expr *init = 0;
    if(token_match(p, TK_Assign)){
        init = parse_expr(p);
    }
    return ast_variable(p, name, name->intern_val, type, init);
}
// Parses a brace-delimited struct/union body into an ast_struct node.
//   name      - name token of the aggregate; may be 0 (anonymous), in which
//               case an empty Intern_String is used as the name.
//   kind      - AST kind passed to ast_struct (callers here pass AK_Struct
//               or AK_Union).
//   is_global - when set, the node is passed to symbol_register before its
//               members are parsed.
// Returns the node, or 0 when the opening '{' is missing.
// Accepted members: `name: struct {...}`, `name: union {...}`,
// `name: Type ... ;`, and anonymous `union: {...}` / `struct: {...}`.
// Notes parsed in front of a member are attached to that member.
// NOTE(review): the old parse() in this file also calls symbol_register on
// nodes obtained with is_global = true, which looks like a double
// registration - confirm.
function Note *
parse_struct(Parser *p, Token *name, AST_Kind kind, B32 is_global){
Note *result = 0;
if(token_expect(p, TK_OpenBrace)){
result = ast_struct(p, name, name?name->intern_val:(Intern_String){}, kind);
if(is_global) symbol_register(p, result);
for(;;){
Note notes = parse_notes(p);
Note *mem = 0;
Token *mem_name = token_match(p, TK_Identifier);
// Named member: `name : ...`
if(mem_name){
if(token_expect(p, TK_Colon)){
if(token_match_keyword(p, keyword_struct)){
mem = parse_struct(p, mem_name, AK_Struct, false);
}
else if(token_match_keyword(p, keyword_union)){
mem = parse_struct(p, mem_name, AK_Union, false);
}
//else if(token_match_keyword(p, keyword_enum)){
//mem = parse_enum(p, mem_name);
//}
else if(token_is(p, TK_Identifier)){
mem = parse_variable(p, mem_name);
token_expect(p, TK_Semicolon);
}
else parser_push_error(p, mem_name, "Unrecognized token while parsing struct");
// mem stays 0 on the error path or when the nested parse returned 0.
if(mem){
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
}
else{
// Missing ':' after the member name ends the member loop.
break;
}
}
// Anonymous nested union: `union : { ... }`
else if(token_match_keyword(p, keyword_union) &&
token_match(p, TK_Colon)){
mem = parse_struct(p, 0, AK_Union, false);
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
// Anonymous nested struct: `struct : { ... }`
else if(token_match_keyword(p, keyword_struct) &&
token_match(p, TK_Colon)){
mem = parse_struct(p, 0, AK_Struct, false);
ast_node_pass_note_list(mem, &notes);
ast_node_push_child(result, mem);
}
// Anything else must be the closing '}'; the loop ends whether or not
// token_expect succeeds.
else if(token_expect(p, TK_CloseBrace)){
break;
}
else{
break;
}
}
}
return result;
}
// Parses the tail of a typedef: `TypeName ;`. The type identifier is
// resolved through symbol_require_type and wrapped in a typedef node named
// after `name`, which is returned.
// NOTE(review): the token_expect result is passed on without a null check,
// unlike the neighbouring parse_variable - confirm symbol_require_type
// tolerates a missing token.
function Note *
parse_typedef(Parser *p, Token *name){
    Token *aliased_name = token_expect(p, TK_Identifier);
    Note *aliased_type = symbol_require_type(p, aliased_name);
    Note *typedef_node = ast_typedef(p, name, name->intern_val, aliased_type);
    token_expect(p, TK_Semicolon);
    return typedef_node;
}
function Note_List *
parse(Parser *p){
Note_List *result = ast_node_new(p, AK_List, token_get(p), intern_empty);
for(;;){
Note *node = 0;
Note notes = parse_notes(p);
if(token_is(p, TK_End)){
break;
}
else if(token_is(p, TK_Error)){
break;
}
else if(token_is(p, TK_Identifier) && // Peeking to be more error averse
token_peek_is(p, 1, TK_Colon)){
if(token_peek_is_keyword(p, 2, keyword_struct)){
node = parse_struct(p, parse__get_name(p), AK_Struct, true);
symbol_register(p, node);
}
else if(token_peek_is_keyword(p, 2, keyword_union)){
node = parse_struct(p, parse__get_name(p), AK_Union, true);
symbol_register(p, node);
}
else if(token_peek_is_keyword(p, 2, keyword_enum)){
node = parse_enum(p, parse__get_name(p));
symbol_register(p, node);
}
else if(token_peek_is_keyword(p, 2, keyword_typedef)){
node = parse_typedef(p, parse__get_name(p));
symbol_register(p, node);
}
else{
token_next(p);
}
}
else{
token_next(p);
function void gen_stmt(Stmt *stmt);
function void gen_end();
function void gen_begin(Arena *arena, Parser *p);
function void expr_print(Parser *p, Expr *expr);
function void
parser_test(){
Parser p = {0};
{
parser_init(&p);
Intern_String a = intern_string(&p, lit("Thing"));
Intern_String b = intern_string(&p, lit("Thing"));
assert(a.s.str == b.s.str);
}
String exprs[] = {
lit("(534>43?435:42,234,cast(S64)32/*todo cast*/,Thing[10][2],Thing(1,2))"),
lit("(4+2*53)"),
lit("((4+2)*53)"),
lit("++5"),
lit("5--"), // @Todo(Krzosa):
lit("-5"),
lit("(+5)"),
lit("sizeof(32) + sizeof(:S32*)"),
lit("cast(S64**)5"),
lit("cast(S64)5+3"),
lit("534>43?435:42"),
};
for(S64 i = 0; i < buff_cap(exprs); i++){
parser_lex_stream(&p, exprs[i], lit("File"));
Expr *expr = parse_expr(&p);
assert(expr);
expr_print(&p, expr);
lex_print("\n");
}
if(node){
ast_node_pass_note_list(node, &notes);
ast_node_push_child(result, node);
String stmts[] = {
lit("Thing :: struct { test: int; } "),
lit("thing: S32 = 100; "),
lit("thing = thing + 10; "),
lit("thing++; "),
lit("{ thing_scoped: S32 = 10; thing_scoped += 10; } "),
};
for(S64 i = 0; i < buff_cap(stmts); i++){
parser_lex_stream(&p, stmts[i], lit("File"));
Stmt *stmt = parse_stmt(&p);
assert(stmt);
gen_begin(&p.scratch, &p);
gen_stmt(stmt);
gen_end();
lex_print("\n");
}
else if(notes.first_note != 0){
parser_push_error(p, token_get(p), "Warning: notes got lost");
}
}
return result;
}
*/
}