WIP top-level parsing and keyword tokens
This commit is contained in:
83
ast.c
Normal file
83
ast.c
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
/*
 * Discriminator stored in Type.kind.
 *
 * TYPE_NONE is the first enumerator (value 0), so a zero-initialized Type
 * has kind TYPE_NONE. The blank lines only group related enumerators; the
 * numeric values are assigned sequentially across the groups.
 */
typedef enum {
    TYPE_NONE,
    TYPE_VOID,
    TYPE_BOOL,

    TYPE_CHAR,
    TYPE_SCHAR,
    TYPE_UCHAR,
    TYPE_SHORT,
    TYPE_USHORT,
    TYPE_INT,
    TYPE_UINT,
    TYPE_LONG,
    TYPE_ULONG,
    TYPE_LLONG,
    TYPE_ULLONG,

    TYPE_FLOAT,
    TYPE_DOUBLE,
    TYPE_LDOUBLE,

    TYPE_POINTER,
    TYPE_ARRAY,
    TYPE_FUNCTION,

    TYPE_STRUCT,
    TYPE_UNION,
    TYPE_ENUM,
} Type_Kind;
|
||||||
|
|
||||||
|
typedef struct Type Type;
|
||||||
|
struct Type {
|
||||||
|
Type_Kind kind;
|
||||||
|
int size;
|
||||||
|
int align;
|
||||||
|
Type *base;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Discriminator stored in Ast.kind.
 *
 * AST_NONE is the first enumerator (value 0). The first group holds the
 * expression-level kinds that already existed in parser.c; AST_FUNCTION and
 * AST_BLOCK are the additions for top-level parsing.
 */
typedef enum Ast_Kind {
    AST_NONE,
    AST_ERROR,
    AST_INT,
    AST_UNARY,
    AST_BINARY,

    AST_FUNCTION,
    AST_BLOCK,

} Ast_Kind;
|
||||||
|
|
||||||
|
typedef struct Ast Ast;
|
||||||
|
struct Ast {
|
||||||
|
Ast_Kind kind;
|
||||||
|
Token *pos;
|
||||||
|
|
||||||
|
union {
|
||||||
|
uint64_t u;
|
||||||
|
struct {
|
||||||
|
Token_Kind op;
|
||||||
|
Ast *l;
|
||||||
|
Ast *r;
|
||||||
|
};
|
||||||
|
char *error;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Allocates a new Ast node with calloc, so every field other than kind and
 * pos starts out zeroed.
 *
 * token - stored in the node's pos field.
 * kind  - stored in the node's kind field.
 *
 * Returns the new node; never returns NULL. If the allocation fails the
 * process is aborted with a message on stderr, rather than writing through
 * a NULL pointer.
 */
Ast *create_ast(Token *token, Ast_Kind kind) {
    Ast *result = calloc(1, sizeof *result);
    if (result == NULL) {
        fprintf(stderr, "create_ast: out of memory\n");
        abort();
    }
    result->kind = kind;
    result->pos = token;
    return result;
}
|
||||||
|
|
||||||
|
/*
 * Builds an AST_BINARY node via create_ast().
 *
 * token - stored as the node's position.
 * op    - operator token kind, stored in op.
 * left  - stored in l.
 * right - stored in r.
 *
 * Returns the newly created node. The operand pointers are stored as given;
 * they are not copied.
 */
Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
    Ast *node = create_ast(token, AST_BINARY);
    node->op = op;
    node->l = left;
    node->r = right;
    return node;
}
|
||||||
|
|
||||||
|
// Built-in description of `int`. Size and alignment are taken from the `int`
// of the compiler that builds this program. Uses the standard C11 _Alignof
// operator rather than the __alignof compiler extension.
Type base_type_int = {.kind = TYPE_INT, .size = sizeof(int), .align = _Alignof(int)};
// Shared pointer to the built-in `int` type description.
Type *type_int = &base_type_int;
|
||||||
4
build.sh
4
build.sh
@@ -4,7 +4,7 @@ if [[ ! -e build ]]; then
|
|||||||
mkdir build
|
mkdir build
|
||||||
fi
|
fi
|
||||||
cd build
|
cd build
|
||||||
clang -o meta $(realpath ../meta.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
|
clang -o meta $(realpath ../meta.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths -Wno-missing-field-initializers
|
||||||
./meta > ../meta_gen.c
|
./meta > ../meta_gen.c
|
||||||
clang -o main $(realpath ../main.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
|
clang -o main $(realpath ../main.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths -Wno-missing-field-initializers
|
||||||
./main
|
./main
|
||||||
|
|||||||
89
lex.c
89
lex.c
@@ -1,9 +1,3 @@
|
|||||||
/*
|
|
||||||
|
|
||||||
- [ ] New line splicing, first source preprocessing stage. In order to properly handle '\\' backslash new line, we most likely need to preprocess the source in a initial pass. So at some point we need to introduce a stage that will create a buffer without wrong characters with a line / column mapping data structure.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct Token {
|
typedef struct Token {
|
||||||
Token_Kind kind;
|
Token_Kind kind;
|
||||||
int len;
|
int len;
|
||||||
@@ -46,7 +40,6 @@ uint64_t hash_bytes(char *data, size_t len) {
|
|||||||
char *global_intern_table[4096];
|
char *global_intern_table[4096];
|
||||||
char intern_arena[4096*6];
|
char intern_arena[4096*6];
|
||||||
int intern_arena_len;
|
int intern_arena_len;
|
||||||
|
|
||||||
char *lex_alloc_string(int len) {
|
char *lex_alloc_string(int len) {
|
||||||
char *result = intern_arena + intern_arena_len;
|
char *result = intern_arena + intern_arena_len;
|
||||||
intern_arena_len += len + 1;
|
intern_arena_len += len + 1;
|
||||||
@@ -73,47 +66,6 @@ char *make_intern(char *string, int len) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *lex_first_keyword = NULL;
|
|
||||||
char *lex_last_keyword = NULL;
|
|
||||||
#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)
|
|
||||||
|
|
||||||
void lex_init_keywords(void) {
|
|
||||||
lex_first_keyword = lex_add_keyword("auto");
|
|
||||||
lex_add_keyword("break");
|
|
||||||
lex_add_keyword("case");
|
|
||||||
lex_add_keyword("char");
|
|
||||||
lex_add_keyword("const");
|
|
||||||
lex_add_keyword("continue");
|
|
||||||
lex_add_keyword("default");
|
|
||||||
lex_add_keyword("do");
|
|
||||||
lex_add_keyword("double");
|
|
||||||
lex_add_keyword("else");
|
|
||||||
lex_add_keyword("enum");
|
|
||||||
lex_add_keyword("extern");
|
|
||||||
lex_add_keyword("float");
|
|
||||||
lex_add_keyword("for");
|
|
||||||
lex_add_keyword("goto");
|
|
||||||
lex_add_keyword("if");
|
|
||||||
lex_add_keyword("inline");
|
|
||||||
lex_add_keyword("int");
|
|
||||||
lex_add_keyword("long");
|
|
||||||
lex_add_keyword("register");
|
|
||||||
lex_add_keyword("restrict");
|
|
||||||
lex_add_keyword("return");
|
|
||||||
lex_add_keyword("short");
|
|
||||||
lex_add_keyword("signed");
|
|
||||||
lex_add_keyword("sizeof");
|
|
||||||
lex_add_keyword("static");
|
|
||||||
lex_add_keyword("struct");
|
|
||||||
lex_add_keyword("switch");
|
|
||||||
lex_add_keyword("typedef");
|
|
||||||
lex_add_keyword("union");
|
|
||||||
lex_add_keyword("unsigned");
|
|
||||||
lex_add_keyword("void");
|
|
||||||
lex_add_keyword("volatile");
|
|
||||||
lex_last_keyword = lex_add_keyword("while");
|
|
||||||
}
|
|
||||||
|
|
||||||
bool lex_is_keyword(char *string) {
|
bool lex_is_keyword(char *string) {
|
||||||
bool result = string >= lex_first_keyword && string <= lex_last_keyword;
|
bool result = string >= lex_first_keyword && string <= lex_last_keyword;
|
||||||
return result;
|
return result;
|
||||||
@@ -275,7 +227,40 @@ Token lex_token(Lexer *lex) {
|
|||||||
if (t.kind == TOK_IDENT) {
|
if (t.kind == TOK_IDENT) {
|
||||||
t.intern = make_intern(t.str, t.len);
|
t.intern = make_intern(t.str, t.len);
|
||||||
if (lex_is_keyword(t.intern)) {
|
if (lex_is_keyword(t.intern)) {
|
||||||
t.kind = TOK_KEYWORD;
|
if (t.intern == keyword_while) t.kind = TOK_while;
|
||||||
|
if (t.intern == keyword_break) t.kind = TOK_break;
|
||||||
|
if (t.intern == keyword_case) t.kind = TOK_case;
|
||||||
|
if (t.intern == keyword_char) t.kind = TOK_char;
|
||||||
|
if (t.intern == keyword_const) t.kind = TOK_const;
|
||||||
|
if (t.intern == keyword_continue) t.kind = TOK_continue;
|
||||||
|
if (t.intern == keyword_default) t.kind = TOK_default;
|
||||||
|
if (t.intern == keyword_do) t.kind = TOK_do;
|
||||||
|
if (t.intern == keyword_double) t.kind = TOK_double;
|
||||||
|
if (t.intern == keyword_else) t.kind = TOK_else;
|
||||||
|
if (t.intern == keyword_enum) t.kind = TOK_enum;
|
||||||
|
if (t.intern == keyword_extern) t.kind = TOK_extern;
|
||||||
|
if (t.intern == keyword_float) t.kind = TOK_float;
|
||||||
|
if (t.intern == keyword_for) t.kind = TOK_for;
|
||||||
|
if (t.intern == keyword_goto) t.kind = TOK_goto;
|
||||||
|
if (t.intern == keyword_if) t.kind = TOK_if;
|
||||||
|
if (t.intern == keyword_inline) t.kind = TOK_inline;
|
||||||
|
if (t.intern == keyword_int) t.kind = TOK_int;
|
||||||
|
if (t.intern == keyword_long) t.kind = TOK_long;
|
||||||
|
if (t.intern == keyword_register) t.kind = TOK_register;
|
||||||
|
if (t.intern == keyword_restrict) t.kind = TOK_restrict;
|
||||||
|
if (t.intern == keyword_return) t.kind = TOK_return;
|
||||||
|
if (t.intern == keyword_short) t.kind = TOK_short;
|
||||||
|
if (t.intern == keyword_signed) t.kind = TOK_signed;
|
||||||
|
if (t.intern == keyword_sizeof) t.kind = TOK_sizeof;
|
||||||
|
if (t.intern == keyword_static) t.kind = TOK_static;
|
||||||
|
if (t.intern == keyword_struct) t.kind = TOK_struct;
|
||||||
|
if (t.intern == keyword_switch) t.kind = TOK_switch;
|
||||||
|
if (t.intern == keyword_typedef) t.kind = TOK_typedef;
|
||||||
|
if (t.intern == keyword_union) t.kind = TOK_union;
|
||||||
|
if (t.intern == keyword_unsigned) t.kind = TOK_unsigned;
|
||||||
|
if (t.intern == keyword_void) t.kind = TOK_void;
|
||||||
|
if (t.intern == keyword_volatile) t.kind = TOK_volatile;
|
||||||
|
if (t.intern == keyword_auto) t.kind = TOK_auto;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -365,11 +350,11 @@ void lex_test(void) {
|
|||||||
assert_token(baz123, TOK_IDENT, "baz123", 0, 9);
|
assert_token(baz123, TOK_IDENT, "baz123", 0, 9);
|
||||||
assert(strcmp(baz123.intern, "baz123") == 0);
|
assert(strcmp(baz123.intern, "baz123") == 0);
|
||||||
Token kw_if = lex_token(&ident_lex);
|
Token kw_if = lex_token(&ident_lex);
|
||||||
assert_token(kw_if, TOK_KEYWORD, "if", 0, 16);
|
assert_token(kw_if, TOK_if, "if", 0, 16);
|
||||||
Token kw_for = lex_token(&ident_lex);
|
Token kw_for = lex_token(&ident_lex);
|
||||||
assert_token(kw_for, TOK_KEYWORD, "for", 0, 19);
|
assert_token(kw_for, TOK_for, "for", 0, 19);
|
||||||
Token kw_while = lex_token(&ident_lex);
|
Token kw_while = lex_token(&ident_lex);
|
||||||
assert_token(kw_while, TOK_KEYWORD, "while", 0, 23);
|
assert_token(kw_while, TOK_while, "while", 0, 23);
|
||||||
Token ident_if_ = lex_token(&ident_lex);
|
Token ident_if_ = lex_token(&ident_lex);
|
||||||
assert_token(ident_if_, TOK_IDENT, "if_", 0, 29);
|
assert_token(ident_if_, TOK_IDENT, "if_", 0, 29);
|
||||||
Token ident_x9 = lex_token(&ident_lex);
|
Token ident_x9 = lex_token(&ident_lex);
|
||||||
|
|||||||
11
main.c
11
main.c
@@ -1,3 +1,13 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
- [ ] Compile simple int main program
|
||||||
|
- [x] Lex identifiers, keywords
|
||||||
|
- [ ] Add parsing of this
|
||||||
|
- [ ] Emit it
|
||||||
|
- [ ] Print error tokens location properly in lexer / parser and make it easy (not from source code)
|
||||||
|
- [ ] Newline splicing, the first source preprocessing stage. To properly handle '\\' (a backslash followed by a newline), we most likely need to preprocess the source in an initial pass. So at some point we need to introduce a stage that creates a buffer without the spliced-out characters, together with a line / column mapping data structure.
|
||||||
|
|
||||||
|
*/
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -9,6 +19,7 @@
|
|||||||
#include "base.c"
|
#include "base.c"
|
||||||
#include "meta_gen.c"
|
#include "meta_gen.c"
|
||||||
#include "lex.c"
|
#include "lex.c"
|
||||||
|
#include "ast.c"
|
||||||
#include "parser.c"
|
#include "parser.c"
|
||||||
#include "emit_asm_x64.c"
|
#include "emit_asm_x64.c"
|
||||||
|
|
||||||
|
|||||||
183
meta.c
183
meta.c
@@ -2,6 +2,7 @@
|
|||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "base.c"
|
#include "base.c"
|
||||||
|
|
||||||
@@ -9,70 +10,100 @@ int main() {
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
char *name;
|
char *name;
|
||||||
char *serialized_operator;
|
char *serialized_operator;
|
||||||
|
bool keyword;
|
||||||
} Task;
|
} Task;
|
||||||
Task kinds[] = {
|
Task kinds[] = {
|
||||||
{"EOF", 0},
|
{"EOF"},
|
||||||
{"ERROR", 0},
|
{"ERROR"},
|
||||||
|
{"IDENT"},
|
||||||
|
{"INT"},
|
||||||
|
{"FLOAT"},
|
||||||
|
{"CHAR"},
|
||||||
|
{"STRING"},
|
||||||
|
|
||||||
{"IDENT", 0},
|
{"LPAREN", .serialized_operator = "("},
|
||||||
{"KEYWORD", 0},
|
{"RPAREN", .serialized_operator = ")"},
|
||||||
{"INT", 0},
|
{"LBRACKET", .serialized_operator = "["},
|
||||||
{"FLOAT", 0},
|
{"RBRACKET", .serialized_operator = "]"},
|
||||||
{"CHAR", 0},
|
{"LBRACE", .serialized_operator = "{"},
|
||||||
{"STRING", 0},
|
{"RBRACE", .serialized_operator = "}"},
|
||||||
|
{"COMMA", .serialized_operator = ","},
|
||||||
|
{"DOT", .serialized_operator = "."},
|
||||||
|
{"ARROW", .serialized_operator = "->"},
|
||||||
|
{"ELLIPSIS", .serialized_operator = "..."},
|
||||||
|
{"COLON", .serialized_operator = ":"},
|
||||||
|
{"SEMICOLON", .serialized_operator = ";"},
|
||||||
|
{"QUESTION", .serialized_operator = "?"},
|
||||||
|
{"HASH", .serialized_operator = "#"},
|
||||||
|
{"HASHHASH", .serialized_operator = "##"},
|
||||||
|
{"PLUS", .serialized_operator = "+"},
|
||||||
|
{"MINUS", .serialized_operator = "-"},
|
||||||
|
{"STAR", .serialized_operator = "*"},
|
||||||
|
{"SLASH", .serialized_operator = "/"},
|
||||||
|
{"PERCENT", .serialized_operator = "%"},
|
||||||
|
{"INC", .serialized_operator = "++"},
|
||||||
|
{"DEC", .serialized_operator = "--"},
|
||||||
|
{"ASSIGN", .serialized_operator = "="},
|
||||||
|
{"PLUS_ASSIGN", .serialized_operator = "+="},
|
||||||
|
{"MINUS_ASSIGN", .serialized_operator = "-="},
|
||||||
|
{"MUL_ASSIGN", .serialized_operator = "*="},
|
||||||
|
{"DIV_ASSIGN", .serialized_operator = "/="},
|
||||||
|
{"MOD_ASSIGN", .serialized_operator = "%="},
|
||||||
|
{"LSHIFT_ASSIGN", .serialized_operator = "<<="},
|
||||||
|
{"RSHIFT_ASSIGN", .serialized_operator = ">>="},
|
||||||
|
{"AND_ASSIGN", .serialized_operator = "&="},
|
||||||
|
{"XOR_ASSIGN", .serialized_operator = "^="},
|
||||||
|
{"OR_ASSIGN", .serialized_operator = "|="},
|
||||||
|
{"EQ", .serialized_operator = "=="},
|
||||||
|
{"NEQ", .serialized_operator = "!="},
|
||||||
|
{"LT", .serialized_operator = "<"},
|
||||||
|
{"LEQ", .serialized_operator = "<="},
|
||||||
|
{"GT", .serialized_operator = ">"},
|
||||||
|
{"GEQ", .serialized_operator = ">="},
|
||||||
|
{"NOT", .serialized_operator = "!"},
|
||||||
|
{"BITNOT", .serialized_operator = "~"},
|
||||||
|
{"BITAND", .serialized_operator = "&"},
|
||||||
|
{"BITOR", .serialized_operator = "|"},
|
||||||
|
{"BITXOR", .serialized_operator = "^"},
|
||||||
|
{"AND", .serialized_operator = "&&"},
|
||||||
|
{"OR", .serialized_operator = "||"},
|
||||||
|
{"LSHIFT", .serialized_operator = "<<"},
|
||||||
|
{"RSHIFT", .serialized_operator = ">>"},
|
||||||
|
|
||||||
{"LPAREN", "("},
|
{"auto", .keyword = true},
|
||||||
{"RPAREN", ")"},
|
{"break", .keyword = true},
|
||||||
{"LBRACKET", "["},
|
{"case", .keyword = true},
|
||||||
{"RBRACKET", "]"},
|
{"char", .keyword = true},
|
||||||
{"LBRACE", "{"},
|
{"const", .keyword = true},
|
||||||
{"RBRACE", "}"},
|
{"continue", .keyword = true},
|
||||||
{"COMMA", ","},
|
{"default", .keyword = true},
|
||||||
{"DOT", "."},
|
{"do", .keyword = true},
|
||||||
{"ARROW", "->"},
|
{"double", .keyword = true},
|
||||||
{"ELLIPSIS", "..."},
|
{"else", .keyword = true},
|
||||||
{"COLON", ":"},
|
{"enum", .keyword = true},
|
||||||
{"SEMICOLON", ";"},
|
{"extern", .keyword = true},
|
||||||
{"QUESTION", "?"},
|
{"float", .keyword = true},
|
||||||
{"HASH", "#"},
|
{"for", .keyword = true},
|
||||||
{"HASHHASH", "##"},
|
{"goto", .keyword = true},
|
||||||
|
{"if", .keyword = true},
|
||||||
{"PLUS", "+"},
|
{"inline", .keyword = true},
|
||||||
{"MINUS", "-"},
|
{"int", .keyword = true},
|
||||||
{"STAR", "*"},
|
{"long", .keyword = true},
|
||||||
{"SLASH", "/"},
|
{"register", .keyword = true},
|
||||||
{"PERCENT", "%"},
|
{"restrict", .keyword = true},
|
||||||
{"INC", "++"},
|
{"return", .keyword = true},
|
||||||
{"DEC", "--"},
|
{"short", .keyword = true},
|
||||||
|
{"signed", .keyword = true},
|
||||||
{"ASSIGN", "="},
|
{"sizeof", .keyword = true},
|
||||||
{"PLUS_ASSIGN", "+="},
|
{"static", .keyword = true},
|
||||||
{"MINUS_ASSIGN", "-="},
|
{"struct", .keyword = true},
|
||||||
{"MUL_ASSIGN", "*="},
|
{"switch", .keyword = true},
|
||||||
{"DIV_ASSIGN", "/="},
|
{"typedef", .keyword = true},
|
||||||
{"MOD_ASSIGN", "%="},
|
{"union", .keyword = true},
|
||||||
{"LSHIFT_ASSIGN", "<<="},
|
{"unsigned", .keyword = true},
|
||||||
{"RSHIFT_ASSIGN", ">>="},
|
{"void", .keyword = true},
|
||||||
{"AND_ASSIGN", "&="},
|
{"volatile", .keyword = true},
|
||||||
{"XOR_ASSIGN", "^="},
|
{"while", .keyword = true},
|
||||||
{"OR_ASSIGN", "|="},
|
|
||||||
|
|
||||||
{"EQ", "=="},
|
|
||||||
{"NEQ", "!="},
|
|
||||||
{"LT", "<"},
|
|
||||||
{"LEQ", "<="},
|
|
||||||
{"GT", ">"},
|
|
||||||
{"GEQ", ">="},
|
|
||||||
|
|
||||||
{"NOT", "!"},
|
|
||||||
{"BITNOT", "~"},
|
|
||||||
{"BITAND", "&"},
|
|
||||||
{"BITOR", "|"},
|
|
||||||
{"BITXOR", "^"},
|
|
||||||
{"AND", "&&"},
|
|
||||||
{"OR", "||"},
|
|
||||||
{"LSHIFT", "<<"},
|
|
||||||
{"RSHIFT", ">>"},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
printf("// auto generated by meta.c\n");
|
printf("// auto generated by meta.c\n");
|
||||||
@@ -104,4 +135,36 @@ int main() {
|
|||||||
printf(" default: return \"<invalid-token-kind>\";\n");
|
printf(" default: return \"<invalid-token-kind>\";\n");
|
||||||
printf(" }\n");
|
printf(" }\n");
|
||||||
printf("}\n");
|
printf("}\n");
|
||||||
|
|
||||||
|
{
|
||||||
|
printf("// \n");
|
||||||
|
printf("// KEYWORDS\n");
|
||||||
|
printf("// \n");
|
||||||
|
|
||||||
|
for (int i = 0; i < ilen(kinds); i += 1) {
|
||||||
|
if (kinds[i].keyword) {
|
||||||
|
printf("char *keyword_%s;\n", kinds[i].name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Task *first = NULL;
|
||||||
|
Task *last = NULL;
|
||||||
|
printf("char *lex_first_keyword = NULL;\n");
|
||||||
|
printf("char *lex_last_keyword = NULL;\n");
|
||||||
|
printf("char *make_intern(char *string, int len);\n");
|
||||||
|
printf("#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)\n");
|
||||||
|
printf("void lex_init_keywords(void) {\n");
|
||||||
|
for (int i = 0; i < ilen(kinds); i += 1) {
|
||||||
|
if (kinds[i].keyword) {
|
||||||
|
if (!first) first = kinds + i;
|
||||||
|
last = kinds + i;
|
||||||
|
printf(" keyword_%s = lex_add_keyword(\"%s\");\n", kinds[i].name, kinds[i].name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("#define TOK_FIRST_KEYWORD TOK_%s\n", first->name);
|
||||||
|
printf("#define TOK_LAST_KEYWORD TOK_%s\n", last->name);
|
||||||
|
printf(" lex_first_keyword = keyword_%s;\n", first->name);
|
||||||
|
printf(" lex_last_keyword = keyword_%s;\n", last->name);
|
||||||
|
printf("}\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
151
meta_gen.c
151
meta_gen.c
@@ -3,7 +3,6 @@ typedef enum {
|
|||||||
TOK_EOF,
|
TOK_EOF,
|
||||||
TOK_ERROR,
|
TOK_ERROR,
|
||||||
TOK_IDENT,
|
TOK_IDENT,
|
||||||
TOK_KEYWORD,
|
|
||||||
TOK_INT,
|
TOK_INT,
|
||||||
TOK_FLOAT,
|
TOK_FLOAT,
|
||||||
TOK_CHAR,
|
TOK_CHAR,
|
||||||
@@ -56,6 +55,40 @@ typedef enum {
|
|||||||
TOK_OR,
|
TOK_OR,
|
||||||
TOK_LSHIFT,
|
TOK_LSHIFT,
|
||||||
TOK_RSHIFT,
|
TOK_RSHIFT,
|
||||||
|
TOK_auto,
|
||||||
|
TOK_break,
|
||||||
|
TOK_case,
|
||||||
|
TOK_char,
|
||||||
|
TOK_const,
|
||||||
|
TOK_continue,
|
||||||
|
TOK_default,
|
||||||
|
TOK_do,
|
||||||
|
TOK_double,
|
||||||
|
TOK_else,
|
||||||
|
TOK_enum,
|
||||||
|
TOK_extern,
|
||||||
|
TOK_float,
|
||||||
|
TOK_for,
|
||||||
|
TOK_goto,
|
||||||
|
TOK_if,
|
||||||
|
TOK_inline,
|
||||||
|
TOK_int,
|
||||||
|
TOK_long,
|
||||||
|
TOK_register,
|
||||||
|
TOK_restrict,
|
||||||
|
TOK_return,
|
||||||
|
TOK_short,
|
||||||
|
TOK_signed,
|
||||||
|
TOK_sizeof,
|
||||||
|
TOK_static,
|
||||||
|
TOK_struct,
|
||||||
|
TOK_switch,
|
||||||
|
TOK_typedef,
|
||||||
|
TOK_union,
|
||||||
|
TOK_unsigned,
|
||||||
|
TOK_void,
|
||||||
|
TOK_volatile,
|
||||||
|
TOK_while,
|
||||||
} Token_Kind;
|
} Token_Kind;
|
||||||
char *token_to_op(Token_Kind kind) {
|
char *token_to_op(Token_Kind kind) {
|
||||||
switch (kind) {
|
switch (kind) {
|
||||||
@@ -115,7 +148,6 @@ char *token_to_name(Token_Kind kind) {
|
|||||||
case TOK_EOF: return "EOF";
|
case TOK_EOF: return "EOF";
|
||||||
case TOK_ERROR: return "ERROR";
|
case TOK_ERROR: return "ERROR";
|
||||||
case TOK_IDENT: return "IDENT";
|
case TOK_IDENT: return "IDENT";
|
||||||
case TOK_KEYWORD: return "KEYWORD";
|
|
||||||
case TOK_INT: return "INT";
|
case TOK_INT: return "INT";
|
||||||
case TOK_FLOAT: return "FLOAT";
|
case TOK_FLOAT: return "FLOAT";
|
||||||
case TOK_CHAR: return "CHAR";
|
case TOK_CHAR: return "CHAR";
|
||||||
@@ -168,6 +200,121 @@ char *token_to_name(Token_Kind kind) {
|
|||||||
case TOK_OR: return "OR";
|
case TOK_OR: return "OR";
|
||||||
case TOK_LSHIFT: return "LSHIFT";
|
case TOK_LSHIFT: return "LSHIFT";
|
||||||
case TOK_RSHIFT: return "RSHIFT";
|
case TOK_RSHIFT: return "RSHIFT";
|
||||||
|
case TOK_auto: return "auto";
|
||||||
|
case TOK_break: return "break";
|
||||||
|
case TOK_case: return "case";
|
||||||
|
case TOK_char: return "char";
|
||||||
|
case TOK_const: return "const";
|
||||||
|
case TOK_continue: return "continue";
|
||||||
|
case TOK_default: return "default";
|
||||||
|
case TOK_do: return "do";
|
||||||
|
case TOK_double: return "double";
|
||||||
|
case TOK_else: return "else";
|
||||||
|
case TOK_enum: return "enum";
|
||||||
|
case TOK_extern: return "extern";
|
||||||
|
case TOK_float: return "float";
|
||||||
|
case TOK_for: return "for";
|
||||||
|
case TOK_goto: return "goto";
|
||||||
|
case TOK_if: return "if";
|
||||||
|
case TOK_inline: return "inline";
|
||||||
|
case TOK_int: return "int";
|
||||||
|
case TOK_long: return "long";
|
||||||
|
case TOK_register: return "register";
|
||||||
|
case TOK_restrict: return "restrict";
|
||||||
|
case TOK_return: return "return";
|
||||||
|
case TOK_short: return "short";
|
||||||
|
case TOK_signed: return "signed";
|
||||||
|
case TOK_sizeof: return "sizeof";
|
||||||
|
case TOK_static: return "static";
|
||||||
|
case TOK_struct: return "struct";
|
||||||
|
case TOK_switch: return "switch";
|
||||||
|
case TOK_typedef: return "typedef";
|
||||||
|
case TOK_union: return "union";
|
||||||
|
case TOK_unsigned: return "unsigned";
|
||||||
|
case TOK_void: return "void";
|
||||||
|
case TOK_volatile: return "volatile";
|
||||||
|
case TOK_while: return "while";
|
||||||
default: return "<invalid-token-kind>";
|
default: return "<invalid-token-kind>";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//
|
||||||
|
// KEYWORDS
|
||||||
|
//
|
||||||
|
char *keyword_auto;
|
||||||
|
char *keyword_break;
|
||||||
|
char *keyword_case;
|
||||||
|
char *keyword_char;
|
||||||
|
char *keyword_const;
|
||||||
|
char *keyword_continue;
|
||||||
|
char *keyword_default;
|
||||||
|
char *keyword_do;
|
||||||
|
char *keyword_double;
|
||||||
|
char *keyword_else;
|
||||||
|
char *keyword_enum;
|
||||||
|
char *keyword_extern;
|
||||||
|
char *keyword_float;
|
||||||
|
char *keyword_for;
|
||||||
|
char *keyword_goto;
|
||||||
|
char *keyword_if;
|
||||||
|
char *keyword_inline;
|
||||||
|
char *keyword_int;
|
||||||
|
char *keyword_long;
|
||||||
|
char *keyword_register;
|
||||||
|
char *keyword_restrict;
|
||||||
|
char *keyword_return;
|
||||||
|
char *keyword_short;
|
||||||
|
char *keyword_signed;
|
||||||
|
char *keyword_sizeof;
|
||||||
|
char *keyword_static;
|
||||||
|
char *keyword_struct;
|
||||||
|
char *keyword_switch;
|
||||||
|
char *keyword_typedef;
|
||||||
|
char *keyword_union;
|
||||||
|
char *keyword_unsigned;
|
||||||
|
char *keyword_void;
|
||||||
|
char *keyword_volatile;
|
||||||
|
char *keyword_while;
|
||||||
|
char *lex_first_keyword = NULL;
|
||||||
|
char *lex_last_keyword = NULL;
|
||||||
|
char *make_intern(char *string, int len);
|
||||||
|
#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)
|
||||||
|
void lex_init_keywords(void) {
|
||||||
|
keyword_auto = lex_add_keyword("auto");
|
||||||
|
keyword_break = lex_add_keyword("break");
|
||||||
|
keyword_case = lex_add_keyword("case");
|
||||||
|
keyword_char = lex_add_keyword("char");
|
||||||
|
keyword_const = lex_add_keyword("const");
|
||||||
|
keyword_continue = lex_add_keyword("continue");
|
||||||
|
keyword_default = lex_add_keyword("default");
|
||||||
|
keyword_do = lex_add_keyword("do");
|
||||||
|
keyword_double = lex_add_keyword("double");
|
||||||
|
keyword_else = lex_add_keyword("else");
|
||||||
|
keyword_enum = lex_add_keyword("enum");
|
||||||
|
keyword_extern = lex_add_keyword("extern");
|
||||||
|
keyword_float = lex_add_keyword("float");
|
||||||
|
keyword_for = lex_add_keyword("for");
|
||||||
|
keyword_goto = lex_add_keyword("goto");
|
||||||
|
keyword_if = lex_add_keyword("if");
|
||||||
|
keyword_inline = lex_add_keyword("inline");
|
||||||
|
keyword_int = lex_add_keyword("int");
|
||||||
|
keyword_long = lex_add_keyword("long");
|
||||||
|
keyword_register = lex_add_keyword("register");
|
||||||
|
keyword_restrict = lex_add_keyword("restrict");
|
||||||
|
keyword_return = lex_add_keyword("return");
|
||||||
|
keyword_short = lex_add_keyword("short");
|
||||||
|
keyword_signed = lex_add_keyword("signed");
|
||||||
|
keyword_sizeof = lex_add_keyword("sizeof");
|
||||||
|
keyword_static = lex_add_keyword("static");
|
||||||
|
keyword_struct = lex_add_keyword("struct");
|
||||||
|
keyword_switch = lex_add_keyword("switch");
|
||||||
|
keyword_typedef = lex_add_keyword("typedef");
|
||||||
|
keyword_union = lex_add_keyword("union");
|
||||||
|
keyword_unsigned = lex_add_keyword("unsigned");
|
||||||
|
keyword_void = lex_add_keyword("void");
|
||||||
|
keyword_volatile = lex_add_keyword("volatile");
|
||||||
|
keyword_while = lex_add_keyword("while");
|
||||||
|
#define TOK_FIRST_KEYWORD TOK_auto
|
||||||
|
#define TOK_LAST_KEYWORD TOK_while
|
||||||
|
lex_first_keyword = keyword_auto;
|
||||||
|
lex_last_keyword = keyword_while;
|
||||||
|
}
|
||||||
|
|||||||
65
parser.c
65
parser.c
@@ -3,30 +3,6 @@ typedef struct Parser {
|
|||||||
Token *end;
|
Token *end;
|
||||||
} Parser;
|
} Parser;
|
||||||
|
|
||||||
typedef enum Ast_Kind {
|
|
||||||
AST_NONE,
|
|
||||||
AST_ERROR,
|
|
||||||
AST_INT,
|
|
||||||
AST_UNARY,
|
|
||||||
AST_BINARY,
|
|
||||||
} Ast_Kind;
|
|
||||||
|
|
||||||
typedef struct Ast Ast;
|
|
||||||
struct Ast {
|
|
||||||
Ast_Kind kind;
|
|
||||||
Token *pos;
|
|
||||||
|
|
||||||
union {
|
|
||||||
uint64_t u;
|
|
||||||
struct {
|
|
||||||
Token_Kind op;
|
|
||||||
Ast *l;
|
|
||||||
Ast *r;
|
|
||||||
};
|
|
||||||
char *error;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
Token *next_token(Parser *p) {
|
Token *next_token(Parser *p) {
|
||||||
if (p->at < p->end) {
|
if (p->at < p->end) {
|
||||||
return p->at++;
|
return p->at++;
|
||||||
@@ -41,6 +17,18 @@ Token *match_token(Parser *p, Token_Kind kind) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Reports whether token is one of the keyword tokens.
 *
 * This is a range test on token->kind, so it relies on all keyword kinds
 * being contiguous in Token_Kind, from TOK_FIRST_KEYWORD through
 * TOK_LAST_KEYWORD inclusive.
 */
bool is_keyword(Token *token) {
    return token->kind >= TOK_FIRST_KEYWORD && token->kind <= TOK_LAST_KEYWORD;
}
|
||||||
|
|
||||||
|
/*
 * Consumes the current token if it is the given keyword.
 *
 * keyword is compared against the token's intern pointer by identity, not by
 * string contents, so it must be an interned keyword pointer such as
 * keyword_int.
 *
 * Returns the result of next_token() when the current token matches, or NULL
 * (leaving the parser position untouched) when it does not.
 */
Token *match_keyword(Parser *p, char *keyword) {
    if (!is_keyword(p->at)) {
        return NULL;
    }
    if (p->at->intern != keyword) {
        return NULL;
    }
    return next_token(p);
}
|
||||||
|
|
||||||
Token *expect_token(Parser *p, Token_Kind kind) {
|
Token *expect_token(Parser *p, Token_Kind kind) {
|
||||||
if (p->at->kind == kind) {
|
if (p->at->kind == kind) {
|
||||||
return next_token(p);
|
return next_token(p);
|
||||||
@@ -48,21 +36,6 @@ Token *expect_token(Parser *p, Token_Kind kind) {
|
|||||||
panicf("expected token kind: %s, got instead: %s", token_to_name(p->at->kind), token_to_name(kind));
|
panicf("expected token kind: %s, got instead: %s", token_to_name(p->at->kind), token_to_name(kind));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ast *create_ast(Token *token, Ast_Kind kind) {
|
|
||||||
Ast *result = calloc(1, sizeof(Ast));
|
|
||||||
result->pos = token;
|
|
||||||
result->kind = kind;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
|
|
||||||
Ast *result = create_ast(token, AST_BINARY);
|
|
||||||
result->op = op;
|
|
||||||
result->l = left;
|
|
||||||
result->r = right;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ast *parse_expr(Parser *p, int power_of_binding_to_right);
|
Ast *parse_expr(Parser *p, int power_of_binding_to_right);
|
||||||
|
|
||||||
Ast *parse_atom(Parser *p) {
|
Ast *parse_atom(Parser *p) {
|
||||||
@@ -118,6 +91,20 @@ Ast *parse_expr(Parser *p, int power_of_binding_to_right) {
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Parses the top level of a translation unit.
 *
 * Work in progress: the only construct accepted so far is `int IDENT ( )`.
 * Each one produces an AST_FUNCTION node positioned at the identifier token.
 * Any other token is reported through panicf.
 *
 * Returns the node of the last function header parsed, or NULL when the
 * input held none. Ast has no child or sibling list yet, so earlier nodes
 * are not linked to the returned one.
 * TODO: collect all top-level nodes once Ast can hold a list.
 */
Ast *parse_program(Parser *p) {
    Ast *result = NULL;

    // Stop at the end of the token array, or at an EOF token if the stream
    // carries one, instead of looping forever.
    // NOTE(review): confirm which of the two the lexer guarantees.
    while (p->at < p->end && p->at->kind != TOK_EOF) {
        if (match_keyword(p, keyword_int)) {
            Token *ident = expect_token(p, TOK_IDENT);
            expect_token(p, TOK_LPAREN);
            expect_token(p, TOK_RPAREN);
            result = create_ast(ident, AST_FUNCTION);
        } else {
            // Consume the offending token first so the loop still advances
            // in case panicf returns.
            next_token(p);
            panicf("invalid token");
        }
    }

    return result;
}
|
||||||
|
|
||||||
int64_t eval_expr(Ast *n) {
|
int64_t eval_expr(Ast *n) {
|
||||||
switch (n->kind) {
|
switch (n->kind) {
|
||||||
case AST_INT: return (int64_t)n->u;
|
case AST_INT: return (int64_t)n->u;
|
||||||
|
|||||||
Reference in New Issue
Block a user