WIP top-level parsing and keyword tokens
This commit is contained in:
83
ast.c
Normal file
83
ast.c
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
 * Discriminator stored in Type.kind.
 * TYPE_NONE is 0, so a zero-initialized Type has kind TYPE_NONE.
 */
typedef enum {
    TYPE_NONE = 0,
    TYPE_VOID,
    TYPE_BOOL,

    /* character and integer kinds */
    TYPE_CHAR,
    TYPE_SCHAR,
    TYPE_UCHAR,
    TYPE_SHORT,
    TYPE_USHORT,
    TYPE_INT,
    TYPE_UINT,
    TYPE_LONG,
    TYPE_ULONG,
    TYPE_LLONG,
    TYPE_ULLONG,

    /* floating-point kinds */
    TYPE_FLOAT,
    TYPE_DOUBLE,
    TYPE_LDOUBLE,

    /* kinds built from other types */
    TYPE_POINTER,
    TYPE_ARRAY,
    TYPE_FUNCTION,

    /* struct / union / enum kinds */
    TYPE_STRUCT,
    TYPE_UNION,
    TYPE_ENUM,
} Type_Kind;
|
||||
|
||||
typedef struct Type Type;
|
||||
struct Type {
|
||||
Type_Kind kind;
|
||||
int size;
|
||||
int align;
|
||||
Type *base;
|
||||
};
|
||||
|
||||
/*
 * Discriminator stored in Ast.kind.
 * AST_NONE is 0, so a freshly zeroed node has kind AST_NONE until it is set.
 */
typedef enum Ast_Kind {
    AST_NONE = 0,
    AST_ERROR,

    /* expression nodes */
    AST_INT,      /* integer value kept in Ast.u */
    AST_UNARY,
    AST_BINARY,   /* operator plus left/right operands (see create_binary_expr) */

    AST_FUNCTION,
    AST_BLOCK,
} Ast_Kind;
|
||||
|
||||
typedef struct Ast Ast;
|
||||
struct Ast {
|
||||
Ast_Kind kind;
|
||||
Token *pos;
|
||||
|
||||
union {
|
||||
uint64_t u;
|
||||
struct {
|
||||
Token_Kind op;
|
||||
Ast *l;
|
||||
Ast *r;
|
||||
};
|
||||
char *error;
|
||||
};
|
||||
};
|
||||
|
||||
/*
 * Allocate a zero-initialized Ast node.
 *
 * token: stored in the node's `pos` field as-is (the token is not copied).
 * kind:  stored in the node's `kind` field.
 *
 * Returns the new node and never returns NULL: if the allocation fails,
 * a message is written to stderr and the process is aborted, so callers
 * can dereference the result unconditionally.
 */
Ast *create_ast(Token *token, Ast_Kind kind) {
    /* sizeof *result keeps the size tied to the variable's type. */
    Ast *result = calloc(1, sizeof *result);
    if (result == NULL) {
        fprintf(stderr, "create_ast: out of memory\n");
        abort();
    }

    result->pos = token;
    result->kind = kind;
    return result;
}
|
||||
|
||||
/*
 * Build an AST_BINARY node.
 *
 * token: position token forwarded to create_ast.
 * op:    operator token kind, stored in `op`.
 * left, right: operand subtrees, stored in `l` and `r` (pointers are kept,
 *              not copied).
 *
 * Returns the node produced by create_ast with the three fields filled in.
 */
Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
    Ast *node = create_ast(token, AST_BINARY);

    node->l = left;
    node->r = right;
    node->op = op;

    return node;
}
|
||||
|
||||
/*
 * Built-in `int` type, sized and aligned like the host compiler's int.
 * _Alignof (C11) yields the required alignment; the GNU/MSVC __alignof
 * extension may report a larger "preferred" alignment for some types.
 * `base` is left out and is therefore a null pointer.
 */
Type base_type_int = {
    .kind  = TYPE_INT,
    .size  = sizeof(int),
    .align = _Alignof(int),
};

/* Pointer through which the rest of the compiler refers to the int type. */
Type *type_int = &base_type_int;
|
||||
4
build.sh
4
build.sh
@@ -4,7 +4,7 @@ if [[ ! -e build ]]; then
|
||||
mkdir build
|
||||
fi
|
||||
cd build
|
||||
clang -o meta $(realpath ../meta.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
|
||||
clang -o meta $(realpath ../meta.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths -Wno-missing-field-initializers
|
||||
./meta > ../meta_gen.c
|
||||
clang -o main $(realpath ../main.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
|
||||
clang -o main $(realpath ../main.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths -Wno-missing-field-initializers
|
||||
./main
|
||||
|
||||
89
lex.c
89
lex.c
@@ -1,9 +1,3 @@
|
||||
/*
|
||||
|
||||
- [ ] New line splicing, first source preprocessing stage. In order to properly handle '\\' backslash new line, we most likely need to preprocess the source in an initial pass. So at some point we need to introduce a stage that will create a buffer with those characters removed, along with a line / column mapping data structure.
|
||||
|
||||
*/
|
||||
|
||||
typedef struct Token {
|
||||
Token_Kind kind;
|
||||
int len;
|
||||
@@ -46,7 +40,6 @@ uint64_t hash_bytes(char *data, size_t len) {
|
||||
char *global_intern_table[4096];
|
||||
char intern_arena[4096*6];
|
||||
int intern_arena_len;
|
||||
|
||||
char *lex_alloc_string(int len) {
|
||||
char *result = intern_arena + intern_arena_len;
|
||||
intern_arena_len += len + 1;
|
||||
@@ -73,47 +66,6 @@ char *make_intern(char *string, int len) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *lex_first_keyword = NULL;
|
||||
char *lex_last_keyword = NULL;
|
||||
#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)
|
||||
|
||||
void lex_init_keywords(void) {
|
||||
lex_first_keyword = lex_add_keyword("auto");
|
||||
lex_add_keyword("break");
|
||||
lex_add_keyword("case");
|
||||
lex_add_keyword("char");
|
||||
lex_add_keyword("const");
|
||||
lex_add_keyword("continue");
|
||||
lex_add_keyword("default");
|
||||
lex_add_keyword("do");
|
||||
lex_add_keyword("double");
|
||||
lex_add_keyword("else");
|
||||
lex_add_keyword("enum");
|
||||
lex_add_keyword("extern");
|
||||
lex_add_keyword("float");
|
||||
lex_add_keyword("for");
|
||||
lex_add_keyword("goto");
|
||||
lex_add_keyword("if");
|
||||
lex_add_keyword("inline");
|
||||
lex_add_keyword("int");
|
||||
lex_add_keyword("long");
|
||||
lex_add_keyword("register");
|
||||
lex_add_keyword("restrict");
|
||||
lex_add_keyword("return");
|
||||
lex_add_keyword("short");
|
||||
lex_add_keyword("signed");
|
||||
lex_add_keyword("sizeof");
|
||||
lex_add_keyword("static");
|
||||
lex_add_keyword("struct");
|
||||
lex_add_keyword("switch");
|
||||
lex_add_keyword("typedef");
|
||||
lex_add_keyword("union");
|
||||
lex_add_keyword("unsigned");
|
||||
lex_add_keyword("void");
|
||||
lex_add_keyword("volatile");
|
||||
lex_last_keyword = lex_add_keyword("while");
|
||||
}
|
||||
|
||||
bool lex_is_keyword(char *string) {
|
||||
bool result = string >= lex_first_keyword && string <= lex_last_keyword;
|
||||
return result;
|
||||
@@ -275,7 +227,40 @@ Token lex_token(Lexer *lex) {
|
||||
if (t.kind == TOK_IDENT) {
|
||||
t.intern = make_intern(t.str, t.len);
|
||||
if (lex_is_keyword(t.intern)) {
|
||||
t.kind = TOK_KEYWORD;
|
||||
if (t.intern == keyword_while) t.kind = TOK_while;
|
||||
if (t.intern == keyword_break) t.kind = TOK_break;
|
||||
if (t.intern == keyword_case) t.kind = TOK_case;
|
||||
if (t.intern == keyword_char) t.kind = TOK_char;
|
||||
if (t.intern == keyword_const) t.kind = TOK_const;
|
||||
if (t.intern == keyword_continue) t.kind = TOK_continue;
|
||||
if (t.intern == keyword_default) t.kind = TOK_default;
|
||||
if (t.intern == keyword_do) t.kind = TOK_do;
|
||||
if (t.intern == keyword_double) t.kind = TOK_double;
|
||||
if (t.intern == keyword_else) t.kind = TOK_else;
|
||||
if (t.intern == keyword_enum) t.kind = TOK_enum;
|
||||
if (t.intern == keyword_extern) t.kind = TOK_extern;
|
||||
if (t.intern == keyword_float) t.kind = TOK_float;
|
||||
if (t.intern == keyword_for) t.kind = TOK_for;
|
||||
if (t.intern == keyword_goto) t.kind = TOK_goto;
|
||||
if (t.intern == keyword_if) t.kind = TOK_if;
|
||||
if (t.intern == keyword_inline) t.kind = TOK_inline;
|
||||
if (t.intern == keyword_int) t.kind = TOK_int;
|
||||
if (t.intern == keyword_long) t.kind = TOK_long;
|
||||
if (t.intern == keyword_register) t.kind = TOK_register;
|
||||
if (t.intern == keyword_restrict) t.kind = TOK_restrict;
|
||||
if (t.intern == keyword_return) t.kind = TOK_return;
|
||||
if (t.intern == keyword_short) t.kind = TOK_short;
|
||||
if (t.intern == keyword_signed) t.kind = TOK_signed;
|
||||
if (t.intern == keyword_sizeof) t.kind = TOK_sizeof;
|
||||
if (t.intern == keyword_static) t.kind = TOK_static;
|
||||
if (t.intern == keyword_struct) t.kind = TOK_struct;
|
||||
if (t.intern == keyword_switch) t.kind = TOK_switch;
|
||||
if (t.intern == keyword_typedef) t.kind = TOK_typedef;
|
||||
if (t.intern == keyword_union) t.kind = TOK_union;
|
||||
if (t.intern == keyword_unsigned) t.kind = TOK_unsigned;
|
||||
if (t.intern == keyword_void) t.kind = TOK_void;
|
||||
if (t.intern == keyword_volatile) t.kind = TOK_volatile;
|
||||
if (t.intern == keyword_auto) t.kind = TOK_auto;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -365,11 +350,11 @@ void lex_test(void) {
|
||||
assert_token(baz123, TOK_IDENT, "baz123", 0, 9);
|
||||
assert(strcmp(baz123.intern, "baz123") == 0);
|
||||
Token kw_if = lex_token(&ident_lex);
|
||||
assert_token(kw_if, TOK_KEYWORD, "if", 0, 16);
|
||||
assert_token(kw_if, TOK_if, "if", 0, 16);
|
||||
Token kw_for = lex_token(&ident_lex);
|
||||
assert_token(kw_for, TOK_KEYWORD, "for", 0, 19);
|
||||
assert_token(kw_for, TOK_for, "for", 0, 19);
|
||||
Token kw_while = lex_token(&ident_lex);
|
||||
assert_token(kw_while, TOK_KEYWORD, "while", 0, 23);
|
||||
assert_token(kw_while, TOK_while, "while", 0, 23);
|
||||
Token ident_if_ = lex_token(&ident_lex);
|
||||
assert_token(ident_if_, TOK_IDENT, "if_", 0, 29);
|
||||
Token ident_x9 = lex_token(&ident_lex);
|
||||
|
||||
11
main.c
11
main.c
@@ -1,3 +1,13 @@
|
||||
/*
|
||||
|
||||
- [ ] Compile simple int main program
|
||||
- [x] Lex identifiers, keywords
|
||||
- [ ] Add parsing of this
|
||||
- [ ] Emit it
|
||||
- [ ] Print error token locations properly in the lexer / parser and make it easy (not from source code)
|
||||
- [ ] New line splicing, first source preprocessing stage. In order to properly handle '\\' backslash new line, we most likely need to preprocess the source in an initial pass. So at some point we need to introduce a stage that will create a buffer with those characters removed, along with a line / column mapping data structure.
|
||||
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
@@ -9,6 +19,7 @@
|
||||
#include "base.c"
|
||||
#include "meta_gen.c"
|
||||
#include "lex.c"
|
||||
#include "ast.c"
|
||||
#include "parser.c"
|
||||
#include "emit_asm_x64.c"
|
||||
|
||||
|
||||
183
meta.c
183
meta.c
@@ -2,6 +2,7 @@
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
#include "base.c"
|
||||
|
||||
@@ -9,70 +10,100 @@ int main() {
|
||||
typedef struct {
|
||||
char *name;
|
||||
char *serialized_operator;
|
||||
bool keyword;
|
||||
} Task;
|
||||
Task kinds[] = {
|
||||
{"EOF", 0},
|
||||
{"ERROR", 0},
|
||||
{"EOF"},
|
||||
{"ERROR"},
|
||||
{"IDENT"},
|
||||
{"INT"},
|
||||
{"FLOAT"},
|
||||
{"CHAR"},
|
||||
{"STRING"},
|
||||
|
||||
{"IDENT", 0},
|
||||
{"KEYWORD", 0},
|
||||
{"INT", 0},
|
||||
{"FLOAT", 0},
|
||||
{"CHAR", 0},
|
||||
{"STRING", 0},
|
||||
{"LPAREN", .serialized_operator = "("},
|
||||
{"RPAREN", .serialized_operator = ")"},
|
||||
{"LBRACKET", .serialized_operator = "["},
|
||||
{"RBRACKET", .serialized_operator = "]"},
|
||||
{"LBRACE", .serialized_operator = "{"},
|
||||
{"RBRACE", .serialized_operator = "}"},
|
||||
{"COMMA", .serialized_operator = ","},
|
||||
{"DOT", .serialized_operator = "."},
|
||||
{"ARROW", .serialized_operator = "->"},
|
||||
{"ELLIPSIS", .serialized_operator = "..."},
|
||||
{"COLON", .serialized_operator = ":"},
|
||||
{"SEMICOLON", .serialized_operator = ";"},
|
||||
{"QUESTION", .serialized_operator = "?"},
|
||||
{"HASH", .serialized_operator = "#"},
|
||||
{"HASHHASH", .serialized_operator = "##"},
|
||||
{"PLUS", .serialized_operator = "+"},
|
||||
{"MINUS", .serialized_operator = "-"},
|
||||
{"STAR", .serialized_operator = "*"},
|
||||
{"SLASH", .serialized_operator = "/"},
|
||||
{"PERCENT", .serialized_operator = "%"},
|
||||
{"INC", .serialized_operator = "++"},
|
||||
{"DEC", .serialized_operator = "--"},
|
||||
{"ASSIGN", .serialized_operator = "="},
|
||||
{"PLUS_ASSIGN", .serialized_operator = "+="},
|
||||
{"MINUS_ASSIGN", .serialized_operator = "-="},
|
||||
{"MUL_ASSIGN", .serialized_operator = "*="},
|
||||
{"DIV_ASSIGN", .serialized_operator = "/="},
|
||||
{"MOD_ASSIGN", .serialized_operator = "%="},
|
||||
{"LSHIFT_ASSIGN", .serialized_operator = "<<="},
|
||||
{"RSHIFT_ASSIGN", .serialized_operator = ">>="},
|
||||
{"AND_ASSIGN", .serialized_operator = "&="},
|
||||
{"XOR_ASSIGN", .serialized_operator = "^="},
|
||||
{"OR_ASSIGN", .serialized_operator = "|="},
|
||||
{"EQ", .serialized_operator = "=="},
|
||||
{"NEQ", .serialized_operator = "!="},
|
||||
{"LT", .serialized_operator = "<"},
|
||||
{"LEQ", .serialized_operator = "<="},
|
||||
{"GT", .serialized_operator = ">"},
|
||||
{"GEQ", .serialized_operator = ">="},
|
||||
{"NOT", .serialized_operator = "!"},
|
||||
{"BITNOT", .serialized_operator = "~"},
|
||||
{"BITAND", .serialized_operator = "&"},
|
||||
{"BITOR", .serialized_operator = "|"},
|
||||
{"BITXOR", .serialized_operator = "^"},
|
||||
{"AND", .serialized_operator = "&&"},
|
||||
{"OR", .serialized_operator = "||"},
|
||||
{"LSHIFT", .serialized_operator = "<<"},
|
||||
{"RSHIFT", .serialized_operator = ">>"},
|
||||
|
||||
{"LPAREN", "("},
|
||||
{"RPAREN", ")"},
|
||||
{"LBRACKET", "["},
|
||||
{"RBRACKET", "]"},
|
||||
{"LBRACE", "{"},
|
||||
{"RBRACE", "}"},
|
||||
{"COMMA", ","},
|
||||
{"DOT", "."},
|
||||
{"ARROW", "->"},
|
||||
{"ELLIPSIS", "..."},
|
||||
{"COLON", ":"},
|
||||
{"SEMICOLON", ";"},
|
||||
{"QUESTION", "?"},
|
||||
{"HASH", "#"},
|
||||
{"HASHHASH", "##"},
|
||||
|
||||
{"PLUS", "+"},
|
||||
{"MINUS", "-"},
|
||||
{"STAR", "*"},
|
||||
{"SLASH", "/"},
|
||||
{"PERCENT", "%"},
|
||||
{"INC", "++"},
|
||||
{"DEC", "--"},
|
||||
|
||||
{"ASSIGN", "="},
|
||||
{"PLUS_ASSIGN", "+="},
|
||||
{"MINUS_ASSIGN", "-="},
|
||||
{"MUL_ASSIGN", "*="},
|
||||
{"DIV_ASSIGN", "/="},
|
||||
{"MOD_ASSIGN", "%="},
|
||||
{"LSHIFT_ASSIGN", "<<="},
|
||||
{"RSHIFT_ASSIGN", ">>="},
|
||||
{"AND_ASSIGN", "&="},
|
||||
{"XOR_ASSIGN", "^="},
|
||||
{"OR_ASSIGN", "|="},
|
||||
|
||||
{"EQ", "=="},
|
||||
{"NEQ", "!="},
|
||||
{"LT", "<"},
|
||||
{"LEQ", "<="},
|
||||
{"GT", ">"},
|
||||
{"GEQ", ">="},
|
||||
|
||||
{"NOT", "!"},
|
||||
{"BITNOT", "~"},
|
||||
{"BITAND", "&"},
|
||||
{"BITOR", "|"},
|
||||
{"BITXOR", "^"},
|
||||
{"AND", "&&"},
|
||||
{"OR", "||"},
|
||||
{"LSHIFT", "<<"},
|
||||
{"RSHIFT", ">>"},
|
||||
{"auto", .keyword = true},
|
||||
{"break", .keyword = true},
|
||||
{"case", .keyword = true},
|
||||
{"char", .keyword = true},
|
||||
{"const", .keyword = true},
|
||||
{"continue", .keyword = true},
|
||||
{"default", .keyword = true},
|
||||
{"do", .keyword = true},
|
||||
{"double", .keyword = true},
|
||||
{"else", .keyword = true},
|
||||
{"enum", .keyword = true},
|
||||
{"extern", .keyword = true},
|
||||
{"float", .keyword = true},
|
||||
{"for", .keyword = true},
|
||||
{"goto", .keyword = true},
|
||||
{"if", .keyword = true},
|
||||
{"inline", .keyword = true},
|
||||
{"int", .keyword = true},
|
||||
{"long", .keyword = true},
|
||||
{"register", .keyword = true},
|
||||
{"restrict", .keyword = true},
|
||||
{"return", .keyword = true},
|
||||
{"short", .keyword = true},
|
||||
{"signed", .keyword = true},
|
||||
{"sizeof", .keyword = true},
|
||||
{"static", .keyword = true},
|
||||
{"struct", .keyword = true},
|
||||
{"switch", .keyword = true},
|
||||
{"typedef", .keyword = true},
|
||||
{"union", .keyword = true},
|
||||
{"unsigned", .keyword = true},
|
||||
{"void", .keyword = true},
|
||||
{"volatile", .keyword = true},
|
||||
{"while", .keyword = true},
|
||||
};
|
||||
|
||||
printf("// auto generated by meta.c\n");
|
||||
@@ -104,4 +135,36 @@ int main() {
|
||||
printf(" default: return \"<invalid-token-kind>\";\n");
|
||||
printf(" }\n");
|
||||
printf("}\n");
|
||||
|
||||
{
|
||||
printf("// \n");
|
||||
printf("// KEYWORDS\n");
|
||||
printf("// \n");
|
||||
|
||||
for (int i = 0; i < ilen(kinds); i += 1) {
|
||||
if (kinds[i].keyword) {
|
||||
printf("char *keyword_%s;\n", kinds[i].name);
|
||||
}
|
||||
}
|
||||
|
||||
Task *first = NULL;
|
||||
Task *last = NULL;
|
||||
printf("char *lex_first_keyword = NULL;\n");
|
||||
printf("char *lex_last_keyword = NULL;\n");
|
||||
printf("char *make_intern(char *string, int len);\n");
|
||||
printf("#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)\n");
|
||||
printf("void lex_init_keywords(void) {\n");
|
||||
for (int i = 0; i < ilen(kinds); i += 1) {
|
||||
if (kinds[i].keyword) {
|
||||
if (!first) first = kinds + i;
|
||||
last = kinds + i;
|
||||
printf(" keyword_%s = lex_add_keyword(\"%s\");\n", kinds[i].name, kinds[i].name);
|
||||
}
|
||||
}
|
||||
printf("#define TOK_FIRST_KEYWORD TOK_%s\n", first->name);
|
||||
printf("#define TOK_LAST_KEYWORD TOK_%s\n", last->name);
|
||||
printf(" lex_first_keyword = keyword_%s;\n", first->name);
|
||||
printf(" lex_last_keyword = keyword_%s;\n", last->name);
|
||||
printf("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
151
meta_gen.c
151
meta_gen.c
@@ -3,7 +3,6 @@ typedef enum {
|
||||
TOK_EOF,
|
||||
TOK_ERROR,
|
||||
TOK_IDENT,
|
||||
TOK_KEYWORD,
|
||||
TOK_INT,
|
||||
TOK_FLOAT,
|
||||
TOK_CHAR,
|
||||
@@ -56,6 +55,40 @@ typedef enum {
|
||||
TOK_OR,
|
||||
TOK_LSHIFT,
|
||||
TOK_RSHIFT,
|
||||
TOK_auto,
|
||||
TOK_break,
|
||||
TOK_case,
|
||||
TOK_char,
|
||||
TOK_const,
|
||||
TOK_continue,
|
||||
TOK_default,
|
||||
TOK_do,
|
||||
TOK_double,
|
||||
TOK_else,
|
||||
TOK_enum,
|
||||
TOK_extern,
|
||||
TOK_float,
|
||||
TOK_for,
|
||||
TOK_goto,
|
||||
TOK_if,
|
||||
TOK_inline,
|
||||
TOK_int,
|
||||
TOK_long,
|
||||
TOK_register,
|
||||
TOK_restrict,
|
||||
TOK_return,
|
||||
TOK_short,
|
||||
TOK_signed,
|
||||
TOK_sizeof,
|
||||
TOK_static,
|
||||
TOK_struct,
|
||||
TOK_switch,
|
||||
TOK_typedef,
|
||||
TOK_union,
|
||||
TOK_unsigned,
|
||||
TOK_void,
|
||||
TOK_volatile,
|
||||
TOK_while,
|
||||
} Token_Kind;
|
||||
char *token_to_op(Token_Kind kind) {
|
||||
switch (kind) {
|
||||
@@ -115,7 +148,6 @@ char *token_to_name(Token_Kind kind) {
|
||||
case TOK_EOF: return "EOF";
|
||||
case TOK_ERROR: return "ERROR";
|
||||
case TOK_IDENT: return "IDENT";
|
||||
case TOK_KEYWORD: return "KEYWORD";
|
||||
case TOK_INT: return "INT";
|
||||
case TOK_FLOAT: return "FLOAT";
|
||||
case TOK_CHAR: return "CHAR";
|
||||
@@ -168,6 +200,121 @@ char *token_to_name(Token_Kind kind) {
|
||||
case TOK_OR: return "OR";
|
||||
case TOK_LSHIFT: return "LSHIFT";
|
||||
case TOK_RSHIFT: return "RSHIFT";
|
||||
case TOK_auto: return "auto";
|
||||
case TOK_break: return "break";
|
||||
case TOK_case: return "case";
|
||||
case TOK_char: return "char";
|
||||
case TOK_const: return "const";
|
||||
case TOK_continue: return "continue";
|
||||
case TOK_default: return "default";
|
||||
case TOK_do: return "do";
|
||||
case TOK_double: return "double";
|
||||
case TOK_else: return "else";
|
||||
case TOK_enum: return "enum";
|
||||
case TOK_extern: return "extern";
|
||||
case TOK_float: return "float";
|
||||
case TOK_for: return "for";
|
||||
case TOK_goto: return "goto";
|
||||
case TOK_if: return "if";
|
||||
case TOK_inline: return "inline";
|
||||
case TOK_int: return "int";
|
||||
case TOK_long: return "long";
|
||||
case TOK_register: return "register";
|
||||
case TOK_restrict: return "restrict";
|
||||
case TOK_return: return "return";
|
||||
case TOK_short: return "short";
|
||||
case TOK_signed: return "signed";
|
||||
case TOK_sizeof: return "sizeof";
|
||||
case TOK_static: return "static";
|
||||
case TOK_struct: return "struct";
|
||||
case TOK_switch: return "switch";
|
||||
case TOK_typedef: return "typedef";
|
||||
case TOK_union: return "union";
|
||||
case TOK_unsigned: return "unsigned";
|
||||
case TOK_void: return "void";
|
||||
case TOK_volatile: return "volatile";
|
||||
case TOK_while: return "while";
|
||||
default: return "<invalid-token-kind>";
|
||||
}
|
||||
}
|
||||
//
|
||||
// KEYWORDS
|
||||
//
|
||||
char *keyword_auto;
|
||||
char *keyword_break;
|
||||
char *keyword_case;
|
||||
char *keyword_char;
|
||||
char *keyword_const;
|
||||
char *keyword_continue;
|
||||
char *keyword_default;
|
||||
char *keyword_do;
|
||||
char *keyword_double;
|
||||
char *keyword_else;
|
||||
char *keyword_enum;
|
||||
char *keyword_extern;
|
||||
char *keyword_float;
|
||||
char *keyword_for;
|
||||
char *keyword_goto;
|
||||
char *keyword_if;
|
||||
char *keyword_inline;
|
||||
char *keyword_int;
|
||||
char *keyword_long;
|
||||
char *keyword_register;
|
||||
char *keyword_restrict;
|
||||
char *keyword_return;
|
||||
char *keyword_short;
|
||||
char *keyword_signed;
|
||||
char *keyword_sizeof;
|
||||
char *keyword_static;
|
||||
char *keyword_struct;
|
||||
char *keyword_switch;
|
||||
char *keyword_typedef;
|
||||
char *keyword_union;
|
||||
char *keyword_unsigned;
|
||||
char *keyword_void;
|
||||
char *keyword_volatile;
|
||||
char *keyword_while;
|
||||
char *lex_first_keyword = NULL;
|
||||
char *lex_last_keyword = NULL;
|
||||
char *make_intern(char *string, int len);
|
||||
#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)
|
||||
void lex_init_keywords(void) {
|
||||
keyword_auto = lex_add_keyword("auto");
|
||||
keyword_break = lex_add_keyword("break");
|
||||
keyword_case = lex_add_keyword("case");
|
||||
keyword_char = lex_add_keyword("char");
|
||||
keyword_const = lex_add_keyword("const");
|
||||
keyword_continue = lex_add_keyword("continue");
|
||||
keyword_default = lex_add_keyword("default");
|
||||
keyword_do = lex_add_keyword("do");
|
||||
keyword_double = lex_add_keyword("double");
|
||||
keyword_else = lex_add_keyword("else");
|
||||
keyword_enum = lex_add_keyword("enum");
|
||||
keyword_extern = lex_add_keyword("extern");
|
||||
keyword_float = lex_add_keyword("float");
|
||||
keyword_for = lex_add_keyword("for");
|
||||
keyword_goto = lex_add_keyword("goto");
|
||||
keyword_if = lex_add_keyword("if");
|
||||
keyword_inline = lex_add_keyword("inline");
|
||||
keyword_int = lex_add_keyword("int");
|
||||
keyword_long = lex_add_keyword("long");
|
||||
keyword_register = lex_add_keyword("register");
|
||||
keyword_restrict = lex_add_keyword("restrict");
|
||||
keyword_return = lex_add_keyword("return");
|
||||
keyword_short = lex_add_keyword("short");
|
||||
keyword_signed = lex_add_keyword("signed");
|
||||
keyword_sizeof = lex_add_keyword("sizeof");
|
||||
keyword_static = lex_add_keyword("static");
|
||||
keyword_struct = lex_add_keyword("struct");
|
||||
keyword_switch = lex_add_keyword("switch");
|
||||
keyword_typedef = lex_add_keyword("typedef");
|
||||
keyword_union = lex_add_keyword("union");
|
||||
keyword_unsigned = lex_add_keyword("unsigned");
|
||||
keyword_void = lex_add_keyword("void");
|
||||
keyword_volatile = lex_add_keyword("volatile");
|
||||
keyword_while = lex_add_keyword("while");
|
||||
#define TOK_FIRST_KEYWORD TOK_auto
|
||||
#define TOK_LAST_KEYWORD TOK_while
|
||||
lex_first_keyword = keyword_auto;
|
||||
lex_last_keyword = keyword_while;
|
||||
}
|
||||
|
||||
65
parser.c
65
parser.c
@@ -3,30 +3,6 @@ typedef struct Parser {
|
||||
Token *end;
|
||||
} Parser;
|
||||
|
||||
typedef enum Ast_Kind {
|
||||
AST_NONE,
|
||||
AST_ERROR,
|
||||
AST_INT,
|
||||
AST_UNARY,
|
||||
AST_BINARY,
|
||||
} Ast_Kind;
|
||||
|
||||
typedef struct Ast Ast;
|
||||
struct Ast {
|
||||
Ast_Kind kind;
|
||||
Token *pos;
|
||||
|
||||
union {
|
||||
uint64_t u;
|
||||
struct {
|
||||
Token_Kind op;
|
||||
Ast *l;
|
||||
Ast *r;
|
||||
};
|
||||
char *error;
|
||||
};
|
||||
};
|
||||
|
||||
Token *next_token(Parser *p) {
|
||||
if (p->at < p->end) {
|
||||
return p->at++;
|
||||
@@ -41,6 +17,18 @@ Token *match_token(Parser *p, Token_Kind kind) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Report whether `token` is a keyword token, i.e. its kind lies in the
 * inclusive range TOK_FIRST_KEYWORD..TOK_LAST_KEYWORD (defined in the
 * generated meta_gen.c as TOK_auto and TOK_while).
 */
bool is_keyword(Token *token) {
    if (token->kind < TOK_FIRST_KEYWORD) {
        return false;
    }
    return token->kind <= TOK_LAST_KEYWORD;
}
|
||||
|
||||
/*
 * Consume the current token if it is the given keyword.
 *
 * keyword: an interned keyword string (e.g. keyword_int); it is compared
 *          with the token's `intern` pointer by address, not by content.
 *
 * Returns the result of next_token when the current token matches,
 * otherwise NULL with the parser position unchanged.
 */
Token *match_keyword(Parser *p, char *keyword) {
    Token *current = p->at;

    if (!is_keyword(current) || current->intern != keyword) {
        return NULL;
    }
    return next_token(p);
}
|
||||
|
||||
Token *expect_token(Parser *p, Token_Kind kind) {
|
||||
if (p->at->kind == kind) {
|
||||
return next_token(p);
|
||||
@@ -48,21 +36,6 @@ Token *expect_token(Parser *p, Token_Kind kind) {
|
||||
/* "expected" is the kind the caller asked for; "got" is the kind of the current token. */
panicf("expected token kind: %s, got instead: %s", token_to_name(kind), token_to_name(p->at->kind));
|
||||
}
|
||||
|
||||
Ast *create_ast(Token *token, Ast_Kind kind) {
|
||||
Ast *result = calloc(1, sizeof(Ast));
|
||||
result->pos = token;
|
||||
result->kind = kind;
|
||||
return result;
|
||||
}
|
||||
|
||||
Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
|
||||
Ast *result = create_ast(token, AST_BINARY);
|
||||
result->op = op;
|
||||
result->l = left;
|
||||
result->r = right;
|
||||
return result;
|
||||
}
|
||||
|
||||
Ast *parse_expr(Parser *p, int power_of_binding_to_right);
|
||||
|
||||
Ast *parse_atom(Parser *p) {
|
||||
@@ -118,6 +91,20 @@ Ast *parse_expr(Parser *p, int power_of_binding_to_right) {
|
||||
return n;
|
||||
}
|
||||
|
||||
/*
 * Parse the top level of the token stream (work in progress).
 *
 * The only construct recognized so far is the head of a parameterless
 * function: the `int` keyword, an identifier, `(` and `)`. Each one
 * produces an AST_FUNCTION node positioned at the identifier token.
 * Any other token is reported through panicf.
 *
 * Parsing stops when the parser reaches `p->end` or a TOK_EOF token.
 *
 * Returns the node for the most recently parsed function head, or NULL
 * when the input contained none. Earlier nodes are not linked anywhere
 * yet, because Ast has no list representation at this point.
 */
Ast *parse_program(Parser *p) {
    Ast *result = NULL;

    while (p->at < p->end && p->at->kind != TOK_EOF) {
        if (match_keyword(p, keyword_int)) {
            Token *ident = expect_token(p, TOK_IDENT);
            expect_token(p, TOK_LPAREN);
            expect_token(p, TOK_RPAREN);
            result = create_ast(ident, AST_FUNCTION);
        } else {
            /* Skip the offending token before reporting it, so the loop
               always makes progress. */
            next_token(p);
            panicf("invalid token");
        }
    }

    return result;
}
|
||||
|
||||
int64_t eval_expr(Ast *n) {
|
||||
switch (n->kind) {
|
||||
case AST_INT: return (int64_t)n->u;
|
||||
|
||||
Reference in New Issue
Block a user