Compare commits
4 Commits
a0adc03bcb
...
bd97674f50
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bd97674f50 | ||
|
|
b1b79abfd4 | ||
|
|
fc04ee5c3e | ||
|
|
0058355611 |
87
ast.c
Normal file
87
ast.c
Normal file
@@ -0,0 +1,87 @@
|
||||
// Type_Kind tags the category of a Type. Enumerator values are assigned
// implicitly from 0 in declaration order, so TYPE_NONE is what a
// zero-initialized Type carries.
typedef enum {
    TYPE_NONE,
    TYPE_VOID,
    TYPE_BOOL,

    // Integer kinds (signed/unsigned variants listed pairwise).
    TYPE_CHAR,
    TYPE_SCHAR,
    TYPE_UCHAR,
    TYPE_SHORT,
    TYPE_USHORT,
    TYPE_INT,
    TYPE_UINT,
    TYPE_LONG,
    TYPE_ULONG,
    TYPE_LLONG,
    TYPE_ULLONG,

    // Floating point kinds.
    TYPE_FLOAT,
    TYPE_DOUBLE,
    TYPE_LDOUBLE,

    // Derived kinds.
    TYPE_POINTER,
    TYPE_ARRAY,
    TYPE_FUNCTION,

    // Tagged kinds.
    TYPE_STRUCT,
    TYPE_UNION,
    TYPE_ENUM,
} Type_Kind;
|
||||
|
||||
typedef struct Type Type;
|
||||
struct Type {
|
||||
Type_Kind kind;
|
||||
int size;
|
||||
int align;
|
||||
Type *base;
|
||||
};
|
||||
|
||||
// Ast_Kind tags what an Ast node represents. Values are assigned implicitly
// from 0, so a calloc'ed node starts out as AST_NONE until create_ast sets
// its kind.
typedef enum Ast_Kind {
    AST_NONE,
    AST_ERROR,
    AST_PROGRAM,

    // Expressions.
    AST_INT,
    AST_UNARY,
    AST_BINARY,

    // Declarations / statements.
    AST_FUNCTION,
    AST_BLOCK,
} Ast_Kind;
|
||||
|
||||
typedef struct Ast Ast;
|
||||
struct Ast {
|
||||
Ast_Kind kind;
|
||||
Token *pos;
|
||||
|
||||
Ast *first;
|
||||
Ast *last;
|
||||
|
||||
union {
|
||||
uint64_t u;
|
||||
struct {
|
||||
Token_Kind op;
|
||||
Ast *l;
|
||||
Ast *r;
|
||||
};
|
||||
char *error;
|
||||
};
|
||||
};
|
||||
|
||||
// Allocates a zero-initialized Ast node of the given kind positioned at
// `token`.
//
// token: stored as-is in the node's `pos`; not copied.
// kind:  stored in the node's `kind`.
//
// Returns the new node; never returns NULL. Running out of memory is treated
// as fatal: a message is written to stderr and the process aborts, instead of
// dereferencing a NULL pointer as the unchecked version did.
Ast *create_ast(Token *token, Ast_Kind kind) {
    Ast *result = calloc(1, sizeof *result);
    if (result == NULL) {
        fprintf(stderr, "create_ast: out of memory\n");
        abort();
    }
    result->pos = token;
    result->kind = kind;
    return result;
}
|
||||
|
||||
// Builds an AST_BINARY node positioned at `token` whose operator is `op` and
// whose operands are `left` and `right`. The operand pointers are stored
// directly; nothing is copied.
Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
    Ast *node = create_ast(token, AST_BINARY);

    node->l = left;
    node->r = right;
    node->op = op;

    return node;
}
|
||||
|
||||
// Descriptor for the builtin `int` type; `type_int` is the shared pointer to
// it (returned by parse_declspec). Size and alignment are taken from the host
// compiler. Uses the standard C11 `_Alignof` operator instead of the
// compiler-specific `__alignof` spelling, and names every initialized field.
Type base_type_int = {
    .kind = TYPE_INT,
    .size = (int)sizeof(int),
    .align = (int)_Alignof(int),
};
Type *type_int = &base_type_int;
|
||||
3
base.c
3
base.c
@@ -1,4 +1,7 @@
|
||||
#define panicf(...) base_panicf(__FILE__, __LINE__, __VA_ARGS__)
|
||||
#define len(x) (sizeof((x))/sizeof((x)[0]))
|
||||
#define ilen(x) ((int)len(x))
|
||||
|
||||
|
||||
_Noreturn
|
||||
void base_panicf(char *file, int line, const char *fmt, ...) {
|
||||
|
||||
28
build.sh
28
build.sh
@@ -1,32 +1,10 @@
|
||||
# set -euo pipefail
|
||||
|
||||
assert_eq() {
|
||||
expected="$1"
|
||||
actual="$2"
|
||||
if [ "$expected" != "$actual" ]; then
|
||||
echo "assert failed: expected '$expected', got '$actual'"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
verify_expr() {
|
||||
./main "$1"
|
||||
clang out.s -o test
|
||||
./test
|
||||
assert_eq $? $2
|
||||
}
|
||||
set -euo pipefail
|
||||
|
||||
if [[ ! -e build ]]; then
|
||||
mkdir build
|
||||
fi
|
||||
cd build
|
||||
clang -o meta $(realpath ../meta.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
|
||||
clang -o meta $(realpath ../meta.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths -Wno-missing-field-initializers
|
||||
./meta > ../meta_gen.c
|
||||
clang -o main $(realpath ../main.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths
|
||||
|
||||
clang -o main $(realpath ../main.c) -g -Wall -Wextra -Wshadow -fdiagnostics-absolute-paths -Wno-missing-field-initializers
|
||||
./main
|
||||
verify_expr "2+1-1" 2
|
||||
verify_expr "(2+1-1)*3" 6
|
||||
|
||||
|
||||
echo done
|
||||
@@ -32,3 +32,21 @@ void emit_program(FILE *file, Ast *n) {
|
||||
emit_expr(file, n);
|
||||
fprintf(file, " ret\n");
|
||||
}
|
||||
|
||||
void emit_expr_test(char *expr, int value) {
|
||||
Token_Array tokens = lex_file("expr", expr, strlen(expr));
|
||||
Parser parser = {tokens.data, tokens.data + tokens.len};
|
||||
Ast *ast = parse_expr(&parser, 0);
|
||||
FILE *file = fopen("out.s", "w");
|
||||
emit_program(file, ast);
|
||||
fclose(file);
|
||||
int result = system("clang out.s -o out");
|
||||
assert(result == 0);
|
||||
result = system("./out");
|
||||
assert(WEXITSTATUS(result) == value);
|
||||
}
|
||||
|
||||
// Runs the x64 emitter tests and reports success on stdout. A failing case
// asserts inside emit_expr_test.
void emit_x64_test(void) {
    // The expected value is spelled as the same C expression so the host
    // compiler computes the reference result.
    int expected = 10+5*2-10;
    emit_expr_test("10+5*2-10", expected);

    printf("x64 tests passed\n");
}
|
||||
198
lex.c
198
lex.c
@@ -1,9 +1,3 @@
|
||||
/*
|
||||
|
||||
- [ ] New line splicing, first source preprocessing stage. In order to properly handle '\\' backslash new line, we most likely need to preprocess the source in a initial pass. So at some point we need to introduce a stage that will create a buffer without wrong characters with a line / column mapping data structure.
|
||||
|
||||
*/
|
||||
|
||||
typedef struct Token {
|
||||
Token_Kind kind;
|
||||
int len;
|
||||
@@ -18,6 +12,7 @@ typedef struct Token {
|
||||
|
||||
union {
|
||||
uint64_t u;
|
||||
char *intern;
|
||||
};
|
||||
} Token;
|
||||
|
||||
@@ -32,6 +27,50 @@ typedef struct Lexer {
|
||||
uint8_t preproc;
|
||||
} Lexer;
|
||||
|
||||
|
||||
// 64-bit FNV-1a hash of `len` bytes starting at `data`.
//
// data: bytes to hash; need not be NUL-terminated. May be any pointer when
//       len is 0.
// len:  number of bytes to hash.
//
// Returns the hash. The offset basis previously lacked its final digit
// (1469598103934665603 instead of 14695981039346656037), so results did not
// match the published FNV-1a test vectors; both constants are now written in
// hex to make them easy to check against the specification.
uint64_t hash_bytes(char *data, size_t len) {
    const uint64_t fnv_offset_basis = 0xcbf29ce484222325ull;  // 14695981039346656037
    const uint64_t fnv_prime = 0x100000001b3ull;              // 1099511628211

    uint64_t h = fnv_offset_basis;
    for (size_t i = 0; i < len; i++) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
        h ^= (unsigned char)data[i];
        h *= fnv_prime;
    }
    return h;
}
|
||||
|
||||
// Storage for interned strings.
//   global_intern_table: hash slots, each NULL or a pointer into intern_arena.
//   intern_arena:        backing bytes, handed out front to back.
//   intern_arena_len:    number of arena bytes handed out so far.
char *global_intern_table[4096];
char intern_arena[4096*6];
int intern_arena_len;

// Reserves `len + 1` bytes (room for a terminator) from intern_arena and
// returns a pointer to the start of the reservation. The bytes are never
// released. Asserts when the arena is exhausted.
char *lex_alloc_string(int len) {
    int offset = intern_arena_len;

    intern_arena_len = offset + len + 1;
    assert(intern_arena_len < ilen(intern_arena));

    return &intern_arena[offset];
}
|
||||
|
||||
char *make_intern(char *string, int len) {
|
||||
uint64_t hash = hash_bytes(string, len);
|
||||
int index = hash % ilen(global_intern_table);
|
||||
for (int i = 0; i < ilen(global_intern_table); i += 1) {
|
||||
if (global_intern_table[index] == NULL) {
|
||||
global_intern_table[index] = lex_alloc_string(len + 1);
|
||||
memcpy(global_intern_table[index], string, len);
|
||||
global_intern_table[index][len] = 0;
|
||||
return global_intern_table[index];
|
||||
} else if (global_intern_table[index] && (memcmp(global_intern_table[index], string, len) == 0)) {
|
||||
return global_intern_table[index];
|
||||
}
|
||||
index += 1;
|
||||
index = index % ilen(global_intern_table);
|
||||
}
|
||||
assert(!"invalid codepath");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool lex_is_keyword(char *string) {
|
||||
bool result = string >= lex_first_keyword && string <= lex_last_keyword;
|
||||
return result;
|
||||
}
|
||||
|
||||
void lex_advance(Lexer *lex) {
|
||||
if (lex->at >= lex->end) {
|
||||
return;
|
||||
@@ -39,34 +78,13 @@ void lex_advance(Lexer *lex) {
|
||||
|
||||
if (*lex->at == '\n') {
|
||||
lex->line++;
|
||||
lex->preproc = false;
|
||||
lex->column = 0;
|
||||
} else {
|
||||
lex->column++;
|
||||
}
|
||||
|
||||
if (*lex->at == '\\') {
|
||||
lex->at += 1;
|
||||
if ((lex->at < lex->end) && *lex->at == '\n') {
|
||||
lex->at += 1;
|
||||
lex->line += 1; lex->column = 0;
|
||||
} else if ((lex->at < lex->end) && *lex->at == '\r') {
|
||||
lex->at += 1;
|
||||
if ((lex->at < lex->end) && *lex->at == '\n') {
|
||||
lex->at += 1;
|
||||
lex->line += 1; lex->column = 0;
|
||||
} else {
|
||||
panicf("after \\r missing \\n");
|
||||
}
|
||||
} else {
|
||||
panicf("stray '\\' without follow up new line");
|
||||
}
|
||||
|
||||
} else if (*lex->at == '\n') {
|
||||
lex->preproc = false;
|
||||
lex->at += 1;
|
||||
} else {
|
||||
lex->at += 1;
|
||||
}
|
||||
lex->at += 1;
|
||||
}
|
||||
|
||||
void eat_whitespace(Lexer *lex) {
|
||||
@@ -138,31 +156,6 @@ Token lex_token(Lexer *lex) {
|
||||
}
|
||||
|
||||
char c = *lex->at;
|
||||
|
||||
if (isdigit(c)) {
|
||||
t.kind = TOK_INT;
|
||||
while (lex->at < lex->end && isdigit(*lex->at)) {
|
||||
lex_advance(lex);
|
||||
}
|
||||
|
||||
// @todo: proper lexing of floats (as well as postfixes)
|
||||
if (lex->at < lex->end && *lex->at == '.') {
|
||||
t.kind = TOK_FLOAT;
|
||||
lex_advance(lex);
|
||||
|
||||
while (lex->at < lex->end && isdigit(*lex->at)) {
|
||||
lex_advance(lex);
|
||||
}
|
||||
}
|
||||
|
||||
if (t.kind == TOK_INT) {
|
||||
t.u = strtoull(t.str, NULL, 10);
|
||||
}
|
||||
|
||||
t.len = (int)(lex->at - t.str);
|
||||
return t;
|
||||
}
|
||||
|
||||
lex_advance(lex);
|
||||
|
||||
switch (c) {
|
||||
@@ -197,6 +190,27 @@ Token lex_token(Lexer *lex) {
|
||||
case '|': t.kind = lex_repeat_or_assign(lex, '|', TOK_BITOR, TOK_OR, TOK_OR_ASSIGN); break;
|
||||
case '^': t.kind = lex_assign_variant(lex, TOK_BITXOR, TOK_XOR_ASSIGN); break;
|
||||
|
||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
||||
t.kind = TOK_INT;
|
||||
while (lex->at < lex->end && isdigit(*lex->at)) {
|
||||
lex_advance(lex);
|
||||
}
|
||||
t.u = strtoull(t.str, NULL, 10);
|
||||
} break;
|
||||
|
||||
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j':
|
||||
case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't':
|
||||
case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
|
||||
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J':
|
||||
case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T':
|
||||
case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z':
|
||||
case '_': {
|
||||
t.kind = TOK_IDENT;
|
||||
while (lex->at < lex->end && (isalnum(*lex->at) || *lex->at == '_')) {
|
||||
lex_advance(lex);
|
||||
}
|
||||
} break;
|
||||
|
||||
case '#': {
|
||||
t.kind = TOK_HASH;
|
||||
lex->preproc = t.preproc = true;
|
||||
@@ -209,6 +223,47 @@ Token lex_token(Lexer *lex) {
|
||||
}
|
||||
|
||||
t.len = (int)(lex->at - t.str);
|
||||
|
||||
if (t.kind == TOK_IDENT) {
|
||||
t.intern = make_intern(t.str, t.len);
|
||||
if (lex_is_keyword(t.intern)) {
|
||||
if (t.intern == keyword_while) t.kind = TOK_while;
|
||||
if (t.intern == keyword_break) t.kind = TOK_break;
|
||||
if (t.intern == keyword_case) t.kind = TOK_case;
|
||||
if (t.intern == keyword_char) t.kind = TOK_char;
|
||||
if (t.intern == keyword_const) t.kind = TOK_const;
|
||||
if (t.intern == keyword_continue) t.kind = TOK_continue;
|
||||
if (t.intern == keyword_default) t.kind = TOK_default;
|
||||
if (t.intern == keyword_do) t.kind = TOK_do;
|
||||
if (t.intern == keyword_double) t.kind = TOK_double;
|
||||
if (t.intern == keyword_else) t.kind = TOK_else;
|
||||
if (t.intern == keyword_enum) t.kind = TOK_enum;
|
||||
if (t.intern == keyword_extern) t.kind = TOK_extern;
|
||||
if (t.intern == keyword_float) t.kind = TOK_float;
|
||||
if (t.intern == keyword_for) t.kind = TOK_for;
|
||||
if (t.intern == keyword_goto) t.kind = TOK_goto;
|
||||
if (t.intern == keyword_if) t.kind = TOK_if;
|
||||
if (t.intern == keyword_inline) t.kind = TOK_inline;
|
||||
if (t.intern == keyword_int) t.kind = TOK_int;
|
||||
if (t.intern == keyword_long) t.kind = TOK_long;
|
||||
if (t.intern == keyword_register) t.kind = TOK_register;
|
||||
if (t.intern == keyword_restrict) t.kind = TOK_restrict;
|
||||
if (t.intern == keyword_return) t.kind = TOK_return;
|
||||
if (t.intern == keyword_short) t.kind = TOK_short;
|
||||
if (t.intern == keyword_signed) t.kind = TOK_signed;
|
||||
if (t.intern == keyword_sizeof) t.kind = TOK_sizeof;
|
||||
if (t.intern == keyword_static) t.kind = TOK_static;
|
||||
if (t.intern == keyword_struct) t.kind = TOK_struct;
|
||||
if (t.intern == keyword_switch) t.kind = TOK_switch;
|
||||
if (t.intern == keyword_typedef) t.kind = TOK_typedef;
|
||||
if (t.intern == keyword_union) t.kind = TOK_union;
|
||||
if (t.intern == keyword_unsigned) t.kind = TOK_unsigned;
|
||||
if (t.intern == keyword_void) t.kind = TOK_void;
|
||||
if (t.intern == keyword_volatile) t.kind = TOK_volatile;
|
||||
if (t.intern == keyword_auto) t.kind = TOK_auto;
|
||||
}
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
@@ -234,12 +289,12 @@ void assert_token(Token t, Token_Kind kind, char *text, int line, int column) {
|
||||
}
|
||||
|
||||
void lex_test(void) {
|
||||
char *src = "12 + 34.5 * 6\n- 7 % 2 / 1 == 1 != 2 <= 3 >= 4 && 3 || 4 << 1 >> 2";
|
||||
char *src = "12 + 34 * 6\n- 7 % 2 / 1 == 1 != 2 <= 3 >= 4 && 3 || 4 << 1 >> 2";
|
||||
Lexer lex = make_lexer("test.c", src, (int)strlen(src));
|
||||
|
||||
assert_token(lex_token(&lex), TOK_INT, "12", 0, 0);
|
||||
assert_token(lex_token(&lex), TOK_PLUS, "+", 0, 3);
|
||||
assert_token(lex_token(&lex), TOK_FLOAT, "34.5", 0, 5);
|
||||
assert_token(lex_token(&lex), TOK_INT, "34", 0, 5);
|
||||
assert_token(lex_token(&lex), TOK_STAR, "*", 0, 10);
|
||||
assert_token(lex_token(&lex), TOK_INT, "6", 0, 12);
|
||||
assert_token(lex_token(&lex), TOK_MINUS, "-", 1, 0);
|
||||
@@ -274,5 +329,36 @@ void lex_test(void) {
|
||||
Token_Array array = lex_file("test.c", src, (int)strlen(src));
|
||||
assert(array.len == 28);
|
||||
|
||||
char *intern_a = make_intern("hello", 5);
|
||||
char *intern_b = make_intern("hello", 5);
|
||||
char *intern_c = make_intern("world", 5);
|
||||
assert(strcmp(intern_a, "hello") == 0);
|
||||
assert(strcmp(intern_b, "hello") == 0);
|
||||
assert(strcmp(intern_c, "world") == 0);
|
||||
assert(intern_a == intern_b);
|
||||
assert(intern_a != intern_c);
|
||||
|
||||
char *ident_src = "foo _bar baz123 if for while if_ x9";
|
||||
Lexer ident_lex = make_lexer("ident_test.c", ident_src, (int)strlen(ident_src));
|
||||
Token foo = lex_token(&ident_lex);
|
||||
assert_token(foo, TOK_IDENT, "foo", 0, 0);
|
||||
assert(strcmp(foo.intern, "foo") == 0);
|
||||
Token bar = lex_token(&ident_lex);
|
||||
assert_token(bar, TOK_IDENT, "_bar", 0, 4);
|
||||
assert(strcmp(bar.intern, "_bar") == 0);
|
||||
Token baz123 = lex_token(&ident_lex);
|
||||
assert_token(baz123, TOK_IDENT, "baz123", 0, 9);
|
||||
assert(strcmp(baz123.intern, "baz123") == 0);
|
||||
Token kw_if = lex_token(&ident_lex);
|
||||
assert_token(kw_if, TOK_if, "if", 0, 16);
|
||||
Token kw_for = lex_token(&ident_lex);
|
||||
assert_token(kw_for, TOK_for, "for", 0, 19);
|
||||
Token kw_while = lex_token(&ident_lex);
|
||||
assert_token(kw_while, TOK_while, "while", 0, 23);
|
||||
Token ident_if_ = lex_token(&ident_lex);
|
||||
assert_token(ident_if_, TOK_IDENT, "if_", 0, 29);
|
||||
Token ident_x9 = lex_token(&ident_lex);
|
||||
assert_token(ident_x9, TOK_IDENT, "x9", 0, 33);
|
||||
|
||||
printf("lexer tests passed\n");
|
||||
}
|
||||
|
||||
30
main.c
30
main.c
@@ -1,3 +1,13 @@
|
||||
/*
|
||||
|
||||
- [ ] Compile simple int main program
|
||||
- [x] Lex identifiers, keywords
|
||||
- [ ] Add parsing of this
|
||||
- [ ] Emit it
|
||||
- [ ] Print error tokens location properly in lexer / parser and make it easy (not from source code)
|
||||
- [ ] New line splicing, first source preprocessing stage. In order to properly handle '\\' backslash-newline, we most likely need to preprocess the source in an initial pass. So at some point we need to introduce a stage that will create a buffer without the wrong characters, together with a line / column mapping data structure.
|
||||
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
@@ -9,20 +19,14 @@
|
||||
#include "base.c"
|
||||
#include "meta_gen.c"
|
||||
#include "lex.c"
|
||||
#include "ast.c"
|
||||
#include "parser.c"
|
||||
#include "emit_asm_x64.c"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc == 2) {
|
||||
Token_Array tokens = lex_file("expr", argv[1], strlen(argv[1]));
|
||||
Parser parser = {tokens.data, tokens.data + tokens.len};
|
||||
Ast *ast = parse_expr(&parser, 0);
|
||||
FILE *file = fopen("out.s", "w");
|
||||
emit_program(file, ast);
|
||||
fclose(file);
|
||||
} else {
|
||||
vec_test();
|
||||
lex_test();
|
||||
parser_test();
|
||||
}
|
||||
int main() {
|
||||
lex_init_keywords();
|
||||
vec_test();
|
||||
lex_test();
|
||||
parser_test();
|
||||
emit_x64_test();
|
||||
}
|
||||
191
meta.c
191
meta.c
@@ -1,76 +1,109 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#define len(x) (sizeof((x))/sizeof((x)[0]))
|
||||
#define ilen(x) ((int)len(x))
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
#include "base.c"
|
||||
|
||||
int main() {
|
||||
typedef struct {
|
||||
char *name;
|
||||
char *serialized_operator;
|
||||
bool keyword;
|
||||
} Task;
|
||||
Task kinds[] = {
|
||||
{"EOF", 0},
|
||||
{"ERROR", 0},
|
||||
{"EOF"},
|
||||
{"ERROR"},
|
||||
{"IDENT"},
|
||||
{"INT"},
|
||||
{"FLOAT"},
|
||||
{"CHAR"},
|
||||
{"STRING"},
|
||||
|
||||
{"IDENT", 0},
|
||||
{"KEYWORD", 0},
|
||||
{"INT", 0},
|
||||
{"FLOAT", 0},
|
||||
{"CHAR", 0},
|
||||
{"STRING", 0},
|
||||
{"LPAREN", .serialized_operator = "("},
|
||||
{"RPAREN", .serialized_operator = ")"},
|
||||
{"LBRACKET", .serialized_operator = "["},
|
||||
{"RBRACKET", .serialized_operator = "]"},
|
||||
{"LBRACE", .serialized_operator = "{"},
|
||||
{"RBRACE", .serialized_operator = "}"},
|
||||
{"COMMA", .serialized_operator = ","},
|
||||
{"DOT", .serialized_operator = "."},
|
||||
{"ARROW", .serialized_operator = "->"},
|
||||
{"ELLIPSIS", .serialized_operator = "..."},
|
||||
{"COLON", .serialized_operator = ":"},
|
||||
{"SEMICOLON", .serialized_operator = ";"},
|
||||
{"QUESTION", .serialized_operator = "?"},
|
||||
{"HASH", .serialized_operator = "#"},
|
||||
{"HASHHASH", .serialized_operator = "##"},
|
||||
{"PLUS", .serialized_operator = "+"},
|
||||
{"MINUS", .serialized_operator = "-"},
|
||||
{"STAR", .serialized_operator = "*"},
|
||||
{"SLASH", .serialized_operator = "/"},
|
||||
{"PERCENT", .serialized_operator = "%"},
|
||||
{"INC", .serialized_operator = "++"},
|
||||
{"DEC", .serialized_operator = "--"},
|
||||
{"ASSIGN", .serialized_operator = "="},
|
||||
{"PLUS_ASSIGN", .serialized_operator = "+="},
|
||||
{"MINUS_ASSIGN", .serialized_operator = "-="},
|
||||
{"MUL_ASSIGN", .serialized_operator = "*="},
|
||||
{"DIV_ASSIGN", .serialized_operator = "/="},
|
||||
{"MOD_ASSIGN", .serialized_operator = "%="},
|
||||
{"LSHIFT_ASSIGN", .serialized_operator = "<<="},
|
||||
{"RSHIFT_ASSIGN", .serialized_operator = ">>="},
|
||||
{"AND_ASSIGN", .serialized_operator = "&="},
|
||||
{"XOR_ASSIGN", .serialized_operator = "^="},
|
||||
{"OR_ASSIGN", .serialized_operator = "|="},
|
||||
{"EQ", .serialized_operator = "=="},
|
||||
{"NEQ", .serialized_operator = "!="},
|
||||
{"LT", .serialized_operator = "<"},
|
||||
{"LEQ", .serialized_operator = "<="},
|
||||
{"GT", .serialized_operator = ">"},
|
||||
{"GEQ", .serialized_operator = ">="},
|
||||
{"NOT", .serialized_operator = "!"},
|
||||
{"BITNOT", .serialized_operator = "~"},
|
||||
{"BITAND", .serialized_operator = "&"},
|
||||
{"BITOR", .serialized_operator = "|"},
|
||||
{"BITXOR", .serialized_operator = "^"},
|
||||
{"AND", .serialized_operator = "&&"},
|
||||
{"OR", .serialized_operator = "||"},
|
||||
{"LSHIFT", .serialized_operator = "<<"},
|
||||
{"RSHIFT", .serialized_operator = ">>"},
|
||||
|
||||
{"LPAREN", "("},
|
||||
{"RPAREN", ")"},
|
||||
{"LBRACKET", "["},
|
||||
{"RBRACKET", "]"},
|
||||
{"LBRACE", "{"},
|
||||
{"RBRACE", "}"},
|
||||
{"COMMA", ","},
|
||||
{"DOT", "."},
|
||||
{"ARROW", "->"},
|
||||
{"ELLIPSIS", "..."},
|
||||
{"COLON", ":"},
|
||||
{"SEMICOLON", ";"},
|
||||
{"QUESTION", "?"},
|
||||
{"HASH", "#"},
|
||||
{"HASHHASH", "##"},
|
||||
|
||||
{"PLUS", "+"},
|
||||
{"MINUS", "-"},
|
||||
{"STAR", "*"},
|
||||
{"SLASH", "/"},
|
||||
{"PERCENT", "%"},
|
||||
{"INC", "++"},
|
||||
{"DEC", "--"},
|
||||
|
||||
{"ASSIGN", "="},
|
||||
{"PLUS_ASSIGN", "+="},
|
||||
{"MINUS_ASSIGN", "-="},
|
||||
{"MUL_ASSIGN", "*="},
|
||||
{"DIV_ASSIGN", "/="},
|
||||
{"MOD_ASSIGN", "%="},
|
||||
{"LSHIFT_ASSIGN", "<<="},
|
||||
{"RSHIFT_ASSIGN", ">>="},
|
||||
{"AND_ASSIGN", "&="},
|
||||
{"XOR_ASSIGN", "^="},
|
||||
{"OR_ASSIGN", "|="},
|
||||
|
||||
{"EQ", "=="},
|
||||
{"NEQ", "!="},
|
||||
{"LT", "<"},
|
||||
{"LEQ", "<="},
|
||||
{"GT", ">"},
|
||||
{"GEQ", ">="},
|
||||
|
||||
{"NOT", "!"},
|
||||
{"BITNOT", "~"},
|
||||
{"BITAND", "&"},
|
||||
{"BITOR", "|"},
|
||||
{"BITXOR", "^"},
|
||||
{"AND", "&&"},
|
||||
{"OR", "||"},
|
||||
{"LSHIFT", "<<"},
|
||||
{"RSHIFT", ">>"},
|
||||
{"auto", .keyword = true},
|
||||
{"break", .keyword = true},
|
||||
{"case", .keyword = true},
|
||||
{"char", .keyword = true},
|
||||
{"const", .keyword = true},
|
||||
{"continue", .keyword = true},
|
||||
{"default", .keyword = true},
|
||||
{"do", .keyword = true},
|
||||
{"double", .keyword = true},
|
||||
{"else", .keyword = true},
|
||||
{"enum", .keyword = true},
|
||||
{"extern", .keyword = true},
|
||||
{"float", .keyword = true},
|
||||
{"for", .keyword = true},
|
||||
{"goto", .keyword = true},
|
||||
{"if", .keyword = true},
|
||||
{"inline", .keyword = true},
|
||||
{"int", .keyword = true},
|
||||
{"long", .keyword = true},
|
||||
{"register", .keyword = true},
|
||||
{"restrict", .keyword = true},
|
||||
{"return", .keyword = true},
|
||||
{"short", .keyword = true},
|
||||
{"signed", .keyword = true},
|
||||
{"sizeof", .keyword = true},
|
||||
{"static", .keyword = true},
|
||||
{"struct", .keyword = true},
|
||||
{"switch", .keyword = true},
|
||||
{"typedef", .keyword = true},
|
||||
{"union", .keyword = true},
|
||||
{"unsigned", .keyword = true},
|
||||
{"void", .keyword = true},
|
||||
{"volatile", .keyword = true},
|
||||
{"while", .keyword = true},
|
||||
};
|
||||
|
||||
printf("// auto generated by meta.c\n");
|
||||
@@ -102,4 +135,36 @@ int main() {
|
||||
printf(" default: return \"<invalid-token-kind>\";\n");
|
||||
printf(" }\n");
|
||||
printf("}\n");
|
||||
|
||||
{
|
||||
printf("// \n");
|
||||
printf("// KEYWORDS\n");
|
||||
printf("// \n");
|
||||
|
||||
for (int i = 0; i < ilen(kinds); i += 1) {
|
||||
if (kinds[i].keyword) {
|
||||
printf("char *keyword_%s;\n", kinds[i].name);
|
||||
}
|
||||
}
|
||||
|
||||
Task *first = NULL;
|
||||
Task *last = NULL;
|
||||
printf("char *lex_first_keyword = NULL;\n");
|
||||
printf("char *lex_last_keyword = NULL;\n");
|
||||
printf("char *make_intern(char *string, int len);\n");
|
||||
printf("#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)\n");
|
||||
printf("void lex_init_keywords(void) {\n");
|
||||
for (int i = 0; i < ilen(kinds); i += 1) {
|
||||
if (kinds[i].keyword) {
|
||||
if (!first) first = kinds + i;
|
||||
last = kinds + i;
|
||||
printf(" keyword_%s = lex_add_keyword(\"%s\");\n", kinds[i].name, kinds[i].name);
|
||||
}
|
||||
}
|
||||
printf("#define TOK_FIRST_KEYWORD TOK_%s\n", first->name);
|
||||
printf("#define TOK_LAST_KEYWORD TOK_%s\n", last->name);
|
||||
printf(" lex_first_keyword = keyword_%s;\n", first->name);
|
||||
printf(" lex_last_keyword = keyword_%s;\n", last->name);
|
||||
printf("}\n");
|
||||
}
|
||||
}
|
||||
|
||||
151
meta_gen.c
151
meta_gen.c
@@ -3,7 +3,6 @@ typedef enum {
|
||||
TOK_EOF,
|
||||
TOK_ERROR,
|
||||
TOK_IDENT,
|
||||
TOK_KEYWORD,
|
||||
TOK_INT,
|
||||
TOK_FLOAT,
|
||||
TOK_CHAR,
|
||||
@@ -56,6 +55,40 @@ typedef enum {
|
||||
TOK_OR,
|
||||
TOK_LSHIFT,
|
||||
TOK_RSHIFT,
|
||||
TOK_auto,
|
||||
TOK_break,
|
||||
TOK_case,
|
||||
TOK_char,
|
||||
TOK_const,
|
||||
TOK_continue,
|
||||
TOK_default,
|
||||
TOK_do,
|
||||
TOK_double,
|
||||
TOK_else,
|
||||
TOK_enum,
|
||||
TOK_extern,
|
||||
TOK_float,
|
||||
TOK_for,
|
||||
TOK_goto,
|
||||
TOK_if,
|
||||
TOK_inline,
|
||||
TOK_int,
|
||||
TOK_long,
|
||||
TOK_register,
|
||||
TOK_restrict,
|
||||
TOK_return,
|
||||
TOK_short,
|
||||
TOK_signed,
|
||||
TOK_sizeof,
|
||||
TOK_static,
|
||||
TOK_struct,
|
||||
TOK_switch,
|
||||
TOK_typedef,
|
||||
TOK_union,
|
||||
TOK_unsigned,
|
||||
TOK_void,
|
||||
TOK_volatile,
|
||||
TOK_while,
|
||||
} Token_Kind;
|
||||
char *token_to_op(Token_Kind kind) {
|
||||
switch (kind) {
|
||||
@@ -115,7 +148,6 @@ char *token_to_name(Token_Kind kind) {
|
||||
case TOK_EOF: return "EOF";
|
||||
case TOK_ERROR: return "ERROR";
|
||||
case TOK_IDENT: return "IDENT";
|
||||
case TOK_KEYWORD: return "KEYWORD";
|
||||
case TOK_INT: return "INT";
|
||||
case TOK_FLOAT: return "FLOAT";
|
||||
case TOK_CHAR: return "CHAR";
|
||||
@@ -168,6 +200,121 @@ char *token_to_name(Token_Kind kind) {
|
||||
case TOK_OR: return "OR";
|
||||
case TOK_LSHIFT: return "LSHIFT";
|
||||
case TOK_RSHIFT: return "RSHIFT";
|
||||
case TOK_auto: return "auto";
|
||||
case TOK_break: return "break";
|
||||
case TOK_case: return "case";
|
||||
case TOK_char: return "char";
|
||||
case TOK_const: return "const";
|
||||
case TOK_continue: return "continue";
|
||||
case TOK_default: return "default";
|
||||
case TOK_do: return "do";
|
||||
case TOK_double: return "double";
|
||||
case TOK_else: return "else";
|
||||
case TOK_enum: return "enum";
|
||||
case TOK_extern: return "extern";
|
||||
case TOK_float: return "float";
|
||||
case TOK_for: return "for";
|
||||
case TOK_goto: return "goto";
|
||||
case TOK_if: return "if";
|
||||
case TOK_inline: return "inline";
|
||||
case TOK_int: return "int";
|
||||
case TOK_long: return "long";
|
||||
case TOK_register: return "register";
|
||||
case TOK_restrict: return "restrict";
|
||||
case TOK_return: return "return";
|
||||
case TOK_short: return "short";
|
||||
case TOK_signed: return "signed";
|
||||
case TOK_sizeof: return "sizeof";
|
||||
case TOK_static: return "static";
|
||||
case TOK_struct: return "struct";
|
||||
case TOK_switch: return "switch";
|
||||
case TOK_typedef: return "typedef";
|
||||
case TOK_union: return "union";
|
||||
case TOK_unsigned: return "unsigned";
|
||||
case TOK_void: return "void";
|
||||
case TOK_volatile: return "volatile";
|
||||
case TOK_while: return "while";
|
||||
default: return "<invalid-token-kind>";
|
||||
}
|
||||
}
|
||||
//
|
||||
// KEYWORDS
|
||||
//
|
||||
char *keyword_auto;
|
||||
char *keyword_break;
|
||||
char *keyword_case;
|
||||
char *keyword_char;
|
||||
char *keyword_const;
|
||||
char *keyword_continue;
|
||||
char *keyword_default;
|
||||
char *keyword_do;
|
||||
char *keyword_double;
|
||||
char *keyword_else;
|
||||
char *keyword_enum;
|
||||
char *keyword_extern;
|
||||
char *keyword_float;
|
||||
char *keyword_for;
|
||||
char *keyword_goto;
|
||||
char *keyword_if;
|
||||
char *keyword_inline;
|
||||
char *keyword_int;
|
||||
char *keyword_long;
|
||||
char *keyword_register;
|
||||
char *keyword_restrict;
|
||||
char *keyword_return;
|
||||
char *keyword_short;
|
||||
char *keyword_signed;
|
||||
char *keyword_sizeof;
|
||||
char *keyword_static;
|
||||
char *keyword_struct;
|
||||
char *keyword_switch;
|
||||
char *keyword_typedef;
|
||||
char *keyword_union;
|
||||
char *keyword_unsigned;
|
||||
char *keyword_void;
|
||||
char *keyword_volatile;
|
||||
char *keyword_while;
|
||||
char *lex_first_keyword = NULL;
|
||||
char *lex_last_keyword = NULL;
|
||||
char *make_intern(char *string, int len);
|
||||
#define lex_add_keyword(x) make_intern(x, ilen(x) - 1)
|
||||
void lex_init_keywords(void) {
|
||||
keyword_auto = lex_add_keyword("auto");
|
||||
keyword_break = lex_add_keyword("break");
|
||||
keyword_case = lex_add_keyword("case");
|
||||
keyword_char = lex_add_keyword("char");
|
||||
keyword_const = lex_add_keyword("const");
|
||||
keyword_continue = lex_add_keyword("continue");
|
||||
keyword_default = lex_add_keyword("default");
|
||||
keyword_do = lex_add_keyword("do");
|
||||
keyword_double = lex_add_keyword("double");
|
||||
keyword_else = lex_add_keyword("else");
|
||||
keyword_enum = lex_add_keyword("enum");
|
||||
keyword_extern = lex_add_keyword("extern");
|
||||
keyword_float = lex_add_keyword("float");
|
||||
keyword_for = lex_add_keyword("for");
|
||||
keyword_goto = lex_add_keyword("goto");
|
||||
keyword_if = lex_add_keyword("if");
|
||||
keyword_inline = lex_add_keyword("inline");
|
||||
keyword_int = lex_add_keyword("int");
|
||||
keyword_long = lex_add_keyword("long");
|
||||
keyword_register = lex_add_keyword("register");
|
||||
keyword_restrict = lex_add_keyword("restrict");
|
||||
keyword_return = lex_add_keyword("return");
|
||||
keyword_short = lex_add_keyword("short");
|
||||
keyword_signed = lex_add_keyword("signed");
|
||||
keyword_sizeof = lex_add_keyword("sizeof");
|
||||
keyword_static = lex_add_keyword("static");
|
||||
keyword_struct = lex_add_keyword("struct");
|
||||
keyword_switch = lex_add_keyword("switch");
|
||||
keyword_typedef = lex_add_keyword("typedef");
|
||||
keyword_union = lex_add_keyword("union");
|
||||
keyword_unsigned = lex_add_keyword("unsigned");
|
||||
keyword_void = lex_add_keyword("void");
|
||||
keyword_volatile = lex_add_keyword("volatile");
|
||||
keyword_while = lex_add_keyword("while");
|
||||
#define TOK_FIRST_KEYWORD TOK_auto
|
||||
#define TOK_LAST_KEYWORD TOK_while
|
||||
lex_first_keyword = keyword_auto;
|
||||
lex_last_keyword = keyword_while;
|
||||
}
|
||||
|
||||
69
parser.c
69
parser.c
@@ -3,30 +3,6 @@ typedef struct Parser {
|
||||
Token *end;
|
||||
} Parser;
|
||||
|
||||
typedef enum Ast_Kind {
|
||||
AST_NONE,
|
||||
AST_ERROR,
|
||||
AST_INT,
|
||||
AST_UNARY,
|
||||
AST_BINARY,
|
||||
} Ast_Kind;
|
||||
|
||||
typedef struct Ast Ast;
|
||||
struct Ast {
|
||||
Ast_Kind kind;
|
||||
Token *pos;
|
||||
|
||||
union {
|
||||
uint64_t u;
|
||||
struct {
|
||||
Token_Kind op;
|
||||
Ast *l;
|
||||
Ast *r;
|
||||
};
|
||||
char *error;
|
||||
};
|
||||
};
|
||||
|
||||
Token *next_token(Parser *p) {
|
||||
if (p->at < p->end) {
|
||||
return p->at++;
|
||||
@@ -41,6 +17,18 @@ Token *match_token(Parser *p, Token_Kind kind) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Reports whether `token` is a keyword token, i.e. its kind lies in the
// inclusive range TOK_FIRST_KEYWORD..TOK_LAST_KEYWORD.
bool is_keyword(Token *token) {
    return TOK_FIRST_KEYWORD <= token->kind && token->kind <= TOK_LAST_KEYWORD;
}
|
||||
|
||||
// Consumes the current token when it is the given keyword.
//
// p:       parser whose current token is inspected.
// keyword: interned keyword string; compared by address against the token's
//          interned text.
//
// Returns the result of next_token when the keyword matched, otherwise NULL
// with the parser left untouched.
Token *match_keyword(Parser *p, char *keyword) {
    Token *current = p->at;

    if (!is_keyword(current) || current->intern != keyword) {
        return NULL;
    }
    return next_token(p);
}
|
||||
|
||||
Token *expect_token(Parser *p, Token_Kind kind) {
|
||||
if (p->at->kind == kind) {
|
||||
return next_token(p);
|
||||
@@ -48,21 +36,6 @@ Token *expect_token(Parser *p, Token_Kind kind) {
|
||||
panicf("expected token kind: %s, got instead: %s", token_to_name(p->at->kind), token_to_name(kind));
|
||||
}
|
||||
|
||||
Ast *create_ast(Token *token, Ast_Kind kind) {
|
||||
Ast *result = calloc(1, sizeof(Ast));
|
||||
result->pos = token;
|
||||
result->kind = kind;
|
||||
return result;
|
||||
}
|
||||
|
||||
Ast *create_binary_expr(Token *token, Token_Kind op, Ast *left, Ast *right) {
|
||||
Ast *result = create_ast(token, AST_BINARY);
|
||||
result->op = op;
|
||||
result->l = left;
|
||||
result->r = right;
|
||||
return result;
|
||||
}
|
||||
|
||||
Ast *parse_expr(Parser *p, int power_of_binding_to_right);
|
||||
|
||||
Ast *parse_atom(Parser *p) {
|
||||
@@ -118,6 +91,24 @@ Ast *parse_expr(Parser *p, int power_of_binding_to_right) {
|
||||
return n;
|
||||
}
|
||||
|
||||
// Parses a declaration specifier. Only `int` is recognized at the moment:
// the token is consumed and the shared `type_int` descriptor is returned.
// Any other token is a fatal error reported through panicf with the token's
// file and line.
Type *parse_declspec(Parser *p) {
    if (!match_token(p, TOK_int)) {
        panicf("%s:%d: error: unknown token while parsing declspec", p->at->file, p->at->line);
    }
    return type_int;
}
|
||||
|
||||
// Parses a whole translation unit into an AST_PROGRAM node positioned at the
// first token.
//
// Work in progress: each loop iteration currently only consumes a declaration
// specifier (parse_declspec panics on anything it does not recognize) and
// nothing is attached to the returned node yet.
//
// Returns the AST_PROGRAM node.
Ast *parse_program(Parser *p) {
    Ast *result = create_ast(p->at, AST_PROGRAM);

    while (p->at->kind != TOK_EOF) {
        // TODO: use the parsed type once declarators are implemented. The
        // result is discarded explicitly so the build's -Wall/-Wextra flags
        // do not report an unused variable.
        (void)parse_declspec(p);
    }

    return result;
}
|
||||
|
||||
int64_t eval_expr(Ast *n) {
|
||||
switch (n->kind) {
|
||||
case AST_INT: return (int64_t)n->u;
|
||||
|
||||
Reference in New Issue
Block a user