518 lines
13 KiB
C
518 lines
13 KiB
C
global Token token_end_of_stream = {}; // Zeroed sentinel copied one slot past tokens->len by lex_base so iteration can safely read past the last real token.
|
|
|
|
function Token *
token_alloc(Tokens *t){
    // Append a zeroed Token slot to `t`, growing the backing array by
    // doubling (initial capacity 1024). Returns a pointer into t->tokens,
    // which is only valid until the next call (realloc may move the array).
    if(t->cap == 0){
        t->cap = 1024;
        t->tokens = malloc(sizeof(Token)*t->cap);
        assert(t->tokens); // out of memory is fatal here
    }
    else if(t->len+1 > t->cap){
        t->cap *= 2;
        // Fix: never overwrite the only pointer with realloc's result —
        // on failure the original buffer would be leaked and all tokens lost.
        Token *grown = realloc(t->tokens, sizeof(Token)*t->cap);
        assert(grown);
        t->tokens = grown;
    }

    Token *result = t->tokens + t->len++;
    memory_zero(result, sizeof(*result));
    return result;
}
|
|
|
|
function void
|
|
lex_advance(Lex_Stream *s){
|
|
if(*s->stream == '\n'){
|
|
s->stream++;
|
|
s->line++;
|
|
s->line_begin = s->stream;
|
|
}
|
|
else if(*s->stream == 0){
|
|
// Don't advance, end of stream
|
|
}
|
|
else{
|
|
s->stream++;
|
|
}
|
|
}
|
|
|
|
function B32
|
|
lex_is_whitespace(U8 c){
|
|
B32 result = c == '\n' || c == '\r' || c == ' ' || c == '\r';
|
|
return result;
|
|
}
|
|
|
|
function B32
|
|
lex_is_alphabetic(U8 c){
|
|
B32 result = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
|
|
return result;
|
|
}
|
|
|
|
function B32
|
|
lex_is_numeric(U8 c){
|
|
B32 result = c >= '0' && c <= '9';
|
|
return result;
|
|
}
|
|
|
|
function B32
|
|
lex_is_alphanumeric(U8 c){
|
|
B32 result = lex_is_numeric(c) || lex_is_alphabetic(c);
|
|
return result;
|
|
}
|
|
|
|
function void
lex_set_len(Lex_Stream *s, Token *token){
    // Finalize the token's length as the number of bytes consumed since
    // lex_token_seed stamped token->str. The assert guards against a token
    // seeded after the current stream position (would yield a negative len).
    assert(s->stream >= token->str);
    token->len = s->stream - token->str;
}
|
|
|
|
function U8
|
|
lexc(Lex_Stream *s){
|
|
return *s->stream;
|
|
}
|
|
|
|
function void
|
|
token_error(Token *t, String error_val){
|
|
t->kind = TK_Error;
|
|
t->error_val = error_val;
|
|
}
|
|
|
|
function void
lex_parse_string(Lex_Stream *s, Token *t, U8 c){
    // Consume a quoted run up to the closing quote byte `c`, honoring
    // backslash escapes. On success the closing quote is consumed and
    // t->len covers the whole span including both quotes (callers trim
    // them off afterwards). If the stream ends first, t becomes TK_Error
    // and the length is left unset.
    for(;;){
        if(lexc(s) == '\\') lex_advance(s); // skip the '\\'; escaped byte consumed below
        else if(lexc(s) == c) break;
        else if(lexc(s) == 0){
            token_error(t, lit("Unterminated string, reached end of file"));
            break;
        }
        lex_advance(s);
    }
    if(t->kind != TK_Error){
        lex_advance(s); // consume the closing quote
        lex_set_len(s,t);
    }
}
|
|
|
|
function void
|
|
lex_token_seed(Lex_Stream *s, Token *t){
|
|
t->str = s->stream;
|
|
t->file = s->filename;
|
|
t->line = s->line;
|
|
t->line_begin = s->line_begin;
|
|
}
|
|
|
|
function U64
|
|
parse_u64(U8 *str, S64 len){
|
|
U64 result = 0;
|
|
U64 m = 1;
|
|
for(S64 i = len - 1; i >= 0; --i){
|
|
U64 val = str[i] - '0';
|
|
U64 new_val = val * m;
|
|
assert_msg(result+new_val >= result, "Integer overflow!");
|
|
result+=new_val;
|
|
m *= 10;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
function void
|
|
token_push_error(Lex_Stream *stream, Tokens *tokens, String error_val){
|
|
Token *token = token_alloc(tokens);
|
|
token->kind = TK_Error;
|
|
token->error_val = error_val;
|
|
lex_token_seed(stream, token);
|
|
}
|
|
|
|
function void
lex_base(Lex_Stream *s, Tokens *tokens){
    // Core lexer loop: emits one token per iteration into `tokens` until the
    // NUL terminator is reached, then writes a zeroed end-of-stream sentinel
    // one slot past tokens->len (allocated, then len is decremented back).
    while(*s->stream){
        while(lex_is_whitespace(*s->stream))
            lex_advance(s);

        // CASE2(op, A, B): lexes `op` as kind A, or `op=` as kind B.
#define CASE2(op, OpName, Assign) \
        case op: \
        if (lexc(s) == '=') { \
            lex_advance(s); \
            t->kind = Assign; \
        } else { \
            t->kind = OpName; \
        } \
        break
        // CASE3(op, A, B, C): lexes `op` as A, `op=` as B, doubled `opop` as C.
#define CASE3(op, OpName, Assign, Incr) \
        case op: \
        if (lexc(s) == '=') { \
            lex_advance(s); \
            t->kind = Assign; \
        } else if (lexc(s) == op) { \
            lex_advance(s); \
            t->kind = Incr; \
        } else { \
            t->kind = OpName; \
        } \
        break

        Token *t = token_alloc(tokens);
        // Never entered on normal flow (if(0)); comment lexing jumps to `top`
        // so the already-allocated token `t` is reused for the next real
        // token instead of emitting a token for the comment.
        if(0){
            top:
            while(lex_is_whitespace(*s->stream))
                lex_advance(s);
            if(lexc(s) == 0)
                break; // comment ran to EOF: exit the outer while loop
        }
        lex_token_seed(s, t);
        lex_advance(s); // consume the first byte; it remains readable as *t->str
        switch(*t->str) {
            CASE2('!', TK_Not, TK_NotEquals);
            CASE2('^', TK_BitXor, TK_XorAssign);
            CASE2('=', TK_Assign, TK_Equals);
            CASE2('*', TK_Mul, TK_MulAssign);
            CASE2('%', TK_Mod, TK_ModAssign);
            CASE3('+', TK_Add, TK_AddAssign, TK_Increment);
            CASE3('&', TK_BitAnd, TK_AndAssign, TK_And);
            CASE3('|', TK_BitOr, TK_OrAssign, TK_Or);
#undef CASE2
#undef CASE3

            case 0: break; // whitespace skip above may have reached EOF

            case '@': t->kind = TK_At; break;
            case '(': t->kind = TK_OpenParen; break;
            case ')': t->kind = TK_CloseParen; break;
            case '{': t->kind = TK_OpenBrace; break;
            case '}': t->kind = TK_CloseBrace; break;
            case '[': t->kind = TK_OpenBracket; break;
            case ']': t->kind = TK_CloseBracket; break;
            case ',': t->kind = TK_Comma; break;
            case '~': t->kind = TK_Neg; break;
            case '?': t->kind = TK_Question; break;
            case ';': t->kind = TK_Semicolon; break;

            case '#': {
                t->kind = TK_Pound;
                // @Todo(Krzosa): Some convenient way to recognize macros
            } break;

            case '.': {
                // '.' vs '...' (no '..' token in this language).
                if(s->stream[0] == '.' && s->stream[1] == '.') {
                    lex_advance(s);
                    lex_advance(s);
                    t->kind = TK_ThreeDots;
                }
                else {
                    t->kind = TK_Dot;
                }
            } break;

            case '<': {
                // '<<=', '<<', '<=', '<'
                if (lexc(s) == '<') {
                    lex_advance(s);
                    if (lexc(s) == '=') {
                        lex_advance(s);
                        t->kind = TK_LeftShiftAssign;
                    }
                    else {
                        t->kind = TK_LeftShift;
                    }
                }
                else if (lexc(s) == '=') {
                    lex_advance(s);
                    t->kind = TK_LesserThenOrEqual;
                }
                else {
                    t->kind = TK_LesserThen;
                }
            } break;

            case '>': {
                // '>>=', '>>', '>=', '>'
                if (lexc(s) == '>') {
                    lex_advance(s);
                    if (lexc(s) == '=') {
                        lex_advance(s);
                        t->kind = TK_RightShiftAssign;
                    }
                    else {
                        t->kind = TK_RightShift;
                    }
                }
                else if (lexc(s) == '=') {
                    lex_advance(s);
                    t->kind = TK_GreaterThenOrEqual;
                }
                else {
                    t->kind = TK_GreaterThen;
                }
            } break;

            case ':': {
                // '::' vs ':'
                if (lexc(s) == ':') {
                    lex_advance(s);
                    t->kind = TK_DoubleColon;
                }
                else {
                    t->kind = TK_Colon;
                }
            } break;

            case '-':{
                // '-=', '--', '->', '-'
                if (lexc(s) == '=') {
                    lex_advance(s);
                    t->kind = TK_SubAssign;
                }
                else if (lexc(s) == '-') {
                    lex_advance(s);
                    t->kind = TK_Decrement;
                }
                else if (lexc(s) == '>') {
                    lex_advance(s);
                    t->kind = TK_Arrow;
                }
                else {
                    t->kind = TK_Sub;
                }
            } break;

            case '\'':{not_implemented;} break; // char literals not supported yet
            case '"': {
                // String literal: parse with quotes, then trim both quote
                // bytes off str/len so the token text is the raw contents.
                t->kind = TK_U8Lit;
                lex_parse_string(s,t,'"');
                if(t->kind != TK_Error){
                    t->str += 1;
                    t->len -= 2;
                }
            } break;

            case '/': {
                // '/=', '//' line comment, '/* */' block comment, or '/'.
                if(lexc(s) == '='){
                    t->kind = TK_DivAssign;
                    lex_advance(s);
                }
                else if(lexc(s) == '/'){
                    lex_advance(s);
                    if(lexc(s) == '/'){
                        lex_advance(s);
                        //t->kind = TK_DocComment;
                    }
                    else {
                        //t->kind = TK_Comment;
                    }
                    // Skip to end of line; comments produce no token.
                    for(;;){
                        if(lexc(s) == '\n' || lexc(s) == 0) break;
                        lex_advance(s);
                    }
                    goto top; // reuse `t` for the next real token
                    //lex_set_len(s,t);
                }
                else if(lexc(s) == '*'){
                    lex_advance(s);
                    //t->kind = TK_Comment;
                    // Skip to the matching '*/'; unterminated comment is an error.
                    for(;;){
                        if(s->stream[0] == '*' && s->stream[1] == '/'){
                            lex_advance(s);
                            lex_advance(s);
                            break;
                        }
                        else if(lexc(s) == 0){
                            token_error(t, lit("Unterminated block comment"));
                            break;
                        }
                        lex_advance(s);
                    }
                    goto top; // reuse `t` for the next real token
                    //lex_set_len(s,t);
                }
                else t->kind = TK_Div;
            } break;

            case '0':
            case '1':case '2':case '3':
            case '4':case '5':case '6':
            case '7':case '8':case '9': {
                // Decimal integer literal; value parsed eagerly into int_val.
                t->kind = TK_Int;
                while(lex_is_numeric(lexc(s)))
                    lex_advance(s);
                lex_set_len(s, t);
                t->int_val = parse_u64(t->str, t->len);
            } break;

            case 'l':{
                // Special form lit("...") lexed as a single string literal
                // token; str/len are trimmed to the bytes inside the quotes
                // (skip `it("` = 4 chars plus the opening quote accounting).
                if(s->stream[0] == 'i' && s->stream[1] == 't' && s->stream[2] == '(' && s->stream[3] == '"'){
                    t->kind = TK_StringLit;
                    lex_advance(s);lex_advance(s);lex_advance(s);lex_advance(s);
                    lex_parse_string(s,t,'"');

                    if(s->stream[0] == ')') {
                        t->str += 5;
                        t->len -= 6;
                        lex_advance(s);
                    }
                    else token_error(t, lit("Unterminated string literal, missing closing parenthesis"));

                    break;
                }
            }; // no break: an 'l' not starting lit(" falls through to the identifier case

            case 'A':case 'a':case 'M':case 'm':case 'B':
            case 'b':case 'N':case 'n':case 'C':case 'c':case 'O':
            case 'o':case 'D':case 'd':case 'P':case 'p':case 'E':
            case 'e':case 'Q':case 'q':case 'F':case 'f':case 'R':
            case 'r':case 'G':case 'g':case 'S':case 's':case 'H':
            case 'h':case 'T':case 't':case 'I':case 'i':case 'U':
            case 'u':case 'J':case 'j':case 'V':case 'v':case 'K':
            case 'k':case 'W':case 'w':case 'L':case 'X':
            case 'x':case 'Z':case 'z':case 'Y':case 'y':case '_': {
                // Identifier: [A-Za-z_][A-Za-z0-9_]* (keywords resolved later
                // during interning). Note 'l' is handled by the fallthrough above.
                t->kind = TK_Identifier;
                while(lex_is_alphanumeric(lexc(s)) || lexc(s) == '_')
                    lex_advance(s);
                lex_set_len(s,t);
            } break;

            default:{
                token_error(t, lit("Unknown token"));
            } break;
        }
        // Cases that didn't set a length explicitly (single/multi-char
        // operators) get it from the consumed span here.
        if(t->len==0){
            lex_set_len(s,t);
        }
    }

    // Token end of stream
    // Allocate (so capacity exists), copy the sentinel, then roll len back
    // so the sentinel sits just past the last counted token.
    Token *t = token_alloc(tokens);
    *t = token_end_of_stream;
    tokens->len -= 1;
}
|
|
|
|
function Tokens
|
|
lex_stream(String in_stream, String filename){
|
|
Lex_Stream stream = {in_stream.str, in_stream.str, filename, 0};
|
|
Tokens tokens = {};
|
|
lex_base(&stream, &tokens);
|
|
return tokens;
|
|
}
|
|
|
|
function void
|
|
parser_lex_stream(Parser *p, String in_stream, String filename){
|
|
Lex_Stream stream = {in_stream.str, in_stream.str, filename, 0};
|
|
p->tokens.len = 0;
|
|
p->tokens.iter = 0;
|
|
lex_base(&stream, &p->tokens);
|
|
intern_tokens(p);
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
|
|
//
|
|
//-----------------------------------------------------------------------------
|
|
|
|
function B32
|
|
token_compare(Token *t, String str){
|
|
B32 result = string_compare(t->string, str);
|
|
return result;
|
|
}
|
|
|
|
function B32
|
|
token_is_comment(Token *token){
|
|
B32 result = token->kind == TK_Comment || token->kind == TK_DocComment;
|
|
return result;
|
|
}
|
|
|
|
function Token *
token_get(Parser *p){
    // Current token under the parser cursor; does not consume it.
    return p->tokens.tokens + p->tokens.iter;
}
|
|
|
|
function B32
|
|
intern_compare(Intern_String a, Intern_String b){
|
|
B32 result = a.s.str == b.s.str;
|
|
return result;
|
|
}
|
|
|
|
function Token *
token_is_keyword(Parser *p, Intern_String keyword){
    // Returns the current token when it is exactly `keyword`, else 0.
    // Does not advance the cursor.
    assert(intern_is_keyword(p, keyword));
    Token *t = token_get(p);
    if(t->kind != TK_Keyword) return 0;
    if(!intern_compare(t->intern_val, keyword)) return 0;
    return t;
}
|
|
|
|
function void
|
|
token_advance(Parser *p){
|
|
p->tokens.iter = clamp_top_s64(p->tokens.iter + 1, p->tokens.len);
|
|
}
|
|
|
|
function Token *
token_next(Parser *p){
    // Consume and return the current token.
    Token *result = token_get(p);
    token_advance(p);
    return result;
}
|
|
|
|
function Token *
token_match(Parser *p, Token_Kind kind){
    // Consume and return the current token only when it matches `kind`;
    // otherwise leave the cursor alone and return 0.
    Token *token = token_get(p);
    if(token->kind != kind) return 0;
    return token_next(p);
}
|
|
|
|
function Token *
token_match_keyword(Parser *p, Intern_String keyword){
    // Consume and return the current token only when it is the given
    // keyword; otherwise return 0 without advancing.
    assert(intern_is_keyword(p, keyword));
    Token *token = token_get(p);
    if(token->kind != TK_Keyword) return 0;
    if(!intern_compare(keyword, token->intern_val)) return 0;
    return token_next(p);
}
|
|
|
|
function Token *
token_expect(Parser *p, Token_Kind kind){
    // Like token_match, but a mismatch additionally records a parse error.
    Token *token = token_get(p);
    if(token->kind != kind){
        parser_push_error(p, token,
                          "Expected token of kind: %s, got instead token of kind: %s",
                          token_kind_string[kind].str, token_kind_string[token->kind].str);
        return 0;
    }
    return token_next(p);
}
|
|
|
|
function B32
|
|
token_is(Parser *p, Token_Kind kind){
|
|
B32 result = token_get(p)->kind == kind;
|
|
return result;
|
|
}
|
|
|
|
function Token *
token_is_assignment(Parser *p){
    // Returns the current token when its kind lies in the contiguous
    // [TK_Assign, TK_RightShiftAssign] assignment-operator range, else 0.
    // Does not advance.
    Token *t = token_get(p);
    B32 is_assign = t->kind >= TK_Assign && t->kind <= TK_RightShiftAssign;
    return is_assign ? t : 0;
}
|
|
|
|
function Token *
token_peek(Parser *p, S64 count){
    // Token `count` positions ahead of the cursor, clamped to the
    // end-of-stream sentinel slot.
    S64 index = clamp_top_s64(p->tokens.iter + count, p->tokens.len);
    return p->tokens.tokens + index;
}
|
|
|
|
function Token *
token_peek_is(Parser *p, S64 count, Token_Kind kind){
    // Peeked token when it matches `kind`, else 0. Never advances.
    Token *token = token_peek(p, count);
    return token->kind == kind ? token : 0;
}
|
|
|
|
function Token *
token_peek_is_keyword(Parser *p, S64 count, Intern_String keyword){
    // Peeked token when it is exactly the given keyword, else 0.
    Token *token = token_peek(p, count);
    if(token->kind != TK_Keyword) return 0;
    if(!intern_compare(keyword, token->intern_val)) return 0;
    return token;
}
|