diff --git a/main.c b/main.c
index fa852a0..890a428 100644
--- a/main.c
+++ b/main.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include // NOTE(review): header name is missing from this copy of the patch; presumably <stdbool.h>, since the new helpers return bool - confirm
 #include
 #include "base.c"
 #include "meta_gen.c"
@@ -71,6 +72,35 @@ Lexer make_lexer(char *file, char *src, int len) {
     return lex;
 }
 
+bool lex_peek_is(Lexer *lex, char c) { // Non-consuming test: true when lex->at is before lex->end and the char at lex->at equals c.
+    return lex->at < lex->end && *lex->at == c; // bounds check runs first, so *lex->at is not read unless at < end
+}
+
+bool lex_match(Lexer *lex, char c) { // If the char at lex->at equals c, calls lex_advance and returns true; otherwise returns false and leaves lex as it was.
+    if (lex_peek_is(lex, c)) {
+        lex_advance(lex); // presumably steps past the matched char; lex_advance is defined outside this patch
+        return true;
+    }
+    return false;
+}
+
+Token_Kind lex_repeat_or_assign(Lexer *lex, char repeated_char, Token_Kind single, Token_Kind repeated, Token_Kind assigned) { // Token kind for operators with a doubled form and an assign form (used below for + & |); assumes the first operator char is already consumed - confirm in lex_token.
+    if (lex_match(lex, repeated_char)) return repeated; // doubled form is tried before the assign form, same order as the branches this patch removes
+    if (lex_match(lex, '=')) return assigned;
+    return single; // neither follow-up char matched, nothing consumed
+}
+
+Token_Kind lex_assign_variant(Lexer *lex, Token_Kind single, Token_Kind assigned) { // Returns assigned (consuming the =) when the char at the cursor is =, otherwise single.
+    return lex_match(lex, '=') ? assigned : single;
+}
+
+Token_Kind lex_shift_family(Lexer *lex, char repeated_char, Token_Kind single, Token_Kind single_eq, Token_Kind doubled, Token_Kind doubled_eq) { // Four-way pick used below for < and >: x, x=, xx, xx= where x is repeated_char.
+    if (lex_match(lex, repeated_char)) {
+        return lex_match(lex, '=') ? doubled_eq : doubled; // second x seen: xx= or xx
+    }
+    return lex_match(lex, '=') ? single_eq : single; // no second x: x= or x
+}
+
 Token lex_token(Lexer *lex) {
     eat_whitespace(lex);
     Token t = {
@@ -128,134 +158,24 @@ Token lex_token(Lexer *lex) {
         case ';': t.kind = TOK_SEMICOLON; break;
         case '?': t.kind = TOK_QUESTION; break;
         case '#': t.kind = TOK_HASH; break;
-        case '+': {
-            if (lex->at < lex->end && *lex->at == '+') {
-                lex_advance(lex);
-                t.kind = TOK_INC;
-            } else if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_PLUS_ASSIGN;
-            } else {
-                t.kind = TOK_PLUS;
-            }
-        } break;
+        case '+': t.kind = lex_repeat_or_assign(lex, '+', TOK_PLUS, TOK_INC, TOK_PLUS_ASSIGN); break; // handles + and ++ and +=
         case '-': {
-            if (lex->at < lex->end && *lex->at == '-') {
-                lex_advance(lex);
-                t.kind = TOK_DEC;
-            } else if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_MINUS_ASSIGN;
-            } else if (lex->at < lex->end && *lex->at == '>') {
-                lex_advance(lex);
-                t.kind = TOK_ARROW;
-            } else {
-                t.kind = TOK_MINUS;
-            }
-        } break;
-        case '*': {
-            if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_MUL_ASSIGN;
-            } else {
-                t.kind = TOK_STAR;
-            }
-        } break;
-        case '/': {
-            if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_DIV_ASSIGN;
-            } else {
-                t.kind = TOK_SLASH;
-            }
-        } break;
-        case '%': {
-            if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_MOD_ASSIGN;
-            } else {
-                t.kind = TOK_PERCENT;
-            }
-        } break;
-        case '=': {
-            if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_EQ;
-            } else {
-                t.kind = TOK_ASSIGN;
-            }
-        } break;
-        case '<': {
-            if (lex->at < lex->end && *lex->at == '<') {
-                lex_advance(lex);
-                if (lex->at < lex->end && *lex->at == '=') {
-                    lex_advance(lex);
-                    t.kind = TOK_LSHIFT_ASSIGN;
-                } else {
-                    t.kind = TOK_LSHIFT;
-                }
-            } else if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_LEQ;
-            } else {
-                t.kind = TOK_LT;
-            }
-        } break;
-        case '>': {
-            if (lex->at < lex->end && *lex->at == '>') {
-                lex_advance(lex);
-                if (lex->at < lex->end && *lex->at == '=') {
-                    lex_advance(lex);
-                    t.kind = TOK_RSHIFT_ASSIGN;
-                } else {
-                    t.kind = TOK_RSHIFT;
-                }
-            } else if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_GEQ;
-            } else {
-                t.kind = TOK_GT;
-            }
-        } break;
-        case '!': {
-            if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_NEQ;
-            } else {
-                t.kind = TOK_NOT;
-            }
+            if (lex_match(lex, '-')) t.kind = TOK_DEC; // minus stays an inline chain: its extra arrow form fits none of the helpers
+            else if (lex_match(lex, '=')) t.kind = TOK_MINUS_ASSIGN;
+            else if (lex_match(lex, '>')) t.kind = TOK_ARROW;
+            else t.kind = TOK_MINUS;
         } break;
+        case '*': t.kind = lex_assign_variant(lex, TOK_STAR, TOK_MUL_ASSIGN); break; // star or star-assign
+        case '/': t.kind = lex_assign_variant(lex, TOK_SLASH, TOK_DIV_ASSIGN); break; // slash or slash-assign
+        case '%': t.kind = lex_assign_variant(lex, TOK_PERCENT, TOK_MOD_ASSIGN); break; // percent or percent-assign
+        case '=': t.kind = lex_assign_variant(lex, TOK_ASSIGN, TOK_EQ); break; // here the assigned slot carries the equality token (= followed by =)
+        case '<': t.kind = lex_shift_family(lex, '<', TOK_LT, TOK_LEQ, TOK_LSHIFT, TOK_LSHIFT_ASSIGN); break; // less, less-equal, left shift, left-shift-assign
+        case '>': t.kind = lex_shift_family(lex, '>', TOK_GT, TOK_GEQ, TOK_RSHIFT, TOK_RSHIFT_ASSIGN); break; // greater, greater-equal, right shift, right-shift-assign
+        case '!': t.kind = lex_assign_variant(lex, TOK_NOT, TOK_NEQ); break; // here the assigned slot carries the not-equal token (! followed by =)
         case '~': t.kind = TOK_BITNOT; break;
-        case '&': {
-            if (lex->at < lex->end && *lex->at == '&') {
-                lex_advance(lex);
-                t.kind = TOK_AND;
-            } else if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_AND_ASSIGN;
-            } else {
-                t.kind = TOK_BITAND;
-            }
-        } break;
-        case '|': {
-            if (lex->at < lex->end && *lex->at == '|') {
-                lex_advance(lex);
-                t.kind = TOK_OR;
-            } else if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_OR_ASSIGN;
-            } else {
-                t.kind = TOK_BITOR;
-            }
-        } break;
-        case '^': {
-            if (lex->at < lex->end && *lex->at == '=') {
-                lex_advance(lex);
-                t.kind = TOK_XOR_ASSIGN;
-            } else {
-                t.kind = TOK_BITXOR;
-            }
-        } break;
+        case '&': t.kind = lex_repeat_or_assign(lex, '&', TOK_BITAND, TOK_AND, TOK_AND_ASSIGN); break; // bitwise and, logical and, and-assign
+        case '|': t.kind = lex_repeat_or_assign(lex, '|', TOK_BITOR, TOK_OR, TOK_OR_ASSIGN); break; // bitwise or, logical or, or-assign
+        case '^': t.kind = lex_assign_variant(lex, TOK_BITXOR, TOK_XOR_ASSIGN); break; // xor or xor-assign
         default: {
             // @todo: lexer perhaps should have a static buffer of size 1024, error message
             // should be put there and piped to the upper program. The token should be filled