Refactor lexer operator matching helpers

This commit is contained in:
Krzosa Karol
2026-05-10 14:43:14 +02:00
parent e04db0ac0c
commit df7f1f27a1

170
main.c
View File

@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <ctype.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdarg.h>
#include "base.c"
#include "meta_gen.c"
@@ -71,6 +72,35 @@ Lexer make_lexer(char *file, char *src, int len) {
return lex;
}
bool lex_peek_is(Lexer *lex, char c) {
return lex->at < lex->end && *lex->at == c;
}
bool lex_match(Lexer *lex, char c) {
if (lex_peek_is(lex, c)) {
lex_advance(lex);
return true;
}
return false;
}
Token_Kind lex_repeat_or_assign(Lexer *lex, char repeated_char, Token_Kind single, Token_Kind repeated, Token_Kind assigned) {
if (lex_match(lex, repeated_char)) return repeated;
if (lex_match(lex, '=')) return assigned;
return single;
}
Token_Kind lex_assign_variant(Lexer *lex, Token_Kind single, Token_Kind assigned) {
return lex_match(lex, '=') ? assigned : single;
}
Token_Kind lex_shift_family(Lexer *lex, char repeated_char, Token_Kind single, Token_Kind single_eq, Token_Kind doubled, Token_Kind doubled_eq) {
if (lex_match(lex, repeated_char)) {
return lex_match(lex, '=') ? doubled_eq : doubled;
}
return lex_match(lex, '=') ? single_eq : single;
}
Token lex_token(Lexer *lex) {
eat_whitespace(lex);
Token t = {
@@ -128,134 +158,24 @@ Token lex_token(Lexer *lex) {
case ';': t.kind = TOK_SEMICOLON; break;
case '?': t.kind = TOK_QUESTION; break;
case '#': t.kind = TOK_HASH; break;
case '+': {
if (lex->at < lex->end && *lex->at == '+') {
lex_advance(lex);
t.kind = TOK_INC;
} else if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_PLUS_ASSIGN;
} else {
t.kind = TOK_PLUS;
}
} break;
case '+': t.kind = lex_repeat_or_assign(lex, '+', TOK_PLUS, TOK_INC, TOK_PLUS_ASSIGN); break;
case '-': {
if (lex->at < lex->end && *lex->at == '-') {
lex_advance(lex);
t.kind = TOK_DEC;
} else if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_MINUS_ASSIGN;
} else if (lex->at < lex->end && *lex->at == '>') {
lex_advance(lex);
t.kind = TOK_ARROW;
} else {
t.kind = TOK_MINUS;
}
} break;
case '*': {
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_MUL_ASSIGN;
} else {
t.kind = TOK_STAR;
}
} break;
case '/': {
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_DIV_ASSIGN;
} else {
t.kind = TOK_SLASH;
}
} break;
case '%': {
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_MOD_ASSIGN;
} else {
t.kind = TOK_PERCENT;
}
} break;
case '=': {
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_EQ;
} else {
t.kind = TOK_ASSIGN;
}
} break;
case '<': {
if (lex->at < lex->end && *lex->at == '<') {
lex_advance(lex);
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_LSHIFT_ASSIGN;
} else {
t.kind = TOK_LSHIFT;
}
} else if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_LEQ;
} else {
t.kind = TOK_LT;
}
} break;
case '>': {
if (lex->at < lex->end && *lex->at == '>') {
lex_advance(lex);
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_RSHIFT_ASSIGN;
} else {
t.kind = TOK_RSHIFT;
}
} else if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_GEQ;
} else {
t.kind = TOK_GT;
}
} break;
case '!': {
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_NEQ;
} else {
t.kind = TOK_NOT;
}
if (lex_match(lex, '-')) t.kind = TOK_DEC;
else if (lex_match(lex, '=')) t.kind = TOK_MINUS_ASSIGN;
else if (lex_match(lex, '>')) t.kind = TOK_ARROW;
else t.kind = TOK_MINUS;
} break;
case '*': t.kind = lex_assign_variant(lex, TOK_STAR, TOK_MUL_ASSIGN); break;
case '/': t.kind = lex_assign_variant(lex, TOK_SLASH, TOK_DIV_ASSIGN); break;
case '%': t.kind = lex_assign_variant(lex, TOK_PERCENT, TOK_MOD_ASSIGN); break;
case '=': t.kind = lex_assign_variant(lex, TOK_ASSIGN, TOK_EQ); break;
case '<': t.kind = lex_shift_family(lex, '<', TOK_LT, TOK_LEQ, TOK_LSHIFT, TOK_LSHIFT_ASSIGN); break;
case '>': t.kind = lex_shift_family(lex, '>', TOK_GT, TOK_GEQ, TOK_RSHIFT, TOK_RSHIFT_ASSIGN); break;
case '!': t.kind = lex_assign_variant(lex, TOK_NOT, TOK_NEQ); break;
case '~': t.kind = TOK_BITNOT; break;
case '&': {
if (lex->at < lex->end && *lex->at == '&') {
lex_advance(lex);
t.kind = TOK_AND;
} else if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_AND_ASSIGN;
} else {
t.kind = TOK_BITAND;
}
} break;
case '|': {
if (lex->at < lex->end && *lex->at == '|') {
lex_advance(lex);
t.kind = TOK_OR;
} else if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_OR_ASSIGN;
} else {
t.kind = TOK_BITOR;
}
} break;
case '^': {
if (lex->at < lex->end && *lex->at == '=') {
lex_advance(lex);
t.kind = TOK_XOR_ASSIGN;
} else {
t.kind = TOK_BITXOR;
}
} break;
case '&': t.kind = lex_repeat_or_assign(lex, '&', TOK_BITAND, TOK_AND, TOK_AND_ASSIGN); break;
case '|': t.kind = lex_repeat_or_assign(lex, '|', TOK_BITOR, TOK_OR, TOK_OR_ASSIGN); break;
case '^': t.kind = lex_assign_variant(lex, TOK_BITXOR, TOK_XOR_ASSIGN); break;
default: {
// @todo: lexer perhaps should have a static buffer of size 1024, error message
// should be put there and piped to the upper program. The token should be filled