1526 lines
46 KiB
C
1526 lines
46 KiB
C
#include "clexer.h"
|
|
#include <stdarg.h>
|
|
|
|
#ifndef CL_PRIVATE_FUNCTION
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
#define CL_PRIVATE_FUNCTION __attribute__((unused)) static
|
|
#else
|
|
#define CL_PRIVATE_FUNCTION static
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef CL_Allocate
|
|
#include <stdlib.h>
|
|
#define CL_Allocate(allocator, size) malloc(size)
|
|
#endif
|
|
|
|
#ifndef CL_STRING_TO_DOUBLE
|
|
#include <stdlib.h>
|
|
#define CL_STRING_TO_DOUBLE(str, len) strtod(str, 0)
|
|
#endif
|
|
|
|
#ifndef CL_ASSERT
|
|
#include <assert.h>
|
|
#define CL_ASSERT(x) assert(x)
|
|
#endif
|
|
|
|
#ifndef CL_VSNPRINTF
|
|
#include <stdio.h>
|
|
#define CL_VSNPRINTF vsnprintf
|
|
#endif
|
|
|
|
#ifndef CL_SNPRINTF
|
|
#include <stdio.h>
|
|
#define CL_SNPRINTF snprintf
|
|
#endif
|
|
|
|
#ifndef CL__MemoryCopy
|
|
#include <string.h>
|
|
#define CL__MemoryCopy(dst, src, s) memcpy(dst, src, s)
|
|
#endif
|
|
|
|
#ifndef CL_MemoryZero
|
|
#include <string.h>
|
|
#define CL_MemoryZero(p, size) memset(p, 0, size)
|
|
#endif
|
|
|
|
#ifndef CL_FileExists
|
|
#define CL_FileExists CL__FileExists
|
|
#include <stdio.h>
|
|
CL_PRIVATE_FUNCTION bool CL_FileExists(char *name) {
|
|
bool result = false;
|
|
FILE *f = fopen(name, "rb");
|
|
if (f) {
|
|
result = true;
|
|
fclose(f);
|
|
}
|
|
return result;
|
|
}
|
|
#endif
|
|
|
|
// Forward declaration so the lexing helpers below can report errors on a token.
// Takes a message string plus variadic arguments (presumably printf-style
// formatting - confirm against the definition).
CL_PRIVATE_FUNCTION void CL_ReportError(CL_Lexer *T, CL_Token *token, const char *string, ...);
|
|
|
|
// Copies `size` bytes starting at `p` into a buffer obtained from CL_Allocate
// and appends a terminating zero byte.
// Returns the copy, or 0 when the allocation fails.
CL_PRIVATE_FUNCTION char *CL_PushStringCopy(CL_Allocator arena, char *p, int size) {
    char *copy_buffer = (char *)CL_Allocate(arena, size + 1);
    if (copy_buffer == 0) {
        // Never write through a failed allocation; let the caller see the null.
        return 0;
    }
    CL__MemoryCopy(copy_buffer, p, size);
    copy_buffer[size] = 0;
    return copy_buffer;
}
|
|
|
|
// Moves the lexer one byte forward, keeping line/column bookkeeping in sync.
// - At the terminating zero byte the stream is left untouched.
// - A newline bumps the line counter and resets the column to 0.
// - Only spaces and tabs increase the column; every other character
//   advances the stream without touching the column.
CL_INLINE void CL_Advance(CL_Lexer *T) {
    switch (*T->stream) {
        case 0:
            return;
        case '\n':
            T->line += 1;
            T->column = 0;
            break;
        case ' ':
        case '\t':
            T->column += 1;
            break;
        default:
            break;
    }
    T->stream += 1;
}
|
|
|
|
// True for the ASCII letters a-z and A-Z.
CL_INLINE bool CL_IsAlphabetic(char c) {
    if (c >= 'a' && c <= 'z') return true;
    if (c >= 'A' && c <= 'Z') return true;
    return false;
}
|
|
|
|
// True for the decimal digits 0-9.
CL_INLINE bool CL_IsNumeric(char c) {
    return '0' <= c && c <= '9';
}
|
|
|
|
// True for hexadecimal digits: 0-9, A-F and a-f.
CL_INLINE bool CL_IsHexNumeric(char c) {
    if (c >= '0' && c <= '9') return true;
    if (c >= 'A' && c <= 'F') return true;
    if (c >= 'a' && c <= 'f') return true;
    return false;
}
|
|
|
|
// True for space, newline, carriage return and tab.
CL_INLINE bool CL_IsWhitespace(char c) {
    switch (c) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            return true;
        default:
            return false;
    }
}
|
|
|
|
// True when `c` is an ASCII letter or a decimal digit.
CL_INLINE bool CL_IsAlphanumeric(char c) {
    if (CL_IsNumeric(c)) {
        return true;
    }
    return CL_IsAlphabetic(c);
}
|
|
|
|
// Stores in token->len the number of bytes between the token start
// (token->str) and the current stream position.
// Asserts that the distance is below 2147483647 before narrowing it to int.
CL_API_FUNCTION void CL_SetTokenLength(CL_Lexer *T, CL_Token *token) {
    intptr_t byte_count = T->stream - token->str;
    CL_ASSERT(byte_count < 2147483647);
    token->len = (int)byte_count;
}
|
|
|
|
// Maps a digit character to its numeric value: '0'-'9' give 0-9 and
// 'a'-'f' / 'A'-'F' give 10-15. Any other character yields 255, which is
// larger than every supported base and therefore acts as "not a digit".
CL_PRIVATE_FUNCTION uint64_t CL_CharMapToNumber(char c) {
    if (c >= '0' && c <= '9') {
        return (uint64_t)(c - '0');
    }
    if (c >= 'a' && c <= 'f') {
        return (uint64_t)(c - 'a') + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return (uint64_t)(c - 'A') + 10;
    }
    return 255;
}
|
|
|
|
// Converts `len` digit characters at `string` into an unsigned 64 bit value
// using `base` (2..16).
// Errors are reported on `token` through CL_ReportError and stop the parse;
// the value accumulated so far is returned in that case:
//  - a character that is not a digit of `base`
//  - a literal whose value does not fit into 64 bits (previously this wrapped
//    around silently)
CL_PRIVATE_FUNCTION uint64_t CL_ParseInteger(CL_Lexer *T, CL_Token *token, char *string, uint64_t len, uint64_t base) {
    CL_ASSERT(base >= 2 && base <= 16);
    const uint64_t max_value = ~(uint64_t)0;
    uint64_t acc = 0;
    for (uint64_t i = 0; i < len; i++) {
        uint64_t num = CL_CharMapToNumber(string[i]);
        if (num >= base) {
            CL_ReportError(T, token, "Internal compiler error! Failed to parse a number");
            break;
        }
        // acc * base + num would exceed 64 bits exactly when this holds.
        if (acc > (max_value - num) / base) {
            CL_ReportError(T, token, "Integer literal is too large to fit in 64 bits");
            break;
        }
        acc = acc * base + num;
    }
    return acc;
}
|
|
|
|
typedef struct CL_UTF32Result {
|
|
uint32_t out_str;
|
|
int advance;
|
|
int error;
|
|
} CL_UTF32Result;
|
|
|
|
CL_PRIVATE_FUNCTION CL_UTF32Result CL_UTF8ToUTF32(char *c, int max_advance) {
|
|
CL_UTF32Result result = {0};
|
|
|
|
if ((c[0] & 0x80) == 0) { // Check if leftmost zero of first byte is unset
|
|
if (max_advance >= 1) {
|
|
result.out_str = c[0];
|
|
result.advance = 1;
|
|
}
|
|
else result.error = 1;
|
|
}
|
|
|
|
else if ((c[0] & 0xe0) == 0xc0) {
|
|
if ((c[1] & 0xc0) == 0x80) { // Continuation byte required
|
|
if (max_advance >= 2) {
|
|
result.out_str = (uint32_t)(c[0] & 0x1f) << 6u | (c[1] & 0x3f);
|
|
result.advance = 2;
|
|
}
|
|
else result.error = 2;
|
|
}
|
|
else result.error = 2;
|
|
}
|
|
|
|
else if ((c[0] & 0xf0) == 0xe0) {
|
|
if ((c[1] & 0xc0) == 0x80 && (c[2] & 0xc0) == 0x80) { // Two continuation bytes required
|
|
if (max_advance >= 3) {
|
|
result.out_str = (uint32_t)(c[0] & 0xf) << 12u | (uint32_t)(c[1] & 0x3f) << 6u | (c[2] & 0x3f);
|
|
result.advance = 3;
|
|
}
|
|
else result.error = 3;
|
|
}
|
|
else result.error = 3;
|
|
}
|
|
|
|
else if ((c[0] & 0xf8) == 0xf0) {
|
|
if ((c[1] & 0xc0) == 0x80 && (c[2] & 0xc0) == 0x80 && (c[3] & 0xc0) == 0x80) { // Three continuation bytes required
|
|
if (max_advance >= 4) {
|
|
result.out_str = (uint32_t)(c[0] & 0xf) << 18u | (uint32_t)(c[1] & 0x3f) << 12u | (uint32_t)(c[2] & 0x3f) << 6u | (uint32_t)(c[3] & 0x3f);
|
|
result.advance = 4;
|
|
}
|
|
else result.error = 4;
|
|
}
|
|
else result.error = 4;
|
|
}
|
|
else result.error = 4;
|
|
|
|
return result;
|
|
}
|
|
|
|
// @todo I think I should look at this again
|
|
CL_PRIVATE_FUNCTION void CL_ParseCharLiteral(CL_Lexer *T, CL_Token *token) {
|
|
token->kind = CL_CHARLIT;
|
|
token->str = T->stream;
|
|
while (*T->stream != '\'') {
|
|
if (*T->stream == '\\') {
|
|
CL_Advance(T);
|
|
}
|
|
if (*T->stream == 0) {
|
|
CL_ReportError(T, token, "Unclosed character literal!");
|
|
return;
|
|
}
|
|
CL_Advance(T);
|
|
}
|
|
CL_SetTokenLength(T, token);
|
|
|
|
if (token->str[0] == '\\') {
|
|
switch (token->str[1]) {
|
|
case '\\': token->u64 = '\\'; break;
|
|
case '\'': token->u64 = '\''; break;
|
|
case '"': token->u64 = '"'; break;
|
|
case 't': token->u64 = '\t'; break;
|
|
case 'v': token->u64 = '\v'; break;
|
|
case 'f': token->u64 = '\f'; break;
|
|
case 'n': token->u64 = '\n'; break;
|
|
case 'r': token->u64 = '\r'; break;
|
|
case 'a': token->u64 = '\a'; break;
|
|
case 'b': token->u64 = '\b'; break;
|
|
case '0': token->u64 = '\0'; break;
|
|
case 'x':
|
|
case 'X': CL_ASSERT(!"Not implemented"); break; // Hex constant
|
|
case 'u': CL_ASSERT(!"Not implemented"); break; // Unicode constant
|
|
default: {
|
|
CL_ReportError(T, token, "Unknown escape code");
|
|
}
|
|
}
|
|
}
|
|
|
|
else {
|
|
if (token->len > 4) {
|
|
CL_ReportError(T, token, "This character literal has invalid format, it's too big");
|
|
goto skip_utf_encode;
|
|
}
|
|
|
|
token->u64 = 0;
|
|
int i = 0;
|
|
|
|
for (; i < token->len;) {
|
|
CL_UTF32Result result = CL_UTF8ToUTF32(token->str + i, (int)token->len);
|
|
i += result.advance;
|
|
token->u64 |= result.out_str << (8 * (token->len - i));
|
|
if (result.error) {
|
|
CL_ReportError(T, token, "This character literal couldnt be parsed as utf8");
|
|
break;
|
|
}
|
|
}
|
|
if (i != token->len) {
|
|
CL_ReportError(T, token, "Character literal decode error");
|
|
}
|
|
}
|
|
|
|
skip_utf_encode:
|
|
CL_Advance(T);
|
|
}
|
|
|
|
// It combines strings, verifies the escape sequences but doesn't do any allocations
|
|
// so the final string actually needs additional transformation pass. A pass
|
|
// that will combine the string snippets, replace escape sequences with actual values etc.
|
|
//
|
|
// @warning: @not_sure: we are not setting token->string_literal
|
|
//
|
|
// "String 1" "String 2" - those strings snippets are combined
|
|
// @todo: look at this again
|
|
// @todo: make a manual correct version that user can execute if he needs to
|
|
CL_PRIVATE_FUNCTION void CL_CheckString(CL_Lexer *T, CL_Token *token) {
|
|
token->kind = CL_STRINGLIT;
|
|
combine_next_string_literal:
|
|
while (*T->stream != '"' && *T->stream != 0 AND_CL_STRING_TERMINATE_ON_NEW_LINE) {
|
|
if (*T->stream == '\\') {
|
|
CL_Advance(T);
|
|
switch (*T->stream) {
|
|
case 'a':
|
|
case 'b':
|
|
case 'e':
|
|
case 'f':
|
|
case 'n':
|
|
case 'r':
|
|
case 't':
|
|
case 'v':
|
|
case '\\':
|
|
case '\'':
|
|
case '?':
|
|
case '"':
|
|
case 'x':
|
|
case 'X': // Hex constant
|
|
case 'u': // Unicode constant
|
|
case 'U':
|
|
break;
|
|
case '0': // octal numbers or null
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
break;
|
|
default: {
|
|
CL_ReportError(T, token, "Invalid escape sequence");
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
CL_Advance(T);
|
|
}
|
|
CL_Advance(T);
|
|
|
|
// Try to seek if there is a consecutive string.
|
|
// If there is such string we try to combine it.
|
|
{
|
|
char *seek_for_next_string = T->stream;
|
|
while (CL_IsWhitespace(*seek_for_next_string)) {
|
|
seek_for_next_string += 1;
|
|
}
|
|
|
|
if (*seek_for_next_string == '"') {
|
|
seek_for_next_string += 1;
|
|
while (T->stream != seek_for_next_string) CL_Advance(T);
|
|
goto combine_next_string_literal;
|
|
}
|
|
}
|
|
CL_SetTokenLength(T, token);
|
|
}
|
|
|
|
// Reclassifies an identifier token as a keyword token when its text is
// exactly one of the C keywords listed below; otherwise the token kind is
// left untouched. Candidates are bucketed by first character so only a few
// string comparisons run per identifier.
CL_PRIVATE_FUNCTION void CL_IsIdentifierKeyword(CL_Token *token) {
    // No keyword is a single character long.
    if (token->len == 1) return;

    char *s = token->str;
    switch (s[0]) {
        case 'a':
            if (CL_StringsAreEqual(s, token->len, "auto", 4)) token->kind = CL_KEYWORD_AUTO;
            break;

        case 'b':
            if (CL_StringsAreEqual(s, token->len, "break", 5)) token->kind = CL_KEYWORD_BREAK;
            break;

        case 'c':
            if (CL_StringsAreEqual(s, token->len, "char", 4)) token->kind = CL_KEYWORD_CHAR;
            else if (CL_StringsAreEqual(s, token->len, "const", 5)) token->kind = CL_KEYWORD_CONST;
            else if (CL_StringsAreEqual(s, token->len, "continue", 8)) token->kind = CL_KEYWORD_CONTINUE;
            else if (CL_StringsAreEqual(s, token->len, "case", 4)) token->kind = CL_KEYWORD_CASE;
            break;

        case 'd':
            if (CL_StringsAreEqual(s, token->len, "double", 6)) token->kind = CL_KEYWORD_DOUBLE;
            else if (CL_StringsAreEqual(s, token->len, "do", 2)) token->kind = CL_KEYWORD_DO;
            else if (CL_StringsAreEqual(s, token->len, "default", 7)) token->kind = CL_KEYWORD_DEFAULT;
            break;

        case 'e':
            if (CL_StringsAreEqual(s, token->len, "extern", 6)) token->kind = CL_KEYWORD_EXTERN;
            else if (CL_StringsAreEqual(s, token->len, "enum", 4)) token->kind = CL_KEYWORD_ENUM;
            else if (CL_StringsAreEqual(s, token->len, "else", 4)) token->kind = CL_KEYWORD_ELSE;
            break;

        case 'f':
            if (CL_StringsAreEqual(s, token->len, "float", 5)) token->kind = CL_KEYWORD_FLOAT;
            else if (CL_StringsAreEqual(s, token->len, "for", 3)) token->kind = CL_KEYWORD_FOR;
            break;

        case 'g':
            if (CL_StringsAreEqual(s, token->len, "goto", 4)) token->kind = CL_KEYWORD_GOTO;
            break;

        case 'i':
            if (CL_StringsAreEqual(s, token->len, "int", 3)) token->kind = CL_KEYWORD_INT;
            else if (CL_StringsAreEqual(s, token->len, "inline", 6)) token->kind = CL_KEYWORD_INLINE;
            else if (CL_StringsAreEqual(s, token->len, "if", 2)) token->kind = CL_KEYWORD_IF;
            break;

        case 'l':
            if (CL_StringsAreEqual(s, token->len, "long", 4)) token->kind = CL_KEYWORD_LONG;
            break;

        case 'r':
            if (CL_StringsAreEqual(s, token->len, "register", 8)) token->kind = CL_KEYWORD_REGISTER;
            else if (CL_StringsAreEqual(s, token->len, "restrict", 8)) token->kind = CL_KEYWORD_RESTRICT;
            else if (CL_StringsAreEqual(s, token->len, "return", 6)) token->kind = CL_KEYWORD_RETURN;
            break;

        case 's':
            if (CL_StringsAreEqual(s, token->len, "signed", 6)) token->kind = CL_KEYWORD_SIGNED;
            else if (CL_StringsAreEqual(s, token->len, "sizeof", 6)) token->kind = CL_KEYWORD_SIZEOF;
            else if (CL_StringsAreEqual(s, token->len, "short", 5)) token->kind = CL_KEYWORD_SHORT;
            else if (CL_StringsAreEqual(s, token->len, "static", 6)) token->kind = CL_KEYWORD_STATIC;
            else if (CL_StringsAreEqual(s, token->len, "struct", 6)) token->kind = CL_KEYWORD_STRUCT;
            else if (CL_StringsAreEqual(s, token->len, "switch", 6)) token->kind = CL_KEYWORD_SWITCH;
            break;

        case 't':
            if (CL_StringsAreEqual(s, token->len, "typedef", 7)) token->kind = CL_KEYWORD_TYPEDEF;
            break;

        case 'u':
            if (CL_StringsAreEqual(s, token->len, "unsigned", 8)) token->kind = CL_KEYWORD_UNSIGNED;
            else if (CL_StringsAreEqual(s, token->len, "union", 5)) token->kind = CL_KEYWORD_UNION;
            break;

        case 'v':
            if (CL_StringsAreEqual(s, token->len, "void", 4)) token->kind = CL_KEYWORD_VOID;
            else if (CL_StringsAreEqual(s, token->len, "volatile", 8)) token->kind = CL_KEYWORD_VOLATILE;
            break;

        case 'w':
            if (CL_StringsAreEqual(s, token->len, "while", 5)) token->kind = CL_KEYWORD_WHILE;
            break;

        case '_':
            if (CL_StringsAreEqual(s, token->len, "_Bool", 5)) token->kind = CL_KEYWORD__BOOL;
            else if (CL_StringsAreEqual(s, token->len, "_Complex", 8)) token->kind = CL_KEYWORD__COMPLEX;
            else if (CL_StringsAreEqual(s, token->len, "_Imaginary", 10)) token->kind = CL_KEYWORD__IMAGINARY;
            else if (CL_StringsAreEqual(s, token->len, "_Thread_local", 13)) token->kind = CL_KEYWORD__THREAD_LOCAL;
            else if (CL_StringsAreEqual(s, token->len, "_Atomic", 7)) token->kind = CL_KEYWORD__ATOMIC;
            else if (CL_StringsAreEqual(s, token->len, "_Alignas", 8)) token->kind = CL_KEYWORD__ALIGNAS;
            else if (CL_StringsAreEqual(s, token->len, "_Alignof", 8)) token->kind = CL_KEYWORD__ALIGNOF;
            else if (CL_StringsAreEqual(s, token->len, "_Noreturn", 9)) token->kind = CL_KEYWORD__NORETURN;
            else if (CL_StringsAreEqual(s, token->len, "_Static_assert", 14)) token->kind = CL_KEYWORD__STATIC_ASSERT;
            else if (CL_StringsAreEqual(s, token->len, "_Generic", 8)) token->kind = CL_KEYWORD__GENERIC;
            break;

        default:
            break;
    }
}
|
|
|
|
// Lexes the filename part of an #include directive. The stream must point
// just past the "include" word.
// On success token->str / token->len cover the name between the delimiters,
// token->is_system_include is set for the <...> form and
// token->string_literal receives a zero terminated copy of the name.
// A missing filename, a newline inside the name or end of input inside the
// name is reported through CL_ReportError and ends the function right away;
// the stream is left on the offending character.
CL_PRIVATE_FUNCTION void CL_LexMacroInclude(CL_Lexer *T, CL_Token *token) {
    token->kind = CL_PREPROC_INCLUDE;
    while (*T->stream == ' ' || *T->stream == '\t') CL_Advance(T);

    char end = 0;
    if (*T->stream == '"') {
        end = '"';
    }
    else if (*T->stream == '<') {
        end = '>';
        token->is_system_include = true;
    }
    else {
        CL_ReportError(T, token, "Invalid include directive, file not specified");
        return;
    }
    CL_Advance(T);

    token->str = T->stream;
    while (*T->stream != end) {
        if (*T->stream == 0) {
            // CL_Advance never moves past the terminator, so continuing
            // here would spin forever.
            CL_SetTokenLength(T, token);
            CL_ReportError(T, token, "Invalid include directive, reached end of file while reading filename");
            return;
        }
        if (*T->stream == '\n') {
            // A filename can not span lines; stop instead of hunting for the
            // delimiter in the following lines.
            CL_SetTokenLength(T, token);
            CL_ReportError(T, token, "Invalid include directive filename, got newline character while reading filename");
            return;
        }
        CL_Advance(T);
    }
    CL_SetTokenLength(T, token);
    CL_Advance(T);

    // @not_sure: this is because we want null terminated input into path resolution stuff
    token->string_literal = CL_PushStringCopy(T->arena, token->str, token->len);
}
|
|
|
|
// Tries to lex the directive name following a '#'.
// Spaces and tabs are skipped, token->str / token->len are set to the run of
// alphabetic characters that follows and, when that run is a known directive,
// token->kind is set accordingly (#include also lexes its filename).
// Returns true only when a known directive was recognized. Otherwise the
// stream is rewound to the start of the name (token->str) so the caller can
// lex it itself, e.g. as the operand of the stringify operator.
CL_PRIVATE_FUNCTION bool CL_LexMacro(CL_Lexer *T, CL_Token *token) {
    while (*T->stream == ' ' || T->stream[0] == '\t') CL_Advance(T);
    token->str = T->stream;
    while (CL_IsAlphabetic(*T->stream)) CL_Advance(T);
    CL_SetTokenLength(T, token);

    bool matched = true;
    switch (*token->str) {
        case 'd':
            if (CL_StringsAreEqual(token->str, token->len, "define", 6)) {
                token->kind = CL_PREPROC_DEFINE;
            }
            else matched = false;
            break;

        case 'i':
            if (CL_StringsAreEqual(token->str, token->len, "ifdef", 5)) {
                token->kind = CL_PREPROC_IFDEF;
            }
            else if (CL_StringsAreEqual(token->str, token->len, "ifndef", 6)) {
                token->kind = CL_PREPROC_IFNDEF;
            }
            else if (CL_StringsAreEqual(token->str, token->len, "include", 7)) {
                token->kind = CL_PREPROC_INCLUDE;
                CL_LexMacroInclude(T, token);
            }
            else if (CL_StringsAreEqual(token->str, token->len, "if", 2)) {
                token->kind = CL_PREPROC_IF;
            }
            else matched = false;
            break;

        case 'e':
            if (CL_StringsAreEqual(token->str, token->len, "endif", 5)) {
                token->kind = CL_PREPROC_ENDIF;
            }
            else if (CL_StringsAreEqual(token->str, token->len, "error", 5)) {
                token->kind = CL_PREPROC_ERROR;
            }
            else if (CL_StringsAreEqual(token->str, token->len, "else", 4)) {
                token->kind = CL_PREPROC_ELSE;
            }
            else if (CL_StringsAreEqual(token->str, token->len, "elif", 4)) {
                token->kind = CL_PREPROC_ELIF;
            }
            else matched = false;
            break;

        case 'p':
            if (CL_StringsAreEqual(token->str, token->len, "pragma", 6)) {
                token->kind = CL_PREPROC_PRAGMA;
            }
            else matched = false;
            break;

        case 'u':
            if (CL_StringsAreEqual(token->str, token->len, "undef", 5)) {
                token->kind = CL_PREPROC_UNDEF;
            }
            else matched = false;
            break;

        default:
            matched = false;
            break;
    }

    if (!matched) {
        // Only alphabetic characters were consumed since token->str was set,
        // so line and column are unaffected by stepping back.
        T->stream = token->str;
    }
    return matched;
}
|
|
|
|
// Skipped space here is for case #define Memes (a), this is not a function like macro because of space
|
|
static uint32_t CL_TokenID; // @todo: make it stable, thread local?
|
|
CL_PRIVATE_FUNCTION void CL_PrepareToken(CL_Lexer *T, CL_Token *token, bool skipped_space) {
|
|
CL_MemoryZero(token, sizeof(*token));
|
|
token->str = T->stream;
|
|
token->line = T->line;
|
|
token->column = T->column;
|
|
token->file = T->file;
|
|
token->id = ++CL_TokenID;
|
|
if (skipped_space) token->is_there_whitespace_before_token = true;
|
|
CL_Advance(T);
|
|
}
|
|
|
|
// Classifies the token whose first character was already consumed by
// CL_PrepareToken (token->str points at it, T->stream just past it) and
// consumes the rest of the token from the stream.
// Handles punctuation and operators, comments, preprocessor directives and
// the stringify / concat operators, string and character literals with their
// u8 / u / U / L prefixes, identifiers / keywords and numeric literals with
// their suffixes. A zero byte leaves the token kind untouched.
// Problems are reported through CL_ReportError.
CL_PRIVATE_FUNCTION void CL_DefaultTokenize(CL_Lexer *T, CL_Token *token) {
    char *c = token->str;
    switch (*c) {
        case 0: break;
        case '(': token->kind = CL_OPENPAREN; break;
        case ')': token->kind = CL_CLOSEPAREN; break;
        case '{': token->kind = CL_OPENBRACE; break;
        case '}': token->kind = CL_CLOSEBRACE; break;
        case '[': token->kind = CL_OPENBRACKET; break;
        case ']': token->kind = CL_CLOSEBRACKET; break;
        case ',': token->kind = CL_COMMA; break;
        case '~': token->kind = CL_NEG; break;
        case '?': token->kind = CL_QUESTION; break;
        case ';': token->kind = CL_SEMICOLON; break;
        case ':': token->kind = CL_COLON; break;

        case '.': {
            token->kind = CL_DOT;
            if (T->stream[0] == '.' && T->stream[1] == '.') {
                CL_Advance(T);
                CL_Advance(T);
                token->kind = CL_THREEDOTS;
            }
        } break;

        case '/': {
            token->kind = CL_DIV;
            if (*T->stream == '/') {
                // Line comment, runs up to (not including) the newline.
                token->kind = CL_COMMENT;
                CL_Advance(T);
                while (*T->stream != '\n' && *T->stream != 0) {
                    CL_Advance(T);
                }
                CL_SetTokenLength(T, token);
            }
            else if (*T->stream == '*') {
                token->kind = CL_COMMENT;
                CL_Advance(T);
                for (;;) {
                    if (T->stream[0] == '*' && T->stream[1] == '/') {
                        break;
                    }
                    if (T->stream[0] == 0) {
                        CL_ReportError(T, token, "Unclosed block comment");
                        return;
                    }
                    CL_Advance(T);
                }
                // The token text excludes the opening and closing markers.
                token->str += 2;
                CL_SetTokenLength(T, token);
                CL_Advance(T);
                CL_Advance(T);
            }
            else if (*T->stream == '=') {
                token->kind = CL_DIVASSIGN;
                CL_Advance(T);
            }
        } break;

        case '#': {
            if (*T->stream == '#') {
                token->kind = CL_MACRO_CONCAT;
                CL_Advance(T);
            }
            else if (CL_LexMacro(T, token)) {
                T->inside_of_macro = true;
            }
            else if (!T->inside_of_macro) {
                CL_ReportError(T, token, "Invalid preprocessor directive");
                return;
            }
            else {
                token->kind = CL_PREPROC_STRINGIFY;
                // CL_LexMacro left token->str at the start of the name but may
                // already have consumed its letters; step back so the whole
                // operand ends up in the token.
                T->stream = token->str;
                while (*T->stream == '_' || CL_IsAlphanumeric(*T->stream)) {
                    CL_Advance(T);
                }
                CL_SetTokenLength(T, token);
            }
        } break;

        case '>': {
            if (*T->stream == '=') {
                token->kind = CL_GREATERTHEN_OR_EQUAL;
                CL_Advance(T);
            }
            else if (*T->stream == '>') {
                CL_Advance(T);
                if (*T->stream == '=') {
                    CL_Advance(T);
                    token->kind = CL_RIGHTSHIFTASSIGN;
                }
                else {
                    token->kind = CL_RIGHTSHIFT;
                }
            }
            else {
                token->kind = CL_GREATERTHEN;
            }
        } break;

        case '<': {
            token->kind = CL_LESSERTHEN;
            if (*T->stream == '=') {
                token->kind = CL_LESSERTHEN_OR_EQUAL;
                CL_Advance(T);
            }
            else if (*T->stream == '<') {
                CL_Advance(T);
                if (*T->stream == '=') {
                    CL_Advance(T);
                    token->kind = CL_LEFTSHIFTASSIGN;
                }
                else {
                    token->kind = CL_LEFTSHIFT;
                }
            }
        } break;

        case '&': {
            if (*T->stream == '=') {
                token->kind = CL_ANDASSIGN;
                CL_Advance(T);
            }
            else if (*T->stream == '&') {
                token->kind = CL_AND;
                CL_Advance(T);
            }
            else {
                token->kind = CL_BITAND;
            }
        } break;

        case '-': {
            if (*T->stream == '-') {
                token->kind = CL_DECREMENT;
                CL_Advance(T);
            }
            else if (*T->stream == '=') {
                token->kind = CL_SUBASSIGN;
                CL_Advance(T);
            }
            else {
                token->kind = CL_SUB;
            }
        } break;

        case '+': {
            if (*T->stream == '+') {
                token->kind = CL_INCREMENT;
                CL_Advance(T);
            }
            else if (*T->stream == '=') {
                token->kind = CL_ADDASSIGN;
                CL_Advance(T);
            }
            else {
                token->kind = CL_ADD;
            }
        } break;

        case '|': {
            if (*T->stream == '|') {
                token->kind = CL_OR;
                CL_Advance(T);
            }
            else if (*T->stream == '=') {
                token->kind = CL_ORASSIGN;
                CL_Advance(T);
            }
            else {
                token->kind = CL_BITOR;
            }
        } break;

        case '=': {
            if (*T->stream != '=') {
                token->kind = CL_ASSIGN;
            }
            else {
                CL_Advance(T);
                token->kind = CL_EQUALS;
            }
        } break;

        case '!': {
            if (*T->stream != '=') {
                token->kind = CL_NOT;
            }
            else {
                CL_Advance(T);
                token->kind = CL_NOTEQUALS;
            }
        } break;

        case '*': {
            token->kind = CL_MUL;
            if (*T->stream == '=') {
                CL_Advance(T);
                token->kind = CL_MULASSIGN;
            }
        } break;

        case '%': {
            token->kind = CL_MOD;
            if (*T->stream == '=') {
                token->kind = CL_MODASSIGN;
                CL_Advance(T);
            }
        } break;

        case '^': {
            token->kind = CL_BITXOR;
            if (*T->stream == '=') {
                CL_Advance(T);
                token->kind = CL_XORASSIGN;
            }
        } break;

        case '"': {
            CL_CheckString(T, token);
        } break;

        case '\'': {
            CL_ParseCharLiteral(T, token);
        } break;

        case 'U': { // @todo Unicode32
            if (*T->stream == '"') {
                token->fix = CL_PREFIX_U32;
                CL_Advance(T);
                CL_CheckString(T, token);
            }
            else if (*T->stream == '\'') {
                token->fix = CL_PREFIX_U32;
                CL_Advance(T);
                CL_ParseCharLiteral(T, token);
            }
            else goto parse_regular_char;
        } break;

        case 'u': { // Unicode16
            if (*T->stream == '8') { // Unicode8
                if (T->stream[1] == '"') { // U8 STRING
                    token->fix = CL_PREFIX_U8;
                    CL_Advance(T);
                    CL_Advance(T);
                    CL_CheckString(T, token);
                }
                else if (T->stream[1] == '\'') { // U8 CHAR
                    token->fix = CL_PREFIX_U8;
                    CL_Advance(T);
                    CL_Advance(T);
                    CL_ParseCharLiteral(T, token);
                }
                else goto parse_regular_char;
            }
            else if (*T->stream == '"') { // U16 STRING
                token->fix = CL_PREFIX_U16;
                CL_Advance(T);
                CL_CheckString(T, token);
            }
            else if (*T->stream == '\'') { // U16 CHAR
                token->fix = CL_PREFIX_U16;
                CL_Advance(T);
                CL_ParseCharLiteral(T, token);
            }
            else goto parse_regular_char;
        } break; // must not fall through into the 'L' prefix handling

        case 'L': { // Widechar
            if (*T->stream == '"') {
                token->fix = CL_PREFIX_L;
                CL_Advance(T);
                CL_CheckString(T, token); // @todo UTF16
            }
            else if (*T->stream == '\'') {
                token->fix = CL_PREFIX_L;
                CL_Advance(T);
                CL_ParseCharLiteral(T, token);
            }
            else goto parse_regular_char;
        } break;

        // Identifier start characters. 'L', 'U' and 'u' are handled above
        // because they may introduce a prefixed literal.
        case 'A': case 'a': case 'B': case 'b': case 'C': case 'c':
        case 'D': case 'd': case 'E': case 'e': case 'F': case 'f':
        case 'G': case 'g': case 'H': case 'h': case 'I': case 'i':
        case 'J': case 'j': case 'K': case 'k': case 'l':
        case 'M': case 'm': case 'N': case 'n': case 'O': case 'o':
        case 'P': case 'p': case 'Q': case 'q': case 'R': case 'r':
        case 'S': case 's': case 'T': case 't':
        case 'V': case 'v': case 'W': case 'w': case 'X': case 'x':
        case 'Y': case 'y': case 'Z': case 'z':
        case '_':
        parse_regular_char : {
            token->kind = CL_IDENTIFIER;
            while (*T->stream == '_' || CL_IsAlphanumeric(*T->stream)) {
                CL_Advance(T);
            }
            CL_SetTokenLength(T, token);
            CL_IsIdentifierKeyword(token);
        } break;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9': {
            token->kind = CL_INT;

            if (*c == '0' && (*T->stream == 'x' || *T->stream == 'X')) {
                token->is_hex = true;
                CL_Advance(T);
                while (CL_IsHexNumeric(*T->stream)) {
                    CL_Advance(T);
                }
                uint64_t len = (uint64_t)(T->stream - token->str);
                if (len > 2) {
                    token->u64 = CL_ParseInteger(T, token, token->str + 2, len - 2, 16);
                }
                else {
                    // Malformed input must produce an error, not an assert.
                    CL_ReportError(T, token, "Invalid hexadecimal literal, expected digits after the '0x' prefix");
                }
            }
            else {
                for (;;) {
                    if (*T->stream == '.') {
                        if (token->kind == CL_FLOAT) {
                            CL_ReportError(T, token, "Failed to parse a floating point number, invalid format, found multiple '.'");
                        }

                        if (token->kind == CL_INT) {
                            token->kind = CL_FLOAT;
                        }
                    }
                    else if (CL_IsNumeric(*T->stream) == false) {
                        break;
                    }
                    CL_Advance(T);
                }

                if (token->kind == CL_INT) {
                    // NOTE(review): literals with a leading 0 are accumulated
                    // in base 10 here, not as octal - confirm this is intended.
                    uint64_t len = (uint64_t)(T->stream - token->str);
                    CL_ASSERT(len > 0);
                    token->u64 = CL_ParseInteger(T, token, token->str, len, 10);
                }
                else if (token->kind == CL_FLOAT) {
                    // token->len is not set yet at this point, so hand over
                    // the length of the text scanned so far.
                    token->f64 = CL_STRING_TO_DOUBLE(token->str, (int)(T->stream - token->str));
                }
                else {
                    CL_ASSERT(token->kind == CL_ERROR);
                }
            }

            // Literal suffixes, shared by decimal and hexadecimal literals.
            if (*T->stream == 'f' || *T->stream == 'F') {
                CL_Advance(T);
                token->fix = CL_SUFFIX_F;
            }
            else if (*T->stream == 'l' || *T->stream == 'L') {
                CL_Advance(T);
                token->fix = CL_SUFFIX_L;
                if (*T->stream == 'l' || *T->stream == 'L') {
                    CL_Advance(T);
                    token->fix = CL_SUFFIX_LL;
                    if (*T->stream == 'u' || *T->stream == 'U') {
                        CL_Advance(T);
                        token->fix = CL_SUFFIX_ULL;
                    }
                }
                else if (*T->stream == 'u' || *T->stream == 'U') {
                    CL_Advance(T);
                    token->fix = CL_SUFFIX_UL;
                }
            }
            else if (*T->stream == 'u' || *T->stream == 'U') {
                CL_Advance(T);
                token->fix = CL_SUFFIX_U;
                if (*T->stream == 'l' || *T->stream == 'L') {
                    CL_Advance(T);
                    token->fix = CL_SUFFIX_UL;
                    if (*T->stream == 'l' || *T->stream == 'L') {
                        CL_Advance(T);
                        token->fix = CL_SUFFIX_ULL;
                    }
                }
            }
        } break;

        default: {
            CL_ReportError(T, token, "Unhandled character, skipping ...");
        } break;
    }
}
|
|
|
|
// Skips whitespace and backslash line continuations ("\" followed by "\n" or
// by "\r\n"). A plain newline ends the current preprocessor line by clearing
// T->inside_of_macro; a continued line does not.
// Returns true when at least one character was skipped.
CL_PRIVATE_FUNCTION bool CL_EatWhitespace(CL_Lexer *T) {
    bool skipped = false;
    for (;;) {
        int to_skip = 0;
        if (CL_IsWhitespace(*T->stream)) {
            if (*T->stream == '\n') T->inside_of_macro = false;
            to_skip = 1;
        }
        else if (T->stream[0] == '\\' && T->stream[1] == '\n') {
            to_skip = 2;
        }
        else if (T->stream[0] == '\\' && T->stream[1] == '\r' && T->stream[2] == '\n') {
            to_skip = 3;
        }

        if (to_skip == 0) {
            break;
        }
        while (to_skip > 0) {
            CL_Advance(T);
            to_skip -= 1;
        }
        skipped = true;
    }
    return skipped;
}
|
|
|
|
// Completes a freshly lexed token: a token that still has no length gets
// one computed from the current stream position, and a token lexed while
// the lexer is on a preprocessor line is flagged as being inside a macro.
CL_PRIVATE_FUNCTION void CL_TryToFinalizeToken(CL_Lexer *T, CL_Token *token) {
    if (token->len == 0) CL_SetTokenLength(T, token);
    if (T->inside_of_macro) token->is_inside_macro = true;
}
|
|
|
|
// Lexes tokens until one passes the filters configured on the lexer
// (select_includes, select_macros, select_comments, skip_comments,
// skip_macros) or the end of input is reached, and leaves it in `token`.
// Each candidate is finalized before filtering, because the macro filters
// depend on token->is_inside_macro which is only set during finalization.
CL_PRIVATE_FUNCTION void CL_InitNextToken(CL_Lexer *T, CL_Token *token) {
    for (;;) {
        bool skipped = CL_EatWhitespace(T);
        CL_PrepareToken(T, token, skipped);
        CL_DefaultTokenize(T, token);
        CL_TryToFinalizeToken(T, token);

        // End of input is always handed to the caller.
        if (token->kind == CL_EOF) break;

        if (T->select_includes && token->kind != CL_PREPROC_INCLUDE) continue;
        if (T->select_macros && !token->is_inside_macro) continue;
        if (T->select_comments && token->kind != CL_COMMENT) continue;
        if (T->skip_comments && token->kind == CL_COMMENT) continue;
        if (T->skip_macros && token->is_inside_macro) continue;
        break;
    }
}
|
|
|
|
// Returns the next token of the stream by value, honoring the filters
// configured on the lexer.
CL_API_FUNCTION CL_Token CL_Next(CL_Lexer *T) {
    CL_Token next_token;
    CL_MemoryZero(&next_token, sizeof(next_token));
    CL_InitNextToken(T, &next_token);
    return next_token;
}
|
|
|
|
// Creates a lexer positioned at the start of `stream`.
// `filename` is stored as the file of the lexer and `arena` as its
// allocator. All other state starts zeroed, except that comment skipping is
// enabled by default.
CL_API_FUNCTION CL_Lexer CL_Begin(CL_Allocator arena, char *stream, char *filename) {
    CL_Lexer lexer;
    CL_MemoryZero(&lexer, sizeof(lexer));
    lexer.stream_begin = stream;
    lexer.stream = stream;
    lexer.file = filename;
    lexer.arena = arena;
    lexer.skip_comments = true;
    return lexer;
}
|
|
|
|
//
|
|
//
|
|
//
|
|
|
|
// Returns the directory part of `str`: everything before the last '/'.
//  - no slash at all         -> the literal "./" (not allocated)
//  - the only slash is first -> "/", so files located in the root directory
//    keep an absolute parent instead of an empty string
//  - otherwise               -> a copy allocated through CL_PushStringCopy
CL_PRIVATE_FUNCTION char *CL_ChopLastSlash(CL_Allocator arena, char *str) {
    int slash_pos = -1;
    for (int i = 0; str[i]; i += 1) {
        if (str[i] == '/') {
            slash_pos = i;
        }
    }

    if (slash_pos == -1) {
        return (char *)"./";
    }
    if (slash_pos == 0) {
        // Keep the root slash itself; cutting it would leave an empty path.
        slash_pos = 1;
    }
    return CL_PushStringCopy(arena, str, slash_pos);
}
|
|
|
|
// Concatenates the paths `a` and `b` into a newly allocated, zero terminated
// string. A '/' is inserted between them when `a` is not empty and does not
// already end with one.
// Returns 0 when the allocation fails.
CL_PRIVATE_FUNCTION char *CL_JoinPath(CL_Allocator arena, char *a, char *b) {
    int alen = CL_StringLength(a);
    int blen = CL_StringLength(b);
    int additional_len = 0;
    if (alen && a[alen - 1] != '/') additional_len = 1;

    char *result = (char *)CL_Allocate(arena, sizeof(char) * (alen + blen + 1 + additional_len));
    if (result == 0) {
        // Never write through a failed allocation.
        return 0;
    }

    CL__MemoryCopy(result, a, alen);
    if (additional_len) result[alen++] = '/';
    CL__MemoryCopy(result + alen, b, blen);
    result[alen + blen] = 0;
    return result;
}
|
|
|
|
// Reports whether `path` is absolute.
//   Windows builds: a drive letter followed by ":/" (e.g. "C:/dir"); only
//                   forward slashes are recognised.
//   Other builds:   the path starts with '/'.
// `path` must be a valid NUL-terminated string. The && chain stops at the
// first mismatch, so no byte past the terminator is read.
CL_PRIVATE_FUNCTION bool CL_IsAbsolutePath(char *path) {
#if defined(_WIN32)
    return CL_IsAlphabetic(path[0]) && path[1] == ':' && path[2] == '/';
#else
    return path[0] == '/';
#endif
}
|
|
|
|
// Returns a pointer into `p` at its last '/' character, or `p` itself when
// the string contains no '/'.
//
//   "dir/file.h" -> "/file.h"   (the slash is included)
//   "file.h"     -> "file.h"
//
// NOTE(review): because the result points AT the slash rather than after it,
// callers see a leading '/' only for paths that have a directory part.
// Confirm that this asymmetry is intended.
CL_PRIVATE_FUNCTION char *CL_SkipToLastSlash(char *p) {
    char *last = p;
    for (char *it = p; *it; it += 1) {
        if (*it == '/') last = it;
    }
    return last;
}
|
|
|
|
// Resolves an #include-style `filename` to a path of an existing file.
//
//   arena             - allocator used for the candidate paths that are built
//   search_paths      - include directories; may be 0, which behaves like an
//                       all-zero CL_SearchPaths (no directories, no ignore rule)
//   filename          - the name as written in the include
//   parent_file       - file containing the include, or 0; its directory is
//                       tried first for non-system includes
//   is_system_include - true: search only system_include_path
//                       false: search the parent's directory, then include_path
//
// Returns:
//   0         - the name matches file_begin_to_ignore, or no candidate exists
//   filename  - the very same pointer, when it is an absolute path that exists
//   otherwise - a newly built "<dir>/<filename>" string from CL_JoinPath
// Candidate paths that turn out not to exist are not released here.
CL_API_FUNCTION char *CL_ResolveFilepath(CL_Allocator arena, CL_SearchPaths *search_paths, char *filename, char *parent_file, bool is_system_include) {
    CL_SearchPaths null_search_paths = {0};
    if (search_paths == 0) search_paths = &null_search_paths;

    // Optional ignore rule: reject the file when the tail of `filename`
    // begins with file_begin_to_ignore.
    // NOTE(review): CL_SkipToLastSlash points AT the last '/', so for names
    // with a directory part the compared text starts with '/', while bare
    // names do not. Confirm the ignore prefix is expected to account for that.
    char *ignore = search_paths->file_begin_to_ignore;
    if (ignore) {
        char *name = CL_SkipToLastSlash(filename);
        int namelen = CL_StringLength(name);
        int ignorelen = CL_StringLength(ignore);
        // Compare at most `ignorelen` characters of the name (prefix test).
        if (namelen > ignorelen) namelen = ignorelen;
        if (CL_StringsAreEqual(name, namelen, ignore, ignorelen)) return 0;
    }

    // An absolute path that exists needs no search. One that does not exist
    // still falls through to the directory search below.
    if (CL_IsAbsolutePath(filename) && CL_FileExists(filename)) return filename;

    if (is_system_include) {
        for (int path_i = 0; path_i < search_paths->system_include_path_count; path_i += 1) {
            char *dir = search_paths->system_include_path[path_i];
            char *candidate = CL_JoinPath(arena, dir, filename);
            if (CL_FileExists(candidate)) return candidate;
        }
        return 0;
    }

    // Non-system include: the including file's own directory has priority.
    if (parent_file) {
        char *parent_dir = CL_ChopLastSlash(arena, parent_file);
        char *candidate = CL_JoinPath(arena, parent_dir, filename);
        if (CL_FileExists(candidate)) return candidate;
    }

    for (int path_i = 0; path_i < search_paths->include_path_count; path_i += 1) {
        char *dir = search_paths->include_path[path_i];
        char *candidate = CL_JoinPath(arena, dir, filename);
        if (CL_FileExists(candidate)) return candidate;
    }

    return 0;
}
|
|
|
|
//
//
//
|
|
|
|
// Printable names for literal suffixes and prefixes (12 entries).
// NOTE(review): presumably indexed by the suffix/prefix enum declared in
// clexer.h; keep the order in sync with that declaration.
const char *CL_FixString[] = {
    // Suffixes.
    "SUFFIX_INVALID",
    "SUFFIX_U",
    "SUFFIX_UL",
    "SUFFIX_ULL",
    "SUFFIX_L",
    "SUFFIX_LL",
    "SUFFIX_F",
    "SUFFIX_FL",
    // Prefixes.
    "PREFIX_U8",
    "PREFIX_U16",
    "PREFIX_U32",
    "PREFIX_L",
};
|
|
|
|
// Printable names for token kinds (117 entries). CL_Stringify indexes this
// table with token->kind after checking it against CL_COUNT, so the order
// must match the token kind enum.
// NOTE(review): "--" and "++" each appear twice; presumably one pair is the
// postfix form and the other the prefix form. Confirm against the enum in
// clexer.h.
const char *CL_KindString[] = {
    "EOF",

    // Arithmetic, shift, comparison, bitwise and logical operators.
    "*",
    "/",
    "%",
    "<<",
    ">>",
    "+",
    "-",
    "==",
    "<",
    ">",
    "<=",
    ">=",
    "!=",
    "&",
    "|",
    "^",
    "&&",
    "||",
    "~",
    "!",
    "--",
    "++",
    "--",
    "++",

    // Assignment operators.
    "=",
    "/=",
    "*=",
    "%=",
    "-=",
    "+=",
    "&=",
    "|=",
    "^=",
    "<<=",
    ">>=",

    // Brackets and punctuation.
    "(",
    ")",
    "{",
    "}",
    "[",
    "]",
    ",",
    "##",
    "#Stringify",
    "?",
    "...",
    ";",
    ".",
    ":",
    "TAG",
    "->",
    "SIZEOF",

    // Comments, identifiers and literals.
    "DOCCOMMENT",
    "COMMENT",
    "IDENTIFIER",
    "STRING_LITERAL",
    "CHARACTER_LITERAL",
    "ERROR TOKEN",
    "FLOAT",
    "INT",

    // Preprocessor directives.
    "PREPROC_NULL",
    "PREPROC_DEFINE",
    "PREPROC_IFDEF",
    "PREPROC_IFNDEF",
    "PREPROC_INCLUDE",
    "PREPROC_ENDIF",
    "PREPROC_IF",
    "PREPROC_PRAGMA",
    "PREPROC_ERROR",
    "PREPROC_ELSE",
    "PREPROC_ELIF",
    "PREPROC_UNDEF",

    // Type keywords.
    "KEYWORD_VOID",
    "KEYWORD_INT",
    "KEYWORD_CHAR",
    "KEYWORD_UNSIGNED",
    "KEYWORD_SIGNED",
    "KEYWORD_LONG",
    "KEYWORD_SHORT",
    "KEYWORD_DOUBLE",
    "KEYWORD_FLOAT",
    "KEYWORD__BOOL",
    "KEYWORD__COMPLEX",
    "KEYWORD__IMAGINARY",

    // Storage class, qualifier and function specifier keywords.
    "KEYWORD_STATIC",
    "KEYWORD_AUTO",
    "KEYWORD_CONST",
    "KEYWORD_EXTERN",
    "KEYWORD_INLINE",
    "KEYWORD_REGISTER",
    "KEYWORD_RESTRICT",
    "KEYWORD_VOLATILE",
    "KEYWORD__THREAD_LOCAL",
    "KEYWORD__ATOMIC",
    "KEYWORD__NORETURN",

    // Aggregate and typedef keywords.
    "KEYWORD_STRUCT",
    "KEYWORD_UNION",
    "KEYWORD_ENUM",
    "KEYWORD_TYPEDEF",

    // Statement keywords.
    "KEYWORD_DEFAULT",
    "KEYWORD_BREAK",
    "KEYWORD_RETURN",
    "KEYWORD_SWITCH",
    "KEYWORD_IF",
    "KEYWORD_ELSE",
    "KEYWORD_FOR",
    "KEYWORD_WHILE",
    "KEYWORD_CASE",
    "KEYWORD_CONTINUE",
    "KEYWORD_DO",
    "KEYWORD_GOTO",

    // Operator-like and C11 keywords.
    "KEYWORD_SIZEOF",
    "KEYWORD__ALIGNAS",
    "KEYWORD__ALIGNOF",
    "KEYWORD__STATIC_ASSERT",
    "KEYWORD__GENERIC",
};
|
|
|
|
// Formats `msg` into `buff` as "<file>:<line> <message>", with the message
// right-aligned in a field of at least 15 characters. Output is truncated to
// fit `buff_size` bytes including the terminating NUL.
// Does nothing when there is no usable buffer. A null file or message is
// printed as "(null)" instead of being passed to %s, which is undefined.
CL_API_FUNCTION void CL_StringifyMessage(char *buff, int buff_size, CL_Message *msg) {
    if (buff == 0 || buff_size <= 0) return;

    const char *file = msg->token.file ? msg->token.file : "(null)";
    const char *text = msg->string ? msg->string : "(null)";
    CL_SNPRINTF(buff, buff_size, "%s:%d %15s", file, msg->token.line, text);
}
|
|
|
|
// Formats `token` into `buff` as "<file>:<line> <kind> <text>". The kind name
// and the token text are each right-aligned in a field of at least 15
// characters, and the text is limited to token->len characters. Output is
// truncated to fit `buff_size` bytes including the terminating NUL.
// Kinds outside [0, CL_COUNT) print as "UNKNOWN". Does nothing when there is
// no usable buffer. A null file prints as "(null)", and null token text
// prints as empty.
CL_API_FUNCTION void CL_Stringify(char *buff, int buff_size, CL_Token *token) {
    if (buff == 0 || buff_size <= 0) return;

    const char *token_kind = "UNKNOWN";
    // Reject negative values as well, in case the kind is a signed type.
    if ((int)token->kind >= 0 && token->kind < CL_COUNT) {
        token_kind = CL_KindString[token->kind];
    }

    const char *file = token->file ? token->file : "(null)";
    const char *text = token->str ? token->str : "";
    // A null str is printed with precision 0 so nothing is read from it.
    int text_len = token->str ? token->len : 0;
    CL_SNPRINTF(buff, buff_size, "%s:%d %15s %15.*s", file, token->line, token_kind, text_len, text);
}
|
|
|
|
// Appends node `n` to the singly linked queue whose head is `f` and tail is
// `l`, using the member named `next` as the link field.
//   - n->next is cleared first, so `n` always becomes the new tail.
//   - On an empty queue (f == 0) both head and tail become `n`.
//   - `f` and `l` must be assignable lvalues.
//   - `n` is evaluated more than once, so pass an expression without side
//     effects.
#define CL_SLL_QUEUE_ADD_MOD(f, l, n, next) \
    do {                                    \
        (n)->next = 0;                      \
        if ((f) == 0) {                     \
            (f) = (n);                      \
            (l) = (n);                      \
        }                                   \
        else {                              \
            (l)->next = (n);                \
            (l) = (n);                      \
        }                                   \
    } while (0)

// Same as CL_SLL_QUEUE_ADD_MOD with the link member fixed to `next`.
#define CL_SLL_QUEUE_ADD(f, l, n) CL_SLL_QUEUE_ADD_MOD(f, l, n, next)
|
|
|
|
// printf-style formatting into a freshly allocated buffer. Must be expanded
// inside a variadic function whose last named parameter is `string`.
//
// Declares the following in the enclosing scope:
//   args1, args2 - va_list temporaries (both are ended by the macro)
//   len          - length of the formatted text, excluding the NUL
//   result       - char * to the NUL-terminated text from CL_Allocate(arena, ...)
//
// The first CL_VSNPRINTF call only measures. It runs on a va_copy because a
// va_list cannot be reused after it has been consumed. A negative return
// (formatting error) is clamped to 0, so the allocation size stays valid and
// `result` is an empty string rather than an undersized buffer.
// The expansion ends without ';' so the caller writes `CL__FORMAT(...);`.
#define CL__FORMAT(arena, string, result)                 \
    va_list args1, args2;                                 \
    va_start(args1, string);                              \
    va_copy(args2, args1);                                \
    int len = CL_VSNPRINTF(0, 0, string, args2);          \
    va_end(args2);                                        \
    if (len < 0) len = 0;                                 \
    char *result = (char *)CL_Allocate((arena), len + 1); \
    CL_VSNPRINTF(result, len + 1, string, args1);         \
    va_end(args1);                                        \
    result[len] = 0
|
|
|
|
// Records a lexing error for `token`.
//
//   T      - lexer; the message is appended to its first_message/last_message
//            queue and its `errors` counter is incremented
//   token  - offending token; a snapshot is stored in the message, then the
//            token itself is turned into a CL_ERROR token pointing at it
//   string - printf-style format, followed by its arguments
//
// The formatted text and the CL_Message are allocated with
// CL_Allocate(T->arena, ...).
CL_PRIVATE_FUNCTION void CL_ReportError(CL_Lexer *T, CL_Token *token, const char *string, ...) {
    // Declares `message_string` holding the formatted message text.
    CL__FORMAT(T->arena, string, message_string);

    CL_Message *message = (CL_Message *)CL_Allocate(T->arena, sizeof(CL_Message));
    CL_MemoryZero(message, sizeof(CL_Message));
    CL_SLL_QUEUE_ADD(T->first_message, T->last_message, message);

    // Store the formatted text. Storing the raw format string would show
    // unexpanded "%..." specifiers and discard the arguments.
    message->string = message_string;
    // Snapshot the token before it is rewritten as an error token below.
    message->token = *token;

    token->kind = CL_ERROR;
    token->error = message;
    T->errors += 1;
}
|