#include "clexer.h"
#include <stdarg.h>
#ifndef CL_STRING_TO_DOUBLE
#include <stdlib.h>
#define CL_STRING_TO_DOUBLE(str, len) strtod(str, 0)
#endif
#ifndef CL_ASSERT
#include <assert.h>
#define CL_ASSERT(x) assert(x)
#endif
#ifndef CL_VSNPRINTF
#include <stdio.h>
#define CL_VSNPRINTF vsnprintf
#endif
#ifndef CL_SNPRINTF
#include <stdio.h>
#define CL_SNPRINTF snprintf
#endif
#ifndef CL__MemoryCopy
#include <string.h>
#define CL__MemoryCopy(dst, src, s) memcpy(dst, src, s)
#endif
#ifndef CL__MemoryZero
#include <string.h>
#define CL__MemoryZero(p, size) memset(p, 0, size)
#endif
#ifndef CL_ReadFile
#define CL_ReadFile CL__ReadFile
#include <stdio.h>
CL_PRIVATE_FUNCTION char *CL_ReadFile(CL_Arena *arena, char *name) {
char *result = 0;
FILE *f = fopen(name, "rb");
if (f) {
fseek(f, 0, SEEK_END);
int len = ftell(f);
fseek(f, 0, SEEK_SET);
result = (char *)CL_PushSize(arena, len + 1);
fread(result, len, 1, f);
fclose(f);
result[len] = 0;
}
return result;
}
#endif
#ifndef CL_FileExists
#define CL_FileExists CL__FileExists
#include <stdio.h>
CL_API_FUNCTION bool CL_FileExists(char *name) {
bool result = false;
FILE *f = fopen(name, "rb");
if (f) {
result = true;
fclose(f);
}
return result;
}
#endif
#ifndef CL__HASH_BYTES
#define CL__HASH_BYTES CL__HashBytes
// FNV-1a hash, 64-bit (xor the byte, then multiply by the FNV prime)
static uint64_t CL__HashBytes(void *p, int bytes) {
uint8_t *p8 = (uint8_t *)p;
uint64_t hash = (uint64_t)14695981039346656037ULL;
for (int i = 0; i < bytes; i++) {
hash = hash ^ (uint64_t)(p8[i]);
hash = hash * (uint64_t)1099511628211ULL;
}
return hash;
}
#endif
#ifndef CL_CUSTOM_ARENA_TYPE
CL_PRIVATE_FUNCTION void *CL_PushSize(CL_Arena *arena, int size) {
if (arena->len + size > arena->cap) {
CL_ASSERT(!"CLEX: Not enough memory");
}
void *result = arena->buff + arena->len;
arena->len += size;
return result;
}
#endif
#ifdef __cplusplus
#define CL_ZeroStruct() \
{}
#else
#define CL_ZeroStruct() \
{ 0 }
#endif
#define CL_PushArray(arena, T, size) (T *)CL__PushSizeZeroed(arena, sizeof(T) * (size))
#define CL_PushStruct(arena, T) CL_PushArray(arena, T, 1)
CL_PRIVATE_FUNCTION void *CL__PushSizeZeroed(CL_Arena *arena, int size) {
void *result = CL_PushSize(arena, size);
CL__MemoryZero(result, size);
return result;
}
const char *CL_FixString[] = {
"",
"SUFFIX_U",
"SUFFIX_UL",
"SUFFIX_ULL",
"SUFFIX_L",
"SUFFIX_LL",
"SUFFIX_F",
"SUFFIX_FL",
"PREFIX_U8",
"PREFIX_U16",
"PREFIX_U32",
"PREFIX_L",
};
const char *CL_KindString[] = {
"EOF",
"*",
"/",
"%",
"<<",
">>",
"+",
"-",
"==",
"<",
">",
"<=",
">=",
"!=",
"&",
"|",
"^",
"&&",
"||",
"~",
"!",
"--",
"++",
"--",
"++",
"=",
"/=",
"*=",
"%=",
"-=",
"+=",
"&=",
"|=",
"^=",
"<<=",
">>=",
"(",
")",
"{",
"}",
"[",
"]",
",",
"##",
"#Stringify",
"?",
"...",
";",
".",
":",
"TAG",
"->",
"SIZEOF",
"DOCCOMMENT",
"COMMENT",
"IDENTIFIER",
"STRING_LITERAL",
"CHARACTER_LITERAL",
"ERROR TOKEN",
"FLOAT",
"INT",
"PREPROC_NULL",
"PREPROC_DEFINE",
"PREPROC_IFDEF",
"PREPROC_IFNDEF",
"PREPROC_INCLUDE",
"PREPROC_ENDIF",
"PREPROC_IF",
"PREPROC_PRAGMA",
"PREPROC_ERROR",
"PREPROC_ELSE",
"PREPROC_ELIF",
"PREPROC_UNDEF",
"KEYWORD_VOID",
"KEYWORD_INT",
"KEYWORD_CHAR",
"KEYWORD_UNSIGNED",
"KEYWORD_SIGNED",
"KEYWORD_LONG",
"KEYWORD_SHORT",
"KEYWORD_DOUBLE",
"KEYWORD_FLOAT",
"KEYWORD__BOOL",
"KEYWORD__COMPLEX",
"KEYWORD__IMAGINARY",
"KEYWORD_STATIC",
"KEYWORD_AUTO",
"KEYWORD_CONST",
"KEYWORD_EXTERN",
"KEYWORD_INLINE",
"KEYWORD_REGISTER",
"KEYWORD_RESTRICT",
"KEYWORD_VOLATILE",
"KEYWORD__THREAD_LOCAL",
"KEYWORD__ATOMIC",
"KEYWORD__NORETURN",
"KEYWORD_STRUCT",
"KEYWORD_UNION",
"KEYWORD_ENUM",
"KEYWORD_TYPEDEF",
"KEYWORD_DEFAULT",
"KEYWORD_BREAK",
"KEYWORD_RETURN",
"KEYWORD_SWITCH",
"KEYWORD_IF",
"KEYWORD_ELSE",
"KEYWORD_FOR",
"KEYWORD_WHILE",
"KEYWORD_CASE",
"KEYWORD_CONTINUE",
"KEYWORD_DO",
"KEYWORD_GOTO",
"KEYWORD_SIZEOF",
"KEYWORD__ALIGNAS",
"KEYWORD__ALIGNOF",
"KEYWORD__STATIC_ASSERT",
"KEYWORD__GENERIC",
};
const char *CL_MessageKindString[] = {
"ERROR",
"WARNING",
"TRACE",
};
/*END*/
#define CL_DLL_QUEUE_ADD_MOD(f, l, node, next, prev) \
do { \
if ((f) == 0) { \
(f) = (l) = (node); \
(node)->prev = 0; \
(node)->next = 0; \
} \
else { \
(l)->next = (node); \
(node)->prev = (l); \
(node)->next = 0; \
(l) = (node); \
} \
} while (0)
#define CL_DLL_QUEUE_ADD(f, l, node) CL_DLL_QUEUE_ADD_MOD(f, l, node, next, prev)
#define CL_SLL_QUEUE_ADD_MOD(f, l, n, next) \
do { \
(n)->next = 0; \
if ((f) == 0) { \
(f) = (l) = (n); \
} \
else { \
(l) = (l)->next = (n); \
} \
} while (0)
#define CL_SLL_QUEUE_ADD(f, l, n) CL_SLL_QUEUE_ADD_MOD(f, l, n, next)
#define CL__FORMAT(arena, string, result) \
va_list args1, args2; \
va_start(args1, string); \
va_copy(args2, args1); \
int len = CL_VSNPRINTF(0, 0, string, args2); \
va_end(args2); \
char *result = (char *)CL_PushSize((arena), len + 1); \
CL_VSNPRINTF(result, len + 1, string, args1); \
va_end(args1)
CL_API_FUNCTION void CL_ReportError(CL_LexResult *T, CL_Token *token, const char *string, ...) {
CL__FORMAT(T->arena->other, string, message_string);
CL_Message *result = CL_PushStruct(T->arena->other, CL_Message);
result->kind = CLM_ERROR;
result->string = message_string;
CL_SLL_QUEUE_ADD(T->first_message, T->last_message, result);
result->token = *token;
T->errors += 1;
token->kind = CL_ERROR;
token->error = result;
#if TEST_DEBUG
printf("%s:%d %s\n", token->file, token->line, string);
__debugbreak();
#endif
}
CL_PRIVATE_FUNCTION char *CL_PushStringCopy(CL_Arena *arena, char *p, int size) {
char *copy_buffer = (char *)CL_PushSize(arena, size + 1);
CL__MemoryCopy(copy_buffer, p, size);
copy_buffer[size] = 0;
return copy_buffer;
}
CL_PRIVATE_FUNCTION CL_Token *CL_CopyToken(CL_Arena *arena, CL_Token *token) {
CL_Token *copy_buffer = (CL_Token *)CL_PushSize(arena, sizeof(CL_Token));
CL__MemoryCopy(copy_buffer, token, sizeof(CL_Token));
return copy_buffer;
}
CL_API_FUNCTION void CL_StringifyMessage(char *buff, int buff_size, CL_Message *msg) {
const char *kind = CL_MessageKindString[msg->kind];
CL_SNPRINTF(buff, buff_size, "%s:%d %15s %15s", msg->token.file, msg->token.line, kind, msg->string);
}
CL_API_FUNCTION void CL_Stringify(char *buff, int buff_size, CL_Token *token) {
const char *token_kind = "UNKNOWN";
if (token->kind < CL_COUNT) token_kind = CL_KindString[token->kind];
CL_SNPRINTF(buff, buff_size, "%s:%d %15s %15.*s", token->file, token->line, token_kind, token->len, token->str);
}
CL_API_FUNCTION void CL_PrintMessages(CL_LexResult *lex_result) {
char buff[1024];
for (CL_Message *it = lex_result->first_message; it; it = it->next) {
CL_StringifyMessage(buff, sizeof(buff), it);
printf("%s\n", buff);
}
}
CL_API_FUNCTION void CL_PrintTokens(CL_Tokens tokens) {
char buff[1024];
for (int i = 0; i < tokens.count; i += 1) {
CL_Stringify(buff, sizeof(buff), &tokens.data[i]);
printf("%s\n", buff);
}
}
CL_INLINE void CL_Advance(CL_LexResult *T) {
if (*T->stream == '\n') {
T->line += 1;
T->column = 0;
}
else if (*T->stream == ' ' || *T->stream == '\t') {
T->column += 1;
}
else if (*T->stream == 0) {
return;
}
T->stream += 1;
}
CL_INLINE bool CL_IsAlphabetic(char c) {
bool result = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
return result;
}
CL_INLINE bool CL_IsNumeric(char c) {
bool result = (c >= '0' && c <= '9');
return result;
}
CL_INLINE bool CL_IsHexNumeric(char c) {
bool result = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
return result;
}
CL_INLINE bool CL_IsWhitespace(char c) {
bool result = c == ' ' || c == '\n' || c == '\r' || c == '\t';
return result;
}
CL_INLINE bool CL_IsAlphanumeric(char c) {
bool result = CL_IsAlphabetic(c) || CL_IsNumeric(c);
return result;
}
CL_API_FUNCTION bool CL_EatWhitespace(CL_LexResult *T) {
bool skipped = false;
for (;;) {
if (CL_IsWhitespace(*T->stream)) {
if (*T->stream == '\n') T->inside_of_macro = false;
CL_Advance(T);
skipped = true;
}
else if (T->stream[0] == '\\' && T->stream[1] == '\n') {
CL_Advance(T);
CL_Advance(T);
skipped = true;
}
else if (T->stream[0] == '\\' && T->stream[1] == '\r' && T->stream[2] == '\n') {
CL_Advance(T);
CL_Advance(T);
CL_Advance(T);
skipped = true;
}
else {
break;
}
}
return skipped;
}
CL_API_FUNCTION void CL_SetTokenLength(CL_LexResult *T, CL_Token *token) {
intptr_t diff = T->stream - token->str;
CL_ASSERT(diff < 2147483647);
token->len = (int)diff;
}
CL_API_FUNCTION void CL_TryToFinalizeToken(CL_LexResult *T, CL_Token *token) {
for (; T->attached_comment_index < T->comments.count; T->attached_comment_index += 1) {
CL_Token *it = T->comments.data + T->attached_comment_index;
it->comment_is_attached_to_token = token;
}
if (!token->len) {
CL_SetTokenLength(T, token);
}
if (T->inside_of_macro) {
token->flags |= CL_INSIDE_OF_MACRO;
}
}
CL_PRIVATE_FUNCTION uint64_t CL_CharMapToNumber(char c) {
switch (c) {
case '0': return 0; break;
case '1': return 1; break;
case '2': return 2; break;
case '3': return 3; break;
case '4': return 4; break;
case '5': return 5; break;
case '6': return 6; break;
case '7': return 7; break;
case '8': return 8; break;
case '9': return 9; break;
case 'a':
case 'A': return 10; break;
case 'b':
case 'B': return 11; break;
case 'c':
case 'C': return 12; break;
case 'd':
case 'D': return 13; break;
case 'e':
case 'E': return 14; break;
case 'f':
case 'F': return 15; break;
default: return 255;
}
}
CL_PRIVATE_FUNCTION uint64_t CL_ParseInteger(CL_LexResult *T, CL_Token *token, char *string, uint64_t len, uint64_t base) {
CL_ASSERT(base >= 2 && base <= 16);
uint64_t acc = 0;
for (uint64_t i = 0; i < len; i++) {
uint64_t num = CL_CharMapToNumber(string[i]);
if (num >= base) {
CL_ReportError(T, token, "Internal compiler error! Failed to parse a number");
break;
}
acc *= base;
acc += num;
}
return acc;
}
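/* Worked example (illustrative only, not part of the library): CL_ParseInteger over
   "2f" with base 16 accumulates 0 -> 2 -> 2*16 + 15 = 47, i.e. 0x2f. Any digit whose
   mapped value is >= base (e.g. 'f' with base 10) reports an internal error and stops. */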
typedef struct CL_UTF32Result {
uint32_t out_str;
int advance;
int error;
} CL_UTF32Result;
CL_PRIVATE_FUNCTION CL_UTF32Result CL_UTF8ToUTF32(char *c, int max_advance) {
CL_UTF32Result result = CL_ZeroStruct();
if ((c[0] & 0x80) == 0) { // Top bit of the first byte is clear: single-byte (ASCII) sequence
if (max_advance >= 1) {
result.out_str = c[0];
result.advance = 1;
}
else result.error = 1;
}
else if ((c[0] & 0xe0) == 0xc0) {
if ((c[1] & 0xc0) == 0x80) { // Continuation byte required
if (max_advance >= 2) {
result.out_str = (uint32_t)(c[0] & 0x1f) << 6u | (c[1] & 0x3f);
result.advance = 2;
}
else result.error = 2;
}
else result.error = 2;
}
else if ((c[0] & 0xf0) == 0xe0) {
if ((c[1] & 0xc0) == 0x80 && (c[2] & 0xc0) == 0x80) { // Two continuation bytes required
if (max_advance >= 3) {
result.out_str = (uint32_t)(c[0] & 0xf) << 12u | (uint32_t)(c[1] & 0x3f) << 6u | (c[2] & 0x3f);
result.advance = 3;
}
else result.error = 3;
}
else result.error = 3;
}
else if ((c[0] & 0xf8) == 0xf0) {
if ((c[1] & 0xc0) == 0x80 && (c[2] & 0xc0) == 0x80 && (c[3] & 0xc0) == 0x80) { // Three continuation bytes required
if (max_advance >= 4) {
result.out_str = (uint32_t)(c[0] & 0xf) << 18u | (uint32_t)(c[1] & 0x3f) << 12u | (uint32_t)(c[2] & 0x3f) << 6u | (uint32_t)(c[3] & 0x3f);
result.advance = 4;
}
else result.error = 4;
}
else result.error = 4;
}
else result.error = 4;
return result;
}
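/* Worked example (illustrative only): the two-byte sequence 0xC3 0xA9 ("é") has
   (c[0] & 0xe0) == 0xc0 and a valid continuation byte, so it decodes to
   ((0xC3 & 0x1f) << 6) | (0xA9 & 0x3f) = 0xE9 (U+00E9) with advance == 2. */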
// @todo I think I should look at this again
CL_API_FUNCTION void CL_ParseCharLiteral(CL_LexResult *T, CL_Token *token) {
token->kind = CL_CHARLIT;
token->str = T->stream;
while (*T->stream != '\'') {
if (*T->stream == '\\') {
CL_Advance(T);
}
if (*T->stream == 0) {
CL_ReportError(T, token, "Unclosed character literal!");
return;
}
CL_Advance(T);
}
CL_SetTokenLength(T, token);
if (token->str[0] == '\\') {
switch (token->str[1]) {
case '\\': token->u64 = '\\'; break;
case '\'': token->u64 = '\''; break;
case '"': token->u64 = '"'; break;
case 't': token->u64 = '\t'; break;
case 'v': token->u64 = '\v'; break;
case 'f': token->u64 = '\f'; break;
case 'n': token->u64 = '\n'; break;
case 'r': token->u64 = '\r'; break;
case 'a': token->u64 = '\a'; break;
case 'b': token->u64 = '\b'; break;
case '0':
token->u64 = '\0';
break;
// Octal constant
case 'x':
case 'X': CL_ASSERT(0); break; // Hex constant
case 'u': CL_ASSERT(0); break; // Unicode constant
default: {
CL_ReportError(T, token, "Unknown escape code");
}
}
}
else {
if (token->len > 4) {
CL_ReportError(T, token, "This character literal has invalid format, it's too big");
goto skip_utf_encode;
}
token->u64 = 0;
int i = 0;
for (; i < token->len;) {
CL_UTF32Result result = CL_UTF8ToUTF32(token->str + i, (int)(token->len - i));
i += result.advance;
token->u64 |= result.out_str << (8 * (token->len - i));
if (result.error) {
CL_ReportError(T, token, "This character literal couldnt be parsed as utf8");
break;
}
}
if (i != token->len) {
CL_ReportError(T, token, "Character literal decode error");
}
}
skip_utf_encode:
CL_Advance(T);
}
CL_PRIVATE_FUNCTION void CL_BufferWrite(char *buffer, int buffer_size, int *buffer_iter, char write) {
if (*buffer_iter < buffer_size) {
buffer[*buffer_iter] = write;
*buffer_iter += 1;
}
}
// @todo I think I should look at this again
// Idea: Maybe try to figure out size first and then write the string
CL_API_FUNCTION void CL_ParseString(CL_LexResult *T, CL_Token *token) {
// @todo String builder here, we don't really want a 4096 character limit
int buffer_iter = 0;
int buffer_size = 4096;
char buffer[4096];
token->kind = CL_STRINGLIT;
// First we parse the string normally, writing its contents to scratch memory.
// Afterwards we seek ahead for consecutive string literals. As the spec says,
// adjacent literals form one string, so we combine them. Once everything has been
// written to the scratch buffer, we make a tight copy on the permanent allocator.
combine_next_string_literal:
while (*T->stream != '"' && *T->stream != 0 AND_CL_STRING_TERMINATE_ON_NEW_LINE) {
if (*T->stream == '\\') {
CL_Advance(T);
switch (*T->stream) {
case '\\': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\\'); break;
case '\'': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\''); break;
case '"': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '"'); break;
case 't': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\t'); break;
case 'f': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\f'); break;
case 'n': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\n'); break;
case 'v': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\v'); break;
case 'r': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\r'); break;
case 'a': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\a'); break;
case 'b': CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\b'); break;
case '0':
CL_BufferWrite(buffer, buffer_size, &buffer_iter, '\0');
break;
// Octal constant
case 'x':
case 'X': CL_ASSERT(0); break; // Hex constant
case 'u': CL_ASSERT(0); break; // Unicode constant
}
}
else {
CL_BufferWrite(buffer, buffer_size, &buffer_iter, *T->stream);
}
CL_Advance(T);
}
CL_Advance(T);
// Seek ahead for a consecutive string literal.
// If one follows, combine it with this string.
char *seek_for_next_string = T->stream;
while (CL_IsWhitespace(*seek_for_next_string)) {
seek_for_next_string += 1;
}
if (*seek_for_next_string == '"') {
seek_for_next_string += 1;
while (T->stream != seek_for_next_string) CL_Advance(T);
goto combine_next_string_literal;
}
int len = buffer_iter;
if (len >= buffer_size) {
CL_ReportError(T, token, "Truncated string! Reached 4096 character limit for string literal.");
}
token->string_literal = CL_PushStringCopy(T->arena->other, buffer, len);
}
CL_API_FUNCTION void CL_IsIdentifierKeyword(CL_LexResult *ctx, CL_Token *token) {
if (token->len == 1) return;
char *c = token->str;
/*import meta
meta.gen_lex_keywords()*/
switch (c[0]) {
case 'v': {
switch (c[1]) {
case 'o': {
if (CL_StringsAreEqual(token->str, token->len, "void", 4)) {
token->kind = CL_KEYWORD_VOID;
}
else if (CL_StringsAreEqual(token->str, token->len, "volatile", 8)) {
token->kind = CL_KEYWORD_VOLATILE;
}
} break;
}
} break;
case 'i': {
switch (c[1]) {
case 'n': {
if (CL_StringsAreEqual(token->str, token->len, "int", 3)) {
token->kind = CL_KEYWORD_INT;
}
else if (CL_StringsAreEqual(token->str, token->len, "inline", 6)) {
token->kind = CL_KEYWORD_INLINE;
}
} break;
case 'f': {
if (CL_StringsAreEqual(token->str, token->len, "if", 2)) {
token->kind = CL_KEYWORD_IF;
}
} break;
}
} break;
case 'c': {
switch (c[1]) {
case 'h': {
if (CL_StringsAreEqual(token->str, token->len, "char", 4)) {
token->kind = CL_KEYWORD_CHAR;
}
} break;
case 'o': {
if (CL_StringsAreEqual(token->str, token->len, "const", 5)) {
token->kind = CL_KEYWORD_CONST;
}
else if (CL_StringsAreEqual(token->str, token->len, "continue", 8)) {
token->kind = CL_KEYWORD_CONTINUE;
}
} break;
case 'a': {
if (CL_StringsAreEqual(token->str, token->len, "case", 4)) {
token->kind = CL_KEYWORD_CASE;
}
} break;
}
} break;
case 'u': {
switch (c[1]) {
case 'n': {
if (CL_StringsAreEqual(token->str, token->len, "unsigned", 8)) {
token->kind = CL_KEYWORD_UNSIGNED;
}
else if (CL_StringsAreEqual(token->str, token->len, "union", 5)) {
token->kind = CL_KEYWORD_UNION;
}
} break;
}
} break;
case 's': {
switch (c[1]) {
case 'i': {
if (CL_StringsAreEqual(token->str, token->len, "signed", 6)) {
token->kind = CL_KEYWORD_SIGNED;
}
else if (CL_StringsAreEqual(token->str, token->len, "sizeof", 6)) {
token->kind = CL_KEYWORD_SIZEOF;
}
} break;
case 'h': {
if (CL_StringsAreEqual(token->str, token->len, "short", 5)) {
token->kind = CL_KEYWORD_SHORT;
}
} break;
case 't': {
if (CL_StringsAreEqual(token->str, token->len, "static", 6)) {
token->kind = CL_KEYWORD_STATIC;
}
else if (CL_StringsAreEqual(token->str, token->len, "struct", 6)) {
token->kind = CL_KEYWORD_STRUCT;
}
} break;
case 'w': {
if (CL_StringsAreEqual(token->str, token->len, "switch", 6)) {
token->kind = CL_KEYWORD_SWITCH;
}
} break;
}
} break;
case 'l': {
switch (c[1]) {
case 'o': {
if (CL_StringsAreEqual(token->str, token->len, "long", 4)) {
token->kind = CL_KEYWORD_LONG;
}
} break;
}
} break;
case 'd': {
switch (c[1]) {
case 'o': {
if (CL_StringsAreEqual(token->str, token->len, "double", 6)) {
token->kind = CL_KEYWORD_DOUBLE;
}
else if (CL_StringsAreEqual(token->str, token->len, "do", 2)) {
token->kind = CL_KEYWORD_DO;
}
} break;
case 'e': {
if (CL_StringsAreEqual(token->str, token->len, "default", 7)) {
token->kind = CL_KEYWORD_DEFAULT;
}
} break;
}
} break;
case 'f': {
switch (c[1]) {
case 'l': {
if (CL_StringsAreEqual(token->str, token->len, "float", 5)) {
token->kind = CL_KEYWORD_FLOAT;
}
} break;
case 'o': {
if (CL_StringsAreEqual(token->str, token->len, "for", 3)) {
token->kind = CL_KEYWORD_FOR;
}
} break;
}
} break;
case '_': {
switch (c[1]) {
case 'B': {
if (CL_StringsAreEqual(token->str, token->len, "_Bool", 5)) {
token->kind = CL_KEYWORD__BOOL;
}
} break;
case 'C': {
if (CL_StringsAreEqual(token->str, token->len, "_Complex", 8)) {
token->kind = CL_KEYWORD__COMPLEX;
}
} break;
case 'I': {
if (CL_StringsAreEqual(token->str, token->len, "_Imaginary", 10)) {
token->kind = CL_KEYWORD__IMAGINARY;
}
} break;
case 'T': {
if (CL_StringsAreEqual(token->str, token->len, "_Thread_local", 13)) {
token->kind = CL_KEYWORD__THREAD_LOCAL;
}
} break;
case 'A': {
if (CL_StringsAreEqual(token->str, token->len, "_Atomic", 7)) {
token->kind = CL_KEYWORD__ATOMIC;
}
else if (CL_StringsAreEqual(token->str, token->len, "_Alignas", 8)) {
token->kind = CL_KEYWORD__ALIGNAS;
}
else if (CL_StringsAreEqual(token->str, token->len, "_Alignof", 8)) {
token->kind = CL_KEYWORD__ALIGNOF;
}
} break;
case 'N': {
if (CL_StringsAreEqual(token->str, token->len, "_Noreturn", 9)) {
token->kind = CL_KEYWORD__NORETURN;
}
} break;
case 'S': {
if (CL_StringsAreEqual(token->str, token->len, "_Static_assert", 14)) {
token->kind = CL_KEYWORD__STATIC_ASSERT;
}
} break;
case 'G': {
if (CL_StringsAreEqual(token->str, token->len, "_Generic", 8)) {
token->kind = CL_KEYWORD__GENERIC;
}
} break;
}
} break;
case 'a': {
switch (c[1]) {
case 'u': {
if (CL_StringsAreEqual(token->str, token->len, "auto", 4)) {
token->kind = CL_KEYWORD_AUTO;
}
} break;
}
} break;
case 'e': {
switch (c[1]) {
case 'x': {
if (CL_StringsAreEqual(token->str, token->len, "extern", 6)) {
token->kind = CL_KEYWORD_EXTERN;
}
} break;
case 'n': {
if (CL_StringsAreEqual(token->str, token->len, "enum", 4)) {
token->kind = CL_KEYWORD_ENUM;
}
} break;
case 'l': {
if (CL_StringsAreEqual(token->str, token->len, "else", 4)) {
token->kind = CL_KEYWORD_ELSE;
}
} break;
}
} break;
case 'r': {
switch (c[1]) {
case 'e': {
if (CL_StringsAreEqual(token->str, token->len, "register", 8)) {
token->kind = CL_KEYWORD_REGISTER;
}
else if (CL_StringsAreEqual(token->str, token->len, "restrict", 8)) {
token->kind = CL_KEYWORD_RESTRICT;
}
else if (CL_StringsAreEqual(token->str, token->len, "return", 6)) {
token->kind = CL_KEYWORD_RETURN;
}
} break;
}
} break;
case 't': {
switch (c[1]) {
case 'y': {
if (CL_StringsAreEqual(token->str, token->len, "typedef", 7)) {
token->kind = CL_KEYWORD_TYPEDEF;
}
} break;
}
} break;
case 'b': {
switch (c[1]) {
case 'r': {
if (CL_StringsAreEqual(token->str, token->len, "break", 5)) {
token->kind = CL_KEYWORD_BREAK;
}
} break;
}
} break;
case 'w': {
switch (c[1]) {
case 'h': {
if (CL_StringsAreEqual(token->str, token->len, "while", 5)) {
token->kind = CL_KEYWORD_WHILE;
}
} break;
}
} break;
case 'g': {
switch (c[1]) {
case 'o': {
if (CL_StringsAreEqual(token->str, token->len, "goto", 4)) {
token->kind = CL_KEYWORD_GOTO;
}
} break;
}
} break;
}
/*END*/
}
CL_API_FUNCTION void CL_LexMacroInclude(CL_LexResult *T, CL_Token *token) {
token->kind = CL_PREPROC_INCLUDE;
while (*T->stream == ' ') CL_Advance(T);
char end = 0;
if (*T->stream == '"') {
end = '"';
}
else if (*T->stream == '<') {
end = '>';
token->flags |= CL_SYSTEM_INCLUDE;
}
else {
CL_ReportError(T, token, "Invalid include directive, file not specified");
return;
}
CL_Advance(T);
token->str = T->stream;
while (*T->stream != end) {
if (*T->stream == 0) {
CL_ReportError(T, token, "Invalid include directive, reached end of file while reading filename");
return;
}
if (*T->stream == '\n') {
CL_ReportError(T, token, "Invalid include directive filename, got newline character while reading filename");
break;
}
CL_Advance(T);
}
CL_SetTokenLength(T, token);
CL_Advance(T);
token->str = CL_PushStringCopy(T->arena->other, token->str, token->len);
CL_Token *include_list_item = CL_CopyToken(T->arena->include, token);
T->includes.count += 1;
if (T->includes.data == 0) T->includes.data = include_list_item;
}
CL_API_FUNCTION bool CL_LexMacro(CL_LexResult *T, CL_Token *token) {
while (*T->stream == ' ' || T->stream[0] == '\t') CL_Advance(T);
token->str = T->stream;
while (CL_IsAlphabetic(*T->stream)) CL_Advance(T);
CL_SetTokenLength(T, token);
/*import meta
meta.gen_lex_preproc_keywords() Need to add END*/
switch (*token->str) {
case 'd':
if (CL_StringsAreEqual(token->str, token->len, "define", 6)) {
token->kind = CL_PREPROC_DEFINE;
}
break;
case 'i':
if (CL_StringsAreEqual(token->str, token->len, "ifdef", 5)) {
token->kind = CL_PREPROC_IFDEF;
}
else if (CL_StringsAreEqual(token->str, token->len, "ifndef", 6)) {
token->kind = CL_PREPROC_IFNDEF;
}
else if (CL_StringsAreEqual(token->str, token->len, "include", 7)) {
token->kind = CL_PREPROC_INCLUDE;
CL_LexMacroInclude(T, token);
}
else if (CL_StringsAreEqual(token->str, token->len, "if", 2)) {
token->kind = CL_PREPROC_IF;
}
break;
case 'e':
if (CL_StringsAreEqual(token->str, token->len, "endif", 5)) {
token->kind = CL_PREPROC_ENDIF;
}
else if (CL_StringsAreEqual(token->str, token->len, "error", 5)) {
token->kind = CL_PREPROC_ERROR;
}
else if (CL_StringsAreEqual(token->str, token->len, "else", 4)) {
token->kind = CL_PREPROC_ELSE;
}
else if (CL_StringsAreEqual(token->str, token->len, "elif", 4)) {
token->kind = CL_PREPROC_ELIF;
}
break;
case 'p':
if (CL_StringsAreEqual(token->str, token->len, "pragma", 6)) {
token->kind = CL_PREPROC_PRAGMA;
}
break;
case 'u':
if (CL_StringsAreEqual(token->str, token->len, "undef", 5)) {
token->kind = CL_PREPROC_UNDEF;
}
break;
default: return false;
}
return true;
}
CL_API_FUNCTION void CL_InitLexResult(CL_LexResult *T, CL_ArenaTuple *arena, char *filename, char *filecontent) {
CL__MemoryZero(T, sizeof(CL_LexResult));
T->arena = arena;
T->stream = filecontent;
T->stream_begin = filecontent;
T->file = filename;
}
CL_API_FUNCTION CL_LexResult *CL_CreateLexingResult(CL_ArenaTuple *arena, char *filename, char *filecontent) {
CL_LexResult *T = CL_PushStruct(arena->other, CL_LexResult);
CL_InitLexResult(T, arena, filename, filecontent);
return T;
}
// skipped_space covers cases like "#define Memes (a)": because of the space, that is not a function-like macro.
static uint32_t CL_TokenID; // @todo: make it stable, thread local?
CL_API_FUNCTION void CL_PrepareToken(CL_LexResult *T, CL_Token *token, bool skipped_space) {
CL__MemoryZero(token, sizeof(*token));
token->str = T->stream;
token->line = T->line;
token->column = T->column;
token->file = T->file;
token->id = ++CL_TokenID;
if (skipped_space) token->flags |= CL_WHITESPACE_BEFORE_TOKEN;
CL_Advance(T);
}
CL_API_FUNCTION void CL_DefaultTokenize(CL_LexResult *T, CL_Token *token) {
char *c = token->str;
switch (*c) {
case 0: break;
case '(': token->kind = CL_OPENPAREN; break;
case ')': token->kind = CL_CLOSEPAREN; break;
case '{': token->kind = CL_OPENBRACE; break;
case '}': token->kind = CL_CLOSEBRACE; break;
case '[': token->kind = CL_OPENBRACKET; break;
case ']': token->kind = CL_CLOSEBRACKET; break;
case ',': token->kind = CL_COMMA; break;
case '~': token->kind = CL_NEG; break;
case '?': token->kind = CL_QUESTION; break;
case ';': token->kind = CL_SEMICOLON; break;
case '.': {
token->kind = CL_DOT;
if (T->stream[0] == '.' && T->stream[1] == '.') {
CL_Advance(T);
CL_Advance(T);
token->kind = CL_THREEDOTS;
}
} break;
case ':': {
token->kind = CL_COLON;
} break;
case '/': {
token->kind = CL_DIV;
if (*T->stream == '/') {
token->kind = CL_COMMENT;
CL_Advance(T);
while (*T->stream != '\n' && *T->stream != 0) {
CL_Advance(T);
}
CL_SetTokenLength(T, token);
CL_Token *comment_token = CL_CopyToken(T->arena->comment, token);
if (T->comments.data == 0) T->comments.data = comment_token;
T->comments.count += 1;
}
else if (*T->stream == '*') {
token->kind = CL_COMMENT;
CL_Advance(T);
for (;;) {
if (T->stream[0] == '*' && T->stream[1] == '/') {
break;
}
if (T->stream[0] == 0) {
CL_ReportError(T, token, "Unclosed block comment");
goto error_end_path;
}
CL_Advance(T);
}
token->str += 2;
CL_SetTokenLength(T, token);
CL_Advance(T);
CL_Advance(T);
CL_Token *comment_token = CL_CopyToken(T->arena->comment, token);
if (T->comments.data == 0) T->comments.data = comment_token;
T->comments.count += 1;
}
else if (*T->stream == '=') {
token->kind = CL_DIVASSIGN;
CL_Advance(T);
}
} break;
case '#': {
if (*T->stream == '#') {
token->kind = CL_MACRO_CONCAT;
CL_Advance(T);
}
else {
bool is_macro_directive = CL_LexMacro(T, token);
if (is_macro_directive) {
T->inside_of_macro = true;
}
else {
if (!T->inside_of_macro) {
CL_ReportError(T, token, "Invalid preprocessor directive");
goto error_end_path;
}
token->kind = CL_PREPROC_STRINGIFY;
token->str = T->stream;
while (*T->stream == '_' || CL_IsAlphanumeric(*T->stream))
CL_Advance(T);
CL_SetTokenLength(T, token);
}
}
} break;
case '>': {
if (*T->stream == '=') {
token->kind = CL_GREATERTHEN_OR_EQUAL;
CL_Advance(T);
}
else if (*T->stream == '>') {
CL_Advance(T);
if (*T->stream == '=') {
CL_Advance(T);
token->kind = CL_RIGHTSHIFTASSIGN;
}
else {
token->kind = CL_RIGHTSHIFT;
}
}
else {
token->kind = CL_GREATERTHEN;
}
} break;
case '<': {
token->kind = CL_LESSERTHEN;
if (*T->stream == '=') {
token->kind = CL_LESSERTHEN_OR_EQUAL;
CL_Advance(T);
}
else if (*T->stream == '<') {
CL_Advance(T);
if (*T->stream == '=') {
CL_Advance(T);
token->kind = CL_LEFTSHIFTASSIGN;
}
else {
token->kind = CL_LEFTSHIFT;
}
}
} break;
case '&': {
if (*T->stream == '=') {
token->kind = CL_ANDASSIGN;
CL_Advance(T);
}
else if (*T->stream == '&') {
token->kind = CL_AND;
CL_Advance(T);
}
else {
token->kind = CL_BITAND;
}
} break;
case '-': {
if (*T->stream == '-') {
token->kind = CL_DECREMENT;
CL_Advance(T);
}
else if (*T->stream == '=') {
token->kind = CL_SUBASSIGN;
CL_Advance(T);
}
else {
token->kind = CL_SUB;
}
} break;
case '+': {
if (*T->stream == '+') {
token->kind = CL_INCREMENT;
CL_Advance(T);
}
else if (*T->stream == '=') {
token->kind = CL_ADDASSIGN;
CL_Advance(T);
}
else {
token->kind = CL_ADD;
}
} break;
case '|': {
if (*T->stream == '|') {
token->kind = CL_OR;
CL_Advance(T);
}
else if (*T->stream == '=') {
token->kind = CL_ORASSIGN;
CL_Advance(T);
}
else {
token->kind = CL_BITOR;
}
} break;
case '=': {
if (*T->stream != '=') {
token->kind = CL_ASSIGN;
}
else {
CL_Advance(T);
token->kind = CL_EQUALS;
}
} break;
case '!': {
if (*T->stream != '=') {
token->kind = CL_NOT;
}
else {
CL_Advance(T);
token->kind = CL_NOTEQUALS;
}
} break;
case '*': {
token->kind = CL_MUL;
if (*T->stream == '=') {
CL_Advance(T);
token->kind = CL_MULASSIGN;
}
} break;
case '%': {
token->kind = CL_MOD;
if (*T->stream == '=') {
token->kind = CL_MODASSIGN;
CL_Advance(T);
}
} break;
case '^': {
token->kind = CL_BITXOR;
if (*T->stream == '=') {
CL_Advance(T);
token->kind = CL_XORASSIGN;
}
} break;
case '"': {
CL_ParseString(T, token);
} break;
case '\'': {
CL_ParseCharLiteral(T, token);
} break;
case 'U': { // @todo Unicode32
if (*T->stream == '"') {
token->fix = CL_PREFIX_U32;
CL_Advance(T);
CL_ParseString(T, token);
}
else if (*T->stream == '\'') {
token->fix = CL_PREFIX_U32;
CL_Advance(T);
CL_ParseCharLiteral(T, token);
}
else goto parse_regular_char;
} break;
case 'u': { // Unicode16
if (*T->stream == '8') { // Unicode8
if (T->stream[1] == '"') { // U8 STRING
token->fix = CL_PREFIX_U8;
CL_Advance(T);
CL_Advance(T);
CL_ParseString(T, token);
}
else if (T->stream[1] == '\'') { // U8 CHAR
token->fix = CL_PREFIX_U8;
CL_Advance(T);
CL_Advance(T);
CL_ParseCharLiteral(T, token);
}
else goto parse_regular_char;
}
else if (*T->stream == '"') { // U16 STRING
token->fix = CL_PREFIX_U16;
CL_Advance(T);
CL_ParseString(T, token);
}
else if (*T->stream == '\'') { // U16 CHAR
token->fix = CL_PREFIX_U16;
CL_Advance(T);
CL_ParseCharLiteral(T, token);
}
else goto parse_regular_char;
} break;
case 'L': { // Widechar
if (*T->stream == '"') {
token->fix = CL_PREFIX_L;
CL_Advance(T);
CL_ParseString(T, token); // @todo UTF16
}
else if (*T->stream == '\'') {
token->fix = CL_PREFIX_L;
CL_Advance(T);
CL_ParseCharLiteral(T, token);
}
else goto parse_regular_char;
} break;
case 'A':
case 'a':
case 'B':
case 'b':
case 'C':
case 'c':
case 'D':
case 'd':
case 'E':
case 'e':
case 'F':
case 'f':
case 'G':
case 'g':
case 'H':
case 'h':
case 'I':
case 'i':
case 'J':
case 'j':
case 'K':
case 'k':
/*case 'L':*/ case 'l':
case 'M':
case 'm':
case 'N':
case 'n':
case 'O':
case 'o':
case 'P':
case 'p':
case 'Q':
case 'q':
case 'R':
case 'r':
case 'S':
case 's':
case 'T':
case 't':
// case 'U': case 'u':
case 'V':
case 'v':
case 'W':
case 'w':
case 'X':
case 'x':
case 'Y':
case 'y':
case 'Z':
case 'z':
case '_':
parse_regular_char : {
token->kind = CL_IDENTIFIER;
while (*T->stream == '_' || CL_IsAlphanumeric(*T->stream)) {
CL_Advance(T);
}
CL_SetTokenLength(T, token);
CL_IsIdentifierKeyword(T, token);
} break;
case '0': {
if (*T->stream == 'x' || *T->stream == 'X') {
token->kind = CL_INT;
token->flags |= CL_HEX;
CL_Advance(T);
while (CL_IsHexNumeric(*T->stream)) {
CL_Advance(T);
}
uint64_t len = T->stream - token->str;
CL_ASSERT(len > 2);
token->u64 = CL_ParseInteger(T, token, token->str + 2, len - 2, 16);
break;
}
}
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
token->kind = CL_INT;
for (;;) {
if (*T->stream == '.') {
if (token->kind == CL_FLOAT) {
CL_ReportError(T, token, "Failed to parse a floating point number, invalid format, found multiple '.'");
}
if (token->kind == CL_INT) {
token->kind = CL_FLOAT;
}
}
else if (CL_IsNumeric(*T->stream) == false) {
break;
}
CL_Advance(T);
}
if (token->kind == CL_INT) {
uint64_t len = T->stream - token->str;
CL_ASSERT(len > 0);
token->u64 = CL_ParseInteger(T, token, token->str, len, 10);
}
else if (token->kind == CL_FLOAT) {
token->f64 = CL_STRING_TO_DOUBLE(token->str, (int)(T->stream - token->str)); // token->len is not set yet at this point
}
else {
CL_ASSERT(token->kind == CL_ERROR);
}
if (*T->stream == 'f' || *T->stream == 'F') {
CL_Advance(T);
token->fix = CL_SUFFIX_F;
}
else if (*T->stream == 'l' || *T->stream == 'L') {
CL_Advance(T);
token->fix = CL_SUFFIX_L;
if (*T->stream == 'l' || *T->stream == 'L') {
CL_Advance(T);
token->fix = CL_SUFFIX_LL;
if (*T->stream == 'u' || *T->stream == 'U') {
CL_Advance(T);
token->fix = CL_SUFFIX_ULL;
}
}
else if (*T->stream == 'u' || *T->stream == 'U') {
CL_Advance(T);
token->fix = CL_SUFFIX_UL;
}
}
else if (*T->stream == 'u' || *T->stream == 'U') {
CL_Advance(T);
token->fix = CL_SUFFIX_U;
if (*T->stream == 'l' || *T->stream == 'L') {
CL_Advance(T);
token->fix = CL_SUFFIX_UL;
if (*T->stream == 'l' || *T->stream == 'L') {
CL_Advance(T);
token->fix = CL_SUFFIX_ULL;
}
}
}
} break;
default: {
CL_Message *result = CL_PushStruct(T->arena->other, CL_Message);
result->kind = CLM_WARNING;
result->string = (char *)"Unhandled character, skipping ...";
CL_SLL_QUEUE_ADD(T->first_message, T->last_message, result);
result->token = *token;
token->kind = CL_COMMENT;
} break;
}
error_end_path:;
}
CL_API_FUNCTION bool CL_IsComment(CL_Kind kind) {
bool result = (kind == CL_COMMENT);
return result;
}
CL_API_FUNCTION void CL_InitNextToken(CL_LexResult *T, CL_Token *token) {
// Skip comments; they get copied to the comment arena and gathered on the tokenizer.
// The first non-comment token gets those comments attached.
do {
bool skipped = CL_EatWhitespace(T);
CL_PrepareToken(T, token, skipped);
CL_DefaultTokenize(T, token);
} while (CL_IsComment(token->kind));
CL_TryToFinalizeToken(T, token);
}
CL_API_FUNCTION void CL_AddToken(CL_LexResult *T, CL_Token *token) {
if (!T->tokens.data) T->tokens.data = token;
T->tokens.count += 1;
}
CL_API_FUNCTION void CL_AddTokenEx(CL_Arena *arena, CL_Tokens *tokens, CL_Token *token_to_add) {
if (token_to_add->kind != CL_EOF) {
CL_Token *token = CL_PushStruct(arena, CL_Token);
*token = *token_to_add;
if (!tokens->data) tokens->data = token;
tokens->count += 1;
}
}
CL_API_FUNCTION void CL_AddTokenList(CL_Arena *arena, CL_Tokens *main, CL_Tokens *tokens_to_add) {
for (int i = 0; i < tokens_to_add->count; i += 1) {
CL_Token *it = tokens_to_add->data + i;
CL_AddTokenEx(arena, main, it);
}
}
CL_API_FUNCTION CL_Token *CL_AddNextToken(CL_LexResult *T) {
CL_Token *token = CL_PushStruct(T->arena->token, CL_Token);
CL_InitNextToken(T, token);
CL_AddToken(T, token);
return token;
}
CL_API_FUNCTION void CL_LexStringEx(CL_LexResult *result) {
CL_Token *token;
do {
token = CL_AddNextToken(result);
} while (token->kind != CL_EOF);
}
CL_API_FUNCTION CL_LexResult *CL_LexString(CL_ArenaTuple *arena, char *filename, char *string) {
CL_LexResult *result = CL_CreateLexingResult(arena, filename, string);
CL_LexStringEx(result);
return result;
}
CL_API_FUNCTION CL_LexResult *CL_LexFile(CL_ArenaTuple *arena, char *filename) {
char *file = CL_ReadFile(arena->other, filename);
CL_LexResult *result = 0;
if (file) {
result = CL_LexString(arena, filename, file);
}
return result;
}
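/* A minimal usage sketch (illustrative, not part of the library): lex a single file and
   dump the result. It assumes the caller backs each arena in the CL_ArenaTuple with its
   own memory, since CL_PushSize above only bumps len against cap and never grows the
   buffer; field names follow their use in CL_PushSize. The buffer sizes and the
   "example.c" path are hypothetical.

       static char token_mem[1 << 20], other_mem[1 << 20];
       static char comment_mem[1 << 18], include_mem[1 << 16];
       CL_ArenaTuple arenas;
       CL_InitDefaultTuple(&arenas); // defined near the end of this file
       arenas.token->buff = token_mem;     arenas.token->cap = sizeof(token_mem);
       arenas.other->buff = other_mem;     arenas.other->cap = sizeof(other_mem);
       arenas.comment->buff = comment_mem; arenas.comment->cap = sizeof(comment_mem);
       arenas.include->buff = include_mem; arenas.include->cap = sizeof(include_mem);

       CL_LexResult *lex = CL_LexFile(&arenas, "example.c");
       if (lex) {
           CL_PrintTokens(lex->tokens); // one line per token
           CL_PrintMessages(lex);       // lexer warnings and errors
       }
*/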
CL_API_FUNCTION void CL_AddLexResult(CL_LexList *list, CL_LexResult *result) {
if (result == 0) return;
CL_SLL_QUEUE_ADD_MOD(list->first_result, list->last_result, result, next_result);
list->count += 1;
}
CL_API_FUNCTION CL_LexList CL_MakeLexList(CL_LexResult *l) {
CL_LexList result = CL_ZeroStruct();
CL_AddLexResult(&result, l);
return result;
}
CL_PRIVATE_FUNCTION void CL__SetIncludeToken(CL_IncludeIter *iter, CL_Token *token) {
if (token) {
iter->include_token = token;
iter->filename = token->str;
iter->is_system_include = token->flags & CL_SYSTEM_INCLUDE;
}
else {
iter->include_token = 0;
iter->filename = 0;
iter->is_system_include = 0;
}
}
CL_API_FUNCTION void CL_GetNextInclude(CL_IncludeIter *iter) {
if (iter->inited_with_filename) {
iter->parent = iter->lex_list->first_result;
iter->inited_with_filename = false;
}
for (; iter->parent;) {
iter->include_index += 1;
if (iter->include_index >= iter->parent->includes.count) {
iter->parent = iter->parent->next_result;
CL__SetIncludeToken(iter, 0);
iter->include_index = -1;
continue;
}
CL_Token *it = iter->parent->includes.data + iter->include_index;
CL__SetIncludeToken(iter, it);
if (iter->resolve) {
char *filename = CL_ResolveFilepath(iter->arena, &iter->search_paths, iter->filename, iter->parent->file, iter->is_system_include);
if (CL_IsValidFile(iter->lex_list, filename)) {
iter->filename = filename;
}
else {
CL__SetIncludeToken(iter, 0);
continue;
}
}
return;
}
}
CL_API_FUNCTION CL_IncludeIter CL_IterateFileAndResolvedIncludes(CL_ArenaTuple *arena, char *filename, CL_SearchPaths search_paths) {
CL_IncludeIter result;
CL__MemoryZero(&result, sizeof(CL_IncludeIter));
result.lex_list = CL_PushStruct(arena->other, CL_LexList);
if (CL_FileExists(filename)) {
result.inited_with_filename = true;
result.filename = filename;
}
result.include_index = -1;
result.resolve = true;
result.search_paths = search_paths;
result.arena = arena->other;
return result;
}
CL_API_FUNCTION CL_IncludeIter CL_IterateIncludes(CL_LexList *list) {
CL_IncludeIter result;
CL__MemoryZero(&result, sizeof(CL_IncludeIter));
result.lex_list = list;
result.parent = list->first_result;
result.include_index = -1;
CL_GetNextInclude(&result);
return result;
}
CL_API_FUNCTION CL_IncludeIter CL_IterateResolvedIncludes(CL_Arena *arena, CL_LexList *list, CL_SearchPaths search_paths) {
CL_IncludeIter result;
CL__MemoryZero(&result, sizeof(CL_IncludeIter));
result.lex_list = list;
result.parent = list->first_result;
result.include_index = -1;
result.resolve = true;
result.search_paths = search_paths;
result.arena = arena;
CL_GetNextInclude(&result);
return result;
}
#define CL_IS_POW2(x) (((x) & ((x)-1)) == 0)
#define CL_WRAP_AROUND_POWER_OF_2(x, pow2) (((x) & ((pow2)-1llu)))
CL_API_FUNCTION void CL_InitInternTable(CL_Arena *arena, CL_InternTable *table, int size) {
CL_ASSERT(CL_IS_POW2(size));
table->arena = arena;
table->entries = CL_PushArray(arena, CL_InternEntry, size);
table->entry_count = size;
table->occupied_entry_count = 0;
}
CL_API_FUNCTION CL_InternTable *CL_CreateInternTable(CL_Arena *arena, int size) {
CL_InternTable *result = CL_PushStruct(arena, CL_InternTable);
CL_InitInternTable(arena, result, size);
return result;
}
CL_API_FUNCTION CL_Intern *CL_InsertIntern(CL_InternTable *table, char *string, int len) {
CL_ASSERT(table->arena);
uint64_t hash = CL__HASH_BYTES(string, len);
if (hash == 0) hash += 1;
uint64_t index = CL_WRAP_AROUND_POWER_OF_2(hash, table->entry_count);
CL_InternEntry *it = table->entries + index;
for (;;) {
if (it->hash == 0) {
it->string = CL_PushStringCopy(table->arena, string, len);
it->len = len;
it->hash = hash;
table->occupied_entry_count += 1;
return it->string;
}
else if (CL_StringsAreEqual(string, len, it->string, it->len)) {
return it->string;
}
if (!it->next) {
it->next = CL_PushStruct(table->arena, CL_InternEntry);
}
it = it->next;
}
}
CL_API_FUNCTION void CL_InternResult(CL_InternTable *table, CL_LexResult *result) {
for (int i = 0; i < result->tokens.count; i += 1) {
CL_Token *it = result->tokens.data + i;
if (it->kind == CL_IDENTIFIER) {
it->intern = CL_InsertIntern(table, it->str, it->len);
}
}
}
CL_API_FUNCTION void CL_InternListEx(CL_InternTable *table, CL_LexList *list) {
for (CL_LexResult *it = list->first_result; it; it = it->next_result) {
CL_InternResult(table, it);
}
}
CL_API_FUNCTION void CL_InternList(CL_Arena *arena, CL_LexList *list) {
list->intern_table = CL_CreateInternTable(arena, 4096);
CL_InternListEx(list->intern_table, list);
}
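/* Interning sketch (illustrative): after lexing, interning makes identifier comparison a
   pointer compare. Assumes a lexed CL_LexList "list" and an arena "arena" set up as in
   the sketch after CL_LexFile above.

       CL_InternList(arena, &list);
       // Two CL_IDENTIFIER tokens now name the same identifier
       // exactly when token_a->intern == token_b->intern.
*/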
CL_PRIVATE_FUNCTION char *CL_ChopLastSlash(CL_Arena *arena, char *str) {
int i = 0;
int slash_pos = -1;
while (str[i]) {
if (str[i] == '/') {
slash_pos = i;
}
i += 1;
}
char *result = str;
if (slash_pos != -1) {
result = CL_PushStringCopy(arena, str, slash_pos);
}
else {
result = (char *)"./";
}
return result;
}
CL_PRIVATE_FUNCTION char *CL_JoinPath(CL_Arena *arena, char *a, char *b) {
int alen = CL_StringLength(a);
int blen = CL_StringLength(b);
int additional_len = 0;
if (alen && a[alen - 1] != '/') additional_len = 1;
char *result = CL_PushArray(arena, char, alen + blen + 1 + additional_len);
CL__MemoryCopy(result, a, alen);
if (additional_len) result[alen++] = '/';
CL__MemoryCopy(result + alen, b, blen);
result[alen + blen] = 0;
return result;
}
CL_PRIVATE_FUNCTION bool CL_IsAbsolutePath(char *path) {
#if _WIN32
bool result = CL_IsAlphabetic(path[0]) && path[1] == ':' && path[2] == '/';
#else
bool result = path[0] == '/';
#endif
return result;
}
char *CL_SkipToLastSlash(char *p) {
int last_slash = 0;
for (int i = 0; p[i]; i += 1) {
if (p[i] == '/') last_slash = i;
}
return p + last_slash;
}
CL_API_FUNCTION char *CL_ResolveFilepath(CL_Arena *arena, CL_SearchPaths *search_paths, char *filename, char *parent_file, bool is_system_include) {
CL_SearchPaths null_search_paths = CL_ZeroStruct();
if (search_paths == 0) search_paths = &null_search_paths;
if (search_paths->file_begin_to_ignore) {
char *name = CL_SkipToLastSlash(filename);
int namelen = CL_StringLength(name);
char *ignore = search_paths->file_begin_to_ignore;
int ignorelen = CL_StringLength(ignore);
if (namelen > ignorelen) {
namelen = ignorelen;
}
if (CL_StringsAreEqual(name, namelen, search_paths->file_begin_to_ignore, ignorelen)) {
return 0;
}
}
if (CL_IsAbsolutePath(filename) && CL_FileExists(filename)) {
return filename;
}
if (is_system_include) {
for (int path_i = 0; path_i < search_paths->system_include_path_count; path_i += 1) {
char *path_it = search_paths->system_include_path[path_i];
char *file = CL_JoinPath(arena, path_it, filename);
if (CL_FileExists(file)) {
return file;
}
}
}
else {
if (parent_file) {
char *parent_dir = CL_ChopLastSlash(arena, parent_file);
char *file = CL_JoinPath(arena, parent_dir, filename);
if (CL_FileExists(file)) {
return file;
}
}
for (int path_i = 0; path_i < search_paths->include_path_count; path_i += 1) {
char *path_it = search_paths->include_path[path_i];
char *file = CL_JoinPath(arena, path_it, filename);
if (CL_FileExists(file)) {
return file;
}
}
}
return 0;
}
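/* Resolution order sketch (illustrative): for #include "foo.h" seen in src/main.c, the
   lookup tries src/foo.h first (directory of the parent file), then each entry in
   search_paths->include_path; for #include <foo.h> only the system_include_path entries
   are tried. An absolute path that exists is returned as-is, and a NULL return means the
   file was not found or was filtered out by file_begin_to_ignore. */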
CL_API_FUNCTION bool CL_IsValidFile(CL_LexList *list, char *filename) {
if (filename == 0) return false;
int filename_len = CL_StringLength(filename);
if (filename_len == 0) return false;
for (CL_LexResult *it = list->first_result; it; it = it->next_result) {
int file_len = CL_StringLength(it->file);
if (CL_StringsAreEqual(filename, filename_len, it->file, file_len)) {
return false;
}
}
return true;
}
CL_API_FUNCTION CL_LexResult *CL_GetFile(CL_LexList *list, char *name) {
for (CL_LexResult *it = list->first_result; it; it = it->next_result) {
if (CL_StringsAreEqual(it->file, CL_StringLength(it->file), name, CL_StringLength(name))) {
return it;
}
}
return 0;
}
CL_API_FUNCTION void CL_InitDefaultTuple(CL_ArenaTuple *tuple) {
CL__MemoryZero(tuple, sizeof(CL_ArenaTuple));
tuple->comment = &tuple->default_comment;
tuple->token = &tuple->default_token;
tuple->include = &tuple->default_include;
tuple->other = &tuple->default_other;
}
CL_API_FUNCTION CL_LexList CL_LexRecursive(CL_ArenaTuple *arena, char *filename, CL_SearchPaths paths) {
CL_LexResult *first_file = CL_LexFile(arena, filename);
CL_LexList result = CL_MakeLexList(first_file);
result.search_paths = paths;
for (CL_IncludeIter iter = CL_IterateResolvedIncludes(arena->other, &result, paths); iter.filename; CL_GetNextInclude(&iter)) {
CL_LexResult *file = CL_LexFile(arena, iter.filename);
CL_AddLexResult(&result, file);
}
return result;
}
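/* Recursive lexing sketch (illustrative): lex a root file plus everything it includes
   that can be resolved. Assumes arenas set up as in the sketch after CL_LexFile above;
   the CL_SearchPaths field names follow their use in CL_ResolveFilepath, and the
   "include" directory and "example.c" path are hypothetical.

       char *include_dirs[] = { "include" };
       CL_SearchPaths paths = CL_ZeroStruct();
       paths.include_path = include_dirs;
       paths.include_path_count = 1;
       CL_LexList list = CL_LexRecursive(&arenas, "example.c", paths);
       for (CL_LexResult *it = list.first_result; it; it = it->next_result) {
           CL_PrintMessages(it); // report problems per lexed file
       }
*/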