Refactor the folder structure, which only had 2 'real' modules; now that it's more real, there is no need to split into folders besides the external one

This commit is contained in:
Krzosa Karol
2026-02-09 23:11:24 +01:00
parent fde4e463ad
commit 56a729b617
69 changed files with 12 additions and 346 deletions

237
src/basic_unicode.cpp Normal file
View File

@@ -0,0 +1,237 @@
// Decodes a single code point from UTF-16 input.
//   c           - first code unit to decode.
//   max_advance - number of code units that may be read starting at c.
// On success error stays 0, out_str holds the code point and advance the number
// of units consumed (1 or 2).
// error == 1: max_advance < 1, nothing could be read.
// error == 2: c[0] is a high surrogate but max_advance does not allow reading
//             the second unit of the pair.
// A surrogate that is not part of a valid pair (and is not cut off by
// max_advance) is passed through unchanged with advance == 1.
API UTF32Result UTF16ToUTF32(uint16_t *c, int64_t max_advance) {
    UTF32Result result;
    MemoryZero(&result, sizeof(result));
    if (max_advance < 1) {
        result.error = 1;
        return result;
    }

    result.advance = 1;
    result.out_str = c[0];

    bool is_high_surrogate = c[0] >= 0xD800 && c[0] <= 0xDBFF;
    if (is_high_surrogate) {
        if (max_advance < 2) {
            // c[1] is outside the readable range, so it must not be inspected.
            result.error = 2;
        } else if (c[1] >= 0xDC00 && c[1] <= 0xDFFF) {
            // Combine the 10 payload bits of each surrogate and add the 0x10000 bias.
            result.out_str = 0x10000;
            result.out_str += (uint32_t)(c[0] & 0x03FF) << 10u | (c[1] & 0x03FF);
            result.advance = 2;
        }
    }
    return result;
}
// Encodes one code point as UTF-8.
// Returns a UTF8Result whose out_str holds len bytes (1 to 4). For values above
// 0x10FFFF nothing is written and error is set to 1.
API UTF8Result UTF32ToUTF8(uint32_t codepoint) {
    UTF8Result encoded;
    MemoryZero(&encoded, sizeof(encoded));

    // Out of the Unicode range: report and bail out.
    if (codepoint > 0x10FFFF) {
        encoded.error = 1;
        return encoded;
    }

    // 7 bits: plain ASCII.
    if (codepoint <= 0x7F) {
        encoded.len = 1;
        encoded.out_str[0] = (char)codepoint;
        return encoded;
    }

    // 11 bits: 5 + 6.
    if (codepoint <= 0x7FF) {
        encoded.len = 2;
        encoded.out_str[0] = 0xc0 | (0x1f & (codepoint >> 6));
        encoded.out_str[1] = 0x80 | (0x3f & codepoint);
        return encoded;
    }

    // 16 bits: 4 + 6 + 6.
    if (codepoint <= 0xFFFF) {
        encoded.len = 3;
        encoded.out_str[0] = 0xe0 | (0xf & (codepoint >> 12));
        encoded.out_str[1] = 0x80 | (0x3f & (codepoint >> 6));
        encoded.out_str[2] = 0x80 | (0x3f & codepoint);
        return encoded;
    }

    // 21 bits: 3 + 6 + 6 + 6.
    encoded.len = 4;
    encoded.out_str[0] = 0xf0 | (0x7 & (codepoint >> 18));
    encoded.out_str[1] = 0x80 | (0x3f & (codepoint >> 12));
    encoded.out_str[2] = 0x80 | (0x3f & (codepoint >> 6));
    encoded.out_str[3] = 0x80 | (0x3f & codepoint);
    return encoded;
}
// Decodes a single code point from UTF-8 input.
//   c           - first byte to decode.
//   max_advance - number of bytes that may be read starting at c.
// On success error stays 0, out_str holds the decoded value and advance the
// number of bytes consumed (1 to 4).
// On failure error is non-zero: 1 when nothing may be read, otherwise the length
// (2, 3 or 4) of the sequence announced by the lead byte when a continuation
// byte is missing or lies beyond max_advance; 4 is also used for an invalid lead byte.
// Every byte is checked against max_advance before it is read, so the function
// never touches memory past c + max_advance.
API UTF32Result UTF8ToUTF32(uint8_t *c, int64_t max_advance) {
    UTF32Result result;
    MemoryZero(&result, sizeof(result));
    if (max_advance < 1) {
        result.error = 1;
        return result;
    }

    uint8_t lead = c[0];
    if ((lead & 0x80) == 0) { // 0xxxxxxx: single byte
        result.out_str = lead;
        result.advance = 1;
    } else if ((lead & 0xe0) == 0xc0) { // 110xxxxx: one continuation byte required
        if (max_advance >= 2 && (c[1] & 0xc0) == 0x80) {
            result.out_str = (uint32_t)(lead & 0x1f) << 6u | (c[1] & 0x3f);
            result.advance = 2;
        } else result.error = 2;
    } else if ((lead & 0xf0) == 0xe0) { // 1110xxxx: two continuation bytes required
        if (max_advance >= 3 && (c[1] & 0xc0) == 0x80 && (c[2] & 0xc0) == 0x80) {
            result.out_str = (uint32_t)(lead & 0xf) << 12u | (uint32_t)(c[1] & 0x3f) << 6u | (c[2] & 0x3f);
            result.advance = 3;
        } else result.error = 3;
    } else if ((lead & 0xf8) == 0xf0) { // 11110xxx: three continuation bytes required
        if (max_advance >= 4 && (c[1] & 0xc0) == 0x80 && (c[2] & 0xc0) == 0x80 && (c[3] & 0xc0) == 0x80) {
            // Bit 3 of the lead byte is known to be clear here, so masking with 0x7 keeps all payload bits.
            result.out_str = (uint32_t)(lead & 0x7) << 18u | (uint32_t)(c[1] & 0x3f) << 12u | (uint32_t)(c[2] & 0x3f) << 6u | (uint32_t)(c[3] & 0x3f);
            result.advance = 4;
        } else result.error = 4;
    } else {
        // Stray continuation byte or a lead byte that starts no valid sequence.
        result.error = 4;
    }
    return result;
}
// Encodes one code point as UTF-16.
// Values below 0x10000 are stored as a single unit; values up to 0x10FFFF become
// a high/low surrogate pair. Anything larger sets error to 1.
API UTF16Result UTF32ToUTF16(uint32_t codepoint) {
    UTF16Result encoded;
    MemoryZero(&encoded, sizeof(encoded));

    if (codepoint > 0x10FFFF) {
        encoded.error = 1;
        return encoded;
    }

    if (codepoint >= 0x10000) {
        // Remove the bias, then split the remaining 20 bits into two 10-bit halves.
        uint32_t offset = codepoint - 0x10000;
        uint32_t high_bits = offset >> 10;
        uint32_t low_bits = offset & 0x3FF;
        encoded.out_str[0] = (uint16_t)(0xD800 | high_bits);
        encoded.out_str[1] = (uint16_t)(0xDC00 | low_bits);
        encoded.len = 2;
        return encoded;
    }

    encoded.out_str[0] = (uint16_t)codepoint;
    encoded.out_str[1] = 0;
    encoded.len = 1;
    return encoded;
}
// Error path shared by the conversion loops: appends the replacement character
// `question_mark` when there is still room before the terminator slot, then
// moves the input index forward by `I`.
// Expects `buffer`, `buffer_size`, `outlen` and `i` to exist in the expanding scope.
#define UTF__HANDLE_DECODE_ERROR(question_mark, I)  \
    do {                                            \
        if (outlen < buffer_size - 1) {             \
            buffer[outlen++] = (question_mark);     \
        }                                           \
        i += (I);                                   \
    } while (0)
// Converts UTF-16 text to null-terminated UTF-8.
//   buffer      - destination for the UTF-8 bytes.
//   buffer_size - capacity of buffer in bytes, including the terminator.
//   in, inlen   - UTF-16 input; conversion stops at inlen units or at the first
//                 zero unit, whichever comes first.
// Returns the number of bytes written, not counting the terminator.
// Units that cannot be decoded are replaced with '?'.
// When the output does not fit, conversion stops at a code point boundary so the
// result never ends in a partial multi-byte sequence. A buffer_size of zero or
// less writes nothing and returns 0.
API int64_t CreateCharFromWidechar(char *buffer, int64_t buffer_size, char16_t *in, int64_t inlen) {
    // Without room for the terminator there is nothing that can safely be written.
    if (buffer_size <= 0) return 0;

    int64_t outlen = 0;
    for (int64_t i = 0; i < inlen && in[i];) {
        UTF32Result decode = UTF16ToUTF32((uint16_t *)(in + i), (int64_t)(inlen - i));
        if (!decode.error) {
            i += decode.advance;
            UTF8Result encode = UTF32ToUTF8(decode.out_str);
            if (!encode.error) {
                // Emit the whole sequence or nothing; one slot stays reserved for the terminator.
                if (encode.len > buffer_size - 1 - outlen) break;
                for (int64_t j = 0; j < encode.len; j++) {
                    buffer[outlen++] = encode.out_str[j];
                }
            } else UTF__HANDLE_DECODE_ERROR('?', 0);
        } else UTF__HANDLE_DECODE_ERROR('?', 1);
    }
    buffer[outlen] = 0;
    return outlen;
}
// Converts UTF-8 text to null-terminated UTF-16.
//   buffer      - destination for the UTF-16 units.
//   buffer_size - capacity of buffer in units, including the terminator.
//   in, inlen   - UTF-8 input; exactly inlen bytes are consumed.
// Returns the number of units written, not counting the terminator.
// Bytes that cannot be decoded are replaced with '?' (0x003f), one per byte.
// When the output does not fit, conversion stops at a code point boundary so a
// surrogate pair is never split. A buffer_size of zero or less writes nothing
// and returns 0.
API int64_t CreateWidecharFromChar(char16_t *buffer, int64_t buffer_size, char *in, int64_t inlen) {
    // Without room for the terminator there is nothing that can safely be written.
    if (buffer_size <= 0) return 0;

    int64_t outlen = 0;
    for (int64_t i = 0; i < inlen;) {
        UTF32Result decode = UTF8ToUTF32((uint8_t *)(in + i), (int64_t)(inlen - i));
        if (!decode.error) {
            i += decode.advance;
            UTF16Result encode = UTF32ToUTF16(decode.out_str);
            if (!encode.error) {
                // Emit the whole unit sequence or nothing; one slot stays reserved for the terminator.
                if (encode.len > buffer_size - 1 - outlen) break;
                for (int64_t j = 0; j < encode.len; j++) {
                    buffer[outlen++] = encode.out_str[j];
                }
            } else UTF__HANDLE_DECODE_ERROR(0x003f, 0);
        } else UTF__HANDLE_DECODE_ERROR(0x003f, 1);
    }
    buffer[outlen] = 0;
    return outlen;
}
// Reports whether the iterator currently holds a non-zero item.
API bool IsValid(UTF8Iter &iter) {
    bool has_item = iter.item != 0;
    return has_item;
}
// Moves the iterator past the current code point and decodes the next one.
// iter->item is set to the decoded value, or to 0 when the end of the data is
// reached or the next bytes cannot be decoded.
API void Advance(UTF8Iter *iter) {
    iter->i += iter->utf8_codepoint_byte_size;

    // At or past the end there is nothing left to decode; stop here so the
    // decoder is never handed a pointer beyond the data.
    if (iter->i >= iter->len) {
        iter->item = 0;
        return;
    }

    UTF32Result r = UTF8ToUTF32((uint8_t *)(iter->data + iter->i), iter->len - iter->i);
    if (r.error) {
        iter->item = 0;
        return;
    }
    iter->utf8_codepoint_byte_size = r.advance;
    iter->item = r.out_str;
}
// Builds an iterator over `len` bytes at `data` and, unless len is zero,
// positions it on the first code point.
API UTF8Iter IterateUTF8Ex(char *data, int64_t len) {
    UTF8Iter iter;
    MemoryZero(&iter, sizeof(iter));
    iter.len = len;
    iter.data = data;
    if (len == 0) {
        return iter;
    }
    Advance(&iter);
    return iter;
}
// Builds an iterator over a null-terminated string; the length is found by
// scanning for the terminating zero byte.
API UTF8Iter IterateUTF8(char *data) {
    char *end = data;
    while (*end) {
        end += 1;
    }
    int64_t byte_count = (int64_t)(end - data);
    return IterateUTF8Ex(data, byte_count);
}
// Builds an iterator over the bytes of a String.
API UTF8Iter IterateUTF8(String string) {
    UTF8Iter iter = IterateUTF8Ex(string.data, string.len);
    return iter;
}
// True when the two top bits of c are `10`, the marker of a UTF-8 continuation byte.
API bool IsUTF8ContinuationByte(char c) {
    return (c & 0xC0) == 0x80;
}
// Converts a String to a String16 whose storage comes from `allocator`.
// The buffer holds string.len + 1 units: one unit per input byte plus the terminator.
API String16 ToString16(Allocator allocator, String string) {
    int64_t capacity = string.len + 1;
    char16_t *wide = (char16_t *)AllocSize(allocator, sizeof(char16_t) * capacity);
    int64_t written = CreateWidecharFromChar(wide, capacity, string.data, string.len);
    String16 converted = {wide, written};
    return converted;
}
// Convenience overload: wraps the C string in a String and converts it.
API String16 ToString16(Allocator allocator, char *in_string) {
    String source(in_string);
    return ToString16(allocator, source);
}
// Converts a String to UTF-16 and returns only the data pointer of the result.
API char16_t *ToWidechar(Allocator allocator, String string) {
    String16 wide = ToString16(allocator, string);
    char16_t *units = wide.data;
    return units;
}
// Converts a String16 to a String whose storage comes from `allocator`.
// Returns the converted text; its length excludes the terminator.
API String ToString(Allocator allocator, String16 string) {
    Assert(sizeof(char16_t) == 2);
    // Worst case is 3 UTF-8 bytes per UTF-16 unit (BMP code points from U+0800
    // up); a surrogate pair is 2 units for 4 bytes. Plus one byte for the
    // terminator. Sizing at 2 bytes per unit would truncate such text.
    int64_t buffer_size = string.len * 3 + 1;
    char *buffer = (char *)AllocSize(allocator, buffer_size);
    int64_t size = CreateCharFromWidechar(buffer, buffer_size, string.data, string.len);
    String result = {buffer, size};
    Assert(size < buffer_size);
    return result;
}
// Convenience overload: wraps the pointer and length in a String16 and converts it.
API String ToString(Allocator allocator, char16_t *string, int64_t len) {
    String16 wide = {string, len};
    return ToString(allocator, wide);
}
// Convenience overload: obtains the length from WideLength, then converts.
API String ToString(Allocator allocator, char16_t *wstring) {
    int64_t unit_count = WideLength(wstring);
    String16 wide = {wstring, unit_count};
    return ToString(allocator, wide);
}