110 lines
3.6 KiB
Plaintext
110 lines
3.6 KiB
Plaintext
OS :: #import "os_windows.kl"
|
|
#import "arena.kl"
|
|
// Alias for U64; the size/alignment type taken and returned by
// ClampTopSizeU, GetAlignOffset and AlignUp.
SizeU :: U64
|
|
|
|
// Clamps `val` from above: returns `max` when `val` exceeds it,
// otherwise returns `val` unchanged.
ClampTopSizeU :: (val: SizeU, max: SizeU): SizeU
  result := val
  if result > max
    result = max
  return result
|
|
|
|
// Returns how many bytes must be added to `size` to reach the next
// multiple of `align` (0 when `size` is already a multiple).
// The remainder is taken with `size & (align - 1)`, which only equals
// `size % align` when `align` is a power of two.
GetAlignOffset :: (size: SizeU, align: SizeU): SizeU
  remainder := size & (align - 1)
  if remainder == 0
    return remainder
  return align - remainder
|
|
|
|
// Rounds `size` up to the next multiple of `align` by adding the padding
// reported by GetAlignOffset. A `size` that is already aligned is returned as is.
AlignUp :: (size: SizeU, align: SizeU): SizeU
  padding := GetAlignOffset(size, align)
  return size + padding
|
|
|
|
//
|
|
// Unicode
|
|
//
|
|
// Code unit for '?' (U+003F). StringToString16 writes it in place of
// input it could not decode or encode.
QuestionMark16 :: 0x003f
|
|
// String of 32-bit units: a pointer plus a length.
// NOTE(review): `len` presumably counts U32 elements, not bytes - nothing
// in this file populates a String32, so confirm against callers.
String32 :: struct
  str: *U32
  len: S64
|
|
// String of 16-bit units: a pointer plus a length.
// StringToString16 sets `len` to the number of U16 units written,
// not counting the trailing 0 terminator it appends.
String16 :: struct
  str: *U16
  len: S64
|
|
|
|
// Decodes one UTF-8 sequence starting at `c`.
//
// c           - pointer to the first byte of the sequence.
// max_advance - number of bytes that may be read starting at `c`.
//
// Returns the decoded code point and the number of bytes consumed (1..4).
// Returns 0, 0 when nothing could be decoded: the lead byte is not a valid
// UTF-8 lead byte, a continuation byte does not have the 10xxxxxx form, or
// the sequence would need more than `max_advance` bytes.
//
// Every byte is length-checked BEFORE it is read, so a sequence truncated at
// the end of a buffer never causes a read past `c[max_advance - 1]`.
// Overlong encodings and encoded surrogates are not rejected here.
Utf8ToUtf32 :: (c: *U8, max_advance: S64): U32, S64
  out_str: U32
  advance: S64

  // With no readable bytes even c[0] is off limits.
  if max_advance < 1
    return out_str, advance

  if (c[0] & 0b10000000) == 0
    // 0xxxxxxx - single byte (ASCII)
    out_str = c[0]->U32
    advance = 1

  elif (c[0] & 0b11100000) == 0b11000000
    // 110xxxxx 10xxxxxx
    if max_advance >= 2
      if (c[1] & 0b11000000) == 0b10000000 // Continuation byte required
        c0 := c[0]->U32; c1 := c[1]->U32
        out_str = (c0 & 0b00011111) << 6 | (c1 & 0b00111111)
        advance = 2

  elif (c[0] & 0b11110000) == 0b11100000
    // 1110xxxx 10xxxxxx 10xxxxxx
    if max_advance >= 3
      if (c[1] & 0b11000000) == 0b10000000 && (c[2] & 0b11000000) == 0b10000000 // Two continuation bytes required
        c0 := c[0]->U32; c1 := c[1]->U32; c2 := c[2]->U32
        out_str = (c0 & 0b00001111) << 12 | (c1 & 0b00111111) << 6 | (c2 & 0b00111111)
        advance = 3

  elif (c[0] & 0b11111000) == 0b11110000
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if max_advance >= 4
      if (c[1] & 0b11000000) == 0b10000000 && (c[2] & 0b11000000) == 0b10000000 && (c[3] & 0b11000000) == 0b10000000 // Three continuation bytes required
        c0 := c[0]->U32; c1 := c[1]->U32; c2 := c[2]->U32; c3 := c[3]->U32
        // Only the low 3 bits of the lead byte carry payload; bit 3 is
        // already known to be 0 from the lead-byte test above.
        out_str = (c0 & 0b00000111) << 18 | (c1 & 0b00111111) << 12 | (c2 & 0b00111111) << 6 | (c3 & 0b00111111)
        advance = 4

  return out_str, advance
|
|
|
|
// Encodes one code point as UTF-16.
//
// Returns the code units and how many of them are valid:
//   1 - code point below 0x10000, stored directly in str[0]
//   2 - code point in 0x10000..0x10FFFF, stored as a high/low surrogate pair
//   0 - code point cannot be encoded: it is above 0x10FFFF, or it lies in
//       the surrogate range 0xD800..0xDFFF (emitting it as a single unit
//       would produce a lone surrogate, which is ill-formed UTF-16)
Utf32ToUtf16 :: (codepoint: U32): [2]U16, S64
  str: [2]U16
  len := 0

  // Surrogate code points are not valid scalar values; report failure.
  if codepoint >= 0xD800 && codepoint <= 0xDFFF
    return str, len

  if codepoint < 0x10000
    str[0] = codepoint->U16
    len = 1
  elif codepoint <= 0x10FFFF
    // Split the 20-bit offset into two 10-bit halves.
    code: U32 = (codepoint - 0x10000)
    str[0] = (0xD800 | (code >> 10))->U16
    str[1] = (0xDC00 | (code & 0x3FF))->U16
    len = 2

  return str, len
|
|
|
|
// Converts the UTF-8 string `in` to a 0-terminated UTF-16 string allocated
// from `arena`.
//
// Returns a String16 whose `len` is the number of U16 units written, not
// counting the terminator. Conversion stops at the first sequence that
// Utf8ToUtf32 cannot decode or Utf32ToUtf16 cannot encode; a single
// QuestionMark16 is written in its place and the rest of `in` is dropped.
StringToString16 :: (arena: *Arena, in: String): String16
  // Worst case is all-ASCII input: every input byte becomes one U16, and one
  // more U16 is needed for the terminator, so (Length + 1) units of 2 bytes.
  // Multi-byte UTF-8 sequences never produce more units than they have bytes.
  // NOTE(review): assumes PushSize takes a size in bytes - confirm in arena.kl.
  alloc_size := (Length(in)+1)*2
  result := String16{str = PushSize(arena, alloc_size->U64)}
  // `i` is advanced inside the body by the number of bytes each sequence used.
  for i := 0, i < Length(in)
    // The address is taken here, where i is in range, so an empty `in`
    // is never indexed.
    s32, s32_len := Utf8ToUtf32(&in[i], Length(in) - i)
    if s32_len != 0
      i += s32_len
      s16, s16_len := Utf32ToUtf16(s32)
      if s16_len != 0
        for j := 0, j < s16_len, j++
          result.str[result.len++] = s16[j]
      else
        // Decoded fine but not representable in UTF-16.
        result.str[result.len++] = QuestionMark16
        break
    else
      // Malformed or truncated UTF-8.
      result.str[result.len++] = QuestionMark16
      break

  result.str[result.len] = 0
  return result
|
|
|
|
// Smoke test for the Unicode helpers: converts a long multi-byte sample to
// UTF-16 and prints it, then checks that Utf8ToUtf32 decodes a one-byte and
// two two-byte characters to the expected code points. The returned advance
// lengths are captured but not checked.
TestUnicode :: (arena: *Arena)
  sample := " 豈 更 車 賈 滑 串 句 龜 龜 契 金 喇 奈 懶 癩 羅 蘿 螺 裸 邏 樂 洛 烙 珞 落 酪 駱 亂 卵 欄 爛 蘭 鸞 嵐 濫 藍 襤 拉 臘 蠟 廊 朗 浪 狼 郎 來 冷 勞 擄 櫓 爐 盧 老 蘆 虜 路 露 魯 鷺 碌 祿 綠 菉 錄 鹿 論 壟 弄 籠 聾 牢 磊 賂 雷 壘 屢 樓 淚 漏 累 縷 陋 勒 肋 凜 凌 稜 綾 菱 陵 讀 拏 樂 諾 丹 寧 怒 率 異 北 磻 便 復 不 泌 數 索 參 塞 省 葉 說 殺 辰 沈 拾 若 掠 略 亮 兩 凉 梁 糧 良 諒 量 勵 ..."
  wide := StringToString16(arena, sample)
  print(wide)

  // One-byte (ASCII) character.
  ascii_cp, ascii_len := Utf8ToUtf32('A', 1)
  assert(ascii_cp == 'A', "Invalid decode")

  // Two-byte character U+0107.
  c_acute_cp, c_acute_len := Utf8ToUtf32('ć', 2)
  assert(c_acute_cp == 0x107, "Invalid decode")

  // Two-byte character U+00F3.
  o_acute_cp, o_acute_len := Utf8ToUtf32('ó', 2)
  assert(o_acute_cp == 0xF3, "Invalid decode")
|