Buffer iterator and utf8

This commit is contained in:
Krzosa Karol
2024-06-20 11:52:33 +02:00
parent 5539bbd3b9
commit b3b8a54f7f
2 changed files with 210 additions and 28 deletions

View File

@@ -1177,6 +1177,11 @@ UTF8Iter IterateUTF8(String string) {
return IterateUTF8Ex(string.data, string.len);
}
// Returns true when `c` has the bit pattern 10xxxxxx, i.e. its top two bits
// are exactly "10". Bytes of that shape are the trailing bytes of a
// multi-byte UTF-8 sequence and never start a code point.
bool IsUTF8ContinuationByte(char c) {
    const unsigned char byte = static_cast<unsigned char>(c);
    const unsigned char top_two_bits = static_cast<unsigned char>(byte & 0xC0u);
    return top_two_bits == 0x80u;
}
char ToLowerCase(char a) {
if (a >= 'A' && a <= 'Z') a += 32;
return a;
@@ -1352,7 +1357,7 @@ Array<String> Split(Allocator allocator, String string, String delimiter) {
result.add(before_match);
string = string.skip(index + delimiter.len);
}
if (string.len) result.add(string);
result.add(string);
return result;
}

View File

@@ -1,19 +1,25 @@
struct Buffer {
Allocator allocator;
char *data[2];
int64_t cap;
int64_t len;
int bi; // current buffer index
};
struct Range {
int64_t a;
int64_t b; // one past last index
int64_t min;
int64_t max; // one past last index
// <0,4> = 0,1,2,3
};
// One pending modification of a Buffer: a range paired with the text that
// goes with it. ApplyEdits counts the size of `range` as bytes to delete and
// `string.len` as bytes to insert, and emits `string` at that place in the
// output.
// Built positionally as `{range, string}`, so keep the field order.
struct Edit {
// Span of the buffer this edit covers.
Range range;
// Text written in place of `range`.
String string;
};
// Text storage with two backing arrays; `bi` selects the one that currently
// holds the contents.
// Initialised positionally (e.g. `{scratch}`), so keep `allocator` first.
struct Buffer {
// Passed to Split when ApplyEdits rebuilds `lines`.
// Presumably also used to allocate `data`; that code is not visible here.
Allocator allocator;
// Two storage pointers; the live text is data[bi].
char *data[2];
// NOTE(review): presumably the allocated capacity of each data[] array - confirm.
int64_t cap;
// Number of bytes of live text in data[bi]; valid positions are [0, len).
int64_t len;
int bi; // current buffer index (ApplyEdits switches it after writing the result)
// data[bi] split on "\n"; recomputed at the end of ApplyEdits.
Array<String> lines;
};
int64_t GetRangeSize(Range range) {
int64_t result = range.b - range.a;
int64_t result = range.max - range.min;
return result;
}
@@ -22,18 +28,22 @@ Range GetRange(const Buffer &buffer) {
return result;
}
struct Edit {
Range range;
String string;
};
// Clamps `pos` into [0, buffer->len]. The upper bound is inclusive, so the
// result may be the one-past-the-end position.
int64_t ClampMax(Buffer *buffer, int64_t pos) {
    const int64_t lowest = 0;
    const int64_t highest = buffer->len;
    return Clamp(pos, lowest, highest);
}
// Clamps `pos` into [0, buffer->len - 1], i.e. onto an existing byte.
// For an empty buffer the upper bound is pinned to 0, so the result is 0.
int64_t ClampMin(Buffer *buffer, int64_t pos) {
    const int64_t lowest = 0;
    const int64_t last_index = Max(lowest, buffer->len - 1);
    return Clamp(pos, lowest, last_index);
}
void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
int64_t size_to_delete = 0;
int64_t size_to_insert = 0;
int64_t end_of_buffer = Max((int64_t)0, buffer->len - 1);
For(edits) {
it.range.a = Clamp(it.range.a, (int64_t)0, end_of_buffer);
it.range.b = Clamp(it.range.b, (int64_t)0, buffer->len);
it.range.min = ClampMin(buffer, it.range.min);
it.range.max = ClampMax(buffer, it.range.max);
size_to_delete += GetRangeSize(it.range);
size_to_insert += it.string.len;
}
@@ -43,10 +53,10 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
ForItem(it2, edits) {
if (&it1 == &it2) continue;
bool a2_inside = it2.range.a >= it1.range.a && it2.range.a < it1.range.b;
bool a2_inside = it2.range.min >= it1.range.min && it2.range.min < it1.range.max;
Assert(!a2_inside);
bool b2_inside = it2.range.b > it1.range.a && it2.range.b <= it1.range.b;
bool b2_inside = it2.range.max > it1.range.min && it2.range.max <= it1.range.max;
Assert(!b2_inside);
}
}
@@ -81,28 +91,28 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
int64_t prev_dest = 0;
For(edits) {
Range source_range = {prev_source, it.range.a};
Range source_range = {prev_source, it.range.min};
if (GetRangeSize(source_range) != 0) {
String source_string = {};
source_string.data = buffer->data[buffer->bi] + source_range.a;
source_string.data = buffer->data[buffer->bi] + source_range.min;
source_string.len = GetRangeSize(source_range);
Range dest_range = {prev_dest, prev_dest + source_string.len};
writes.add({dest_range, source_string});
prev_dest = dest_range.b;
prev_dest = dest_range.max;
}
Range dest_range = {prev_dest, prev_dest + it.string.len};
writes.add({dest_range, it.string});
prev_dest = dest_range.b;
prev_source = it.range.b;
prev_dest = dest_range.max;
prev_source = it.range.max;
}
// Add remaining range
Range source_range = {prev_source, buffer->len};
if (GetRangeSize(source_range)) {
String source_string = {};
source_string.data = buffer->data[buffer->bi] + source_range.a;
source_string.data = buffer->data[buffer->bi] + source_range.min;
source_string.len = GetRangeSize(source_range);
Range dest_range = {prev_dest, prev_dest + source_string.len};
writes.add({dest_range, source_string});
@@ -110,7 +120,7 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
#if DEBUG_BUILD
for (int64_t i = 0; i < writes.len - 1; i += 1) {
Assert(writes[i].range.b == writes[i + 1].range.a);
Assert(writes[i].range.max == writes[i + 1].range.min);
}
#endif
@@ -123,12 +133,118 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
buffer->bi = dsti;
Assert(new_buffer_len == buffer->len + len_offset);
buffer->len = new_buffer_len;
String string = {buffer->data[buffer->bi], buffer->len};
buffer->lines = Split(buffer->allocator, string, "\n");
}
// Appends one edit (range + replacement text) to `edits`.
void AddEdit(Array<Edit> *edits, Range range, String string) {
    Edit edit = {range, string};
    edits->add(edit);
}
// Moves `pos` off UTF-8 continuation bytes so that it lands on the first byte
// of a code point.
//
// buffer    - buffer whose current contents (data[bi]) are inspected.
// pos       - byte position to start from.
// direction - step added to the position while it sits on a continuation
//             byte: positive walks forward, negative walks backward.
//
// Returns `pos` unchanged when it is out of bounds or already on a
// non-continuation byte. Otherwise returns the first position in `direction`
// that is not a continuation byte. If the walk runs off either end of the
// buffer, the out-of-range position it stopped at is returned (below 0 or
// at/after buffer->len).
int64_t AdjustUTF8Pos(Buffer *buffer, int64_t pos, int64_t direction = 1) {
    // A zero step can never leave a continuation byte; return instead of
    // looping forever.
    if (direction == 0) return pos;

    const char *bytes = buffer->data[buffer->bi];
    int64_t result = pos;
    // Inspect the byte at the position being adjusted. The previous code
    // always looked at byte 0, so the position was never actually adjusted.
    while (result >= 0 && result < buffer->len && IsUTF8ContinuationByte(bytes[result])) {
        result += direction;
    }
    return result;
}
// True when `pos` indexes an existing byte of the buffer: 0 <= pos < len.
bool InBounds(Buffer *buffer, int64_t pos) {
    if (pos < 0) return false;
    return pos < buffer->len;
}
// Returns the byte at `pos` in the current contents, or 0 when `pos` is out
// of bounds.
char GetChar(Buffer *buffer, int64_t pos) {
    if (InBounds(buffer, pos)) {
        const char *bytes = buffer->data[buffer->bi];
        return bytes[pos];
    }
    return '\0';
}
// Returns a pointer to the byte at `pos` in the current contents, or a null
// pointer when `pos` is out of bounds.
char *GetCharP(Buffer *buffer, int64_t pos) {
    if (InBounds(buffer, pos)) {
        char *base = buffer->data[buffer->bi];
        return &base[pos];
    }
    return nullptr;
}
// Decodes the UTF-8 sequence starting at byte `pos` via UTF8ToUTF32.
//
// buffer         - buffer to read from (current contents, data[bi]).
// pos            - byte position of the sequence.
// codepoint_size - optional out-parameter; on success it receives the
//                  `advance` reported by the decoder. It is left untouched on
//                  failure.
//
// Returns the decoder's `out_str` value. Returns 0 when `pos` is out of
// bounds or the decoder reports an error (a decoder error also trips an
// Assert).
uint32_t GetUTF32(Buffer *buffer, int64_t pos, int64_t *codepoint_size) {
    if (!InBounds(buffer, pos)) return 0;

    // The decoder is told how many bytes remain from `pos` to the end.
    const int64_t remaining = buffer->len - pos;
    char *cursor = buffer->data[buffer->bi] + pos;
    UTF32Result utf32 = UTF8ToUTF32(cursor, static_cast<int>(remaining));

    Assert(utf32.error == 0);
    if (utf32.error != 0) return 0;

    if (codepoint_size != nullptr) {
        *codepoint_size = utf32.advance;
    }
    return utf32.out_str;
}
// Direction values accepted by Iterate, Advance and IsValid (each asserts
// that the direction is one of these two). Advance also passes
// ITERATE_BACKWARD to AdjustUTF8Pos as the step, so the numeric values are
// significant.
// NOTE(review): declared int16_t while BufferIter::direction and the
// `direction` parameters are int64_t - confirm whether that is intended.
constexpr int16_t ITERATE_FORWARD = 1;
constexpr int16_t ITERATE_BACKWARD = -1;
// State of a code-point iteration over a Buffer; created by Iterate and
// stepped with Advance while IsValid returns true.
// Iterate initialises the first four fields positionally
// ({buffer, pos, end, direction}), so keep the field order.
struct BufferIter {
// Buffer being walked.
Buffer *buffer;
// Byte position of the current item.
int64_t pos;
// Stop bound: forward iteration is valid while pos < end, backward
// iteration while pos >= end.
int64_t end;
// ITERATE_FORWARD or ITERATE_BACKWARD.
int64_t direction;
// Size reported by GetUTF32 for the current item; Advance adds it to `pos`
// when iterating forward.
int64_t utf8_codepoint_size;
// Incremented by every Advance call, including the one Iterate makes to load
// the first item, so the first item is seen with index 1.
int64_t codepoint_index;
// Value returned by GetUTF32 for the sequence at `pos`.
uint32_t item;
};
// Reports whether the iterator still points at an item inside its range.
// Forward iteration is valid while pos < end; backward iteration while
// pos >= end. When valid, it additionally asserts that `pos` is not on a
// UTF-8 continuation byte and is inside the buffer.
bool IsValid(const BufferIter &iter) {
    Assert(iter.direction == ITERATE_FORWARD || iter.direction == ITERATE_BACKWARD);

    const bool still_in_range = (iter.direction == ITERATE_BACKWARD)
                                    ? (iter.pos >= iter.end)
                                    : (iter.pos < iter.end);
    if (!still_in_range) return false;

    Assert(!IsUTF8ContinuationByte(GetChar(iter.buffer, iter.pos)));
    Assert(InBounds(iter.buffer, iter.pos));
    return true;
}
// Steps the iterator to the next code point in its direction and loads it.
// Forward: skips over the size of the current item. Backward: steps one byte
// back and lets AdjustUTF8Pos move that position onto the start of a code
// point. `item` and `utf8_codepoint_size` are only refreshed when the new
// position is still valid.
void Advance(BufferIter *iter) {
    Assert(iter->direction == ITERATE_FORWARD || iter->direction == ITERATE_BACKWARD);

    iter->codepoint_index += 1;

    if (iter->direction != ITERATE_FORWARD) {
        const int64_t previous_byte = iter->pos - 1;
        iter->pos = AdjustUTF8Pos(iter->buffer, previous_byte, ITERATE_BACKWARD);
    } else {
        iter->pos += iter->utf8_codepoint_size;
    }

    if (IsValid(*iter)) {
        iter->item = GetUTF32(iter->buffer, iter->pos, &iter->utf8_codepoint_size);
    }
}
// Creates an iterator over the code points of `range` in `buffer`.
//
// buffer    - buffer to walk.
// range     - span to visit; min is clamped with ClampMin and max with
//             ClampMax before use. Asserts that range.min is not on a
//             continuation byte and that max >= min.
// direction - ITERATE_FORWARD (default) walks min -> max,
//             ITERATE_BACKWARD walks max -> min.
//
// The returned iterator has already been advanced once, so when it is valid
// its `item` holds the first code point.
BufferIter Iterate(Buffer *buffer, Range range, int64_t direction = ITERATE_FORWARD) {
    Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD);
    Assert(!IsUTF8ContinuationByte(GetChar(buffer, range.min)));
    Assert(range.max >= range.min);

    const int64_t first = ClampMin(buffer, range.min);
    const int64_t last = ClampMax(buffer, range.max);
    const bool backward = (direction == ITERATE_BACKWARD);

    BufferIter iter = {};
    iter.buffer = buffer;
    iter.direction = direction;
    // Backward iteration starts at the top of the range and stops at the bottom.
    iter.pos = backward ? last : first;
    iter.end = backward ? first : last;

    // Initial step: positions on and decodes the first item.
    Advance(&iter);
    return iter;
}
void RunBufferTests() {
Scratch scratch;
{
@@ -138,6 +254,8 @@ void RunBufferTests() {
ApplyEdits(&buffer, edits);
String string = {buffer.data[buffer.bi], buffer.len};
Assert(string == "Things and other things");
Assert(buffer.lines.len == 1);
Assert(buffer.lines[0] == "Things and other things");
}
{
Buffer buffer = {scratch};
@@ -156,5 +274,64 @@ void RunBufferTests() {
String string = {buffer.data[buffer.bi], buffer.len};
Assert(string == "Memes dna BigOther things");
Assert(buffer.lines.len == 1);
Assert(buffer.lines[0] == "Memes dna BigOther things");
}
{
Buffer buffer = {scratch};
Array<Edit> edits = {scratch};
edits.add({
{0, 0},
"Things and other things\n"
"Things and other things\n"
});
ApplyEdits(&buffer, edits);
Assert(buffer.lines.len == 3);
Assert(buffer.lines[1] == "Things and other things");
Assert(buffer.lines[0] == "Things and other things");
Assert(buffer.lines[2] == "");
{
Array<char> s = {scratch};
for (BufferIter iter = Iterate(&buffer, {0, 6}); IsValid(iter); Advance(&iter)) {
Assert(iter.item < 255);
s.add((char)iter.item);
}
String str = {s.data, s.len};
Assert(str == "Things");
}
{
Array<char> s = {scratch};
for (BufferIter iter = Iterate(&buffer, {0, 6}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) {
Assert(iter.item < 255);
s.add((char)iter.item);
}
String str = {s.data, s.len};
Assert(str == "sgnihT");
}
{
Array<char> s = {scratch};
for (BufferIter iter = Iterate(&buffer, {0, buffer.len}); IsValid(iter); Advance(&iter)) {
Assert(iter.item < 255);
s.add((char)iter.item);
}
String str = {s.data, s.len};
String b = {GetCharP(&buffer, 0), buffer.len};
Assert(str == b);
}
{
Array<char> s = {scratch};
for (BufferIter iter = Iterate(&buffer, {0, buffer.len}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) {
Assert(iter.item < 255);
s.add((char)iter.item);
}
String str = {s.data, s.len};
String b = {GetCharP(&buffer, 0), buffer.len};
Assert(str.len == b.len);
}
}
}