Buffer iterator and utf8
This commit is contained in:
@@ -1177,6 +1177,11 @@ UTF8Iter IterateUTF8(String string) {
|
|||||||
return IterateUTF8Ex(string.data, string.len);
|
return IterateUTF8Ex(string.data, string.len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether `c` is a UTF-8 continuation byte, i.e. a byte whose two
// most significant bits are `10` (values 0x80..0xBF).
//
// c: one byte of UTF-8 encoded text.
// Returns true for continuation bytes, false for ASCII and lead bytes.
bool IsUTF8ContinuationByte(char c) {
    // Mask on an unsigned value so the result does not depend on whether
    // plain `char` is signed on the target platform.
    const unsigned char top_bits = static_cast<unsigned char>(c) & 0b11000000;
    return top_bits == 0b10000000;
}
|
||||||
|
|
||||||
char ToLowerCase(char a) {
|
char ToLowerCase(char a) {
|
||||||
if (a >= 'A' && a <= 'Z') a += 32;
|
if (a >= 'A' && a <= 'Z') a += 32;
|
||||||
return a;
|
return a;
|
||||||
@@ -1352,7 +1357,7 @@ Array<String> Split(Allocator allocator, String string, String delimiter) {
|
|||||||
result.add(before_match);
|
result.add(before_match);
|
||||||
string = string.skip(index + delimiter.len);
|
string = string.skip(index + delimiter.len);
|
||||||
}
|
}
|
||||||
if (string.len) result.add(string);
|
result.add(string);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,19 +1,25 @@
|
|||||||
// Text storage backed by two character arrays; `bi` selects the one that
// currently holds the content.
struct Buffer {
|
|
||||||
// Allocator associated with this buffer.
Allocator allocator;
|
|
||||||
// The two backing arrays; data[bi] is the one currently in use.
char *data[2];
|
|
||||||
// NOTE(review): presumably the allocated capacity of each data array - confirm.
int64_t cap;
|
|
||||||
// Number of bytes of content in data[bi].
int64_t len;
|
|
||||||
int bi; // current buffer index
|
|
||||||
};
|
|
||||||
|
|
||||||
// Half-open interval of buffer positions: the first field is the first index
// covered, the second field is one past the last index covered.
struct Range {
|
struct Range {
|
||||||
int64_t a;
|
int64_t min;
|
||||||
int64_t b; // one past last index
|
int64_t max; // one past last index
|
||||||
// <0,4> = 0,1,2,3
|
// <0,4> = 0,1,2,3
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// One replacement to apply to a Buffer: the span `range` is replaced by `string`.
struct Edit {
|
||||||
|
// Span of the buffer this edit covers; ApplyEdits counts its size as bytes to delete.
Range range;
|
||||||
|
// Text written in place of `range`; ApplyEdits counts its length as bytes to insert.
String string;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Text storage backed by two character arrays; `bi` selects the one that
// currently holds the content.
struct Buffer {
|
||||||
|
// Allocator associated with this buffer; ApplyEdits passes it to Split when rebuilding `lines`.
Allocator allocator;
|
||||||
|
// The two backing arrays; data[bi] is the one currently in use.
char *data[2];
|
||||||
|
// NOTE(review): presumably the allocated capacity of each data array - confirm.
int64_t cap;
|
||||||
|
// Number of bytes of content in data[bi].
int64_t len;
|
||||||
|
int bi; // current buffer index
|
||||||
|
// Content split on "\n"; reassigned at the end of ApplyEdits.
Array<String> lines;
|
||||||
|
};
|
||||||
|
|
||||||
// Returns the number of positions covered by `range`: its upper bound minus
// its lower bound.
int64_t GetRangeSize(Range range) {
|
int64_t GetRangeSize(Range range) {
|
||||||
int64_t result = range.b - range.a;
|
int64_t result = range.max - range.min;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -22,18 +28,22 @@ Range GetRange(const Buffer &buffer) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Edit {
|
// Limits `pos` to [0, buffer->len]. The upper limit is buffer->len itself
// (not len - 1) because a Range's upper bound is one past the last index.
int64_t ClampMax(Buffer *buffer, int64_t pos) {
|
||||||
Range range;
|
int64_t result = Clamp(pos, (int64_t)0, buffer->len);
|
||||||
String string;
|
return result;
|
||||||
};
|
}
|
||||||
|
// Limits `pos` to a valid starting index of `buffer`.
//
// The upper limit is buffer->len - 1, or 0 when that would be negative, so
// the result is always in [0, max(0, len - 1)].
int64_t ClampMin(Buffer *buffer, int64_t pos) {
    const int64_t last_index = Max((int64_t)0, buffer->len - 1);
    return Clamp(pos, (int64_t)0, last_index);
}
|
||||||
|
|
||||||
void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
||||||
int64_t size_to_delete = 0;
|
int64_t size_to_delete = 0;
|
||||||
int64_t size_to_insert = 0;
|
int64_t size_to_insert = 0;
|
||||||
int64_t end_of_buffer = Max((int64_t)0, buffer->len - 1);
|
|
||||||
For(edits) {
|
For(edits) {
|
||||||
it.range.a = Clamp(it.range.a, (int64_t)0, end_of_buffer);
|
it.range.min = ClampMin(buffer, it.range.min);
|
||||||
it.range.b = Clamp(it.range.b, (int64_t)0, buffer->len);
|
it.range.max = ClampMax(buffer, it.range.max);
|
||||||
size_to_delete += GetRangeSize(it.range);
|
size_to_delete += GetRangeSize(it.range);
|
||||||
size_to_insert += it.string.len;
|
size_to_insert += it.string.len;
|
||||||
}
|
}
|
||||||
@@ -43,10 +53,10 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
|||||||
ForItem(it2, edits) {
|
ForItem(it2, edits) {
|
||||||
if (&it1 == &it2) continue;
|
if (&it1 == &it2) continue;
|
||||||
|
|
||||||
bool a2_inside = it2.range.a >= it1.range.a && it2.range.a < it1.range.b;
|
bool a2_inside = it2.range.min >= it1.range.min && it2.range.min < it1.range.max;
|
||||||
Assert(!a2_inside);
|
Assert(!a2_inside);
|
||||||
|
|
||||||
bool b2_inside = it2.range.b > it1.range.a && it2.range.b <= it1.range.b;
|
bool b2_inside = it2.range.max > it1.range.min && it2.range.max <= it1.range.max;
|
||||||
Assert(!b2_inside);
|
Assert(!b2_inside);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -81,28 +91,28 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
|||||||
int64_t prev_dest = 0;
|
int64_t prev_dest = 0;
|
||||||
|
|
||||||
For(edits) {
|
For(edits) {
|
||||||
Range source_range = {prev_source, it.range.a};
|
Range source_range = {prev_source, it.range.min};
|
||||||
if (GetRangeSize(source_range) != 0) {
|
if (GetRangeSize(source_range) != 0) {
|
||||||
String source_string = {};
|
String source_string = {};
|
||||||
source_string.data = buffer->data[buffer->bi] + source_range.a;
|
source_string.data = buffer->data[buffer->bi] + source_range.min;
|
||||||
source_string.len = GetRangeSize(source_range);
|
source_string.len = GetRangeSize(source_range);
|
||||||
Range dest_range = {prev_dest, prev_dest + source_string.len};
|
Range dest_range = {prev_dest, prev_dest + source_string.len};
|
||||||
writes.add({dest_range, source_string});
|
writes.add({dest_range, source_string});
|
||||||
|
|
||||||
prev_dest = dest_range.b;
|
prev_dest = dest_range.max;
|
||||||
}
|
}
|
||||||
|
|
||||||
Range dest_range = {prev_dest, prev_dest + it.string.len};
|
Range dest_range = {prev_dest, prev_dest + it.string.len};
|
||||||
writes.add({dest_range, it.string});
|
writes.add({dest_range, it.string});
|
||||||
prev_dest = dest_range.b;
|
prev_dest = dest_range.max;
|
||||||
prev_source = it.range.b;
|
prev_source = it.range.max;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add remaining range
|
// Add remaining range
|
||||||
Range source_range = {prev_source, buffer->len};
|
Range source_range = {prev_source, buffer->len};
|
||||||
if (GetRangeSize(source_range)) {
|
if (GetRangeSize(source_range)) {
|
||||||
String source_string = {};
|
String source_string = {};
|
||||||
source_string.data = buffer->data[buffer->bi] + source_range.a;
|
source_string.data = buffer->data[buffer->bi] + source_range.min;
|
||||||
source_string.len = GetRangeSize(source_range);
|
source_string.len = GetRangeSize(source_range);
|
||||||
Range dest_range = {prev_dest, prev_dest + source_string.len};
|
Range dest_range = {prev_dest, prev_dest + source_string.len};
|
||||||
writes.add({dest_range, source_string});
|
writes.add({dest_range, source_string});
|
||||||
@@ -110,7 +120,7 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
|||||||
|
|
||||||
#if DEBUG_BUILD
|
#if DEBUG_BUILD
|
||||||
for (int64_t i = 0; i < writes.len - 1; i += 1) {
|
for (int64_t i = 0; i < writes.len - 1; i += 1) {
|
||||||
Assert(writes[i].range.b == writes[i + 1].range.a);
|
Assert(writes[i].range.max == writes[i + 1].range.min);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -123,12 +133,118 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
|||||||
buffer->bi = dsti;
|
buffer->bi = dsti;
|
||||||
Assert(new_buffer_len == buffer->len + len_offset);
|
Assert(new_buffer_len == buffer->len + len_offset);
|
||||||
buffer->len = new_buffer_len;
|
buffer->len = new_buffer_len;
|
||||||
|
|
||||||
|
String string = {buffer->data[buffer->bi], buffer->len};
|
||||||
|
buffer->lines = Split(buffer->allocator, string, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Appends an Edit built from `range` and `string` to the end of `edits`.
void AddEdit(Array<Edit> *edits, Range range, String string) {
|
void AddEdit(Array<Edit> *edits, Range range, String string) {
|
||||||
edits->add({range, string});
|
edits->add({range, string});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Moves `pos` off UTF-8 continuation bytes so that it lands on the first byte
// of a codepoint.
//
// buffer:    buffer whose current content (data[bi], len bytes) is inspected.
// pos:       starting byte position.
// direction: step applied while the byte at the current position is a
//            continuation byte; +1 walks forward, -1 walks backward.
// Returns the first position reached that is either not a continuation byte
// or lies outside [0, buffer->len). An out-of-range `pos` is returned as is.
int64_t AdjustUTF8Pos(Buffer *buffer, int64_t pos, int64_t direction = 1) {
    const char *bytes = buffer->data[buffer->bi];
    int64_t result = pos;
    // Inspect the byte at the position being adjusted; looking at byte 0
    // instead would make the outcome independent of `pos`.
    while (result >= 0 && result < buffer->len && IsUTF8ContinuationByte(bytes[result])) {
        result += direction;
    }
    return result;
}
|
||||||
|
|
||||||
|
// Reports whether `pos` indexes a byte of the buffer's content, i.e. whether
// 0 <= pos < buffer->len.
bool InBounds(Buffer *buffer, int64_t pos) {
    if (pos < 0) return false;
    return pos < buffer->len;
}
|
||||||
|
|
||||||
|
// Returns the byte at `pos` in the buffer's current content, or 0 when `pos`
// is out of bounds.
char GetChar(Buffer *buffer, int64_t pos) {
    return InBounds(buffer, pos) ? buffer->data[buffer->bi][pos] : '\0';
}
|
||||||
|
|
||||||
|
// Returns a pointer to the byte at `pos` in the buffer's current content, or
// a null pointer when `pos` is out of bounds.
char *GetCharP(Buffer *buffer, int64_t pos) {
    char *result = nullptr;
    if (InBounds(buffer, pos)) {
        result = buffer->data[buffer->bi] + pos;
    }
    return result;
}
|
||||||
|
|
||||||
|
// Decodes the UTF-8 codepoint that starts at byte `pos` of the buffer.
//
// buffer:         buffer whose current content is decoded.
// pos:            byte position of the first byte of the codepoint.
// codepoint_size: optional; on success receives the number of bytes the
//                 decoder advanced. Left untouched on failure.
// Returns the decoded codepoint, or 0 when `pos` is out of bounds or the
// decoder reports an error (the error case also trips an Assert).
uint32_t GetUTF32(Buffer *buffer, int64_t pos, int64_t *codepoint_size) {
    if (!InBounds(buffer, pos)) {
        return 0;
    }

    char *p = buffer->data[buffer->bi] + pos;
    int64_t available = buffer->len - pos;

    // The decoder takes an `int`. Narrowing the raw remaining byte count would
    // truncate for buffers larger than INT_MAX, so cap it first. A UTF-8
    // sequence is at most 4 bytes long, so no more than that is ever needed.
    // NOTE(review): assumes UTF8ToUTF32 treats its second argument as an upper
    // bound on the bytes it may read - confirm.
    int max = available > 4 ? 4 : (int)available;
    UTF32Result utf32 = UTF8ToUTF32(p, max);
    Assert(utf32.error == 0);

    if (utf32.error != 0) return 0;
    if (codepoint_size) codepoint_size[0] = utf32.advance;
    return utf32.out_str;
}
|
||||||
|
|
||||||
|
// Direction values for BufferIter; Iterate, IsValid and Advance assert that
// the direction is one of these two.
constexpr int16_t ITERATE_FORWARD = 1;
|
||||||
|
constexpr int16_t ITERATE_BACKWARD = -1;
|
||||||
|
// Cursor over the codepoints of a Buffer, created by Iterate and stepped with
// Advance; IsValid reports whether it still points inside its range.
struct BufferIter {
|
||||||
|
// Buffer being iterated.
Buffer *buffer;
|
||||||
|
// Current byte position.
int64_t pos;
|
||||||
|
// Stop bound: forward iteration is valid while pos < end, backward while pos >= end.
int64_t end;
|
||||||
|
// ITERATE_FORWARD or ITERATE_BACKWARD.
int64_t direction;
|
||||||
|
|
||||||
|
// Byte size of the codepoint at pos as reported by GetUTF32; forward Advance adds it to pos.
int64_t utf8_codepoint_size;
|
||||||
|
// Incremented by one on every Advance call.
int64_t codepoint_index;
|
||||||
|
// Codepoint decoded at pos by the most recent successful Advance.
uint32_t item;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool IsValid(const BufferIter &iter) {
|
||||||
|
Assert(iter.direction == ITERATE_FORWARD || iter.direction == ITERATE_BACKWARD);
|
||||||
|
|
||||||
|
bool result = false;
|
||||||
|
if (iter.direction == ITERATE_BACKWARD) {
|
||||||
|
result = iter.pos >= iter.end;
|
||||||
|
} else {
|
||||||
|
result = iter.pos < iter.end;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
Assert(!IsUTF8ContinuationByte(GetChar(iter.buffer, iter.pos)));
|
||||||
|
Assert(InBounds(iter.buffer, iter.pos));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Steps `iter` to the next codepoint in its direction and decodes it.
//
// Always increments codepoint_index. Forward iteration adds the size of the
// previously decoded codepoint to pos; backward iteration moves to pos - 1
// and lets AdjustUTF8Pos walk backward from there. If the iterator is still
// valid afterwards, `item` and `utf8_codepoint_size` are refreshed from
// GetUTF32; otherwise they keep their previous values.
void Advance(BufferIter *iter) {
    Assert(iter->direction == ITERATE_FORWARD || iter->direction == ITERATE_BACKWARD);

    iter->codepoint_index += 1;
    if (iter->direction != ITERATE_FORWARD) {
        iter->pos = AdjustUTF8Pos(iter->buffer, iter->pos - 1, ITERATE_BACKWARD);
    } else {
        iter->pos += iter->utf8_codepoint_size;
    }

    if (IsValid(*iter)) {
        iter->item = GetUTF32(iter->buffer, iter->pos, &iter->utf8_codepoint_size);
    }
}
|
||||||
|
|
||||||
|
// Creates an iterator over the codepoints of `buffer` within `range`.
//
// buffer:    buffer to iterate.
// range:     positions to visit; min is clamped with ClampMin and max with
//            ClampMax before use. Asserts that max >= min and that min is
//            not on a UTF-8 continuation byte.
// direction: ITERATE_FORWARD starts at the clamped min and stops at the
//            clamped max; ITERATE_BACKWARD starts at the clamped max and
//            stops at the clamped min.
// Returns the iterator after one initial Advance call, so the caller can
// test it with IsValid and read `item` straight away.
BufferIter Iterate(Buffer *buffer, Range range, int64_t direction = ITERATE_FORWARD) {
    Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD);
    Assert(!IsUTF8ContinuationByte(GetChar(buffer, range.min)));
    Assert(range.max >= range.min);

    const int64_t first = ClampMin(buffer, range.min);
    const int64_t limit = ClampMax(buffer, range.max);

    // Remaining fields (codepoint size, index, item) start out zeroed.
    BufferIter result = {};
    result.buffer = buffer;
    result.direction = direction;
    if (direction == ITERATE_BACKWARD) {
        result.pos = limit;
        result.end = first;
    } else {
        result.pos = first;
        result.end = limit;
    }

    Advance(&result);
    return result;
}
|
||||||
|
|
||||||
void RunBufferTests() {
|
void RunBufferTests() {
|
||||||
Scratch scratch;
|
Scratch scratch;
|
||||||
{
|
{
|
||||||
@@ -138,6 +254,8 @@ void RunBufferTests() {
|
|||||||
ApplyEdits(&buffer, edits);
|
ApplyEdits(&buffer, edits);
|
||||||
String string = {buffer.data[buffer.bi], buffer.len};
|
String string = {buffer.data[buffer.bi], buffer.len};
|
||||||
Assert(string == "Things and other things");
|
Assert(string == "Things and other things");
|
||||||
|
Assert(buffer.lines.len == 1);
|
||||||
|
Assert(buffer.lines[0] == "Things and other things");
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
Buffer buffer = {scratch};
|
Buffer buffer = {scratch};
|
||||||
@@ -156,5 +274,64 @@ void RunBufferTests() {
|
|||||||
|
|
||||||
String string = {buffer.data[buffer.bi], buffer.len};
|
String string = {buffer.data[buffer.bi], buffer.len};
|
||||||
Assert(string == "Memes dna BigOther things");
|
Assert(string == "Memes dna BigOther things");
|
||||||
|
Assert(buffer.lines.len == 1);
|
||||||
|
Assert(buffer.lines[0] == "Memes dna BigOther things");
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Buffer buffer = {scratch};
|
||||||
|
Array<Edit> edits = {scratch};
|
||||||
|
edits.add({
|
||||||
|
{0, 0},
|
||||||
|
"Things and other things\n"
|
||||||
|
"Things and other things\n"
|
||||||
|
});
|
||||||
|
ApplyEdits(&buffer, edits);
|
||||||
|
Assert(buffer.lines.len == 3);
|
||||||
|
Assert(buffer.lines[1] == "Things and other things");
|
||||||
|
Assert(buffer.lines[0] == "Things and other things");
|
||||||
|
Assert(buffer.lines[2] == "");
|
||||||
|
|
||||||
|
{
|
||||||
|
Array<char> s = {scratch};
|
||||||
|
for (BufferIter iter = Iterate(&buffer, {0, 6}); IsValid(iter); Advance(&iter)) {
|
||||||
|
Assert(iter.item < 255);
|
||||||
|
|
||||||
|
s.add((char)iter.item);
|
||||||
|
}
|
||||||
|
String str = {s.data, s.len};
|
||||||
|
Assert(str == "Things");
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Array<char> s = {scratch};
|
||||||
|
for (BufferIter iter = Iterate(&buffer, {0, 6}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) {
|
||||||
|
Assert(iter.item < 255);
|
||||||
|
|
||||||
|
s.add((char)iter.item);
|
||||||
|
}
|
||||||
|
String str = {s.data, s.len};
|
||||||
|
Assert(str == "sgnihT");
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Array<char> s = {scratch};
|
||||||
|
for (BufferIter iter = Iterate(&buffer, {0, buffer.len}); IsValid(iter); Advance(&iter)) {
|
||||||
|
Assert(iter.item < 255);
|
||||||
|
|
||||||
|
s.add((char)iter.item);
|
||||||
|
}
|
||||||
|
String str = {s.data, s.len};
|
||||||
|
String b = {GetCharP(&buffer, 0), buffer.len};
|
||||||
|
Assert(str == b);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Array<char> s = {scratch};
|
||||||
|
for (BufferIter iter = Iterate(&buffer, {0, buffer.len}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) {
|
||||||
|
Assert(iter.item < 255);
|
||||||
|
|
||||||
|
s.add((char)iter.item);
|
||||||
|
}
|
||||||
|
String str = {s.data, s.len};
|
||||||
|
String b = {GetCharP(&buffer, 0), buffer.len};
|
||||||
|
Assert(str.len == b.len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user