Buffer iterator and utf8
This commit is contained in:
@@ -1177,6 +1177,11 @@ UTF8Iter IterateUTF8(String string) {
|
||||
return IterateUTF8Ex(string.data, string.len);
|
||||
}
|
||||
|
||||
// Returns true when `c` is a UTF-8 continuation byte, i.e. its two top bits are `10`.
bool IsUTF8ContinuationByte(char c) {
    // Mask the raw byte value; going through unsigned char keeps the test independent
    // of whether plain `char` is signed on the target.
    const unsigned char byte = static_cast<unsigned char>(c);
    return (byte & 0xC0u) == 0x80u;
}
|
||||
|
||||
char ToLowerCase(char a) {
|
||||
if (a >= 'A' && a <= 'Z') a += 32;
|
||||
return a;
|
||||
@@ -1352,7 +1357,7 @@ Array<String> Split(Allocator allocator, String string, String delimiter) {
|
||||
result.add(before_match);
|
||||
string = string.skip(index + delimiter.len);
|
||||
}
|
||||
if (string.len) result.add(string);
|
||||
result.add(string);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,19 +1,25 @@
|
||||
// NOTE(review): this looks like the pre-change side of the diff. A second `struct Buffer`
// with an additional `Array<String> lines` member appears further down in this view;
// confirm which definition is live before relying on this one.
struct Buffer {
|
||||
Allocator allocator;
|
||||
char *data[2];
|
||||
int64_t cap;
|
||||
int64_t len;
|
||||
int bi; // current buffer index
|
||||
};
|
||||
|
||||
// Half-open span of byte offsets: <0,4> covers 0,1,2,3.
// Only `min` and `max` are kept. Leaving the old `a`/`b` members in front of them would make
// brace initialisation such as `Range{x, y}` fill `a`/`b` while readers use `min`/`max`.
struct Range {
    int64_t min; // first index in the span
    int64_t max; // one past last index
};
|
||||
|
||||
struct Edit {
|
||||
Range range;
|
||||
String string;
|
||||
};
|
||||
|
||||
struct Buffer {
|
||||
Allocator allocator;
|
||||
char *data[2];
|
||||
int64_t cap;
|
||||
int64_t len;
|
||||
int bi; // current buffer index
|
||||
Array<String> lines;
|
||||
};
|
||||
|
||||
// Number of bytes covered by the half-open range [min, max).
// The stale `range.b - range.a` declaration was dropped: it redeclared `result` and referred
// to the old field names.
int64_t GetRangeSize(Range range) {
    return range.max - range.min;
}
|
||||
|
||||
@@ -22,18 +28,22 @@ Range GetRange(const Buffer &buffer) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// NOTE(review): an identical `struct Edit` is defined earlier in this view; this copy looks
// like the removed side of the diff (the definition was moved up). Confirm before keeping both.
struct Edit {
|
||||
Range range;
|
||||
String string;
|
||||
};
|
||||
// Clamps `pos` into [0, buffer->len], the valid span for a one-past-the-end bound.
int64_t ClampMax(Buffer *buffer, int64_t pos) {
    return Clamp(pos, (int64_t)0, buffer->len);
}
|
||||
// Clamps `pos` into [0, last], where `last` is the index of the final byte
// (buffer->len - 1), or 0 when the buffer is empty.
int64_t ClampMin(Buffer *buffer, int64_t pos) {
    int64_t last_index = Max((int64_t)0, buffer->len - 1);
    return Clamp(pos, (int64_t)0, last_index);
}
|
||||
|
||||
void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
||||
int64_t size_to_delete = 0;
|
||||
int64_t size_to_insert = 0;
|
||||
int64_t end_of_buffer = Max((int64_t)0, buffer->len - 1);
|
||||
For(edits) {
|
||||
it.range.a = Clamp(it.range.a, (int64_t)0, end_of_buffer);
|
||||
it.range.b = Clamp(it.range.b, (int64_t)0, buffer->len);
|
||||
it.range.min = ClampMin(buffer, it.range.min);
|
||||
it.range.max = ClampMax(buffer, it.range.max);
|
||||
size_to_delete += GetRangeSize(it.range);
|
||||
size_to_insert += it.string.len;
|
||||
}
|
||||
@@ -43,10 +53,10 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
||||
ForItem(it2, edits) {
|
||||
if (&it1 == &it2) continue;
|
||||
|
||||
bool a2_inside = it2.range.a >= it1.range.a && it2.range.a < it1.range.b;
|
||||
bool a2_inside = it2.range.min >= it1.range.min && it2.range.min < it1.range.max;
|
||||
Assert(!a2_inside);
|
||||
|
||||
bool b2_inside = it2.range.b > it1.range.a && it2.range.b <= it1.range.b;
|
||||
bool b2_inside = it2.range.max > it1.range.min && it2.range.max <= it1.range.max;
|
||||
Assert(!b2_inside);
|
||||
}
|
||||
}
|
||||
@@ -81,28 +91,28 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
||||
int64_t prev_dest = 0;
|
||||
|
||||
For(edits) {
|
||||
Range source_range = {prev_source, it.range.a};
|
||||
Range source_range = {prev_source, it.range.min};
|
||||
if (GetRangeSize(source_range) != 0) {
|
||||
String source_string = {};
|
||||
source_string.data = buffer->data[buffer->bi] + source_range.a;
|
||||
source_string.data = buffer->data[buffer->bi] + source_range.min;
|
||||
source_string.len = GetRangeSize(source_range);
|
||||
Range dest_range = {prev_dest, prev_dest + source_string.len};
|
||||
writes.add({dest_range, source_string});
|
||||
|
||||
prev_dest = dest_range.b;
|
||||
prev_dest = dest_range.max;
|
||||
}
|
||||
|
||||
Range dest_range = {prev_dest, prev_dest + it.string.len};
|
||||
writes.add({dest_range, it.string});
|
||||
prev_dest = dest_range.b;
|
||||
prev_source = it.range.b;
|
||||
prev_dest = dest_range.max;
|
||||
prev_source = it.range.max;
|
||||
}
|
||||
|
||||
// Add remaining range
|
||||
Range source_range = {prev_source, buffer->len};
|
||||
if (GetRangeSize(source_range)) {
|
||||
String source_string = {};
|
||||
source_string.data = buffer->data[buffer->bi] + source_range.a;
|
||||
source_string.data = buffer->data[buffer->bi] + source_range.min;
|
||||
source_string.len = GetRangeSize(source_range);
|
||||
Range dest_range = {prev_dest, prev_dest + source_string.len};
|
||||
writes.add({dest_range, source_string});
|
||||
@@ -110,7 +120,7 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
||||
|
||||
#if DEBUG_BUILD
|
||||
for (int64_t i = 0; i < writes.len - 1; i += 1) {
|
||||
Assert(writes[i].range.b == writes[i + 1].range.a);
|
||||
Assert(writes[i].range.max == writes[i + 1].range.min);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -123,12 +133,118 @@ void ApplyEdits(Buffer *buffer, Array<Edit> edits) {
|
||||
buffer->bi = dsti;
|
||||
Assert(new_buffer_len == buffer->len + len_offset);
|
||||
buffer->len = new_buffer_len;
|
||||
|
||||
String string = {buffer->data[buffer->bi], buffer->len};
|
||||
buffer->lines = Split(buffer->allocator, string, "\n");
|
||||
}
|
||||
|
||||
// Appends an edit that replaces `range` with `string` to the pending list.
void AddEdit(Array<Edit> *edits, Range range, String string) {
    edits->add(Edit{range, string});
}
|
||||
|
||||
// Moves `pos` by `direction` bytes at a time until it rests on a byte that is not a UTF-8
// continuation byte, or until it leaves [0, buffer->len).
//
// pos:       starting byte offset.
// direction: step per iteration (+1 forward, -1 backward). A step of 0 returns `pos` unchanged.
// Returns the adjusted offset; this may be -1 or buffer->len when the scan runs off either end.
int64_t AdjustUTF8Pos(Buffer *buffer, int64_t pos, int64_t direction = 1) {
    // A zero step could never leave a continuation byte, so bail out instead of spinning.
    if (direction == 0) return pos;

    const char *text = buffer->data[buffer->bi];
    int64_t result = pos;
    while (result >= 0 && result < buffer->len) {
        // Inspect the byte at the current position. The previous code always inspected
        // byte 0, so the position was never adjusted based on what it pointed at.
        if (!IsUTF8ContinuationByte(text[result])) break;
        result += direction;
    }
    return result;
}
|
||||
|
||||
// True when `pos` indexes a valid byte of the buffer, i.e. 0 <= pos < buffer->len.
bool InBounds(Buffer *buffer, int64_t pos) {
    if (pos < 0) return false;
    return pos < buffer->len;
}
|
||||
|
||||
// Returns the byte at `pos` in the current text, or 0 when `pos` is out of bounds.
char GetChar(Buffer *buffer, int64_t pos) {
    if (InBounds(buffer, pos)) {
        return buffer->data[buffer->bi][pos];
    }
    return 0;
}
|
||||
|
||||
// Returns a pointer to the byte at `pos` in the current text, or null when `pos` is out of bounds.
char *GetCharP(Buffer *buffer, int64_t pos) {
    if (InBounds(buffer, pos)) {
        return buffer->data[buffer->bi] + pos;
    }
    return nullptr;
}
|
||||
|
||||
// Decodes the UTF-8 sequence that starts at byte offset `pos` and returns its code point.
//
// codepoint_size: optional out-parameter (may be null). On success it receives the value
//                 UTF8ToUTF32 reports as `advance`. When decoding fails it receives 1, so a
//                 caller that steps forward by this amount (as Advance does) still makes
//                 progress when Assert is compiled out. It is left untouched when `pos`
//                 is out of bounds.
// Returns 0 when `pos` is out of bounds or the bytes cannot be decoded.
uint32_t GetUTF32(Buffer *buffer, int64_t pos, int64_t *codepoint_size) {
    if (!InBounds(buffer, pos)) {
        return 0;
    }

    char *p = buffer->data[buffer->bi] + pos;
    // NOTE(review): the remaining byte count is narrowed to int for UTF8ToUTF32; confirm this
    // is acceptable for buffers larger than INT_MAX bytes.
    int64_t max = buffer->len - pos;
    UTF32Result utf32 = UTF8ToUTF32(p, (int)max);
    Assert(utf32.error == 0);

    if (utf32.error != 0) {
        // Skip a single byte rather than leaving the caller with a stale or zero step.
        if (codepoint_size) codepoint_size[0] = 1;
        return 0;
    }

    if (codepoint_size) codepoint_size[0] = utf32.advance;
    return utf32.out_str;
}
|
||||
|
||||
constexpr int16_t ITERATE_FORWARD = 1;
|
||||
constexpr int16_t ITERATE_BACKWARD = -1;
|
||||
struct BufferIter {
|
||||
Buffer *buffer;
|
||||
int64_t pos;
|
||||
int64_t end;
|
||||
int64_t direction;
|
||||
|
||||
int64_t utf8_codepoint_size;
|
||||
int64_t codepoint_index;
|
||||
uint32_t item;
|
||||
};
|
||||
|
||||
bool IsValid(const BufferIter &iter) {
|
||||
Assert(iter.direction == ITERATE_FORWARD || iter.direction == ITERATE_BACKWARD);
|
||||
|
||||
bool result = false;
|
||||
if (iter.direction == ITERATE_BACKWARD) {
|
||||
result = iter.pos >= iter.end;
|
||||
} else {
|
||||
result = iter.pos < iter.end;
|
||||
}
|
||||
|
||||
if (result) {
|
||||
Assert(!IsUTF8ContinuationByte(GetChar(iter.buffer, iter.pos)));
|
||||
Assert(InBounds(iter.buffer, iter.pos));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Steps the iterator to the next code point in its direction and, if it is still valid,
// decodes that code point into `item` (updating `utf8_codepoint_size` as well).
void Advance(BufferIter *iter) {
    Assert(iter->direction == ITERATE_FORWARD || iter->direction == ITERATE_BACKWARD);

    BufferIter &cursor = *iter;
    cursor.codepoint_index += 1;

    if (cursor.direction != ITERATE_FORWARD) {
        // Backward: step one byte back, then keep going while on a continuation byte.
        cursor.pos = AdjustUTF8Pos(cursor.buffer, cursor.pos - 1, ITERATE_BACKWARD);
    } else {
        // Forward: skip over the code point that was decoded last.
        cursor.pos += cursor.utf8_codepoint_size;
    }

    if (IsValid(cursor)) {
        cursor.item = GetUTF32(cursor.buffer, cursor.pos, &cursor.utf8_codepoint_size);
    }
}
|
||||
|
||||
// Creates an iterator over the code points of `range` (clamped to the buffer).
// Forward iteration starts at range.min and stops before range.max; backward iteration
// starts from range.max and stops after range.min. The returned iterator has already been
// advanced once, so it is ready for an `IsValid(iter)` / `Advance(&iter)` loop.
BufferIter Iterate(Buffer *buffer, Range range, int64_t direction = ITERATE_FORWARD) {
    Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD);
    Assert(!IsUTF8ContinuationByte(GetChar(buffer, range.min)));
    Assert(range.max >= range.min);

    const int64_t first = ClampMin(buffer, range.min);
    const int64_t limit = ClampMax(buffer, range.max);
    const bool backward = direction == ITERATE_BACKWARD;

    BufferIter iter = {}; // remaining members (sizes, index, item) start at zero
    iter.buffer = buffer;
    iter.pos = backward ? limit : first;
    iter.end = backward ? first : limit;
    iter.direction = direction;

    // Prime the iterator: a forward step of size 0 decodes the first item, while a backward
    // step moves from the one-past-the-end bound onto the last code point.
    Advance(&iter);
    return iter;
}
|
||||
|
||||
void RunBufferTests() {
|
||||
Scratch scratch;
|
||||
{
|
||||
@@ -138,6 +254,8 @@ void RunBufferTests() {
|
||||
ApplyEdits(&buffer, edits);
|
||||
String string = {buffer.data[buffer.bi], buffer.len};
|
||||
Assert(string == "Things and other things");
|
||||
Assert(buffer.lines.len == 1);
|
||||
Assert(buffer.lines[0] == "Things and other things");
|
||||
}
|
||||
{
|
||||
Buffer buffer = {scratch};
|
||||
@@ -156,5 +274,64 @@ void RunBufferTests() {
|
||||
|
||||
String string = {buffer.data[buffer.bi], buffer.len};
|
||||
Assert(string == "Memes dna BigOther things");
|
||||
Assert(buffer.lines.len == 1);
|
||||
Assert(buffer.lines[0] == "Memes dna BigOther things");
|
||||
}
|
||||
{
|
||||
Buffer buffer = {scratch};
|
||||
Array<Edit> edits = {scratch};
|
||||
edits.add({
|
||||
{0, 0},
|
||||
"Things and other things\n"
|
||||
"Things and other things\n"
|
||||
});
|
||||
ApplyEdits(&buffer, edits);
|
||||
Assert(buffer.lines.len == 3);
|
||||
Assert(buffer.lines[1] == "Things and other things");
|
||||
Assert(buffer.lines[0] == "Things and other things");
|
||||
Assert(buffer.lines[2] == "");
|
||||
|
||||
{
|
||||
Array<char> s = {scratch};
|
||||
for (BufferIter iter = Iterate(&buffer, {0, 6}); IsValid(iter); Advance(&iter)) {
|
||||
Assert(iter.item < 255);
|
||||
|
||||
s.add((char)iter.item);
|
||||
}
|
||||
String str = {s.data, s.len};
|
||||
Assert(str == "Things");
|
||||
}
|
||||
{
|
||||
Array<char> s = {scratch};
|
||||
for (BufferIter iter = Iterate(&buffer, {0, 6}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) {
|
||||
Assert(iter.item < 255);
|
||||
|
||||
s.add((char)iter.item);
|
||||
}
|
||||
String str = {s.data, s.len};
|
||||
Assert(str == "sgnihT");
|
||||
}
|
||||
{
|
||||
Array<char> s = {scratch};
|
||||
for (BufferIter iter = Iterate(&buffer, {0, buffer.len}); IsValid(iter); Advance(&iter)) {
|
||||
Assert(iter.item < 255);
|
||||
|
||||
s.add((char)iter.item);
|
||||
}
|
||||
String str = {s.data, s.len};
|
||||
String b = {GetCharP(&buffer, 0), buffer.len};
|
||||
Assert(str == b);
|
||||
}
|
||||
{
|
||||
Array<char> s = {scratch};
|
||||
for (BufferIter iter = Iterate(&buffer, {0, buffer.len}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) {
|
||||
Assert(iter.item < 255);
|
||||
|
||||
s.add((char)iter.item);
|
||||
}
|
||||
String str = {s.data, s.len};
|
||||
String b = {GetCharP(&buffer, 0), buffer.len};
|
||||
Assert(str.len == b.len);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user