/* Ranges are from 0 to n+1 (one past last index). <0,4> = 0,1,2,3 These seem the best, I tried other formations but these are much better then the rest. First of all you can represent the cursor at the end of the buffer by doing: . This action in itself doesn't select anything, the formation doesn't force you to index the buffer and so on, it's reduced to a pure position. In the end you can nicely represent cursors in select mode and non-select mode. This property of being able to represent pure positions makes it possible to clamp the values to the same range <0, buffer_len>, and if the things are past the range we end up with a pure value. Very nice behaviour, The program won't delete anything. */ struct Range { int64_t min; int64_t max; // one past last index // <0,4> = 0,1,2,3 }; struct Line { int64_t number; Range range; }; struct LineAndColumn { Line line; int64_t column; }; struct Edit { Range range; String string; }; // - Buffer should be initialized before use! struct Buffer { Allocator allocator; char *data[2]; int64_t cap; int64_t len; int bi; // current buffer index Array lines; }; int64_t GetRangeSize(Range range) { int64_t result = range.max - range.min; return result; } Range GetRange(const Buffer &buffer) { Range result = {0, buffer.len}; return result; } int64_t Clamp(const Buffer &buffer, int64_t pos) { int64_t result = Clamp(pos, (int64_t)0, buffer.len); return result; } Range Clamp(const Buffer &buffer, Range range) { Range result = {}; result.min = Clamp(buffer, range.min); result.max = Clamp(buffer, range.max); return result; } Range GetEnd(const Buffer &buffer) { Range range = {buffer.len, buffer.len}; return range; } void AddEdit(Array *edits, Range range, String string) { edits->add({range, string}); } bool InBounds(const Buffer &buffer, int64_t pos) { bool result = pos >= 0 && pos < buffer.len; return result; } char GetChar(const Buffer &buffer, int64_t pos) { if (!InBounds(buffer, pos)) return 0; return buffer.data[buffer.bi][pos]; } char *GetCharP(const Buffer &buffer, int64_t pos) { if (!InBounds(buffer, pos)) return 0; return buffer.data[buffer.bi] + pos; } String GetString(const Buffer &buffer, Range range = {0, INT64_MAX}) { range = Clamp(buffer, range); String result = {GetCharP(buffer, range.min), GetRangeSize(range)}; return result; } void ApplyEdits(Buffer *buffer, Array edits) { Assert(edits.len); int64_t size_to_delete = 0; int64_t size_to_insert = 0; For(edits) { Assert(it.range.min >= 0); Assert(it.range.max >= it.range.min); Assert(it.range.max <= buffer->len); size_to_delete += GetRangeSize(it.range); size_to_insert += it.string.len; } #if DEBUG_BUILD // Make sure edit ranges don't overlap ForItem(it1, edits) { ForItem(it2, edits) { if (&it1 == &it2) continue; bool a2_inside = it2.range.min >= it1.range.min && it2.range.min <= it1.range.max; Assert(!a2_inside); bool b2_inside = it2.range.max >= it1.range.min && it2.range.max <= it1.range.max; Assert(!b2_inside); } } #endif int64_t len_offset = size_to_insert - size_to_delete; int64_t allocated_size_required = Max((int64_t)0, len_offset); if (buffer->len + allocated_size_required > buffer->cap) { int64_t new_cap = AlignUp(buffer->cap + allocated_size_required, 4096); if (buffer->allocator.proc == NULL) buffer->allocator = GetSystemAllocator(); for (int i = 0; i < 2; i += 1) { char *data = AllocArray(buffer->allocator, char, new_cap); Assert(data); memcpy(data, buffer->data[i], buffer->len); Dealloc(buffer->allocator, &buffer->data[i]); buffer->data[i] = data; } buffer->cap = new_cap; } int srci = buffer->bi; int dsti = (buffer->bi + 1) % 2; Scratch scratch((Arena *)buffer->allocator.object); Array writes = {scratch}; int64_t prev_source = 0; int64_t prev_dest = 0; For(edits) { Range source_range = {prev_source, it.range.min}; if (GetRangeSize(source_range) != 0) { String source_string = {}; source_string.data = buffer->data[srci] + source_range.min; source_string.len = GetRangeSize(source_range); Range dest_range = {prev_dest, prev_dest + source_string.len}; writes.add({dest_range, source_string}); prev_dest = dest_range.max; } Range dest_range = {prev_dest, prev_dest + it.string.len}; writes.add({dest_range, it.string}); prev_dest = dest_range.max; prev_source = it.range.max; } // Add remaining range Range source_range = {prev_source, buffer->len}; if (GetRangeSize(source_range)) { String source_string = {}; source_string.data = buffer->data[srci] + source_range.min; source_string.len = GetRangeSize(source_range); Range dest_range = {prev_dest, prev_dest + source_string.len}; writes.add({dest_range, source_string}); } #if DEBUG_BUILD for (int64_t i = 0; i < writes.len - 1; i += 1) { Assert(writes[i].range.max == writes[i + 1].range.min); } #endif int64_t new_buffer_len = 0; For(writes) { memcpy(buffer->data[dsti] + new_buffer_len, it.string.data, it.string.len); new_buffer_len += it.string.len; } buffer->bi = dsti; Assert(new_buffer_len == buffer->len + len_offset); buffer->len = new_buffer_len; // Update lines // Make sure we always have one line, even if the buffer is empty, // this way we can nicely clamp things without worrying of getting // a negative when doing (len - 1). { String delimiter = "\n"; String string = {buffer->data[dsti], buffer->len}; buffer->lines.allocator = buffer->allocator; buffer->lines.clear(); int64_t index = 0; int64_t base_index = 0; while (Seek(string, delimiter, &index)) { buffer->lines.add({base_index, base_index + index}); base_index += index + delimiter.len; string = string.skip(index + delimiter.len); } buffer->lines.add({base_index, base_index + string.len}); } } void InitBuffer(Buffer *buffer) { Scratch scratch; Array edits = {}; AddEdit(&edits, {}, ""); ApplyEdits(buffer, edits); } String CopyNullTerminated(Allocator allocator, Buffer &buffer, Range range) { String buffer_string = GetString(buffer, range); String result = Copy(allocator, buffer_string); return result; } int64_t AdjustUTF8Pos(String string, int64_t pos, int64_t direction) { for (; pos >= 0 && pos < string.len;) { if (IsUTF8ContinuationByte(string.data[pos])) { pos += direction; } else { break; } } return pos; } int64_t AdjustUTF8Pos(const Buffer &buffer, int64_t pos, int64_t direction = 1, bool clamp = true) { String string = GetString(buffer); pos = AdjustUTF8Pos(string, pos, direction); if (clamp) pos = Clamp(buffer, pos); return pos; } uint32_t GetUTF32(Buffer &buffer, int64_t pos, int64_t *codepoint_size) { if (!InBounds(buffer, pos)) { return 0; } char *p = GetCharP(buffer, pos); int64_t max = buffer.len - pos; UTF32Result utf32 = UTF8ToUTF32(p, (int)max); Assert(utf32.error == 0); if (utf32.error != 0) return 0; if (codepoint_size) codepoint_size[0] = utf32.advance; return utf32.out_str; } constexpr int16_t ITERATE_FORWARD = 1; constexpr int16_t ITERATE_BACKWARD = -1; struct BufferIter { Buffer *buffer; int64_t pos; int64_t end; int64_t direction; int64_t utf8_codepoint_size; int64_t codepoint_index; uint32_t item; }; bool IsValid(const BufferIter &iter) { Assert(iter.direction == ITERATE_FORWARD || iter.direction == ITERATE_BACKWARD); bool result = false; if (iter.direction == ITERATE_BACKWARD) { result = iter.pos >= iter.end; } else { result = iter.pos < iter.end; } if (result) { Assert(!IsUTF8ContinuationByte(GetChar(*iter.buffer, iter.pos))); Assert(InBounds(*iter.buffer, iter.pos)); } return result; } void Advance(BufferIter *iter) { Assert(iter->direction == ITERATE_FORWARD || iter->direction == ITERATE_BACKWARD); iter->codepoint_index += 1; if (iter->direction == ITERATE_FORWARD) { iter->pos += iter->utf8_codepoint_size; } else { iter->pos = AdjustUTF8Pos(*iter->buffer, iter->pos - 1, ITERATE_BACKWARD, false); } if (!IsValid(*iter)) return; iter->item = GetUTF32(*iter->buffer, iter->pos, &iter->utf8_codepoint_size); } BufferIter Iterate(Buffer &buffer, Range range, int64_t direction = ITERATE_FORWARD) { Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD); Assert(!IsUTF8ContinuationByte(GetChar(buffer, range.min))); Assert(range.max >= range.min); range.min = Clamp(buffer, range.min); range.max = Clamp(buffer, range.max); BufferIter result = {&buffer, range.min, range.max, direction}; if (direction == ITERATE_BACKWARD) { result.end = range.min; result.pos = range.max; } Advance(&result); return result; } Line GetLine(Buffer &buffer, int64_t line) { Assert(buffer.lines.len); line = Clamp(line, (int64_t)0, buffer.lines.len - 1); Range range = buffer.lines[line]; Line result = {line, range}; return result; } Line FindLine(Buffer &buffer, int64_t pos) { For(buffer.lines) { // The program is doing '<= it.max' so as to include the new line. // Otherwise this function wouldn't be able to find certain positions. if (pos >= it.min && pos <= it.max) { Line result = {buffer.lines.get_index(it), it}; return result; } } return {}; } LineAndColumn FindLineAndColumn(Buffer &buffer, int64_t pos) { LineAndColumn result = {}; result.column = 1; result.line = FindLine(buffer, pos); for (BufferIter iter = Iterate(buffer, result.line.range); IsValid(iter); Advance(&iter)) { // @todo: make sure we handle when there is invalid unicode in the stream if (iter.pos == pos) { result.column = iter.codepoint_index; break; } } return result; } int64_t FindPos(Buffer &buffer, int64_t line_number, int64_t column) { Line line = GetLine(buffer, line_number); int64_t result = line.range.max; for (BufferIter iter = Iterate(buffer, line.range); IsValid(iter); Advance(&iter)) { if (iter.codepoint_index == column) { result = iter.pos; break; } } return result; } int64_t Seek(Buffer &buffer, int64_t pos, int64_t direction = ITERATE_FORWARD) { Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD); // ( - inclusive // < - non-inclusive int64_t min = 0; int64_t max = 0; char c = 0; if (direction == ITERATE_FORWARD) { // (pos + 1, end> min = AdjustUTF8Pos(buffer, pos + 1, ITERATE_FORWARD); max = buffer.len; c = GetChar(buffer, min); } else { // (0, pos> max = pos; min = 0; int64_t next = AdjustUTF8Pos(buffer, max - 1, ITERATE_BACKWARD); c = GetChar(buffer, next); } bool standing_on_whitespace = IsWhitespace(c); bool seek_whitespace = standing_on_whitespace == false; bool seek_word = standing_on_whitespace; int64_t result = direction == ITERATE_BACKWARD ? 0 : buffer.len; BufferIter iter = Iterate(buffer, {min, max}, direction); int64_t prev_pos = iter.pos; for (; IsValid(iter); Advance(&iter)) { bool char_is_whitespace = iter.item < 255 && IsWhitespace(iter.item); if (seek_word && char_is_whitespace == false) { result = prev_pos; break; } if (seek_whitespace && char_is_whitespace) { if (direction == ITERATE_FORWARD) { result = iter.pos; } else { result = prev_pos; } break; } prev_pos = iter.pos; } return result; } void RunBufferTests() { Scratch scratch; { Buffer buffer = {scratch}; Array edits = {scratch}; AddEdit(&edits, {0, 0}, "Things and other things"); ApplyEdits(&buffer, edits); String string = {buffer.data[buffer.bi], buffer.len}; Assert(string == "Things and other things"); Assert(buffer.lines.len == 1); Assert(GetString(buffer, buffer.lines[0]) == "Things and other things"); edits.clear(); AddEdit(&edits, GetEnd(buffer), " memes"); ApplyEdits(&buffer, edits); Assert(GetString(buffer, buffer.lines[0]) == "Things and other things memes"); } { Buffer buffer = {scratch}; Array edits = {scratch}; edits.add({}); ApplyEdits(&buffer, edits); Assert("" == GetString(buffer)); Assert(buffer.lines.len == 1); } { Buffer buffer = {scratch}; Array edits = {scratch}; edits.add({ {0, 0}, "Things and other things" }); ApplyEdits(&buffer, edits); edits.clear(); AddEdit(&edits, {0, 6}, "Memes"); AddEdit(&edits, {7, 10}, "dna"); AddEdit(&edits, {11, 16}, "BigOther"); ApplyEdits(&buffer, edits); String string = {buffer.data[buffer.bi], buffer.len}; Assert(string == "Memes dna BigOther things"); Assert(buffer.lines.len == 1); Assert(GetString(buffer, buffer.lines[0]) == "Memes dna BigOther things"); } { Buffer buffer = {scratch}; Array edits = {scratch}; edits.add({ {0, 0}, "Things and other things\n" "Things and other things\n" }); ApplyEdits(&buffer, edits); Assert(buffer.lines.len == 3); Assert(GetString(buffer, buffer.lines[1]) == "Things and other things"); Assert(GetString(buffer, buffer.lines[0]) == "Things and other things"); Assert(GetString(buffer, buffer.lines[2]) == ""); { Array s = {scratch}; for (BufferIter iter = Iterate(buffer, {0, 6}); IsValid(iter); Advance(&iter)) { Assert(iter.item < 255); s.add((char)iter.item); } String str = {s.data, s.len}; Assert(str == "Things"); } { Array s = {scratch}; for (BufferIter iter = Iterate(buffer, {0, 6}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) { Assert(iter.item < 255); s.add((char)iter.item); } String str = {s.data, s.len}; Assert(str == "sgnihT"); } { Array s = {scratch}; for (BufferIter iter = Iterate(buffer, {0, buffer.len}); IsValid(iter); Advance(&iter)) { Assert(iter.item < 255); s.add((char)iter.item); } String str = {s.data, s.len}; String b = {GetCharP(buffer, 0), buffer.len}; Assert(str == b); } { Array s = {scratch}; for (BufferIter iter = Iterate(buffer, {0, buffer.len}, ITERATE_BACKWARD); IsValid(iter); Advance(&iter)) { Assert(iter.item < 255); s.add((char)iter.item); } String str = {s.data, s.len}; String b = {GetCharP(buffer, 0), buffer.len}; Assert(str.len == b.len); } } { Arena *arena = AllocArena(); Buffer buffer = {*arena}; Array edits = {*arena}; edits.add({ {0, 0}, "Things and other things\n" "Things and other things\n" }); int iters = 100; for (int i = 0; i < iters; i += 1) { ApplyEdits(&buffer, edits); for (int64_t j = 0; j < i; j += 1) { String string = GetString(buffer, {edits[0].string.len * j, edits[0].string.len * (j + 1)}); Assert(string == edits[0].string); } } Assert(edits[0].string.len * iters == buffer.len); Assert(buffer.lines.len == iters * 2 + 1); Line l0 = FindLine(buffer, 4); Assert(l0.number == 0); Assert(l0.range.min == 0); Assert(l0.range.max < 30); Line l1 = FindLine(buffer, 30); Assert(l1.number == 1); Assert(l1.range.min > 20); Assert(l1.range.max < 50); Assert(l1.range.max == GetLine(buffer, 1).range.max); // Make sure there are no gaps for (int64_t i = 100; i < 600; i += 1) { Line l2 = FindLine(buffer, i); Assert(l2.number > 0); } } }