/* Ranges are from 0 to n+1 (one past last index). <0,4> = 0,1,2,3 These seem the best, I tried other formations but these are much better then the rest. First of all you can represent the cursor at the end of the buffer by doing: . This action in itself doesn't select anything, the formation doesn't force you to index the buffer and so on, it's reduced to a pure position. In the end you can nicely represent cursors in select mode and non-select mode. This property of being able to represent pure positions makes it possible to clamp the values to the same range <0, buffer_len>, and if the things are past the range we end up with a pure value. Very nice behaviour, The program won't delete anything. */ struct Range { int64_t min; int64_t max; // one past last index // <0,4> = 0,1,2,3 }; // @end_of_buffer - need to make sure that we can actually lookup the end of buffer line. // This end of buffer is also incorporated into the layout. struct Line { int64_t number; Range range; int64_t max_without_new_line; }; struct LineAndColumn { Line line; int64_t column; }; struct Edit { Range range; String string; }; struct Cursor { union { Range range; int64_t pos[2]; }; int64_t ifront; }; // - Buffer should be initialized before use! struct Buffer { Allocator allocator; char *data[2]; int64_t cap; int64_t len; int bi; // current buffer index Array lines; }; int64_t GetRangeSize(Range range) { int64_t result = range.max - range.min; return result; } Range GetRange(const Buffer &buffer) { Range result = {0, buffer.len}; return result; } int64_t Clamp(const Buffer &buffer, int64_t pos) { int64_t result = Clamp(pos, (int64_t)0, buffer.len); return result; } Range Clamp(const Buffer &buffer, Range range) { Range result = {}; result.min = Clamp(buffer, range.min); result.max = Clamp(buffer, range.max); return result; } Range GetEnd(const Buffer &buffer) { Range range = {buffer.len, buffer.len}; return range; } Range MakeRange(int64_t a, int64_t b) { Range result = {}; result.min = Min(a, b); result.max = Max(a, b); return result; } Range MakeRange(int64_t a) { Range result = {a, a}; return result; } int64_t GetFront(Cursor cursor) { int64_t result = cursor.pos[cursor.ifront]; return result; } int64_t GetBack(Cursor cursor) { int64_t index = (cursor.ifront + 1) % 2; int64_t result = cursor.pos[index]; return result; } Cursor MakeCursor(int64_t front, int64_t back) { Cursor result = {}; if (front >= back) { result.range.min = back; result.range.max = front; result.ifront = 1; } else { result.range.min = front; result.range.max = back; result.ifront = 0; } return result; } Cursor ChangeBack(Cursor cursor, int64_t back) { int64_t front = GetFront(cursor); Cursor result = MakeCursor(front, back); return result; } Cursor ChangeFront(Cursor cursor, int64_t front) { int64_t back = GetBack(cursor); Cursor result = MakeCursor(front, back); return result; } void AddEdit(Array *edits, Range range, String string) { edits->add({range, string}); } bool InBounds(const Buffer &buffer, int64_t pos) { bool result = pos >= 0 && pos < buffer.len; return result; } char GetChar(const Buffer &buffer, int64_t pos) { if (!InBounds(buffer, pos)) return 0; return buffer.data[buffer.bi][pos]; } char *GetCharP(const Buffer &buffer, int64_t pos) { if (!InBounds(buffer, pos)) return 0; return buffer.data[buffer.bi] + pos; } String GetString(const Buffer &buffer, Range range = {0, INT64_MAX}) { range = Clamp(buffer, range); String result = {GetCharP(buffer, range.min), GetRangeSize(range)}; return result; } bool AreEqual(Range a, Range b) { bool result = a.min == b.min && a.max == b.max; return result; } bool AreEqual(Cursor a, Cursor b) { bool result = AreEqual(a.range, b.range) && a.ifront == b.ifront; return result; } bool InRange(int64_t a, Range b) { bool result = a >= b.min && a < b.max; return result; } void MergeSort(int64_t Count, Edit *First, Edit *Temp) { // SortKey = range.min if (Count == 1) { // NOTE(casey): No work to do. } else if (Count == 2) { Edit *EntryA = First; Edit *EntryB = First + 1; if (EntryA->range.min > EntryB->range.min) { Swap(EntryA, EntryB); } } else { int64_t Half0 = Count / 2; int64_t Half1 = Count - Half0; Assert(Half0 >= 1); Assert(Half1 >= 1); Edit *InHalf0 = First; Edit *InHalf1 = First + Half0; Edit *End = First + Count; MergeSort(Half0, InHalf0, Temp); MergeSort(Half1, InHalf1, Temp); Edit *ReadHalf0 = InHalf0; Edit *ReadHalf1 = InHalf1; Edit *Out = Temp; for (int64_t Index = 0; Index < Count; ++Index) { if (ReadHalf0 == InHalf1) { *Out++ = *ReadHalf1++; } else if (ReadHalf1 == End) { *Out++ = *ReadHalf0++; } else if (ReadHalf0->range.min < ReadHalf1->range.min) { *Out++ = *ReadHalf0++; } else { *Out++ = *ReadHalf1++; } } Assert(Out == (Temp + Count)); Assert(ReadHalf0 == InHalf1); Assert(ReadHalf1 == End); // TODO(casey): Not really necessary if we ping-pong for (int64_t Index = 0; Index < Count; ++Index) { First[Index] = Temp[Index]; } } } void _ApplyEdits(Buffer *buffer, Array edits) { Scratch scratch((Arena *)buffer->allocator.object); Assert(buffer->data[0]); Assert(buffer->allocator.proc); Assert(buffer->lines.len); // Figure out how much we insert and how much we delete so // we can resize buffers properly if necessary Assert(edits.len); int64_t size_to_delete = 0; int64_t size_to_insert = 0; For(edits) { Assert(it.range.min >= 0); Assert(it.range.max >= it.range.min); Assert(it.range.max <= buffer->len); size_to_delete += GetRangeSize(it.range); size_to_insert += it.string.len; } #if DEBUG_BUILD // Make sure edit ranges don't overlap ForItem(it1, edits) { ForItem(it2, edits) { if (&it1 == &it2) continue; bool a2_inside = it2.range.min >= it1.range.min && it2.range.min < it1.range.max; Assert(!a2_inside); bool b2_inside = it2.range.max > it1.range.min && it2.range.max <= it1.range.max; Assert(!b2_inside); } } #endif // We need to sort from lowest to higest based on range.min { Array edits_copy = edits.copy(scratch); MergeSort(edits.len, edits_copy.data, edits.data); edits = edits_copy; } // Try resizing the buffers int64_t len_offset = size_to_insert - size_to_delete; int64_t allocated_size_required = Max((int64_t)0, len_offset); if (buffer->len + allocated_size_required > buffer->cap) { int64_t new_cap = AlignUp(buffer->cap + allocated_size_required, 4096); if (buffer->allocator.proc == NULL) buffer->allocator = GetSystemAllocator(); for (int i = 0; i < 2; i += 1) { char *data = AllocArray(buffer->allocator, char, new_cap); Assert(data); memcpy(data, buffer->data[i], buffer->len); Dealloc(buffer->allocator, &buffer->data[i]); buffer->data[i] = data; } buffer->cap = new_cap; } // Figure out what we need to write to the second buffer int srci = buffer->bi; int dsti = (buffer->bi + 1) % 2; Array writes = {scratch}; int64_t prev_source = 0; int64_t prev_dest = 0; For(edits) { TraceLog(LOG_DEBUG, "edit dsti: %d, srci: %d, range: %lld to %lld, string: '%.*s'", dsti, srci, (long long)it.range.min, (long long)it.range.max, Min(5, (int)it.string.len), it.string.data); Range source_range = {prev_source, it.range.min}; if (GetRangeSize(source_range) != 0) { String source_string = {}; source_string.data = buffer->data[srci] + source_range.min; source_string.len = GetRangeSize(source_range); Range dest_range = {prev_dest, prev_dest + source_string.len}; writes.add({dest_range, source_string}); prev_dest = dest_range.max; } Range dest_range = {prev_dest, prev_dest + it.string.len}; writes.add({dest_range, it.string}); prev_dest = dest_range.max; prev_source = it.range.max; } // Add remaining range Range source_range = {prev_source, buffer->len}; if (GetRangeSize(source_range)) { String source_string = {}; source_string.data = buffer->data[srci] + source_range.min; source_string.len = GetRangeSize(source_range); Range dest_range = {prev_dest, prev_dest + source_string.len}; writes.add({dest_range, source_string}); } // Make sure there are no gaps between ranges #if DEBUG_BUILD for (int64_t i = 0; i < writes.len - 1; i += 1) { Assert(writes[i].range.max == writes[i + 1].range.min); } #endif // Write to the second buffer int64_t new_buffer_len = 0; For(writes) { Assert(it.range.min >= 0); Assert(it.range.max >= 0); TraceLog(LOG_DEBUG, "write dsti: %d, srci: %d, range: %lld to %lld, string: '%.*s'", dsti, srci, (long long)it.range.min, (long long)it.range.max, Min(5, (int)it.string.len), it.string.data); memcpy(buffer->data[dsti] + new_buffer_len, it.string.data, it.string.len); new_buffer_len += it.string.len; } buffer->bi = dsti; Assert(new_buffer_len == buffer->len + len_offset); buffer->len = new_buffer_len; // Update lines // Make sure we always have one line, even if the buffer is empty, // this way we can nicely clamp things without worrying of getting // a negative when doing (len - 1). { String delimiter = "\n"; String string = {buffer->data[dsti], buffer->len}; buffer->lines.allocator = buffer->allocator; buffer->lines.clear(); int64_t index = 0; int64_t base_index = 0; while (Seek(string, delimiter, &index)) { buffer->lines.add({base_index, base_index + index + delimiter.len}); base_index += index + delimiter.len; string = string.skip(index + delimiter.len); } buffer->lines.add({base_index, base_index + string.len}); } Assert(buffer->data[0]); Assert(buffer->allocator.proc); Assert(buffer->lines.len); } void InitBuffer(Allocator allocator, Buffer *buffer, int64_t _size = 4096) { int64_t size = AlignUp(_size, 4096); buffer->allocator = allocator; for (int i = 0; i < 2; i += 1) { buffer->data[i] = AllocArray(allocator, char, size); Assert(buffer->data[i]); } buffer->cap = size; buffer->lines.allocator = allocator; buffer->lines.add({}); } String CopyNullTerminated(Allocator allocator, Buffer &buffer, Range range) { String buffer_string = GetString(buffer, range); String result = Copy(allocator, buffer_string); return result; } int64_t AdjustUTF8Pos(String string, int64_t pos, int64_t direction) { for (; pos >= 0 && pos < string.len;) { if (IsUTF8ContinuationByte(string.data[pos])) { pos += direction; } else { break; } } return pos; } int64_t AdjustUTF8Pos(const Buffer &buffer, int64_t pos, int64_t direction = 1, bool clamp = true) { String string = GetString(buffer); pos = AdjustUTF8Pos(string, pos, direction); if (clamp) pos = Clamp(buffer, pos); return pos; } uint32_t GetUTF32(Buffer &buffer, int64_t pos, int64_t *codepoint_size) { if (!InBounds(buffer, pos)) { return 0; } char *p = GetCharP(buffer, pos); int64_t max = buffer.len - pos; UTF32Result utf32 = UTF8ToUTF32(p, (int)max); Assert(utf32.error == 0); if (utf32.error != 0) return 0; if (codepoint_size) codepoint_size[0] = utf32.advance; return utf32.out_str; } constexpr int16_t ITERATE_FORWARD = 1; constexpr int16_t ITERATE_BACKWARD = -1; struct BufferIter { Buffer *buffer; int64_t pos; int64_t end; int64_t direction; int64_t utf8_codepoint_size; int64_t codepoint_index; uint32_t item; }; bool IsValid(const BufferIter &iter) { Assert(iter.direction == ITERATE_FORWARD || iter.direction == ITERATE_BACKWARD); bool result = false; if (iter.direction == ITERATE_BACKWARD) { result = iter.pos >= iter.end; } else { result = iter.pos < iter.end; } if (result) { Assert(!IsUTF8ContinuationByte(GetChar(*iter.buffer, iter.pos))); Assert(InBounds(*iter.buffer, iter.pos)); } return result; } void Advance(BufferIter *iter) { Assert(iter->direction == ITERATE_FORWARD || iter->direction == ITERATE_BACKWARD); iter->codepoint_index += 1; if (iter->direction == ITERATE_FORWARD) { iter->pos += iter->utf8_codepoint_size; } else { iter->pos = AdjustUTF8Pos(*iter->buffer, iter->pos - 1, ITERATE_BACKWARD, false); } if (!IsValid(*iter)) return; iter->item = GetUTF32(*iter->buffer, iter->pos, &iter->utf8_codepoint_size); } BufferIter Iterate(Buffer &buffer, Range range, int64_t direction = ITERATE_FORWARD) { Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD); Assert(!IsUTF8ContinuationByte(GetChar(buffer, range.min))); Assert(range.max >= range.min); range.min = Clamp(buffer, range.min); range.max = Clamp(buffer, range.max); BufferIter result = {&buffer, range.min, range.max, direction}; result.codepoint_index = -1; if (direction == ITERATE_BACKWARD) { result.end = range.min; result.pos = range.max; } Advance(&result); return result; } Line GetLineByIndex(Buffer &buffer, int64_t line) { Assert(buffer.lines.len); line = Clamp(line, (int64_t)0, buffer.lines.len - 1); Range range = buffer.lines[line]; Line result = {line, range, range.max}; if (range.max > range.min && GetChar(buffer, range.max - 1) == '\n') result.max_without_new_line -= 1; return result; } Line FindLine(Buffer &buffer, int64_t pos) { Line result = {}; For(buffer.lines) { if (pos >= it.min && pos < it.max) { result = {buffer.lines.get_index(it), it, it.max}; if (it.max > it.min && GetChar(buffer, it.max - 1) == '\n') result.max_without_new_line -= 1; return result; } } if (pos == buffer.len) { // @end of buffer auto &it = buffer.lines[buffer.lines.len - 1]; Assert(it.max == buffer.len); result = {buffer.lines.get_index(it), it, it.max}; if (it.max > it.min && GetChar(buffer, it.max - 1) == '\n') result.max_without_new_line -= 1; return result; } return result; } LineAndColumn FindLineAndColumn(Buffer &buffer, int64_t pos) { LineAndColumn result = {}; result.column = 0; result.line = FindLine(buffer, pos); // @end_of_buffer if (pos == result.line.range.min && pos == result.line.range.max) { return result; } for (BufferIter iter = Iterate(buffer, result.line.range); IsValid(iter); Advance(&iter)) { // @todo: make sure we handle when there is invalid unicode in the stream if (iter.pos == pos) { result.column = iter.codepoint_index; break; } } return result; } int64_t FindPos(Buffer &buffer, int64_t line_number, int64_t column) { Line line = GetLineByIndex(buffer, line_number); int64_t result = line.max_without_new_line; for (BufferIter iter = Iterate(buffer, line.range); IsValid(iter); Advance(&iter)) { if (iter.codepoint_index == column) { result = iter.pos; break; } } return result; } int64_t Seek(Buffer &buffer, int64_t pos, int64_t direction = ITERATE_FORWARD) { Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD); // ( - inclusive // < - non-inclusive int64_t min = 0; int64_t max = 0; char c = 0; if (direction == ITERATE_FORWARD) { // (pos + 1, end> min = AdjustUTF8Pos(buffer, pos + 1, ITERATE_FORWARD); max = buffer.len; c = GetChar(buffer, min); } else { // (0, pos> max = pos; min = 0; int64_t next = AdjustUTF8Pos(buffer, max - 1, ITERATE_BACKWARD); c = GetChar(buffer, next); } bool standing_on_whitespace = IsWhitespace(c); bool seek_whitespace = standing_on_whitespace == false; bool seek_word = standing_on_whitespace; int64_t result = direction == ITERATE_BACKWARD ? 0 : buffer.len; BufferIter iter = Iterate(buffer, {min, max}, direction); int64_t prev_pos = iter.pos; for (; IsValid(iter); Advance(&iter)) { bool char_is_whitespace = iter.item < 255 && IsWhitespace(iter.item); if (seek_word && char_is_whitespace == false) { result = prev_pos; break; } if (seek_whitespace && char_is_whitespace) { if (direction == ITERATE_FORWARD) { result = iter.pos; } else { result = prev_pos; } break; } prev_pos = iter.pos; } return result; } int64_t MoveRight(Buffer &buffer, int64_t pos) { pos = pos + 1; pos = AdjustUTF8Pos(buffer, pos); Assert(pos >= 0 && pos <= buffer.len); return pos; } int64_t MoveLeft(Buffer &buffer, int64_t pos) { pos = pos - 1; pos = AdjustUTF8Pos(buffer, pos, -1); Assert(pos >= 0 && pos <= buffer.len); return pos; } int64_t MoveDown(Buffer &buffer, int64_t pos, int64_t count = 1) { LineAndColumn info = FindLineAndColumn(buffer, pos); int64_t new_pos = FindPos(buffer, info.line.number + count, info.column); return new_pos; } int64_t MoveUp(Buffer &buffer, int64_t pos, int64_t count = 1) { LineAndColumn info = FindLineAndColumn(buffer, pos); int64_t new_pos = FindPos(buffer, info.line.number - count, info.column); return new_pos; } Range EncloseWord(Buffer &buffer, int64_t pos) { Range result = {}; result.min = Seek(buffer, pos, ITERATE_BACKWARD); result.max = Seek(buffer, pos, ITERATE_FORWARD); return result; }