Files
imgui_transcript_browser/src/text_editor/buffer.cpp
2024-07-04 16:21:02 +02:00

629 lines
19 KiB
C++

/*
Ranges are from 0 to n+1 (one past last index).
<0,4> = 0,1,2,3
These seem the best; I tried other formations but these are
much better than the rest.
First of all you can represent the cursor at the end of the buffer
by doing: <buffer_len, buffer_len>. This action in itself doesn't
select anything, the formation doesn't force you to index the
buffer and so on, it's reduced to a pure position. In the end
you can nicely represent cursors in select mode and non-select mode.
This property of being able to represent pure positions makes
it possible to clamp the values to the same range <0, buffer_len>,
and if the things are past the range we end up with a pure value.
Very nice behaviour: the program won't delete anything.
*/
// Half-open span of buffer positions: covers min, min+1, ..., max-1.
// When min == max the range is empty and denotes a pure position.
struct Range {
int64_t min; // first index covered by the range
int64_t max; // one past last index
// <0,4> = 0,1,2,3
};
// @end_of_buffer - need to make sure that we can actually lookup the end of buffer line.
// This end of buffer is also incorporated into the layout.
// A resolved line of the buffer, as produced by GetLineByIndex and FindLine.
struct Line {
int64_t number; // index of the line inside Buffer::lines
Range range; // span of the line as stored in Buffer::lines
int64_t max_without_new_line; // range.max, reduced by one when the last char of the line is '\n'
};
// A line together with a column inside it.
// FindLineAndColumn fills `column` with a codepoint index, not a byte offset.
struct LineAndColumn {
Line line;
int64_t column;
};
// One replacement to apply to a buffer: _ApplyEdits drops the chars covered
// by `range` and writes `string` in their place. An empty range is a pure
// insertion, an empty string is a pure deletion.
struct Edit {
Range range;
String string;
};
// A selection with two named ends, front and back.
// `range` and `pos` share storage, so pos[0] overlays range.min and pos[1]
// overlays range.max. `ifront` is the index into `pos` of the front end
// (see GetFront); the other entry is the back end (see GetBack).
// NOTE(review): accessing `pos` after writing `range` relies on the compiler
// tolerating reads through the inactive union member - confirm for the
// toolchains in use.
struct Cursor {
union {
Range range;
int64_t pos[2];
};
int64_t ifront; // 0 or 1, as set by MakeCursor
};
// - Buffer should be initialized before use!
// Text storage backed by two char arrays. `bi` selects the array that holds
// the current contents; _ApplyEdits writes the edited text into the other
// array and then flips `bi`.
struct Buffer {
Allocator allocator; // used for both data arrays and for `lines`
char *data[2]; // the two backing arrays, each `cap` chars large
int64_t cap; // allocated size of each backing array
int64_t len; // number of chars in use in data[bi]
int bi; // current buffer index
Array<Range> lines; // range of every line, rebuilt by _ApplyEdits
};
// Number of positions covered by `range` (max is exclusive).
int64_t GetRangeSize(Range range) {
    return range.max - range.min;
}
// Range spanning the whole contents of `buffer`: <0, buffer.len>.
Range GetRange(const Buffer &buffer) {
    Range whole = {};
    whole.min = 0;
    whole.max = buffer.len;
    return whole;
}
// Limits `pos` to <0, buffer.len>. buffer.len itself is allowed because it
// is the "end of buffer" position.
int64_t Clamp(const Buffer &buffer, int64_t pos) {
    return Clamp(pos, static_cast<int64_t>(0), buffer.len);
}
// Limits both ends of `range` to <0, buffer.len>. A range lying completely
// outside the buffer collapses into an empty one.
Range Clamp(const Buffer &buffer, Range range) {
    range.min = Clamp(buffer, range.min);
    range.max = Clamp(buffer, range.max);
    return range;
}
// Empty range positioned at the end of the buffer: <buffer.len, buffer.len>.
Range GetEnd(const Buffer &buffer) {
    Range end = {};
    end.min = buffer.len;
    end.max = buffer.len;
    return end;
}
// Builds a range out of two positions given in any order; the smaller one
// becomes min and the larger one max.
Range MakeRange(int64_t a, int64_t b) {
    if (a <= b) {
        return {a, b};
    }
    return {b, a};
}
// Builds an empty range (a pure position) at `a`.
Range MakeRange(int64_t a) {
    Range point = {};
    point.min = a;
    point.max = a;
    return point;
}
// Position of the cursor's front end, i.e. pos[ifront].
int64_t GetFront(Cursor cursor) {
    return cursor.pos[cursor.ifront];
}
// Position of the cursor's back end: the `pos` entry that is not the front.
int64_t GetBack(Cursor cursor) {
    const int64_t back_index = (cursor.ifront + 1) % 2;
    return cursor.pos[back_index];
}
// Builds a cursor from its front and back positions. The range is always
// stored ordered (min <= max); `ifront` records which end is the front.
// When both positions are equal the front is stored at the max end.
Cursor MakeCursor(int64_t front, int64_t back) {
    const bool front_is_max = front >= back;

    Cursor cursor = {};
    cursor.range.min = front_is_max ? back : front;
    cursor.range.max = front_is_max ? front : back;
    cursor.ifront = front_is_max ? 1 : 0;
    return cursor;
}
// Returns a cursor with the same front as `cursor` and `back` as its back.
Cursor ChangeBack(Cursor cursor, int64_t back) {
    return MakeCursor(GetFront(cursor), back);
}
// Returns a cursor with the same back as `cursor` and `front` as its front.
Cursor ChangeFront(Cursor cursor, int64_t front) {
    return MakeCursor(front, GetBack(cursor));
}
// Appends an edit replacing `range` with `string` to `edits`.
void AddEdit(Array<Edit> *edits, Range range, String string) {
    Edit edit = {};
    edit.range = range;
    edit.string = string;
    edits->add(edit);
}
// True when `pos` indexes an existing char, i.e. 0 <= pos < buffer.len.
// The end-of-buffer position (buffer.len) is NOT in bounds.
bool InBounds(const Buffer &buffer, int64_t pos) {
    if (pos < 0) {
        return false;
    }
    return pos < buffer.len;
}
// Char stored at `pos` in the current backing array, or 0 when `pos` is out
// of bounds.
char GetChar(const Buffer &buffer, int64_t pos) {
    if (InBounds(buffer, pos)) {
        const char *chars = buffer.data[buffer.bi];
        return chars[pos];
    }
    return 0;
}
// Pointer to the char at `pos` in the current backing array, or null when
// `pos` is out of bounds (this includes the end-of-buffer position).
char *GetCharP(const Buffer &buffer, int64_t pos) {
    if (InBounds(buffer, pos)) {
        return buffer.data[buffer.bi] + pos;
    }
    return nullptr;
}
// Non-owning view of the chars covered by `range`, after clamping the range
// to the buffer. With the default range the view covers the whole buffer.
// The data pointer is null when the clamped range starts at buffer.len.
String GetString(const Buffer &buffer, Range range = {0, INT64_MAX}) {
    const Range clamped = Clamp(buffer, range);

    String view = {};
    view.data = GetCharP(buffer, clamped.min);
    view.len = GetRangeSize(clamped);
    return view;
}
// Two ranges are equal when both of their ends match.
bool AreEqual(Range a, Range b) {
    if (a.min != b.min) {
        return false;
    }
    return a.max == b.max;
}
// Two cursors are equal when they cover the same range and have their front
// on the same end.
bool AreEqual(Cursor a, Cursor b) {
    if (a.ifront != b.ifront) {
        return false;
    }
    return AreEqual(a.range, b.range);
}
// True when position `a` lies inside the half-open range `b` (b.max excluded).
bool InRange(int64_t a, Range b) {
    const bool before_start = a < b.min;
    const bool at_or_past_end = a >= b.max;
    return !(before_start || at_or_past_end);
}
void MergeSort(int64_t Count, Edit *First, Edit *Temp) {
// SortKey = range.min
if (Count == 1) {
// NOTE(casey): No work to do.
} else if (Count == 2) {
Edit *EntryA = First;
Edit *EntryB = First + 1;
if (EntryA->range.min > EntryB->range.min) {
Swap(EntryA, EntryB);
}
} else {
int64_t Half0 = Count / 2;
int64_t Half1 = Count - Half0;
Assert(Half0 >= 1);
Assert(Half1 >= 1);
Edit *InHalf0 = First;
Edit *InHalf1 = First + Half0;
Edit *End = First + Count;
MergeSort(Half0, InHalf0, Temp);
MergeSort(Half1, InHalf1, Temp);
Edit *ReadHalf0 = InHalf0;
Edit *ReadHalf1 = InHalf1;
Edit *Out = Temp;
for (int64_t Index = 0;
Index < Count;
++Index) {
if (ReadHalf0 == InHalf1) {
*Out++ = *ReadHalf1++;
} else if (ReadHalf1 == End) {
*Out++ = *ReadHalf0++;
} else if (ReadHalf0->range.min < ReadHalf1->range.min) {
*Out++ = *ReadHalf0++;
} else {
*Out++ = *ReadHalf1++;
}
}
Assert(Out == (Temp + Count));
Assert(ReadHalf0 == InHalf1);
Assert(ReadHalf1 == End);
// TODO(casey): Not really necessary if we ping-pong
for (int64_t Index = 0;
Index < Count;
++Index) {
First[Index] = Temp[Index];
}
}
}
// Applies a batch of edits to `buffer` in one pass.
//
// The new contents are assembled in the backing array that is NOT current
// (data[dsti]) out of the untouched spans of the current array (data[srci])
// and the edit strings; afterwards `bi` is flipped, `len` is updated and
// `lines` is rebuilt from scratch.
//
// Requirements checked with Assert: the buffer is initialized (data[0],
// allocator.proc, at least one line), `edits` is not empty and every edit
// range lies inside <0, buffer->len>. Debug builds additionally check that
// the edit ranges do not overlap.
//
// Every edit range refers to positions in the buffer as it is BEFORE any of
// the edits is applied.
//
// Side effect on the argument: edits.data is handed to MergeSort as its
// temporary storage, so the caller's edit array may come back reordered.
void _ApplyEdits(Buffer *buffer, Array<Edit> edits) {
// NOTE(review): allocator.object is reinterpreted as an Arena here - presumably
// the buffer allocator is always arena backed; confirm against the callers.
Scratch scratch((Arena *)buffer->allocator.object);
Assert(buffer->data[0]);
Assert(buffer->allocator.proc);
Assert(buffer->lines.len);
// Figure out how much we insert and how much we delete so
// we can resize buffers properly if necessary
Assert(edits.len);
int64_t size_to_delete = 0;
int64_t size_to_insert = 0;
For(edits) {
Assert(it.range.min >= 0);
Assert(it.range.max >= it.range.min);
Assert(it.range.max <= buffer->len);
size_to_delete += GetRangeSize(it.range);
size_to_insert += it.string.len;
}
#if DEBUG_BUILD
// Make sure edit ranges don't overlap
ForItem(it1, edits) {
ForItem(it2, edits) {
if (&it1 == &it2) continue;
bool a2_inside = it2.range.min >= it1.range.min && it2.range.min < it1.range.max;
Assert(!a2_inside);
bool b2_inside = it2.range.max > it1.range.min && it2.range.max <= it1.range.max;
Assert(!b2_inside);
}
}
#endif
// We need to sort from lowest to highest based on range.min
{
// The sorted result lives in the scratch copy; the original array only
// serves as the temporary storage of the merge sort.
Array<Edit> edits_copy = edits.copy(scratch);
MergeSort(edits.len, edits_copy.data, edits.data);
edits = edits_copy;
}
// Try resizing the buffers
// Only growth matters: when the edits shrink the text the current capacity
// is already sufficient.
int64_t len_offset = size_to_insert - size_to_delete;
int64_t allocated_size_required = Max((int64_t)0, len_offset);
if (buffer->len + allocated_size_required > buffer->cap) {
int64_t new_cap = AlignUp(buffer->cap + allocated_size_required, 4096);
if (buffer->allocator.proc == NULL) buffer->allocator = GetSystemAllocator();
// Both backing arrays are grown so they always share the same capacity.
for (int i = 0; i < 2; i += 1) {
char *data = AllocArray(buffer->allocator, char, new_cap);
Assert(data);
memcpy(data, buffer->data[i], buffer->len);
Dealloc(buffer->allocator, &buffer->data[i]);
buffer->data[i] = data;
}
buffer->cap = new_cap;
}
// Figure out what we need to write to the second buffer
int srci = buffer->bi;
int dsti = (buffer->bi + 1) % 2;
Array<Edit> writes = {scratch};
// prev_source: first position in the source not consumed yet.
// prev_dest: first position in the destination not written yet.
int64_t prev_source = 0;
int64_t prev_dest = 0;
For(edits) {
TraceLog(LOG_DEBUG, "edit dsti: %d, srci: %d, range: %lld to %lld, string: '%.*s'", dsti, srci, (long long)it.range.min, (long long)it.range.max, Min(5, (int)it.string.len), it.string.data);
// Untouched source text between the previous edit and this one
Range source_range = {prev_source, it.range.min};
if (GetRangeSize(source_range) != 0) {
String source_string = {};
source_string.data = buffer->data[srci] + source_range.min;
source_string.len = GetRangeSize(source_range);
Range dest_range = {prev_dest, prev_dest + source_string.len};
writes.add({dest_range, source_string});
prev_dest = dest_range.max;
}
// The replacement text of the edit itself; the source chars covered by
// it.range are skipped by moving prev_source past them.
Range dest_range = {prev_dest, prev_dest + it.string.len};
writes.add({dest_range, it.string});
prev_dest = dest_range.max;
prev_source = it.range.max;
}
// Add remaining range
Range source_range = {prev_source, buffer->len};
if (GetRangeSize(source_range)) {
String source_string = {};
source_string.data = buffer->data[srci] + source_range.min;
source_string.len = GetRangeSize(source_range);
Range dest_range = {prev_dest, prev_dest + source_string.len};
writes.add({dest_range, source_string});
}
// Make sure there are no gaps between ranges
#if DEBUG_BUILD
for (int64_t i = 0; i < writes.len - 1; i += 1) {
Assert(writes[i].range.max == writes[i + 1].range.min);
}
#endif
// Write to the second buffer
// The writes are contiguous, so the running length doubles as the
// destination offset of the next write.
int64_t new_buffer_len = 0;
For(writes) {
Assert(it.range.min >= 0);
Assert(it.range.max >= 0);
TraceLog(LOG_DEBUG, "write dsti: %d, srci: %d, range: %lld to %lld, string: '%.*s'", dsti, srci, (long long)it.range.min, (long long)it.range.max, Min(5, (int)it.string.len), it.string.data);
memcpy(buffer->data[dsti] + new_buffer_len, it.string.data, it.string.len);
new_buffer_len += it.string.len;
}
// The freshly written array becomes the current one.
buffer->bi = dsti;
Assert(new_buffer_len == buffer->len + len_offset);
buffer->len = new_buffer_len;
// Update lines
// Make sure we always have one line, even if the buffer is empty,
// this way we can nicely clamp things without worrying of getting
// a negative when doing (len - 1).
{
String delimiter = "\n";
String string = {buffer->data[dsti], buffer->len};
buffer->lines.allocator = buffer->allocator;
buffer->lines.clear();
int64_t index = 0;
int64_t base_index = 0;
// Every line range includes its terminating delimiter.
while (Seek(string, delimiter, &index)) {
buffer->lines.add({base_index, base_index + index + delimiter.len});
base_index += index + delimiter.len;
string = string.skip(index + delimiter.len);
}
// Whatever follows the last delimiter (possibly nothing) is the last line.
buffer->lines.add({base_index, base_index + string.len});
}
Assert(buffer->data[0]);
Assert(buffer->allocator.proc);
Assert(buffer->lines.len);
}
// Prepares `buffer` for use: allocates both backing arrays from `allocator`,
// marks the buffer as empty and registers the single empty line every buffer
// is expected to have.
//
// `_size` is the requested capacity in chars; it is rounded up with
// AlignUp(…, 4096). Non-positive requests are treated as a request for one
// char so that the buffer never ends up with zero capacity.
//
// NOTE(review): `buffer->lines` is used as-is apart from its allocator, so it
// presumably has to be zero-initialized by the caller - confirm.
void InitBuffer(Allocator allocator, Buffer *buffer, int64_t _size = 4096) {
    int64_t size = AlignUp(Max(_size, (int64_t)1), 4096);

    buffer->allocator = allocator;
    for (int i = 0; i < 2; i += 1) {
        buffer->data[i] = AllocArray(allocator, char, size);
        Assert(buffer->data[i]);
    }
    buffer->cap = size;

    // Start from a well-defined empty state instead of relying on the caller
    // having zeroed these fields.
    buffer->len = 0;
    buffer->bi = 0;

    buffer->lines.allocator = allocator;
    buffer->lines.add({});
}
// Copies the chars covered by `range` (clamped to the buffer) into memory
// obtained from `allocator`.
// NOTE(review): the null terminator promised by the name has to come from
// Copy() - confirm that it appends one.
String CopyNullTerminated(Allocator allocator, Buffer &buffer, Range range) {
    return Copy(allocator, GetString(buffer, range));
}
// Moves `pos` by `direction` for as long as it stands on a UTF-8 continuation
// byte inside `string`. The result is not clamped: it can be -1 or string.len
// when the walk leaves the string.
int64_t AdjustUTF8Pos(String string, int64_t pos, int64_t direction) {
    while (pos >= 0 && pos < string.len && IsUTF8ContinuationByte(string.data[pos])) {
        pos += direction;
    }
    return pos;
}
int64_t AdjustUTF8Pos(const Buffer &buffer, int64_t pos, int64_t direction = 1, bool clamp = true) {
String string = GetString(buffer);
pos = AdjustUTF8Pos(string, pos, direction);
if (clamp) pos = Clamp(buffer, pos);
return pos;
}
// Decodes the UTF-8 codepoint that starts at `pos`.
//
// Returns the decoded codepoint, or 0 when `pos` is out of bounds or the
// bytes at `pos` cannot be decoded.
//
// `codepoint_size` (optional) receives the size of the codepoint in bytes.
// On a decoding error it receives 1 so that a caller stepping forward by the
// reported size (see Advance) always makes progress instead of reusing a
// stale size. It is left untouched when `pos` is out of bounds.
uint32_t GetUTF32(Buffer &buffer, int64_t pos, int64_t *codepoint_size) {
    if (!InBounds(buffer, pos)) {
        return 0;
    }

    char *p = GetCharP(buffer, pos);

    // The decoder takes an int; cap the remaining length so the cast below
    // cannot wrap on very large buffers.
    int64_t max = buffer.len - pos;
    if (max > 0x7fffffff) max = 0x7fffffff;

    UTF32Result utf32 = UTF8ToUTF32(p, (int)max);
    Assert(utf32.error == 0);
    if (utf32.error != 0) {
        if (codepoint_size) codepoint_size[0] = 1;
        return 0;
    }

    if (codepoint_size) codepoint_size[0] = utf32.advance;
    return utf32.out_str;
}
// Direction values accepted by Iterate, Seek and BufferIter::direction.
// The backward value doubles as the step passed to AdjustUTF8Pos.
constexpr int16_t ITERATE_FORWARD = 1;
constexpr int16_t ITERATE_BACKWARD = -1;
// Codepoint iterator over a range of a buffer, created by Iterate and driven
// with IsValid / Advance.
struct BufferIter {
Buffer *buffer; // buffer being iterated
int64_t pos; // byte position of the current codepoint
int64_t end; // forward: iteration stops when pos reaches it; backward: lowest position still visited
int64_t direction; // ITERATE_FORWARD or ITERATE_BACKWARD
int64_t utf8_codepoint_size; // size in bytes of the current codepoint, as reported by GetUTF32
int64_t codepoint_index; // number of codepoints visited before the current one
uint32_t item; // current codepoint decoded by GetUTF32
};
// True while the iterator still stands on a codepoint of its range.
// Forward iteration treats `end` as exclusive, backward iteration treats it
// as the last position that is still visited.
bool IsValid(const BufferIter &iter) {
    Assert(iter.direction == ITERATE_FORWARD || iter.direction == ITERATE_BACKWARD);

    const bool inside = (iter.direction == ITERATE_BACKWARD) ? (iter.pos >= iter.end)
                                                             : (iter.pos < iter.end);
    if (!inside) {
        return false;
    }

    // A valid iterator must point at the first byte of a codepoint that
    // actually exists in the buffer.
    Assert(!IsUTF8ContinuationByte(GetChar(*iter.buffer, iter.pos)));
    Assert(InBounds(*iter.buffer, iter.pos));
    return true;
}
// Moves the iterator to the next codepoint in its direction and decodes it.
// When the move leaves the range only pos and codepoint_index are updated;
// item and utf8_codepoint_size keep their previous values.
void Advance(BufferIter *iter) {
    Assert(iter->direction == ITERATE_FORWARD || iter->direction == ITERATE_BACKWARD);

    BufferIter &self = *iter;
    self.codepoint_index += 1;

    if (self.direction != ITERATE_FORWARD) {
        // Step one byte back, then keep going until the first byte of the
        // previous codepoint. No clamping, so that stepping before the start
        // of the buffer yields an invalid iterator.
        self.pos = AdjustUTF8Pos(*self.buffer, self.pos - 1, ITERATE_BACKWARD, false);
    } else {
        self.pos += self.utf8_codepoint_size;
    }

    if (IsValid(self)) {
        self.item = GetUTF32(*self.buffer, self.pos, &self.utf8_codepoint_size);
    }
}
// Creates an iterator over the codepoints of `range` (clamped to the buffer)
// already positioned on the first codepoint of the walk: the one at range.min
// when going forward, the last one before range.max when going backward.
BufferIter Iterate(Buffer &buffer, Range range, int64_t direction = ITERATE_FORWARD) {
    Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD);
    Assert(!IsUTF8ContinuationByte(GetChar(buffer, range.min)));
    Assert(range.max >= range.min);

    const Range bounds = Clamp(buffer, range);

    BufferIter iter = {};
    iter.buffer = &buffer;
    iter.direction = direction;
    if (direction == ITERATE_BACKWARD) {
        iter.pos = bounds.max;
        iter.end = bounds.min;
    } else {
        iter.pos = bounds.min;
        iter.end = bounds.max;
    }

    // Start one step "before" the first item: the initial Advance bumps the
    // index to 0, moves by a codepoint size of 0 when going forward (or steps
    // back from range.max when going backward) and decodes the item.
    iter.codepoint_index = -1;
    Advance(&iter);
    return iter;
}
// Returns the line with index `line`; indices outside the buffer are clamped
// to the first / last line.
Line GetLineByIndex(Buffer &buffer, int64_t line) {
    Assert(buffer.lines.len);

    const int64_t last_line = buffer.lines.len - 1;
    line = Clamp(line, (int64_t)0, last_line);

    const Range span = buffer.lines[line];

    Line result = {};
    result.number = line;
    result.range = span;
    result.max_without_new_line = span.max;

    // Leave the terminating new line out of max_without_new_line.
    const bool has_chars = span.max > span.min;
    if (has_chars && GetChar(buffer, span.max - 1) == '\n') {
        result.max_without_new_line = span.max - 1;
    }
    return result;
}
// Returns the line that contains `pos`. The end-of-buffer position belongs to
// the last line. For any other position outside the buffer a zeroed Line is
// returned.
Line FindLine(Buffer &buffer, int64_t pos) {
    int64_t found = -1;
    for (int64_t i = 0; i < buffer.lines.len; i += 1) {
        const Range &candidate = buffer.lines[i];
        if (pos >= candidate.min && pos < candidate.max) {
            found = i;
            break;
        }
    }

    if (found < 0 && pos == buffer.len) { // @end of buffer
        found = buffer.lines.len - 1;
        Assert(buffer.lines[found].max == buffer.len);
    }

    Line result = {};
    if (found < 0) {
        return result;
    }

    const Range span = buffer.lines[found];
    result.number = found;
    result.range = span;
    result.max_without_new_line = span.max;
    if (span.max > span.min && GetChar(buffer, span.max - 1) == '\n') {
        result.max_without_new_line -= 1;
    }
    return result;
}
// Resolves a byte position into its line and its column. The column is the
// index of the codepoint that starts at `pos`, counted from the start of the
// line.
//
// The end-of-buffer position is one past the last codepoint of the last line,
// so its column equals the number of codepoints in that line.
// A position that is not the start of a codepoint of the line gets column 0.
LineAndColumn FindLineAndColumn(Buffer &buffer, int64_t pos) {
    LineAndColumn result = {};
    result.column = 0;
    result.line = FindLine(buffer, pos);

    // @end_of_buffer
    if (pos == result.line.range.min && pos == result.line.range.max) {
        return result;
    }

    int64_t codepoints_in_line = 0;
    for (BufferIter iter = Iterate(buffer, result.line.range); IsValid(iter); Advance(&iter)) {
        // @todo: make sure we handle when there is invalid unicode in the stream
        if (iter.pos == pos) {
            result.column = iter.codepoint_index;
            return result;
        }
        codepoints_in_line = iter.codepoint_index + 1;
    }

    // @end_of_buffer - the iteration above covers <min, max> and therefore
    // never visits `max` itself. Without this the cursor standing at the end
    // of a non-empty last line would be reported at column 0.
    if (pos == result.line.range.max) {
        result.column = codepoints_in_line;
    }
    return result;
}
// Converts a line index and a codepoint column into a byte position.
// The line index is clamped by GetLineByIndex. When the line has fewer
// codepoints than `column`, the position just before the line's new line
// (max_without_new_line) is returned.
int64_t FindPos(Buffer &buffer, int64_t line_number, int64_t column) {
    const Line line = GetLineByIndex(buffer, line_number);

    BufferIter iter = Iterate(buffer, line.range);
    while (IsValid(iter)) {
        if (iter.codepoint_index == column) {
            return iter.pos;
        }
        Advance(&iter);
    }
    return line.max_without_new_line;
}
// Finds the next word boundary starting at `pos` and walking in `direction`.
//
// The char next to `pos` in the walking direction decides what is searched
// for: when it is whitespace the walk stops at the first non-whitespace
// codepoint, otherwise at the first whitespace codepoint.
// When no boundary is found the result is the end of the buffer (forward) or
// 0 (backward).
int64_t Seek(Buffer &buffer, int64_t pos, int64_t direction = ITERATE_FORWARD) {
    Assert(direction == ITERATE_FORWARD || direction == ITERATE_BACKWARD);
    const bool forward = direction == ITERATE_FORWARD;

    // ( - inclusive
    // < - non-inclusive
    Range span = {};
    char probe = 0;
    if (forward) {
        // (pos + 1, end>
        span.min = AdjustUTF8Pos(buffer, pos + 1, ITERATE_FORWARD);
        span.max = buffer.len;
        probe = GetChar(buffer, span.min);
    } else {
        // (0, pos>
        span.min = 0;
        span.max = pos;
        const int64_t neighbour = AdjustUTF8Pos(buffer, span.max - 1, ITERATE_BACKWARD);
        probe = GetChar(buffer, neighbour);
    }

    // Standing on whitespace: look for a word. Standing on a word: look for
    // whitespace.
    const bool started_on_whitespace = IsWhitespace(probe);

    int64_t result = direction == ITERATE_BACKWARD ? 0 : buffer.len;

    BufferIter iter = Iterate(buffer, span, direction);
    int64_t previous = iter.pos;
    while (IsValid(iter)) {
        const bool is_whitespace = iter.item < 255 && IsWhitespace(iter.item);
        if (is_whitespace != started_on_whitespace) {
            if (started_on_whitespace) {
                // Reached a word: stop on the last whitespace position.
                result = previous;
            } else if (forward) {
                // Reached whitespace going forward: stop right on it.
                result = iter.pos;
            } else {
                // Reached whitespace going backward: stop on the first
                // codepoint of the word.
                result = previous;
            }
            break;
        }
        previous = iter.pos;
        Advance(&iter);
    }
    return result;
}
// Position one codepoint to the right of `pos`, clamped to the buffer.
int64_t MoveRight(Buffer &buffer, int64_t pos) {
    const int64_t next = AdjustUTF8Pos(buffer, pos + 1);
    Assert(next >= 0 && next <= buffer.len);
    return next;
}
// Position one codepoint to the left of `pos`, clamped to the buffer.
int64_t MoveLeft(Buffer &buffer, int64_t pos) {
    const int64_t previous = AdjustUTF8Pos(buffer, pos - 1, ITERATE_BACKWARD);
    Assert(previous >= 0 && previous <= buffer.len);
    return previous;
}
// Position `count` lines below `pos`, keeping the codepoint column where the
// target line is long enough.
int64_t MoveDown(Buffer &buffer, int64_t pos, int64_t count = 1) {
    const LineAndColumn here = FindLineAndColumn(buffer, pos);
    const int64_t target_line = here.line.number + count;
    return FindPos(buffer, target_line, here.column);
}
// Position `count` lines above `pos`, keeping the codepoint column where the
// target line is long enough.
int64_t MoveUp(Buffer &buffer, int64_t pos, int64_t count = 1) {
    const LineAndColumn here = FindLineAndColumn(buffer, pos);
    const int64_t target_line = here.line.number - count;
    return FindPos(buffer, target_line, here.column);
}
// Range between the word boundary found by seeking backward from `pos` and
// the one found by seeking forward from it.
Range EncloseWord(Buffer &buffer, int64_t pos) {
    const int64_t word_start = Seek(buffer, pos, ITERATE_BACKWARD);
    const int64_t word_end = Seek(buffer, pos, ITERATE_FORWARD);
    return {word_start, word_end};
}