Init repo

This commit is contained in:
Krzosa Karol
2024-06-19 06:51:06 +02:00
commit a75c8a2e4f
50 changed files with 13616 additions and 0 deletions

View File

@@ -0,0 +1,78 @@
// Text extracted from a single page of a PDF document, as produced by ReadPDF.
struct PdfPage {
// All string tokens found in the page's content streams, joined together
// with an empty separator.
String content;
// 1-based page number (ReadPDF stores the page index + 1).
int64_t number;
};
// Joins every string in `list` into one newly allocated, zero-terminated
// String, placing `separator` between consecutive elements.
//
//   allocator - provides the memory for the returned buffer.
//   list      - pieces to join, in order.
//   separator - text inserted between neighbouring pieces (default: " ").
//
// Returns an empty String without allocating when the pieces contain no
// characters at all (this also covers a non-empty list of empty strings).
String Merge(Allocator allocator, Array<String> list, String separator = " ") {
    int64_t char_count = 0;
    For(list) char_count += it.len;
    if (char_count == 0) return {};

    // char_count > 0 implies at least one element, so the separator count
    // (one fewer than the number of pieces) cannot go negative.
    int64_t sep_count = list.len - 1;
    int64_t total_len = char_count + sep_count * separator.len;

    // One extra byte for the terminating zero.
    char *buff = (char *)AllocSize(allocator, sizeof(char) * (total_len + 1));
    String string = {buff, 0};
    For(list) {
        Assert(string.len + it.len <= total_len);
        memcpy(string.data + string.len, it.data, it.len);
        string.len += it.len;
        if (!list.is_last(it)) {
            Assert(string.len + separator.len <= total_len);
            memcpy(string.data + string.len, separator.data, separator.len);
            string.len += separator.len;
        }
    }
    Assert(string.len == total_len);

    // The terminator must sit directly after the last copied byte; writing it
    // any further out would leave an uninitialised byte inside the C string.
    string.data[string.len] = 0;
    return string;
}
// Opens the PDF at `filename` with PDFio and collects the text of each page.
//
//   allocator - backs the returned array and every page's `content` string.
//   filename  - path of the PDF file to read.
//
// Returns one PdfPage per page object that could be fetched, in page order,
// with `number` set to the 1-based page index. Returns an empty array when
// the file cannot be opened. Pages or streams that PDFio fails to provide
// are skipped.
Array<PdfPage> ReadPDF(Allocator allocator, String filename) {
    Scratch scratch;
    char buffer[1024]; // receives one token at a time from pdfioStreamGetToken

    char *filename_char = NullTerminate(scratch, filename);
    pdfio_file_t *file = pdfioFileOpen(filename_char, NULL, NULL, NULL, NULL);
    if (file == NULL) return {};
    defer { pdfioFileClose(file); };

    Array<PdfPage> pages = {allocator};

    // PDFio reports counts as size_t and takes size_t indices, so the loop
    // counters use size_t as well (avoids narrowing and signed/unsigned
    // comparisons).
    size_t num_pages = pdfioFileGetNumPages(file);
    for (size_t i = 0; i < num_pages; i++) {
        pdfio_obj_t *obj = pdfioFileGetPage(file, i);
        if (obj == NULL) continue;

        PdfPage *page = pages.alloc();
        page->number = (int64_t)i + 1;

        // Temporary pieces live in scratch memory; only the merged result is
        // allocated from the caller's allocator.
        Array<String> content = {scratch};

        size_t num_streams = pdfioPageGetNumStreams(obj);
        for (size_t j = 0; j < num_streams; j++) {
            pdfio_stream_t *st = pdfioPageOpenStream(obj, j, true);
            if (st == NULL) continue;
            defer { pdfioStreamClose(st); };

            // Tracks whether the next string is the first one on its line.
            // Only needed by the separator insertion that is currently
            // disabled below; kept so it can be re-enabled easily.
            bool first = true;
            while (pdfioStreamGetToken(st, buffer, sizeof(buffer))) {
                if (buffer[0] == '(') {
                    // Token starting with '(' is treated as a text string;
                    // the leading '(' is skipped when copying.
                    if (first) {
                        first = false;
                    } else {
                        // content.add(" ");
                    }
                    content.add(Copy(scratch, buffer + 1));
                } else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\"")) {
                    // Operators handled as "start of a new line of text".
                    // content.add("\n");
                    first = true;
                }
            }
            // if (!first) content.add("\n");
        }

        page->content = Merge(allocator, content, "");
    }
    return pages;
}