Reading multiple directories in transcript browser

This commit is contained in:
Krzosa Karol
2024-06-19 10:40:20 +02:00
parent 055be9b058
commit 7fe6aa2a97
5 changed files with 88 additions and 173 deletions

View File

@@ -1,8 +1,3 @@
// One page of text extracted from a PDF file (produced by ReadPDF).
struct PdfPage {
// Text of the page. ReadPDF fills this with the page's string tokens
// merged together using an empty separator.
String content;
// Page number. ReadPDF assigns 1-based values (page index + 1).
int64_t number;
};
String Merge(Allocator allocator, Array<String> list, String separator = " ") {
int64_t char_count = 0;
For(list) char_count += it.len;
@@ -27,52 +22,3 @@ String Merge(Allocator allocator, Array<String> list, String separator = " ") {
string.data[size] = 0;
return string;
}
// Extracts the string tokens of every page in the PDF file at `filename`.
//
// Returns one PdfPage per page object that pdfioFileGetPage could return.
// Each page gets `number` set to its 1-based index and `content` set to all
// kept string tokens of that page merged with an empty separator. The result
// array is created with `allocator` and each page's content is merged with
// `allocator`; temporary strings use a local Scratch.
//
// Returns an empty array when the file cannot be opened. Pages or streams
// that fail to open are skipped (a page whose streams all fail still appears,
// with whatever Merge yields for an empty list).
Array<PdfPage> ReadPDF(Allocator allocator, String filename) {
    Scratch scratch;
    char token[1024];

    // pdfio expects a C string; no password or error callbacks are installed.
    char *filename_char = NullTerminate(scratch, filename);
    pdfio_file_t *file = pdfioFileOpen(filename_char, NULL, NULL, NULL, NULL);
    if (file == NULL) return {};
    defer { pdfioFileClose(file); };

    Array<PdfPage> pages = {allocator};

    // pdfio reports counts and takes indices as size_t, so the loop counters
    // use size_t as well to avoid narrowing and signed/unsigned comparisons.
    size_t num_pages = pdfioFileGetNumPages(file);
    for (size_t i = 0; i < num_pages; i++) {
        pdfio_obj_t *obj = pdfioFileGetPage(file, i);
        if (obj == NULL) continue;

        PdfPage *page = pages.alloc();
        page->number = (int64_t)i + 1;

        Array<String> content = {scratch};
        size_t num_streams = pdfioPageGetNumStreams(obj);
        for (size_t j = 0; j < num_streams; j++) {
            pdfio_stream_t *st = pdfioPageOpenStream(obj, j, true);
            if (st == NULL) continue;
            defer { pdfioStreamClose(st); };

            while (pdfioStreamGetToken(st, token, sizeof(token))) {
                // Keep only tokens that start with '(' and drop that leading
                // character. Every other token is ignored, including the
                // text-positioning operators (Td, TD, T*, ', "): no space or
                // newline separator is inserted between fragments.
                if (token[0] == '(') {
                    content.add(Copy(scratch, token + 1));
                }
            }
        }

        page->content = Merge(allocator, content, "");
    }
    return pages;
}