Init repo
This commit is contained in:
78
src/pdf_browser/read_pdf.cpp
Normal file
78
src/pdf_browser/read_pdf.cpp
Normal file
@@ -0,0 +1,78 @@
|
||||
struct PdfPage {
|
||||
String content;
|
||||
int64_t number;
|
||||
};
|
||||
|
||||
// Concatenates every string in `list` into one newly allocated string,
// inserting `separator` between consecutive elements (never after the last).
//
// allocator: provides the memory for the resulting buffer.
// list:      the pieces to join, in order.
// separator: text placed between pieces; defaults to a single space.
//
// Returns an empty String, without allocating, when the pieces contain no
// characters in total (even if `list` itself is non-empty). Otherwise the
// returned data is followed by a terminating 0 byte that is not counted
// in `len`.
String Merge(Allocator allocator, Array<String> list, String separator = " ") {
    int64_t char_count = 0;
    For(list) char_count += it.len;
    if (char_count == 0) return {};

    // char_count > 0 guarantees at least one element, so the gap count
    // (list.len - 1) cannot be negative.
    int64_t sep_size = (list.len - 1) * separator.len;

    // Exact number of payload bytes in the result.
    int64_t size = char_count + sep_size;

    // One extra byte for the terminating 0.
    char *buff = (char *)AllocSize(allocator, sizeof(char) * (size + 1));
    String string = {buff, 0};
    For(list) {
        Assert(string.len + it.len <= size);
        memcpy(string.data + string.len, it.data, it.len);
        string.len += it.len;
        if (!list.is_last(it)) {
            Assert(string.len + separator.len <= size);
            memcpy(string.data + string.len, separator.data, separator.len);
            string.len += separator.len;
        }
    }
    Assert(string.len == size);

    // Terminate directly after the last payload byte. Writing one position
    // further would leave an uninitialised byte between the text and the 0.
    string.data[string.len] = 0;
    return string;
}
|
||||
|
||||
// Opens the PDF at `filename` and collects the text of each page.
//
// allocator: owns the returned array and each page's `content` string.
// filename:  path of the PDF; a 0-terminated copy is made before it is
//            handed to PDFio.
//
// Returns one PdfPage per page object that PDFio could provide, with
// `number` set to the page index plus one. Returns an empty array when the
// file cannot be opened. Pages whose object is NULL and streams that fail to
// open are skipped.
//
// Only tokens whose first character is '(' contribute text; that leading
// character is dropped and the pieces of a page are joined with no separator.
Array<PdfPage> ReadPDF(Allocator allocator, String filename) {
    Scratch scratch;
    char buffer[1024];

    char *filename_char = NullTerminate(scratch, filename);
    pdfio_file_t *file = pdfioFileOpen(filename_char, NULL, NULL, NULL, NULL);
    if (file == NULL) return {};
    defer { pdfioFileClose(file); };

    Array<PdfPage> pages = {allocator};
    // size_t indices match the PDFio page/stream counts and avoid mixing
    // signed and unsigned values in the loop conditions.
    for (size_t i = 0, num_pages = pdfioFileGetNumPages(file); i < num_pages; i++) {
        pdfio_obj_t *obj = pdfioFileGetPage(file, i);
        if (obj == NULL) continue;

        PdfPage *page = pages.alloc();
        page->number = (int64_t)i + 1;

        // Temporary pieces are gathered in scratch storage; Merge below copies
        // them into memory from `allocator`.
        Array<String> content = {scratch};
        size_t num_streams = pdfioPageGetNumStreams(obj);
        for (size_t j = 0; j < num_streams; j++) {
            pdfio_stream_t *st = pdfioPageOpenStream(obj, j, true);
            if (st == NULL) continue;
            defer { pdfioStreamClose(st); };

            // Tracks whether the next string token is the first one since the
            // last text-positioning operator. It only matters for the
            // separator insertion that is currently disabled below.
            bool first = true;
            while (pdfioStreamGetToken(st, buffer, sizeof(buffer))) {
                if (buffer[0] == '(') {
                    if (!first) {
                        // content.add(" ");
                    }
                    first = false;

                    // Skip the leading '(' of the token.
                    content.add(Copy(scratch, buffer + 1));
                } else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\"")) {
                    // Operators the disabled code treats as a line break.
                    // content.add("\n");
                    first = true;
                }
            }

            // if (!first) content.add("\n");
        }

        page->content = Merge(allocator, content, "");
    }

    return pages;
}
|
||||
Reference in New Issue
Block a user