Big update
This commit is contained in:
76
src/transcript_browser/read_pdf.cpp
Normal file
76
src/transcript_browser/read_pdf.cpp
Normal file
@@ -0,0 +1,76 @@
|
||||
struct PDFPage {
|
||||
String string;
|
||||
int number;
|
||||
};
|
||||
|
||||
struct PDF {
|
||||
String filename;
|
||||
Array<PDFPage> pages;
|
||||
};
|
||||
|
||||
// @todo: pull request the object close
|
||||
// @todo: something needs to be done with unicode codepoints
|
||||
PDF pdfioReadPDF(Allocator allocator, String filename) {
|
||||
Assert(filename.data[filename.len] == 0);
|
||||
PDF result = {};
|
||||
result.pages = {allocator};
|
||||
result.filename = filename;
|
||||
|
||||
pdfio_file_t *file = pdfioFileOpen(filename.data, NULL, NULL, NULL, NULL);
|
||||
Assert(file);
|
||||
defer { pdfioFileClose(file); };
|
||||
|
||||
char buffer[1024];
|
||||
size_t page_count = pdfioFileGetNumPages(file);
|
||||
result.pages.reserve(page_count);
|
||||
|
||||
for (size_t page_i = 0; page_i < page_count; page_i += 1) {
|
||||
pdfio_obj_t *obj = pdfioFileGetPage(file, page_i);
|
||||
if (obj == NULL) continue;
|
||||
defer { pdfioObjClose(obj); };
|
||||
PDFPage *page = result.pages.alloc();
|
||||
page->number = (int)page_i;
|
||||
|
||||
Scratch scratch((Arena *)allocator.object);
|
||||
Array<String> strings = {scratch};
|
||||
size_t num_streams = pdfioPageGetNumStreams(obj);
|
||||
for (size_t stream_i = 0; stream_i < num_streams; stream_i += 1) {
|
||||
pdfio_stream_t *st = pdfioPageOpenStream(obj, stream_i, true);
|
||||
if (st == NULL) continue;
|
||||
defer { pdfioStreamClose(st); };
|
||||
|
||||
bool first = true;
|
||||
while (pdfioStreamGetToken(st, buffer, sizeof(buffer))) {
|
||||
if (buffer[0] == '(') {
|
||||
if (first) {
|
||||
first = false;
|
||||
}
|
||||
|
||||
strings.add(Copy(scratch, buffer + 1));
|
||||
} else if (!strcmp(buffer, "Td") || !strcmp(buffer, "TD") || !strcmp(buffer, "T*") || !strcmp(buffer, "\'") || !strcmp(buffer, "\"")) {
|
||||
first = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
page->string = Merge(allocator, strings, "");
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#if 1
|
||||
extern "C" void OutputDebugStringA(const char *);
|
||||
void Printf(const char *string, ...) {
|
||||
Scratch scratch;
|
||||
STRING_FORMAT(scratch, string, result);
|
||||
OutputDebugStringA(result.data);
|
||||
}
|
||||
#else
|
||||
#define Printf(...) (0)
|
||||
#endif
|
||||
|
||||
// Logs every page of `pdf` through Printf as "<page number>, <page text>",
// one page per line.
void PrintPDF(PDF pdf) {
    for (int page_i = 0; page_i < pdf.pages.len; page_i += 1) {
        const PDFPage &page = pdf.pages[page_i];
        Printf("%d, %.*s\n", page.number, FmtString(page.string));
    }
}
|
||||
63
src/transcript_browser/read_srt.cpp
Normal file
63
src/transcript_browser/read_srt.cpp
Normal file
@@ -0,0 +1,63 @@
|
||||
struct TimeString {
|
||||
uint16_t hour;
|
||||
uint16_t minute;
|
||||
uint16_t second;
|
||||
String string;
|
||||
};
|
||||
|
||||
// Parses the SRT subtitle file at `filename` into a flat list of text lines,
// each tagged with the start time (hour/minute/second) of its section.
//
// Expected layout after blank lines are removed: a section number line
// (1, 2, 3, ...), a timestamp line starting with "HH:MM:SS", then one or more
// text lines, up to the line that equals the next section number.
//
// arena:    backs the file content, the split lines and the returned array.
// filename: path of the .srt file to read.
//
// Returns one TimeString per text line; an entry whose text equals the text
// of the entry before it is removed. A section number that is out of
// sequence trips an Assert.
Array<TimeString> ParseSrtFile(Arena *arena, String filename) {
    String content = ReadFile(*arena, filename);
    Array<String> lines = Split(*arena, content, "\n");

    // Trim every line and drop the ones that end up empty.
    IterRemove(lines) {
        IterRemovePrepare(lines);
        it = Trim(it);
        if (it.len == 0) remove_item = true;
    }

    long section_number = 1;
    Array<TimeString> time_strings = {*arena};
    for (int i = 0; i < lines.len;) {
        String it0 = lines[i++];
        long num = strtol(it0.data, NULL, 10);
        Assert(section_number == num);
        section_number += 1;

        // A truncated file can end right after a section number; there is no
        // timestamp line to read in that case.
        if (i >= lines.len) break;

        // Only the start time is parsed: "HH:MM:SS" at offsets 0, 3 and 6.
        TimeString item = {};
        String it1 = lines[i++];
        item.hour = (uint16_t)strtol(it1.data, NULL, 10);
        item.minute = (uint16_t)strtol(it1.data + 3, NULL, 10);
        item.second = (uint16_t)strtol(it1.data + 6, NULL, 10);

        // Everything up to the next section number is text of this section.
        // The cast keeps the argument type in agreement with "%d".
        String next_section_number = Format(*arena, "%d", (int)section_number);
        while (i < lines.len && lines[i] != next_section_number) {
            item.string = lines[i];
            time_strings.add(item);
            i += 1;
        }
    }

    // Remove entries that repeat the text of the entry before them.
    // NOTE(review): `i` here is presumably the index provided by IterRemove,
    // and the `true` flag presumably selects a case-insensitive compare;
    // confirm against their definitions.
    IterRemove(time_strings) {
        IterRemovePrepare(time_strings);
        if (i > 0 && AreEqual(time_strings[i - 1].string, time_strings[i].string, true)) {
            remove_item = true;
        }
    }

    return time_strings;
}
|
||||
|
||||
// Finds the video file that belongs to the subtitle file `srt_path`.
//
// The last two period-separated suffixes are chopped off `srt_path`
// (".srt", then a language tag such as ".en"). The first entry of
// `filenames` that starts with the remaining stem and ends in ".mkv",
// ".webm" or ".mp4" is returned.
//
// Returns an empty String when no entry matches.
String FindVideoForSRT(Array<String> &filenames, String srt_path) {
    // Inner chop removes ".srt", outer chop removes ".en".
    String stem = ChopLastPeriod(ChopLastPeriod(srt_path));

    For(filenames) {
        if (StartsWith(it, stem) &&
            (EndsWith(it, ".mkv") || EndsWith(it, ".webm") || EndsWith(it, ".mp4"))) {
            return it;
        }
    }

    return {};
}
|
||||
Reference in New Issue
Block a user