Big update
This commit is contained in:
@@ -1,68 +1,31 @@
|
||||
struct TimeString {
|
||||
uint16_t hour;
|
||||
uint16_t minute;
|
||||
uint16_t second;
|
||||
String string;
|
||||
};
|
||||
|
||||
// Parses an .srt subtitle file into a flat list of (timestamp, text line) entries.
//
// The file is read through `arena`, split on "\n", and every line is trimmed;
// lines that end up empty are dropped. The remaining lines are consumed as
// sections of the form:
//
//     <running section number: 1, 2, 3, ...>
//     <timestamp line starting with "HH:MM:SS">
//     <one or more text lines>
//
// Each text line becomes its own TimeString carrying the section's hour,
// minute and second. Afterwards, an entry whose text equals the text of the
// entry before it is removed.
//
// arena:    allocator used for the file contents, the split lines, the
//           formatted section numbers and the returned array.
// filename: path of the .srt file to read.
// returns:  the parsed entries; empty when no section could be parsed.
Array<TimeString> ParseSrtFile(Arena *arena, String filename) {
    String content = ReadFile(*arena, filename);
    Array<String> lines = Split(*arena, content, "\n");

    // Trim every line in place and drop the blank ones. `it` and `remove_item`
    // are provided by the IterRemove / IterRemovePrepare macros.
    IterRemove(lines) {
        IterRemovePrepare(lines);
        it = Trim(it);
        if (it.len == 0) remove_item = true;
    }

    long section_number = 1;
    Array<TimeString> time_strings = {*arena};
    for (int i = 0; i < lines.len;) {
        // First line of a section: the running section number.
        String number_line = lines[i++];
        long num = strtol(number_line.data, NULL, 10);
        Assert(section_number == num);
        section_number += 1;

        // A section number on the very last line has no timestamp line after
        // it; stop instead of indexing past the end of `lines`.
        if (i >= lines.len) break;

        TimeString item = {};
        String time_line = lines[i++];
        // Offsets 0, 3 and 6 address HH, MM and SS inside "HH:MM:SS". Only read
        // them when the line is long enough to hold all three fields; a shorter
        // (malformed) line leaves the time at 00:00:00.
        if (time_line.len >= 8) {
            item.hour = (uint16_t)strtol(time_line.data, NULL, 10);
            item.minute = (uint16_t)strtol(time_line.data + 3, NULL, 10);
            item.second = (uint16_t)strtol(time_line.data + 6, NULL, 10);
        }

        // Everything up to the line that spells the next section number is
        // text of the current section. `section_number` is a long, so it must
        // be formatted with the `l` length modifier.
        String next_section_number = Format(*arena, "%ld", section_number);
        while (i < lines.len && lines[i] != next_section_number) {
            item.string = lines[i];
            time_strings.add(item);
            i += 1;
        }
    }

    // Collapse runs of repeated text: drop an entry when its text matches the
    // previous entry's. `i` here is the index supplied by the IterRemove macros.
    // NOTE(review): the `true` flag presumably makes AreEqual case-insensitive - confirm.
    IterRemove(time_strings) {
        IterRemovePrepare(time_strings);
        if (i > 0 && AreEqual(time_strings[i - 1].string, time_strings[i].string, true)) {
            remove_item = true;
        }
    }

    return time_strings;
}
|
||||
|
||||
struct TimeFile {
|
||||
Array<TimeString> time_strings;
|
||||
String file;
|
||||
};
|
||||
|
||||
struct ParseThreadIO {
|
||||
Array<String> input_files;
|
||||
};
|
||||
|
||||
struct XToTimeString {
|
||||
String string; // String inside transcript arena
|
||||
uint16_t hour;
|
||||
uint16_t minute;
|
||||
uint16_t second;
|
||||
String filepath;
|
||||
// Tells which kind of input an XToSource entry came from, and therefore which
// member of its union (srt / pdf / txt) carries the location.
// The values are spelled out; they match the implicit numbering from zero.
enum SourceKind {
    SourceKind_Invalid = 0,
    SourceKind_SRT = 1,
    SourceKind_PDF = 2,
    SourceKind_TXT = 3,
};
|
||||
|
||||
struct XToSource {
|
||||
SourceKind kind;
|
||||
String string; // String inside x arena
|
||||
String filepath;
|
||||
union {
|
||||
struct {
|
||||
uint16_t hour;
|
||||
uint16_t minute;
|
||||
uint16_t second;
|
||||
} srt;
|
||||
struct {
|
||||
int page;
|
||||
} pdf;
|
||||
struct {
|
||||
int64_t row;
|
||||
} txt;
|
||||
};
|
||||
};
|
||||
|
||||
struct FileLoadResult {
|
||||
@@ -72,7 +35,7 @@ struct FileLoadResult {
|
||||
|
||||
// Arena that receives the copies of all loaded text (see Copy(XArena, ...) in
// ParseFilesWork). Set up by XInitLoading.
Arena XArena;
|
||||
// Held while worker threads append to XArena and the lookup arrays, and while
// the find function scans them.
std::mutex XArenaAddMutex;
|
||||
// Entries of the XToTimeString form: copied text plus hour/minute/second and file path.
Array<XToTimeString> XToTime;
|
||||
// Entries of the XToSource form: copied text plus source kind, file path and location.
Array<XToSource> XToSourceArray;
|
||||
// One result per processed file or folder; failures carry a message string.
Array<FileLoadResult> XFileLoadResults;
|
||||
// Incremented with AtomicIncrement each time a ParseFilesWork invocation finishes.
int64_t XLoadThreadComplete;
|
||||
|
||||
@@ -80,24 +43,49 @@ void XInitLoading() {
|
||||
InitArena(&XArena);
|
||||
XArena.align = 0;
|
||||
|
||||
XToTime.reserve(1000000);
|
||||
XToSourceArray.reserve(1000000);
|
||||
XFileLoadResults.reserve(10000);
|
||||
}
|
||||
|
||||
// Work-queue entry point: loads every file listed in the ParseThreadIO passed
// through `data` and appends its text to the global lookup data.
//
// Per file, chosen by extension:
//   .srt          - parsed with ParseSrtFile; every text line is copied into
//                   XArena and recorded in XToSourceArray with its timestamp.
//   .txt / .html  - read whole and split on "\n"; every line is copied into
//                   XArena and recorded with its line index.
//   anything else - reported as an internal error.
//
// Exactly one entry is added to XFileLoadResults per file. XArenaAddMutex is
// held only while XArena and XToSourceArray are being appended to. When all
// files are done, XLoadThreadComplete is atomically incremented.
WORK_FUNCTION(ParseFilesWork) {
    ParseThreadIO *io = (ParseThreadIO *)data;
    ForItem(it_time_file, io->input_files) {
        // Temporary allocations for this file only.
        Scratch scratch;

        if (EndsWith(it_time_file, ".srt", true)) {
            Array<TimeString> time_strings = ParseSrtFile(scratch, it_time_file);

            XArenaAddMutex.lock();
            For(time_strings) {
                String s = Copy(XArena, it.string);
                // Overwrite the byte right after the copied text with a space.
                // NOTE(review): presumably Copy reserves a terminator byte there,
                // so consecutive entries in XArena end up space-separated - confirm.
                s.data[s.len] = ' ';

                XToSource t = {SourceKind_SRT, s, it_time_file};
                t.srt = {it.hour, it.minute, it.second};
                XToSourceArray.add(t);
            }
            XArenaAddMutex.unlock();
            XFileLoadResults.bounded_add({it_time_file});
        } else if (EndsWith(it_time_file, ".txt", true) || EndsWith(it_time_file, ".html", true)) {
            String string = ReadFile(scratch, it_time_file);
            if (string.data) {
                // Splitting only touches this iteration's scratch memory, so it
                // is done before taking the lock to keep the critical section short.
                Array<String> lines = Split(scratch, string, "\n");

                XArenaAddMutex.lock();
                For(lines) {
                    String s = Copy(XArena, it);
                    // Same trailing-space overwrite as in the .srt branch.
                    s.data[s.len] = ' ';

                    XToSource t = {SourceKind_TXT, s, it_time_file};
                    t.txt = {lines.get_index(it)};
                    XToSourceArray.add(t);
                }
                XArenaAddMutex.unlock();
                XFileLoadResults.bounded_add({it_time_file});
            } else {
                XFileLoadResults.bounded_add({it_time_file, "failed to read the file"});
            }
        } else {
            XFileLoadResults.bounded_add({it_time_file, "internal error: extension is not supported but got propagated to parse stage"});
        }
    }
    AtomicIncrement(&XLoadThreadComplete);
}
|
||||
@@ -118,31 +106,33 @@ void XUnlockFileResults() {
|
||||
|
||||
void XAddFolder(String folder, Array<String> *filenames) {
|
||||
Scratch scratch;
|
||||
Array<String> srt_files = {scratch};
|
||||
Array<String> files_to_parse = {scratch};
|
||||
for (FileIter iter = IterateFiles(scratch, folder); IsValid(iter); Advance(&iter)) {
|
||||
String file = Copy(Perm, iter.absolute_path);
|
||||
filenames->add(file);
|
||||
if (EndsWith(iter.filename, ".srt")) {
|
||||
srt_files.add(file);
|
||||
if (EndsWith(iter.filename, ".srt", true)) {
|
||||
files_to_parse.add(file);
|
||||
} else if (EndsWith(iter.filename, ".txt", true) || EndsWith(iter.filename, ".html", true)) {
|
||||
files_to_parse.add(file);
|
||||
}
|
||||
}
|
||||
|
||||
if (srt_files.len == 0) {
|
||||
if (files_to_parse.len == 0) {
|
||||
XFileLoadResults.add({Copy(Perm, folder), "no files found"});
|
||||
return;
|
||||
}
|
||||
|
||||
int64_t thread_count = MainWorkQueue.thread_count;
|
||||
int64_t files_per_thread = srt_files.len / thread_count;
|
||||
int64_t remainder = srt_files.len % thread_count;
|
||||
int64_t files_per_thread = files_to_parse.len / thread_count;
|
||||
int64_t remainder = files_to_parse.len % thread_count;
|
||||
int64_t fi = 0;
|
||||
|
||||
Array<ParseThreadIO> io = {Perm};
|
||||
io.reserve(thread_count);
|
||||
for (int ti = 0; ti < thread_count; ti += 1) {
|
||||
Array<String> files = {Perm};
|
||||
for (int i = 0; fi < srt_files.len && i < files_per_thread + remainder; fi += 1, i += 1) {
|
||||
files.add(srt_files[fi]);
|
||||
for (int i = 0; fi < files_to_parse.len && i < files_per_thread + remainder; fi += 1, i += 1) {
|
||||
files.add(files_to_parse[fi]);
|
||||
}
|
||||
if (remainder) remainder = 0;
|
||||
|
||||
@@ -152,11 +142,11 @@ void XAddFolder(String folder, Array<String> *filenames) {
|
||||
}
|
||||
}
|
||||
|
||||
XToTimeString *XFindItem(String string) {
|
||||
XToTimeString *result = NULL;
|
||||
XToSource *XFindSource(String string) {
|
||||
XToSource *result = NULL;
|
||||
|
||||
XArenaAddMutex.lock();
|
||||
For(XToTime) {
|
||||
For(XToSourceArray) {
|
||||
uintptr_t begin = (uintptr_t)(it.string.data);
|
||||
uintptr_t end = (uintptr_t)(it.string.data + it.string.len);
|
||||
uintptr_t needle = (uintptr_t)string.data;
|
||||
|
||||
Reference in New Issue
Block a user