Add text editor, misc changes

This commit is contained in:
Krzosa Karol
2024-06-20 08:17:54 +02:00
parent 7fe6aa2a97
commit b919678913
5 changed files with 184 additions and 90 deletions

View File

@@ -1,16 +1,37 @@
import os, sys
import pypdf
# Usage guard: the script expects exactly one command-line argument, which is
# either a single PDF file or a folder containing PDFs.
if len(sys.argv) != 2:
    # Report on stderr and exit non-zero so shells and callers can detect the
    # usage error; sys.exit is used because the bare `exit` helper is only
    # injected by the `site` module and is not guaranteed to exist.
    print(
        "expected single argument with filename or folder to extract text from",
        file=sys.stderr,
    )
    sys.exit(1)
def write_pdf_for(filename):
    """Extract the text of the PDF at *filename* into ``filename + ".txt"``.

    Every page is written as a ``>>>>>>>>N<<<<<<<<<`` marker line (N is the
    1-based page number) followed by that page's text on a single line, with
    newlines and hyphens removed. If the output file already exists, the PDF
    is not opened and nothing is written.

    Args:
        filename: Path of the PDF file to read.

    Returns:
        None. Progress is reported with ``print``.
    """
    extract_filename = filename + ".txt"
    # Look for existing output before parsing the PDF, so that skipping a
    # file never pays the cost (or the failure modes) of opening it.
    if os.path.exists(extract_filename):
        print(f"skipping {extract_filename}, file exists")
        return

    reader = pypdf.PdfReader(filename)
    # The context manager closes the output even if a page fails to extract.
    with open(extract_filename, "w", encoding="utf-8") as f:
        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            # Flatten the page to one line. Removing "-" presumably rejoins
            # words hyphenated across line breaks, but note that it strips
            # every other hyphen in the text as well.
            text = text.replace("\n", "").replace("-", "")
            f.write(f">>>>>>>>{page_number}<<<<<<<<<\n")
            f.write(text)
            f.write("\n")
    print(f"generated: {extract_filename}")
# Dispatch on the command-line argument: a folder converts every PDF directly
# inside it (non-recursive), a regular file converts just that file.
if os.path.isdir(sys.argv[1]):
    # Sorted so files are processed in a stable, predictable order.
    for file in sorted(os.listdir(sys.argv[1])):
        # Case-insensitive match so "BOOK.PDF" is picked up too.
        if file.lower().endswith(".pdf"):
            # os.listdir yields bare names; join them with the folder so the
            # path resolves regardless of the current working directory.
            write_pdf_for(os.path.join(sys.argv[1], file))
elif os.path.isfile(sys.argv[1]):
    write_pdf_for(sys.argv[1])
else:
    print(f"argument you passed in: {sys.argv[1]} is not a filename or folder")