Add text editor, misc changes
This commit is contained in:
@@ -1,16 +1,37 @@
|
||||
import os, sys
|
||||
import pypdf
|
||||
|
||||
# Validate the command line before doing any work: the script takes exactly
# one argument, the path of a PDF file or of a folder containing PDFs.
#
# NOTE(review): the lines that used to sit around this check (a reader for a
# hard-coded "Hegels-Logic.pdf" path and a page loop writing "asd.txt") are
# the pre-commit version of the script shown as removed by the diff hunk
# (-1,16 +1,37); they are superseded by write_pdf_for() and are dropped here.
if len(sys.argv) != 2:
    print(
        "expected single argument with filename or folder to extract text from",
        file=sys.stderr,
    )
    # A usage error must not report success; sys.exit is also always
    # available, unlike the site-provided exit() helper.
    sys.exit(1)
|
||||
def write_pdf_for(filename):
    """Extract the text of the PDF at *filename* into ``filename + ".txt"``.

    For every page a marker line ``>>>>>>>>N<<<<<<<<<`` is written (N is the
    1-based page number), followed by the page's extracted text on a single
    line with all newline and "-" characters removed.

    If the output file already exists it is left untouched and the PDF is
    not opened at all.

    Args:
        filename: Path of the PDF file to read.

    Returns:
        None. A "skipping ..." or "generated: ..." line is printed to
        standard output.
    """
    extract_filename = filename + ".txt"

    # Decide whether to skip before touching the PDF, so an already
    # extracted document costs nothing more than a path lookup.
    if os.path.exists(extract_filename):
        print(f"skipping {extract_filename}, file exists")
        return

    reader = pypdf.PdfReader(filename)

    # The context manager closes the output file even if extraction raises
    # part-way through the document.
    with open(extract_filename, "w", encoding="utf-8") as out:
        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            # Collapse the page to one line. Note this removes every "-",
            # i.e. genuine hyphens as well as line-break hyphenation.
            text = text.replace("\n", "").replace("-", "")

            out.write(f">>>>>>>>{page_number}<<<<<<<<<\n")
            out.write(text)
            out.write("\n")

    print(f"generated: {extract_filename}")
|
||||
|
||||
# Dispatch on the single command-line argument: a folder has every entry
# whose name ends in ".pdf" extracted (non-recursive), a regular file is
# extracted directly, anything else is reported.
target_path = sys.argv[1]
if os.path.isdir(target_path):
    for entry in os.listdir(target_path):
        if entry.endswith(".pdf"):
            # os.listdir() yields bare names; join with the folder so the
            # PDF is found regardless of the current working directory.
            write_pdf_for(os.path.join(target_path, entry))
elif os.path.isfile(target_path):
    write_pdf_for(target_path)
else:
    print(f"argument you passed in: {target_path} is not a filename or folder")
|
||||
Reference in New Issue
Block a user