"""Extract the text of one PDF, or of every PDF in a folder, into .txt files.

Usage: pass a single argument that is either a PDF filename or a folder
containing PDF files. For each PDF ``X`` a text file ``X.txt`` is written
next to it; existing output files are never overwritten.
"""
import os
import sys

import pypdf


def write_pdf_for(filename):
    """Write the extracted text of the PDF at *filename* to ``filename + ".txt"``.

    Each page is emitted as a ``>>>>>>>>N<<<<<<<<<`` marker line (N is the
    1-based page number) followed by the page text on a single line.

    If the output file already exists, a "skipping" message is printed and
    nothing is read or written.

    Args:
        filename: Path to the PDF file to read.
    """
    extract_filename = filename + ".txt"
    # Check for existing output before parsing so skipped PDFs cost nothing.
    if os.path.exists(extract_filename):
        print(f"skipping {extract_filename}, file exists")
        return

    reader = pypdf.PdfReader(filename)

    # Extract every page before creating the output file: if extraction
    # fails part-way, no truncated .txt is left behind to be skipped forever
    # by later runs.
    chunks = []
    for page_number, page in enumerate(reader.pages, start=1):
        text = page.extract_text()
        # Flatten the page to one line and drop hyphens.
        # NOTE(review): this removes *all* hyphens, not only line-break
        # hyphenation, and joins lines without a separator -- kept as-is
        # because downstream consumers may rely on this exact format.
        text = text.replace("\n", "").replace("-", "")
        chunks.append(f">>>>>>>>{page_number}<<<<<<<<<\n{text}\n")

    # The context manager guarantees the handle is closed even if a write fails.
    with open(extract_filename, "w", encoding="utf-8") as f:
        f.writelines(chunks)
    print(f"generated: {extract_filename}")


def main():
    """Validate the command line and extract text from the given file or folder.

    Exits with status 1 when the argument count is wrong or the argument is
    neither an existing file nor an existing folder.
    """
    if len(sys.argv) != 2:
        print("expected single argument with filename or folder to extract text from")
        sys.exit(1)

    target = sys.argv[1]
    if os.path.isdir(target):
        for name in os.listdir(target):
            # os.listdir yields bare names, so each one must be joined with
            # the folder; otherwise it would be resolved against the CWD.
            if name.lower().endswith(".pdf"):
                write_pdf_for(os.path.join(target, name))
    elif os.path.isfile(target):
        write_pdf_for(target)
    else:
        print(f"argument you passed in: {sys.argv[1]} is not a filename or folder")
        sys.exit(1)


if __name__ == "__main__":
    main()