Spaces:
Running
Running
File size: 547 Bytes
22c5be7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
from pypdf import PdfReader
def extract_text_pypdf(pdf_path) -> str:
    """Extract the plain text of every page in a PDF as a single string.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``PdfReader``.

    Returns:
        The text of all pages in document order. Each page's text is
        followed by a blank-line separator (``"\\n\\n"``), including the
        last page. A document with no pages yields ``""``.
    """
    reader = PdfReader(pdf_path)
    # Assemble with one join rather than repeated ``+=`` so the cost stays
    # linear in the total amount of text, even for very long documents.
    return "".join(
        page.extract_text(extraction_mode='plain') + "\n\n"
        for page in reader.pages
    )
if __name__ == "__main__":
    # Demo run: extract one PDF, save the result, and echo it to stdout.
    source_pdf = "CFR-2019-title21-vol2.pdf"  # Replace with your file path
    extracted = extract_text_pypdf(source_pdf)

    # Persist the extracted text as UTF-8, overwriting any previous output.
    with open("output.txt", mode="w", encoding="utf-8") as out_file:
        out_file.write(extracted)

    print(extracted)  # Also show the extracted text on the console
|