Spaces:
Running
Running
from pypdf import PdfReader
def extract_text_pypdf(pdf_path) -> str:
    """Extract the text of every page of a PDF using pypdf.

    Args:
        pdf_path: Location of the PDF; handed unchanged to ``PdfReader``.

    Returns:
        The plain-mode text of each page in document order, with every
        page (including the last one) followed by two newline characters.
        An empty string when the document has no pages.
    """
    reader = PdfReader(pdf_path)
    # Build the result with a single join rather than repeated ``+=``, which
    # can degrade to quadratic time on documents with many pages.
    return "".join(
        page.extract_text(extraction_mode="plain") + "\n\n"
        for page in reader.pages
    )
if __name__ == "__main__":
    # Demo run: point this at the PDF you want to convert.
    source_pdf = "CFR-2019-title21-vol2.pdf"  # Replace with your file path
    extracted = extract_text_pypdf(source_pdf)

    # Persist the extracted text as UTF-8 next to the script.
    with open("output.txt", mode="w", encoding="utf-8") as out_file:
        out_file.write(extracted)

    # Echo the same text to stdout.
    print(extracted)