Spaces:
Sleeping
Sleeping
# Extract the text of every page of a PDF and save it to a UTF-8 text file.
from pypdf import PdfReader

# Input PDF and the text file the extracted content is written to.
PDF_PATH = "1-s2.0-S2405844024055324-main.pdf"
OUTPUT_PATH = "paper_content.txt"

reader = PdfReader(PDF_PATH)

# Each page's text is followed by a newline. A single join builds the result
# in one pass instead of growing a string with repeated `+=`.
text = "".join(page.extract_text() + "\n" for page in reader.pages)

# The text is saved to a file rather than printed: this avoids token-limit
# issues with large output and lets the content be read back in chunks.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    f.write(text)

print(f"PDF content extracted to {OUTPUT_PATH}")