Spaces:
Runtime error
Runtime error
| import fitz # PyMuPDF | |
| import requests | |
| from io import BytesIO | |
| import time | |
def parse_pdf_from_url(url, timeout=30):
    """Download a PDF from *url* and return the text of its non-empty pages.

    Args:
        url: Address of the PDF, fetched with an HTTP GET.
        timeout: Seconds to wait on the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        A list with one string per page whose extracted text is not
        whitespace-only, in page order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.

    Errors raised by PyMuPDF while opening or reading the document
    propagate unchanged.
    """
    # Without a timeout, requests can block forever on a stalled server.
    res = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of handing an error page to the PDF parser.
    res.raise_for_status()

    # The context manager closes the document even if extraction raises.
    with fitz.open(stream=BytesIO(res.content), filetype="pdf") as doc:
        page_texts = (page.get_text() for page in doc)
        return [text for text in page_texts if text.strip()]
def parse_pdf_from_file(file_path):
    """Parse a local PDF file and extract text chunks.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        A list with one string per page whose extracted text is not
        whitespace-only, in page order.

    Raises:
        Exception: If opening the file or extracting text fails. The
            message names the file, and the original error is attached
            as ``__cause__``.
    """
    try:
        # The context manager closes the document even if a page fails
        # to extract, which a trailing doc.close() would skip.
        with fitz.open(file_path) as doc:
            page_texts = (page.get_text() for page in doc)
            return [text for text in page_texts if text.strip()]
    except Exception as e:
        # Chain explicitly so the underlying traceback stays visible.
        raise Exception(f"Error parsing PDF file {file_path}: {str(e)}") from e