Spaces:
Running
Running
| from PyPDF2 import PdfReader | |
| from docx import Document | |
def load_document(path: str) -> str:
    """Return the text of a PDF or DOCX file as one space-joined string.

    The file type is chosen from the extension of *path*, compared
    case-insensitively so that names such as ``REPORT.PDF`` are accepted.

    Args:
        path: Location of the document; must end in ``.pdf`` or ``.docx``
            (any letter case).

    Returns:
        For PDFs, the extracted text of every page joined by single spaces;
        for DOCX files, the text of every paragraph joined by single spaces.

    Raises:
        ValueError: If *path* ends in neither ``.pdf`` nor ``.docx``.
    """
    # Only the comparison uses the lowered copy; the parsers still receive
    # the path exactly as the caller supplied it.
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        reader = PdfReader(path)
        # `or ""` keeps the join safe when a page yields no text.
        return " ".join(page.extract_text() or "" for page in reader.pages)
    if lowered.endswith(".docx"):
        doc = Document(path)
        return " ".join(p.text for p in doc.paragraphs)
    raise ValueError("Unsupported file type")