Spaces:
Runtime error
Runtime error
| from typing import List, Tuple | |
| from pypdf import PdfReader | |
| from docx import Document | |
def read_pdf(path: str) -> List[Tuple[int, str]]:
    """Extract the text of every non-empty page of a PDF.

    Args:
        path: Location of the PDF file, handed straight to ``PdfReader``.

    Returns:
        ``(page_number, text)`` tuples in document order. ``page_number`` is
        1-based and ``text`` is the page's extracted text with surrounding
        whitespace removed. Pages with no text left after stripping are
        skipped, so the page numbers may contain gaps.
    """
    pdf = PdfReader(path)
    extracted = []
    for number, sheet in enumerate(pdf.pages, start=1):
        raw = sheet.extract_text()
        # A falsy extraction result (None or "") counts as an empty page.
        content = raw.strip() if raw else ""
        if not content:
            continue
        extracted.append((number, content))
    return extracted
def read_docx(path: str) -> List[Tuple[int, str]]:
    """Read the paragraph text of a .docx file as one pseudo-page.

    Only ``Document(path).paragraphs`` is consulted. Paragraphs whose text is
    empty or whitespace-only are dropped, the rest are joined with newlines,
    and the joined text is stripped at both ends.

    Args:
        path: Location of the .docx file, handed straight to ``Document``.

    Returns:
        ``[(1, text)]`` when any text remains, otherwise an empty list. The
        whole document is always reported as page 1.
    """
    kept = []
    for para in Document(path).paragraphs:
        body = para.text
        if body.strip():
            kept.append(body)

    combined = "\n".join(kept).strip()
    if not combined:
        return []
    return [(1, combined)]