Spaces:
Running
Running
| import os | |
| import PyPDF2 | |
| from docx import Document | |
# Message that extract_text() returns (rather than raising) when the file
# extension is not one of the handled types: .txt, .pdf, .docx.
SUPPORTED_ERROR = "❌ Supported formats: .txt, .pdf, .docx"
def extract_text(file_path: str) -> str:
    """Return the text content of a ``.txt``, ``.pdf`` or ``.docx`` file.

    The file type is chosen from the extension of *file_path*, compared
    case-insensitively.

    Args:
        file_path: Path to the file to read.

    Returns:
        The extracted text with leading and trailing whitespace removed.
        For PDFs the text of each page is joined with newlines; for DOCX
        files the non-blank paragraphs are joined with newlines.

        Failures are reported through the return value instead of an
        exception: ``SUPPORTED_ERROR`` for an unhandled extension, or a
        string starting with ``"❌ Error reading file: "`` when reading or
        parsing fails.
    """
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext == ".txt":
            # utf-8-sig drops a leading byte-order mark if one is present and
            # otherwise decodes exactly like utf-8. Plain utf-8 would leave
            # "\ufeff" at the start of the text, and str.strip() does not
            # remove it because U+FEFF is not whitespace.
            with open(file_path, "r", encoding="utf-8-sig") as f:
                return f.read().strip()

        if ext == ".pdf":
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # A page with no extractable text contributes an empty string
                # so the remaining pages are still returned.
                return "\n".join(
                    page.extract_text() or "" for page in reader.pages
                ).strip()

        if ext == ".docx":
            doc = Document(file_path)
            # Skip paragraphs that are empty or whitespace-only.
            return "\n".join(
                p.text for p in doc.paragraphs if p.text.strip()
            ).strip()

        return SUPPORTED_ERROR
    except Exception as e:
        # Deliberately broad: callers receive a displayable message instead
        # of an exception, whatever the underlying reader raised.
        return f"❌ Error reading file: {e}"