Spaces:
Sleeping
Sleeping
| import os | |
| import pdfplumber | |
| from docx import Document | |
def parse_file(file_path):
    """Extract the plain-text content of a .txt, .docx, or .pdf file.

    The format is chosen from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path to the file to read.

    Returns:
        The extracted text. For ``.txt`` this is the file content decoded
        as UTF-8; for ``.docx`` it is the text of every entry in
        ``doc.paragraphs`` joined with newlines; for ``.pdf`` it is the
        text of every page joined with newlines, with leading and trailing
        whitespace stripped. For any other extension the literal string
        ``"Unsupported file format."`` is returned instead of raising.
    """
    ext = os.path.splitext(file_path)[-1].lower()

    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    if ext == ".docx":
        doc = Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)

    if ext == ".pdf":
        with pdfplumber.open(file_path) as pdf:
            # extract_text() may yield None for a page with no extractable
            # text (e.g. a scanned image); coalesce to "" so one empty page
            # does not abort the whole document with a TypeError.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        return "\n".join(page_texts).strip()

    # Callers receive this sentinel string rather than an exception.
    return "Unsupported file format."