Spaces:
Running
Running
| # detector/utils.py | |
| from PyPDF2 import PdfReader | |
| import docx | |
async def extract_text_from_file(file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or TXT file.

    The file type is chosen from the (case-insensitive) filename extension.

    Args:
        file: Upload object exposing ``filename``, a binary file-like
            ``file`` attribute, and an awaitable ``read()`` returning bytes
            (presumably a Starlette/FastAPI ``UploadFile`` -- confirm
            against the caller).

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines; TXT content is returned decoded as UTF-8.

    Raises:
        ValueError: If the filename is missing or its extension is not
            ``.pdf``, ``.docx`` or ``.txt``. A ``.txt`` payload that is not
            valid UTF-8 raises ``UnicodeDecodeError``, which is itself a
            ``ValueError`` subclass.
    """
    # A missing filename is treated like any other unsupported upload
    # instead of crashing with AttributeError on ``None.lower()``.
    filename = (file.filename or "").lower()

    if filename.endswith(".pdf"):
        reader = PdfReader(file.file)
        # extract_text() may yield nothing for a page (e.g. image-only
        # pages); substitute "" so the join never sees None.
        return "\n".join(page.extract_text() or "" for page in reader.pages)

    if filename.endswith(".docx"):
        document = docx.Document(file.file)
        return "\n".join(para.text for para in document.paragraphs)

    if filename.endswith(".txt"):
        raw = await file.read()
        # "utf-8-sig" decodes exactly like "utf-8" but drops a leading BOM,
        # so BOM-prefixed files do not leak "\ufeff" into the text.
        return raw.decode("utf-8-sig")

    raise ValueError("Unsupported file type.")