Spaces:
Running
Running
| from pypdf import PdfReader | |
| import docx | |
| from io import BytesIO | |
| import logging | |
| from fastapi import HTTPException | |
def parse_docx(file: BytesIO) -> str:
    """Return the text of a .docx document, one paragraph per line.

    Args:
        file: Binary stream holding the .docx content; it is handed
            directly to ``docx.Document``.

    Returns:
        The text of every entry in ``Document.paragraphs``, each followed
        by a newline. A document without paragraphs yields ``""``.
    """
    doc = docx.Document(file)
    # A single join() builds the result in one pass instead of re-creating
    # the accumulated string once per paragraph.
    return "".join(f"{para.text}\n" for para in doc.paragraphs)
def parse_pdf(file: BytesIO) -> str:
    """Extract the text of every page of a PDF, in page order.

    Args:
        file: Binary stream holding the PDF content; it is handed directly
            to ``PdfReader``.

    Returns:
        The extracted text of all pages concatenated with no separator
        between pages.

    Raises:
        HTTPException: With status code 500 when opening the document or
            extracting text fails for any reason. The original error is
            attached as ``__cause__``.
    """
    try:
        reader = PdfReader(file)
        # A single join() builds the result in one pass instead of
        # re-creating the accumulated string once per page.
        return "".join(page.extract_text() for page in reader.pages)
    except Exception as e:
        # logging.exception logs at ERROR level and appends the traceback,
        # so the root cause is visible in the logs, not just its message.
        logging.exception("Error while processing PDF: %s", e)
        # Chain the cause explicitly so it is not lost behind the generic
        # HTTP error returned to the client.
        raise HTTPException(
            status_code=500, detail="Error processing PDF file") from e
def parse_txt(file: BytesIO) -> str:
    """Decode the whole stream as UTF-8 text.

    Args:
        file: Binary stream holding the text file content. It is read from
            its current position to the end.

    Returns:
        The decoded text, without a leading byte-order mark.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    # "utf-8-sig" decodes exactly like "utf-8" but drops a leading BOM
    # (EF BB BF) if present, so the result never starts with a stray
    # U+FEFF character.
    return file.read().decode("utf-8-sig")