Spaces:
Running
Running
| import io | |
| import docx | |
| from app.utils.common import clean_text | |
def extract_text_from_docx(file_bytes: bytes) -> str:
    """Return the cleaned paragraph text of a DOCX document given as bytes.

    The raw bytes are wrapped in an in-memory stream and opened with
    ``docx.Document``. The ``text`` of each entry in the document's
    ``paragraphs`` is joined with newline characters, and the joined
    string is passed through ``clean_text`` before being returned.
    """
    # Wrap the bytes in a file-like object so docx can read them in memory.
    document = docx.Document(io.BytesIO(file_bytes))
    joined = "\n".join(paragraph.text for paragraph in document.paragraphs)
    return clean_text(joined)