Spaces:
Running
Running
File size: 476 Bytes
import io
import docx
from app.utils.common import clean_text
def extract_text_from_docx(file_bytes: bytes) -> str:
    """Return the cleaned paragraph text of a DOCX document given as bytes.

    The raw bytes are wrapped in an in-memory binary buffer and opened with
    ``docx.Document``. The ``text`` of each entry in the document's
    ``paragraphs`` collection is joined with newline characters, and the
    joined string is passed through ``clean_text`` before being returned.

    Args:
        file_bytes: Raw contents of the DOCX file.

    Returns:
        Whatever ``clean_text`` returns for the newline-joined paragraph text.
    """
    # python-docx accepts a file-like object, so no temporary file is needed.
    document = docx.Document(io.BytesIO(file_bytes))
    # NOTE(review): only ``document.paragraphs`` is read here; confirm whether
    # text held elsewhere (e.g. in tables) is expected to be extracted too.
    joined = "\n".join(paragraph.text for paragraph in document.paragraphs)
    return clean_text(joined)