File size: 476 Bytes
0c6fb97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import io
import docx
from app.utils.common import clean_text

def extract_text_from_docx(file_bytes: bytes) -> str:
    """Return the cleaned paragraph text of a DOCX document given as bytes.

    The bytes are wrapped in an in-memory stream and opened with
    ``docx.Document``. The ``text`` of every entry in ``doc.paragraphs`` is
    joined with newlines, and the result is passed through ``clean_text``
    before being returned.

    Args:
        file_bytes: Raw contents of the DOCX file.

    Returns:
        The value produced by ``clean_text`` for the newline-joined
        paragraph text.
    """
    # docx.Document takes a file-like object, so wrap the raw bytes.
    document = docx.Document(io.BytesIO(file_bytes))

    # NOTE(review): only ``document.paragraphs`` is read here; text held in
    # tables, headers or footers is presumably not included -- confirm that
    # this matches what callers expect.
    joined = "\n".join(paragraph.text for paragraph in document.paragraphs)
    return clean_text(joined)