resume-optimizer-api / app /utils /docx_parser.py
JermaineAI's picture
ready for deployment
0c6fb97
raw
history blame contribute delete
476 Bytes
import io
import docx
from app.utils.common import clean_text
def extract_text_from_docx(file_bytes: bytes) -> str:
    """
    Return the cleaned paragraph text of a DOCX document supplied as bytes.

    The bytes are wrapped in an in-memory stream and opened with
    ``docx.Document``. The ``text`` of every entry in the document's
    ``paragraphs`` collection is joined with newline characters, and the
    joined string is passed through ``clean_text`` before being returned.
    """
    # Wrap the raw bytes in an in-memory stream so docx.Document can read them.
    document = docx.Document(io.BytesIO(file_bytes))
    joined = "\n".join(paragraph.text for paragraph in document.paragraphs)
    return clean_text(joined)