Noo88ear's picture
🚀 Initial deployment of Multi-Agent Job Application Assistant
7498f2c
from __future__ import annotations
from typing import Optional
import io
import logging
# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)

# Optional third-party readers.
# Each import is attempted once at module load. When it fails, the library
# name is bound to None, the matching *_AVAILABLE flag is set to False, and
# an informational message is logged; otherwise the flag is set to True.
try:
    from docx import Document  # type: ignore
except Exception:  # pragma: no cover
    Document = None  # type: ignore
    DOCX_AVAILABLE = False
    logger.info("python-docx not available - .docx support disabled")
else:
    DOCX_AVAILABLE = True

try:
    import PyPDF2  # type: ignore
except Exception:
    PyPDF2 = None  # type: ignore
    PDF_AVAILABLE = False
    logger.info("PyPDF2 not available - .pdf support disabled")
else:
    PDF_AVAILABLE = True
def read_uploaded_text(file) -> Optional[str]:
    """Read text from a Streamlit UploadedFile. Supports .txt, .docx, and .pdf.

    The reader is chosen from the file-name extension (compared
    case-insensitively) and the content is taken from ``file.getvalue()``.

    Args:
        file: Uploaded file object exposing ``name`` and ``getvalue()``
            (returning bytes), or ``None``.

    Returns:
        The extracted text, or ``None`` when ``file`` is ``None``, the
        extension is not supported, the optional library needed for the
        format is not installed, or reading fails for any reason. Failures
        are logged with a traceback instead of being raised.
    """
    if file is None:
        return None

    name = file.name.lower()
    logger.info("Attempting to read file: %s", file.name)

    try:
        if name.endswith(".txt"):
            # "utf-8-sig" decodes exactly like "utf-8" but drops a leading
            # byte-order mark, so BOM-prefixed files do not return text
            # that starts with an invisible U+FEFF character.
            text = file.getvalue().decode("utf-8-sig", errors="ignore")
            logger.info("Successfully read .txt file: %s characters", len(text))
            return text

        if name.endswith(".docx"):
            if not DOCX_AVAILABLE:
                logger.warning("python-docx not installed. Cannot read .docx files.")
                logger.info("Install with: pip install python-docx")
                return None
            doc = Document(io.BytesIO(file.getvalue()))  # type: ignore
            # Keep only paragraphs that contain something besides whitespace.
            text = "\n".join(p.text for p in doc.paragraphs if p.text.strip())
            logger.info("Successfully read .docx file: %s characters", len(text))
            return text

        if name.endswith(".pdf"):
            if not PDF_AVAILABLE:
                logger.warning("PyPDF2 not installed. Cannot read .pdf files.")
                logger.info("Install with: pip install PyPDF2")
                return None
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(file.getvalue()))  # type: ignore
            # Defensive: treat a missing extraction result as empty text so
            # a single page without text cannot make the join fail and
            # discard the whole document.
            text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
            logger.info("Successfully read .pdf file: %s characters", len(text))
            return text

        logger.warning("Unsupported file type: %s", name)
        return None
    except Exception as e:
        # Best-effort boundary: any parsing failure is reported as "no text"
        # to the caller; logger.exception records the traceback at ERROR level.
        logger.exception("Error reading file %s: %s", file.name, e)
        return None