import PyPDF2
import docx
import io

def parse_pdf(file_stream):
    """
    Extracts text from a PDF file stream.

    Each page's text is read with ``page.extract_text()``; pages that yield
    no text are skipped and the remaining page texts are joined with a
    newline so words at a page boundary do not run together. If no page
    yields any text, an empty string is returned rather than an error.

    Args:
        file_stream: A file-like object (e.g., from st.file_uploader).

    Returns:
        str: The extracted text from the PDF.

    Raises:
        ValueError: If reading the PDF fails for any reason. The original
            exception is attached as ``__cause__``.
    """
    try:
        reader = PyPDF2.PdfReader(file_stream)
        # extract_text() may return None/"" for a page; drop those so an
        # all-empty document still produces "" instead of bare newlines.
        page_texts = (page.extract_text() for page in reader.pages)
        text = "\n".join(chunk for chunk in page_texts if chunk)
    except Exception as e:
        # Deliberately broad: any failure while reading is converted into a
        # single ValueError for callers, with the root cause chained.
        print(f"Error reading PDF: {e}")
        raise ValueError(
            "Could not parse the PDF file. It might be corrupted or image-based."
        ) from e
    return text

def parse_docx(file_stream):
    """
    Extracts text from a DOCX file stream.

    The text of every entry in ``doc.paragraphs`` is emitted followed by a
    newline, so the result ends with a trailing newline whenever the
    document has at least one paragraph.

    Args:
        file_stream: A file-like object.

    Returns:
        str: The extracted text from the DOCX file.

    Raises:
        ValueError: If reading the DOCX fails for any reason. The original
            exception is attached as ``__cause__``.
    """
    try:
        doc = docx.Document(file_stream)
        # NOTE(review): only doc.paragraphs is read here; text placed inside
        # tables is presumably not included -- confirm whether that matters
        # for table-based resume layouts.
        text = "".join(f"{para.text}\n" for para in doc.paragraphs)
    except Exception as e:
        # Deliberately broad: any failure while reading is converted into a
        # single ValueError for callers, with the root cause chained.
        print(f"Error reading DOCX: {e}")
        raise ValueError("Could not parse the DOCX file.") from e
    return text

def parse_resume(uploaded_file):
    """
    Parses an uploaded resume file (PDF or DOCX) and returns its text content.

    The file type is decided from the part of ``uploaded_file.name`` after
    the last dot (case-insensitive). A name without any dot has no
    extension and is rejected, even if the whole name is "pdf" or "docx".

    Args:
        uploaded_file: The file object from Streamlit's file_uploader. It
            must expose ``.name`` and ``.getvalue()``.

    Returns:
        str: The text content of the resume.

    Raises:
        ValueError: If no file was given, the file type is not supported,
            or parsing fails.
    """
    if uploaded_file is None:
        raise ValueError("No file uploaded.")

    # rpartition tells us whether a dot was present at all, so a bare name
    # like "pdf" is not mistaken for a ".pdf" extension.
    _, dot, suffix = uploaded_file.name.rpartition(".")
    file_extension = suffix.lower() if dot else ""

    # Validate first so unsupported uploads are rejected without copying
    # their bytes into memory.
    if file_extension not in ("pdf", "docx"):
        raise ValueError(f"Unsupported file type: '{file_extension}'. Please upload a PDF or DOCX file.")

    # We use BytesIO to handle the file in memory
    file_stream = io.BytesIO(uploaded_file.getvalue())

    if file_extension == "pdf":
        return parse_pdf(file_stream)
    return parse_docx(file_stream)