Spaces:

devmalik-official
/

resume-analyzer

Sleeping

File size: 2,227 Bytes

1aea493

import PyPDF2
from docx import Document
from pathlib import Path
import io


def extract_text_from_pdf(file_content):
    """Extract the text of every page in a PDF.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        The text of all pages, one newline after each page, with leading
        and trailing whitespace stripped from the combined result.

    Raises:
        Exception: If the PDF cannot be opened or read. The message is
            prefixed with "Error reading PDF: " and the underlying error
            is attached as ``__cause__``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))
        # Defensive: a page that yields a falsy result (None or "") is
        # treated as empty text instead of failing the whole document on
        # a None + str concatenation.
        page_texts = [
            (page.extract_text() or "") + "\n" for page in pdf_reader.pages
        ]
        return "".join(page_texts).strip()
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback.
        raise Exception(f"Error reading PDF: {str(e)}") from e


def extract_text_from_docx(file_content):
    """Extract paragraph and table text from a DOCX file.

    Paragraphs are emitted first, one per line, followed by the contents of
    every table. Each table row becomes its own line with the cell texts
    separated by a single space, so rows do not run together.

    Args:
        file_content: Raw bytes of the DOCX file.

    Returns:
        The extracted text with leading and trailing whitespace stripped.

    Raises:
        Exception: If the document cannot be opened or read. The message is
            prefixed with "Error reading DOCX: " and the underlying error
            is attached as ``__cause__``.
    """
    try:
        doc = Document(io.BytesIO(file_content))
        lines = [para.text for para in doc.paragraphs]
        for table in doc.tables:
            for row in table.rows:
                # One output line per row keeps table structure readable.
                lines.append(" ".join(cell.text for cell in row.cells))
        return "\n".join(lines).strip()
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback.
        raise Exception(f"Error reading DOCX: {str(e)}") from e


def extract_text_from_txt(file_content):
    """Decode a plain-text file as UTF-8.

    Args:
        file_content: Raw bytes of the text file.

    Returns:
        The decoded text with leading and trailing whitespace stripped. A
        leading UTF-8 byte-order mark, if present, is removed.

    Raises:
        Exception: If the content cannot be decoded. The message is
            prefixed with "Error reading TXT: " and the underlying error
            is attached as ``__cause__``.
    """
    try:
        # "utf-8-sig" decodes ordinary UTF-8 and also drops a leading BOM,
        # which str.strip() would otherwise leave in the text as U+FEFF.
        return file_content.decode("utf-8-sig").strip()
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback.
        raise Exception(f"Error reading TXT: {str(e)}") from e


def parse_resume(file_content, file_extension):
    """
    Parse resume based on file type

    The extension is matched case-insensitively; surrounding whitespace is
    ignored and a missing leading dot is tolerated, so "pdf", ".PDF" and
    " .pdf " all select the PDF extractor.

    Args:
        file_content: Binary file content
        file_extension: File extension (.pdf, .docx, .doc, .txt)

    Returns:
        Extracted text from resume

    Raises:
        ValueError: If the extension is not one of the supported formats.
        Exception: Propagated from the selected extractor when the content
            cannot be read.
    """
    file_extension = file_extension.strip().lower()
    if file_extension and not file_extension.startswith("."):
        file_extension = "." + file_extension

    if file_extension == ".pdf":
        return extract_text_from_pdf(file_content)
    # NOTE(review): ".doc" is routed to the DOCX extractor; a legacy binary
    # Word file will presumably fail there with an "Error reading DOCX"
    # exception -- confirm whether .doc should stay listed as supported.
    if file_extension in (".docx", ".doc"):
        return extract_text_from_docx(file_content)
    if file_extension == ".txt":
        return extract_text_from_txt(file_content)
    raise ValueError(f"Unsupported file format: {file_extension}")


def extract_from_uploaded_file(uploaded_file):
    """
    Extract text from uploaded file object

    The file type is taken from the suffix of ``uploaded_file.name`` and the
    full content is read from the start of the stream.

    Args:
        uploaded_file: Streamlit uploaded file object (any object exposing
            ``name`` and ``read()``; ``seek()`` is used when available)

    Returns:
        Extracted text

    Raises:
        ValueError: If the file suffix is not a supported format.
        Exception: If the content cannot be read by the matching extractor.
    """
    file_extension = Path(uploaded_file.name).suffix.lower()

    # read() continues from the current position, so a stream that was
    # already consumed would yield empty content. Rewind first when the
    # object supports it; non-seekable streams are read as-is.
    rewind = getattr(uploaded_file, "seek", None)
    if callable(rewind):
        try:
            rewind(0)
        except (OSError, ValueError):
            pass

    file_content = uploaded_file.read()
    return parse_resume(file_content, file_extension)