# Source: Hugging Face Space "Alpha108/AI_JOB_MATCHER" (status at capture: Sleeping)
# File size: 12,809 Bytes

# full corrected app.py
import streamlit as st
import requests
import pdfplumber
import docx
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import (
    SimpleDocTemplate,
    Paragraph,
    Spacer,
    ListFlowable,
    ListItem,
    Table,
    TableStyle,
    Image as RLImage,
)
from reportlab.lib.units import mm
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import io
from PIL import Image
import tempfile
import os
from typing import List

# -----------------------------
# CONFIG
# -----------------------------
# URL requested by fetch_jobs() to download the job listings.
REMOTEOK_URL = "https://remoteok.com/api"
# Model name handed to SentenceTransformer() in load_embedding_model().
EMBED_MODEL = "BAAI/bge-small-en-v1.5"
# Model identifier sent with every Groq chat-completion request.
AI_MODEL = "openai/gpt-oss-120b"   # Groq model

# -----------------------------
# CACHED MODELS
# -----------------------------
@st.cache_resource
def load_embedding_model():
    """Instantiate the sentence-embedding model named by ``EMBED_MODEL``.

    Decorated with ``st.cache_resource`` so Streamlit can hand back the
    same model object instead of constructing it again.
    """
    encoder = SentenceTransformer(EMBED_MODEL)
    return encoder

# Module-level handle used by embed_texts().
model = load_embedding_model()

@st.cache_resource
def init_groq():
    """Create the Groq API client from the ``GROQ_API_KEY`` Streamlit secret.

    The key is looked up with ``.get`` so a missing entry yields ``None``,
    which is passed to ``Groq`` unchanged.
    """
    api_key = st.secrets.get("GROQ_API_KEY")
    return Groq(api_key=api_key)

# Module-level client used by generate_resume() and generate_cover_letter().
groq_client = init_groq()

# -----------------------------
# UTIL / PARSING FUNCTIONS
# -----------------------------
def extract_text_from_resume(file) -> str:
    """Extract plain text from an uploaded PDF or DOCX resume.

    Args:
        file: File-like object exposing a ``name`` attribute; the file type
            is chosen from that name's extension (case-insensitive).

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are joined with
        newlines. Returns ``""`` (after reporting via ``st.error``) when the
        extension is unsupported or the file cannot be parsed.
    """
    filename = (getattr(file, "name", "") or "").lower()

    try:
        if filename.endswith(".pdf"):
            with pdfplumber.open(file) as pdf:
                # extract_text() may yield None for a page (e.g. image-only).
                page_texts = [page.extract_text() or "" for page in pdf.pages]
            # A newline between pages keeps the last word of one page from
            # fusing with the first word of the next.
            return "\n".join(page_texts)

        if filename.endswith(".docx"):
            document = docx.Document(file)
            return "\n".join(p.text for p in document.paragraphs)
    except Exception as exc:
        # UI boundary: a corrupt or encrypted upload should surface as a
        # message in the page instead of aborting the whole script run.
        st.error(f"Could not read the uploaded file: {exc}")
        return ""

    st.error("Unsupported file type. Please upload PDF or DOCX.")
    return ""

def fetch_jobs() -> List[dict]:
    """Download the current job listings from ``REMOTEOK_URL``.

    Returns:
        A list of job dictionaries. An empty list is returned, and a
        warning is shown with ``st.warning``, when the request fails, the
        server answers with a non-200 status, or the body is not the
        expected JSON list.
    """
    try:
        resp = requests.get(REMOTEOK_URL, timeout=10)
        if resp.status_code != 200:
            # Previously this case returned [] with no hint to the user.
            st.warning(f"Failed to fetch jobs: HTTP {resp.status_code}")
            return []
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        st.warning(f"Failed to fetch jobs: {e}")
        return []

    if not isinstance(payload, list):
        st.warning("Failed to fetch jobs: unexpected response format.")
        return []

    # The first element is skipped as feed metadata; non-dict entries are
    # dropped because downstream code calls .get() on every job.
    return [job for job in payload[1:] if isinstance(job, dict)]

def embed_texts(texts):
    """Encode ``texts`` with the shared embedding model.

    The encoder is asked for NumPy output with normalized embeddings.
    """
    encode_options = {"convert_to_numpy": True, "normalize_embeddings": True}
    return model.encode(texts, **encode_options)

def match_jobs(resume_text, jobs, top_k=5):
    """Rank ``jobs`` by embedding similarity to ``resume_text``.

    Args:
        resume_text: Plain text of the candidate's resume.
        jobs: Sequence of job dictionaries; the ``position``, ``company``
            and ``description`` values are concatenated and embedded.
        top_k: Maximum number of matches to return.

    Returns:
        A list of ``(job, score)`` tuples in the order returned by the
        FAISS inner-product search. At most ``min(top_k, len(jobs))``
        entries; empty when ``jobs`` is empty or ``top_k`` is not positive.
    """
    if not jobs:
        return []

    # Never ask the index for more neighbours than it holds: FAISS pads
    # missing results with label -1, and jobs[-1] would silently return the
    # last job as a bogus match.
    k = min(int(top_k), len(jobs))
    if k <= 0:
        return []

    job_texts = [f"{job.get('position','')} {job.get('company','')} {job.get('description','')}" for job in jobs]
    resume_vec = embed_texts([resume_text])
    job_vecs = embed_texts(job_texts)

    index = faiss.IndexFlatIP(job_vecs.shape[1])
    index.add(job_vecs)

    scores, idx = index.search(resume_vec, k)
    # The i >= 0 guard is a second line of defence against padded labels.
    return [
        (jobs[i], float(score))
        for i, score in zip(idx[0], scores[0])
        if i >= 0
    ]

# -----------------------------
# AI GENERATION (unchanged)
# -----------------------------
def generate_resume(resume_text, job):
    """Ask the Groq chat model for a resume tailored to ``job``.

    The prompt embeds the candidate's resume text and the job's
    ``description`` value, then spells out the section layout the model
    should follow. Returns the content of the first completion choice.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a structured resume in this format:

Summary
-----------------
[2-3 line summary tailored for the job]

Skills
-----------------
- Skill 1
- Skill 2
- Skill 3

Experience
-----------------
Job Title | Company | Dates
• Achievement 1
• Achievement 2

Education
-----------------
Degree | Institution | Year
"""
    # Single-turn conversation: the whole request is one user message.
    messages = [{"role": "user", "content": prompt}]
    response = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=messages,
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

def generate_cover_letter(resume_text, job, name, email, phone):
    """Ask the Groq chat model for a cover letter tailored to ``job``.

    The prompt embeds the resume text, the job's ``description`` value and
    a signature block built from ``name``, ``email`` and ``phone``.
    Returns the content of the first completion choice.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a professional, one-page cover letter tailored to this role.
Format it like this:

Dear Hiring Manager,

[Intro paragraph: Show enthusiasm and alignment with company/role]
[Body paragraph: Highlight 2-3 most relevant skills/experiences from resume]
[Closing paragraph: Express eagerness and thank them]

Sincerely,
{name}
{email} | {phone}
"""
    # Single-turn conversation: the whole request is one user message.
    messages = [{"role": "user", "content": prompt}]
    response = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=messages,
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

# -----------------------------
# PDF BUILDING - FIXED: return bytes
# -----------------------------
def build_pdf(content: str,
              title: str = "Resume",
              name: str = "John Doe",
              email: str = "john.doe@email.com",
              phone: str = "+1 234 567 890",
              profile_image_bytes: bytes = None) -> bytes:
    """Render ``content`` as an A4 PDF and return the raw PDF bytes.

    The page starts with a header (optional photo, name, contact line),
    followed by ``title`` and the body. Body text is interpreted line by
    line:

    * a line followed by a rule of ``-``, ``=`` or ``_`` characters, or a
      line starting with ``#``, becomes a section heading;
    * consecutive lines starting with ``- ``, ``• `` or ``* `` become one
      bulleted list;
    * any other non-blank line becomes a body paragraph.

    Args:
        content: Plain-text document body (e.g. the edited AI output).
        title: Document title printed under the header; skipped if empty.
        name: Candidate name shown in the header.
        email: Contact e-mail shown in the header.
        phone: Contact phone shown in the header.
        profile_image_bytes: Optional encoded image for the header photo.
            An image that cannot be loaded is ignored.

    Returns:
        The generated PDF document as ``bytes``.
    """
    # Local import so this function is self-contained; Paragraph parses
    # XML-like markup, so raw '&' and '<' in user/AI text must be escaped.
    from xml.sax.saxutils import escape

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=A4,
        leftMargin=30,
        rightMargin=30,
        topMargin=30,
        bottomMargin=30,
    )
    styles = getSampleStyleSheet()
    name_style = ParagraphStyle(
        "HeaderName", parent=styles["Title"], alignment=0, spaceAfter=2  # 0 = left
    )
    contact_style = ParagraphStyle(
        "HeaderContact", parent=styles["Normal"], textColor=colors.grey
    )
    title_style = ParagraphStyle(
        "DocumentTitle", parent=styles["Heading2"], textColor=colors.HexColor("#2C3E50")
    )
    heading_style = ParagraphStyle(
        "SectionHeading", parent=styles["Heading3"], spaceBefore=8, spaceAfter=4
    )
    body_style = styles["BodyText"]

    def is_rule(text: str) -> bool:
        """Return True for underline/separator lines such as '-----'."""
        return len(text) >= 3 and set(text) <= set("-=_")

    # ---- header -----------------------------------------------------
    contact_line = " | ".join(part for part in (email, phone) if part)
    header_block = [
        Paragraph(escape(name or ""), name_style),
        Paragraph(escape(contact_line), contact_style),
    ]

    photo = None
    if profile_image_bytes:
        try:
            photo = RLImage(
                io.BytesIO(profile_image_bytes),
                width=25 * mm,
                height=25 * mm,
                kind="proportional",  # fit inside the box, keep aspect ratio
            )
        except Exception:
            # The photo is optional: fall back to a text-only header rather
            # than failing the whole export on an unreadable image.
            photo = None

    story = []
    if photo is not None:
        photo_col = 30 * mm
        header = Table(
            [[photo, header_block]],
            colWidths=[photo_col, doc.width - photo_col],
        )
        header.setStyle(TableStyle([
            ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
            ("LEFTPADDING", (0, 0), (0, 0), 0),
        ]))
        story.append(header)
    else:
        story.extend(header_block)
    story.append(Spacer(1, 4 * mm))

    if title:
        story.append(Paragraph(escape(title), title_style))

    # ---- body -------------------------------------------------------
    bullet_prefixes = ("- ", "• ", "* ")
    pending_bullets = []

    def flush_bullets() -> None:
        """Emit the bullets collected so far as a single list flowable."""
        if pending_bullets:
            story.append(ListFlowable(
                [ListItem(Paragraph(escape(item), body_style)) for item in pending_bullets],
                bulletType="bullet",
                leftIndent=14,
            ))
            pending_bullets.clear()

    lines = [raw.strip() for raw in (content or "").splitlines()]
    for pos, line in enumerate(lines):
        if not line:
            flush_bullets()
            continue
        if is_rule(line):
            # Underline of the heading above (or a separator): no output.
            continue
        if line.startswith(bullet_prefixes):
            pending_bullets.append(line[2:].strip())
            continue

        flush_bullets()
        next_line = lines[pos + 1] if pos + 1 < len(lines) else ""
        if is_rule(next_line) or line.startswith("#"):
            heading_text = line.lstrip("# ").strip()
            story.append(Paragraph(escape(heading_text), heading_style))
        else:
            story.append(Paragraph(escape(line), body_style))
    flush_bullets()

    doc.build(story)
    return buffer.getvalue()

# -----------------------------
# STREAMLIT UI (unchanged logic)
# -----------------------------
# Page-level settings: browser tab title, wide layout, sidebar open on load.
# NOTE(review): the cached loaders earlier in this file run before this call;
# some Streamlit versions require set_page_config to be the first Streamlit
# command — confirm against the deployed version.
st.set_page_config(page_title="MATCHHIVE - AI Job Matcher", layout="wide", initial_sidebar_state="expanded")
# Inject custom CSS. unsafe_allow_html is needed for the <style> tag to be
# emitted as markup. The .download-btn and .job-card classes are not
# referenced by any other code visible in this file.
st.markdown(
    """
    <style>
    .stButton>button { border-radius: 8px; padding:8px 12px; }
    .download-btn { background-color:#2ECC71 !important; color:white !important; }
    .job-card { padding:10px; border:1px solid #E5E7EB; border-radius:8px; margin-bottom:8px; }
    </style>
    """,
    unsafe_allow_html=True,
)

# Header area with optional logo upload
# Two columns with a 1:6 width ratio: logo uploader/preview on the left,
# app title and caption on the right.
col1, col2 = st.columns([1, 6])
with col1:
    logo_file = st.file_uploader("Upload logo (optional)", type=["png", "jpg", "jpeg"], help="Optional: upload your company/app logo")
    if logo_file:
        # Open the upload with Pillow and show it at a width of 100.
        img = Image.open(logo_file)
        st.image(img, width=100)
with col2:
    st.title("MATCHHIVE - AI Job Matcher")
    st.caption("Upload a resume, match to jobs, generate tailored resumes & cover letters (PDF).")

# Sidebar: user contact info + options
# The values collected here are module-level names read by the main section
# below: name/email/phone/profile_pic feed the generated documents, and the
# filter widgets narrow and rank the job list.
with st.sidebar:
    st.header("Candidate Info")
    name = st.text_input("Full Name", "John Doe")
    email = st.text_input("Email", "john.doe@email.com")
    phone = st.text_input("Phone", "+1 234 567 890")
    profile_pic = st.file_uploader("Profile photo (optional)", type=["png", "jpg", "jpeg"], help="Small circular/headshot for resume header")
    st.markdown("---")
    st.header("Job Filters (optional)")
    # Both text filters are applied as case-insensitive substring checks.
    location_filter = st.text_input("Location keyword (e.g. Remote, USA, Canada)", "")
    keyword_filter = st.text_input("Job keyword (e.g. Python, ML, DevOps)", "")
    # Matches scoring below this threshold are dropped after ranking.
    min_score = st.slider("Minimum match score", min_value=0.0, max_value=1.0, value=0.0, step=0.01)
    # Passed to match_jobs() as top_k.
    top_k = st.number_input("Number of matches to show", min_value=1, max_value=20, value=5)
    st.markdown("---")
    st.caption("Note: Job data comes from remoteok.com API and match scores are semantic similarity approximations.")

# Main upload & processing area
#
# Generated documents are kept in st.session_state. A Streamlit button is
# only True during the single rerun triggered by its click, so widgets
# nested inside an `if button:` branch vanish on the next interaction; the
# export and cover-letter buttons were therefore unreachable. Persisting the
# generated text lets the editors and export buttons render on every rerun.
#
# NOTE(review): jobs are re-fetched and re-embedded on every rerun (each
# widget interaction); consider caching if this proves slow.
def _job_matches_filters(job, location_kw, keyword_kw):
    """Return True when ``job`` passes the optional location/keyword filters.

    Both filters are case-insensitive substring checks; an empty filter
    accepts every job.
    """
    if location_kw:
        loc = job.get("location") or job.get("company_location") or ""
        if location_kw.lower() not in str(loc).lower():
            return False
    if keyword_kw:
        combined = f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
        if keyword_kw.lower() not in combined.lower():
            return False
    return True


def _render_document_editor(state_key, text_label, export_label,
                            download_label, pdf_title, file_name, empty_hint):
    """Render the editor, export button and download button for one document.

    ``state_key`` is both the session-state slot holding the generated text
    and the key of the text area, so user edits persist across reruns. When
    nothing has been generated yet only ``empty_hint`` is shown.
    """
    if state_key not in st.session_state:
        st.caption(empty_hint)
        return

    edited_text = st.text_area(text_label, key=state_key, height=300)

    pdf_key = f"pdf_{state_key}"
    if st.button(export_label, key=f"export_{state_key}"):
        prof_bytes = profile_pic.getvalue() if profile_pic else None
        pdf_bytes = build_pdf(edited_text, title=pdf_title, name=name, email=email, phone=phone, profile_image_bytes=prof_bytes)
        # Remember which text the PDF was built from so a stale export is
        # never offered after further edits.
        st.session_state[pdf_key] = (edited_text, pdf_bytes)

    exported = st.session_state.get(pdf_key)
    if exported and exported[0] == edited_text:
        st.download_button(
            label=download_label,
            data=exported[1],
            file_name=file_name,
            mime="application/pdf",
            key=f"download_{state_key}",
        )


st.header("Upload Resume (PDF or DOCX)")
resume_file = st.file_uploader("Upload your resume", type=["pdf", "docx"])
if not resume_file:
    st.info("Please upload a resume (PDF or DOCX) to start matching.")
else:
    with st.spinner("Extracting resume text..."):
        resume_text = extract_text_from_resume(resume_file)

    if not resume_text.strip():
        st.error("Could not extract text from the resume. Try a different file or ensure the PDF is text-based (not scanned).")
    else:
        # Fetch jobs and filter
        with st.spinner("Fetching remote jobs..."):
            jobs = fetch_jobs()

        filtered_jobs = [
            j for j in jobs
            if _job_matches_filters(j, location_filter, keyword_filter)
        ]

        # Do matching & display results
        with st.spinner("Computing semantic match scores..."):
            matches = match_jobs(resume_text, filtered_jobs, top_k=top_k)

        matches = [(job, score) for job, score in matches if score >= min_score]

        if not matches:
            st.warning("No matches found with given filters/score. Try lowering minimum score or removing filters.")
        else:
            st.subheader(f"Top {len(matches)} Matches")
            safe_name = name.replace(' ', '_')
            for pos, (job, score) in enumerate(matches):
                title = job.get("position", "Unknown Position")
                company = job.get("company", "Unknown Company")
                url = job.get("url", "#")
                posted = job.get("date", "")
                # Prefer the feed's id for widget keys; fall back to a
                # position-qualified label so keys stay unique without one.
                job_key = str(job.get("id") or f"{title}-{company}-{pos}")
                resume_key = f"resume_text_{job_key}"
                cl_key = f"cl_text_{job_key}"
                exp_label = f"{title} at {company} — Score: {score:.2f}"
                with st.expander(exp_label, expanded=False):
                    st.markdown(f"**Location:** {job.get('location','N/A')}  \n**Posted:** {posted}  \n[View Job Posting]({url})")
                    st.markdown("---")
                    cols = st.columns([1, 1, 1])

                    # The three actions are siblings, not nested, so each
                    # one works regardless of which was clicked before.
                    if cols[0].button("Generate Resume (AI)", key=f"resume_{job_key}"):
                        with st.spinner("Generating tailored resume..."):
                            tailored_resume = generate_resume(resume_text, job)
                        # Set before the text area is created in this run.
                        st.session_state[resume_key] = tailored_resume or ""

                    if cols[1].button("Generate Cover Letter (AI)", key=f"clgen_{job_key}"):
                        with st.spinner("Generating cover letter..."):
                            tailored_cl = generate_cover_letter(resume_text, job, name, email, phone)
                        st.session_state[cl_key] = tailored_cl or ""

                    show_description = cols[2].button("Show Job Description", key=f"desc_{job_key}")

                    if resume_key in st.session_state or cl_key in st.session_state:
                        tab1, tab2 = st.tabs(["Tailored Resume", "Cover Letter"])
                        with tab1:
                            _render_document_editor(
                                state_key=resume_key,
                                text_label="Tailored Resume (editable)",
                                export_label="Export Tailored Resume as PDF",
                                download_label="📥 Download Resume (PDF)",
                                pdf_title="Resume",
                                file_name=f"{safe_name}_resume.pdf",
                                empty_hint="Click \"Generate Resume (AI)\" to create a tailored resume.",
                            )
                        with tab2:
                            _render_document_editor(
                                state_key=cl_key,
                                text_label="Cover Letter (editable)",
                                export_label="Export Cover Letter as PDF",
                                download_label="📥 Download Cover Letter (PDF)",
                                pdf_title="Cover Letter",
                                file_name=f"{safe_name}_cover_letter.pdf",
                                empty_hint="Click \"Generate Cover Letter (AI)\" to create a cover letter.",
                            )

                    if show_description:
                        st.info(job.get("description", "No description available"))

            st.success("Done — select a match and generate your tailored resume or cover letter.")