# AI_JOB_MATCHER / app.py
# Author: Alpha108
# Commit: 932470d (verified) - "Update app.py"
# full corrected app.py
import streamlit as st
import requests
import pdfplumber
import docx
from sentence_transformers import SentenceTransformer
import faiss
from groq import Groq
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import (
SimpleDocTemplate,
Paragraph,
Spacer,
ListFlowable,
ListItem,
Table,
TableStyle,
Image as RLImage,
)
from reportlab.lib.units import mm
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import io
from PIL import Image
import tempfile
import os
from typing import List
# -----------------------------
# CONFIG
# -----------------------------
# Endpoint requested by fetch_jobs() to obtain the job listings.
REMOTEOK_URL = "https://remoteok.com/api"
# Model name handed to SentenceTransformer in load_embedding_model().
EMBED_MODEL = "BAAI/bge-small-en-v1.5"
# Model identifier sent with each Groq chat-completion request.
AI_MODEL = "openai/gpt-oss-120b" # Groq model
# -----------------------------
# CACHED MODELS
# -----------------------------
@st.cache_resource
def load_embedding_model():
    """Instantiate the sentence-embedding model named by ``EMBED_MODEL``.

    Wrapped in ``st.cache_resource`` so Streamlit can hand back the same
    loaded model instead of constructing it again on each script rerun.

    Returns:
        The ``SentenceTransformer`` instance.
    """
    encoder = SentenceTransformer(EMBED_MODEL)
    return encoder


# Shared encoder used by embed_texts().
model = load_embedding_model()
@st.cache_resource
def init_groq():
    """Create the Groq API client used for all text generation.

    The API key is looked up in Streamlit secrets under ``GROQ_API_KEY``;
    when that entry is missing, ``None`` is passed as ``api_key``.

    Returns:
        The ``Groq`` client instance.
    """
    api_key = st.secrets.get("GROQ_API_KEY", None)
    return Groq(api_key=api_key)


# Shared client used by generate_resume() and generate_cover_letter().
groq_client = init_groq()
# -----------------------------
# UTIL / PARSING FUNCTIONS
# -----------------------------
def extract_text_from_resume(file) -> str:
    """Extract plain text from an uploaded PDF or DOCX resume.

    The format is chosen from the extension of ``file.name``
    (case-insensitive).

    Args:
        file: File-like object carrying a ``name`` attribute, such as the
            value returned by ``st.file_uploader``.

    Returns:
        The extracted text. PDF pages and DOCX paragraphs are separated by
        newlines. An empty string is returned (after showing ``st.error``)
        when the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Normalise once; tolerate a missing or None name.
    filename = str(getattr(file, "name", "") or "").lower()

    if filename.endswith(".pdf"):
        with pdfplumber.open(file) as pdf:
            # extract_text() may give None for a page; treat that as empty.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        # Separate pages with a newline so the last word of one page does not
        # fuse with the first word of the next.
        return "\n".join(page_texts)

    if filename.endswith(".docx"):
        document = docx.Document(file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    st.error("Unsupported file type. Please upload PDF or DOCX.")
    return ""
def fetch_jobs() -> List[dict]:
    """Download the current job listings from ``REMOTEOK_URL``.

    Every failure mode (network error, non-200 status, undecodable or
    unexpectedly shaped JSON) is reported with ``st.warning`` and results in
    an empty list, so callers never have to handle an exception.

    Returns:
        The job dictionaries from the response, without its first element
        (which the feed uses for metadata), or ``[]`` on failure.
    """
    try:
        resp = requests.get(REMOTEOK_URL, timeout=10)
    except requests.RequestException as e:
        st.warning(f"Failed to fetch jobs: {e}")
        return []

    # Previously a non-200 answer returned [] without telling the user why.
    if resp.status_code != 200:
        st.warning(f"Failed to fetch jobs: HTTP {resp.status_code}")
        return []

    try:
        payload = resp.json()
    except ValueError as e:  # JSON decoding errors derive from ValueError
        st.warning(f"Failed to fetch jobs: {e}")
        return []

    if not isinstance(payload, list):
        st.warning("Failed to fetch jobs: unexpected response format")
        return []

    # skip metadata; keep only dict entries so later job.get(...) is safe
    return [job for job in payload[1:] if isinstance(job, dict)]
def embed_texts(texts):
    """Encode ``texts`` with the shared embedding ``model``.

    The encoder is asked for NumPy output with normalized embeddings; the
    result of ``model.encode`` is returned unchanged.
    """
    encode_options = {"convert_to_numpy": True, "normalize_embeddings": True}
    return model.encode(texts, **encode_options)
def match_jobs(resume_text, jobs, top_k=5):
    """Rank ``jobs`` by embedding similarity to ``resume_text``.

    Each job is represented by its position, company and description
    concatenated into one string. Resume and job texts are embedded with
    ``embed_texts`` and compared through a FAISS inner-product index.

    Args:
        resume_text: Plain text of the candidate's resume.
        jobs: List of job dictionaries.
        top_k: Maximum number of matches to return.

    Returns:
        A list of ``(job, score)`` tuples in the order FAISS returns them,
        containing at most ``min(top_k, len(jobs))`` entries. Empty when
        ``jobs`` is empty or ``top_k`` is not positive.
    """
    if not jobs or top_k <= 0:
        return []

    # Never ask for more neighbours than the index holds: FAISS pads the
    # missing slots with index -1, which would silently select jobs[-1].
    k = min(int(top_k), len(jobs))

    job_texts = [f"{job.get('position','')} {job.get('company','')} {job.get('description','')}" for job in jobs]
    resume_vec = embed_texts([resume_text])
    job_vecs = embed_texts(job_texts)

    index = faiss.IndexFlatIP(job_vecs.shape[1])
    index.add(job_vecs)
    scores, indices = index.search(resume_vec, k)

    # Defensive: drop any padding entries that might still be present.
    return [
        (jobs[int(i)], float(score))
        for i, score in zip(indices[0], scores[0])
        if i >= 0
    ]
# -----------------------------
# AI GENERATION (unchanged)
# -----------------------------
def generate_resume(resume_text, job):
    """Ask the Groq chat model for a resume tailored to ``job``.

    The prompt embeds ``resume_text`` and the job's ``description`` field and
    requests a fixed Summary / Skills / Experience / Education layout.

    Args:
        resume_text: Plain text of the candidate's existing resume.
        job: Job dictionary; only its ``description`` entry is read.

    Returns:
        The message content of the first completion choice.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a structured resume in this format:
Summary
-----------------
[2-3 line summary tailored for the job]
Skills
-----------------
- Skill 1
- Skill 2
- Skill 3
Experience
-----------------
Job Title | Company | Dates
• Achievement 1
• Achievement 2
Education
-----------------
Degree | Institution | Year
"""
    request_messages = [{"role": "user", "content": prompt}]
    response = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=request_messages,
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.message.content
def generate_cover_letter(resume_text, job, name, email, phone):
    """Ask the Groq chat model for a cover letter tailored to ``job``.

    The prompt embeds ``resume_text`` and the job's ``description`` field and
    asks for a one-page letter signed with the given contact details.

    Args:
        resume_text: Plain text of the candidate's existing resume.
        job: Job dictionary; only its ``description`` entry is read.
        name: Candidate name placed in the signature.
        email: Candidate email placed in the signature.
        phone: Candidate phone number placed in the signature.

    Returns:
        The message content of the first completion choice.
    """
    prompt = f"""
You are an AI career assistant.
Given this resume:\n{resume_text}\n
and this job description:\n{job.get('description','')}\n
Generate a professional, one-page cover letter tailored to this role.
Format it like this:
Dear Hiring Manager,
[Intro paragraph: Show enthusiasm and alignment with company/role]
[Body paragraph: Highlight 2-3 most relevant skills/experiences from resume]
[Closing paragraph: Express eagerness and thank them]
Sincerely,
{name}
{email} | {phone}
"""
    request_messages = [{"role": "user", "content": prompt}]
    response = groq_client.chat.completions.create(
        model=AI_MODEL,
        messages=request_messages,
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.message.content
# -----------------------------
# PDF BUILDING - FIXED: return bytes
# -----------------------------
def build_pdf(content: str,
              title: str = "Resume",
              name: str = "John Doe",
              email: str = "john.doe@email.com",
              phone: str = "+1 234 567 890",
              profile_image_bytes: bytes = None) -> bytes:
    """
    Build a PDF from generated resume / cover-letter text and return raw bytes.

    The previous body referenced an undefined ``story`` (only placeholder
    comments were left where it should have been built), so every call raised
    ``NameError``. The story is now assembled here:

    * a header with the candidate name, contact line and optional photo;
    * the document ``title``;
    * the ``content`` text, parsed line by line:
        - a line followed by a rule of ``-``/``=``/``_`` characters, or a
          line starting with ``#``, becomes a section heading;
        - lines starting with ``- ``, ``* `` or ``•`` become bullet items;
        - blank lines become vertical space;
        - everything else becomes a body paragraph.

    Args:
        content: Text to render (e.g. the edited AI output).
        title: Heading printed under the header block.
        name: Candidate name for the header.
        email: Candidate email for the header.
        phone: Candidate phone for the header.
        profile_image_bytes: Optional encoded image for the header photo.

    Returns:
        The bytes of the generated PDF document.
    """
    # Paragraph interprets its text as XML-like markup, so raw user/LLM text
    # containing "&" or "<" must be escaped before it is handed over.
    from xml.sax.saxutils import escape

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(
        buffer,
        pagesize=A4,
        leftMargin=30,
        rightMargin=30,
        topMargin=30,
        bottomMargin=30,
    )
    styles = getSampleStyleSheet()
    name_style = ParagraphStyle(
        "CandidateName", parent=styles["Title"], alignment=0, spaceAfter=2
    )
    contact_style = ParagraphStyle(
        "ContactLine", parent=styles["Normal"], textColor=colors.grey, spaceAfter=6
    )
    heading_style = ParagraphStyle(
        "SectionHeading",
        parent=styles["Heading2"],
        textColor=colors.HexColor("#2C3E50"),
        spaceBefore=8,
        spaceAfter=4,
    )
    body_style = ParagraphStyle("BodyLine", parent=styles["Normal"], leading=14)

    story = []

    # ---- header: name + contact details, optionally next to a photo ----
    header_text = []
    if name:
        header_text.append(Paragraph(escape(name), name_style))
    contact_line = " | ".join(part for part in (email, phone) if part)
    if contact_line:
        header_text.append(Paragraph(escape(contact_line), contact_style))

    photo = None
    if profile_image_bytes:
        try:
            photo = RLImage(
                io.BytesIO(profile_image_bytes),
                width=25 * mm,
                height=25 * mm,
                kind="proportional",  # keep the aspect ratio inside 25x25 mm
            )
        except Exception:
            # Best effort: a photo that cannot be decoded must not block the
            # export of the document itself.
            photo = None

    if photo is not None:
        photo_col = 30 * mm
        header = Table(
            [[photo, header_text or ""]],
            colWidths=[photo_col, doc.width - photo_col],
        )
        header.setStyle(TableStyle([
            ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
            ("LEFTPADDING", (0, 0), (-1, -1), 0),
            ("RIGHTPADDING", (0, 0), (-1, -1), 0),
        ]))
        story.append(header)
    else:
        story.extend(header_text)

    if title:
        story.append(Paragraph(escape(title), styles["Heading1"]))
    story.append(Spacer(1, 6))

    # ---- body: parse the plain-text content into flowables ----
    def is_rule(text: str) -> bool:
        """True for underline rows such as '-----------------'."""
        return len(text) >= 3 and set(text) <= set("-=_")

    pending_bullets = []

    def flush_bullets() -> None:
        """Emit the collected bullet lines as a single bulleted list."""
        if pending_bullets:
            story.append(ListFlowable(
                [ListItem(Paragraph(text, body_style)) for text in pending_bullets],
                bulletType="bullet",
                start="circle",
                leftIndent=14,
            ))
            pending_bullets.clear()

    lines = (content or "").splitlines()
    for position, raw_line in enumerate(lines):
        line = raw_line.strip()
        if not line:
            flush_bullets()
            story.append(Spacer(1, 4))
            continue
        if is_rule(line):
            # The rule only marks the previous line as a heading.
            continue

        following = lines[position + 1].strip() if position + 1 < len(lines) else ""
        if is_rule(following) or line.startswith("#"):
            flush_bullets()
            story.append(Paragraph(escape(line.lstrip("#").strip()), heading_style))
            continue

        if line.startswith(("- ", "* ")) or line.startswith("•"):
            pending_bullets.append(escape(line[1:].strip()))
            continue

        flush_bullets()
        story.append(Paragraph(escape(line), body_style))

    flush_bullets()

    doc.build(story)
    return buffer.getvalue()  # raw bytes, suitable for st.download_button
# -----------------------------
# STREAMLIT UI (unchanged logic)
# -----------------------------
# Page-level configuration: browser tab title, wide layout, sidebar open.
st.set_page_config(
    page_title="MATCHHIVE - AI Job Matcher",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Small CSS overrides for buttons and job cards, injected as raw HTML.
_CUSTOM_CSS = """
<style>
.stButton>button { border-radius: 8px; padding:8px 12px; }
.download-btn { background-color:#2ECC71 !important; color:white !important; }
.job-card { padding:10px; border:1px solid #E5E7EB; border-radius:8px; margin-bottom:8px; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Header row: narrow column for an optional logo, wide column for the title.
col1, col2 = st.columns([1, 6])

logo_file = col1.file_uploader(
    "Upload logo (optional)",
    type=["png", "jpg", "jpeg"],
    help="Optional: upload your company/app logo",
)
if logo_file:
    # Decode the upload with PIL and show it at a fixed width.
    img = Image.open(logo_file)
    col1.image(img, width=100)

col2.title("MATCHHIVE - AI Job Matcher")
col2.caption("Upload a resume, match to jobs, generate tailored resumes & cover letters (PDF).")
# Sidebar, part 1: candidate contact details used in generated documents.
st.sidebar.header("Candidate Info")
name = st.sidebar.text_input("Full Name", "John Doe")
email = st.sidebar.text_input("Email", "john.doe@email.com")
phone = st.sidebar.text_input("Phone", "+1 234 567 890")
profile_pic = st.sidebar.file_uploader(
    "Profile photo (optional)",
    type=["png", "jpg", "jpeg"],
    help="Small circular/headshot for resume header",
)
st.sidebar.markdown("---")

# Sidebar, part 2: optional filters applied to the fetched jobs and matches.
st.sidebar.header("Job Filters (optional)")
location_filter = st.sidebar.text_input("Location keyword (e.g. Remote, USA, Canada)", "")
keyword_filter = st.sidebar.text_input("Job keyword (e.g. Python, ML, DevOps)", "")
min_score = st.sidebar.slider(
    "Minimum match score",
    min_value=0.0,
    max_value=1.0,
    value=0.0,
    step=0.01,
)
top_k = st.sidebar.number_input(
    "Number of matches to show",
    min_value=1,
    max_value=20,
    value=5,
)
st.sidebar.markdown("---")
st.sidebar.caption("Note: Job data comes from remoteok.com API and match scores are semantic similarity approximations.")
# Main upload & processing area
#
# Streamlit reruns this script on every widget interaction and st.button()
# is True only during the rerun caused by its own click. The previous
# version rendered the editable text and the "Export" buttons *inside*
# `if button:` branches, so clicking "Export" discarded the generated text and
# never reached build_pdf(). Generated documents are therefore kept in
# st.session_state and rendered whenever they exist.


def _render_document_editor(state_key, widget_suffix, editor_label, export_label,
                            download_label, pdf_title, file_suffix, photo_bytes):
    """Show one generated document as editable text with PDF export controls.

    Args:
        state_key: ``st.session_state`` key holding the text; also used as the
            text area's widget key so edits persist across reruns.
        widget_suffix: Unique suffix for the export/download widget keys.
        editor_label: Label of the text area.
        export_label: Label of the button that builds the PDF.
        download_label: Label of the resulting download button.
        pdf_title: ``title`` passed to ``build_pdf``.
        file_suffix: Suffix of the downloaded file name.
        photo_bytes: Optional profile photo bytes passed to ``build_pdf``.
    """
    edited_text = st.text_area(editor_label, key=state_key, height=300)
    if st.button(export_label, key=f"export_{widget_suffix}"):
        pdf_bytes = build_pdf(
            edited_text,
            title=pdf_title,
            name=name,
            email=email,
            phone=phone,
            profile_image_bytes=photo_bytes,
        )
        st.download_button(
            label=download_label,
            data=pdf_bytes,
            file_name=f"{name.replace(' ', '_')}_{file_suffix}.pdf",
            mime="application/pdf",
            key=f"download_{widget_suffix}",
        )


st.header("Upload Resume (PDF or DOCX)")
resume_file = st.file_uploader("Upload your resume", type=["pdf", "docx"])
if not resume_file:
    st.info("Please upload a resume (PDF or DOCX) to start matching.")
else:
    with st.spinner("Extracting resume text..."):
        # Rewind defensively in case an earlier run left the stream at EOF.
        resume_file.seek(0)
        resume_text = extract_text_from_resume(resume_file)
    if not resume_text.strip():
        st.error("Could not extract text from the resume. Try a different file or ensure the PDF is text-based (not scanned).")
    else:
        # Fetch jobs once per session: every button click reruns the script,
        # and refetching each time is slow. Empty results are not cached so a
        # failed fetch is retried on the next rerun.
        with st.spinner("Fetching remote jobs..."):
            jobs = st.session_state.get("remoteok_jobs")
            if not jobs:
                jobs = fetch_jobs()
                if jobs:
                    st.session_state["remoteok_jobs"] = jobs

        # Apply simple filters
        def job_matches_filters(job):
            """Return True when ``job`` passes the sidebar location/keyword filters."""
            if location_filter:
                loc = job.get("location") or job.get("company_location") or ""
                if location_filter.lower() not in str(loc).lower():
                    return False
            if keyword_filter:
                combined = f"{job.get('position','')} {job.get('company','')} {job.get('description','')}"
                if keyword_filter.lower() not in combined.lower():
                    return False
            return True

        filtered_jobs = [j for j in jobs if job_matches_filters(j)]

        # Do matching & display results
        with st.spinner("Computing semantic match scores..."):
            matches = match_jobs(resume_text, filtered_jobs, top_k=top_k)
            matches = [(job, score) for job, score in matches if score >= min_score]

        if not matches:
            st.warning("No matches found with given filters/score. Try lowering minimum score or removing filters.")
        else:
            st.subheader(f"Top {len(matches)} Matches")
            prof_bytes = profile_pic.getvalue() if profile_pic else None
            seen_keys = set()
            for job, score in matches:
                title = job.get("position", "Unknown Position")
                company = job.get("company", "Unknown Company")
                url = job.get("url", "#")
                posted = job.get("date", "")

                # Stable, unique key per job for widgets and stored documents.
                base_key = str(job.get("id") or f"{title}|{company}")
                job_key = base_key
                duplicate_no = 1
                while job_key in seen_keys:
                    duplicate_no += 1
                    job_key = f"{base_key}#{duplicate_no}"
                seen_keys.add(job_key)

                resume_key = f"tailored_resume_{job_key}"
                cl_key = f"cltext_{job_key}"

                exp_label = f"{title} at {company} — Score: {score:.2f}"
                with st.expander(exp_label, expanded=False):
                    st.markdown(f"**Location:** {job.get('location','N/A')}  \n**Posted:** {posted}  \n[View Job Posting]({url})")
                    st.markdown("---")
                    cols = st.columns([1, 1, 1])

                    # The three actions are independent siblings; results are
                    # written to session state *before* the matching text
                    # area is instantiated further down in this run.
                    if cols[0].button("Generate Resume (AI)", key=f"resume_{job_key}"):
                        with st.spinner("Generating tailored resume..."):
                            try:
                                st.session_state[resume_key] = generate_resume(resume_text, job) or ""
                            except Exception as e:  # UI boundary: report, don't crash
                                st.error(f"Resume generation failed: {e}")
                    if cols[1].button("Generate Cover Letter (AI)", key=f"clgen_{job_key}"):
                        with st.spinner("Generating cover letter..."):
                            try:
                                st.session_state[cl_key] = generate_cover_letter(resume_text, job, name, email, phone) or ""
                            except Exception as e:  # UI boundary: report, don't crash
                                st.error(f"Cover letter generation failed: {e}")
                    if cols[2].button("Show Job Description", key=f"desc_{job_key}"):
                        st.info(job.get("description", "No description available"))

                    has_resume = resume_key in st.session_state
                    has_cl = cl_key in st.session_state
                    if has_resume or has_cl:
                        tab1, tab2 = st.tabs(["Tailored Resume", "Cover Letter"])
                        with tab1:
                            if has_resume:
                                _render_document_editor(
                                    resume_key,
                                    f"resume_{job_key}",
                                    "Tailored Resume (editable)",
                                    "Export Tailored Resume as PDF",
                                    "📥 Download Resume (PDF)",
                                    "Resume",
                                    "resume",
                                    prof_bytes,
                                )
                            else:
                                st.caption("No tailored resume generated for this job yet.")
                        with tab2:
                            if has_cl:
                                _render_document_editor(
                                    cl_key,
                                    f"cl_{job_key}",
                                    "Cover Letter (editable)",
                                    "Export Cover Letter as PDF",
                                    "📥 Download Cover Letter (PDF)",
                                    "Cover Letter",
                                    "cover_letter",
                                    prof_bytes,
                                )
                            else:
                                st.caption("No cover letter generated for this job yet.")
            st.success("Done — select a match and generate your tailored resume or cover letter.")