# TalentSync — pages/sample.py
# (last update: commit 46c943d by UmaKumpatla)
import os
import zipfile
import tempfile
import fitz # PyMuPDF
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from langchain_community.llms import HuggingFaceHub
# Set the Hugging Face token. Fail fast with a clear message when the
# HF_Token secret is absent — assigning None into os.environ would raise
# an opaque "str expected, not NoneType" TypeError instead.
_hf_token = os.getenv("HF_Token")
if not _hf_token:
    raise RuntimeError("HF_Token environment variable is not set; cannot reach the Hugging Face Hub.")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Initialize the language model (Mistral) used for all JD/resume extraction
# and matching prompts below.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 512},
)
def extract_text_from_pdf(file_bytes):
    """Return the concatenated text of every page of a PDF.

    The document is opened directly from the in-memory bytes via
    ``fitz.open(stream=...)``. This fixes two leaks in the previous
    version: the ``NamedTemporaryFile(delete=False)`` was never unlinked,
    and the fitz document was never closed.

    Args:
        file_bytes: Raw bytes of a PDF file.

    Returns:
        str: Text of all pages, concatenated in page order.
    """
    doc = fitz.open(stream=file_bytes, filetype="pdf")
    try:
        # Same accumulation as before (per-page get_text), via join.
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()
# Create structured extraction prompts
def create_resume_prompt(text):
    """Build the LLM prompt that asks for structured resume fields."""
    fields = [
        "Full Name",
        "Education",
        "Total Experience (years)",
        "Key Skills",
        "Projects (if any)",
    ]
    numbered = "".join(f"{i}. {field}\n" for i, field in enumerate(fields, 1))
    return "Extract structured information from the resume:\n" + numbered + f"\nResume Text:\n{text}"
def create_jd_prompt(text):
    """Build the LLM prompt that asks for structured job-description fields."""
    fields = [
        "Job ID",
        "Company Name",
        "Role",
        "Experience Required",
        "Skills Required",
        "Education Required",
        "Location",
    ]
    numbered = "".join(f"{i}. {field}\n" for i, field in enumerate(fields, 1))
    return "Extract structured information from the job description:\n" + numbered + f"\nJD Text:\n{text}"
def matching_prompt(jd, resumes, top_n=3):
    """Build the ranking prompt pairing one JD against all resume summaries."""
    sections = [
        "You are a resume screening expert.",
        f"Job Description:\n{jd}",
        f"Resumes:\n{resumes}",
        f"Rank the top {top_n} matching candidates with reasons. Format:",
        "1. Candidate Name - Reason\n2. Candidate Name - Reason\n...",
    ]
    return "\n".join(sections)
# ----------------------------- Streamlit UI -----------------------------
# Wide layout suits the skill heatmap rendered at the bottom of the page.
st.set_page_config(page_title="Resume Matcher", layout="wide")
st.title("πŸ€– Smart Resume Matcher with JD Insights")

# Inputs: a ZIP of PDF resumes plus a JD (uploaded file OR pasted text).
zip_file = st.file_uploader("Upload ZIP of Resumes (PDF only)", type=["zip"])
jd_file = st.file_uploader("Upload Job Description (PDF or TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("Or paste JD directly below")
top_n = st.slider("Select number of top matches", 1, 10, 3)

if st.button("πŸ” Match Resumes"):
    # Both a resume ZIP and some form of JD are mandatory.
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both resumes and a JD.")
        st.stop()
    # Resolve the JD text: an uploaded file takes precedence over pasted text.
    jd_text = ""
    if jd_file:
        # PDF uploads go through PyMuPDF; anything else is decoded as UTF-8.
        jd_text = extract_text_from_pdf(jd_file.read()) if jd_file.name.endswith(".pdf") else jd_file.read().decode("utf-8")
    elif jd_text_input:
        jd_text = jd_text_input
    # Ask the LLM for a structured summary of the JD and display it.
    jd_structured = llm.invoke(create_jd_prompt(jd_text))
    st.subheader("πŸ“‹ Extracted JD Details")
    st.markdown(jd_structured)

    resumes_info = ""         # accumulated LLM summaries, one chunk per resume
    resume_skills_list = []   # per-resume list of lower-cased skill strings
    resume_names = []         # resume file names with the ".pdf" suffix removed
    # NOTE(review): tmpdir is created but never used — candidate for removal.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(zip_file, 'r') as z:
            pdf_files = [f for f in z.namelist() if f.endswith(".pdf")]
            for file in pdf_files:
                with z.open(file) as resume_pdf:
                    text = extract_text_from_pdf(resume_pdf.read())
                    result = llm.invoke(create_resume_prompt(text))
                    resumes_info += f"\n\nResume File: {file}\n{result}"
                    # Skill parsing for heatmap: pull the "Key Skills" line out of
                    # the LLM's structured answer and split it on commas.
                    skills_line = next((line for line in result.split('\n') if "Key Skills" in line), "")
                    skills = [skill.strip().lower() for skill in skills_line.replace("Key Skills:", "").split(',') if skill.strip()]
                    resume_skills_list.append(skills)
                    resume_names.append(file.replace(".pdf", ""))
    # Matching logic: a single ranking prompt over the JD plus all summaries.
    match_response = llm.invoke(matching_prompt(jd_text, resumes_info, top_n))
    st.subheader("πŸ† Top Matches")
    st.markdown(match_response)

    # Skill heatmap visualization: rows are resumes, columns are JD "keywords"
    # (every deduplicated alphabetic word longer than 2 chars in the structured
    # JD text); a cell is 1 when the keyword matches a parsed skill exactly.
    st.subheader("πŸ”¬ Skill Match Heatmap")
    jd_keywords = [kw.strip().lower() for kw in jd_structured.split() if len(kw) > 2 and kw.isalpha()]
    jd_keywords = list(set(jd_keywords))
    heatmap_data = pd.DataFrame(0, index=resume_names, columns=jd_keywords)
    for i, skills in enumerate(resume_skills_list):
        for kw in jd_keywords:
            # NOTE(review): `kw in skills` tests exact equality against whole
            # skill strings, so multi-word skills (e.g. "machine learning")
            # never match single-word keywords — confirm this is intended.
            if kw in skills:
                heatmap_data.loc[resume_names[i], kw] = 1
    if not heatmap_data.empty:
        fig, ax = plt.subplots(figsize=(12, len(resume_names)*0.5 + 2))
        sns.heatmap(heatmap_data, annot=True, cmap="YlGnBu", cbar=False, ax=ax)
        st.pyplot(fig)
    else:
        st.info("No matching skills found for heatmap.")