# TalentSync / app.py
# Page metadata captured with the file: "UmaKumpatla's picture";
# commit a162d5c ("Update app.py"), verified.
import os
import zipfile
import tempfile
import fitz # PyMuPDF
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
# Set HuggingFace API keys
# The token is read once from the "hf1" environment variable and re-exported
# as HUGGINGFACEHUB_API_TOKEN for the Hugging Face client libraries.
hf_token = os.getenv("hf1")
if not hf_token:
    # Name the variable that is actually read so the message is actionable.
    st.error('HuggingFace token not found. Please set the "hf1" environment variable.')
    st.stop()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
# Load LLM
# Settings passed unchanged to both the endpoint and the chat wrapper.
# NOTE(review): max_new_tokens=150 may be short for the structured
# extraction and top-N answers requested later in this file - confirm.
_llm_settings = {
    "provider": "novita",
    "temperature": 0.7,
    "max_new_tokens": 150,
    "task": "conversational",
}

llm_base = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    **_llm_settings,
)

# NOTE(review): the wrapper receives a different repo_id (Llama-3.2-3B) than
# the endpoint it wraps (Llama-3.1-8B) - confirm which model is intended.
llm = ChatHuggingFace(
    llm=llm_base,
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    **_llm_settings,
)
# Text extraction from PDF
def extract_text_from_pdf(file_bytes: bytes) -> str:
    """Return the concatenated plain text of every page of a PDF.

    The document is opened directly from memory, so nothing is written to
    disk (no temporary file is left behind) and the PyMuPDF document is
    closed as soon as its text has been read.

    Args:
        file_bytes: Raw bytes of the PDF file.

    Returns:
        The text of all pages joined in page order, with no separator added.

    Raises:
        Any exception ``fitz.open`` raises when the bytes are not a readable
        PDF; it is propagated unchanged.
    """
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Prompt creators
def create_resume_prompt(text: str) -> str:
    """Build the LLM prompt that asks for structured data from one resume.

    Args:
        text: Plain text of the resume.

    Returns:
        A prompt listing the five fields to extract (name, education, total
        experience, key skills, projects) followed by the resume text.
    """
    # The literal is kept flush-left so no indentation leaks into the prompt.
    return f"""
Extract structured information from the resume:
1. Full Name
2. Education
3. Total Experience (in years)
4. Key Skills
5. Projects (Names and Outcomes if any)
Resume Text:
{text}
"""
def create_jd_prompt(text: str) -> str:
    """Build the LLM prompt that asks for structured data from a job description.

    Args:
        text: Plain text of the job description.

    Returns:
        A prompt listing the seven fields to extract (job ID, company, role,
        experience, skills, education, location) followed by the JD text.
    """
    # The literal is kept flush-left so no indentation leaks into the prompt.
    return f"""
Extract structured job description info:
1. Job ID
2. Company Name
3. Role
4. Experience Required
5. Skills Required
6. Education Required
7. Location
Job Description:
{text}
"""
def matching_prompt(jd_text: str, resumes_info: str, top_n: int = 3) -> str:
    """Build the LLM prompt that ranks resumes against a job description.

    Args:
        jd_text: Text of the job description.
        resumes_info: Text block holding the resume summaries to compare.
        top_n: Number of candidates the model is asked to return.

    Returns:
        A prompt containing the screening criteria, the job description, the
        resume summaries and the expected answer format.
    """
    # The literal is kept flush-left so no indentation leaks into the prompt.
    return f"""
You are a resume screening expert. Based on the JD and resume summaries below, return the top {top_n} matching candidates.
Criteria:
- Skill alignment
- Relevant experience
- Education
- Domain-specific keywords
Job Description:
{jd_text}
Resumes:
{resumes_info}
Format:
1. Candidate Name: Reason
2. Candidate Name: Reason
... up to {top_n}
"""
# UI setup
# Emoji in the labels below were restored from mis-encoded (mojibake) text.
st.set_page_config(page_title="Resume Matcher + JD Extractor", layout="centered")
st.title("🤖 Resume Matcher & JD Extractor")
st.markdown("Upload resumes in a ZIP file and a Job Description. The app will extract, match, and visualize candidate alignment.")

# Upload section
# The JD can be supplied either as a file or as pasted text.
zip_file = st.file_uploader("📁 Upload ZIP of Resumes (PDF)", type=["zip"])
jd_file = st.file_uploader("📄 Upload Job Description (PDF/TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("✍️ Or Paste Job Description Text")
top_n = st.slider("How many top candidates to return?", min_value=1, max_value=10, value=3)
# --- Helpers used by the "Match Candidates" action ---------------------------

# Labels the LLM is asked to echo back (see the prompt creators); the
# patterns below pull values out of its free-text answers.
_SKILLS_RE = re.compile(r"Skills Required[:\-\u2013]?\s*(.*)", re.IGNORECASE)
_JD_EXPERIENCE_RE = re.compile(r"Experience Required[:\-\u2013]?\s*(.*)", re.IGNORECASE)
# Tolerates text between the label and the number on the same line, e.g.
# "Total Experience (in years): 5" - the label wording used in the resume prompt.
_RESUME_EXPERIENCE_RE = re.compile(r"Total Experience[^\d\n]*(\d+(?:\.\d+)?)", re.IGNORECASE)
# Leading bullets / markdown emphasis / colons and trailing emphasis around a skill.
_SKILL_TRIM_RE = re.compile(r"^[\s\-\u2022*_`:]+|[\s*_`]+$")


def _list_resume_pdfs(archive):
    """Return the names of PDF members in *archive*, ignoring macOS metadata entries."""
    members = []
    for name in archive.namelist():
        if not name.lower().endswith(".pdf"):
            continue
        base_name = name.rsplit("/", 1)[-1]
        # "__MACOSX/" folders and "._*" files are resource forks, not real PDFs.
        if name.startswith("__MACOSX/") or base_name.startswith("._"):
            continue
        members.append(name)
    return members


def extract_required_skills_and_experience(jd_structured_text):
    """Parse required skills and minimum experience from the structured JD text.

    Args:
        jd_structured_text: The LLM's structured summary of the job description.

    Returns:
        A ``(skills, min_exp)`` tuple: ``skills`` is a list of unique,
        lower-cased skill strings taken from the "Skills Required" line
        (empty if that line is missing); ``min_exp`` is the first integer on
        the "Experience Required" line, or 0 if none is found.
    """
    skills = []
    skills_match = _SKILLS_RE.search(jd_structured_text)
    if skills_match:
        cleaned = (
            _SKILL_TRIM_RE.sub("", part).lower()
            for part in re.split(r"[,;/\n]", skills_match.group(1))
        )
        # dict.fromkeys drops duplicates while keeping first-seen order.
        skills = list(dict.fromkeys(skill for skill in cleaned if skill))

    min_exp = 0
    exp_match = _JD_EXPERIENCE_RE.search(jd_structured_text)
    if exp_match:
        years = re.search(r"\d+", exp_match.group(1))
        if years:
            min_exp = int(years.group())
    return skills, min_exp


def _score_resume(file_name, summary, required_skills, required_exp):
    """Return one heatmap row: experience flag (0/1) and skill-match fraction (0-1)."""
    exp_match = _RESUME_EXPERIENCE_RE.search(summary)
    candidate_exp = float(exp_match.group(1)) if exp_match else 0.0

    summary_lower = summary.lower()
    hits = sum(skill.lower() in summary_lower for skill in required_skills)
    skill_score = round(hits / len(required_skills), 2) if required_skills else 0

    return {
        "Resume": file_name,
        "Experience Match": int(candidate_exp >= required_exp),
        # Despite the column name this is a fraction in [0, 1], not a percentage.
        "Skill Match %": skill_score,
    }


# --- Main action --------------------------------------------------------------
if st.button("🔍 Match Candidates"):
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both a ZIP of resumes and a JD.")
        st.stop()

    # Extract JD
    if jd_file:
        jd_bytes = jd_file.read()
        # Compare the extension case-insensitively so "JD.PDF" is parsed as a PDF.
        if jd_file.name.lower().endswith(".pdf"):
            jd_text = extract_text_from_pdf(jd_bytes)
        else:
            # Replace undecodable bytes instead of crashing on non-UTF-8 text files.
            jd_text = jd_bytes.decode("utf-8", errors="replace")
    else:
        jd_text = jd_text_input.strip()
    if not jd_text.strip():
        st.error("The job description contains no extractable text.")
        st.stop()

    jd_structured = llm.invoke(create_jd_prompt(jd_text)).content
    st.subheader("📌 Extracted JD Information")
    st.markdown(jd_structured)

    # Extract resumes
    try:
        archive = zipfile.ZipFile(zip_file, "r")
    except zipfile.BadZipFile:
        st.error("The uploaded file is not a valid ZIP archive.")
        st.stop()

    resume_texts = {}
    resume_sections = []
    with archive:
        pdf_files = _list_resume_pdfs(archive)
        if not pdf_files:
            st.error("No PDF resumes found.")
            st.stop()
        st.success(f"Found {len(pdf_files)} resumes. Extracting...")
        for file in pdf_files:
            text = extract_text_from_pdf(archive.read(file))
            if not text.strip():
                # E.g. scanned PDFs without a text layer: nothing to summarise.
                st.warning(f"No extractable text in {file}; skipped.")
                continue
            summary = llm.invoke(create_resume_prompt(text)).content
            resume_sections.append(f"\n\nResume File: {file}\n{summary}")
            resume_texts[file] = summary

    if not resume_texts:
        st.error("None of the resumes contained extractable text.")
        st.stop()
    resumes_info = "".join(resume_sections)

    # Match candidates
    st.info("🔗 Matching resumes to JD...")
    match_result = llm.invoke(matching_prompt(jd_text, resumes_info, top_n)).content
    st.subheader("✅ Top Matched Candidates")
    st.markdown(match_result)

    # Visualize Match Heatmap
    required_skills, required_exp = extract_required_skills_and_experience(jd_structured)
    match_matrix = [
        _score_resume(file_name, summary, required_skills, required_exp)
        for file_name, summary in resume_texts.items()
    ]
    df_match = pd.DataFrame(match_matrix).set_index("Resume")

    st.subheader("📊 Heatmap: Skills & Experience Match")
    fig, ax = plt.subplots(figsize=(8, len(df_match) * 0.5 + 1))
    # Both columns live in [0, 1]; pinning the range keeps colours comparable
    # between runs and meaningful when only one resume is shown.
    sns.heatmap(
        df_match,
        annot=True,
        cmap="YlGnBu",
        linewidths=0.5,
        cbar=False,
        vmin=0,
        vmax=1,
        ax=ax,
    )
    st.pyplot(fig)
    # Close the figure so repeated reruns do not accumulate open figures.
    plt.close(fig)