Spaces:

UmaKumpatla
/

TalentSync

Sleeping

App Files Files Community

TalentSync / app.py

UmaKumpatla

Update app.py

a162d5c verified 9 months ago

raw

history blame contribute delete

6.11 kB

	import os
	import zipfile
	import tempfile
	import fitz # PyMuPDF
	import streamlit as st
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import re
	from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

	# Set HuggingFace API keys
	hf_token = os.getenv("hf1")
	if not hf_token:
	st.error("HuggingFace token not found. Please set HF_Token as an environment variable.")
	st.stop()

	os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("hf1")
	os.environ["hf1"] = os.getenv("hf1")

	# Load LLM
	llm_base = HuggingFaceEndpoint(
	repo_id="meta-llama/Llama-3.1-8B-Instruct",
	provider="novita",
	temperature=0.7,
	max_new_tokens=150,
	task="conversational"
	)
	llm = ChatHuggingFace(
	llm=llm_base,
	repo_id="meta-llama/Llama-3.2-3B-Instruct",
	provider="novita",
	temperature=0.7,
	max_new_tokens=150,
	task="conversational"
	)

	# Text extraction from PDF
	def extract_text_from_pdf(file_bytes):
	with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
	tmp_file.write(file_bytes)
	doc = fitz.open(tmp_file.name)
	text = "".join([page.get_text() for page in doc])
	return text

	# Prompt creators
	def create_resume_prompt(text):
	return f"""
	Extract structured information from the resume:
	1. Full Name
	2. Education
	3. Total Experience (in years)
	4. Key Skills
	5. Projects (Names and Outcomes if any)

	Resume Text:
	{text}
	"""

	def create_jd_prompt(text):
	return f"""
	Extract structured job description info:
	1. Job ID
	2. Company Name
	3. Role
	4. Experience Required
	5. Skills Required
	6. Education Required
	7. Location

	Job Description:
	{text}
	"""

	def matching_prompt(jd_text, resumes_info, top_n=3):
	return f"""
	You are a resume screening expert. Based on the JD and resume summaries below, return the top {top_n} matching candidates.

	Criteria:
	- Skill alignment
	- Relevant experience
	- Education
	- Domain-specific keywords

	Job Description:
	{jd_text}

	Resumes:
	{resumes_info}

	Format:
	1. Candidate Name: Reason
	2. Candidate Name: Reason
	... up to {top_n}
	"""

	# UI setup
	st.set_page_config(page_title="Resume Matcher + JD Extractor", layout="centered")
	st.title("🤖 Resume Matcher & JD Extractor")
	st.markdown("Upload resumes in a ZIP file and a Job Description. The app will extract, match, and visualize candidate alignment.")

	# Upload section
	zip_file = st.file_uploader("📁 Upload ZIP of Resumes (PDF)", type=["zip"])
	jd_file = st.file_uploader("📄 Upload Job Description (PDF/TXT)", type=["pdf", "txt"])
	jd_text_input = st.text_area("✍️ Or Paste Job Description Text")
	top_n = st.slider("How many top candidates to return?", min_value=1, max_value=10, value=3)

	if st.button("🔍 Match Candidates"):
	if not zip_file or not (jd_file or jd_text_input.strip()):
	st.warning("Please upload both a ZIP of resumes and a JD.")
	st.stop()

	# Extract JD
	if jd_file:
	jd_text = extract_text_from_pdf(jd_file.read()) if jd_file.name.endswith(".pdf") else jd_file.read().decode("utf-8")
	else:
	jd_text = jd_text_input.strip()

	jd_structured = llm.invoke(create_jd_prompt(jd_text)).content
	st.subheader("📌 Extracted JD Information")
	st.markdown(jd_structured)

	# Extract resumes
	resumes_info = ""
	resume_texts = {}

	with tempfile.TemporaryDirectory() as tmpdir:
	with zipfile.ZipFile(zip_file, "r") as z:
	pdf_files = [f for f in z.namelist() if f.endswith(".pdf")]
	if not pdf_files:
	st.error("No PDF resumes found.")
	st.stop()

	st.success(f"Found {len(pdf_files)} resumes. Extracting...")

	for file in pdf_files:
	with z.open(file) as resume_pdf:
	pdf_data = resume_pdf.read()
	text = extract_text_from_pdf(pdf_data)
	summary = llm.invoke(create_resume_prompt(text)).content
	resumes_info += f"\n\nResume File: {file}\n{summary}"
	resume_texts[file] = summary

	# Match candidates
	st.info("🔗 Matching resumes to JD...")
	match_result = llm.invoke(matching_prompt(jd_text, resumes_info, top_n)).content
	st.subheader("✅ Top Matched Candidates")
	st.markdown(match_result)

	# Visualize Match Heatmap
	def extract_required_skills_and_experience(jd_structured_text):
	skills_match = re.search(r"Skills Required[:\-\u2013]?\s(.)", jd_structured_text, re.IGNORECASE)
	exp_match = re.search(r"Experience Required[:\-\u2013]?\s(.)", jd_structured_text, re.IGNORECASE)

	skills = []
	if skills_match:
	skills_line = skills_match.group(1)
	skills = [s.strip().lower() for s in re.split(r"[,;/\n]", skills_line) if s.strip()]

	min_exp = 0
	if exp_match:
	match_years = re.search(r"(\d+)\+?", exp_match.group(1))
	if match_years:
	min_exp = int(match_years.group(1))

	return skills, min_exp

	required_skills, required_exp = extract_required_skills_and_experience(jd_structured)

	match_matrix = []
	for file_name, summary in resume_texts.items():
	skill_score = 0
	exp_score = 0

	exp_match = re.search(r"Total Experience[:\-\u2013]?\s*(\d+)", summary, re.IGNORECASE)
	candidate_exp = int(exp_match.group(1)) if exp_match else 0
	if candidate_exp >= required_exp:
	exp_score = 1

	skill_matches = sum(skill.lower() in summary.lower() for skill in required_skills)
	skill_score = round(skill_matches / len(required_skills), 2) if required_skills else 0

	match_matrix.append({
	"Resume": file_name,
	"Experience Match": exp_score,
	"Skill Match %": skill_score
	})

	df_match = pd.DataFrame(match_matrix).set_index("Resume")

	st.subheader("📊 Heatmap: Skills & Experience Match")
	fig, ax = plt.subplots(figsize=(8, len(df_match) * 0.5 + 1))
	sns.heatmap(df_match, annot=True, cmap="YlGnBu", linewidths=0.5, cbar=False, ax=ax)
	st.pyplot(fig)