# TalentSync — pages/sample.py
# (last update: commit 46c943d by UmaKumpatla)
import os
import zipfile
import tempfile
import fitz # PyMuPDF
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from langchain_community.llms import HuggingFaceHub
# Set the Hugging Face token. Fail fast with a clear message when the
# HF_Token secret is absent — assigning None into os.environ would raise
# an opaque "str expected, not NoneType" TypeError instead.
_hf_token = os.getenv("HF_Token")
if not _hf_token:
    raise RuntimeError("HF_Token environment variable is not set; cannot reach the Hugging Face Hub.")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Initialize the language model (Mistral) used for all JD/resume extraction
# and matching prompts below.
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 512},
)
def extract_text_from_pdf(file_bytes):
    """Return the concatenated text of every page of a PDF.

    The document is opened directly from the in-memory bytes via
    ``fitz.open(stream=...)``. This fixes two leaks in the previous
    version: the ``NamedTemporaryFile(delete=False)`` was never unlinked,
    and the fitz document was never closed.

    Args:
        file_bytes: Raw bytes of a PDF file.

    Returns:
        str: Text of all pages, concatenated in page order.
    """
    doc = fitz.open(stream=file_bytes, filetype="pdf")
    try:
        # Same accumulation as before (per-page get_text), via join.
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()
# Create structured extraction prompts
def create_resume_prompt(text):
    """Build the LLM prompt that asks for structured resume fields."""
    fields = [
        "Full Name",
        "Education",
        "Total Experience (years)",
        "Key Skills",
        "Projects (if any)",
    ]
    numbered = "".join(f"{i}. {field}\n" for i, field in enumerate(fields, 1))
    return "Extract structured information from the resume:\n" + numbered + f"\nResume Text:\n{text}"
def create_jd_prompt(text):
    """Build the LLM prompt that asks for structured job-description fields."""
    fields = [
        "Job ID",
        "Company Name",
        "Role",
        "Experience Required",
        "Skills Required",
        "Education Required",
        "Location",
    ]
    numbered = "".join(f"{i}. {field}\n" for i, field in enumerate(fields, 1))
    return "Extract structured information from the job description:\n" + numbered + f"\nJD Text:\n{text}"
def matching_prompt(jd, resumes, top_n=3):
    """Build the ranking prompt pairing one JD against all resume summaries."""
    sections = [
        "You are a resume screening expert.",
        f"Job Description:\n{jd}",
        f"Resumes:\n{resumes}",
        f"Rank the top {top_n} matching candidates with reasons. Format:",
        "1. Candidate Name - Reason\n2. Candidate Name - Reason\n...",
    ]
    return "\n".join(sections)
# ----------------------------- Streamlit UI -----------------------------
# Wide layout suits the skill heatmap rendered at the bottom of the page.
st.set_page_config(page_title="Resume Matcher", layout="wide")
st.title("πŸ€– Smart Resume Matcher with JD Insights")

# Inputs: a ZIP of PDF resumes plus a JD (uploaded file OR pasted text).
zip_file = st.file_uploader("Upload ZIP of Resumes (PDF only)", type=["zip"])
jd_file = st.file_uploader("Upload Job Description (PDF or TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("Or paste JD directly below")
top_n = st.slider("Select number of top matches", 1, 10, 3)

if st.button("πŸ” Match Resumes"):
    # Both a resume ZIP and some form of JD are mandatory.
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both resumes and a JD.")
        st.stop()
    # Resolve the JD text: an uploaded file takes precedence over pasted text.
    jd_text = ""
    if jd_file:
        # PDF uploads go through PyMuPDF; anything else is decoded as UTF-8.
        jd_text = extract_text_from_pdf(jd_file.read()) if jd_file.name.endswith(".pdf") else jd_file.read().decode("utf-8")
    elif jd_text_input:
        jd_text = jd_text_input
    # Ask the LLM for a structured summary of the JD and display it.
    jd_structured = llm.invoke(create_jd_prompt(jd_text))
    st.subheader("πŸ“‹ Extracted JD Details")
    st.markdown(jd_structured)

    resumes_info = ""         # accumulated LLM summaries, one chunk per resume
    resume_skills_list = []   # per-resume list of lower-cased skill strings
    resume_names = []         # resume file names with the ".pdf" suffix removed
    # NOTE(review): tmpdir is created but never used — candidate for removal.
    with tempfile.TemporaryDirectory() as tmpdir:
        with zipfile.ZipFile(zip_file, 'r') as z:
            pdf_files = [f for f in z.namelist() if f.endswith(".pdf")]
            for file in pdf_files:
                with z.open(file) as resume_pdf:
                    text = extract_text_from_pdf(resume_pdf.read())
                    result = llm.invoke(create_resume_prompt(text))
                    resumes_info += f"\n\nResume File: {file}\n{result}"
                    # Skill parsing for heatmap: pull the "Key Skills" line out of
                    # the LLM's structured answer and split it on commas.
                    skills_line = next((line for line in result.split('\n') if "Key Skills" in line), "")
                    skills = [skill.strip().lower() for skill in skills_line.replace("Key Skills:", "").split(',') if skill.strip()]
                    resume_skills_list.append(skills)
                    resume_names.append(file.replace(".pdf", ""))
    # Matching logic: a single ranking prompt over the JD plus all summaries.
    match_response = llm.invoke(matching_prompt(jd_text, resumes_info, top_n))
    st.subheader("πŸ† Top Matches")
    st.markdown(match_response)

    # Skill heatmap visualization: rows are resumes, columns are JD "keywords"
    # (every deduplicated alphabetic word longer than 2 chars in the structured
    # JD text); a cell is 1 when the keyword matches a parsed skill exactly.
    st.subheader("πŸ”¬ Skill Match Heatmap")
    jd_keywords = [kw.strip().lower() for kw in jd_structured.split() if len(kw) > 2 and kw.isalpha()]
    jd_keywords = list(set(jd_keywords))
    heatmap_data = pd.DataFrame(0, index=resume_names, columns=jd_keywords)
    for i, skills in enumerate(resume_skills_list):
        for kw in jd_keywords:
            # NOTE(review): `kw in skills` tests exact equality against whole
            # skill strings, so multi-word skills (e.g. "machine learning")
            # never match single-word keywords — confirm this is intended.
            if kw in skills:
                heatmap_data.loc[resume_names[i], kw] = 1
    if not heatmap_data.empty:
        fig, ax = plt.subplots(figsize=(12, len(resume_names)*0.5 + 2))
        sns.heatmap(heatmap_data, annot=True, cmap="YlGnBu", cbar=False, ax=ax)
        st.pyplot(fig)
    else:
        st.info("No matching skills found for heatmap.")