"""Streamlit app: extract a job description, summarise resumes, and rank them.

Flow: the user uploads a ZIP of PDF resumes plus a job description (file or
pasted text). Each document is summarised by a hosted LLM, the LLM is asked
for the top-N matches, and a simple regex-based skills/experience heatmap is
drawn from the LLM summaries.
"""

import os
import re
import tempfile  # no longer used by the code below; kept in case other code relies on it
import zipfile

import fitz  # PyMuPDF
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

# Name of the environment variable that holds the HuggingFace API token.
HF_TOKEN_ENV_VAR = "hf1"

# Set HuggingFace API keys
hf_token = os.getenv(HF_TOKEN_ENV_VAR)
if not hf_token:
    # The message names the variable that is actually read, so the user can fix it.
    st.error(
        f"HuggingFace token not found. Please set {HF_TOKEN_ENV_VAR} "
        "as an environment variable."
    )
    st.stop()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Load LLM
llm_base = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    provider="novita",
    temperature=0.7,
    max_new_tokens=150,
    task="conversational",
)
# NOTE(review): this repo_id differs from the one given to llm_base above
# (3.2-3B vs 3.1-8B). Confirm which model is intended and whether
# ChatHuggingFace honours these extra keyword arguments at all.
llm = ChatHuggingFace(
    llm=llm_base,
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    provider="novita",
    temperature=0.7,
    max_new_tokens=150,
    task="conversational",
)

# Label patterns used to pull fields back out of the LLM's free-text answers.
# They tolerate markdown emphasis (``**Label:**``) around the label, and the
# candidate pattern also tolerates the "(in years)" suffix that the resume
# prompt itself asks for.
_SKILLS_REQUIRED_RE = re.compile(
    r"Skills Required\**\s*[:\-\u2013]?\**\s*(.*)", re.IGNORECASE
)
_EXPERIENCE_REQUIRED_RE = re.compile(
    r"Experience Required\**\s*[:\-\u2013]?\**\s*(.*)", re.IGNORECASE
)
_TOTAL_EXPERIENCE_RE = re.compile(
    r"Total Experience(?:\s*\([^)\n]*\))?\**\s*[:\-\u2013]?\**\s*(\d+)",
    re.IGNORECASE,
)
_YEARS_RE = re.compile(r"(\d+)\+?")
_SKILL_SEPARATOR_RE = re.compile(r"[,;/\n]")


# Text extraction from PDF
def extract_text_from_pdf(file_bytes):
    """Return the concatenated text of every page of a PDF.

    Args:
        file_bytes: Raw bytes of the PDF document.

    Returns:
        The text of all pages joined in page order.

    The document is opened straight from memory and closed by the context
    manager, so no temporary file is left behind on disk.
    """
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


# Prompt creators
def create_resume_prompt(text):
    """Build the prompt asking the LLM to summarise one resume's key fields."""
    return f"""
Extract structured information from the resume:
1. Full Name
2. Education
3. Total Experience (in years)
4. Key Skills
5. Projects (Names and Outcomes if any)

Resume Text:
{text}
"""


def create_jd_prompt(text):
    """Build the prompt asking the LLM to extract job-description fields."""
    return f"""
Extract structured job description info:
1. Job ID
2. Company Name
3. Role
4. Experience Required
5. Skills Required
6. Education Required
7. Location

Job Description:
{text}
"""


def matching_prompt(jd_text, resumes_info, top_n=3):
    """Build the prompt asking the LLM to rank resumes against the JD.

    Args:
        jd_text: Job description text.
        resumes_info: Concatenated per-resume summaries.
        top_n: Number of candidates the LLM is asked to return.
    """
    return f"""
You are a resume screening expert. Based on the JD and resume summaries below, return the top {top_n} matching candidates.

Criteria:
- Skill alignment
- Relevant experience
- Education
- Domain-specific keywords

Job Description:
{jd_text}

Resumes:
{resumes_info}

Format:
1. Candidate Name: Reason
2. Candidate Name: Reason
...
up to {top_n}
"""


def extract_required_skills_and_experience(jd_structured_text):
    """Parse required skills and minimum experience from the LLM's JD summary.

    Args:
        jd_structured_text: The LLM's answer to ``create_jd_prompt``.

    Returns:
        ``(skills, min_exp)``: ``skills`` is a list of lower-cased skill
        strings (empty if no "Skills Required" label is found) and
        ``min_exp`` is the first integer found after "Experience Required"
        (0 if none).
    """
    skills = []
    skills_match = _SKILLS_REQUIRED_RE.search(jd_structured_text)
    if skills_match:
        for raw_skill in _SKILL_SEPARATOR_RE.split(skills_match.group(1)):
            # Drop stray markdown emphasis/backticks so "**python" still matches.
            skill = raw_skill.strip(" \t*`").lower()
            if skill:
                skills.append(skill)

    min_exp = 0
    exp_match = _EXPERIENCE_REQUIRED_RE.search(jd_structured_text)
    if exp_match:
        years_match = _YEARS_RE.search(exp_match.group(1))
        if years_match:
            min_exp = int(years_match.group(1))
    return skills, min_exp


def _is_resume_pdf(member_name):
    """Return True for ZIP members that look like real PDF resumes.

    The extension check is case-insensitive, and macOS archive metadata
    (``__MACOSX/`` entries and ``._`` resource-fork files) is skipped because
    those entries carry a ``.pdf`` name but are not PDF documents.
    """
    base_name = member_name.rsplit("/", 1)[-1]
    return (
        member_name.lower().endswith(".pdf")
        and not member_name.startswith("__MACOSX/")
        and not base_name.startswith("._")
    )


def _score_resume(summary, required_skills, required_exp):
    """Score one resume summary against the parsed JD requirements.

    Returns:
        ``(exp_score, skill_score)``: ``exp_score`` is 1 when the years found
        in the summary meet ``required_exp`` (else 0); ``skill_score`` is the
        fraction (0-1, rounded to 2 places) of required skills that appear as
        substrings of the summary, or 0 when no skills are required.
    """
    exp_match = _TOTAL_EXPERIENCE_RE.search(summary)
    candidate_exp = int(exp_match.group(1)) if exp_match else 0
    exp_score = 1 if candidate_exp >= required_exp else 0

    if not required_skills:
        return exp_score, 0
    summary_lower = summary.lower()  # hoisted: invariant across skills
    hits = sum(skill in summary_lower for skill in required_skills)
    return exp_score, round(hits / len(required_skills), 2)


# UI setup
st.set_page_config(page_title="Resume Matcher + JD Extractor", layout="centered")
st.title("🤖 Resume Matcher & JD Extractor")
st.markdown(
    "Upload resumes in a ZIP file and a Job Description. "
    "The app will extract, match, and visualize candidate alignment."
)

# Upload section
zip_file = st.file_uploader("📁 Upload ZIP of Resumes (PDF)", type=["zip"])
jd_file = st.file_uploader("📄 Upload Job Description (PDF/TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("✍️ Or Paste Job Description Text")
top_n = st.slider("How many top candidates to return?", min_value=1, max_value=10, value=3)

if st.button("🔍 Match Candidates"):
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both a ZIP of resumes and a JD.")
        st.stop()

    # Extract JD
    if jd_file:
        jd_bytes = jd_file.read()
        if jd_file.name.lower().endswith(".pdf"):
            jd_text = extract_text_from_pdf(jd_bytes)
        else:
            # Replace undecodable bytes instead of crashing on non-UTF-8 text files.
            jd_text = jd_bytes.decode("utf-8", errors="replace")
    else:
        jd_text = jd_text_input.strip()

    jd_structured = llm.invoke(create_jd_prompt(jd_text)).content
    st.subheader("📌 Extracted JD Information")
    st.markdown(jd_structured)

    # Extract resumes
    try:
        archive = zipfile.ZipFile(zip_file, "r")
    except zipfile.BadZipFile:
        st.error("The uploaded file is not a valid ZIP archive.")
        st.stop()

    resume_texts = {}
    with archive as z:
        pdf_files = [name for name in z.namelist() if _is_resume_pdf(name)]
        if not pdf_files:
            st.error("No PDF resumes found.")
            st.stop()

        st.success(f"Found {len(pdf_files)} resumes. Extracting...")
        for file in pdf_files:
            try:
                text = extract_text_from_pdf(z.read(file))
            except (RuntimeError, ValueError) as err:
                # One unreadable PDF should not abort the whole batch.
                st.warning(f"Skipping {file}: could not read PDF ({err}).")
                continue
            resume_texts[file] = llm.invoke(create_resume_prompt(text)).content

    if not resume_texts:
        st.error("None of the PDF resumes could be read.")
        st.stop()

    resumes_info = "".join(
        f"\n\nResume File: {file}\n{summary}"
        for file, summary in resume_texts.items()
    )

    # Match candidates
    st.info("🔗 Matching resumes to JD...")
    match_result = llm.invoke(matching_prompt(jd_text, resumes_info, top_n)).content
    st.subheader("✅ Top Matched Candidates")
    st.markdown(match_result)

    # Visualize Match Heatmap
    required_skills, required_exp = extract_required_skills_and_experience(jd_structured)

    match_matrix = []
    for file_name, summary in resume_texts.items():
        exp_score, skill_score = _score_resume(summary, required_skills, required_exp)
        match_matrix.append({
            "Resume": file_name,
            "Experience Match": exp_score,
            # Despite the "%" in the label this is a 0-1 fraction, which keeps
            # it on the same colour scale as the 0/1 experience column.
            "Skill Match %": skill_score,
        })

    df_match = pd.DataFrame(match_matrix).set_index("Resume")

    st.subheader("📊 Heatmap: Skills & Experience Match")
    fig, ax = plt.subplots(figsize=(8, len(df_match) * 0.5 + 1))
    sns.heatmap(df_match, annot=True, cmap="YlGnBu", linewidths=0.5, cbar=False, ax=ax)
    st.pyplot(fig)
    # Streamlit reruns this script on every interaction; close the figure so
    # pyplot does not keep accumulating them in memory.
    plt.close(fig)