"""Smart Resume Matcher.

Streamlit app that extracts structured information from a ZIP of resume
PDFs and a job description (PDF/TXT/pasted text) via a HuggingFace-hosted
LLM, ranks the top-N candidates, and renders a skill-match heatmap.
"""

import os
import tempfile  # kept for compatibility; no longer used directly
import zipfile

import fitz  # PyMuPDF
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from langchain_community.llms import HuggingFaceHub

# Fail fast with a clear message when the token is missing — assigning
# os.getenv(...) directly into os.environ raises an opaque TypeError if
# the variable is unset.
_hf_token = os.getenv("HF_Token")
if not _hf_token:
    st.error("Environment variable HF_Token is not set; cannot reach Hugging Face.")
    st.stop()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Initialize the language model (Mistral)
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 512},
)


def extract_text_from_pdf(file_bytes: bytes) -> str:
    """Return the concatenated text of every page of a PDF given as bytes.

    Opens the document directly from memory — the original wrote the bytes
    to a ``NamedTemporaryFile(delete=False)`` that was never removed (one
    leaked temp file per call) and never closed the ``fitz`` document.

    Args:
        file_bytes: Raw PDF file content.

    Returns:
        All page text joined with no separator (same as the original
        per-page ``+=`` accumulation, but O(n) via ``join``).
    """
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def create_resume_prompt(text: str) -> str:
    """Build the LLM prompt that extracts structured fields from a resume."""
    return (
        "Extract structured information from the resume:\n"
        "1. Full Name\n2. Education\n3. Total Experience (years)\n4. Key Skills\n5. Projects (if any)\n"
        f"\nResume Text:\n{text}"
    )


def create_jd_prompt(text: str) -> str:
    """Build the LLM prompt that extracts structured fields from a JD."""
    return (
        "Extract structured information from the job description:\n"
        "1. Job ID\n2. Company Name\n3. Role\n4. Experience Required\n5. Skills Required\n6. Education Required\n7. Location\n"
        f"\nJD Text:\n{text}"
    )


def matching_prompt(jd: str, resumes: str, top_n: int = 3) -> str:
    """Build the ranking prompt asking the LLM for the top-N candidates."""
    return (
        f"You are a resume screening expert.\n"
        f"Job Description:\n{jd}\n"
        f"Resumes:\n{resumes}\n"
        f"Rank the top {top_n} matching candidates with reasons. Format:\n"
        "1. Candidate Name - Reason\n2. Candidate Name - Reason\n..."
    )


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Resume Matcher", layout="wide")
st.title("🤖 Smart Resume Matcher with JD Insights")

zip_file = st.file_uploader("Upload ZIP of Resumes (PDF only)", type=["zip"])
jd_file = st.file_uploader("Upload Job Description (PDF or TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("Or paste JD directly below")
top_n = st.slider("Select number of top matches", 1, 10, 3)

if st.button("🔍 Match Resumes"):
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both resumes and a JD.")
        st.stop()

    # Resolve the JD text from whichever input the user supplied;
    # an uploaded file wins over pasted text (original precedence).
    jd_text = ""
    if jd_file:
        jd_text = (
            extract_text_from_pdf(jd_file.read())
            if jd_file.name.endswith(".pdf")
            else jd_file.read().decode("utf-8")
        )
    elif jd_text_input:
        jd_text = jd_text_input

    jd_structured = llm.invoke(create_jd_prompt(jd_text))
    st.subheader("📋 Extracted JD Details")
    st.markdown(jd_structured)

    resume_chunks = []  # per-resume summaries, joined once (avoids quadratic +=)
    resume_skills_list = []
    resume_names = []

    with zipfile.ZipFile(zip_file, "r") as z:
        # Match .pdf case-insensitively and skip macOS "__MACOSX/" metadata
        # entries that real-world zips often contain.
        pdf_files = [
            f
            for f in z.namelist()
            if f.lower().endswith(".pdf") and not f.startswith("__MACOSX/")
        ]
        for file in pdf_files:
            with z.open(file) as resume_pdf:
                text = extract_text_from_pdf(resume_pdf.read())
            result = llm.invoke(create_resume_prompt(text))
            resume_chunks.append(f"\n\nResume File: {file}\n{result}")

            # Pull the "Key Skills" line out of the LLM answer for the heatmap.
            skills_line = next(
                (line for line in result.split("\n") if "Key Skills" in line), ""
            )
            skills = [
                skill.strip().lower()
                for skill in skills_line.replace("Key Skills:", "").split(",")
                if skill.strip()
            ]
            resume_skills_list.append(skills)
            resume_names.append(file.replace(".pdf", ""))

    resumes_info = "".join(resume_chunks)

    # Matching logic
    match_response = llm.invoke(matching_prompt(jd_text, resumes_info, top_n))
    st.subheader("🏆 Top Matches")
    st.markdown(match_response)

    # Skill heatmap: cell is 1 when a JD keyword appears among a resume's skills.
    st.subheader("🔬 Skill Match Heatmap")
    jd_keywords = list(
        {
            kw.strip().lower()
            for kw in jd_structured.split()
            if len(kw) > 2 and kw.isalpha()
        }
    )
    heatmap_data = pd.DataFrame(0, index=resume_names, columns=jd_keywords)
    for i, skills in enumerate(resume_skills_list):
        for kw in jd_keywords:
            if kw in skills:
                heatmap_data.loc[resume_names[i], kw] = 1

    if not heatmap_data.empty:
        fig, ax = plt.subplots(figsize=(12, len(resume_names) * 0.5 + 2))
        sns.heatmap(heatmap_data, annot=True, cmap="YlGnBu", cbar=False, ax=ax)
        st.pyplot(fig)
    else:
        st.info("No matching skills found for heatmap.")