Spaces:
Sleeping
Sleeping
import os
import tempfile
import zipfile

import fitz  # PyMuPDF
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from langchain_community.llms import HuggingFaceHub
# Set Hugging Face token
# The secret is read from "HF_Token" and re-exported as
# HUGGINGFACEHUB_API_TOKEN before the model is constructed. os.environ only
# accepts strings, so a missing secret is reported explicitly here instead of
# surfacing as "TypeError: str expected, not NoneType".
_hf_token = os.getenv("HF_Token") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not _hf_token:
    raise RuntimeError(
        "Hugging Face token not found: set the HF_Token (or "
        "HUGGINGFACEHUB_API_TOKEN) environment variable."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = _hf_token

# Initialize the language model (Mistral)
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    model_kwargs={"temperature": 0.5, "max_new_tokens": 512},
)
# Extract text from PDF
def extract_text_from_pdf(file_bytes):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        file_bytes: Raw bytes of the PDF document.

    Returns:
        The text of all pages joined without any separator.

    Raises:
        Whatever PyMuPDF raises when the bytes cannot be opened as a PDF.
    """
    # Open directly from memory rather than through a temporary file: nothing
    # is left behind on disk, and the ``with`` block closes the document
    # deterministically.
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Create structured extraction prompts
def create_resume_prompt(text):
    """Build the LLM prompt that requests structured fields from a resume.

    Args:
        text: Plain text of the resume.

    Returns:
        The instruction header, the numbered list of fields to extract, and
        the resume text under a ``Resume Text:`` heading, as one string.
    """
    header = "Extract structured information from the resume:\n"
    wanted_fields = "1. Full Name\n2. Education\n3. Total Experience (years)\n4. Key Skills\n5. Projects (if any)\n"
    payload = f"\nResume Text:\n{text}"
    return "".join((header, wanted_fields, payload))
def create_jd_prompt(text):
    """Build the LLM prompt that requests structured fields from a job description.

    Args:
        text: Plain text of the job description.

    Returns:
        The instruction header, the numbered list of fields to extract, and
        the JD text under a ``JD Text:`` heading, as one string.
    """
    header = "Extract structured information from the job description:\n"
    wanted_fields = "1. Job ID\n2. Company Name\n3. Role\n4. Experience Required\n5. Skills Required\n6. Education Required\n7. Location\n"
    payload = f"\nJD Text:\n{text}"
    return "".join((header, wanted_fields, payload))
def matching_prompt(jd, resumes, top_n=3):
    """Build the LLM prompt that asks for a ranked shortlist of candidates.

    Args:
        jd: Job description text placed under ``Job Description:``.
        resumes: Resume information placed under ``Resumes:``.
        top_n: Number of candidates the model is asked to rank.

    Returns:
        The complete ranking prompt, ending with the expected answer format.
    """
    sections = [
        "You are a resume screening expert.\n",
        f"Job Description:\n{jd}\n",
        f"Resumes:\n{resumes}\n",
        f"Rank the top {top_n} matching candidates with reasons. Format:\n",
        "1. Candidate Name - Reason\n2. Candidate Name - Reason\n...",
    ]
    return "".join(sections)
# Streamlit UI


def _parse_list_field(structured_text, label):
    """Return the comma-separated items on the first line that mentions ``label``.

    Only the text after the label is used, so list numbering such as "4. " is
    never glued to the first item. Items are stripped of surrounding
    whitespace and markdown punctuation and lower-cased. Returns an empty list
    when no line mentions the label (matched case-insensitively).
    """
    lowered_label = label.lower()
    for line in structured_text.splitlines():
        position = line.lower().find(lowered_label)
        if position == -1:
            continue
        tail = line[position + len(label):].lstrip(" \t:*-")
        items = (item.strip(" \t*`").rstrip(".").lower() for item in tail.split(","))
        return [item for item in items if item]
    return []


def _is_resume_pdf(member_name):
    """Return True for PDF archive members, skipping macOS resource-fork entries."""
    base_name = os.path.basename(member_name)
    return (
        member_name.lower().endswith(".pdf")
        and not member_name.startswith("__MACOSX/")
        and not base_name.startswith("._")
    )


st.set_page_config(page_title="Resume Matcher", layout="wide")
# NOTE(review): the leading emoji in the UI strings below were reconstructed
# from mis-encoded text; confirm the intended glyphs.
st.title("🤖 Smart Resume Matcher with JD Insights")
zip_file = st.file_uploader("Upload ZIP of Resumes (PDF only)", type=["zip"])
jd_file = st.file_uploader("Upload Job Description (PDF or TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("Or paste JD directly below")
top_n = st.slider("Select number of top matches", 1, 10, 3)

if st.button("🔍 Match Resumes"):
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both resumes and a JD.")
        st.stop()

    # An uploaded JD file takes precedence over pasted text.
    if jd_file:
        jd_bytes = jd_file.read()
        # Compare case-insensitively so "JD.PDF" is not decoded as plain text.
        if jd_file.name.lower().endswith(".pdf"):
            jd_text = extract_text_from_pdf(jd_bytes)
        else:
            jd_text = jd_bytes.decode("utf-8", errors="replace")
    else:
        jd_text = jd_text_input
    if not jd_text.strip():
        st.warning("No text could be read from the job description.")
        st.stop()

    jd_structured = llm.invoke(create_jd_prompt(jd_text))
    st.subheader("📄 Extracted JD Details")
    st.markdown(jd_structured)

    try:
        archive = zipfile.ZipFile(zip_file, "r")
    except zipfile.BadZipFile:
        st.error("The uploaded file is not a valid ZIP archive.")
        st.stop()

    resume_sections = []
    resume_skills = {}  # heatmap row label -> set of lower-cased skills
    with archive:
        for member in filter(_is_resume_pdf, archive.namelist()):
            with archive.open(member) as resume_pdf:
                resume_text = extract_text_from_pdf(resume_pdf.read())
            extracted = llm.invoke(create_resume_prompt(resume_text))
            resume_sections.append(f"\n\nResume File: {member}\n{extracted}")

            # Label rows by bare file name; fall back to the full archive path
            # when two folders contain the same file name so rows stay unique.
            display_name = os.path.splitext(os.path.basename(member))[0]
            if display_name in resume_skills:
                display_name = os.path.splitext(member)[0]
            resume_skills[display_name] = set(_parse_list_field(extracted, "Key Skills"))

    if not resume_skills:
        st.warning("No PDF resumes were found in the ZIP archive.")
        st.stop()
    resumes_info = "".join(resume_sections)

    # Matching logic
    match_response = llm.invoke(matching_prompt(jd_text, resumes_info, top_n))
    st.subheader("🏆 Top Matches")
    st.markdown(match_response)

    # Skill heatmap visualization
    st.subheader("🔬 Skill Match Heatmap")
    jd_keywords = _parse_list_field(jd_structured, "Skills Required")
    if not jd_keywords:
        # No parsable "Skills Required" line: fall back to every alphabetic
        # word of the structured JD, trimming punctuation before the test.
        words = (token.strip(".,;:()[]*").lower() for token in jd_structured.split())
        jd_keywords = [word for word in words if len(word) > 2 and word.isalpha()]
    # De-duplicate while keeping first-seen order so columns are stable.
    jd_keywords = list(dict.fromkeys(jd_keywords))

    if jd_keywords:
        heatmap_data = pd.DataFrame(
            [[int(kw in skills) for kw in jd_keywords] for skills in resume_skills.values()],
            index=list(resume_skills),
            columns=jd_keywords,
        )
        fig, ax = plt.subplots(figsize=(12, len(resume_skills) * 0.5 + 2))
        sns.heatmap(heatmap_data, annot=True, cmap="YlGnBu", cbar=False, ax=ax)
        st.pyplot(fig)
        # Release the figure so repeated reruns do not accumulate open figures.
        plt.close(fig)
    else:
        st.info("No matching skills found for heatmap.")