# NOTE(review): the three lines below were Hugging Face Space page residue
# ("Spaces: Sleeping") captured by the scrape — not part of the app code.
| import os | |
| import zipfile | |
| import tempfile | |
| import fitz # PyMuPDF | |
| import streamlit as st | |
| import pandas as pd | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| import re | |
| from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace | |
# --- Hugging Face credentials ---
# The token is stored in the Space secret / environment variable named "hf1".
hf_token = os.getenv("hf1")
if not hf_token:
    # Original message told users to set "HF_Token", but the code reads "hf1".
    st.error("HuggingFace token not found. Please set the 'hf1' environment variable.")
    st.stop()
# Expose the token under the name the langchain/huggingface clients read.
# (The original also did os.environ["hf1"] = os.getenv("hf1"), a no-op.)
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
# --- LLM setup ---
# HuggingFaceEndpoint carries the model/provider/generation configuration;
# ChatHuggingFace merely wraps it with the chat-message interface. The
# original passed a second, conflicting repo_id (Llama-3.2-3B vs the
# endpoint's Llama-3.1-8B) and duplicate generation kwargs to the wrapper,
# which does not use them for inference — configure everything once here.
llm_base = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    provider="novita",
    temperature=0.7,
    max_new_tokens=150,
    task="conversational",
)
llm = ChatHuggingFace(llm=llm_base)
# Text extraction from PDF
def extract_text_from_pdf(file_bytes):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        file_bytes: Raw bytes of a PDF document.

    Returns:
        One string containing the text of all pages joined together.
    """
    # Open straight from memory. The original wrote the bytes to a
    # NamedTemporaryFile(delete=False) that was never unlinked (temp-file
    # leak on every call) and never closed the fitz document either.
    with fitz.open(stream=file_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
# Prompt creators
def create_resume_prompt(text):
    """Build the extraction prompt that asks the LLM for structured resume fields."""
    template = """
Extract structured information from the resume:
1. Full Name
2. Education
3. Total Experience (in years)
4. Key Skills
5. Projects (Names and Outcomes if any)
Resume Text:
{resume}
"""
    return template.format(resume=text)
def create_jd_prompt(text):
    """Build the extraction prompt that asks the LLM for structured JD fields."""
    template = """
Extract structured job description info:
1. Job ID
2. Company Name
3. Role
4. Experience Required
5. Skills Required
6. Education Required
7. Location
Job Description:
{jd}
"""
    return template.format(jd=text)
def matching_prompt(jd_text, resumes_info, top_n=3):
    """Build the ranking prompt: ask the LLM for the top ``top_n`` candidates."""
    template = """
You are a resume screening expert. Based on the JD and resume summaries below, return the top {n} matching candidates.
Criteria:
- Skill alignment
- Relevant experience
- Education
- Domain-specific keywords
Job Description:
{jd}
Resumes:
{resumes}
Format:
1. Candidate Name: Reason
2. Candidate Name: Reason
up to {n}
"""
    return template.format(n=top_n, jd=jd_text, resumes=resumes_info)
# --- UI setup ---
# The original labels contained mojibake ("π€", "π", "βοΈ") from a broken
# character encoding; replaced with readable emoji. Exact original glyphs
# are unrecoverable — TODO confirm against the deployed Space.
st.set_page_config(page_title="Resume Matcher + JD Extractor", layout="centered")
st.title("🤖 Resume Matcher & JD Extractor")
st.markdown("Upload resumes in a ZIP file and a Job Description. The app will extract, match, and visualize candidate alignment.")

# --- Upload section ---
zip_file = st.file_uploader("📁 Upload ZIP of Resumes (PDF)", type=["zip"])
jd_file = st.file_uploader("📄 Upload Job Description (PDF/TXT)", type=["pdf", "txt"])
jd_text_input = st.text_area("✍️ Or Paste Job Description Text")
top_n = st.slider("How many top candidates to return?", min_value=1, max_value=10, value=3)
if st.button("🔍 Match Candidates"):
    # Guard clause: both a resume ZIP and some form of JD are required.
    if not zip_file or not (jd_file or jd_text_input.strip()):
        st.warning("Please upload both a ZIP of resumes and a JD.")
        st.stop()

    # --- Job description text (uploaded file takes precedence over pasted text) ---
    if jd_file:
        # Case-insensitive suffix check: the original .endswith(".pdf") missed ".PDF".
        if jd_file.name.lower().endswith(".pdf"):
            jd_text = extract_text_from_pdf(jd_file.read())
        else:
            jd_text = jd_file.read().decode("utf-8")
    else:
        jd_text = jd_text_input.strip()

    jd_structured = llm.invoke(create_jd_prompt(jd_text)).content
    st.subheader("📋 Extracted JD Information")
    st.markdown(jd_structured)

    # --- Extract and summarize every resume in the ZIP ---
    # (The original wrapped this in a tempfile.TemporaryDirectory that was
    # never used — the ZIP is read entirely in memory.)
    resumes_info = ""
    resume_texts = {}
    with zipfile.ZipFile(zip_file, "r") as z:
        pdf_files = [f for f in z.namelist() if f.lower().endswith(".pdf")]
        if not pdf_files:
            st.error("No PDF resumes found.")
            st.stop()
        st.success(f"Found {len(pdf_files)} resumes. Extracting...")
        for file in pdf_files:
            with z.open(file) as resume_pdf:
                pdf_data = resume_pdf.read()
            text = extract_text_from_pdf(pdf_data)
            summary = llm.invoke(create_resume_prompt(text)).content
            resumes_info += f"\n\nResume File: {file}\n{summary}"
            resume_texts[file] = summary

    # --- Ask the LLM for the ranked shortlist ---
    st.info("🔎 Matching resumes to JD...")
    match_result = llm.invoke(matching_prompt(jd_text, resumes_info, top_n)).content
    st.subheader("✅ Top Matched Candidates")
    st.markdown(match_result)

    # --- Heatmap of per-resume skill / experience alignment ---
    def extract_required_skills_and_experience(jd_structured_text):
        """Parse the LLM's structured JD summary.

        Returns:
            (skills, min_exp): lowercased skill tokens and the minimum years
            of experience as an int (0 when no number could be found).
        """
        skills_match = re.search(r"Skills Required[:\-\u2013]?\s*(.*)", jd_structured_text, re.IGNORECASE)
        exp_match = re.search(r"Experience Required[:\-\u2013]?\s*(.*)", jd_structured_text, re.IGNORECASE)
        skills = []
        if skills_match:
            # Split the skills line on common separators; drop empty fragments.
            skills = [s.strip().lower() for s in re.split(r"[,;/\n]", skills_match.group(1)) if s.strip()]
        min_exp = 0
        if exp_match:
            match_years = re.search(r"(\d+)\+?", exp_match.group(1))
            if match_years:
                min_exp = int(match_years.group(1))
        return skills, min_exp

    required_skills, required_exp = extract_required_skills_and_experience(jd_structured)

    match_matrix = []
    for file_name, summary in resume_texts.items():
        exp_match = re.search(r"Total Experience[:\-\u2013]?\s*(\d+)", summary, re.IGNORECASE)
        candidate_exp = int(exp_match.group(1)) if exp_match else 0
        exp_score = 1 if candidate_exp >= required_exp else 0
        # required_skills are already lowercased by the extractor, so only the
        # summary needs normalizing (the original lowercased both, redundantly).
        summary_lower = summary.lower()
        skill_matches = sum(skill in summary_lower for skill in required_skills)
        skill_score = round(skill_matches / len(required_skills), 2) if required_skills else 0
        match_matrix.append({
            "Resume": file_name,
            "Experience Match": exp_score,
            "Skill Match %": skill_score,
        })

    df_match = pd.DataFrame(match_matrix).set_index("Resume")
    st.subheader("📊 Heatmap: Skills & Experience Match")
    fig, ax = plt.subplots(figsize=(8, len(df_match) * 0.5 + 1))
    sns.heatmap(df_match, annot=True, cmap="YlGnBu", linewidths=0.5, cbar=False, ax=ax)
    st.pyplot(fig)