# Source: Hugging Face Space "MBG0903 / Procelevate-TalentMatch-AI" (Space status: Sleeping)
# File size: 21,282 bytes

import os
import json
import re
import tempfile
from typing import Dict, Any

import pandas as pd
import gradio as gr
import PyPDF2
from openai import OpenAI

# =========================
# Config
# =========================
# Title and subtitle rendered in the page header and used as the Blocks title.
APP_TITLE = "Procelevate TalentMatch AI"
APP_SUBTITLE = "Agentic AI for Data Analyst / Data Engineering Candidate Screening"
# Brand colour applied to h1/h2/h3 headings through custom_css.
PROCELEVATE_BLUE = "#0F2C59"
# Upper bound on resumes per run; enforced in run_talentmatch and quoted in the UI notice.
MAX_RESUMES = 5

# Job description pre-filled into the "Paste Job Description" textbox.
DEFAULT_JD = """Role: Data Analyst / Data Engineer

We are looking for candidates with experience in SQL, Python, data visualization, ETL pipelines, data cleaning, dashboarding, and cloud data platforms. Exposure to Power BI, Tableau, Azure, AWS, Snowflake, Airflow, or Spark is preferred.

Responsibilities:
- Analyze large datasets and generate insights
- Build and maintain data pipelines
- Create dashboards and business reports
- Work with stakeholders to gather requirements
- Ensure data quality and governance

Preferred:
- 3+ years of experience
- Strong communication skills
- Experience in analytics and engineering environments
"""

# The API key is read once at import time. When it is absent, `client` stays None
# and call_llm_json returns an {"error": ...} dict instead of calling the API.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key) if api_key else None


# =========================
# Utility Functions
# =========================
def clean_text(text: str) -> str:
    """Collapse each run of whitespace in *text* to one space and trim both ends.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if text:
        collapsed = re.sub(r"\s+", " ", text)
        return collapsed.strip()
    return ""


def read_pdf(file_path: str) -> str:
    """Extract the text of every page of a PDF and return it whitespace-normalised.

    Pages for which no text is extracted are skipped. Any exception raised while
    opening or parsing the file is reported as a string starting with
    ``"Error reading PDF: "`` rather than being propagated.
    """
    try:
        with open(file_path, "rb") as handle:
            pdf = PyPDF2.PdfReader(handle)
            # One newline after each non-empty page, exactly as a running "+=" would build it.
            raw = "".join(
                chunk + "\n"
                for chunk in (page.extract_text() for page in pdf.pages)
                if chunk
            )
    except Exception as exc:
        return f"Error reading PDF: {str(exc)}"
    return clean_text(raw)


def read_txt(file_path: str) -> str:
    """Read a UTF-8 text file (undecodable bytes ignored) and normalise its whitespace.

    Any exception is reported as a string starting with ``"Error reading TXT: "``
    rather than being propagated.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
            raw = handle.read()
            return clean_text(raw)
    except Exception as exc:
        return "Error reading TXT: " + str(exc)


def file_to_text(file_obj) -> str:
    """Return the text of an uploaded resume, choosing the reader by file extension.

    *file_obj* may be a path string or any object exposing the path as ``.name``.
    ``None`` yields an empty string; extensions other than ``.pdf`` / ``.txt``
    (compared case-insensitively) yield an "unsupported format" message.
    """
    if file_obj is None:
        return ""

    path = file_obj.name if not isinstance(file_obj, str) else file_obj
    normalized = path.lower()

    if normalized.endswith(".pdf"):
        return read_pdf(path)
    if normalized.endswith(".txt"):
        return read_txt(path)
    return "Unsupported file format. Please upload PDF or TXT."


def call_llm_json(system_prompt: str, user_prompt: str) -> Dict[str, Any]:
    """Send a system/user prompt pair to the chat model and return its JSON reply.

    Returns the parsed JSON content of the first choice. When no client is
    configured, or when the request or the JSON parsing raises, a dict with a
    single ``"error"`` key describing the problem is returned instead.
    """
    if not client:
        return {"error": "OPENAI_API_KEY is not set. Please add it in Hugging Face Space secrets."}

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4.1-mini",
            temperature=0.2,
            response_format={"type": "json_object"},
            messages=conversation,
        )
        payload = completion.choices[0].message.content
        parsed = json.loads(payload)
    except Exception as exc:
        return {"error": str(exc)}
    return parsed


def safe_list_to_text(value) -> str:
    """Render *value* as display text.

    Lists become their items joined by ``", "``, ``None`` becomes an empty
    string, and anything else is passed through ``str()``.
    """
    if value is None:
        return ""
    if isinstance(value, list):
        return ", ".join(map(str, value))
    return str(value)


def get_decision_band(recommendation: str) -> str:
    """Map a recommendation label to its colour band.

    Matching ignores case and surrounding whitespace. Anything other than
    shortlist / hold / reject (including ``None``) maps to ``"Review"``.
    """
    bands = {"shortlist": "Green", "hold": "Amber", "reject": "Red"}
    key = (recommendation or "").strip().lower()
    return bands.get(key, "Review")


def build_candidate_detail(record: Dict[str, Any]) -> str:
    """Render one screening record as the Markdown shown in the detail panel.

    The output has a heading, a block of one-line ``**Label:** value`` fields,
    a horizontal rule, and then one titled paragraph per narrative field.
    Keys missing from *record* render as empty strings.
    """
    # (label shown, record key, text appended after the value)
    headline_fields = (
        ("Candidate", "Candidate", ""),
        ("Current Role", "Current Role", ""),
        ("Experience", "Experience (Yrs)", " years"),
        ("Location", "Location", ""),
        ("Role Fit", "Role Fit", ""),
        ("Match Score", "Match Score", ""),
        ("Recommendation", "Recommendation", ""),
        ("Decision Band", "Decision Band", ""),
    )
    # For these sections the title doubles as the record key.
    narrative_fields = (
        "Why Selected",
        "Matched Skills",
        "Missing Skills / Gaps",
        "Interview Focus Areas",
        "Recommended HR Action",
        "Response Intent",
        "Final Status",
        "Next Step",
        "Bias-Safe Review Note",
    )

    # Two spaces before the newline are a Markdown hard line break.
    hard_break = "  \n"
    headline = hard_break.join(
        f"**{label}:** {record.get(key, '')}{suffix}"
        for label, key, suffix in headline_fields
    )
    narrative = "\n\n".join(
        f"**{title}**{hard_break}{record.get(title, '')}"
        for title in narrative_fields
    )
    return f"\n### Candidate Detail Review\n\n{headline}\n\n---\n\n{narrative}\n"


# =========================
# Agent Functions
# =========================
def resume_intake_agent(resume_text: str) -> Dict[str, Any]:
    """Ask the LLM to extract a structured candidate profile from resume text.

    Args:
        resume_text: Plain text of the resume; embedded verbatim in the prompt.

    Returns:
        Whatever ``call_llm_json`` returns: the model's parsed JSON reply, or a
        dict with an ``"error"`` key when the call fails. The field names listed
        in the prompt are requested from the model, not validated here.
    """
    # System prompt: fixes the agent's role and demands JSON-only output.
    system_prompt = """
You are a resume intake agent for HR and talent screening.
Extract structured candidate information from a resume.
Return only valid JSON.
"""

    # User prompt: the requested field list followed by the raw resume text.
    user_prompt = f"""
Extract the following fields from this resume text.

Fields:
- candidate_name
- email
- phone
- current_location
- total_experience_years
- current_role
- current_company
- skills (list)
- tools_technologies (list)
- education (list)
- certifications (list)
- projects_summary
- relevant_data_analyst_experience
- relevant_data_engineering_experience
- likely_notice_period
- summary

Resume Text:
{resume_text}
"""
    return call_llm_json(system_prompt, user_prompt)


def jd_matching_agent(jd_text: str, candidate_profile: Dict[str, Any]) -> Dict[str, Any]:
    """Ask the LLM to score a candidate profile against a job description.

    Args:
        jd_text: Job description text; embedded verbatim in the prompt.
        candidate_profile: Profile dict; serialised with ``json.dumps`` (so it
            must be JSON-serialisable) and embedded in the prompt.

    Returns:
        Whatever ``call_llm_json`` returns: the model's parsed JSON reply, or a
        dict with an ``"error"`` key when the call fails. The keys and the
        score thresholds named in the prompt are instructions to the model and
        are not enforced here.
    """
    # System prompt: fixes the agent's role and demands JSON-only output.
    system_prompt = """
You are a JD matching agent.
Compare the candidate profile against the job description.
Return only valid JSON.
Be practical, explainable, and business-friendly.
"""

    # User prompt: JD, serialised profile, requested output keys, then the
    # recommendation thresholds and the bias-safety constraints.
    user_prompt = f"""
Compare this candidate profile against the following Job Description.

Job Description:
{jd_text}

Candidate Profile:
{json.dumps(candidate_profile, indent=2)}

Return JSON with:
- role_fit (Data Analyst / Data Engineer / Both / Low Fit)
- match_score (0 to 100)
- matched_skills (list)
- missing_skills (list)
- why_selected (list)
- interview_focus_areas (list)
- recommended_hr_action
- recommendation (Shortlist / Hold / Reject)
- bias_safe_review_note

Recommendation guidance:
- Shortlist if profile is strongly aligned and match_score >= 80
- Hold if profile is partially aligned and match_score is between 60 and 79
- Reject if profile is weakly aligned or match_score < 60

Important:
- The recommendation must be based only on role fit, relevant skills, tools, experience, and job-related factors.
- Do not use sensitive personal attributes.
"""
    return call_llm_json(system_prompt, user_prompt)


def outreach_agent(candidate_name: str, role_title: str, recommendation: str) -> Dict[str, Any]:
    """Ask the LLM to draft first-contact WhatsApp and email messages for a candidate.

    Returns whatever ``call_llm_json`` returns: the model's parsed JSON reply,
    or a dict with an ``"error"`` key when the call fails.
    """
    # Each prompt is spelled out line by line; both start and end with a newline.
    persona = (
        "\n"
        "You are an HR outreach agent.\n"
        "Generate a short professional outreach message.\n"
        "Return only valid JSON.\n"
    )
    request = (
        "\n"
        "Generate a professional first-contact message for:\n"
        f"Candidate Name: {candidate_name}\n"
        f"Role Title: {role_title}\n"
        f"Recommendation: {recommendation}\n"
        "\n"
        "Return JSON with:\n"
        "- whatsapp_message\n"
        "- email_message\n"
    )
    return call_llm_json(persona, request)


def response_classifier_agent(candidate_response: str) -> Dict[str, Any]:
    """Ask the LLM to classify the intent of a candidate's reply.

    Returns whatever ``call_llm_json`` returns: the model's parsed JSON reply,
    or a dict with an ``"error"`` key when the call fails.
    """
    # Each prompt is spelled out line by line; both start and end with a newline.
    persona = (
        "\n"
        "You are a candidate response classification agent.\n"
        "Classify candidate response intent.\n"
        "Return only valid JSON.\n"
    )
    request = (
        "\n"
        "Classify this candidate response:\n"
        "\n"
        "Response:\n"
        f"{candidate_response}\n"
        "\n"
        "Return JSON with:\n"
        "- intent (Interested / Not Interested / Available Later / Needs More Information / Unclear)\n"
        "- summary\n"
        "- recommended_next_action\n"
    )
    return call_llm_json(persona, request)


def hr_summary_agent(
    candidate_profile: Dict[str, Any],
    match_result: Dict[str, Any],
    response_result: Dict[str, Any]
) -> Dict[str, Any]:
    """Ask the LLM for a recruiter-ready summary built from the earlier agent outputs.

    All three inputs are serialised with ``json.dumps`` (so they must be
    JSON-serialisable) and embedded in the prompt. Returns whatever
    ``call_llm_json`` returns: the model's parsed JSON reply, or a dict with an
    ``"error"`` key when the call fails.
    """
    profile_json = json.dumps(candidate_profile, indent=2)
    match_json = json.dumps(match_result, indent=2)
    response_json = json.dumps(response_result, indent=2)

    # Each prompt is spelled out line by line; both start and end with a newline.
    persona = (
        "\n"
        "You are an HR briefing agent.\n"
        "Prepare a concise recruiter-ready summary for final human review.\n"
        "Return only valid JSON.\n"
    )
    request = (
        "\n"
        "Create an HR summary using these inputs.\n"
        "\n"
        "Candidate Profile:\n"
        f"{profile_json}\n"
        "\n"
        "JD Match Result:\n"
        f"{match_json}\n"
        "\n"
        "Candidate Response Result:\n"
        f"{response_json}\n"
        "\n"
        "Return JSON with:\n"
        "- final_status\n"
        "- recruiter_summary\n"
        "- interview_readiness\n"
        "- recommended_next_step\n"
        "- recruiter_action_pack\n"
    )
    return call_llm_json(persona, request)


# =========================
# Main Workflow
# =========================
# Canned candidate replies fed to the response classifier, keyed by recommendation.
# This demo never contacts a real candidate.
_SIMULATED_RESPONSES = {
    "Shortlist": "Yes, I am interested and available to discuss this opportunity further.",
    "Hold": "Please share more details about the role, team structure, and work location.",
    "Reject": "Thank you for reaching out. I am currently not looking for a change.",
}


def _error_outputs(table_message, summary_message=None, detail_message=None):
    """Build the six-value output tuple for a run that cannot produce results.

    *summary_message* defaults to *table_message*, and *detail_message*
    defaults to the summary message.
    """
    summary = table_message if summary_message is None else summary_message
    detail = summary if detail_message is None else detail_message
    return (
        pd.DataFrame([{"Error": table_message}]),
        summary,
        gr.Dropdown(choices=[], value=None),
        detail,
        None,
        {},
    )


def _resume_source(file_obj) -> str:
    """Return the path of an upload given as a path string or as an object with ``.name``."""
    if isinstance(file_obj, str):
        return file_obj
    return getattr(file_obj, "name", str(file_obj))


def _parse_match_score(raw_score) -> float:
    """Coerce the model-supplied score to a float in [0, 100]; unusable values become 0.0."""
    if isinstance(raw_score, str):
        # Tolerate replies such as "85%".
        raw_score = raw_score.strip().rstrip("%")
    try:
        score = float(raw_score)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    if score != score:  # NaN is the only float that is not equal to itself
        return 0.0
    return min(max(score, 0.0), 100.0)


def _recommendation_for(score: float) -> str:
    """Apply the fixed score thresholds: >= 80 Shortlist, >= 60 Hold, otherwise Reject."""
    if score >= 80:
        return "Shortlist"
    if score >= 60:
        return "Hold"
    return "Reject"


def _require_ok(result: Dict[str, Any], agent_label: str, source: str) -> Dict[str, Any]:
    """Return *result* unchanged, or raise RuntimeError if the agent reported an error."""
    if "error" in result:
        raise RuntimeError(f"{agent_label} failed for {source}: {result['error']}")
    return result


def _unique_candidate_name(name: str, taken) -> str:
    """Return *name*, suffixed with " (2)", " (3)", ... if it is already in *taken*."""
    if name not in taken:
        return name
    counter = 2
    while f"{name} ({counter})" in taken:
        counter += 1
    return f"{name} ({counter})"


def _screen_resume(file_obj, role_title: str, jd_text: str, taken_names) -> Dict[str, Any]:
    """Run the full agent pipeline for one resume and return its result record.

    Raises:
        RuntimeError: if the file cannot be read, has no text, or any agent
            reports an error.
    """
    source = _resume_source(file_obj)
    resume_text = file_to_text(file_obj)

    # file_to_text reports problems as message strings; none of them may reach the LLM.
    if resume_text.startswith(("Error", "Unsupported file format")):
        raise RuntimeError(f"Resume read failed for {source}: {resume_text}")
    if not resume_text:
        raise RuntimeError(
            f"Resume read failed for {source}: no extractable text was found."
        )

    candidate_profile = _require_ok(
        resume_intake_agent(resume_text), "Resume Intake Agent", source
    )
    match_result = _require_ok(
        jd_matching_agent(jd_text, candidate_profile), "JD Matching Agent", source
    )

    # Fall back to the file name without its extension when the model finds no name.
    extracted_name = candidate_profile.get("candidate_name")
    candidate_name = str(extracted_name).strip() if extracted_name else ""
    if not candidate_name:
        candidate_name = os.path.splitext(os.path.basename(source))[0]

    # The recommendation is derived from the score, overriding whatever the
    # model put in match_result, so both always agree.
    score = _parse_match_score(match_result.get("match_score", 0))
    recommendation = _recommendation_for(score)
    match_result["recommendation"] = recommendation

    outreach = _require_ok(
        outreach_agent(candidate_name, role_title, recommendation),
        "Outreach Agent",
        source,
    )
    response_result = _require_ok(
        response_classifier_agent(_SIMULATED_RESPONSES[recommendation]),
        "Response Classifier Agent",
        source,
    )
    hr_summary = _require_ok(
        hr_summary_agent(candidate_profile, match_result, response_result),
        "HR Summary Agent",
        source,
    )

    return {
        # Unique within the run: this value keys the dropdown and the state dict.
        "Candidate": _unique_candidate_name(candidate_name, taken_names),
        "Current Role": candidate_profile.get("current_role", ""),
        "Experience (Yrs)": candidate_profile.get("total_experience_years", ""),
        "Location": candidate_profile.get("current_location", ""),
        "Role Fit": match_result.get("role_fit", ""),
        "Match Score": score,
        "Recommendation": recommendation,
        "Decision Band": get_decision_band(recommendation),
        "Why Selected": safe_list_to_text(match_result.get("why_selected", [])),
        "Matched Skills": safe_list_to_text(match_result.get("matched_skills", [])),
        "Missing Skills / Gaps": safe_list_to_text(
            match_result.get("missing_skills", [])
        ),
        "Interview Focus Areas": safe_list_to_text(
            match_result.get("interview_focus_areas", [])
        ),
        "Recommended HR Action": match_result.get("recommended_hr_action", ""),
        "Response Intent": response_result.get("intent", ""),
        "Final Status": hr_summary.get("final_status", ""),
        "Next Step": hr_summary.get("recommended_next_step", ""),
        "Bias-Safe Review Note": match_result.get(
            "bias_safe_review_note",
            "Recommendation is based on job-related skills, experience, tools, and role fit only.",
        ),
        # Keep the drafted outreach in the export instead of discarding it.
        "Outreach WhatsApp Message": safe_list_to_text(outreach.get("whatsapp_message")),
        "Outreach Email Message": safe_list_to_text(outreach.get("email_message")),
    }


def run_talentmatch(role_title: str, jd_text: str, resume_files):
    """Screen the uploaded resumes against the job description.

    Args:
        role_title: Target role, passed to the outreach agent.
        jd_text: Job description text; must not be blank.
        resume_files: Uploaded resumes, each a path string or an object with
            the path in ``.name``. Between 1 and ``MAX_RESUMES`` items.

    Returns:
        A six-tuple matching the Gradio outputs: compact results DataFrame,
        summary text, a ``gr.Dropdown`` update listing the candidates, the
        Markdown detail of the first candidate, the path of the exported CSV
        (``None`` on failure), and a dict mapping candidate name to its full
        record. Validation problems and any exception raised while processing
        are reported through the same tuple shape, never raised to the caller.
        The first failing resume aborts the whole run.
    """
    try:
        if not api_key:
            return _error_outputs(
                "OPENAI_API_KEY is not set in Hugging Face Space secrets.",
                "OPENAI_API_KEY is not set.",
                "Please configure OPENAI_API_KEY in Hugging Face Space secrets.",
            )

        if not jd_text or not jd_text.strip():
            return _error_outputs(
                "Please paste or keep the default Job Description.",
                detail_message="Please provide a Job Description.",
            )

        if not resume_files:
            return _error_outputs("Please upload at least one resume.")

        if len(resume_files) > MAX_RESUMES:
            return _error_outputs(
                f"Please upload maximum {MAX_RESUMES} resumes per run for optimal performance.",
                detail_message=f"Reduce the uploaded resumes to {MAX_RESUMES} or fewer.",
            )

        records = []
        taken_names = set()
        for file_obj in resume_files:
            record = _screen_resume(file_obj, role_title, jd_text, taken_names)
            taken_names.add(record["Candidate"])
            records.append(record)

        full_df = pd.DataFrame(records)

        compact_columns = [
            "Candidate",
            "Current Role",
            "Experience (Yrs)",
            "Role Fit",
            "Match Score",
            "Recommendation",
            "Decision Band",
        ]
        compact_df = full_df[compact_columns].copy()

        avg_score = round(
            pd.to_numeric(full_df["Match Score"], errors="coerce").fillna(0).mean(),
            1,
        )
        recommendations = [record["Recommendation"] for record in records]

        summary_text = (
            f"Total Candidates: {len(full_df)}\n"
            f"Shortlisted: {recommendations.count('Shortlist')}\n"
            f"Hold: {recommendations.count('Hold')}\n"
            f"Rejected: {recommendations.count('Reject')}\n"
            f"Average Match Score: {avg_score}\n\n"
            f"Decision Band Logic:\n"
            f"- Green = Shortlist\n"
            f"- Amber = Hold\n"
            f"- Red = Reject"
        )

        # A fresh directory per run: a fixed path in the shared temp dir would let
        # concurrent runs overwrite (and download) each other's candidate data.
        csv_path = os.path.join(
            tempfile.mkdtemp(prefix="talentmatch_"),
            "talentmatch_screening_results.csv",
        )
        full_df.to_csv(csv_path, index=False)

        candidate_choices = [record["Candidate"] for record in records]
        state_data = {record["Candidate"]: record for record in records}

        return (
            compact_df,
            summary_text,
            gr.Dropdown(choices=candidate_choices, value=candidate_choices[0]),
            build_candidate_detail(records[0]),
            csv_path,
            state_data,
        )

    except Exception as e:
        # UI boundary: surface every failure in the outputs instead of crashing the app.
        return _error_outputs(f"Workflow failed: {str(e)}")


def update_candidate_detail(selected_candidate, state_data):
    """Return the Markdown detail for the candidate chosen in the dropdown.

    Falls back to a short notice when nothing is selected, when no results are
    stored, or when the selected name has no (non-empty) record.
    """
    if not (selected_candidate and state_data):
        return "No candidate selected."

    chosen = state_data.get(selected_candidate)
    return build_candidate_detail(chosen) if chosen else "Candidate details not found."


# =========================
# UI Styling
# =========================
# Stylesheet passed to gr.Blocks(css=...). It is an f-string so the brand colour
# can be interpolated; literal CSS braces are therefore doubled ({{ }}).
custom_css = f"""
body {{
    background: white;
}}

h1, h2, h3 {{
    color: {PROCELEVATE_BLUE} !important;
}}

.gradio-container {{
    max-width: 1320px !important;
}}

.procelevate-header {{
    text-align: center;
    margin-bottom: 10px;
}}

.notice-box {{
    border: 1px solid #d9e2f0;
    background: #f8fbff;
    border-radius: 12px;
    padding: 14px;
    margin-top: 8px;
    margin-bottom: 16px;
}}
"""


# =========================
# Gradio App
# =========================
with gr.Blocks(css=custom_css, title=APP_TITLE) as demo:
    # Holds the {candidate name: full record} dict returned by run_talentmatch
    # so the detail panel can be refreshed without re-running the workflow.
    state_store = gr.State({})

    # Static header and scope/usage notice.
    gr.HTML(
        f"""
        <div class="procelevate-header">
            <h1>{APP_TITLE}</h1>
            <h3>{APP_SUBTITLE}</h3>
        </div>
        <div class="notice-box">
            <b>Prototype Scope:</b> This demo shows how Agentic AI can support HR teams by parsing resumes,
            matching profiles to a job description, generating shortlist recommendations, drafting outreach,
            interpreting candidate responses, and preparing recruiter-ready summaries.<br><br>
            <b>Enterprise Upgrade:</b> This version adds explainability, interview focus guidance, decision bands,
            HR action recommendations, and a bias-safe review note.<br><br>
            <b>Important:</b> This is a decision-support system. Final hiring decisions remain with HR.<br><br>
            <b>Usage Note:</b> Upload up to {MAX_RESUMES} resumes per run for stable demo performance.
        </div>
        """
    )

    with gr.Row():
        # Left column: workflow inputs and the run button.
        with gr.Column(scale=2):
            role_title = gr.Textbox(
                label="Role Title",
                value="Data Analyst / Data Engineer",
                placeholder="Enter target role"
            )

            jd_text = gr.Textbox(
                label="Paste Job Description",
                lines=14,
                value=DEFAULT_JD,
                placeholder="Paste the JD here..."
            )

            resume_files = gr.File(
                label="Upload Candidate Resumes (PDF or TXT)",
                file_count="multiple",
                file_types=[".pdf", ".txt"]
            )

            run_button = gr.Button("Run TalentMatch Workflow", variant="primary")

        # Right column: run summary and the CSV download.
        with gr.Column(scale=1):
            output_summary = gr.Textbox(
                label="Screening Summary",
                lines=12
            )

            output_file = gr.File(
                label="Download Screening Results CSV",
                visible=True
            )

    # Compact per-candidate results table (read-only).
    output_df = gr.Dataframe(
        label="Candidate Screening Dashboard",
        interactive=False
    )

    gr.Markdown("### Candidate Review Detail")

    # Choices are filled in by run_talentmatch after a run.
    candidate_selector = gr.Dropdown(
        label="Select Candidate",
        choices=[],
        value=None
    )

    candidate_detail = gr.Markdown("Run the workflow to view candidate details.")

    # The order of `outputs` must match the six-tuple returned by run_talentmatch.
    run_button.click(
        fn=run_talentmatch,
        inputs=[role_title, jd_text, resume_files],
        outputs=[output_df, output_summary, candidate_selector, candidate_detail, output_file, state_store]
    )

    # Re-render the detail panel whenever a different candidate is picked.
    candidate_selector.change(
        fn=update_candidate_detail,
        inputs=[candidate_selector, state_store],
        outputs=[candidate_detail]
    )

# Bind to all interfaces on port 7860 when run as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)