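# NOTE(review): the text "Spaces: Sleeping" preceded this code in the captured
# page; it looks like hosting-page status text rather than source — confirm.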
import json
import os
import re
import warnings
from functools import lru_cache

import gradio as gr
import pandas as pd
import requests
# ----------------------------
# CONFIG
# ----------------------------
# Candidate submissions read by the filter step and the raw preview.
JSON_FILE = "form-submissions-1.json"
# Model queried through the Hugging Face Inference API.
MODEL_ID = "google/flan-t5-small"
# NOTE: HF_API_TOKEN MUST be set in your environment variables/Space secrets.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
# Cache of the role-filtered candidates, shared between Step 1 and Step 2.
FILTERED_CSV = "/tmp/filtered_candidates.csv"
# NOTE(review): OUTPUT_FILE and BATCH_SIZE are not referenced in the visible
# part of this file — confirm whether they are still needed.
OUTPUT_FILE = "/tmp/outputs.csv"
BATCH_SIZE = 50

if not HF_API_TOKEN:
    # Surface the misconfiguration at start-up instead of silently continuing;
    # without a token the scoring step returns 0 for every candidate.
    warnings.warn(
        "HF_API_TOKEN is not set; LLM scoring will return 0 for every candidate.",
        RuntimeWarning,
    )
# Maps each selectable category to the job titles that count as a match for it.
# Titles are compared against candidates' role names by exact string equality.
CATEGORIES = {
    "AI": [
        "AI/ML Ops Engineer",
        "Senior Machine Learning Engineer",
        "Principal Data Scientist",
        "Senior Data Scientist",
        "Machine Learning Research Scientist",
        "Senior AI/ML Engineer",
        "AI/ML Engineer",
        "Big Data Engineer",
        "AI Research Scientist",
        "AI Research Analyst Consultant",
        "AI Analyst",
        "Senior Data Analyst",
        "Automation Engineer",
        "Senior Data Engineer",
        "Machine Learning Engineer",
        "Data Engineer",
        "Data Scientist",
        "Data Analyst",
    ],
    "Marketing": [
        "Marketing Specialist",
        "Sales Agent",
        "Salesman",
        "Sales Associate",
    ],
    "CTO": [
        "Chief Technology Officer",
        "CTO",
    ],
    "Legal": [
        "Legal Specialist",
        "Attorney",
        "Legal Intern",
        "Lawyer",
    ],
    "Finance": [
        "Financial Analyst",
        "Financial Advisor",
    ],
}
# ----------------------------
# LLM call for scoring (focus: role experience ONLY)
# ----------------------------
def score_candidate(candidate_str, category_name, job_titles_tuple):
    """Ask the hosted model to rate a candidate from 1 to 10 for a category.

    Args:
        candidate_str: JSON text describing the candidate; embedded verbatim
            in the prompt.
        category_name: Category label shown in the prompt.
        job_titles_tuple: Target job titles; rendered in the prompt as a list.

    Returns:
        The first integer found in the model's reply, clamped to 1..10.
        Returns 0 when the API token is missing, the request fails, the
        response does not have the expected shape, or the reply contains no
        digits.
    """
    if not HF_API_TOKEN:
        print("API Token is missing. Returning score 0.")
        return 0

    # The prompt lines are kept flush-left so the text sent to the model
    # carries no leading whitespace.
    prompt = f"""
You are an HR assistant. Your task is to rate a candidate's suitability based ONLY on their previous job roles.
Rate the suitability of the following candidate on a scale of 1 (Lowest) to 10 (Highest).
The score must reflect how closely the candidate's 'Roles' align with the target job titles.
The target roles for the '{category_name}' category are: {list(job_titles_tuple)}
Candidate JSON: {candidate_str}
**Task**: Respond ONLY with the rating number (an integer from 1 to 10).
"""
    headers = {
        "Authorization": f"Bearer {HF_API_TOKEN}",
        "Content-Type": "application/json",
    }
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 5,
            "return_full_text": False,
            "temperature": 0.1,
        },
    }

    # Only the network round-trip and body decoding live in the try block, so
    # a malformed reply is not misreported as a network failure.
    try:
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{MODEL_ID}",
            headers=headers,
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"LLM scoring call failed for candidate (API/Network Error): {e}")
        return 0

    # The code expects a non-empty list whose first item is a dict that may
    # carry "generated_text"; anything else is treated as a failed call.
    first_item = result[0] if isinstance(result, list) and result else None
    if not isinstance(first_item, dict):
        print(f"LLM scoring call returned an unexpected response: {result!r}")
        return 0

    generated_text = str(first_item.get("generated_text", "0")).strip()
    match = re.search(r"\d+", generated_text)
    if match is None:
        return 0

    # Clamp so an out-of-range reply still maps onto the 1..10 scale.
    return max(1, min(10, int(match.group(0))))
# ----------------------------
# Step 1: Filter by roles
# ----------------------------
def filter_by_roles(category_name):
    """Select candidates whose work history contains a target title.

    Loads every record from JSON_FILE, ignores roles whose name contains
    "full stack developer" (case-insensitive), and keeps a candidate when at
    least one remaining role name exactly equals a title listed in
    CATEGORIES[category_name]. Matching rows are written to FILTERED_CSV.

    Args:
        category_name: Key of CATEGORIES. May be None when no category has
            been selected in the UI.

    Returns:
        A ``(DataFrame, status_message)`` tuple. The DataFrame is empty when
        the category is unknown, the JSON file is missing or malformed, or no
        candidate matches.
    """
    job_titles = CATEGORIES.get(category_name)
    if job_titles is None:
        # An unselected dropdown passes None; report it instead of raising.
        return pd.DataFrame(), "Error: Please select a valid category before filtering."

    try:
        with open(JSON_FILE, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return pd.DataFrame(), f"Error: JSON file '{JSON_FILE}' not found. The LLM can't proceed."
    except json.JSONDecodeError as e:
        return pd.DataFrame(), f"Error: JSON file '{JSON_FILE}' is not valid JSON ({e}). The LLM can't proceed."

    target_titles = set(job_titles)  # built once for O(1) membership tests
    filtered = []
    for person in data:
        # "or" guards against keys that are present but null.
        work_exps = person.get("work_experiences") or []
        non_fullstack_roles = [
            role_name
            for role_name in (exp.get("roleName") for exp in work_exps)
            if role_name and "full stack developer" not in role_name.lower()
        ]
        if not any(role in target_titles for role in non_fullstack_roles):
            continue

        salary_info = person.get("annual_salary_expectation") or {}
        filtered.append({
            "Name": person.get("name"),
            "Email": person.get("email"),
            "Phone": person.get("phone"),
            "Location": person.get("location"),
            "Roles": ", ".join(non_fullstack_roles),
            "Skills": ", ".join(person.get("skills") or []),
            "Salary": salary_info.get("full-time", "N/A"),
            "Category": category_name,
        })

    if not filtered:
        return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'. The LLM can't proceed."

    df = pd.DataFrame(filtered)
    # Persist the result so the recommendation step can reuse it.
    df.to_csv(FILTERED_CSV, index=False)
    return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM scoring."
# ----------------------------
# Step 2: LLM recommendations (scoring, sorting, and output)
# ----------------------------
def _salary_sort_key(salary):
    """Convert a salary value such as "$120,000" to a float for sorting.

    Values that cannot be parsed (for example "N/A" or a missing value) map
    to positive infinity so they sort after every real salary.
    """
    text = str(salary).replace("$", "").replace(",", "")
    try:
        value = float(text)
    except ValueError:
        return float("inf")
    # A missing CSV cell stringifies to "nan", which float() accepts.
    return float("inf") if pd.isna(value) else value


def llm_recommendations(category_name):
    """Score the role-filtered candidates with the LLM and summarise the top 5.

    Uses the rows cached in FILTERED_CSV for ``category_name``. When the cache
    is missing, unreadable, or holds no rows for that category, the role
    filter is run first. Candidates with a score above 0 are ranked by score
    (descending) and then by expected salary (ascending).

    Args:
        category_name: Key of CATEGORIES. May be None when no category has
            been selected in the UI.

    Returns:
        A human-readable summary of up to five candidates, or a message
        explaining why no recommendation could be produced.
    """
    job_titles = CATEGORIES.get(category_name)
    if job_titles is None:
        # An unselected dropdown passes None; report it instead of raising.
        return "Error: Please select a valid category before requesting recommendations."

    df_filtered = None
    if os.path.exists(FILTERED_CSV):
        try:
            cached = pd.read_csv(FILTERED_CSV)
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            cached = None
        if cached is not None and "Category" in cached.columns:
            df_filtered = cached[cached["Category"] == category_name]

    if df_filtered is None or df_filtered.empty:
        # No usable cache for this category (missing file, or the file was
        # written for another category): run the role filter now.
        df_filtered, msg = filter_by_roles(category_name)
        if df_filtered.empty:
            return msg

    # Missing values are replaced only in the copy sent to the model.
    records = df_filtered.fillna("N/A").to_dict(orient="records")
    job_titles_tuple = tuple(job_titles)
    scores = [
        score_candidate(
            json.dumps({
                "Name": record.get("Name"),
                "Roles": record.get("Roles"),
                "Skills": record.get("Skills"),
            }),
            category_name,
            job_titles_tuple,
        )
        for record in records
    ]

    # assign() returns a new frame, so no column is written onto a slice.
    df_scored = df_filtered.assign(LLM_Score=scores)
    df_recommended = df_scored[df_scored["LLM_Score"] > 0].copy()
    if df_recommended.empty:
        if not HF_API_TOKEN:
            return "❌ LLM failed: The HF_API_TOKEN is not set or is invalid. Set the token and try again."
        # score_candidate clamps every parsed rating to at least 1, so a 0
        # always means the call failed or the reply held no number.
        return (
            f"LLM scoring produced no usable scores for '{category_name}': every request "
            "failed or returned no rating. Check the application logs and try again."
        )

    df_recommended["Salary_sort"] = df_recommended["Salary"].apply(_salary_sort_key)
    df_top5 = df_recommended.sort_values(
        by=["LLM_Score", "Salary_sort"],
        ascending=[False, True],
    ).head(5)

    names = df_top5["Name"].tolist()
    top_scores = df_top5["LLM_Score"].tolist()
    ranking = "".join(
        f"{rank}. {name} (Suitability Score: {score}/10)\n"
        for rank, (name, score) in enumerate(zip(names, top_scores), start=1)
    )
    return (
        f"Top {len(names)} Recommended Candidates for the '{category_name}' Category:\n\n"
        + ranking
        + "\nThese candidates were ranked by the LLM based **only on the alignment of their previous job roles** with the target roles, using expected salary as a tie-breaker."
    )
# ----------------------------
# Show first 5 raw JSON candidates
# ----------------------------
def show_first_candidates():
    """Return a DataFrame previewing the first five records of JSON_FILE.

    Any failure is reported as a one-row DataFrame with a single "Error"
    column instead of being raised.
    """
    try:
        with open(JSON_FILE, encoding="utf-8") as handle:
            records = json.load(handle)
        preview = pd.DataFrame(records[:5])
    except FileNotFoundError:
        problem = f"JSON file '{JSON_FILE}' not found. Please ensure it is present."
    except Exception as e:
        problem = f"Failed to load JSON: {e}"
    else:
        return preview
    return pd.DataFrame({"Error": [problem]})
# ----------------------------
# Gradio interface
# ----------------------------
with gr.Blocks() as app:
    gr.Markdown("# 🤖 Candidate Selection (Role-Based Scoring)")

    # Raw-data preview, computed once while the UI is being built.
    gr.Markdown("#### 🔍 Raw JSON Preview: First 5 Candidates")
    gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
    gr.Markdown("---")

    category_dropdown = gr.Dropdown(
        choices=list(CATEGORIES),
        label="1. Select Category",
    )

    # Step 1: filter candidates by role and show the result with a status line.
    filter_button = gr.Button("2. Filter Candidates by Roles")
    filtered_df = gr.Dataframe(label="Filtered Candidates (Preview)")
    filter_status = gr.Textbox(
        label="Filter Status",
        placeholder="Click 'Filter Candidates by Roles' to start.",
    )
    filter_button.click(
        fn=filter_by_roles,
        inputs=[category_dropdown],
        outputs=[filtered_df, filter_status],
    )
    gr.Markdown("---")

    # Step 2: score the filtered candidates and show the text summary.
    llm_button = gr.Button("3. Get LLM Recommendations (Role Experience Ranking)")
    llm_output_text = gr.Textbox(
        label="Top Candidate Recommendations Summary",
        lines=10,
        placeholder="Click 'Get LLM Recommendations' after Step 2 completes.",
    )
    llm_button.click(
        fn=llm_recommendations,
        inputs=[category_dropdown],
        outputs=[llm_output_text],
    )

if __name__ == "__main__":
    # share=True requests a public link when the app is launched.
    app.launch(share=True)