Spaces:
Sleeping
Sleeping
File size: 9,248 Bytes
9a17edb 4392910 db61f50 35bd947 384f205 9a17edb 2c59240 3aeed34 db61f50 2c59240 db61f50 3aeed34 9a17edb 2c59240 3aeed34 9a17edb db61f50 384f205 db61f50 daf3997 db61f50 daf3997 db61f50 daf3997 db61f50 daf3997 db61f50 2c59240 a76c6ef db61f50 a76c6ef fede7d8 2c59240 9a17edb 3aeed34 9a17edb a76c6ef fede7d8 9a17edb a76c6ef fede7d8 2c59240 3fb3ad6 2c59240 db61f50 2c59240 db61f50 5d1b2b2 db61f50 8608c15 db61f50 98ef19f db61f50 8608c15 db61f50 5d1b2b2 3fb3ad6 5d1b2b2 3fb3ad6 5d1b2b2 db61f50 3fb3ad6 db61f50 3fb3ad6 9a17edb db61f50 e023318 a76c6ef e023318 9a17edb db61f50 9a17edb e023318 db61f50 a76c6ef db61f50 daf3997 db61f50 638b476 db61f50 638b476 db61f50 2c59240 db61f50 e023318 9a17edb db61f50 98ef19f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
import gradio as gr
import pandas as pd
import json
import os
import requests
import re
from functools import lru_cache
# ----------------------------
# CONFIG
# ----------------------------
JSON_FILE = "form-submissions-1.json"          # candidate submissions (must sit next to app)
MODEL_ID = "google/flan-t5-small"              # hosted model used for scoring
# NOTE: HF_API_TOKEN MUST be set in your environment variables/Space secrets.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
FILTERED_CSV = "/tmp/filtered_candidates.csv"  # Step-1 output, consumed by Step 2
OUTPUT_FILE = "/tmp/outputs.csv"
BATCH_SIZE = 50
# Warn loudly at startup instead of silently ignoring a missing token —
# the original `pass` made this check a no-op.
if not HF_API_TOKEN:
    print("WARNING: HF_API_TOKEN is not set; LLM scoring will return 0 for every candidate.")
# Maps each hiring category to the job-title strings that qualify a candidate
# for it. Filtering elsewhere uses exact membership in these lists, so titles
# must match the source JSON's "roleName" values verbatim (case-sensitive).
CATEGORIES = {
    "AI": [
        "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
        "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
        "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
        "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
        "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
    ],
    "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
    "CTO": ["Chief Technology Officer","CTO"],
    "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
    "Finance": ["Financial Analyst","Financial Advisor"]
}
# ----------------------------
# LLM Call for Scoring (Focus: Role Experience ONLY)
# ----------------------------
@lru_cache(maxsize=512)
def score_candidate(candidate_str, category_name, job_titles_tuple):
    """Ask the hosted LLM to rate a candidate's role fit on a 1-10 scale.

    Args:
        candidate_str: JSON-encoded candidate summary. Passed as a string
            (and job titles as a tuple) so all arguments are hashable for
            the lru_cache memoization.
        category_name: Target hiring category, e.g. "AI".
        job_titles_tuple: Tuple of target job titles for the category.

    Returns:
        int: 1-10 on success; 0 when the token is missing, the API call
        fails, the API reports an error, or no number is found in the reply.
    """
    if not HF_API_TOKEN:
        print("API Token is missing. Returning score 0.")
        return 0
    prompt = f"""
You are an HR assistant. Your task is to rate a candidate's suitability based ONLY on their previous job roles.
Rate the suitability of the following candidate on a scale of 1 (Lowest) to 10 (Highest).
The score must reflect how closely the candidate's 'Roles' align with the target job titles.
The target roles for the '{category_name}' category are: {list(job_titles_tuple)}
Candidate JSON: {candidate_str}
**Task**: Respond ONLY with the rating number (an integer from 1 to 10).
"""
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 5,
            "return_full_text": False,
            "temperature": 0.1
        }
    }
    try:
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{MODEL_ID}",
            headers=headers,
            json=payload,  # requests serializes the body itself
            timeout=60
        )
        response.raise_for_status()
        result = response.json()
        # The inference API returns an {"error": ...} dict (not a list) for
        # model-loading and quota problems; treat that as a failed call
        # instead of letting the list indexing below raise.
        if isinstance(result, dict):
            print(f"LLM scoring call returned an error: {result.get('error', result)}")
            return 0
        generated_text = result[0].get("generated_text", "0").strip()
        match = re.search(r'\d+', generated_text)
        if match:
            # Clamp to the advertised 1-10 range in case the model rambles.
            score = int(match.group(0))
            return max(1, min(10, score))
        return 0
    except Exception as e:
        print(f"LLM scoring call failed for candidate (API/Network Error): {e}")
        return 0
# ----------------------------
# Step 1: Filter by roles (Unchanged)
# ----------------------------
def filter_by_roles(category_name):
    """Keep submissions whose past roles match the category's target titles.

    Loads JSON_FILE, drops "full stack developer" roles, and keeps candidates
    with at least one remaining role found (exact match) in the category's
    title list. Results are persisted to FILTERED_CSV for the scoring step.

    Returns:
        (DataFrame, str): the filtered candidates and a status message.
        The DataFrame is empty when the file is missing or nothing matches.
    """
    job_titles = CATEGORIES[category_name]
    try:
        with open(JSON_FILE, encoding="utf-8") as fh:
            candidates = json.load(fh)
    except FileNotFoundError:
        return pd.DataFrame(), f"Error: JSON file '{JSON_FILE}' not found. The LLM can't proceed."

    rows = []
    for candidate in candidates:
        experiences = candidate.get("work_experiences", [])
        # Collect role names, excluding any full-stack-developer entries.
        roles = [
            exp.get("roleName")
            for exp in experiences
            if exp.get("roleName") and "full stack developer" not in exp.get("roleName").lower()
        ]
        if not roles:
            continue
        if not any(role in job_titles for role in roles):
            continue
        rows.append({
            "Name": candidate.get("name"),
            "Email": candidate.get("email"),
            "Phone": candidate.get("phone"),
            "Location": candidate.get("location"),
            "Roles": ", ".join(roles),
            "Skills": ", ".join(candidate.get("skills", [])),
            "Salary": candidate.get("annual_salary_expectation", {}).get("full-time", "N/A"),
            "Category": category_name,
        })

    if not rows:
        return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'. The LLM can't proceed."

    result = pd.DataFrame(rows)
    result.to_csv(FILTERED_CSV, index=False)
    return result, f"{len(result)} candidates filtered by role for category '{category_name}'. Ready for LLM scoring."
# ----------------------------
# Step 2: LLM recommendations (Scoring, Sorting, and Output)
# ----------------------------
def llm_recommendations(category_name):
    """Score the filtered candidates with the LLM and summarize the top 5.

    Ranking: LLM score descending, then expected salary ascending as a
    tie-breaker. Returns a human-readable text summary (or an error/status
    message string when there is nothing to rank).
    """
    job_titles = CATEGORIES[category_name]
    if not os.path.exists(FILTERED_CSV):
        # Step 1 hasn't been run yet; run it implicitly.
        df_filtered, msg = filter_by_roles(category_name)
        if df_filtered.empty:
            return msg
    else:
        df_filtered = pd.read_csv(FILTERED_CSV)
    # The cached CSV may hold another category's results. `.copy()` so the
    # LLM_Score assignment below writes to a real frame, not a pandas view
    # (avoids SettingWithCopyWarning / silently lost column).
    df_filtered = df_filtered[df_filtered["Category"] == category_name].copy()
    if df_filtered.empty:
        return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."
    # Prepare for scoring: only Name/Roles/Skills are shown to the LLM.
    df_filtered_clean = df_filtered.fillna('N/A')
    scores = []
    for person in df_filtered_clean.to_dict(orient="records"):
        candidate_info = {
            "Name": person.get("Name"),
            "Roles": person.get("Roles"),
            "Skills": person.get("Skills")
        }
        candidate_str = json.dumps(candidate_info)
        scores.append(score_candidate(candidate_str, category_name, tuple(job_titles)))
    df_filtered["LLM_Score"] = scores
    # Score 0 means "scoring failed" (see score_candidate), not a real rating.
    df_recommended = df_filtered[df_filtered["LLM_Score"] > 0].copy()
    if df_recommended.empty:
        if not HF_API_TOKEN:
            return "❌ LLM failed: The HF_API_TOKEN is not set or is invalid. Set the token and try again."
        return f"LLM scored all candidates 0. The candidates' roles are deemed irrelevant by the LLM for '{category_name}'."

    def parse_salary(s):
        # Best-effort numeric parse; "N/A" and anything unparseable sorts
        # last (inf), matching the original behavior.
        try:
            return float(str(s).replace("$", "").replace(",", ""))
        except (TypeError, ValueError):
            return float('inf')

    df_recommended["Salary_sort"] = df_recommended["Salary"].apply(parse_salary)
    df_top5 = df_recommended.sort_values(
        by=['LLM_Score', 'Salary_sort'],
        ascending=[False, True]
    ).head(5)
    output_text = f"Top {len(df_top5)} Recommended Candidates for the '{category_name}' Category:\n\n"
    for i, (_, row) in enumerate(df_top5.iterrows()):
        output_text += f"{i+1}. {row['Name']} (Suitability Score: {row['LLM_Score']}/10)\n"
    output_text += "\nThese candidates were ranked by the LLM based **only on the alignment of their previous job roles** with the target roles, using expected salary as a tie-breaker."
    return output_text
# ----------------------------
# Show first 5 raw JSON candidates (Unchanged)
# ----------------------------
def show_first_candidates():
    """Return the first five raw submissions as a DataFrame for preview.

    On failure, returns a single-column "Error" DataFrame describing the
    problem instead of raising, so the UI always has something to render.
    """
    try:
        with open(JSON_FILE, encoding="utf-8") as fh:
            records = json.load(fh)
        return pd.DataFrame(records[:5])
    except FileNotFoundError:
        return pd.DataFrame({"Error": [f"JSON file '{JSON_FILE}' not found. Please ensure it is present."]})
    except Exception as exc:
        return pd.DataFrame({"Error": [f"Failed to load JSON: {exc}"]})
# ----------------------------
# Gradio interface (Updated Heading and Launch)
# ----------------------------
with gr.Blocks() as app:
    # 🚩 CHANGE: Updated Heading
    gr.Markdown("# 🤖 Candidate Selection (Role-Based Scoring)")
    gr.Markdown("#### 🔍 Raw JSON Preview: First 5 Candidates")
    # NOTE: the preview is computed once at app build time, not per page load.
    gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
    gr.Markdown("---")
    category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="1. Select Category")
    # Step 1: Filter by roles
    filter_button = gr.Button("2. Filter Candidates by Roles")
    filtered_df = gr.Dataframe(label="Filtered Candidates (Preview)")
    filter_status = gr.Textbox(label="Filter Status", placeholder="Click 'Filter Candidates by Roles' to start.")
    filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, filter_status])
    gr.Markdown("---")
    # Step 2: LLM Recommendations
    llm_button = gr.Button("3. Get LLM Recommendations (Role Experience Ranking)")
    llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
    llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
if __name__ == "__main__":
    # 🚩 CHANGE: Set share=True to generate a public link
    app.launch(share=True)