File size: 9,248 Bytes
9a17edb
 
 
4392910
db61f50
35bd947
384f205
9a17edb
2c59240
 
 
3aeed34
db61f50
 
 
2c59240
db61f50
 
 
 
 
3aeed34
9a17edb
2c59240
 
 
 
 
 
 
3aeed34
 
 
 
9a17edb
 
 
db61f50
384f205
db61f50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
daf3997
db61f50
 
 
 
 
 
 
 
daf3997
 
db61f50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
daf3997
db61f50
 
daf3997
db61f50
 
 
2c59240
 
a76c6ef
 
 
 
db61f50
a76c6ef
fede7d8
2c59240
9a17edb
 
3aeed34
9a17edb
a76c6ef
 
 
 
fede7d8
9a17edb
a76c6ef
fede7d8
2c59240
 
 
 
 
 
 
 
 
 
3fb3ad6
2c59240
db61f50
2c59240
 
 
db61f50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d1b2b2
db61f50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8608c15
db61f50
98ef19f
db61f50
 
 
 
8608c15
db61f50
 
 
 
 
 
 
 
 
 
 
 
 
5d1b2b2
3fb3ad6
5d1b2b2
3fb3ad6
5d1b2b2
db61f50
 
3fb3ad6
db61f50
3fb3ad6
 
9a17edb
db61f50
 
 
e023318
a76c6ef
 
 
 
 
 
 
 
e023318
9a17edb
db61f50
9a17edb
e023318
db61f50
 
a76c6ef
db61f50
 
daf3997
db61f50
 
638b476
db61f50
 
 
 
 
638b476
db61f50
2c59240
db61f50
 
 
 
e023318
9a17edb
db61f50
98ef19f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import gradio as gr
import pandas as pd
import json
import os
import requests
import re 
from functools import lru_cache

# ----------------------------
# CONFIG
# ----------------------------
JSON_FILE = "form-submissions-1.json"          # input dataset: JSON array of candidate profiles
MODEL_ID = "google/flan-t5-small"              # HF Inference API model used for scoring
# NOTE: HF_API_TOKEN MUST be set in your environment variables/Space secrets.
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
FILTERED_CSV = "/tmp/filtered_candidates.csv"  # Step 1 output, consumed by Step 2
OUTPUT_FILE = "/tmp/outputs.csv"
BATCH_SIZE = 50

# Warn loudly at import time instead of silently ignoring a missing token;
# score_candidate() still short-circuits to 0 without it, but now the reason
# is visible in the logs up front.
if not HF_API_TOKEN:
    print("WARNING: HF_API_TOKEN is not set; LLM scoring will return 0 for every candidate.")

# Target job titles per hiring category. filter_by_roles() keeps a candidate
# only when at least one of their past roleName values matches a title here
# EXACTLY (case-sensitive string equality, no fuzzy matching).
CATEGORIES = {
    "AI": [
        "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
        "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
        "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
        "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
        "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
    ],
    "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
    "CTO": ["Chief Technology Officer","CTO"],
    "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
    "Finance": ["Financial Analyst","Financial Advisor"]
}

# ----------------------------
# LLM Call for Scoring (Focus: Role Experience ONLY)
# ----------------------------
@lru_cache(maxsize=512) 
def score_candidate(candidate_str, category_name, job_titles_tuple):
    """Score one candidate's role fit (1-10) via the HF Inference API.

    Args:
        candidate_str: JSON-encoded candidate summary (a string so the
            arguments stay hashable for lru_cache).
        category_name: Target category label, e.g. "AI".
        job_titles_tuple: Tuple of target job titles (tuple, not list, for
            the same hashability reason).

    Returns:
        int: Score clamped to 1..10 on success; 0 when the token is missing,
        the API call fails, the response is an error payload, or no integer
        can be parsed out of the generated text.
    """
    if not HF_API_TOKEN:
        print("API Token is missing. Returning score 0.")
        return 0

    prompt = f"""
You are an HR assistant. Your task is to rate a candidate's suitability based ONLY on their previous job roles.
Rate the suitability of the following candidate on a scale of 1 (Lowest) to 10 (Highest).
The score must reflect how closely the candidate's 'Roles' align with the target job titles.

The target roles for the '{category_name}' category are: {list(job_titles_tuple)}

Candidate JSON: {candidate_str}

**Task**: Respond ONLY with the rating number (an integer from 1 to 10).
"""
    # requests' json= parameter serializes the payload and sets the
    # Content-Type header for us — no manual json.dumps needed.
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 5,       # the answer is a single number
            "return_full_text": False,
            "temperature": 0.1         # near-deterministic output
        }
    }

    try:
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{MODEL_ID}",
            headers=headers,
            json=payload,
            timeout=60
        )
        response.raise_for_status()
        result = response.json()

        # On success the API returns a list of generations; a dict (e.g.
        # {"error": "Model ... is currently loading"}) signals a problem.
        if not isinstance(result, list) or not result:
            print(f"LLM scoring call returned unexpected payload: {result}")
            return 0

        generated_text = result[0].get("generated_text", "0").strip()

        # Pull the first run of digits out of the reply and clamp to 1..10.
        match = re.search(r'\d+', generated_text)
        if match:
            return max(1, min(10, int(match.group(0))))

        return 0

    except (requests.RequestException, ValueError, AttributeError) as e:
        # RequestException: network/HTTP errors; ValueError: non-JSON body;
        # AttributeError: a list entry that is not a dict.
        print(f"LLM scoring call failed for candidate (API/Network Error): {e}")
        return 0

# ----------------------------
# Step 1: Filter by roles (Unchanged)
# ----------------------------
def filter_by_roles(category_name):
    """Step 1: shortlist candidates whose past roles match the category.

    Reads JSON_FILE, drops every role containing "full stack developer"
    (case-insensitive), keeps candidates with at least one remaining role
    that exactly matches a target title, and writes the shortlist to
    FILTERED_CSV for Step 2.

    Args:
        category_name: Key into CATEGORIES.

    Returns:
        tuple: (pd.DataFrame shortlist — empty on failure, status message str).
    """
    job_titles = set(CATEGORIES[category_name])  # set: O(1) membership tests
    try:
        with open(JSON_FILE, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return pd.DataFrame(), f"Error: JSON file '{JSON_FILE}' not found. The LLM can't proceed."
    except json.JSONDecodeError as e:
        # A present-but-corrupt file previously crashed the UI callback.
        return pd.DataFrame(), f"Error: JSON file '{JSON_FILE}' is not valid JSON ({e}). The LLM can't proceed."

    filtered = []

    for person in data:
        work_exps = person.get("work_experiences", [])
        if not work_exps:
            continue

        # Collect role names, excluding any flavor of "full stack developer".
        non_fullstack_roles = []
        for exp in work_exps:
            role = exp.get("roleName")
            if role and "full stack developer" not in role.lower():
                non_fullstack_roles.append(role)

        if not non_fullstack_roles:
            continue

        if any(role in job_titles for role in non_fullstack_roles):
            filtered.append({
                "Name": person.get("name"),
                "Email": person.get("email"),
                "Phone": person.get("phone"),
                "Location": person.get("location"),
                "Roles": ", ".join(non_fullstack_roles),
                "Skills": ", ".join(person.get("skills", [])),
                "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
                "Category": category_name
            })

    if not filtered:
        return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'. The LLM can't proceed."

    df = pd.DataFrame(filtered)
    df.to_csv(FILTERED_CSV, index=False)
    return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM scoring."


# ----------------------------
# Step 2: LLM recommendations (Scoring, Sorting, and Output)
# ----------------------------
def llm_recommendations(category_name):
    """Step 2: LLM-score the shortlisted candidates and summarize the top 5.

    Loads the Step 1 shortlist (running Step 1 on the fly if its CSV is
    missing), scores each candidate with score_candidate(), then ranks by
    score (desc) with expected salary (asc) as the tie-breaker.

    Args:
        category_name: Key into CATEGORIES.

    Returns:
        str: Human-readable top-5 summary, or an error/status message.
    """
    job_titles = CATEGORIES[category_name]

    if not os.path.exists(FILTERED_CSV):
        df_filtered, msg = filter_by_roles(category_name)
        if df_filtered.empty:
            return msg
    else:
        # .copy() so the LLM_Score column assignment below never hits a
        # pandas SettingWithCopyWarning on this boolean-filtered view.
        df_filtered = pd.read_csv(FILTERED_CSV)
        df_filtered = df_filtered[df_filtered["Category"] == category_name].copy()

    if df_filtered.empty:
        return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."

    # Score on Name/Roles/Skills only — salary must not bias the LLM.
    df_filtered_clean = df_filtered.fillna('N/A')
    job_titles_key = tuple(job_titles)  # hashable key for score_candidate's lru_cache

    scores = []
    for person in df_filtered_clean.to_dict(orient="records"):
        candidate_str = json.dumps({
            "Name": person.get("Name"),
            "Roles": person.get("Roles"),
            "Skills": person.get("Skills")
        })
        scores.append(score_candidate(candidate_str, category_name, job_titles_key))

    df_filtered["LLM_Score"] = scores

    # Score 0 means "API failure or unparsable reply" — drop those rows.
    df_recommended = df_filtered[df_filtered["LLM_Score"] > 0].copy()

    if df_recommended.empty:
        if not HF_API_TOKEN:
            return "❌ LLM failed: The HF_API_TOKEN is not set or is invalid. Set the token and try again."
        return f"LLM scored all candidates 0. The candidates' roles are deemed irrelevant by the LLM for '{category_name}'."

    def parse_salary(s):
        # '$85,000' -> 85000.0; 'N/A' or anything unparseable sorts last.
        try:
            return float(str(s).replace("$", "").replace(",", ""))
        except ValueError:
            return float('inf')

    df_recommended["Salary_sort"] = df_recommended["Salary"].apply(parse_salary)

    # Best score first; among equal scores, the cheaper candidate wins.
    df_top5 = df_recommended.sort_values(
        by=['LLM_Score', 'Salary_sort'],
        ascending=[False, True]
    ).head(5)

    output_text = f"Top {len(df_top5)} Recommended Candidates for the '{category_name}' Category:\n\n"

    for rank, row in enumerate(df_top5.itertuples(index=False), start=1):
        output_text += f"{rank}. {row.Name} (Suitability Score: {row.LLM_Score}/10)\n"

    output_text += "\nThese candidates were ranked by the LLM based **only on the alignment of their previous job roles** with the target roles, using expected salary as a tie-breaker."

    return output_text

# ----------------------------
# Show first 5 raw JSON candidates (Unchanged)
# ----------------------------
def show_first_candidates(json_file=None):
    """Load the first 5 candidate records as a DataFrame for the UI preview.

    Args:
        json_file: Optional path to a JSON file containing a list of records.
            Defaults to the module-level JSON_FILE when omitted — the
            parameter is backward-compatible with existing zero-arg callers.

    Returns:
        pd.DataFrame: First 5 records on success; otherwise a single-column
        "Error" frame describing the failure (never raises, so the Gradio
        preview always renders something).
    """
    path = JSON_FILE if json_file is None else json_file
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        return pd.DataFrame(data[:5])
    except FileNotFoundError:
        return pd.DataFrame({"Error": [f"JSON file '{path}' not found. Please ensure it is present."]})
    except Exception as e:
        # Deliberate catch-all: any parse/shape problem becomes a visible
        # error row instead of crashing the UI at build time.
        return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})

# ----------------------------
# Gradio interface (Updated Heading and Launch)
# ----------------------------
# Three-step workflow: preview raw data -> filter by role -> LLM ranking.
with gr.Blocks() as app:
    # 🚩 CHANGE: Updated Heading
    gr.Markdown("# 🤖 Candidate Selection (Role-Based Scoring)") 
    
    # Static preview rendered once at build time so users can sanity-check
    # the dataset before running the pipeline.
    gr.Markdown("#### 🔍 Raw JSON Preview: First 5 Candidates")
    gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")

    gr.Markdown("---")
    category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="1. Select Category")

    # Step 1: Filter by roles
    # filter_by_roles returns (DataFrame, status string), hence two outputs.
    filter_button = gr.Button("2. Filter Candidates by Roles")
    filtered_df = gr.Dataframe(label="Filtered Candidates (Preview)")
    filter_status = gr.Textbox(label="Filter Status", placeholder="Click 'Filter Candidates by Roles' to start.")
    filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, filter_status])

    gr.Markdown("---")

    # Step 2: LLM Recommendations
    # llm_recommendations returns a single text summary of the top 5.
    llm_button = gr.Button("3. Get LLM Recommendations (Role Experience Ranking)")
    llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
    llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])

if __name__ == "__main__":
    # 🚩 CHANGE: Set share=True to generate a public link
    app.launch(share=True)