curiouscurrent committed on
Commit
5d1b2b2
·
verified ·
1 Parent(s): edfa5fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -73
app.py CHANGED
@@ -14,7 +14,7 @@ MODEL_ID = "google/flan-t5-large"
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
15
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
16
  OUTPUT_FILE = "/tmp/outputs.csv"
17
- BATCH_SIZE = 50
18
 
19
  if not HF_API_TOKEN:
20
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
@@ -34,28 +34,33 @@ CATEGORIES = {
34
  }
35
 
36
  # ----------------------------
37
- # LLM cached call (Updated for flexibility)
38
  # ----------------------------
39
- @lru_cache(maxsize=512)
40
- def call_llm(candidate_str, category_name, job_titles_tuple):
41
- # 🚩 FLEXIBLE PROMPT: Asking the LLM to find "potential match" instead of "strong alignment"
42
  prompt = f"""
43
- You are an HR assistant. Your task is to quickly filter candidates.
44
- Based ONLY on the 'Roles' and 'Skills' fields provided in the candidate JSON, determine if the candidate is a potential match for the category '{category_name}'.
45
- The category includes the following job titles: {list(job_titles_tuple)}
46
- Candidate JSON: {candidate_str}
47
- Your entire response must be ONLY one word: 'Yes' or 'No'.
 
 
 
 
 
 
 
48
  """
49
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
50
 
51
- # 🚩 FLEXIBLE PARAMETERS: Increased max_new_tokens slightly and added temperature
52
- # Temperature > 0 encourages more diverse/flexible interpretation.
53
  payload = {
54
  "inputs": prompt,
55
  "parameters": {
56
- "max_new_tokens": 20,
 
57
  "return_full_text": False,
58
- "temperature": 0.5 # Add some randomness to avoid ultra-strict "No"
59
  }
60
  }
61
 
@@ -64,34 +69,30 @@ Your entire response must be ONLY one word: 'Yes' or 'No'.
64
  f"https://api-inference.huggingface.co/models/{MODEL_ID}",
65
  headers=headers,
66
  data=json.dumps(payload),
67
- timeout=60
68
  )
69
  response.raise_for_status()
70
  result = response.json()
71
 
72
  if isinstance(result, dict) and "error" in result:
73
  print(f"LLM API Error: {result.get('error')}")
74
- return "No"
75
 
76
- generated_text = result[0].get("generated_text", "No").strip().lower()
 
77
 
78
- # Check for 'yes' and 'no' keywords
79
- if "yes" in generated_text:
80
- return "Yes"
81
- # Only return "No" if "yes" wasn't found, otherwise it's likely a match failure
82
- elif "no" in generated_text:
83
- return "No"
84
- else:
85
- # Fallback for unexpected output (e.g., model generates preamble text)
86
- print(f"Unexpected LLM output: '{generated_text}'. Defaulting to 'No'.")
87
- return "No"
88
 
89
  except Exception as e:
90
- print("LLM call failed:", e)
91
- return "No"
92
 
93
  # ----------------------------
94
- # Step 1: Filter by roles
95
  # ----------------------------
96
  def filter_by_roles(category_name):
97
  job_titles = CATEGORIES[category_name]
@@ -127,14 +128,15 @@ def filter_by_roles(category_name):
127
  })
128
 
129
  if not filtered:
130
- return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'."
131
 
132
  df = pd.DataFrame(filtered)
133
  df.to_csv(FILTERED_CSV, index=False)
134
- return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM check."
 
135
 
136
  # ----------------------------
137
- # Step 2: LLM recommendations
138
  # ----------------------------
139
  def llm_recommendations(category_name):
140
  job_titles = CATEGORIES[category_name]
@@ -143,59 +145,50 @@ def llm_recommendations(category_name):
143
  df_filtered, msg = filter_by_roles(category_name)
144
  if df_filtered.empty:
145
  return msg
146
-
147
- df_filtered = pd.read_csv(FILTERED_CSV)
148
- df_filtered = df_filtered[df_filtered["Category"] == category_name]
149
 
150
  if df_filtered.empty:
151
  return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."
152
 
153
- recommended = []
154
- df_filtered_clean = df_filtered.fillna('N/A')
155
- filtered_candidates = df_filtered_clean.to_dict(orient="records")
156
-
157
- for i in range(0, len(filtered_candidates), BATCH_SIZE):
158
- batch = filtered_candidates[i:i+BATCH_SIZE]
159
- for person in batch:
160
- candidate_info = {
161
- "Name": person.get("Name"),
162
- "Roles": person.get("Roles"),
163
- "Skills": person.get("Skills")
164
- }
165
- candidate_str = json.dumps(candidate_info)
166
- response = call_llm(candidate_str, category_name, tuple(job_titles))
167
-
168
- if response == "Yes":
169
- recommended.append(person)
170
 
171
- if not recommended:
172
- return f"LLM determined no candidates are suitable for the '{category_name}' category. Try another category or loosen the initial role filters."
173
 
174
- df_rec = pd.DataFrame(recommended)
 
175
 
176
- def parse_salary(s):
177
- try:
178
- return float(str(s).replace("$","").replace(",","").replace("N/A", str(float('inf'))))
179
- except:
180
- return float('inf')
181
-
182
- df_rec["Salary_sort"] = df_rec["Salary"].apply(parse_salary)
183
- df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
184
- df_top5 = df_rec.head(5)
185
 
186
- candidate_names = df_top5["Name"].tolist()
 
 
 
 
187
 
188
- output_text = f"Top {len(candidate_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
189
 
190
- for i, name in enumerate(candidate_names):
191
  output_text += f"{i+1}. {name}\n"
192
 
193
- output_text += "\nThese candidates were selected as a potential match by the LLM and sorted by lowest expected salary."
194
 
195
  return output_text
196
 
197
  # ----------------------------
198
- # Show first 5 raw JSON candidates
199
  # ----------------------------
200
  def show_first_candidates():
201
  try:
@@ -208,10 +201,10 @@ def show_first_candidates():
208
  return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
209
 
210
  # ----------------------------
211
- # Gradio interface
212
  # ----------------------------
213
  with gr.Blocks() as app:
214
- gr.Markdown("# Candidate Recommendation Engine")
215
 
216
  gr.Markdown("#### Raw JSON Preview: First 5 Candidates")
217
  gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
@@ -228,7 +221,7 @@ with gr.Blocks() as app:
228
  gr.Markdown("---")
229
 
230
  # Step 2: LLM Recommendations
231
- llm_button = gr.Button("3. Get LLM Recommendations (Text Summary)")
232
  llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
233
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
234
 
 
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
15
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
16
  OUTPUT_FILE = "/tmp/outputs.csv"
17
+ BATCH_SIZE = 50 # Not used for LLM, but kept for consistency
18
 
19
  if not HF_API_TOKEN:
20
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
 
34
  }
35
 
36
  # ----------------------------
37
+ # New LLM Call for Ranking
38
  # ----------------------------
39
+ @lru_cache(maxsize=1) # Cache only the last ranking request
40
+ def rank_candidates(candidates_list_str, category_name, job_titles_tuple):
 
41
  prompt = f"""
42
+ You are an HR expert specializing in the '{category_name}' category.
43
+ Your goal is to rank the provided candidates based on two criteria:
44
+ 1. **Experience**: Inferred from relevant roles and extensive skills.
45
+ 2. **Educational Background**: Assume candidates with technical roles/skills have a strong technical education (e.g., MSc/PhD).
46
+
47
+ The target roles are: {list(job_titles_tuple)}
48
+
49
+ Review the following list of candidates (JSON format):
50
+ {candidates_list_str}
51
+
52
+ **Task**: Select the **top 5 most promising candidates** from this list.
53
+ **Output Format**: Respond ONLY with a comma-separated list of the candidates' **Names**. Do not include any numbers, prefixes, or commentary.
54
  """
55
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
56
 
 
 
57
  payload = {
58
  "inputs": prompt,
59
  "parameters": {
60
+ # Set max_new_tokens higher since the output is a list of names
61
+ "max_new_tokens": 100,
62
  "return_full_text": False,
63
+ "temperature": 0.3 # Use low temperature for focused extraction
64
  }
65
  }
66
 
 
69
  f"https://api-inference.huggingface.co/models/{MODEL_ID}",
70
  headers=headers,
71
  data=json.dumps(payload),
72
+ timeout=120 # Increased timeout for larger request
73
  )
74
  response.raise_for_status()
75
  result = response.json()
76
 
77
  if isinstance(result, dict) and "error" in result:
78
  print(f"LLM API Error: {result.get('error')}")
79
+ return []
80
 
81
+ # The model should return a string like "Name1, Name2, Name3"
82
+ generated_text = result[0].get("generated_text", "").strip()
83
 
84
+ # Parse the comma-separated list of names
85
+ # Clean up the output by splitting by comma, stripping whitespace, and removing empty strings
86
+ ranked_names = [name.strip() for name in generated_text.split(',') if name.strip()]
87
+
88
+ return ranked_names
 
 
 
 
 
89
 
90
  except Exception as e:
91
+ print("LLM ranking call failed:", e)
92
+ return []
93
 
94
  # ----------------------------
95
+ # Step 1: Filter by roles (Unchanged)
96
  # ----------------------------
97
  def filter_by_roles(category_name):
98
  job_titles = CATEGORIES[category_name]
 
128
  })
129
 
130
  if not filtered:
131
+ return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'. The LLM can't proceed."
132
 
133
  df = pd.DataFrame(filtered)
134
  df.to_csv(FILTERED_CSV, index=False)
135
+ return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM ranking."
136
+
137
 
138
  # ----------------------------
139
+ # Step 2: LLM recommendations (Modified for Ranking)
140
  # ----------------------------
141
  def llm_recommendations(category_name):
142
  job_titles = CATEGORIES[category_name]
 
145
  df_filtered, msg = filter_by_roles(category_name)
146
  if df_filtered.empty:
147
  return msg
148
+ else:
149
+ df_filtered = pd.read_csv(FILTERED_CSV)
150
+ df_filtered = df_filtered[df_filtered["Category"] == category_name]
151
 
152
  if df_filtered.empty:
153
  return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."
154
 
155
+ # Select the top 10 candidates based on alphabetical name sort (arbitrary tie-breaker)
156
+ # and prepare the data for the single LLM ranking call.
157
+ df_top_for_llm = df_filtered.head(10).fillna('N/A')
158
+
159
+ # Only send necessary info for ranking
160
+ candidates_to_rank = df_top_for_llm[["Name", "Roles", "Skills"]].to_dict(orient="records")
161
+ candidates_list_str = json.dumps(candidates_to_rank, indent=2)
162
+
163
+ # 🚩 Single LLM call to rank the batch
164
+ ranked_names = rank_candidates(candidates_list_str, category_name, tuple(job_titles))
 
 
 
 
 
 
 
165
 
166
+ if not ranked_names:
167
+ return f"LLM failed to extract or rank suitable candidates for '{category_name}'. Check API status or model availability."
168
 
169
+ # Reorder the original DataFrame based on the names returned by the LLM
170
+ name_to_rank = {name: i for i, name in enumerate(ranked_names)}
171
 
172
+ # Filter to only include the names returned by the LLM
173
+ df_ranked = df_filtered[df_filtered["Name"].isin(ranked_names)].copy()
 
 
 
 
 
 
 
174
 
175
+ # Use the rank dictionary to sort the DataFrame
176
+ df_ranked["LLM_Rank"] = df_ranked["Name"].map(name_to_rank)
177
+ df_top5 = df_ranked.sort_values(by="LLM_Rank").head(5).drop(columns=["LLM_Rank"])
178
+
179
+ final_names = df_top5["Name"].tolist()
180
 
181
+ output_text = f"Top {len(final_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
182
 
183
+ for i, name in enumerate(final_names):
184
  output_text += f"{i+1}. {name}\n"
185
 
186
+ output_text += "\nThese candidates were ranked by the LLM based on inferred experience (roles/skills) and assumed education."
187
 
188
  return output_text
189
 
190
  # ----------------------------
191
+ # Show first 5 raw JSON candidates (Unchanged)
192
  # ----------------------------
193
  def show_first_candidates():
194
  try:
 
201
  return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
202
 
203
  # ----------------------------
204
+ # Gradio interface (Unchanged)
205
  # ----------------------------
206
  with gr.Blocks() as app:
207
+ gr.Markdown("# Candidate Recommendation Engine (Experience & Education Focus)")
208
 
209
  gr.Markdown("#### Raw JSON Preview: First 5 Candidates")
210
  gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
 
221
  gr.Markdown("---")
222
 
223
  # Step 2: LLM Recommendations
224
+ llm_button = gr.Button("3. Get LLM Recommendations (Experience & Education Ranking)")
225
  llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
226
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
227