curiouscurrent committed on
Commit
edfa5fb
·
verified ·
1 Parent(s): 3fb3ad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -24
app.py CHANGED
@@ -12,7 +12,6 @@ JSON_FILE = "form-submissions-1.json"
12
  # Using a suitable generative LLM (Flan-T5 Large)
13
  MODEL_ID = "google/flan-t5-large"
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
15
- # NOTE: Keeping these temp files for the filtering step, though output format changes
16
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
17
  OUTPUT_FILE = "/tmp/outputs.csv"
18
  BATCH_SIZE = 50
@@ -35,24 +34,28 @@ CATEGORIES = {
35
  }
36
 
37
  # ----------------------------
38
- # LLM cached call
39
  # ----------------------------
40
  @lru_cache(maxsize=512)
41
  def call_llm(candidate_str, category_name, job_titles_tuple):
 
42
  prompt = f"""
43
- You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
 
44
  The category includes the following job titles: {list(job_titles_tuple)}
45
  Candidate JSON: {candidate_str}
46
- Respond only 'Yes' if suitable, otherwise 'No'.
47
  """
48
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
49
 
50
- # Adding generation parameters for strict, short output
 
51
  payload = {
52
  "inputs": prompt,
53
  "parameters": {
54
- "max_new_tokens": 10,
55
- "return_full_text": False
 
56
  }
57
  }
58
 
@@ -72,17 +75,19 @@ Respond only 'Yes' if suitable, otherwise 'No'.
72
 
73
  generated_text = result[0].get("generated_text", "No").strip().lower()
74
 
75
- # Robust check for 'yes' (in case the model is not perfectly following instructions)
76
  if "yes" in generated_text:
77
  return "Yes"
 
78
  elif "no" in generated_text:
79
  return "No"
80
  else:
 
 
81
  return "No"
82
 
83
  except Exception as e:
84
  print("LLM call failed:", e)
85
- # In case of API failure, it should not be cached as a negative result (but the lru_cache will cache the 'No')
86
  return "No"
87
 
88
  # ----------------------------
@@ -122,25 +127,22 @@ def filter_by_roles(category_name):
122
  })
123
 
124
  if not filtered:
125
- # Return a message instead of the CSV path
126
  return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'."
127
 
128
  df = pd.DataFrame(filtered)
129
  df.to_csv(FILTERED_CSV, index=False)
130
- # Return a success message
131
  return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM check."
132
 
133
  # ----------------------------
134
- # Step 2: LLM recommendations (Modified to return text)
135
  # ----------------------------
136
  def llm_recommendations(category_name):
137
  job_titles = CATEGORIES[category_name]
138
 
139
  if not os.path.exists(FILTERED_CSV):
140
- # Rerun filtering to ensure the CSV exists
141
  df_filtered, msg = filter_by_roles(category_name)
142
  if df_filtered.empty:
143
- return msg # Return the error/no-match message
144
 
145
  df_filtered = pd.read_csv(FILTERED_CSV)
146
  df_filtered = df_filtered[df_filtered["Category"] == category_name]
@@ -167,11 +169,10 @@ def llm_recommendations(category_name):
167
  recommended.append(person)
168
 
169
  if not recommended:
170
- return f"LLM determined no candidates are suitable for the '{category_name}' category."
171
 
172
  df_rec = pd.DataFrame(recommended)
173
 
174
- # Sort by numeric salary to get the top 5 with lowest expected salary first
175
  def parse_salary(s):
176
  try:
177
  return float(str(s).replace("$","").replace(",","").replace("N/A", str(float('inf'))))
@@ -182,18 +183,14 @@ def llm_recommendations(category_name):
182
  df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
183
  df_top5 = df_rec.head(5)
184
 
185
- # 🚩 NEW: Generate Text Output
186
  candidate_names = df_top5["Name"].tolist()
187
 
188
- if not candidate_names:
189
- return f"LLM check passed, but sorting resulted in an empty list (unexpected). No recommendations to display."
190
-
191
  output_text = f"Top {len(candidate_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
192
 
193
  for i, name in enumerate(candidate_names):
194
  output_text += f"{i+1}. {name}\n"
195
 
196
- output_text += "\nThese candidates were selected based on relevance by the LLM and sorted by lowest expected salary."
197
 
198
  return output_text
199
 
@@ -211,7 +208,7 @@ def show_first_candidates():
211
  return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
212
 
213
  # ----------------------------
214
- # Gradio interface (Updated)
215
  # ----------------------------
216
  with gr.Blocks() as app:
217
  gr.Markdown("# Candidate Recommendation Engine")
@@ -225,7 +222,6 @@ with gr.Blocks() as app:
225
  # Step 1: Filter by roles
226
  filter_button = gr.Button("2. Filter Candidates by Roles")
227
  filtered_df = gr.Dataframe(label="Filtered Candidates (Preview)")
228
- # 🚩 CHANGE: Display a status message for filtering
229
  filter_status = gr.Textbox(label="Filter Status", placeholder="Click 'Filter Candidates by Roles' to start.")
230
  filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, filter_status])
231
 
@@ -233,7 +229,6 @@ with gr.Blocks() as app:
233
 
234
  # Step 2: LLM Recommendations
235
  llm_button = gr.Button("3. Get LLM Recommendations (Text Summary)")
236
- # 🚩 CHANGE: Output is now a Textbox
237
  llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
238
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
239
 
 
12
  # Using a suitable generative LLM (Flan-T5 Large)
13
  MODEL_ID = "google/flan-t5-large"
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 
15
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
16
  OUTPUT_FILE = "/tmp/outputs.csv"
17
  BATCH_SIZE = 50
 
34
  }
35
 
36
  # ----------------------------
37
+ # LLM cached call (Updated for flexibility)
38
  # ----------------------------
39
  @lru_cache(maxsize=512)
40
  def call_llm(candidate_str, category_name, job_titles_tuple):
41
+ # 🚩 FLEXIBLE PROMPT: Asking the LLM to find "potential match" instead of "strong alignment"
42
  prompt = f"""
43
+ You are an HR assistant. Your task is to quickly filter candidates.
44
+ Based ONLY on the 'Roles' and 'Skills' fields provided in the candidate JSON, determine if the candidate is a potential match for the category '{category_name}'.
45
  The category includes the following job titles: {list(job_titles_tuple)}
46
  Candidate JSON: {candidate_str}
47
+ Your entire response must be ONLY one word: 'Yes' or 'No'.
48
  """
49
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
50
 
51
+ # 🚩 FLEXIBLE PARAMETERS: Increased max_new_tokens slightly and added temperature
52
+ # Temperature > 0 encourages more diverse/flexible interpretation.
53
  payload = {
54
  "inputs": prompt,
55
  "parameters": {
56
+ "max_new_tokens": 20,
57
+ "return_full_text": False,
58
+ "temperature": 0.5 # Add some randomness to avoid ultra-strict "No"
59
  }
60
  }
61
 
 
75
 
76
  generated_text = result[0].get("generated_text", "No").strip().lower()
77
 
78
+ # Check for 'yes' and 'no' keywords
79
  if "yes" in generated_text:
80
  return "Yes"
81
+ # Only return "No" if "yes" wasn't found, otherwise it's likely a match failure
82
  elif "no" in generated_text:
83
  return "No"
84
  else:
85
+ # Fallback for unexpected output (e.g., model generates preamble text)
86
+ print(f"Unexpected LLM output: '{generated_text}'. Defaulting to 'No'.")
87
  return "No"
88
 
89
  except Exception as e:
90
  print("LLM call failed:", e)
 
91
  return "No"
92
 
93
  # ----------------------------
 
127
  })
128
 
129
  if not filtered:
 
130
  return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'."
131
 
132
  df = pd.DataFrame(filtered)
133
  df.to_csv(FILTERED_CSV, index=False)
 
134
  return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM check."
135
 
136
  # ----------------------------
137
+ # Step 2: LLM recommendations
138
  # ----------------------------
139
  def llm_recommendations(category_name):
140
  job_titles = CATEGORIES[category_name]
141
 
142
  if not os.path.exists(FILTERED_CSV):
 
143
  df_filtered, msg = filter_by_roles(category_name)
144
  if df_filtered.empty:
145
+ return msg
146
 
147
  df_filtered = pd.read_csv(FILTERED_CSV)
148
  df_filtered = df_filtered[df_filtered["Category"] == category_name]
 
169
  recommended.append(person)
170
 
171
  if not recommended:
172
+ return f"LLM determined no candidates are suitable for the '{category_name}' category. Try another category or loosen the initial role filters."
173
 
174
  df_rec = pd.DataFrame(recommended)
175
 
 
176
  def parse_salary(s):
177
  try:
178
  return float(str(s).replace("$","").replace(",","").replace("N/A", str(float('inf'))))
 
183
  df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
184
  df_top5 = df_rec.head(5)
185
 
 
186
  candidate_names = df_top5["Name"].tolist()
187
 
 
 
 
188
  output_text = f"Top {len(candidate_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
189
 
190
  for i, name in enumerate(candidate_names):
191
  output_text += f"{i+1}. {name}\n"
192
 
193
+ output_text += "\nThese candidates were selected as a potential match by the LLM and sorted by lowest expected salary."
194
 
195
  return output_text
196
 
 
208
  return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
209
 
210
  # ----------------------------
211
+ # Gradio interface
212
  # ----------------------------
213
  with gr.Blocks() as app:
214
  gr.Markdown("# Candidate Recommendation Engine")
 
222
  # Step 1: Filter by roles
223
  filter_button = gr.Button("2. Filter Candidates by Roles")
224
  filtered_df = gr.Dataframe(label="Filtered Candidates (Preview)")
 
225
  filter_status = gr.Textbox(label="Filter Status", placeholder="Click 'Filter Candidates by Roles' to start.")
226
  filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, filter_status])
227
 
 
229
 
230
  # Step 2: LLM Recommendations
231
  llm_button = gr.Button("3. Get LLM Recommendations (Text Summary)")
 
232
  llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
233
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
234