curiouscurrent committed on
Commit
a76c6ef
·
verified ·
1 Parent(s): 27df5d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -18
app.py CHANGED
@@ -9,13 +9,15 @@ from functools import lru_cache
9
  # CONFIG
10
  # ----------------------------
11
  JSON_FILE = "form-submissions-1.json"
12
- MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini"
 
13
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
14
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
15
  OUTPUT_FILE = "/tmp/outputs.csv"
16
  BATCH_SIZE = 50
17
 
18
  if not HF_API_TOKEN:
 
19
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
20
 
21
  CATEGORIES = {
@@ -54,9 +56,24 @@ Respond only 'Yes' if suitable, otherwise 'No'.
54
  )
55
  response.raise_for_status()
56
  result = response.json()
 
 
57
  if isinstance(result, dict) and "error" in result:
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  return "No"
59
- return result[0].get("generated_text","No")
60
  except Exception as e:
61
  print("LLM call failed:", e)
62
  return "No"
@@ -66,16 +83,29 @@ Respond only 'Yes' if suitable, otherwise 'No'.
66
  # ----------------------------
67
  def filter_by_roles(category_name):
68
  job_titles = CATEGORIES[category_name]
69
- data = json.load(open(JSON_FILE, encoding="utf-8"))
 
 
 
 
 
 
 
70
  filtered = []
71
 
72
  for person in data:
73
  work_exps = person.get("work_experiences", [])
74
  if not work_exps:
75
  continue
76
- non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
 
 
 
 
77
  if not non_fullstack_roles:
78
  continue
 
 
79
  if any(role in job_titles for role in non_fullstack_roles):
80
  filtered.append({
81
  "Name": person.get("name"),
@@ -101,21 +131,27 @@ def llm_recommendations(category_name):
101
  job_titles = CATEGORIES[category_name]
102
 
103
  if not os.path.exists(FILTERED_CSV):
104
- return pd.DataFrame(), None
105
-
106
- df_filtered = pd.read_csv(FILTERED_CSV)
107
- df_filtered = df_filtered[df_filtered["Category"] == category_name]
 
 
 
108
 
109
  if df_filtered.empty:
110
  return pd.DataFrame(), None
111
 
112
  recommended = []
113
- filtered_candidates = df_filtered.to_dict(orient="records")
 
 
114
 
 
115
  for i in range(0, len(filtered_candidates), BATCH_SIZE):
116
  batch = filtered_candidates[i:i+BATCH_SIZE]
117
  for person in batch:
118
- # Only send necessary info
119
  candidate_info = {
120
  "Name": person.get("Name"),
121
  "Roles": person.get("Roles"),
@@ -123,7 +159,9 @@ def llm_recommendations(category_name):
123
  }
124
  candidate_str = json.dumps(candidate_info)
125
  response = call_llm(candidate_str, category_name, tuple(job_titles))
126
- if response.strip().lower().startswith("yes"):
 
 
127
  recommended.append(person)
128
 
129
  if not recommended:
@@ -133,9 +171,11 @@ def llm_recommendations(category_name):
133
  # Sort by numeric salary
134
  def parse_salary(s):
135
  try:
136
- return float(s.replace("$","").replace(",",""))
 
137
  except:
138
  return float('inf')
 
139
  df_rec["Salary_sort"] = df_rec["Salary"].apply(parse_salary)
140
  df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
141
  df_top5 = df_rec.head(5)
@@ -147,21 +187,30 @@ def llm_recommendations(category_name):
147
  # Show first 5 raw JSON candidates
148
  # ----------------------------
149
  def show_first_candidates():
150
- data = json.load(open(JSON_FILE, encoding="utf-8"))
151
- return pd.DataFrame(data[:5])
 
 
 
 
 
 
 
152
 
153
  # ----------------------------
154
  # Gradio interface
155
  # ----------------------------
156
  with gr.Blocks() as app:
157
- gr.Markdown("### Raw JSON Preview: First 5 Candidates")
 
 
158
  gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
159
 
160
  gr.Markdown("---")
161
  category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="Select Category")
162
 
163
  # Step 1: Filter by roles
164
- filter_button = gr.Button("Filter by Roles")
165
  filtered_df = gr.Dataframe(label="Filtered Candidates by Roles")
166
  download_filtered = gr.File(label="Download Filtered CSV", file_types=[".csv"])
167
  filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, download_filtered])
@@ -169,10 +218,10 @@ with gr.Blocks() as app:
169
  gr.Markdown("---")
170
 
171
  # Step 2: LLM Recommendations
172
- llm_button = gr.Button("Get LLM Recommendations")
173
  llm_df = gr.Dataframe(label="Top 5 Recommended Candidates")
174
  download_llm = gr.File(label="Download Recommendations CSV", file_types=[".csv"])
175
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_df, download_llm])
176
 
177
  if __name__ == "__main__":
178
- app.launch()
 
9
  # CONFIG
10
  # ----------------------------
11
  JSON_FILE = "form-submissions-1.json"
12
+ # 🚩 FIX: Changed the model ID from an embedding model to a generative LLM.
13
+ MODEL_ID = "google/flan-t5-large"
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
15
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
16
  OUTPUT_FILE = "/tmp/outputs.csv"
17
  BATCH_SIZE = 50
18
 
19
  if not HF_API_TOKEN:
20
+ # This check is good, but ensure the token is set in your environment (or space secrets)
21
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
22
 
23
  CATEGORIES = {
 
56
  )
57
  response.raise_for_status()
58
  result = response.json()
59
+
60
+ # Check for API error structure
61
  if isinstance(result, dict) and "error" in result:
62
+ print(f"LLM API Error: {result.get('error')}")
63
+ return "No"
64
+
65
+ # Extract the generated text safely and clean it up
66
+ generated_text = result[0].get("generated_text", "No").strip().lower()
67
+
68
+ # Flan-T5 often prepends the prompt or a part of it, so we only need the key decision word
69
+ # We look for 'yes' or 'no' anywhere in the response and prioritize 'yes' if found.
70
+ if "yes" in generated_text:
71
+ return "Yes"
72
+ elif "no" in generated_text:
73
+ return "No"
74
+ else:
75
  return "No"
76
+
77
  except Exception as e:
78
  print("LLM call failed:", e)
79
  return "No"
 
83
  # ----------------------------
84
  def filter_by_roles(category_name):
85
  job_titles = CATEGORIES[category_name]
86
+ # NOTE: Assuming 'form-submissions-1.json' exists in the current directory
87
+ try:
88
+ with open(JSON_FILE, encoding="utf-8") as f:
89
+ data = json.load(f)
90
+ except FileNotFoundError:
91
+ print(f"Error: JSON file '{JSON_FILE}' not found.")
92
+ return pd.DataFrame(), None
93
+
94
  filtered = []
95
 
96
  for person in data:
97
  work_exps = person.get("work_experiences", [])
98
  if not work_exps:
99
  continue
100
+ # Improved: Check if roleName is not None before calling .lower()
101
+ non_fullstack_roles = [
102
+ exp.get("roleName") for exp in work_exps
103
+ if exp.get("roleName") and "full stack developer" not in exp.get("roleName").lower()
104
+ ]
105
  if not non_fullstack_roles:
106
  continue
107
+
108
+ # Check for role match in the list of titles
109
  if any(role in job_titles for role in non_fullstack_roles):
110
  filtered.append({
111
  "Name": person.get("name"),
 
131
  job_titles = CATEGORIES[category_name]
132
 
133
  if not os.path.exists(FILTERED_CSV):
134
+ # Re-run the filtering step if the CSV is missing
135
+ df_filtered, _ = filter_by_roles(category_name)
136
+ if df_filtered.empty:
137
+ return pd.DataFrame(), None
138
+ else:
139
+ df_filtered = pd.read_csv(FILTERED_CSV)
140
+ df_filtered = df_filtered[df_filtered["Category"] == category_name]
141
 
142
  if df_filtered.empty:
143
  return pd.DataFrame(), None
144
 
145
  recommended = []
146
+ # Replace missing (NaN) values with 'N/A' before converting to dicts; otherwise json.dumps would emit non-standard NaN tokens
147
+ df_filtered_clean = df_filtered.fillna('N/A')
148
+ filtered_candidates = df_filtered_clean.to_dict(orient="records")
149
 
150
+ # The batching loop is fine, we will rely on the improved call_llm
151
  for i in range(0, len(filtered_candidates), BATCH_SIZE):
152
  batch = filtered_candidates[i:i+BATCH_SIZE]
153
  for person in batch:
154
+ # Only send necessary info to save context length and cost
155
  candidate_info = {
156
  "Name": person.get("Name"),
157
  "Roles": person.get("Roles"),
 
159
  }
160
  candidate_str = json.dumps(candidate_info)
161
  response = call_llm(candidate_str, category_name, tuple(job_titles))
162
+
163
+ # 🚩 IMPROVEMENT: The call_llm function now returns a clean 'Yes' or 'No'
164
+ if response == "Yes":
165
  recommended.append(person)
166
 
167
  if not recommended:
 
171
  # Sort by numeric salary
172
  def parse_salary(s):
173
  try:
174
+ # Remove currency symbols, commas, and convert to float
175
+ return float(str(s).replace("$","").replace(",","").replace("N/A", str(float('inf'))))
176
  except:
177
  return float('inf')
178
+
179
  df_rec["Salary_sort"] = df_rec["Salary"].apply(parse_salary)
180
  df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
181
  df_top5 = df_rec.head(5)
 
187
  # Show first 5 raw JSON candidates
188
  # ----------------------------
189
  def show_first_candidates():
190
+ # NOTE: Assuming 'form-submissions-1.json' exists
191
+ try:
192
+ with open(JSON_FILE, encoding="utf-8") as f:
193
+ data = json.load(f)
194
+ return pd.DataFrame(data[:5])
195
+ except FileNotFoundError:
196
+ return pd.DataFrame({"Error": [f"JSON file '{JSON_FILE}' not found. Please ensure it is present."]})
197
+ except Exception as e:
198
+ return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
199
 
200
  # ----------------------------
201
  # Gradio interface
202
  # ----------------------------
203
  with gr.Blocks() as app:
204
+ gr.Markdown("### Candidate Recommendation Engine")
205
+
206
+ gr.Markdown("#### Raw JSON Preview: First 5 Candidates")
207
  gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
208
 
209
  gr.Markdown("---")
210
  category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="Select Category")
211
 
212
  # Step 1: Filter by roles
213
+ filter_button = gr.Button("1. Filter by Roles")
214
  filtered_df = gr.Dataframe(label="Filtered Candidates by Roles")
215
  download_filtered = gr.File(label="Download Filtered CSV", file_types=[".csv"])
216
  filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, download_filtered])
 
218
  gr.Markdown("---")
219
 
220
  # Step 2: LLM Recommendations
221
+ llm_button = gr.Button("2. Get LLM Recommendations (Requires Step 1 to run first)")
222
  llm_df = gr.Dataframe(label="Top 5 Recommended Candidates")
223
  download_llm = gr.File(label="Download Recommendations CSV", file_types=[".csv"])
224
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_df, download_llm])
225
 
226
  if __name__ == "__main__":
227
+ app.launch()