curiouscurrent committed on
Commit
789c241
·
verified ·
1 Parent(s): e023318

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -55
app.py CHANGED
@@ -9,7 +9,7 @@ from functools import lru_cache
9
  # CONFIG
10
  # ----------------------------
11
  JSON_FILE = "form-submissions-1.json"
12
- MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
13
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
14
 
15
  if not HF_API_TOKEN:
@@ -29,13 +29,15 @@ CATEGORIES = {
29
  "Finance": ["Financial Analyst","Financial Advisor"]
30
  }
31
 
 
 
 
32
  # ----------------------------
33
- # LLM caching
34
  # ----------------------------
35
  @lru_cache(maxsize=512)
36
- def call_zephyr_cached(candidate_str, category_name, job_titles_tuple):
37
- try:
38
- prompt = f"""
39
  You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
40
  The category includes the following job titles: {list(job_titles_tuple)}
41
 
@@ -43,8 +45,9 @@ Candidate JSON: {candidate_str}
43
 
44
  Respond only 'Yes' if suitable, otherwise 'No'.
45
  """
46
- headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
47
- payload = {"inputs": prompt}
 
48
  response = requests.post(
49
  f"https://api-inference.huggingface.co/models/{MODEL_ID}",
50
  headers=headers,
@@ -57,13 +60,13 @@ Respond only 'Yes' if suitable, otherwise 'No'.
57
  return "No"
58
  return result[0].get("generated_text","No")
59
  except Exception as e:
60
- print("Zephyr call failed:", e)
61
  return "No"
62
 
63
  # ----------------------------
64
- # Candidate filtering
65
  # ----------------------------
66
- def filter_candidates(category_name, job_titles):
67
  data = json.load(open(JSON_FILE, encoding="utf-8"))
68
  filtered = []
69
  for person in data:
@@ -75,40 +78,51 @@ def filter_candidates(category_name, job_titles):
75
  continue
76
  if any(role in job_titles for role in non_fullstack_roles):
77
  filtered.append(person)
78
- print(f"Filtered {len(filtered)} candidates for {category_name}")
79
  return filtered
80
 
81
- def get_top_candidates(category_name, job_titles, top_n=5):
82
- filtered_candidates = filter_candidates(category_name, job_titles)
 
 
 
 
83
  recommended = []
84
 
85
- for person in filtered_candidates:
86
- candidate_str = json.dumps(person)
87
- response = call_zephyr_cached(candidate_str, category_name, tuple(job_titles))
88
- if "Yes" in response:
89
- work_exps = person.get("work_experiences", [])
90
- non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
91
- recommended.append({
92
- "Name": person.get("name"),
93
- "Email": person.get("email"),
94
- "Phone": person.get("phone"),
95
- "Location": person.get("location"),
96
- "Roles": ", ".join(non_fullstack_roles),
97
- "Skills": ", ".join(person.get("skills", [])),
98
- "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
99
- "Category": category_name
100
- })
101
-
102
- if not recommended:
103
- return pd.DataFrame()
104
-
105
- df = pd.DataFrame(recommended)
106
- df["Salary_sort"] = df["Salary"].apply(lambda s: float(s.replace("$","").replace(",","")) if isinstance(s,str) and s.startswith("$") else float('inf'))
107
- df = df.sort_values("Salary_sort").drop(columns=["Salary_sort"])
108
- return df.head(top_n)
 
 
 
 
 
 
 
 
109
 
110
  # ----------------------------
111
- # Show first 5 candidates from raw JSON
112
  # ----------------------------
113
  def show_first_candidates():
114
  data = json.load(open(JSON_FILE, encoding="utf-8"))
@@ -120,29 +134,23 @@ def show_first_candidates():
120
  # Gradio interface
121
  # ----------------------------
122
  def run_dashboard(category):
123
- if category not in CATEGORIES:
 
124
  return pd.DataFrame(), None
125
- df = get_top_candidates(category, CATEGORIES[category], top_n=5)
126
- if df.empty:
127
- return df, None
128
- file_path = "/tmp/outputs.csv"
129
- df.to_csv(file_path, index=False)
130
- return df, file_path
131
-
132
- demo = gr.Interface(
133
- fn=run_dashboard,
134
- inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
135
- outputs=[gr.Dataframe(label="Top 5 Recommended Candidates"),
136
- gr.File(label="Download CSV")],
137
- title="Startup Candidate Dashboard - Zephyr-7B-Beta",
138
- description="Top 5 candidates per category using Zephyr LLM. Download CSV available."
139
- )
140
 
141
- # Add separate interface to show first 5 raw candidates
142
  with gr.Blocks() as app:
143
  gr.Markdown("### Raw JSON Preview: First 5 Candidates")
144
  gr.Dataframe(show_first_candidates(), label="First 5 Candidates from JSON")
145
  gr.Markdown("---")
 
 
 
 
 
 
 
 
146
  demo.render()
147
 
148
  if __name__ == "__main__":
 
9
  # CONFIG
10
  # ----------------------------
11
  JSON_FILE = "form-submissions-1.json"
12
+ MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini" # smaller, faster, stable
13
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
14
 
15
  if not HF_API_TOKEN:
 
29
  "Finance": ["Financial Analyst","Financial Advisor"]
30
  }
31
 
32
+ BATCH_SIZE = 50 # send candidates in small batches to LLM
33
+ OUTPUT_FILE = "/tmp/outputs.csv"
34
+
35
  # ----------------------------
36
+ # LLM cached call
37
  # ----------------------------
38
  @lru_cache(maxsize=512)
39
+ def call_llm(candidate_str, category_name, job_titles_tuple):
40
+ prompt = f"""
 
41
  You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
42
  The category includes the following job titles: {list(job_titles_tuple)}
43
 
 
45
 
46
  Respond only 'Yes' if suitable, otherwise 'No'.
47
  """
48
+ headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
49
+ payload = {"inputs": prompt}
50
+ try:
51
  response = requests.post(
52
  f"https://api-inference.huggingface.co/models/{MODEL_ID}",
53
  headers=headers,
 
60
  return "No"
61
  return result[0].get("generated_text","No")
62
  except Exception as e:
63
+ print("LLM call failed:", e)
64
  return "No"
65
 
66
  # ----------------------------
67
+ # Pre-filter JSON
68
  # ----------------------------
69
+ def prefilter_candidates(category_name, job_titles):
70
  data = json.load(open(JSON_FILE, encoding="utf-8"))
71
  filtered = []
72
  for person in data:
 
78
  continue
79
  if any(role in job_titles for role in non_fullstack_roles):
80
  filtered.append(person)
 
81
  return filtered
82
 
83
+ # ----------------------------
84
+ # Process batches and save CSV
85
+ # ----------------------------
86
+ def process_category(category_name):
87
+ job_titles = CATEGORIES[category_name]
88
+ filtered_candidates = prefilter_candidates(category_name, job_titles)
89
  recommended = []
90
 
91
+ for i in range(0, len(filtered_candidates), BATCH_SIZE):
92
+ batch = filtered_candidates[i:i+BATCH_SIZE]
93
+ for person in batch:
94
+ candidate_str = json.dumps(person)
95
+ response = call_llm(candidate_str, category_name, tuple(job_titles))
96
+ if "Yes" in response:
97
+ work_exps = person.get("work_experiences", [])
98
+ non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
99
+ rec = {
100
+ "Name": person.get("name"),
101
+ "Email": person.get("email"),
102
+ "Phone": person.get("phone"),
103
+ "Location": person.get("location"),
104
+ "Roles": ", ".join(non_fullstack_roles),
105
+ "Skills": ", ".join(person.get("skills", [])),
106
+ "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
107
+ "Category": category_name
108
+ }
109
+ recommended.append(rec)
110
+ # Incrementally save to CSV
111
+ if recommended:
112
+ df_temp = pd.DataFrame(recommended)
113
+ if os.path.exists(OUTPUT_FILE):
114
+ df_temp.to_csv(OUTPUT_FILE, mode="a", header=False, index=False)
115
+ else:
116
+ df_temp.to_csv(OUTPUT_FILE, index=False)
117
+
118
+ # Read full CSV and return top 5 for this category
119
+ df_all = pd.read_csv(OUTPUT_FILE)
120
+ df_category = df_all[df_all["Category"]==category_name]
121
+ df_category = df_category.sort_values("Salary", ascending=False).head(5)
122
+ return df_category
123
 
124
  # ----------------------------
125
+ # Show first 5 candidates from JSON
126
  # ----------------------------
127
  def show_first_candidates():
128
  data = json.load(open(JSON_FILE, encoding="utf-8"))
 
134
  # Gradio interface
135
  # ----------------------------
136
def run_dashboard(category):
    """Gradio callback: return (top-5 DataFrame, CSV path) for *category*.

    Returns (empty DataFrame, None) for an unknown category or when no
    candidate was accepted, so both UI outputs always receive valid values.
    """
    # BUG FIX: restore the category guard the previous revision had — without
    # it, an unknown category raises KeyError on CATEGORIES[category] inside
    # process_category instead of degrading gracefully.
    if category not in CATEGORIES:
        return pd.DataFrame(), None
    df_top5 = process_category(category)
    if df_top5.empty:
        return pd.DataFrame(), None
    return df_top5, OUTPUT_FILE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
 
142
# ----------------------------
# App layout
# ----------------------------
# Build the page top-to-bottom inside a single Blocks context: a static
# preview of the raw submissions JSON, a divider, then the interactive
# dashboard. demo.render() embeds the Interface into this Blocks page, so
# component creation order here is the on-screen order.
with gr.Blocks() as app:
    gr.Markdown("### Raw JSON Preview: First 5 Candidates")
    # NOTE: show_first_candidates() runs once at build time, not per request.
    gr.Dataframe(show_first_candidates(), label="First 5 Candidates from JSON")
    gr.Markdown("---")
    demo = gr.Interface(
        fn=run_dashboard,
        inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
        outputs=[gr.Dataframe(label="Top 5 Recommended Candidates"),
                 gr.File(label="Download CSV")],
        title="Startup Candidate Dashboard - Batched LLM",
        description="Top 5 candidates per category using smaller LLM with batch processing."
    )
    demo.render()
155
 
156
  if __name__ == "__main__":