curiouscurrent committed on
Commit
384f205
·
verified ·
1 Parent(s): 3aeed34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -64
app.py CHANGED
@@ -3,12 +3,9 @@ import pandas as pd
3
  import json
4
  import os
5
  import requests
 
6
 
7
- # ----------------------------
8
- # CONFIG
9
- # ----------------------------
10
  JSON_FILE = "form-submissions-1.json"
11
- OUTPUT_FILE = "outputs.csv" # Cache LLM recommendations
12
  MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
13
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
14
 
@@ -30,22 +27,19 @@ CATEGORIES = {
30
  }
31
 
32
  # ----------------------------
33
- # Helper functions
34
  # ----------------------------
35
- def fetch_json_local(file_path):
36
- with open(file_path, "r", encoding="utf-8") as f:
37
- return json.load(f)
38
-
39
- def call_zephyr(candidate_json, category_name, job_titles):
40
  """
41
- Call Zephyr LLM for candidate recommendation
42
  """
43
  try:
44
  prompt = f"""
45
  You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
46
- The category includes the following job titles: {job_titles}
47
 
48
- Candidate JSON: {candidate_json}
49
 
50
  Respond only 'Yes' if suitable, otherwise 'No'.
51
  """
@@ -69,11 +63,11 @@ Respond only 'Yes' if suitable, otherwise 'No'.
69
  print("Zephyr call failed:", e)
70
  return "No"
71
 
 
 
 
72
  def filter_candidates(category_name, job_titles):
73
- """
74
- Step 1: Filter candidates based on work experience
75
- """
76
- data = fetch_json_local(JSON_FILE)
77
  filtered = []
78
  for person in data:
79
  work_exps = person.get("work_experiences", [])
@@ -88,40 +82,16 @@ def filter_candidates(category_name, job_titles):
88
  return filtered
89
 
90
  def get_top_candidates(category_name, job_titles, top_n=5):
91
- """
92
- Step 2: Use outputs.csv cache, call Zephyr only if needed
93
- """
94
- # Load cache if exists
95
- if os.path.exists(OUTPUT_FILE):
96
- df_cache = pd.read_csv(OUTPUT_FILE)
97
- else:
98
- df_cache = pd.DataFrame()
99
-
100
  filtered_candidates = filter_candidates(category_name, job_titles)
101
  recommended = []
102
 
103
  for person in filtered_candidates:
104
- candidate_id = person.get("email") # unique identifier
105
- # Check if already cached
106
- if not df_cache.empty and candidate_id in df_cache["Email"].values and category_name in df_cache["Category"].values:
107
- row = df_cache[(df_cache["Email"]==candidate_id) & (df_cache["Category"]==category_name)].iloc[0]
108
- recommended.append({
109
- "Name": row["Name"],
110
- "Email": row["Email"],
111
- "Phone": row["Phone"],
112
- "Location": row["Location"],
113
- "Roles": row["Roles"],
114
- "Skills": row["Skills"],
115
- "Salary": row["Salary"]
116
- })
117
- continue
118
-
119
- # Call Zephyr LLM
120
- response = call_zephyr(json.dumps(person), category_name, job_titles)
121
  if "Yes" in response:
122
  work_exps = person.get("work_experiences", [])
123
  non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
124
- rec = {
125
  "Name": person.get("name"),
126
  "Email": person.get("email"),
127
  "Phone": person.get("phone"),
@@ -130,44 +100,40 @@ def get_top_candidates(category_name, job_titles, top_n=5):
130
  "Skills": ", ".join(person.get("skills", [])),
131
  "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
132
  "Category": category_name
133
- }
134
- recommended.append(rec)
135
- # Add to cache
136
- df_cache = pd.concat([df_cache, pd.DataFrame([rec])], ignore_index=True)
137
-
138
- # Save cache
139
- if not df_cache.empty:
140
- df_cache.to_csv(OUTPUT_FILE, index=False)
141
 
142
  if not recommended:
143
  return pd.DataFrame()
144
 
145
  df = pd.DataFrame(recommended)
146
-
147
- # Sort by Salary (optional)
148
- def parse_salary(s):
149
- if isinstance(s, str) and s.startswith("$"):
150
- return float(s.replace("$","").replace(",",""))
151
- return float('inf')
152
- df["Salary_sort"] = df["Salary"].apply(parse_salary)
153
  df = df.sort_values("Salary_sort").drop(columns=["Salary_sort"])
154
-
155
  return df.head(top_n)
156
 
157
  # ----------------------------
158
- # Gradio Interface
159
  # ----------------------------
160
  def run_dashboard(category):
161
  if category not in CATEGORIES:
162
  return pd.DataFrame()
163
- return get_top_candidates(category, CATEGORIES[category], top_n=5)
 
 
 
 
 
 
 
 
 
164
 
165
  demo = gr.Interface(
166
  fn=run_dashboard,
167
  inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
168
- outputs=gr.Dataframe(label="Top 5 Recommended Candidates"),
 
169
  title="Startup Candidate Dashboard - Zephyr-7B-Beta",
170
- description="Top 5 candidates per category using Zephyr LLM with outputs.csv caching."
171
  )
172
 
173
  if __name__ == "__main__":
 
3
  import json
4
  import os
5
  import requests
6
+ from functools import lru_cache
7
 
 
 
 
8
  JSON_FILE = "form-submissions-1.json"
 
9
  MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
10
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
11
 
 
27
  }
28
 
29
  # ----------------------------
30
+ # LLM caching
31
  # ----------------------------
32
+ @lru_cache(maxsize=512)
33
+ def call_zephyr_cached(candidate_str, category_name, job_titles_tuple):
 
 
 
34
  """
35
+ Cached Zephyr LLM call.
36
  """
37
  try:
38
  prompt = f"""
39
  You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
40
+ The category includes the following job titles: {list(job_titles_tuple)}
41
 
42
+ Candidate JSON: {candidate_str}
43
 
44
  Respond only 'Yes' if suitable, otherwise 'No'.
45
  """
 
63
  print("Zephyr call failed:", e)
64
  return "No"
65
 
66
+ # ----------------------------
67
+ # Candidate filtering
68
+ # ----------------------------
69
  def filter_candidates(category_name, job_titles):
70
+ data = json.load(open(JSON_FILE, encoding="utf-8"))
 
 
 
71
  filtered = []
72
  for person in data:
73
  work_exps = person.get("work_experiences", [])
 
82
  return filtered
83
 
84
  def get_top_candidates(category_name, job_titles, top_n=5):
 
 
 
 
 
 
 
 
 
85
  filtered_candidates = filter_candidates(category_name, job_titles)
86
  recommended = []
87
 
88
  for person in filtered_candidates:
89
+ candidate_str = json.dumps(person)
90
+ response = call_zephyr_cached(candidate_str, category_name, tuple(job_titles))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  if "Yes" in response:
92
  work_exps = person.get("work_experiences", [])
93
  non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
94
+ recommended.append({
95
  "Name": person.get("name"),
96
  "Email": person.get("email"),
97
  "Phone": person.get("phone"),
 
100
  "Skills": ", ".join(person.get("skills", [])),
101
  "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
102
  "Category": category_name
103
+ })
 
 
 
 
 
 
 
104
 
105
  if not recommended:
106
  return pd.DataFrame()
107
 
108
  df = pd.DataFrame(recommended)
109
+ df["Salary_sort"] = df["Salary"].apply(lambda s: float(s.replace("$","").replace(",","")) if isinstance(s,str) and s.startswith("$") else float('inf'))
 
 
 
 
 
 
110
  df = df.sort_values("Salary_sort").drop(columns=["Salary_sort"])
 
111
  return df.head(top_n)
112
 
113
  # ----------------------------
114
+ # Gradio interface
115
  # ----------------------------
116
  def run_dashboard(category):
117
  if category not in CATEGORIES:
118
  return pd.DataFrame()
119
+ df = get_top_candidates(category, CATEGORIES[category], top_n=5)
120
+ return df
121
+
122
+ def download_csv(category):
123
+ df = get_top_candidates(category, CATEGORIES[category], top_n=5)
124
+ if df.empty:
125
+ return None
126
+ file_path = "/tmp/outputs.csv"
127
+ df.to_csv(file_path, index=False)
128
+ return file_path
129
 
130
  demo = gr.Interface(
131
  fn=run_dashboard,
132
  inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
133
+ outputs=[gr.Dataframe(label="Top 5 Recommended Candidates"),
134
+ gr.File(label="Download CSV", file_types=[".csv"], file_path_func=download_csv)],
135
  title="Startup Candidate Dashboard - Zephyr-7B-Beta",
136
+ description="Top 5 candidates per category using Zephyr LLM with caching. You can download the CSV."
137
  )
138
 
139
  if __name__ == "__main__":