curiouscurrent committed on
Commit
3aeed34
·
verified ·
1 Parent(s): fede7d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -78
app.py CHANGED
@@ -7,17 +7,14 @@ import requests
7
  # ----------------------------
8
  # CONFIG
9
  # ----------------------------
10
- JSON_FILE = "form-submissions-1.json" # local JSON file in the Space
 
11
  MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
12
-
13
- # Hugging Face token from Space Secrets
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 
15
  if not HF_API_TOKEN:
16
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
17
 
18
- # ----------------------------
19
- # CATEGORIES
20
- # ----------------------------
21
  CATEGORIES = {
22
  "AI": [
23
  "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
@@ -26,132 +23,151 @@ CATEGORIES = {
26
  "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
27
  "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
28
  ],
29
- "Marketing": [
30
- "Marketing Specialist","Sales Agent","Salesman","Sales Associate"
31
- ],
32
- "CTO": [
33
- "Chief Technology Officer","CTO"
34
- ],
35
- "Legal": [
36
- "Legal Specialist","Attorney","Legal Intern","Lawyer"
37
- ],
38
- "Finance": [
39
- "Financial Analyst","Financial Advisor"
40
- ]
41
  }
42
 
43
  # ----------------------------
44
- # HELPER FUNCTIONS
45
  # ----------------------------
46
  def fetch_json_local(file_path):
47
  with open(file_path, "r", encoding="utf-8") as f:
48
  return json.load(f)
49
 
50
- def call_zephyr(prompt):
51
- headers = {
52
- "Authorization": f"Bearer {HF_API_TOKEN}",
53
- "Content-Type": "application/json"
54
- }
55
- payload = {"inputs": prompt}
56
- response = requests.post(
57
- f"https://api-inference.huggingface.co/models/{MODEL_ID}",
58
- headers=headers,
59
- data=json.dumps(payload),
60
- timeout=60
61
- )
62
- if response.status_code != 200:
63
- return f"Zephyr API error: {response.text}"
64
- result = response.json()
65
- if isinstance(result, dict) and "error" in result:
66
- return f"Zephyr API error: {result['error']}"
67
- return result[0].get("generated_text", "")
68
-
69
- def filter_candidates_by_category(category_name, job_titles):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  """
71
- Step 1: Filter candidates based on actual work experience roles.
72
  """
73
  data = fetch_json_local(JSON_FILE)
74
  filtered = []
75
  for person in data:
76
  work_exps = person.get("work_experiences", [])
77
- if len(work_exps) == 0:
78
  continue
79
-
80
- # Exclude candidates who ONLY have Full Stack roles
81
  non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
82
  if not non_fullstack_roles:
83
  continue
84
-
85
- # Include if any role matches the category
86
  if any(role in job_titles for role in non_fullstack_roles):
87
  filtered.append(person)
 
88
  return filtered
89
 
90
- def get_final_recommendations(category_name, job_titles, top_n=5):
91
  """
92
- Step 2: Use Zephyr LLM for final recommendation and pick top N candidates.
93
  """
94
- candidates = filter_candidates_by_category(category_name, job_titles)
95
- recommended = []
 
 
 
96
 
97
- for person in candidates:
98
- prompt = f"""
99
- You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
100
- The category includes the following job titles: {job_titles}
101
 
102
- Candidate JSON: {json.dumps(person)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- Based on their work experience, skills, and education, respond only 'Yes' if suitable, otherwise 'No'.
105
- """
106
- response = call_zephyr(prompt)
107
- if response and "Yes" in response:
108
  work_exps = person.get("work_experiences", [])
109
  non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
110
- recommended.append({
111
  "Name": person.get("name"),
112
  "Email": person.get("email"),
113
  "Phone": person.get("phone"),
114
  "Location": person.get("location"),
115
  "Roles": ", ".join(non_fullstack_roles),
116
  "Skills": ", ".join(person.get("skills", [])),
117
- "Salary": person.get("annual_salary_expectation", {}).get("full-time", "N/A")
118
- })
119
-
120
- if len(recommended) == 0:
 
 
 
 
 
 
 
 
121
  return pd.DataFrame()
122
 
123
  df = pd.DataFrame(recommended)
124
 
125
- # Optional: Sort by salary if available (ascending)
126
  def parse_salary(s):
127
  if isinstance(s, str) and s.startswith("$"):
128
  return float(s.replace("$","").replace(",",""))
129
  return float('inf')
130
- if "Salary" in df.columns:
131
- df["Salary_sort"] = df["Salary"].apply(parse_salary)
132
- df = df.sort_values("Salary_sort")
133
- df = df.drop(columns=["Salary_sort"])
134
-
135
- return df.head(top_n) # return top N candidates
136
 
137
  # ----------------------------
138
- # GRADIO INTERFACE
139
  # ----------------------------
140
  def run_dashboard(category):
141
  if category not in CATEGORIES:
142
  return pd.DataFrame()
143
- df = get_final_recommendations(category, CATEGORIES[category], top_n=5)
144
- return df
145
-
146
- category_options = list(CATEGORIES.keys())
147
 
148
  demo = gr.Interface(
149
  fn=run_dashboard,
150
- inputs=gr.Dropdown(category_options, label="Select Category"),
151
  outputs=gr.Dataframe(label="Top 5 Recommended Candidates"),
152
- live=False,
153
  title="Startup Candidate Dashboard - Zephyr-7B-Beta",
154
- description="View top 5 final recommended candidates filtered by category using Zephyr LLM."
155
  )
156
 
157
  if __name__ == "__main__":
 
7
  # ----------------------------
8
  # CONFIG
9
  # ----------------------------
10
+ JSON_FILE = "form-submissions-1.json"
11
+ OUTPUT_FILE = "outputs.csv" # Cache LLM recommendations
12
  MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
 
 
13
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
14
+
15
  if not HF_API_TOKEN:
16
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
17
 
 
 
 
18
  CATEGORIES = {
19
  "AI": [
20
  "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
 
23
  "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
24
  "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
25
  ],
26
+ "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
27
+ "CTO": ["Chief Technology Officer","CTO"],
28
+ "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
29
+ "Finance": ["Financial Analyst","Financial Advisor"]
 
 
 
 
 
 
 
 
30
  }
31
 
32
  # ----------------------------
33
+ # Helper functions
34
  # ----------------------------
35
def fetch_json_local(file_path):
    """Read a UTF-8 JSON file from disk and return the decoded object."""
    with open(file_path, "r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
38
 
39
def call_zephyr(candidate_json, category_name, job_titles):
    """
    Ask the hosted Zephyr model whether a candidate fits a category.

    Parameters
    ----------
    candidate_json : str
        JSON-serialized candidate record, embedded verbatim in the prompt.
    category_name : str
        Category label shown to the model.
    job_titles : list
        Job titles belonging to the category (interpolated into the prompt).

    Returns
    -------
    str
        The model's generated text, or the literal "No" on any API/network
        failure (best-effort: errors are printed, never raised).
    """
    try:
        prompt = f"""
        You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
        The category includes the following job titles: {job_titles}

        Candidate JSON: {candidate_json}

        Respond only 'Yes' if suitable, otherwise 'No'.
        """
        headers = {
            "Authorization": f"Bearer {HF_API_TOKEN}",
            "Content-Type": "application/json"
        }
        payload = {"inputs": prompt}
        # Hosted Inference API endpoint for MODEL_ID; 60 s timeout guards against hangs.
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{MODEL_ID}",
            headers=headers,
            data=json.dumps(payload),
            timeout=60
        )
        response.raise_for_status()
        result = response.json()
        # The API can return HTTP 200 with an {"error": ...} body (e.g. model still loading).
        if isinstance(result, dict) and "error" in result:
            return "No"
        # NOTE(review): text-generation responses typically echo the prompt inside
        # "generated_text", and this prompt itself contains the word 'Yes' — a
        # caller's `"Yes" in response` check may therefore always match.
        # TODO confirm and strip the prompt prefix if so.
        return result[0].get("generated_text","No")
    except Exception as e:
        # Best-effort degradation: any failure (network, JSON decode, schema) becomes "No".
        print("Zephyr call failed:", e)
        return "No"
71
+
72
def filter_candidates(category_name, job_titles, data=None):
    """
    Step 1: Filter candidates based on work experience.

    Keeps a candidate when they have at least one work-experience role other
    than "Full Stack Developer" AND at least one of those roles appears in
    ``job_titles``.

    Parameters
    ----------
    category_name : str
        Category label (used only in the progress message).
    job_titles : list
        Role names that define the category.
    data : list, optional
        Candidate records to filter. When omitted, records are loaded from
        JSON_FILE — the original behavior, so existing callers are unaffected.

    Returns
    -------
    list
        The candidate dicts that pass the filter.
    """
    if data is None:
        data = fetch_json_local(JSON_FILE)
    filtered = []
    for person in data:
        work_exps = person.get("work_experiences", [])
        if not work_exps:
            continue
        # Exclude pure Full-Stack profiles. The explicit "" default keeps an
        # entry without "roleName" from leaking None into the result list.
        non_fullstack_roles = [exp.get("roleName", "") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
        if not non_fullstack_roles:
            continue
        if any(role in job_titles for role in non_fullstack_roles):
            filtered.append(person)
    print(f"Filtered {len(filtered)} candidates for {category_name}")
    return filtered
89
 
90
def get_top_candidates(category_name, job_titles, top_n=5):
    """
    Step 2: Use outputs.csv cache, call Zephyr only if needed.

    Filters candidates for the category, consults the CSV cache keyed on
    (Email, Category), calls the Zephyr LLM only for uncached candidates,
    persists new recommendations back to OUTPUT_FILE, and returns the top
    ``top_n`` rows sorted by ascending salary.

    Parameters
    ----------
    category_name : str
        Category label; must match the cache's "Category" values.
    job_titles : list
        Job titles that define the category.
    top_n : int, optional
        Maximum number of rows in the returned DataFrame (default 5).

    Returns
    -------
    pandas.DataFrame
        Recommended candidates, or an empty DataFrame when none qualify.
    """
    # Load cache if it exists; a missing file just means an empty cache.
    if os.path.exists(OUTPUT_FILE):
        df_cache = pd.read_csv(OUTPUT_FILE)
    else:
        df_cache = pd.DataFrame()

    # Only trust the cache when it actually carries the lookup columns
    # (a stale/foreign outputs.csv would otherwise raise KeyError).
    cache_usable = not df_cache.empty and {"Email", "Category"}.issubset(df_cache.columns)

    filtered_candidates = filter_candidates(category_name, job_titles)
    recommended = []

    for person in filtered_candidates:
        candidate_id = person.get("email")  # unique identifier

        # Cache hit requires the SAME row to match both Email and Category.
        # Checking the two columns independently could report a "hit" when the
        # email was cached under a different category, leaving the combined
        # selection empty and crashing on .iloc[0].
        if cache_usable:
            mask = (df_cache["Email"] == candidate_id) & (df_cache["Category"] == category_name)
            if mask.any():
                row = df_cache[mask].iloc[0]
                recommended.append({
                    "Name": row["Name"],
                    "Email": row["Email"],
                    "Phone": row["Phone"],
                    "Location": row["Location"],
                    "Roles": row["Roles"],
                    "Skills": row["Skills"],
                    "Salary": row["Salary"],
                    "Category": category_name  # keep columns consistent with fresh rows
                })
                continue

        # Cache miss: ask the LLM whether the candidate fits the category.
        response = call_zephyr(json.dumps(person), category_name, job_titles)
        if "Yes" in response:
            work_exps = person.get("work_experiences", [])
            # "" default: an experience without "roleName" must not put None
            # into the list, which would crash the ", ".join below.
            non_fullstack_roles = [exp.get("roleName", "") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
            rec = {
                "Name": person.get("name"),
                "Email": person.get("email"),
                "Phone": person.get("phone"),
                "Location": person.get("location"),
                "Roles": ", ".join(non_fullstack_roles),
                "Skills": ", ".join(person.get("skills", [])),
                "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
                "Category": category_name
            }
            recommended.append(rec)
            # Grow the cache in memory; it is persisted once after the loop.
            df_cache = pd.concat([df_cache, pd.DataFrame([rec])], ignore_index=True)
            cache_usable = {"Email", "Category"}.issubset(df_cache.columns)

    # Persist the (possibly grown) cache.
    if not df_cache.empty:
        df_cache.to_csv(OUTPUT_FILE, index=False)

    if not recommended:
        return pd.DataFrame()

    df = pd.DataFrame(recommended)

    # Sort ascending by parsed salary; values not shaped like "$1,234" sort last.
    def parse_salary(s):
        if isinstance(s, str) and s.startswith("$"):
            return float(s.replace("$","").replace(",",""))
        return float('inf')
    df["Salary_sort"] = df["Salary"].apply(parse_salary)
    df = df.sort_values("Salary_sort").drop(columns=["Salary_sort"])

    return df.head(top_n)
 
 
156
 
157
  # ----------------------------
158
+ # Gradio Interface
159
  # ----------------------------
160
def run_dashboard(category):
    """Return the top-5 candidate table for *category*; empty frame for unknown categories."""
    titles = CATEGORIES.get(category)
    if titles is None:
        return pd.DataFrame()
    return get_top_candidates(category, titles, top_n=5)
 
 
 
164
 
165
# Build the Gradio UI: one category dropdown in, one candidate table out.
demo = gr.Interface(
    fn=run_dashboard,
    # Dropdown choices mirror the configured category names.
    inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
    outputs=gr.Dataframe(label="Top 5 Recommended Candidates"),
    title="Startup Candidate Dashboard - Zephyr-7B-Beta",
    description="Top 5 candidates per category using Zephyr LLM with outputs.csv caching."
)
172
 
173
  if __name__ == "__main__":