curiouscurrent committed on
Commit
2c59240
·
verified ·
1 Parent(s): 638b476

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -48
app.py CHANGED
@@ -5,9 +5,13 @@ import os
5
  import requests
6
  from functools import lru_cache
7
 
 
 
 
8
  JSON_FILE = "form-submissions-1.json"
9
  MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini"
10
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 
11
  OUTPUT_FILE = "/tmp/outputs.csv"
12
  BATCH_SIZE = 50
13
 
@@ -15,11 +19,13 @@ if not HF_API_TOKEN:
15
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
16
 
17
  CATEGORIES = {
18
- "AI": ["AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
19
- "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
20
- "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
21
- "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
22
- "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"],
 
 
23
  "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
24
  "CTO": ["Chief Technology Officer","CTO"],
25
  "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
@@ -56,11 +62,13 @@ Respond only 'Yes' if suitable, otherwise 'No'.
56
  return "No"
57
 
58
  # ----------------------------
59
- # Pre-filter JSON
60
  # ----------------------------
61
- def prefilter_candidates(category_name, job_titles):
 
62
  data = json.load(open(JSON_FILE, encoding="utf-8"))
63
  filtered = []
 
64
  for person in data:
65
  work_exps = person.get("work_experiences", [])
66
  if not work_exps:
@@ -69,15 +77,33 @@ def prefilter_candidates(category_name, job_titles):
69
  if not non_fullstack_roles:
70
  continue
71
  if any(role in job_titles for role in non_fullstack_roles):
72
- filtered.append(person)
73
- return filtered
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # ----------------------------
76
- # Process batch and save CSV
77
  # ----------------------------
78
- def process_category(category_name):
79
  job_titles = CATEGORIES[category_name]
80
- filtered_candidates = prefilter_candidates(category_name, job_titles)
 
 
 
 
81
  recommended = []
82
 
83
  for i in range(0, len(filtered_candidates), BATCH_SIZE):
@@ -86,42 +112,27 @@ def process_category(category_name):
86
  candidate_str = json.dumps(person)
87
  response = call_llm(candidate_str, category_name, tuple(job_titles))
88
  if "Yes" in response:
89
- work_exps = person.get("work_experiences", [])
90
- non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
91
- rec = {
92
- "Name": person.get("name"),
93
- "Email": person.get("email"),
94
- "Phone": person.get("phone"),
95
- "Location": person.get("location"),
96
- "Roles": ", ".join(non_fullstack_roles),
97
- "Skills": ", ".join(person.get("skills", [])),
98
- "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
99
- "Category": category_name
100
- }
101
- recommended.append(rec)
102
- # Save incrementally
103
- if recommended:
104
- df_temp = pd.DataFrame(recommended)
105
- if os.path.exists(OUTPUT_FILE):
106
- df_temp.to_csv(OUTPUT_FILE, mode="a", header=False, index=False)
107
- else:
108
- df_temp.to_csv(OUTPUT_FILE, index=False)
109
-
110
- # Return top 5
111
- df_all = pd.read_csv(OUTPUT_FILE)
112
- df_cat = df_all[df_all["Category"]==category_name]
113
- return df_cat.sort_values("Salary", ascending=False).head(5)
114
 
115
  # ----------------------------
116
- # Show first 5 JSON candidates
117
  # ----------------------------
118
  def show_first_candidates():
119
  data = json.load(open(JSON_FILE, encoding="utf-8"))
120
- first_5 = data[:5]
121
- return pd.DataFrame(first_5)
122
 
123
  # ----------------------------
124
- # Gradio UI
125
  # ----------------------------
126
  with gr.Blocks() as app:
127
  gr.Markdown("### Raw JSON Preview: First 5 Candidates")
@@ -129,15 +140,20 @@ with gr.Blocks() as app:
129
 
130
  gr.Markdown("---")
131
  category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="Select Category")
132
- run_button = gr.Button("Get Top 5 Recommended Candidates")
133
- output_df = gr.Dataframe(label="Top 5 Recommended Candidates")
134
- download_file = gr.File(label="Download CSV", file_types=[".csv"])
135
 
136
- def run(category_name):
137
- df_top5 = process_category(category_name)
138
- return df_top5, OUTPUT_FILE
 
 
 
 
139
 
140
- run_button.click(run, inputs=[category_dropdown], outputs=[output_df, download_file])
 
 
 
 
141
 
142
  if __name__ == "__main__":
143
  app.launch()
 
5
  import requests
6
  from functools import lru_cache
7
 
8
+ # ----------------------------
9
+ # CONFIG
10
+ # ----------------------------
11
  JSON_FILE = "form-submissions-1.json"
12
  MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini"
13
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
14
+ FILTERED_CSV = "/tmp/filtered_candidates.csv"
15
  OUTPUT_FILE = "/tmp/outputs.csv"
16
  BATCH_SIZE = 50
17
 
 
19
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
20
 
21
  CATEGORIES = {
22
+ "AI": [
23
+ "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
24
+ "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
25
+ "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
26
+ "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
27
+ "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
28
+ ],
29
  "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
30
  "CTO": ["Chief Technology Officer","CTO"],
31
  "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
 
62
  return "No"
63
 
64
  # ----------------------------
65
+ # Filter by roles (step 1)
66
  # ----------------------------
67
+ def filter_by_roles(category_name):
68
+ job_titles = CATEGORIES[category_name]
69
  data = json.load(open(JSON_FILE, encoding="utf-8"))
70
  filtered = []
71
+
72
  for person in data:
73
  work_exps = person.get("work_experiences", [])
74
  if not work_exps:
 
77
  if not non_fullstack_roles:
78
  continue
79
  if any(role in job_titles for role in non_fullstack_roles):
80
+ filtered.append({
81
+ "Name": person.get("name"),
82
+ "Email": person.get("email"),
83
+ "Phone": person.get("phone"),
84
+ "Location": person.get("location"),
85
+ "Roles": ", ".join(non_fullstack_roles),
86
+ "Skills": ", ".join(person.get("skills", [])),
87
+ "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
88
+ "Category": category_name
89
+ })
90
+ if not filtered:
91
+ return pd.DataFrame(), None
92
+
93
+ df = pd.DataFrame(filtered)
94
+ df.to_csv(FILTERED_CSV, index=False)
95
+ return df, FILTERED_CSV
96
 
97
  # ----------------------------
98
+ # LLM-based recommendations (step 2)
99
  # ----------------------------
100
+ def llm_recommendations(category_name):
101
  job_titles = CATEGORIES[category_name]
102
+ if not os.path.exists(FILTERED_CSV):
103
+ return pd.DataFrame(), None
104
+
105
+ df = pd.read_csv(FILTERED_CSV)
106
+ filtered_candidates = df.to_dict(orient="records")
107
  recommended = []
108
 
109
  for i in range(0, len(filtered_candidates), BATCH_SIZE):
 
112
  candidate_str = json.dumps(person)
113
  response = call_llm(candidate_str, category_name, tuple(job_titles))
114
  if "Yes" in response:
115
+ recommended.append(person)
116
+
117
+ if not recommended:
118
+ return pd.DataFrame(), None
119
+
120
+ df_rec = pd.DataFrame(recommended)
121
+ df_rec["Salary_sort"] = df_rec["Salary"].apply(lambda s: float(s.replace("$","").replace(",","")) if isinstance(s,str) and s.startswith("$") else float('inf'))
122
+ df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
123
+ df_rec = df_rec.head(5)
124
+ df_rec.to_csv(OUTPUT_FILE, index=False)
125
+ return df_rec, OUTPUT_FILE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # ----------------------------
128
+ # Show first 5 raw JSON candidates
129
  # ----------------------------
130
  def show_first_candidates():
131
  data = json.load(open(JSON_FILE, encoding="utf-8"))
132
+ return pd.DataFrame(data[:5])
 
133
 
134
  # ----------------------------
135
+ # Gradio interface
136
  # ----------------------------
137
  with gr.Blocks() as app:
138
  gr.Markdown("### Raw JSON Preview: First 5 Candidates")
 
140
 
141
  gr.Markdown("---")
142
  category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="Select Category")
 
 
 
143
 
144
+ # Step 1: Filter by roles
145
+ filter_button = gr.Button("Filter by Roles")
146
+ filtered_df = gr.Dataframe(label="Filtered Candidates by Roles")
147
+ download_filtered = gr.File(label="Download Filtered CSV", file_types=[".csv"])
148
+ filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, download_filtered])
149
+
150
+ gr.Markdown("---")
151
 
152
+ # Step 2: LLM Recommendations
153
+ llm_button = gr.Button("Get LLM Recommendations")
154
+ llm_df = gr.Dataframe(label="Top 5 Recommended Candidates")
155
+ download_llm = gr.File(label="Download Recommendations CSV", file_types=[".csv"])
156
+ llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_df, download_llm])
157
 
158
  if __name__ == "__main__":
159
  app.launch()