curiouscurrent committed on
Commit
638b476
·
verified ·
1 Parent(s): 789c241

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -42
app.py CHANGED
@@ -5,33 +5,27 @@ import os
5
  import requests
6
  from functools import lru_cache
7
 
8
- # ----------------------------
9
- # CONFIG
10
- # ----------------------------
11
  # Name of the JSON file that show_first_candidates() loads candidate records from.
  JSON_FILE = "form-submissions-1.json"
12
  # Hugging Face model identifier; presumably consumed by call_llm, whose body is outside this view.
- MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini" # smaller, faster, stable
13
  # API token read from the environment; call_llm sends it as the Bearer credential in its request headers.
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 
 
14
 
15
  # Fail at import time when the token is missing or empty, instead of failing later on the first request.
  if not HF_API_TOKEN:
16
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
17
 
18
  # Maps each category name to the job titles that belong to it. The keys populate the
  # category dropdown, and process_category() looks the titles up by key.
  CATEGORIES = {
19
- "AI": [
20
- "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
21
- "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
22
- "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
23
- "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
24
- "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
25
- ],
26
  "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
27
  "CTO": ["Chief Technology Officer","CTO"],
28
  "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
29
  "Finance": ["Financial Analyst","Financial Advisor"]
30
  }
31
 
32
  # NOTE(review): the code that consumes BATCH_SIZE is outside this view.
- BATCH_SIZE = 50 # send candidates in small batches to LLM
33
  # CSV path that process_category() writes recommendations to and reads back; also returned for download.
- OUTPUT_FILE = "/tmp/outputs.csv"
34
-
35
  # ----------------------------
36
  # LLM cached call
37
  # ----------------------------
@@ -40,9 +34,7 @@ def call_llm(candidate_str, category_name, job_titles_tuple):
40
  prompt = f"""
41
  You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
42
  The category includes the following job titles: {list(job_titles_tuple)}
43
-
44
  Candidate JSON: {candidate_str}
45
-
46
  Respond only 'Yes' if suitable, otherwise 'No'.
47
  """
48
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
@@ -81,7 +73,7 @@ def prefilter_candidates(category_name, job_titles):
81
  return filtered
82
 
83
  # ----------------------------
84
- # Process batches and save CSV
85
  # ----------------------------
86
  def process_category(category_name):
87
  job_titles = CATEGORIES[category_name]
@@ -107,7 +99,7 @@ def process_category(category_name):
107
  "Category": category_name
108
  }
109
  recommended.append(rec)
110
- # Incrementally save to CSV
111
  if recommended:
112
  df_temp = pd.DataFrame(recommended)
113
  if os.path.exists(OUTPUT_FILE):
@@ -115,43 +107,37 @@ def process_category(category_name):
115
  else:
116
  df_temp.to_csv(OUTPUT_FILE, index=False)
117
 
118
- # Read full CSV and return top 5 for this category
119
  df_all = pd.read_csv(OUTPUT_FILE)
120
- df_category = df_all[df_all["Category"]==category_name]
121
- df_category = df_category.sort_values("Salary", ascending=False).head(5)
122
- return df_category
123
 
124
  # ----------------------------
125
- # Show first 5 candidates from JSON
126
  # ----------------------------
127
  def show_first_candidates():
128
  data = json.load(open(JSON_FILE, encoding="utf-8"))
129
  first_5 = data[:5]
130
- df = pd.DataFrame(first_5)
131
- return df
132
 
133
  # ----------------------------
134
- # Gradio interface
135
  # ----------------------------
136
def run_dashboard(category):
    """Produce the dashboard outputs for one category.

    Args:
        category: key of ``CATEGORIES`` to process.

    Returns:
        A ``(DataFrame, path)`` pair: the frame returned by
        ``process_category`` together with ``OUTPUT_FILE``, or an empty
        DataFrame and ``None`` when that frame has no rows.
    """
    top_candidates = process_category(category)
    has_rows = not top_candidates.empty
    return (top_candidates, OUTPUT_FILE) if has_rows else (pd.DataFrame(), None)
141
-
142
  # Build the Gradio Blocks app: a preview of the first five JSON records, then the category dashboard.
  with gr.Blocks() as app:
143
  gr.Markdown("### Raw JSON Preview: First 5 Candidates")
144
  # show_first_candidates() is called once, when this statement runs during UI construction.
- gr.Dataframe(show_first_candidates(), label="First 5 Candidates from JSON")
 
145
  gr.Markdown("---")
146
  # Interface wired to run_dashboard: a dropdown of the CATEGORIES keys in, a table and a file out.
- demo = gr.Interface(
147
- fn=run_dashboard,
148
- inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
149
- outputs=[gr.Dataframe(label="Top 5 Recommended Candidates"),
150
- gr.File(label="Download CSV")],
151
- title="Startup Candidate Dashboard - Batched LLM",
152
- description="Top 5 candidates per category using smaller LLM with batch processing."
153
- )
154
  # NOTE(review): presumably still inside the `with gr.Blocks()` body; indentation is not preserved in this view.
- demo.render()
 
155
 
156
  # Launch the app only when this module is executed as a script.
  if __name__ == "__main__":
157
  app.launch()
 
5
  import requests
6
  from functools import lru_cache
7
 
 
 
 
8
  # Name of the JSON file that show_first_candidates() loads candidate records from.
  JSON_FILE = "form-submissions-1.json"
9
  # Hugging Face model identifier; presumably consumed by call_llm, whose body is outside this view.
+ MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini"
10
  # API token read from the environment; call_llm sends it as the Bearer credential in its request headers.
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
11
  # CSV path that process_category() writes recommendations to and reads back; also returned for download.
+ OUTPUT_FILE = "/tmp/outputs.csv"
12
  # NOTE(review): the code that consumes BATCH_SIZE is outside this view.
+ BATCH_SIZE = 50
13
 
14
  # Fail at import time when the token is missing or empty, instead of failing later on the first request.
  if not HF_API_TOKEN:
15
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
16
 
17
  # Maps each category name to the job titles that belong to it. The keys populate the
  # category dropdown, and process_category() looks the titles up by key.
  CATEGORIES = {
18
+ "AI": ["AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
19
+ "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
20
+ "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
21
+ "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
22
+ "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"],
 
 
23
  "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
24
  "CTO": ["Chief Technology Officer","CTO"],
25
  "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
26
  "Finance": ["Financial Analyst","Financial Advisor"]
27
  }
28
 
 
 
 
29
  # ----------------------------
30
  # LLM cached call
31
  # ----------------------------
 
34
  prompt = f"""
35
  You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
36
  The category includes the following job titles: {list(job_titles_tuple)}
 
37
  Candidate JSON: {candidate_str}
 
38
  Respond only 'Yes' if suitable, otherwise 'No'.
39
  """
40
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
 
73
  return filtered
74
 
75
  # ----------------------------
76
+ # Process batch and save CSV
77
  # ----------------------------
78
  def process_category(category_name):
79
  job_titles = CATEGORIES[category_name]
 
99
  "Category": category_name
100
  }
101
  recommended.append(rec)
102
+ # Save incrementally
103
  if recommended:
104
  df_temp = pd.DataFrame(recommended)
105
  if os.path.exists(OUTPUT_FILE):
 
107
  else:
108
  df_temp.to_csv(OUTPUT_FILE, index=False)
109
 
110
+ # Return top 5
111
  df_all = pd.read_csv(OUTPUT_FILE)
112
+ df_cat = df_all[df_all["Category"]==category_name]
113
+ return df_cat.sort_values("Salary", ascending=False).head(5)
 
114
 
115
  # ----------------------------
116
+ # Show first 5 JSON candidates
117
  # ----------------------------
118
  def show_first_candidates():
119
  data = json.load(open(JSON_FILE, encoding="utf-8"))
120
  first_5 = data[:5]
121
+ return pd.DataFrame(first_5)
 
122
 
123
  # ----------------------------
124
+ # Gradio UI
125
  # ----------------------------
 
 
 
 
 
 
126
  # Build the Gradio Blocks app: a preview of the first five JSON records, then the recommendation controls.
  with gr.Blocks() as app:
127
  gr.Markdown("### Raw JSON Preview: First 5 Candidates")
128
  # show_first_candidates() is called once, when this statement runs during UI construction.
+ gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
129
+
130
  gr.Markdown("---")
131
  # Input and output components of the recommendation flow; the dropdown offers the CATEGORIES keys.
+ category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="Select Category")
132
+ run_button = gr.Button("Get Top 5 Recommended Candidates")
133
+ output_df = gr.Dataframe(label="Top 5 Recommended Candidates")
134
+ download_file = gr.File(label="Download CSV", file_types=[".csv"])
135
+
136
  # Click handler: returns the frame from process_category() plus the OUTPUT_FILE path for the file component.
  # NOTE(review): OUTPUT_FILE is returned unconditionally, even if the frame is empty; confirm that is intended.
+ def run(category_name):
137
+ df_top5 = process_category(category_name)
138
+ return df_top5, OUTPUT_FILE
139
+
140
  # Wire the button: the dropdown value goes in, the (table, file) pair from run() comes out.
+ run_button.click(run, inputs=[category_dropdown], outputs=[output_df, download_file])
141
 
142
  # Launch the app only when this module is executed as a script.
  if __name__ == "__main__":
143
  app.launch()