Spaces:

curiouscurrent
/

appliedai

Sleeping

App Files Files Community

curiouscurrent committed on Sep 26, 2025

Commit

638b476

verified ·

1 Parent(s): 789c241

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -42

app.py CHANGED Viewed

@@ -5,33 +5,27 @@ import os
 import requests
 from functools import lru_cache
-# ----------------------------
-# CONFIG
-# ----------------------------
 JSON_FILE = "form-submissions-1.json"
-MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini"  # smaller, faster, stable
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 if not HF_API_TOKEN:
     raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
 CATEGORIES = {
-    "AI": [
-        "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
-        "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
-        "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
-        "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
-        "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
-    ],
     "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
     "CTO": ["Chief Technology Officer","CTO"],
     "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
     "Finance": ["Financial Analyst","Financial Advisor"]
 }
-BATCH_SIZE = 50  # send candidates in small batches to LLM
-OUTPUT_FILE = "/tmp/outputs.csv"
 # ----------------------------
 # LLM cached call
 # ----------------------------
@@ -40,9 +34,7 @@ def call_llm(candidate_str, category_name, job_titles_tuple):
     prompt = f"""
 You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
 The category includes the following job titles: {list(job_titles_tuple)}
 Candidate JSON: {candidate_str}
 Respond only 'Yes' if suitable, otherwise 'No'.
 """
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
@@ -81,7 +73,7 @@ def prefilter_candidates(category_name, job_titles):
     return filtered
 # ----------------------------
-# Process batches and save CSV
 # ----------------------------
 def process_category(category_name):
     job_titles = CATEGORIES[category_name]
@@ -107,7 +99,7 @@ def process_category(category_name):
                     "Category": category_name
                 }
                 recommended.append(rec)
-        # Incrementally save to CSV
         if recommended:
             df_temp = pd.DataFrame(recommended)
             if os.path.exists(OUTPUT_FILE):
@@ -115,43 +107,37 @@ def process_category(category_name):
             else:
                 df_temp.to_csv(OUTPUT_FILE, index=False)
-    # Read full CSV and return top 5 for this category
     df_all = pd.read_csv(OUTPUT_FILE)
-    df_category = df_all[df_all["Category"]==category_name]
-    df_category = df_category.sort_values("Salary", ascending=False).head(5)
-    return df_category
 # ----------------------------
-# Show first 5 candidates from JSON
 # ----------------------------
 def show_first_candidates():
     data = json.load(open(JSON_FILE, encoding="utf-8"))
     first_5 = data[:5]
-    df = pd.DataFrame(first_5)
-    return df
 # ----------------------------
-# Gradio interface
 # ----------------------------
-def run_dashboard(category):
-    df_top5 = process_category(category)
-    if df_top5.empty:
-        return pd.DataFrame(), None
-    return df_top5, OUTPUT_FILE
 with gr.Blocks() as app:
     gr.Markdown("### Raw JSON Preview: First 5 Candidates")
-    gr.Dataframe(show_first_candidates(), label="First 5 Candidates from JSON")
     gr.Markdown("---")
-    demo = gr.Interface(
-        fn=run_dashboard,
-        inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
-        outputs=[gr.Dataframe(label="Top 5 Recommended Candidates"),
-                 gr.File(label="Download CSV")],
-        title="Startup Candidate Dashboard - Batched LLM",
-        description="Top 5 candidates per category using smaller LLM with batch processing."
-    )
-    demo.render()
 if __name__ == "__main__":
     app.launch()

 import requests
 from functools import lru_cache
 JSON_FILE = "form-submissions-1.json"
+MODEL_ID = "HuggingFaceH4/sgpt-3.5-mini"
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
+OUTPUT_FILE = "/tmp/outputs.csv"
+BATCH_SIZE = 50
 if not HF_API_TOKEN:
     raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
 CATEGORIES = {
+    "AI": ["AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
+           "Senior Data Scientist","Machine Learning Research Scientist","Senior AI/ML Engineer",
+           "AI/ML Engineer","Big Data Engineer","AI Research Scientist","AI Research Analyst Consultant",
+           "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
+           "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"],
     "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
     "CTO": ["Chief Technology Officer","CTO"],
     "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
     "Finance": ["Financial Analyst","Financial Advisor"]
 }
 # ----------------------------
 # LLM cached call
 # ----------------------------
     prompt = f"""
 You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
 The category includes the following job titles: {list(job_titles_tuple)}
 Candidate JSON: {candidate_str}
 Respond only 'Yes' if suitable, otherwise 'No'.
 """
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
     return filtered
 # ----------------------------
+# Process batch and save CSV
 # ----------------------------
 def process_category(category_name):
     job_titles = CATEGORIES[category_name]
                     "Category": category_name
                 }
                 recommended.append(rec)
+        # Save incrementally
         if recommended:
             df_temp = pd.DataFrame(recommended)
             if os.path.exists(OUTPUT_FILE):
             else:
                 df_temp.to_csv(OUTPUT_FILE, index=False)
+    # Return top 5
     df_all = pd.read_csv(OUTPUT_FILE)
+    df_cat = df_all[df_all["Category"]==category_name]
+    return df_cat.sort_values("Salary", ascending=False).head(5)
 # ----------------------------
+# Show first 5 JSON candidates
 # ----------------------------
 def show_first_candidates():
     data = json.load(open(JSON_FILE, encoding="utf-8"))
     first_5 = data[:5]
+    return pd.DataFrame(first_5)
 # ----------------------------
+# Gradio UI
 # ----------------------------
 with gr.Blocks() as app:
     gr.Markdown("### Raw JSON Preview: First 5 Candidates")
+    gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
     gr.Markdown("---")
+    category_dropdown = gr.Dropdown(list(CATEGORIES.keys()), label="Select Category")
+    run_button = gr.Button("Get Top 5 Recommended Candidates")
+    output_df = gr.Dataframe(label="Top 5 Recommended Candidates")
+    download_file = gr.File(label="Download CSV", file_types=[".csv"])
+    def run(category_name):
+        df_top5 = process_category(category_name)
+        return df_top5, OUTPUT_FILE
+    run_button.click(run, inputs=[category_dropdown], outputs=[output_df, download_file])
 if __name__ == "__main__":
     app.launch()