Spaces:

curiouscurrent
/

appliedai

Sleeping

App Files Files Community

curiouscurrent committed on Sep 26, 2025

Commit

384f205

verified ·

1 Parent(s): 3aeed34

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -64

app.py CHANGED Viewed

@@ -3,12 +3,9 @@ import pandas as pd
 import json
 import os
 import requests
-# ----------------------------
-# CONFIG
-# ----------------------------
 JSON_FILE = "form-submissions-1.json"
-OUTPUT_FILE = "outputs.csv"  # Cache LLM recommendations
 MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
@@ -30,22 +27,19 @@ CATEGORIES = {
 }
 # ----------------------------
-# Helper functions
 # ----------------------------
-def fetch_json_local(file_path):
-    with open(file_path, "r", encoding="utf-8") as f:
-        return json.load(f)
-def call_zephyr(candidate_json, category_name, job_titles):
     """
-    Call Zephyr LLM for candidate recommendation
     """
     try:
         prompt = f"""
 You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
-The category includes the following job titles: {job_titles}
-Candidate JSON: {candidate_json}
 Respond only 'Yes' if suitable, otherwise 'No'.
 """
@@ -69,11 +63,11 @@ Respond only 'Yes' if suitable, otherwise 'No'.
         print("Zephyr call failed:", e)
         return "No"
 def filter_candidates(category_name, job_titles):
-    """
-    Step 1: Filter candidates based on work experience
-    """
-    data = fetch_json_local(JSON_FILE)
     filtered = []
     for person in data:
         work_exps = person.get("work_experiences", [])
@@ -88,40 +82,16 @@ def filter_candidates(category_name, job_titles):
     return filtered
 def get_top_candidates(category_name, job_titles, top_n=5):
-    """
-    Step 2: Use outputs.csv cache, call Zephyr only if needed
-    """
-    # Load cache if exists
-    if os.path.exists(OUTPUT_FILE):
-        df_cache = pd.read_csv(OUTPUT_FILE)
-    else:
-        df_cache = pd.DataFrame()
     filtered_candidates = filter_candidates(category_name, job_titles)
     recommended = []
     for person in filtered_candidates:
-        candidate_id = person.get("email")  # unique identifier
-        # Check if already cached
-        if not df_cache.empty and candidate_id in df_cache["Email"].values and category_name in df_cache["Category"].values:
-            row = df_cache[(df_cache["Email"]==candidate_id) & (df_cache["Category"]==category_name)].iloc[0]
-            recommended.append({
-                "Name": row["Name"],
-                "Email": row["Email"],
-                "Phone": row["Phone"],
-                "Location": row["Location"],
-                "Roles": row["Roles"],
-                "Skills": row["Skills"],
-                "Salary": row["Salary"]
-            })
-            continue
-        # Call Zephyr LLM
-        response = call_zephyr(json.dumps(person), category_name, job_titles)
         if "Yes" in response:
             work_exps = person.get("work_experiences", [])
             non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
-            rec = {
                 "Name": person.get("name"),
                 "Email": person.get("email"),
                 "Phone": person.get("phone"),
@@ -130,44 +100,40 @@ def get_top_candidates(category_name, job_titles, top_n=5):
                 "Skills": ", ".join(person.get("skills", [])),
                 "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
                 "Category": category_name
-            }
-            recommended.append(rec)
-            # Add to cache
-            df_cache = pd.concat([df_cache, pd.DataFrame([rec])], ignore_index=True)
-    # Save cache
-    if not df_cache.empty:
-        df_cache.to_csv(OUTPUT_FILE, index=False)
     if not recommended:
         return pd.DataFrame()
     df = pd.DataFrame(recommended)
-    # Sort by Salary (optional)
-    def parse_salary(s):
-        if isinstance(s, str) and s.startswith("$"):
-            return float(s.replace("$","").replace(",",""))
-        return float('inf')
-    df["Salary_sort"] = df["Salary"].apply(parse_salary)
     df = df.sort_values("Salary_sort").drop(columns=["Salary_sort"])
     return df.head(top_n)
 # ----------------------------
-# Gradio Interface
 # ----------------------------
 def run_dashboard(category):
     if category not in CATEGORIES:
         return pd.DataFrame()
-    return get_top_candidates(category, CATEGORIES[category], top_n=5)
 demo = gr.Interface(
     fn=run_dashboard,
     inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
-    outputs=gr.Dataframe(label="Top 5 Recommended Candidates"),
     title="Startup Candidate Dashboard - Zephyr-7B-Beta",
-    description="Top 5 candidates per category using Zephyr LLM with outputs.csv caching."
 )
 if __name__ == "__main__":

 import json
 import os
 import requests
+from functools import lru_cache
 JSON_FILE = "form-submissions-1.json"
 MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 }
 # ----------------------------
+# LLM caching
 # ----------------------------
+@lru_cache(maxsize=512)
+def call_zephyr_cached(candidate_str, category_name, job_titles_tuple):
     """
+    Cached Zephyr LLM call.
     """
     try:
         prompt = f"""
 You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
+The category includes the following job titles: {list(job_titles_tuple)}
+Candidate JSON: {candidate_str}
 Respond only 'Yes' if suitable, otherwise 'No'.
 """
         print("Zephyr call failed:", e)
         return "No"
+# ----------------------------
+# Candidate filtering
+# ----------------------------
 def filter_candidates(category_name, job_titles):
+    data = json.load(open(JSON_FILE, encoding="utf-8"))
     filtered = []
     for person in data:
         work_exps = person.get("work_experiences", [])
     return filtered
 def get_top_candidates(category_name, job_titles, top_n=5):
     filtered_candidates = filter_candidates(category_name, job_titles)
     recommended = []
     for person in filtered_candidates:
+        candidate_str = json.dumps(person)
+        response = call_zephyr_cached(candidate_str, category_name, tuple(job_titles))
         if "Yes" in response:
             work_exps = person.get("work_experiences", [])
             non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
+            recommended.append({
                 "Name": person.get("name"),
                 "Email": person.get("email"),
                 "Phone": person.get("phone"),
                 "Skills": ", ".join(person.get("skills", [])),
                 "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
                 "Category": category_name
+            })
     if not recommended:
         return pd.DataFrame()
     df = pd.DataFrame(recommended)
+    df["Salary_sort"] = df["Salary"].apply(lambda s: float(s.replace("$","").replace(",","")) if isinstance(s,str) and s.startswith("$") else float('inf'))
     df = df.sort_values("Salary_sort").drop(columns=["Salary_sort"])
     return df.head(top_n)
 # ----------------------------
+# Gradio interface
 # ----------------------------
 def run_dashboard(category):
     if category not in CATEGORIES:
         return pd.DataFrame()
+    df = get_top_candidates(category, CATEGORIES[category], top_n=5)
+    return df
+def download_csv(category):
+    df = get_top_candidates(category, CATEGORIES[category], top_n=5)
+    if df.empty:
+        return None
+    file_path = "/tmp/outputs.csv"
+    df.to_csv(file_path, index=False)
+    return file_path
 demo = gr.Interface(
     fn=run_dashboard,
     inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
+    outputs=[gr.Dataframe(label="Top 5 Recommended Candidates"),
+             gr.File(label="Download CSV", file_types=[".csv"], file_path_func=download_csv)],
     title="Startup Candidate Dashboard - Zephyr-7B-Beta",
+    description="Top 5 candidates per category using Zephyr LLM with caching. You can download the CSV."
 )
 if __name__ == "__main__":