Spaces:

curiouscurrent
/

appliedai

Sleeping

App Files Community

curiouscurrent committed on Sep 26, 2025

Commit

3aeed34

verified ·

1 Parent(s): fede7d8

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -78

app.py CHANGED Viewed

@@ -7,17 +7,14 @@ import requests
 # ----------------------------
 # CONFIG
 # ----------------------------
-JSON_FILE = "form-submissions-1.json"  # local JSON file in the Space
 MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
-# Hugging Face token from Space Secrets
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 if not HF_API_TOKEN:
     raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
-# ----------------------------
-# CATEGORIES
-# ----------------------------
 CATEGORIES = {
     "AI": [
         "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
@@ -26,132 +23,151 @@ CATEGORIES = {
         "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
         "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
     ],
-    "Marketing": [
-        "Marketing Specialist","Sales Agent","Salesman","Sales Associate"
-    ],
-    "CTO": [
-        "Chief Technology Officer","CTO"
-    ],
-    "Legal": [
-        "Legal Specialist","Attorney","Legal Intern","Lawyer"
-    ],
-    "Finance": [
-        "Financial Analyst","Financial Advisor"
-    ]
 }
 # ----------------------------
-# HELPER FUNCTIONS
 # ----------------------------
 def fetch_json_local(file_path):
     with open(file_path, "r", encoding="utf-8") as f:
         return json.load(f)
-def call_zephyr(prompt):
-    headers = {
-        "Authorization": f"Bearer {HF_API_TOKEN}",
-        "Content-Type": "application/json"
-    }
-    payload = {"inputs": prompt}
-    response = requests.post(
-        f"https://api-inference.huggingface.co/models/{MODEL_ID}",
-        headers=headers,
-        data=json.dumps(payload),
-        timeout=60
-    )
-    if response.status_code != 200:
-        return f"Zephyr API error: {response.text}"
-    result = response.json()
-    if isinstance(result, dict) and "error" in result:
-        return f"Zephyr API error: {result['error']}"
-    return result[0].get("generated_text", "")
-def filter_candidates_by_category(category_name, job_titles):
     """
-    Step 1: Filter candidates based on actual work experience roles.
     """
     data = fetch_json_local(JSON_FILE)
     filtered = []
     for person in data:
         work_exps = person.get("work_experiences", [])
-        if len(work_exps) == 0:
             continue
-        # Exclude candidates who ONLY have Full Stack roles
         non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
         if not non_fullstack_roles:
             continue
-        # Include if any role matches the category
         if any(role in job_titles for role in non_fullstack_roles):
             filtered.append(person)
     return filtered
-def get_final_recommendations(category_name, job_titles, top_n=5):
     """
-    Step 2: Use Zephyr LLM for final recommendation and pick top N candidates.
     """
-    candidates = filter_candidates_by_category(category_name, job_titles)
-    recommended = []
-    for person in candidates:
-        prompt = f"""
-You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
-The category includes the following job titles: {job_titles}
-Candidate JSON: {json.dumps(person)}
-Based on their work experience, skills, and education, respond only 'Yes' if suitable, otherwise 'No'.
-"""
-        response = call_zephyr(prompt)
-        if response and "Yes" in response:
             work_exps = person.get("work_experiences", [])
             non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
-            recommended.append({
                 "Name": person.get("name"),
                 "Email": person.get("email"),
                 "Phone": person.get("phone"),
                 "Location": person.get("location"),
                 "Roles": ", ".join(non_fullstack_roles),
                 "Skills": ", ".join(person.get("skills", [])),
-                "Salary": person.get("annual_salary_expectation", {}).get("full-time", "N/A")
-            })
-    if len(recommended) == 0:
         return pd.DataFrame()
     df = pd.DataFrame(recommended)
-    # Optional: Sort by salary if available (ascending)
     def parse_salary(s):
         if isinstance(s, str) and s.startswith("$"):
             return float(s.replace("$","").replace(",",""))
         return float('inf')
-    if "Salary" in df.columns:
-        df["Salary_sort"] = df["Salary"].apply(parse_salary)
-        df = df.sort_values("Salary_sort")
-        df = df.drop(columns=["Salary_sort"])
-    return df.head(top_n)  # return top N candidates
 # ----------------------------
-# GRADIO INTERFACE
 # ----------------------------
 def run_dashboard(category):
     if category not in CATEGORIES:
         return pd.DataFrame()
-    df = get_final_recommendations(category, CATEGORIES[category], top_n=5)
-    return df
-category_options = list(CATEGORIES.keys())
 demo = gr.Interface(
     fn=run_dashboard,
-    inputs=gr.Dropdown(category_options, label="Select Category"),
     outputs=gr.Dataframe(label="Top 5 Recommended Candidates"),
-    live=False,
     title="Startup Candidate Dashboard - Zephyr-7B-Beta",
-    description="View top 5 final recommended candidates filtered by category using Zephyr LLM."
 )
 if __name__ == "__main__":

 # ----------------------------
 # CONFIG
 # ----------------------------
+JSON_FILE = "form-submissions-1.json"
+OUTPUT_FILE = "outputs.csv"  # Cache LLM recommendations
 MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 if not HF_API_TOKEN:
     raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
 CATEGORIES = {
     "AI": [
         "AI/ML Ops Engineer","Senior Machine Learning Engineer","Principal Data Scientist",
         "AI Analyst","Senior Data Analyst","Automation Engineer","Senior Data Engineer",
         "Machine Learning Engineer","Data Engineer","Data Scientist","Data Analyst"
     ],
+    "Marketing": ["Marketing Specialist","Sales Agent","Salesman","Sales Associate"],
+    "CTO": ["Chief Technology Officer","CTO"],
+    "Legal": ["Legal Specialist","Attorney","Legal Intern","Lawyer"],
+    "Finance": ["Financial Analyst","Financial Advisor"]
 }
 # ----------------------------
+# Helper functions
 # ----------------------------
 def fetch_json_local(file_path):
     with open(file_path, "r", encoding="utf-8") as f:
         return json.load(f)
+def call_zephyr(candidate_json, category_name, job_titles):
+    """
+    Call Zephyr LLM for candidate recommendation
+    """
+    try:
+        prompt = f"""
+You are an HR assistant. Review this candidate and determine if they are suitable for the category '{category_name}'.
+The category includes the following job titles: {job_titles}
+Candidate JSON: {candidate_json}
+Respond only 'Yes' if suitable, otherwise 'No'.
+"""
+        headers = {
+            "Authorization": f"Bearer {HF_API_TOKEN}",
+            "Content-Type": "application/json"
+        }
+        payload = {"inputs": prompt}
+        response = requests.post(
+            f"https://api-inference.huggingface.co/models/{MODEL_ID}",
+            headers=headers,
+            data=json.dumps(payload),
+            timeout=60
+        )
+        response.raise_for_status()
+        result = response.json()
+        if isinstance(result, dict) and "error" in result:
+            return "No"
+        return result[0].get("generated_text","No")
+    except Exception as e:
+        print("Zephyr call failed:", e)
+        return "No"
+def filter_candidates(category_name, job_titles):
     """
+    Step 1: Filter candidates based on work experience
     """
     data = fetch_json_local(JSON_FILE)
     filtered = []
     for person in data:
         work_exps = person.get("work_experiences", [])
+        if not work_exps:
             continue
         non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
         if not non_fullstack_roles:
             continue
         if any(role in job_titles for role in non_fullstack_roles):
             filtered.append(person)
+    print(f"Filtered {len(filtered)} candidates for {category_name}")
     return filtered
+def get_top_candidates(category_name, job_titles, top_n=5):
     """
+    Step 2: Use outputs.csv cache, call Zephyr only if needed
     """
+    # Load cache if exists
+    if os.path.exists(OUTPUT_FILE):
+        df_cache = pd.read_csv(OUTPUT_FILE)
+    else:
+        df_cache = pd.DataFrame()
+    filtered_candidates = filter_candidates(category_name, job_titles)
+    recommended = []
+    for person in filtered_candidates:
+        candidate_id = person.get("email")  # unique identifier
+        # Check if already cached
+        if not df_cache.empty and candidate_id in df_cache["Email"].values and category_name in df_cache["Category"].values:
+            row = df_cache[(df_cache["Email"]==candidate_id) & (df_cache["Category"]==category_name)].iloc[0]
+            recommended.append({
+                "Name": row["Name"],
+                "Email": row["Email"],
+                "Phone": row["Phone"],
+                "Location": row["Location"],
+                "Roles": row["Roles"],
+                "Skills": row["Skills"],
+                "Salary": row["Salary"]
+            })
+            continue
+        # Call Zephyr LLM
+        response = call_zephyr(json.dumps(person), category_name, job_titles)
+        if "Yes" in response:
             work_exps = person.get("work_experiences", [])
             non_fullstack_roles = [exp.get("roleName") for exp in work_exps if "full stack developer" not in exp.get("roleName","").lower()]
+            rec = {
                 "Name": person.get("name"),
                 "Email": person.get("email"),
                 "Phone": person.get("phone"),
                 "Location": person.get("location"),
                 "Roles": ", ".join(non_fullstack_roles),
                 "Skills": ", ".join(person.get("skills", [])),
+                "Salary": person.get("annual_salary_expectation", {}).get("full-time","N/A"),
+                "Category": category_name
+            }
+            recommended.append(rec)
+            # Add to cache
+            df_cache = pd.concat([df_cache, pd.DataFrame([rec])], ignore_index=True)
+    # Save cache
+    if not df_cache.empty:
+        df_cache.to_csv(OUTPUT_FILE, index=False)
+    if not recommended:
         return pd.DataFrame()
     df = pd.DataFrame(recommended)
+    # Sort by Salary (optional)
     def parse_salary(s):
         if isinstance(s, str) and s.startswith("$"):
             return float(s.replace("$","").replace(",",""))
         return float('inf')
+    df["Salary_sort"] = df["Salary"].apply(parse_salary)
+    df = df.sort_values("Salary_sort").drop(columns=["Salary_sort"])
+    return df.head(top_n)
 # ----------------------------
+# Gradio Interface
 # ----------------------------
 def run_dashboard(category):
     if category not in CATEGORIES:
         return pd.DataFrame()
+    return get_top_candidates(category, CATEGORIES[category], top_n=5)
 demo = gr.Interface(
     fn=run_dashboard,
+    inputs=gr.Dropdown(list(CATEGORIES.keys()), label="Select Category"),
     outputs=gr.Dataframe(label="Top 5 Recommended Candidates"),
     title="Startup Candidate Dashboard - Zephyr-7B-Beta",
+    description="Top 5 candidates per category using Zephyr LLM with outputs.csv caching."
 )
 if __name__ == "__main__":