Spaces:

curiouscurrent
/

appliedai

Sleeping

App Files Community

curiouscurrent committed on Sep 26, 2025

Commit

98ef19f

verified ·

1 Parent(s): 35bd947

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -72

app.py CHANGED Viewed

@@ -10,15 +10,15 @@ from functools import lru_cache
 # CONFIG
 # ----------------------------
 JSON_FILE = "form-submissions-1.json"
-# 🚩 FINAL FIX 1: Switching to the smallest, most reliable model
 MODEL_ID = "google/flan-t5-small"
-HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 FILTERED_CSV = "/tmp/filtered_candidates.csv"
 OUTPUT_FILE = "/tmp/outputs.csv"
 BATCH_SIZE = 50
 if not HF_API_TOKEN:
-    raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
 CATEGORIES = {
     "AI": [
@@ -35,69 +35,58 @@ CATEGORIES = {
 }
 # ----------------------------
-# LLM Call for Ranking (Model Switched)
 # ----------------------------
-@lru_cache(maxsize=1)
-def rank_candidates(candidates_list_str, category_name, job_titles_tuple):
     prompt = f"""
-You are an HR expert specializing in the '{category_name}' category.
-Your goal is to rank the provided candidates based on two criteria:
-1. **Experience**: Inferred from relevant roles and extensive skills.
-2. **Educational Background**: Assume candidates with technical roles/skills have a strong technical education (e.g., MSc/PhD).
-The target roles are: {list(job_titles_tuple)}
-Review the following list of candidates (JSON format):
-{candidates_list_str}
-**Task**: Select the **top 5 most promising candidates** from this list.
-**Output Format**: Respond ONLY with a numbered list (1. Name, 2. Name, etc.) of the candidates' **Names**. Do not include any commentary.
 """
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
     payload = {
         "inputs": prompt,
         "parameters": {
-            "max_new_tokens": 150,
             "return_full_text": False,
-            "temperature": 0.3
         }
     }
     try:
-        # NOTE: Flan-T5 Small should be much faster, but we keep the long timeout as a safety net.
         response = requests.post(
             f"https://api-inference.huggingface.co/models/{MODEL_ID}",
             headers=headers,
             data=json.dumps(payload),
-            timeout=120
         )
         response.raise_for_status()
         result = response.json()
-        if isinstance(result, dict) and "error" in result:
-             print(f"LLM API Error: {result.get('error')}")
-             return []
-        generated_text = result[0].get("generated_text", "").strip()
-        # 🚩 FINAL FIX 2: Slightly more permissive regex to capture common list formats (1., 1) or 1 -)
-        ranked_names = []
-        # Looks for: (1) start of line, (2) 1 or more digits, (3) a separator (dot, paren, or hyphen), (4) capture the rest
-        for match in re.findall(r'^\s*\d+[\.\)\-]\s*(.+)', generated_text, re.MULTILINE):
-            name = match.strip()
-            # Clean up potential trailing text (e.g., a candidate's description the model added)
-            name = re.sub(r'[,)].*$', '', name).strip()
-            if name:
-                # Only include names that are plausible (not too short)
-                if len(name.split()) >= 2 or len(name) > 4:
-                    ranked_names.append(name)
-        return ranked_names
     except Exception as e:
-        print("LLM ranking call failed:", e)
-        return []
 # ----------------------------
 # Step 1: Filter by roles (Unchanged)
@@ -140,11 +129,11 @@ def filter_by_roles(category_name):
     df = pd.DataFrame(filtered)
     df.to_csv(FILTERED_CSV, index=False)
-    return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM ranking."
 # ----------------------------
-# Step 2: LLM recommendations (Robust Ranking Logic)
 # ----------------------------
 def llm_recommendations(category_name):
     job_titles = CATEGORIES[category_name]
@@ -160,43 +149,54 @@ def llm_recommendations(category_name):
     if df_filtered.empty:
         return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."
-    # Select top 30 candidates for the LLM to review
-    df_top_for_llm = df_filtered.head(30).fillna('N/A')
-    # Only send necessary info for ranking
-    candidates_to_rank = df_top_for_llm[["Name", "Roles", "Skills"]].to_dict(orient="records")
-    candidates_list_str = json.dumps(candidates_to_rank, indent=2)
-    ranked_names = rank_candidates(candidates_list_str, category_name, tuple(job_titles))
-    if not ranked_names:
-        return f"LLM failed to extract or rank suitable candidates for '{category_name}'. Final troubleshooting steps: 1. Manually verify your HF_API_TOKEN is correct. 2. If the token is correct, the issue is with the data provided, which is causing the model to generate unusable output."
-    # Reorder the original DataFrame based on the names returned by the LLM
-    name_to_rank = {name: i for i, name in enumerate(ranked_names)}
-    # Filter to only include the names returned by the LLM
-    df_ranked = df_filtered[df_filtered["Name"].isin(ranked_names)].copy()
-    # Use the rank dictionary to sort the DataFrame
-    df_ranked["LLM_Rank"] = df_ranked["Name"].map(name_to_rank)
-    # Drop candidates the LLM mentioned but weren't in the original filter list
-    df_ranked.dropna(subset=['LLM_Rank'], inplace=True)
-    df_top5 = df_ranked.sort_values(by="LLM_Rank").head(5)
     final_names = df_top5["Name"].tolist()
-    if not final_names:
-        return f"The LLM returned names, but none matched the candidates available for ranking in '{category_name}'. This suggests the names in your JSON data do not exactly match the names generated by the LLM (e.g., 'John Smith' vs 'Mr. John Smith')."
     output_text = f"Top {len(final_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
     for i, name in enumerate(final_names):
-        output_text += f"{i+1}. {name}\n"
-    output_text += "\nThese candidates were ranked by the LLM based on inferred experience and assumed education."
     return output_text
@@ -214,12 +214,13 @@ def show_first_candidates():
         return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
 # ----------------------------
-# Gradio interface (Unchanged)
 # ----------------------------
 with gr.Blocks() as app:
-    gr.Markdown("# Candidate Recommendation Engine (Final Robust Version)")
-    gr.Markdown("#### Raw JSON Preview: First 5 Candidates")
     gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
     gr.Markdown("---")
@@ -234,9 +235,10 @@ with gr.Blocks() as app:
     gr.Markdown("---")
     # Step 2: LLM Recommendations
-    llm_button = gr.Button("3. Get LLM Recommendations (Experience & Education Ranking)")
     llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
     llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
 if __name__ == "__main__":
-    app.launch()

 # CONFIG
 # ----------------------------
 JSON_FILE = "form-submissions-1.json"
 MODEL_ID = "google/flan-t5-small"
+# NOTE: HF_API_TOKEN MUST be set in your environment variables/Space secrets.
+HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
 FILTERED_CSV = "/tmp/filtered_candidates.csv"
 OUTPUT_FILE = "/tmp/outputs.csv"
 BATCH_SIZE = 50
 if not HF_API_TOKEN:
+    pass
 CATEGORIES = {
     "AI": [
 }
 # ----------------------------
+# LLM Call for Scoring (Focus: Role Experience ONLY)
 # ----------------------------
+@lru_cache(maxsize=512)
+def score_candidate(candidate_str, category_name, job_titles_tuple):
+    if not HF_API_TOKEN:
+         print("API Token is missing. Returning score 0.")
+         return 0
     prompt = f"""
+You are an HR assistant. Your task is to rate a candidate's suitability based ONLY on their previous job roles.
+Rate the suitability of the following candidate on a scale of 1 (Lowest) to 10 (Highest).
+The score must reflect how closely the candidate's 'Roles' align with the target job titles.
+The target roles for the '{category_name}' category are: {list(job_titles_tuple)}
+Candidate JSON: {candidate_str}
+**Task**: Respond ONLY with the rating number (an integer from 1 to 10).
 """
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
     payload = {
         "inputs": prompt,
         "parameters": {
+            "max_new_tokens": 5,
             "return_full_text": False,
+            "temperature": 0.1
         }
     }
     try:
         response = requests.post(
             f"https://api-inference.huggingface.co/models/{MODEL_ID}",
             headers=headers,
             data=json.dumps(payload),
+            timeout=60
         )
         response.raise_for_status()
         result = response.json()
+        generated_text = result[0].get("generated_text", "0").strip()
+        match = re.search(r'\d+', generated_text)
+        if match:
+            score = int(match.group(0))
+            return max(1, min(10, score))
+        return 0
     except Exception as e:
+        print(f"LLM scoring call failed for candidate (API/Network Error): {e}")
+        return 0
 # ----------------------------
 # Step 1: Filter by roles (Unchanged)
     df = pd.DataFrame(filtered)
     df.to_csv(FILTERED_CSV, index=False)
+    return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM scoring."
 # ----------------------------
+# Step 2: LLM recommendations (Scoring, Sorting, and Output)
 # ----------------------------
 def llm_recommendations(category_name):
     job_titles = CATEGORIES[category_name]
     if df_filtered.empty:
         return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."
+    # Prepare for scoring
+    df_filtered_clean = df_filtered.fillna('N/A')
+    filtered_candidates = df_filtered_clean.to_dict(orient="records")
+    scores = []
+    for person in filtered_candidates:
+        candidate_info = {
+            "Name": person.get("Name"),
+            "Roles": person.get("Roles"),
+            "Skills": person.get("Skills")
+        }
+        candidate_str = json.dumps(candidate_info)
+        score = score_candidate(candidate_str, category_name, tuple(job_titles))
+        scores.append(score)
+    df_filtered["LLM_Score"] = scores
+    df_recommended = df_filtered[df_filtered["LLM_Score"] > 0].copy()
+    if df_recommended.empty:
+        if not HF_API_TOKEN:
+            return "❌ LLM failed: The HF_API_TOKEN is not set or is invalid. Set the token and try again."
+        return f"LLM scored all candidates 0. The candidates' roles are deemed irrelevant by the LLM for '{category_name}'."
+    def parse_salary(s):
+        try:
+            return float(str(s).replace("$","").replace(",","").replace("N/A", str(float('inf'))))
+        except:
+            return float('inf')
+    df_recommended["Salary_sort"] = df_recommended["Salary"].apply(parse_salary)
+    df_top5 = df_recommended.sort_values(
+        by=['LLM_Score', 'Salary_sort'],
+        ascending=[False, True]
+    ).head(5)
     final_names = df_top5["Name"].tolist()
     output_text = f"Top {len(final_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
     for i, name in enumerate(final_names):
+        score = df_top5.iloc[i]['LLM_Score']
+        output_text += f"{i+1}. {name} (Suitability Score: {score}/10)\n"
+    output_text += "\nThese candidates were ranked by the LLM based **only on the alignment of their previous job roles** with the target roles, using expected salary as a tie-breaker."
     return output_text
         return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
 # ----------------------------
+# Gradio interface (Updated Heading and Launch)
 # ----------------------------
 with gr.Blocks() as app:
+    # 🚩 CHANGE: Updated Heading
+    gr.Markdown("# 🤖 Candidate Selection (Role-Based Scoring)")
+    gr.Markdown("#### 🔍 Raw JSON Preview: First 5 Candidates")
     gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
     gr.Markdown("---")
     gr.Markdown("---")
     # Step 2: LLM Recommendations
+    llm_button = gr.Button("3. Get LLM Recommendations (Role Experience Ranking)")
     llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
     llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
 if __name__ == "__main__":
+    # 🚩 CHANGE: Set share=True to generate a public link
+    app.launch(share=True)