curiouscurrent committed on
Commit
5d1b2b2
·
verified ·
1 Parent(s): edfa5fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -73
app.py CHANGED
@@ -14,7 +14,7 @@ MODEL_ID = "google/flan-t5-large"
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
15
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
16
  OUTPUT_FILE = "/tmp/outputs.csv"
17
- BATCH_SIZE = 50
18
 
19
  if not HF_API_TOKEN:
20
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
@@ -34,28 +34,33 @@ CATEGORIES = {
34
  }
35
 
36
  # ----------------------------
37
- # LLM cached call (Updated for flexibility)
38
  # ----------------------------
39
- @lru_cache(maxsize=512)
40
- def call_llm(candidate_str, category_name, job_titles_tuple):
41
- # 🚩 FLEXIBLE PROMPT: Asking the LLM to find "potential match" instead of "strong alignment"
42
  prompt = f"""
43
- You are an HR assistant. Your task is to quickly filter candidates.
44
- Based ONLY on the 'Roles' and 'Skills' fields provided in the candidate JSON, determine if the candidate is a potential match for the category '{category_name}'.
45
- The category includes the following job titles: {list(job_titles_tuple)}
46
- Candidate JSON: {candidate_str}
47
- Your entire response must be ONLY one word: 'Yes' or 'No'.
 
 
 
 
 
 
 
48
  """
49
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
50
 
51
- # 🚩 FLEXIBLE PARAMETERS: Increased max_new_tokens slightly and added temperature
52
- # Temperature > 0 encourages more diverse/flexible interpretation.
53
  payload = {
54
  "inputs": prompt,
55
  "parameters": {
56
- "max_new_tokens": 20,
 
57
  "return_full_text": False,
58
- "temperature": 0.5 # Add some randomness to avoid ultra-strict "No"
59
  }
60
  }
61
 
@@ -64,34 +69,30 @@ Your entire response must be ONLY one word: 'Yes' or 'No'.
64
  f"https://api-inference.huggingface.co/models/{MODEL_ID}",
65
  headers=headers,
66
  data=json.dumps(payload),
67
- timeout=60
68
  )
69
  response.raise_for_status()
70
  result = response.json()
71
 
72
  if isinstance(result, dict) and "error" in result:
73
  print(f"LLM API Error: {result.get('error')}")
74
- return "No"
75
 
76
- generated_text = result[0].get("generated_text", "No").strip().lower()
 
77
 
78
- # Check for 'yes' and 'no' keywords
79
- if "yes" in generated_text:
80
- return "Yes"
81
- # Only return "No" if "yes" wasn't found, otherwise it's likely a match failure
82
- elif "no" in generated_text:
83
- return "No"
84
- else:
85
- # Fallback for unexpected output (e.g., model generates preamble text)
86
- print(f"Unexpected LLM output: '{generated_text}'. Defaulting to 'No'.")
87
- return "No"
88
 
89
  except Exception as e:
90
- print("LLM call failed:", e)
91
- return "No"
92
 
93
  # ----------------------------
94
- # Step 1: Filter by roles
95
  # ----------------------------
96
  def filter_by_roles(category_name):
97
  job_titles = CATEGORIES[category_name]
@@ -127,14 +128,15 @@ def filter_by_roles(category_name):
127
  })
128
 
129
  if not filtered:
130
- return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'."
131
 
132
  df = pd.DataFrame(filtered)
133
  df.to_csv(FILTERED_CSV, index=False)
134
- return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM check."
 
135
 
136
  # ----------------------------
137
- # Step 2: LLM recommendations
138
  # ----------------------------
139
  def llm_recommendations(category_name):
140
  job_titles = CATEGORIES[category_name]
@@ -143,59 +145,50 @@ def llm_recommendations(category_name):
143
  df_filtered, msg = filter_by_roles(category_name)
144
  if df_filtered.empty:
145
  return msg
146
-
147
- df_filtered = pd.read_csv(FILTERED_CSV)
148
- df_filtered = df_filtered[df_filtered["Category"] == category_name]
149
 
150
  if df_filtered.empty:
151
  return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."
152
 
153
- recommended = []
154
- df_filtered_clean = df_filtered.fillna('N/A')
155
- filtered_candidates = df_filtered_clean.to_dict(orient="records")
156
-
157
- for i in range(0, len(filtered_candidates), BATCH_SIZE):
158
- batch = filtered_candidates[i:i+BATCH_SIZE]
159
- for person in batch:
160
- candidate_info = {
161
- "Name": person.get("Name"),
162
- "Roles": person.get("Roles"),
163
- "Skills": person.get("Skills")
164
- }
165
- candidate_str = json.dumps(candidate_info)
166
- response = call_llm(candidate_str, category_name, tuple(job_titles))
167
-
168
- if response == "Yes":
169
- recommended.append(person)
170
 
171
- if not recommended:
172
- return f"LLM determined no candidates are suitable for the '{category_name}' category. Try another category or loosen the initial role filters."
173
 
174
- df_rec = pd.DataFrame(recommended)
 
175
 
176
- def parse_salary(s):
177
- try:
178
- return float(str(s).replace("$","").replace(",","").replace("N/A", str(float('inf'))))
179
- except:
180
- return float('inf')
181
-
182
- df_rec["Salary_sort"] = df_rec["Salary"].apply(parse_salary)
183
- df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
184
- df_top5 = df_rec.head(5)
185
 
186
- candidate_names = df_top5["Name"].tolist()
 
 
 
 
187
 
188
- output_text = f"Top {len(candidate_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
189
 
190
- for i, name in enumerate(candidate_names):
191
  output_text += f"{i+1}. {name}\n"
192
 
193
- output_text += "\nThese candidates were selected as a potential match by the LLM and sorted by lowest expected salary."
194
 
195
  return output_text
196
 
197
  # ----------------------------
198
- # Show first 5 raw JSON candidates
199
  # ----------------------------
200
  def show_first_candidates():
201
  try:
@@ -208,10 +201,10 @@ def show_first_candidates():
208
  return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
209
 
210
  # ----------------------------
211
- # Gradio interface
212
  # ----------------------------
213
  with gr.Blocks() as app:
214
- gr.Markdown("# Candidate Recommendation Engine")
215
 
216
  gr.Markdown("#### Raw JSON Preview: First 5 Candidates")
217
  gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
@@ -228,7 +221,7 @@ with gr.Blocks() as app:
228
  gr.Markdown("---")
229
 
230
  # Step 2: LLM Recommendations
231
- llm_button = gr.Button("3. Get LLM Recommendations (Text Summary)")
232
  llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
233
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
234
 
 
14
  HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
15
  FILTERED_CSV = "/tmp/filtered_candidates.csv"
16
  OUTPUT_FILE = "/tmp/outputs.csv"
17
+ BATCH_SIZE = 50 # Not used for LLM, but kept for consistency
18
 
19
  if not HF_API_TOKEN:
20
  raise ValueError("HF_API_TOKEN not found in environment. Add it in Space Secrets.")
 
34
  }
35
 
36
  # ----------------------------
37
+ # New LLM Call for Ranking
38
  # ----------------------------
39
+ @lru_cache(maxsize=1) # Cache only the last ranking request
40
+ def rank_candidates(candidates_list_str, category_name, job_titles_tuple):
 
41
  prompt = f"""
42
+ You are an HR expert specializing in the '{category_name}' category.
43
+ Your goal is to rank the provided candidates based on two criteria:
44
+ 1. **Experience**: Inferred from relevant roles and extensive skills.
45
+ 2. **Educational Background**: Assume candidates with technical roles/skills have a strong technical education (e.g., MSc/PhD).
46
+
47
+ The target roles are: {list(job_titles_tuple)}
48
+
49
+ Review the following list of candidates (JSON format):
50
+ {candidates_list_str}
51
+
52
+ **Task**: Select the **top 5 most promising candidates** from this list.
53
+ **Output Format**: Respond ONLY with a comma-separated list of the candidates' **Names**. Do not include any numbers, prefixes, or commentary.
54
  """
55
  headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
56
 
 
 
57
  payload = {
58
  "inputs": prompt,
59
  "parameters": {
60
+ # Set max_new_tokens higher since the output is a list of names
61
+ "max_new_tokens": 100,
62
  "return_full_text": False,
63
+ "temperature": 0.3 # Use low temperature for focused extraction
64
  }
65
  }
66
 
 
69
  f"https://api-inference.huggingface.co/models/{MODEL_ID}",
70
  headers=headers,
71
  data=json.dumps(payload),
72
+ timeout=120 # Increased timeout for larger request
73
  )
74
  response.raise_for_status()
75
  result = response.json()
76
 
77
  if isinstance(result, dict) and "error" in result:
78
  print(f"LLM API Error: {result.get('error')}")
79
+ return []
80
 
81
+ # The model should return a string like "Name1, Name2, Name3"
82
+ generated_text = result[0].get("generated_text", "").strip()
83
 
84
+ # Parse the comma-separated list of names
85
+ # Clean up the output by splitting by comma, stripping whitespace, and removing empty strings
86
+ ranked_names = [name.strip() for name in generated_text.split(',') if name.strip()]
87
+
88
+ return ranked_names
 
 
 
 
 
89
 
90
  except Exception as e:
91
+ print("LLM ranking call failed:", e)
92
+ return []
93
 
94
  # ----------------------------
95
+ # Step 1: Filter by roles (Unchanged)
96
  # ----------------------------
97
  def filter_by_roles(category_name):
98
  job_titles = CATEGORIES[category_name]
 
128
  })
129
 
130
  if not filtered:
131
+ return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'. The LLM can't proceed."
132
 
133
  df = pd.DataFrame(filtered)
134
  df.to_csv(FILTERED_CSV, index=False)
135
+ return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM ranking."
136
+
137
 
138
  # ----------------------------
139
+ # Step 2: LLM recommendations (Modified for Ranking)
140
  # ----------------------------
141
  def llm_recommendations(category_name):
142
  job_titles = CATEGORIES[category_name]
 
145
  df_filtered, msg = filter_by_roles(category_name)
146
  if df_filtered.empty:
147
  return msg
148
+ else:
149
+ df_filtered = pd.read_csv(FILTERED_CSV)
150
+ df_filtered = df_filtered[df_filtered["Category"] == category_name]
151
 
152
  if df_filtered.empty:
153
  return f"No filtered candidates found for category '{category_name}'. Run Step 1 first."
154
 
155
+ # Select the top 10 candidates based on alphabetical name sort (arbitrary tie-breaker)
156
+ # and prepare the data for the single LLM ranking call.
157
+ df_top_for_llm = df_filtered.head(10).fillna('N/A')
158
+
159
+ # Only send necessary info for ranking
160
+ candidates_to_rank = df_top_for_llm[["Name", "Roles", "Skills"]].to_dict(orient="records")
161
+ candidates_list_str = json.dumps(candidates_to_rank, indent=2)
162
+
163
+ # 🚩 Single LLM call to rank the batch
164
+ ranked_names = rank_candidates(candidates_list_str, category_name, tuple(job_titles))
 
 
 
 
 
 
 
165
 
166
+ if not ranked_names:
167
+ return f"LLM failed to extract or rank suitable candidates for '{category_name}'. Check API status or model availability."
168
 
169
+ # Reorder the original DataFrame based on the names returned by the LLM
170
+ name_to_rank = {name: i for i, name in enumerate(ranked_names)}
171
 
172
+ # Filter to only include the names returned by the LLM
173
+ df_ranked = df_filtered[df_filtered["Name"].isin(ranked_names)].copy()
 
 
 
 
 
 
 
174
 
175
+ # Use the rank dictionary to sort the DataFrame
176
+ df_ranked["LLM_Rank"] = df_ranked["Name"].map(name_to_rank)
177
+ df_top5 = df_ranked.sort_values(by="LLM_Rank").head(5).drop(columns=["LLM_Rank"])
178
+
179
+ final_names = df_top5["Name"].tolist()
180
 
181
+ output_text = f"Top {len(final_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
182
 
183
+ for i, name in enumerate(final_names):
184
  output_text += f"{i+1}. {name}\n"
185
 
186
+ output_text += "\nThese candidates were ranked by the LLM based on inferred experience (roles/skills) and assumed education."
187
 
188
  return output_text
189
 
190
  # ----------------------------
191
+ # Show first 5 raw JSON candidates (Unchanged)
192
  # ----------------------------
193
  def show_first_candidates():
194
  try:
 
201
  return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
202
 
203
  # ----------------------------
204
+ # Gradio interface (Unchanged)
205
  # ----------------------------
206
  with gr.Blocks() as app:
207
+ gr.Markdown("# Candidate Recommendation Engine (Experience & Education Focus)")
208
 
209
  gr.Markdown("#### Raw JSON Preview: First 5 Candidates")
210
  gr.Dataframe(show_first_candidates(), label="First 5 JSON Entries")
 
221
  gr.Markdown("---")
222
 
223
  # Step 2: LLM Recommendations
224
+ llm_button = gr.Button("3. Get LLM Recommendations (Experience & Education Ranking)")
225
  llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
226
  llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
227