Update app.py

app.py CHANGED
@@ -12,7 +12,6 @@ JSON_FILE = "form-submissions-1.json"
 # Using a suitable generative LLM (Flan-T5 Large)
 MODEL_ID = "google/flan-t5-large"
 HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
-# NOTE: Keeping these temp files for the filtering step, though output format changes
 FILTERED_CSV = "/tmp/filtered_candidates.csv"
 OUTPUT_FILE = "/tmp/outputs.csv"
 BATCH_SIZE = 50
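One observation on the config block above: `os.environ.get` silently yields `None` when the secret is missing, so every downstream API call fails with an auth error. An optional fail-fast guard, sketched here rather than taken from the commit:

import os

HF_API_TOKEN = os.environ.get("HF_API_TOKEN")
if not HF_API_TOKEN:
    # Without the token, every Inference API call will fail authentication;
    # failing loudly at startup beats a confusing per-request error.
    raise RuntimeError("HF_API_TOKEN is not set; add it as a Space secret.")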
@@ -35,24 +34,28 @@ CATEGORIES = {
 }
 
 # ----------------------------
-# LLM cached call
+# LLM cached call (Updated for flexibility)
 # ----------------------------
 @lru_cache(maxsize=512)
 def call_llm(candidate_str, category_name, job_titles_tuple):
+    # 🚩 FLEXIBLE PROMPT: Asking the LLM to find "potential match" instead of "strong alignment"
     prompt = f"""
-You are an HR assistant.
+You are an HR assistant. Your task is to quickly filter candidates.
+Based ONLY on the 'Roles' and 'Skills' fields provided in the candidate JSON, determine if the candidate is a potential match for the category '{category_name}'.
 The category includes the following job titles: {list(job_titles_tuple)}
 Candidate JSON: {candidate_str}
-
+Your entire response must be ONLY one word: 'Yes' or 'No'.
 """
     headers = {"Authorization": f"Bearer {HF_API_TOKEN}", "Content-Type": "application/json"}
 
-    #
+    # 🚩 FLEXIBLE PARAMETERS: Increased max_new_tokens slightly and added temperature
+    # Temperature > 0 encourages more diverse/flexible interpretation.
     payload = {
         "inputs": prompt,
         "parameters": {
-            "max_new_tokens":
-            "return_full_text": False
+            "max_new_tokens": 20,
+            "return_full_text": False,
+            "temperature": 0.5  # Add some randomness to avoid ultra-strict "No"
         }
     }
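The hunk above builds `headers` and `payload`, but the HTTP call itself sits outside the diff. As a minimal sketch only, assuming the standard hosted Inference API endpoint pattern and the `requests` library (neither is shown in this change; `send_payload` is a hypothetical name), the send step typically looks like:

import requests

API_URL = f"https://api-inference.huggingface.co/models/{MODEL_ID}"  # assumed endpoint pattern

def send_payload(payload, headers, timeout=30):
    # POST the JSON payload; .json() yields the list-of-dicts shape that the
    # later result[0].get("generated_text", ...) lookup expects.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()  # surface HTTP errors instead of parsing an error body
    return response.json()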
@@ -72,17 +75,19 @@ Respond only 'Yes' if suitable, otherwise 'No'.
 
         generated_text = result[0].get("generated_text", "No").strip().lower()
 
-        #
+        # Check for 'yes' and 'no' keywords
         if "yes" in generated_text:
             return "Yes"
+        # Only return "No" if "yes" wasn't found, otherwise it's likely a match failure
         elif "no" in generated_text:
             return "No"
         else:
+            # Fallback for unexpected output (e.g., model generates preamble text)
+            print(f"Unexpected LLM output: '{generated_text}'. Defaulting to 'No'.")
             return "No"
 
     except Exception as e:
         print("LLM call failed:", e)
-        # In case of API failure, it should not be cached as a negative result (but the lru_cache will cache the 'No')
         return "No"
 
 # ----------------------------
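The comment deleted in this hunk flags a genuine caveat: because the `except` branch returns "No", `@lru_cache` memoizes a transient API failure as a permanent rejection for that candidate. Since `functools.lru_cache` does not cache calls that raise, one workaround is to let the cached function raise and catch in an uncached wrapper; a sketch under the assumption that the request/parse logic is factored into a helper (the hypothetical `query_and_parse`):

from functools import lru_cache

@lru_cache(maxsize=512)
def call_llm_cached(candidate_str, category_name, job_titles_tuple):
    # query_and_parse is a hypothetical helper holding the prompt/POST/parse
    # logic; it raises on any failure, so nothing broken gets memoized.
    generated_text = query_and_parse(candidate_str, category_name, job_titles_tuple)
    return "Yes" if "yes" in generated_text else "No"

def call_llm(candidate_str, category_name, job_titles_tuple):
    try:
        return call_llm_cached(candidate_str, category_name, job_titles_tuple)
    except Exception as e:
        # The failure degrades to "No" for this call only; the same arguments
        # hit the API again next time instead of returning a cached rejection.
        print("LLM call failed (not cached):", e)
        return "No"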
@@ -122,25 +127,22 @@ def filter_by_roles(category_name):
         })
 
     if not filtered:
-        # Return a message instead of the CSV path
         return pd.DataFrame(), f"No candidates found matching roles for category '{category_name}'."
 
     df = pd.DataFrame(filtered)
     df.to_csv(FILTERED_CSV, index=False)
-    # Return a success message
     return df, f"{len(df)} candidates filtered by role for category '{category_name}'. Ready for LLM check."
 
 # ----------------------------
-# Step 2: LLM recommendations
+# Step 2: LLM recommendations
 # ----------------------------
 def llm_recommendations(category_name):
     job_titles = CATEGORIES[category_name]
 
     if not os.path.exists(FILTERED_CSV):
-        # Rerun filtering to ensure the CSV exists
         df_filtered, msg = filter_by_roles(category_name)
         if df_filtered.empty:
-            return msg
+            return msg
 
     df_filtered = pd.read_csv(FILTERED_CSV)
     df_filtered = df_filtered[df_filtered["Category"] == category_name]
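Worth noting about the hunk above: FILTERED_CSV persists in /tmp between clicks, so the `os.path.exists` check can pass while the file still holds a previous category's rows, and the `df_filtered["Category"] == category_name` mask then quietly yields an empty frame. A defensive sketch that re-filters in that case (`load_filtered` is a hypothetical helper, not part of the commit):

import os
import pandas as pd

def load_filtered(category_name):
    # Re-run the role filter whenever the cached CSV is missing OR holds
    # rows for a different category than the one currently selected.
    if os.path.exists(FILTERED_CSV):
        df = pd.read_csv(FILTERED_CSV)
        if not df.empty and (df["Category"] == category_name).any():
            return df[df["Category"] == category_name], None
    df, msg = filter_by_roles(category_name)
    if df.empty:
        return pd.DataFrame(), msg
    return df, None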
@@ -167,11 +169,10 @@ def llm_recommendations(category_name):
             recommended.append(person)
 
     if not recommended:
-        return f"LLM determined no candidates are suitable for the '{category_name}' category."
+        return f"LLM determined no candidates are suitable for the '{category_name}' category. Try another category or loosen the initial role filters."
 
     df_rec = pd.DataFrame(recommended)
 
-    # Sort by numeric salary to get the top 5 with lowest expected salary first
     def parse_salary(s):
         try:
             return float(str(s).replace("$","").replace(",","").replace("N/A", str(float('inf'))))
@@ -182,18 +183,14 @@ def llm_recommendations(category_name):
     df_rec = df_rec.sort_values("Salary_sort").drop(columns=["Salary_sort"])
     df_top5 = df_rec.head(5)
 
-    # 🚩 NEW: Generate Text Output
     candidate_names = df_top5["Name"].tolist()
 
-    if not candidate_names:
-        return f"LLM check passed, but sorting resulted in an empty list (unexpected). No recommendations to display."
-
     output_text = f"Top {len(candidate_names)} Recommended Candidates for the '{category_name}' Category:\n\n"
 
     for i, name in enumerate(candidate_names):
         output_text += f"{i+1}. {name}\n"
 
-    output_text += "\nThese candidates were selected
+    output_text += "\nThese candidates were selected as a potential match by the LLM and sorted by lowest expected salary."
 
     return output_text
 
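The `parse_salary` shown two hunks up leans on chained `.replace()` calls; it handles "$", ",", and the literal "N/A", but any other non-numeric value has to fall through to the `except`. A slightly more defensive variant, offered only as a sketch (the regex and the helper name are not from the app), extracts the first numeric run and sends everything unparseable to the end of the ascending sort:

import math
import re

_NUM = re.compile(r"\d[\d,]*(?:\.\d+)?")

def parse_salary_robust(s):
    # Pull the first numeric run out of strings like "$85,000" or "85000.50";
    # values without any digits ("N/A", "", None) sort last via +inf.
    match = _NUM.search(str(s))
    if not match:
        return math.inf
    return float(match.group().replace(",", ""))

It can replace the original parser wherever the `Salary_sort` column is computed, leaving the `sort_values("Salary_sort")`/`head(5)` steps unchanged.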
@@ -211,7 +208,7 @@ def show_first_candidates():
         return pd.DataFrame({"Error": [f"Failed to load JSON: {e}"]})
 
 # ----------------------------
-# Gradio interface
+# Gradio interface
 # ----------------------------
 with gr.Blocks() as app:
     gr.Markdown("# Candidate Recommendation Engine")
@@ -225,7 +222,6 @@ with gr.Blocks() as app:
     # Step 1: Filter by roles
     filter_button = gr.Button("2. Filter Candidates by Roles")
     filtered_df = gr.Dataframe(label="Filtered Candidates (Preview)")
-    # 🚩 CHANGE: Display a status message for filtering
     filter_status = gr.Textbox(label="Filter Status", placeholder="Click 'Filter Candidates by Roles' to start.")
     filter_button.click(filter_by_roles, inputs=[category_dropdown], outputs=[filtered_df, filter_status])
 
@@ -233,7 +229,6 @@ with gr.Blocks() as app:
 
     # Step 2: LLM Recommendations
     llm_button = gr.Button("3. Get LLM Recommendations (Text Summary)")
-    # 🚩 CHANGE: Output is now a Textbox
     llm_output_text = gr.Textbox(label="Top Candidate Recommendations Summary", lines=10, placeholder="Click 'Get LLM Recommendations' after Step 2 completes.")
     llm_button.click(llm_recommendations, inputs=[category_dropdown], outputs=[llm_output_text])
 
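The diff ends before any launch call; on a Space, a Blocks app is conventionally started with something like the following (assumed, not part of this commit):

if __name__ == "__main__":
    app.launch()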