Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import pandas as pd | |
| import numpy as np | |
| import ast | |
| from dotenv import load_dotenv | |
| from shiny import reactive, render, ui | |
| import google.generativeai as genai | |
| from google.generativeai.types import FunctionDeclaration, Tool | |
| from google.api_core.exceptions import ResourceExhausted | |
| import markdown | |
| from context import get_all_candidates, get_all_jobs | |
| load_dotenv() | |
| # === Tool Function === | |
| def correlate_columns(df: pd.DataFrame, col1: str, col2: str) -> dict: | |
| if col1 not in df.columns or col2 not in df.columns: | |
| return {"error": f"One or both columns not found: '{col1}', '{col2}'"} | |
| if col1 == col2: | |
| return {"error": "Cannot correlate a column with itself."} | |
| subset = df[[col1, col2]].dropna() | |
| for col in [col1, col2]: | |
| if subset[col].dtype == "object" or pd.api.types.is_categorical_dtype(subset[col]): | |
| subset[col], _ = pd.factorize(subset[col]) | |
| try: | |
| return subset.corr(method="pearson").to_dict() | |
| except Exception as e: | |
| return {"error": str(e)} | |
| # === Gemini Tool Setup === | |
| correlation_func_schema = FunctionDeclaration( | |
| name="correlate_columns", | |
| description="Calculate the Pearson correlation between two candidate variables.", | |
| parameters={ | |
| "type": "object", | |
| "properties": { | |
| "col1": {"type": "string", "description": "First column"}, | |
| "col2": {"type": "string", "description": "Second column"} | |
| }, | |
| "required": ["col1", "col2"] | |
| } | |
| ) | |
| correlation_tool = Tool(function_declarations=[correlation_func_schema]) | |
| genai.configure(api_key=os.getenv("GEMINI_API_KEY")) | |
| model = genai.GenerativeModel("gemini-2.0-flash", tools=[correlation_tool]) | |
| # === Server === | |
| def server(input, output, session): | |
| print("β Loaded context-aware Gemini correlation server") | |
| last_corr = reactive.Value(None) | |
| last_cols = reactive.Value(("", "")) | |
| chat_status = reactive.Value("") | |
| def _populate_job_ids(): | |
| raw_candidates = get_all_candidates() | |
| job_ids_used = {c.get("job_id") for c in raw_candidates.values() if "job_id" in c} | |
| all_jobs = get_all_jobs() | |
| # Build label: value mapping | |
| job_choices = { | |
| job_id: f"{job_data.get('title', 'Untitled')} ({job_id[:8]})" | |
| for job_id, job_data in all_jobs.items() | |
| if job_id in job_ids_used | |
| } | |
| print(f"π Populating job_id dropdown with {len(job_choices)} items") | |
| ui.update_select("job_id", choices=job_choices) | |
| def candidates(): | |
| raw = get_all_candidates() | |
| job_id = input.job_id() | |
| if not job_id: | |
| return pd.DataFrame() | |
| df = pd.DataFrame([c for c in raw.values() if c.get("job_id") == job_id]) | |
| #df["Years of Experience"] = pd.to_numeric(df["Years of Experience"], errors="coerce") | |
| #df["avg_score"] = pd.to_numeric(df["avg_score"], errors="coerce") | |
| #df["Key Skills"] = df["Key Skills"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x) | |
| return df | |
| def _populate_cols(): | |
| df = candidates() | |
| if df.empty: | |
| return | |
| exclude = { | |
| "Name", "Email", "Key Skills", "Candidate ID", "Application ID", "Resume File", | |
| "Llama Summary", "Gemini Summary", "Note", "candidate_id", "job_id", | |
| "application_date", "source" | |
| } | |
| cols = [col for col in df.columns if col not in exclude] | |
| ui.update_select("col1", choices=cols) | |
| ui.update_select("col2", choices=cols) | |
| def candidate_table(): | |
| df = candidates() | |
| return df.drop(columns=["Resume File", "Llama Summary", "Gemini Summary", "onboarding_docs", "job_id", "Candidate ID"], errors="ignore").head(10) | |
| def correlation_output(): | |
| if input.calc_corr() == 0: | |
| return ui.p("β¬οΈ Select columns and click 'Calculate Correlation'.") | |
| df = candidates() | |
| col1 = input.col1() | |
| col2 = input.col2() | |
| if df.empty or not col1 or not col2: | |
| return ui.p("β οΈ Please select a job and valid columns.") | |
| result = correlate_columns(df, col1, col2) | |
| if "error" in result: | |
| return ui.p(f"β {result['error']}") | |
| try: | |
| corr_value = result[col1][col2] | |
| except: | |
| return ui.p("β Failed to extract correlation value.") | |
| last_corr.set(corr_value) | |
| last_cols.set((col1, col2)) | |
| prompt = ( | |
| f"The Pearson correlation between '{col1}' and '{col2}' is {corr_value:.4f}.\n\n" | |
| f"Explain this for a recruiter: include statistical meaning, hiring implications, and limitations." | |
| ) | |
| try: | |
| chat = model.start_chat() | |
| response = chat.send_message(prompt) | |
| explanation = markdown.markdown(response.text.strip()) | |
| except Exception as e: | |
| explanation = f"<b>β οΈ Gemini error:</b> {str(e)}" | |
| return ui.HTML(f""" | |
| <div><strong>{col1}</strong> vs <strong>{col2}</strong> correlation: <b>{corr_value:.4f}</b></div> | |
| <hr><div><strong>LLM Explanation:</strong><br>{explanation}</div> | |
| """) | |
| def chat_status_ui(): | |
| return chat_status.get() | |
| def chat_response(): | |
| user_msg = input.chat_input().strip() | |
| col1, col2 = last_cols.get() | |
| corr_value = last_corr.get() | |
| df = candidates() | |
| if not user_msg: | |
| return ui.HTML("<i>β οΈ Please enter a follow-up question.</i>") | |
| if df.empty: | |
| return ui.HTML("<i>β οΈ No candidate data loaded.</i>") | |
| if not col1 or corr_value is None: | |
| return ui.HTML("<i>β οΈ Please run a correlation first.</i>") | |
| chat_status.set("π¬ Thinking...") | |
| # Provide full data context for Gemini: 10 rows, all columns | |
| cleaned_df = df.drop(columns=["Resume File", "Llama Summary", "Gemini Summary"], errors="ignore") | |
| sample_json = json.dumps(cleaned_df.head(10).to_dict(orient="records"), indent=2) | |
| prompt = ( | |
| f"You are helping a recruiter analyze candidate data.\n\n" | |
| f"The last Pearson correlation was between '{col1}' and '{col2}' = {corr_value:.4f}.\n" | |
| f"The user asked: \"{user_msg}\"\n\n" | |
| f"Here is a preview of the first 10 rows of the dataset:\n{sample_json}\n\n" | |
| f"Use both the correlation and sample data to respond helpfully." | |
| ) | |
| try: | |
| chat = model.start_chat() | |
| response = chat.send_message(prompt) | |
| explanation = markdown.markdown(response.text.strip()) | |
| except ResourceExhausted: | |
| explanation = "<b>β Gemini quota exceeded. Try again soon.</b>" | |
| except Exception as e: | |
| explanation = f"<b>β Gemini error:</b> {str(e)}" | |
| chat_status.set("") | |
| return ui.HTML(explanation) | |