import os import json import pandas as pd import numpy as np import ast from dotenv import load_dotenv from shiny import reactive, render, ui import google.generativeai as genai from google.generativeai.types import FunctionDeclaration, Tool from google.api_core.exceptions import ResourceExhausted import markdown from context import get_all_candidates, get_all_jobs load_dotenv() # === Tool Function === def correlate_columns(df: pd.DataFrame, col1: str, col2: str) -> dict: if col1 not in df.columns or col2 not in df.columns: return {"error": f"One or both columns not found: '{col1}', '{col2}'"} if col1 == col2: return {"error": "Cannot correlate a column with itself."} subset = df[[col1, col2]].dropna() for col in [col1, col2]: if subset[col].dtype == "object" or pd.api.types.is_categorical_dtype(subset[col]): subset[col], _ = pd.factorize(subset[col]) try: return subset.corr(method="pearson").to_dict() except Exception as e: return {"error": str(e)} # === Gemini Tool Setup === correlation_func_schema = FunctionDeclaration( name="correlate_columns", description="Calculate the Pearson correlation between two candidate variables.", parameters={ "type": "object", "properties": { "col1": {"type": "string", "description": "First column"}, "col2": {"type": "string", "description": "Second column"} }, "required": ["col1", "col2"] } ) correlation_tool = Tool(function_declarations=[correlation_func_schema]) genai.configure(api_key=os.getenv("GEMINI_API_KEY")) model = genai.GenerativeModel("gemini-2.0-flash", tools=[correlation_tool]) # === Server === def server(input, output, session): print("✅ Loaded context-aware Gemini correlation server") last_corr = reactive.Value(None) last_cols = reactive.Value(("", "")) chat_status = reactive.Value("") @reactive.effect def _populate_job_ids(): raw_candidates = get_all_candidates() job_ids_used = {c.get("job_id") for c in raw_candidates.values() if "job_id" in c} all_jobs = get_all_jobs() # Build label: value mapping job_choices = { job_id: f"{job_data.get('title', 'Untitled')} ({job_id[:8]})" for job_id, job_data in all_jobs.items() if job_id in job_ids_used } print(f"📊 Populating job_id dropdown with {len(job_choices)} items") ui.update_select("job_id", choices=job_choices) @reactive.Calc def candidates(): raw = get_all_candidates() job_id = input.job_id() if not job_id: return pd.DataFrame() df = pd.DataFrame([c for c in raw.values() if c.get("job_id") == job_id]) #df["Years of Experience"] = pd.to_numeric(df["Years of Experience"], errors="coerce") #df["avg_score"] = pd.to_numeric(df["avg_score"], errors="coerce") #df["Key Skills"] = df["Key Skills"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x) return df @reactive.effect def _populate_cols(): df = candidates() if df.empty: return exclude = { "Name", "Email", "Key Skills", "Candidate ID", "Application ID", "Resume File", "Llama Summary", "Gemini Summary", "Note", "candidate_id", "job_id", "application_date", "source" } cols = [col for col in df.columns if col not in exclude] ui.update_select("col1", choices=cols) ui.update_select("col2", choices=cols) @output @render.table def candidate_table(): df = candidates() return df.drop(columns=["Resume File", "Llama Summary", "Gemini Summary", "onboarding_docs", "job_id", "Candidate ID"], errors="ignore").head(10) @output @render.ui def correlation_output(): if input.calc_corr() == 0: return ui.p("⬇️ Select columns and click 'Calculate Correlation'.") df = candidates() col1 = input.col1() col2 = input.col2() if df.empty or not col1 or not col2: return ui.p("⚠️ Please select a job and valid columns.") result = correlate_columns(df, col1, col2) if "error" in result: return ui.p(f"❌ {result['error']}") try: corr_value = result[col1][col2] except: return ui.p("❌ Failed to extract correlation value.") last_corr.set(corr_value) last_cols.set((col1, col2)) prompt = ( f"The Pearson correlation between '{col1}' and '{col2}' is {corr_value:.4f}.\n\n" f"Explain this for a recruiter: include statistical meaning, hiring implications, and limitations." ) try: chat = model.start_chat() response = chat.send_message(prompt) explanation = markdown.markdown(response.text.strip()) except Exception as e: explanation = f"⚠️ Gemini error: {str(e)}" return ui.HTML(f"""
{col1} vs {col2} correlation: {corr_value:.4f}

LLM Explanation:
{explanation}
""") @output @render.text def chat_status_ui(): return chat_status.get() @output @render.ui @reactive.event(input.chat_send) def chat_response(): user_msg = input.chat_input().strip() col1, col2 = last_cols.get() corr_value = last_corr.get() df = candidates() if not user_msg: return ui.HTML("⚠️ Please enter a follow-up question.") if df.empty: return ui.HTML("⚠️ No candidate data loaded.") if not col1 or corr_value is None: return ui.HTML("⚠️ Please run a correlation first.") chat_status.set("💬 Thinking...") # Provide full data context for Gemini: 10 rows, all columns cleaned_df = df.drop(columns=["Resume File", "Llama Summary", "Gemini Summary"], errors="ignore") sample_json = json.dumps(cleaned_df.head(10).to_dict(orient="records"), indent=2) prompt = ( f"You are helping a recruiter analyze candidate data.\n\n" f"The last Pearson correlation was between '{col1}' and '{col2}' = {corr_value:.4f}.\n" f"The user asked: \"{user_msg}\"\n\n" f"Here is a preview of the first 10 rows of the dataset:\n{sample_json}\n\n" f"Use both the correlation and sample data to respond helpfully." ) try: chat = model.start_chat() response = chat.send_message(prompt) explanation = markdown.markdown(response.text.strip()) except ResourceExhausted: explanation = "❌ Gemini quota exceeded. Try again soon." except Exception as e: explanation = f"❌ Gemini error: {str(e)}" chat_status.set("") return ui.HTML(explanation)