import os
import json
import re
import gdown
import shutil
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from typing import TypedDict, List
from pydantic import BaseModel, Field

# Mistral & LangGraph Imports
from langchain_mistralai import ChatMistralAI
from langgraph.graph import StateGraph, START, END

# =================================================================
# 1. SETUP & UI STYLING
# =================================================================
st.set_page_config(page_title="HR AI Agent", layout="wide", page_icon="👤")
load_dotenv()

# Use st.secrets for cloud or os.environ for local.
# BUG FIX: accessing st.secrets raises (a FileNotFoundError subclass) when no
# secrets.toml exists, so the old `os.environ.get(...) or st.secrets.get(...)`
# crashed on local runs instead of showing the friendly error below.
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    try:
        api_key = st.secrets.get("MISTRAL_API_KEY")
    except FileNotFoundError:
        # No secrets file present (typical for local dev) — fall through.
        api_key = None

if not api_key:
    st.error("🔑 Mistral API Key not found. Please set it in your environment variables or secrets.")
    st.stop()


# =================================================================
# 2. DATA SCHEMAS
# =================================================================
class ScoredCandidate(BaseModel):
    """Structured LLM output for a single evaluated candidate."""
    name: str
    score: float = Field(..., description="Objective score 0.00-100.00.")
    review: str = Field(..., description="Exactly 2 lines of review comment.")


class AgentState(TypedDict):
    # Shared state dict threaded through the LangGraph nodes.
    gdrive_link: str
    job_description: str
    num_to_hire: int
    raw_candidates: List[dict]
    evaluated_results: dict
    final_report: str
# =================================================================
# 3. HELPER FUNCTIONS
# =================================================================
def download_from_gdrive(url):
    """Download a public Google Drive folder into a fresh temp directory.

    Returns the local folder path, or None if the download failed.
    """
    temp_dir = "temp_resumes"
    # Start from a clean slate so stale resumes from a prior run don't leak in.
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)
    try:
        # Note: GDrive folders must be "Anyone with the link"
        gdown.download_folder(url, output=temp_dir, quiet=True, remaining_ok=True, use_cookies=False)
        return temp_dir
    except Exception as e:
        st.error(f"Error downloading from Google Drive: {e}")
        return None


def process_pdfs_to_json(folder_path):
    """Extract text from every PDF under folder_path and parse it into a
    candidate dict via the LLM.

    Returns a list of dicts (parsed fields plus the raw 'resume_text').
    """
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    all_candidates_json = []

    # Get all PDFs, including those in subfolders created by gdown
    files = []
    for root, dirs, filenames in os.walk(folder_path):
        for f in filenames:
            if f.lower().endswith(".pdf"):
                files.append(os.path.join(root, f))

    if not files:
        st.warning("No PDF files found in the folder.")
        return []

    progress_bar = st.progress(0)
    status_text = st.empty()

    for i, path in enumerate(files):
        filename = os.path.basename(path)
        # BUG FIX: show the actual file being processed; the original displayed
        # a literal "(unknown)" placeholder and never used `filename`.
        status_text.text(f"🔍 Analyzing: {filename}")
        try:
            reader = PdfReader(path)
            raw_text = "".join([page.extract_text() or "" for page in reader.pages])
            if len(raw_text.strip()) < 50:
                continue  # Skip empty or scanned PDFs without OCR
            prompt = f"Extract details from this resume into JSON (name, email, phone, skills, experience_years):\n{raw_text[:7000]}"
            response = llm.invoke(prompt)
            json_match = re.search(r"\{.*\}", response.content, re.DOTALL)
            if json_match:
                candidate_data = json.loads(json_match.group())
                candidate_data["resume_text"] = raw_text
                all_candidates_json.append(candidate_data)
        except Exception as e:
            # Best-effort: one unreadable PDF shouldn't abort the batch,
            # but surface the skip instead of failing silently.
            st.warning(f"Skipped {filename}: {e}")
        progress_bar.progress((i + 1) / len(files))

    status_text.empty()
    progress_bar.empty()
    return all_candidates_json
# =================================================================
# 4. AGENT NODES
# =================================================================
def extract_resumes_node(state: AgentState):
    """Phase 1: download resumes from GDrive and parse them into candidate dicts."""
    st.write("---")
    st.info("⚡ **Phase 1:** Fetching resumes from Google Drive...")
    temp_path = download_from_gdrive(state['gdrive_link'])
    if temp_path:
        candidates = process_pdfs_to_json(temp_path)
        shutil.rmtree(temp_path)  # Cleanup
        return {"raw_candidates": candidates}
    return {"raw_candidates": []}


def rank_candidates_node(state: AgentState):
    """
    Evaluates candidates using a strict weighted rubric and 0-temperature
    to ensure deterministic and consistent scoring.
    """
    print("\n" + "=" * 50)
    print("🚀 STEP 2: DETERMINISTIC SCORING ENGINE")
    print("=" * 50)

    # Initialize LLM with Temperature 0 for consistency
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    structured_llm = llm.with_structured_output(ScoredCandidate)

    scored_list = []
    for cand in state['raw_candidates']:
        name = cand.get('name', 'Unknown Candidate')
        print(f"🧠 Analyzing: {name}...")

        # OPTIMIZED PROMPT: Using a Point-Based Rubric
        prompt = f"""
        YOU ARE AN EXPERT RECRUITER. Evaluate the candidate against the Job Description (JD).

        ### JOB DESCRIPTION: {state['job_description']}

        ### CANDIDATE DATA: {json.dumps(cand)}

        ### SCORING RUBRIC (Strict 100-Point Scale):
        1. Technical Skill Match (40 pts): Compare 'skills' in candidate data to JD requirements.
        2. Experience Level (30 pts): Rate years of experience and seniority fit.
        3. Industry Fit (20 pts): Does their previous experience align with this JD's industry?
        4. Education/Certifications (10 pts): Does the candidate meet the degree requirements?

        ### RULES:
        - You must be OBJECTIVE. If a skill is not explicitly mentioned, do not award points for it.
        - Temperature is set to 0; provide the most logical mathematical score.
        - The 'review' must explain exactly why points were deducted.
        - You must not make tie between candidates.
        """
        try:
            # Mistral performs the evaluation based on the rubric above
            result = structured_llm.invoke(prompt)
            if result:
                scored_list.append(result.model_dump())
                print(f"✅ Scored {name}: {result.score}/100")
            else:
                scored_list.append({"name": name, "score": 0.0, "review": "Parsing error in AI output."})
        except Exception as e:
            print(f"⚠️ Error scoring {name}: {e}")
            scored_list.append({"name": name, "score": 0.0, "review": f"Processing Error: {str(e)}"})

    # SORTING: Ensures the list is ordered by score (highest first)
    sorted_all = sorted(scored_list, key=lambda x: x['score'], reverse=True)

    # OUTPUT: Returns the updated state to the LangGraph.
    # BUG FIX: 'all_evaluated_candidates' previously returned the UNSORTED list,
    # contradicting the sorting comment above and feeding the chatbot an
    # unranked knowledge base; return the ranked list everywhere.
    return {
        "evaluated_results": {
            "all_evaluated_candidates": sorted_all,
            "top_n_hired_list": sorted_all[:state['num_to_hire']]
        }
    }


def report_node(state: AgentState):
    """Phase 3: format the top-N hired candidates into a Markdown report."""
    st.info("⚡ **Phase 3:** Compiling final report...")
    evals = state['evaluated_results']['top_n_hired_list']
    report = "\n".join([f"🏆 **{c['name']}** (Score: {c['score']})\n{c['review']}\n" for c in evals])
    return {"final_report": report}


# =================================================================
# 5. GRAPH ORCHESTRATION
# =================================================================
workflow = StateGraph(AgentState)
workflow.add_node("parser", extract_resumes_node)
workflow.add_node("ranker", rank_candidates_node)
workflow.add_node("reporter", report_node)
workflow.add_edge(START, "parser")
workflow.add_edge("parser", "ranker")
workflow.add_edge("ranker", "reporter")
workflow.add_edge("reporter", END)
app = workflow.compile()
# =================================================================
# 6. UI LAYOUT
# =================================================================
st.title("🌟 AI HR Agent: Google Drive Edition")

col1, col2 = st.columns([2, 1])
with col1:
    jd_input = st.text_area("📋 Job Description", placeholder="Paste the job requirements here...", height=200)
with col2:
    gdrive_link = st.text_input("🔗 Public GDrive Folder Link")
    hire_count = st.number_input("Selection Count (Top N)", min_value=1, max_value=20, value=3)

analyze_btn = st.button("🚀 Run Analysis", type="primary", use_container_width=True)

if analyze_btn:
    if not jd_input or not gdrive_link:
        st.warning("Please provide both a Job Description and a Google Drive Link.")
    else:
        inputs = {
            "gdrive_link": gdrive_link,
            "job_description": jd_input,
            "num_to_hire": int(hire_count),
            "raw_candidates": []
        }
        with st.status("AI Agent is working...", expanded=True) as status:
            final_state = app.invoke(inputs)
            status.update(label="Analysis Complete!", state="complete")

        st.session_state.result_state = final_state
        st.session_state.jd = jd_input
        # BUG FIX: clear the chat when a fresh analysis replaces the results,
        # otherwise the assistant keeps answering about a stale candidate pool.
        st.session_state.messages = []
        st.success("### 📋 Shortlisted Candidates")
        st.markdown(final_state["final_report"])

# =================================================================
# 7. CHATBOT (FIXED: ACCESS TO ALL CANDIDATES)
# =================================================================
if "result_state" in st.session_state:
    st.divider()
    st.subheader("💬 Deep-Dive: Ask the HR Agent")

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ex: Why was John selected but Sarah wasn't?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # 1. PREPARE LEAN DATA (Crucial: Removes heavy resume_text)
        all_evals = st.session_state.result_state['evaluated_results']['all_evaluated_candidates']
        top_hired = [c['name'] for c in st.session_state.result_state['evaluated_results']['top_n_hired_list']]

        # Build a summarized list of EVERY candidate
        knowledge_base = []
        for eval_item in all_evals:
            status = "SELECTED/TOP-TIER" if eval_item['name'] in top_hired else "DESELECTED/LOWER-RANKED"
            knowledge_base.append({
                "name": eval_item['name'],
                "score": eval_item['score'],
                "status": status,
                "reasoning": eval_item['review']
            })

        # 2. SYSTEM INSTRUCTIONS FOR THE AI
        chat_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key)
        context_message = f"""
        You are an HR Analytics Bot. You have full access to the scoring results for ALL candidates.

        JOB DESCRIPTION: {st.session_state.jd}

        CANDIDATE DATA (Scores and Status): {json.dumps(knowledge_base, indent=2)}

        INSTRUCTIONS:
        1. Answer questions about specific candidates using the 'reasoning' and 'score' provided.
        2. If asked why someone was deselected, compare their score/reasoning to the higher-scoring candidates.
        3. Use Markdown tables if asked to compare multiple people.
        """

        with st.chat_message("assistant"):
            # BUG FIX: send the full conversation, not just the latest prompt,
            # so follow-up questions ("what about her experience?") keep their
            # context. `messages` already includes the prompt appended above.
            history = [("system", context_message)]
            history += [(m["role"], m["content"]) for m in st.session_state.messages]
            response = chat_llm.invoke(history)
            st.markdown(response.content)

        st.session_state.messages.append({"role": "assistant", "content": response.content})