# HR-Bot-V1 / app.py
# Tarun-intellentech's picture
# Upload 3 files
# 2f174fb verified
import json
import os
import re
import shutil
import tempfile
from typing import TypedDict, List

import gdown
import streamlit as st
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from PyPDF2 import PdfReader

# Mistral & LangGraph Imports
from langchain_mistralai import ChatMistralAI
from langgraph.graph import StateGraph, START, END
# =================================================================
# 1. SETUP & UI STYLING
# =================================================================
st.set_page_config(page_title="HR AI Agent", layout="wide", page_icon="πŸ‘€")
load_dotenv()

# Resolve the Mistral API key: the process environment wins (local runs,
# including values loaded by load_dotenv above); Streamlit secrets are the
# fallback (cloud deployments).
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    try:
        api_key = st.secrets.get("MISTRAL_API_KEY")
    except Exception:
        # Reading st.secrets raises when no secrets file is configured, and the
        # concrete exception class differs between Streamlit releases. Treat
        # any failure here as "no key configured" so the user gets the
        # friendly message below instead of a stack trace.
        api_key = None

if not api_key:
    st.error("πŸ”‘ Mistral API Key not found. Please set it in your environment variables or secrets.")
    st.stop()
# =================================================================
# 2. DATA SCHEMAS
# =================================================================
class ScoredCandidate(BaseModel):
    # Schema of one scored candidate. rank_candidates_node binds this model to
    # the LLM through with_structured_output() and stores model_dump() of each
    # result. Documentation is kept in `#` comments on purpose so that nothing
    # is added to the model's own schema text.

    # Candidate name as returned by the scoring model.
    name: str
    # Numeric score; the description asks for a value between 0.00 and 100.00
    # (no range validation is enforced here).
    # NOTE(review): the descriptions are presumably forwarded to the model as
    # part of the structured-output schema - confirm before rewording them.
    score: float = Field(..., description="Objective score 0.00-100.00.")
    # Short free-text justification for the score.
    review: str = Field(..., description="Exactly 2 lines of review comment.")
class AgentState(TypedDict):
    """Shared state passed between the LangGraph nodes of the HR pipeline."""

    # Google Drive folder link; handed to download_from_gdrive().
    gdrive_link: str
    # Job description text; interpolated into the scoring prompt.
    job_description: str
    # Size of the shortlist; used to slice the score-sorted candidate list.
    num_to_hire: int
    # Candidate dicts produced by process_pdfs_to_json() (parser node output).
    raw_candidates: List[dict]
    # Ranker node output: a dict with the keys "all_evaluated_candidates" and
    # "top_n_hired_list".
    evaluated_results: dict
    # Markdown text built by report_node() and rendered in the UI.
    final_report: str
# =================================================================
# 3. HELPER FUNCTIONS
# =================================================================
def download_from_gdrive(url):
    """Download a Google Drive folder into a fresh temporary directory.

    Args:
        url: Link of the Drive folder to download.

    Returns:
        Path of the directory that holds the downloaded files, or ``None``
        when the download failed (an error is shown in the Streamlit UI and
        the directory is removed again). On success the caller owns the
        directory and is responsible for deleting it.
    """
    # One unique directory per call. A fixed, shared directory name would let
    # two concurrent sessions delete each other's downloads mid-analysis.
    temp_dir = tempfile.mkdtemp(prefix="temp_resumes_")
    try:
        # Note: GDrive folders must be "Anyone with the link"
        downloaded = gdown.download_folder(
            url, output=temp_dir, quiet=True, remaining_ok=True, use_cookies=False
        )
    except Exception as e:
        shutil.rmtree(temp_dir, ignore_errors=True)
        st.error(f"Error downloading from Google Drive: {e}")
        return None

    if downloaded is None:
        # gdown signals some failures (e.g. folder listing could not be
        # retrieved) by returning None instead of raising.
        shutil.rmtree(temp_dir, ignore_errors=True)
        st.error(
            "Error downloading from Google Drive: the folder contents could not "
            "be retrieved. Check that the link is shared as 'Anyone with the link'."
        )
        return None

    return temp_dir
# Resumes with fewer extracted characters than this are treated as unreadable.
_MIN_RESUME_CHARS = 50
# Upper bound on the resume text sent to the model for field extraction.
_MAX_PROMPT_CHARS = 7000


def _find_pdf_files(folder_path):
    """Return the paths of all ``.pdf`` files below *folder_path*, recursively."""
    return [
        os.path.join(root, f)
        for root, _dirs, filenames in os.walk(folder_path)
        for f in filenames
        if f.lower().endswith(".pdf")
    ]


def _parse_first_json_object(text):
    """Decode the first JSON object embedded in *text*, or return ``None``."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the object, so prose or code
            # fences after the JSON (even ones containing braces) are ignored.
            obj, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            obj = None
        if isinstance(obj, dict):
            return obj
        start = text.find("{", start + 1)
    return None


def _resume_to_candidate(llm, path):
    """Turn one PDF into a candidate dict; ``None`` if it cannot be used."""
    reader = PdfReader(path)
    raw_text = "".join(page.extract_text() or "" for page in reader.pages)
    if len(raw_text.strip()) < _MIN_RESUME_CHARS:
        return None  # Skip empty or scanned PDFs without OCR
    prompt = f"Extract details from this resume into JSON (name, email, phone, skills, experience_years):\n{raw_text[:_MAX_PROMPT_CHARS]}"
    response = llm.invoke(prompt)
    candidate_data = _parse_first_json_object(response.content)
    if candidate_data is None:
        return None
    # The full (untruncated) text travels with the candidate record.
    candidate_data["resume_text"] = raw_text
    return candidate_data


def process_pdfs_to_json(folder_path):
    """Extract structured candidate records from every PDF under a folder.

    Each PDF is read with PyPDF2, its text is sent to the Mistral model with a
    request for JSON (name, email, phone, skills, experience_years), and the
    decoded object is extended with the full ``resume_text``. Progress is
    shown through Streamlit widgets.

    Args:
        folder_path: Directory to scan recursively for ``.pdf`` files.

    Returns:
        A list of candidate dicts, one per successfully processed PDF. Files
        that cannot be read, contain too little text, or yield no JSON are
        skipped and listed in a Streamlit warning; they never abort the batch.
    """
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    # Get all PDFs, including those in subfolders created by gdown
    files = _find_pdf_files(folder_path)
    if not files:
        st.warning("No PDF files found in the folder.")
        return []

    all_candidates_json = []
    skipped = []
    progress_bar = st.progress(0)
    status_text = st.empty()
    for i, path in enumerate(files):
        filename = os.path.basename(path)
        status_text.text(f"πŸ” Analyzing: {filename}")
        try:
            candidate_data = _resume_to_candidate(llm, path)
        except Exception as exc:
            # Deliberate best-effort: one broken PDF or failed model call must
            # not stop the remaining resumes, but the user should know.
            skipped.append(f"{filename} ({exc})")
        else:
            if candidate_data is None:
                skipped.append(f"{filename} (no readable text or no JSON in the model reply)")
            else:
                all_candidates_json.append(candidate_data)
        # Always advance the bar, including for skipped files.
        progress_bar.progress((i + 1) / len(files))
    status_text.empty()
    progress_bar.empty()

    if skipped:
        st.warning("Skipped resumes: " + "; ".join(skipped))
    return all_candidates_json
# =================================================================
# 4. AGENT NODES
# =================================================================
def extract_resumes_node(state: AgentState):
    """Parser node: download the Drive folder and parse its resumes.

    Args:
        state: Graph state; only ``gdrive_link`` is read.

    Returns:
        A partial state update ``{"raw_candidates": [...]}``. The list is
        empty when the download failed.
    """
    st.write("---")
    st.info("⚑ **Phase 1:** Fetching resumes from Google Drive...")
    temp_path = download_from_gdrive(state['gdrive_link'])
    if not temp_path:
        return {"raw_candidates": []}
    try:
        candidates = process_pdfs_to_json(temp_path)
    finally:
        # Cleanup runs even if parsing raises, so downloaded resumes are never
        # left on disk; a directory that is already gone is not an error.
        shutil.rmtree(temp_path, ignore_errors=True)
    return {"raw_candidates": candidates}
def _build_scoring_prompt(job_description, candidate):
    """Render the rubric-based scoring prompt for a single candidate."""
    # The literal is intentionally flush-left: indenting it would add leading
    # whitespace to every line of the prompt sent to the model.
    return f"""
YOU ARE AN EXPERT RECRUITER. Evaluate the candidate against the Job Description (JD).
### JOB DESCRIPTION:
{job_description}
### CANDIDATE DATA:
{json.dumps(candidate)}
### SCORING RUBRIC (Strict 100-Point Scale):
1. Technical Skill Match (40 pts): Compare 'skills' in candidate data to JD requirements.
2. Experience Level (30 pts): Rate years of experience and seniority fit.
3. Industry Fit (20 pts): Does their previous experience align with this JD's industry?
4. Education/Certifications (10 pts): Does the candidate meet the degree requirements?
### RULES:
- You must be OBJECTIVE. If a skill is not explicitly mentioned, do not award points for it.
- Temperature is set to 0; provide the most logical mathematical score.
- The 'review' must explain exactly why points were deducted.
- You must not make tie between candidates.
"""


def rank_candidates_node(state: AgentState):
    """
    Evaluates candidates using a strict weighted rubric and 0-temperature
    to ensure deterministic and consistent scoring.

    Args:
        state: Graph state; reads ``raw_candidates``, ``job_description`` and
            ``num_to_hire``.

    Returns:
        A partial state update whose ``evaluated_results`` dict holds
        ``all_evaluated_candidates`` (every scored record, in processing
        order) and ``top_n_hired_list`` (the ``num_to_hire`` highest scores).
        A candidate whose scoring fails is kept with a score of 0.0 and the
        error text as its review.
    """
    print("\n" + "="*50)
    print("πŸš€ STEP 2: DETERMINISTIC SCORING ENGINE")
    print("="*50)
    # Initialize LLM with Temperature 0 for consistency
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    structured_llm = llm.with_structured_output(ScoredCandidate)
    scored_list = []
    for cand in state['raw_candidates']:
        # `or` (rather than a .get default) also covers a "name" key that is
        # present but null/empty in the extracted JSON.
        name = cand.get('name') or 'Unknown Candidate'
        print(f"🧠 Analyzing: {name}...")
        # OPTIMIZED PROMPT: Using a Point-Based Rubric
        prompt = _build_scoring_prompt(state['job_description'], cand)
        try:
            # Mistral performs the evaluation based on the rubric above
            result = structured_llm.invoke(prompt)
            if result:
                scored_list.append(result.model_dump())
                print(f"βœ… Scored {name}: {result.score}/100")
            else:
                scored_list.append({"name": name, "score": 0.0, "review": "Parsing error in AI output."})
        except Exception as e:
            # Best-effort per candidate: record the failure and keep going.
            print(f"⚠️ Error scoring {name}: {e}")
            scored_list.append({"name": name, "score": 0.0, "review": f"Processing Error: {str(e)}"})
    # SORTING: Ensures the list is ordered by score (highest first)
    sorted_all = sorted(scored_list, key=lambda x: x['score'], reverse=True)
    # OUTPUT: Returns the updated state to the LangGraph
    return {
        "evaluated_results": {
            "all_evaluated_candidates": scored_list,
            "top_n_hired_list": sorted_all[:state['num_to_hire']],
        }
    }
def report_node(state: AgentState):
    """Reporter node: format the shortlisted candidates as Markdown.

    Reads ``evaluated_results["top_n_hired_list"]`` from the state and returns
    ``{"final_report": <text>}`` with one entry per shortlisted candidate.
    """
    st.info("⚑ **Phase 3:** Compiling final report...")
    shortlisted = state['evaluated_results']['top_n_hired_list']
    entries = []
    for c in shortlisted:
        # One entry: headline with name and score, then the review text.
        entries.append(f"πŸ† **{c['name']}** (Score: {c['score']})\n{c['review']}\n")
    report_text = "\n".join(entries)
    return {"final_report": report_text}
# =================================================================
# 5. GRAPH ORCHESTRATION
# =================================================================
# Linear pipeline: parser -> ranker -> reporter, declared once as a table and
# then registered on the graph in that order.
_PIPELINE_STAGES = (
    ("parser", extract_resumes_node),
    ("ranker", rank_candidates_node),
    ("reporter", report_node),
)

workflow = StateGraph(AgentState)
for _stage_name, _stage_fn in _PIPELINE_STAGES:
    workflow.add_node(_stage_name, _stage_fn)

# Chain START -> each stage -> END by pairing every stop with its successor.
_route = [START, *(_stage_name for _stage_name, _ in _PIPELINE_STAGES), END]
for _source, _target in zip(_route, _route[1:]):
    workflow.add_edge(_source, _target)

app = workflow.compile()
# =================================================================
# 6. UI LAYOUT
# =================================================================
st.title("🌟 AI HR Agent: Google Drive Edition")
# Two-column input form: job description on the left, run settings on the right.
# NOTE(review): the original indentation was lost; the link, count and button
# widgets are assumed to all live in the right-hand column - confirm.
col1, col2 = st.columns([2, 1])
with col1:
    jd_input = st.text_area("πŸ“‹ Job Description", placeholder="Paste the job requirements here...", height=200)
with col2:
    gdrive_link = st.text_input("πŸ”— Public GDrive Folder Link")
    hire_count = st.number_input("Selection Count (Top N)", min_value=1, max_value=20, value=3)
    analyze_btn = st.button("πŸš€ Run Analysis", type="primary", use_container_width=True)

if analyze_btn:
    # Whitespace-only input counts as missing.
    if not jd_input.strip() or not gdrive_link.strip():
        st.warning("Please provide both a Job Description and a Google Drive Link.")
    else:
        inputs = {
            "gdrive_link": gdrive_link.strip(),
            "job_description": jd_input,
            "num_to_hire": int(hire_count),
            "raw_candidates": [],
        }
        with st.status("AI Agent is working...", expanded=True) as status:
            final_state = app.invoke(inputs)
            status.update(label="Analysis Complete!", state="complete")
        st.session_state.result_state = final_state
        st.session_state.jd = jd_input
        # A new analysis invalidates any chat about the previous one.
        st.session_state.messages = []

# Render the shortlist from session state rather than only inside the button
# branch: the script reruns on every interaction (e.g. a chat message), and
# the button is False on those reruns, which would make the report disappear.
if "result_state" in st.session_state:
    st.success("### πŸ“‹ Shortlisted Candidates")
    st.markdown(st.session_state.result_state["final_report"])
# =================================================================
# 7. CHATBOT (FIXED: ACCESS TO ALL CANDIDATES)
# =================================================================
if "result_state" in st.session_state:
    st.divider()
    st.subheader("πŸ’¬ Deep-Dive: Ask the HR Agent")
    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Display chat history
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])
    if prompt := st.chat_input("Ex: Why was John selected but Sarah wasn't?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        # 1. PREPARE LEAN DATA (Crucial: Removes heavy resume_text)
        evaluated = st.session_state.result_state['evaluated_results']
        all_evals = evaluated['all_evaluated_candidates']
        # A set gives constant-time membership tests in the loop below.
        top_hired = {c['name'] for c in evaluated['top_n_hired_list']}
        # Build a summarized list of EVERY candidate
        knowledge_base = [
            {
                "name": eval_item['name'],
                "score": eval_item['score'],
                "status": (
                    "SELECTED/TOP-TIER"
                    if eval_item['name'] in top_hired
                    else "DESELECTED/LOWER-RANKED"
                ),
                "reasoning": eval_item['review'],
            }
            for eval_item in all_evals
        ]
        # 2. SYSTEM INSTRUCTIONS FOR THE AI
        chat_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key)
        # The literal is intentionally flush-left so no indentation leaks into
        # the system prompt.
        context_message = f"""
You are an HR Analytics Bot. You have full access to the scoring results for ALL candidates.
JOB DESCRIPTION:
{st.session_state.jd}
CANDIDATE DATA (Scores and Status):
{json.dumps(knowledge_base, indent=2)}
INSTRUCTIONS:
1. Answer questions about specific candidates using the 'reasoning' and 'score' provided.
2. If asked why someone was deselected, compare their score/reasoning to the higher-scoring candidates.
3. Use Markdown tables if asked to compare multiple people.
"""
        # 3. CONVERSATION: system prompt plus the recent turns, so follow-up
        # questions ("and what about her experience?") keep their context.
        # History alternates user/assistant and ends with the new user turn,
        # so an odd-sized window always starts on a user message.
        max_previous_exchanges = 5
        recent_turns = st.session_state.messages[-(2 * max_previous_exchanges + 1):]
        conversation = [("system", context_message)]
        conversation.extend((turn["role"], turn["content"]) for turn in recent_turns)
        with st.chat_message("assistant"):
            try:
                response = chat_llm.invoke(conversation)
            except Exception as exc:
                # Drop the unanswered question so the stored history keeps its
                # strict user/assistant alternation for later requests.
                st.session_state.messages.pop()
                st.error(f"The HR Agent could not answer this question: {exc}")
            else:
                st.markdown(response.content)
                st.session_state.messages.append({"role": "assistant", "content": response.content})