| | import os
|
| | import json
|
| | import re
|
| | import gdown
|
| | import shutil
|
| | import streamlit as st
|
| | from dotenv import load_dotenv
|
| | from PyPDF2 import PdfReader
|
| | from typing import TypedDict, List
|
| | from pydantic import BaseModel, Field
|
| |
|
| |
|
| | from langchain_mistralai import ChatMistralAI
|
| | from langgraph.graph import StateGraph, START, END
|
| |
|
| |
|
| |
|
| |
|
# --- App configuration & credentials --------------------------------------
st.set_page_config(page_title="HR AI Agent", layout="wide", page_icon="π€")
load_dotenv()

# Resolve the Mistral key: environment first, then Streamlit secrets.
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    api_key = st.secrets.get("MISTRAL_API_KEY")

# Without a credential the rest of the app cannot run — stop early.
if not api_key:
    st.error("π Mistral API Key not found. Please set it in your environment variables or secrets.")
    st.stop()
|
| |
|
| |
|
| |
|
| |
|
class ScoredCandidate(BaseModel):
    """Structured-output schema the scoring LLM must return for one candidate."""

    # Candidate's full name as extracted from the resume.
    name: str
    score: float = Field(..., description="Objective score 0.00-100.00.")
    review: str = Field(..., description="Exactly 2 lines of review comment.")
|
| |
|
class AgentState(TypedDict):
    """Shared state threaded through the LangGraph nodes (parser -> ranker -> reporter)."""

    gdrive_link: str            # public Google Drive folder URL supplied by the user
    job_description: str        # raw JD text pasted into the UI
    num_to_hire: int            # top-N cutoff for the shortlist
    raw_candidates: List[dict]  # parsed resume dicts produced by the parser node
    evaluated_results: dict     # {"all_evaluated_candidates": [...], "top_n_hired_list": [...]}
    final_report: str           # markdown summary rendered to the user
|
| |
|
| |
|
| |
|
| |
|
def download_from_gdrive(url):
    """Download a public Google Drive folder into a fresh scratch directory.

    Returns the directory path on success, or None after surfacing the
    error in the Streamlit UI when the download fails.
    """
    temp_dir = "temp_resumes"

    # Start from a clean slate so stale resumes never leak between runs.
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)

    try:
        gdown.download_folder(url, output=temp_dir, quiet=True, remaining_ok=True, use_cookies=False)
    except Exception as e:
        st.error(f"Error downloading from Google Drive: {e}")
        return None
    return temp_dir
|
| |
|
def process_pdfs_to_json(folder_path):
    """Parse every PDF under *folder_path* into a structured candidate dict.

    Each resume is read with PyPDF2, then the LLM extracts name/email/phone/
    skills/experience_years as JSON; the raw text is attached under
    'resume_text'. Unreadable or near-empty PDFs are skipped best-effort.

    Returns a list of candidate dicts (possibly empty).
    """
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    all_candidates_json = []

    # Recursively collect PDFs — gdown may reproduce nested folders.
    files = []
    for root, _dirs, filenames in os.walk(folder_path):
        for f in filenames:
            if f.lower().endswith(".pdf"):
                files.append(os.path.join(root, f))

    if not files:
        st.warning("No PDF files found in the folder.")
        return []

    progress_bar = st.progress(0)
    status_text = st.empty()

    for i, path in enumerate(files):
        filename = os.path.basename(path)
        # BUG FIX: the status line previously showed a literal placeholder —
        # `filename` was computed but never interpolated into the message.
        status_text.text(f"π Analyzing: {filename}")
        try:
            reader = PdfReader(path)
            raw_text = "".join([page.extract_text() or "" for page in reader.pages])

            # Skip scanned/empty PDFs that yielded no meaningful text.
            if len(raw_text.strip()) < 50:
                continue

            # Truncate to keep the prompt within model context limits.
            prompt = f"Extract details from this resume into JSON (name, email, phone, skills, experience_years):\n{raw_text[:7000]}"
            response = llm.invoke(prompt)
            # Grab the first JSON object in the reply; models often wrap it in prose.
            json_match = re.search(r"\{.*\}", response.content, re.DOTALL)
            if json_match:
                candidate_data = json.loads(json_match.group())
                candidate_data["resume_text"] = raw_text
                all_candidates_json.append(candidate_data)
        except Exception:
            # Deliberate best-effort: one corrupt PDF or malformed LLM reply
            # must not abort the whole batch.
            pass
        progress_bar.progress((i + 1) / len(files))

    status_text.empty()
    progress_bar.empty()
    return all_candidates_json
|
| |
|
| |
|
| |
|
| |
|
def extract_resumes_node(state: AgentState):
    """Phase-1 graph node: fetch resumes from Drive and parse them into dicts."""
    st.write("---")
    st.info("β‘ **Phase 1:** Fetching resumes from Google Drive...")

    temp_path = download_from_gdrive(state['gdrive_link'])
    if not temp_path:
        # Download failed; hand the ranker an empty candidate pool.
        return {"raw_candidates": []}

    candidates = process_pdfs_to_json(temp_path)
    shutil.rmtree(temp_path)  # discard the scratch directory once parsed
    return {"raw_candidates": candidates}
|
| |
|
| |
|
| |
|
def rank_candidates_node(state: AgentState):
    """
    Evaluates candidates using a strict weighted rubric and 0-temperature
    to ensure deterministic and consistent scoring.

    Returns a state update with both the full scored list and the top-N
    shortlist (N = state['num_to_hire']).
    """
    print("\n" + "="*50)
    print("π STEP 2: DETERMINISTIC SCORING ENGINE")
    print("="*50)

    # temperature=0 keeps rubric scoring as deterministic as the API allows;
    # structured output forces replies into the ScoredCandidate schema.
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    structured_llm = llm.with_structured_output(ScoredCandidate)

    scored_list = []

    for cand in state['raw_candidates']:
        name = cand.get('name', 'Unknown Candidate')
        print(f"π§ Analyzing: {name}...")

        prompt = f"""
YOU ARE AN EXPERT RECRUITER. Evaluate the candidate against the Job Description (JD).

### JOB DESCRIPTION:
{state['job_description']}

### CANDIDATE DATA:
{json.dumps(cand)}

### SCORING RUBRIC (Strict 100-Point Scale):
1. Technical Skill Match (40 pts): Compare 'skills' in candidate data to JD requirements.
2. Experience Level (30 pts): Rate years of experience and seniority fit.
3. Industry Fit (20 pts): Does their previous experience align with this JD's industry?
4. Education/Certifications (10 pts): Does the candidate meet the degree requirements?

### RULES:
- You must be OBJECTIVE. If a skill is not explicitly mentioned, do not award points for it.
- Temperature is set to 0; provide the most logical mathematical score.
- The 'review' must explain exactly why points were deducted.
- You must not make tie between candidates.
"""

        try:
            result = structured_llm.invoke(prompt)

            if result:
                scored_list.append(result.model_dump())
                # BUG FIX: this log statement was corrupted in the original
                # (the f-string was split mid-line, leaving it unterminated);
                # reconstructed as a single well-formed print.
                print(f"Scored {name}: {result.score}/100")
            else:
                # Structured-output call returned nothing usable.
                scored_list.append({"name": name, "score": 0.0, "review": "Parsing error in AI output."})

        except Exception as e:
            # Score failures as 0 so one bad candidate never aborts the run.
            print(f"β οΈ Error scoring {name}: {e}")
            scored_list.append({"name": name, "score": 0.0, "review": f"Processing Error: {str(e)}"})

    # Highest score first; Python's sort is stable, so ties keep input order.
    sorted_all = sorted(scored_list, key=lambda x: x['score'], reverse=True)

    return {
        "evaluated_results": {
            "all_evaluated_candidates": scored_list,
            "top_n_hired_list": sorted_all[:state['num_to_hire']]
        }
    }
|
| |
|
| |
|
def report_node(state: AgentState):
    """Phase-3 graph node: render the shortlist as a markdown report string."""
    st.info("β‘ **Phase 3:** Compiling final report...")
    shortlisted = state['evaluated_results']['top_n_hired_list']
    lines = [
        f"π **{c['name']}** (Score: {c['score']})\n{c['review']}\n"
        for c in shortlisted
    ]
    return {"final_report": "\n".join(lines)}
|
| |
|
| |
|
| |
|
| |
|
# --- Build the linear LangGraph pipeline: parser -> ranker -> reporter ----
workflow = StateGraph(AgentState)
workflow.add_node("parser", extract_resumes_node)
workflow.add_node("ranker", rank_candidates_node)
workflow.add_node("reporter", report_node)
workflow.add_edge(START, "parser")
workflow.add_edge("parser", "ranker")
workflow.add_edge("ranker", "reporter")
workflow.add_edge("reporter", END)
# Compile once at import time; `app.invoke(...)` runs the whole pipeline.
app = workflow.compile()
|
| |
|
| |
|
| |
|
| |
|
# --- Main Streamlit UI: inputs, run button, and results -------------------
st.title("π AI HR Agent: Google Drive Edition")

col1, col2 = st.columns([2, 1])

with col1:
    jd_input = st.text_area("π Job Description", placeholder="Paste the job requirements here...", height=200)

with col2:
    gdrive_link = st.text_input("π Public GDrive Folder Link")
    hire_count = st.number_input("Selection Count (Top N)", min_value=1, max_value=20, value=3)
    analyze_btn = st.button("π Run Analysis", type="primary", use_container_width=True)

if analyze_btn:
    if not jd_input or not gdrive_link:
        st.warning("Please provide both a Job Description and a Google Drive Link.")
    else:
        # Seed the graph state; downstream nodes fill in the remaining keys.
        inputs = {
            "gdrive_link": gdrive_link,
            "job_description": jd_input,
            "num_to_hire": int(hire_count),
            "raw_candidates": []
        }

        with st.status("AI Agent is working...", expanded=True) as status:
            final_state = app.invoke(inputs)
            status.update(label="Analysis Complete!", state="complete")

        # Persist results in session state so the chat section below
        # survives Streamlit's script reruns.
        st.session_state.result_state = final_state
        st.session_state.jd = jd_input

        st.success("### π Shortlisted Candidates")
        st.markdown(final_state["final_report"])
|
| |
|
| |
|
| |
|
| |
|
| | if "result_state" in st.session_state:
|
| | st.divider()
|
| | st.subheader("π¬ Deep-Dive: Ask the HR Agent")
|
| |
|
| |
|
| | if "messages" not in st.session_state:
|
| | st.session_state.messages = []
|
| |
|
| |
|
| | for msg in st.session_state.messages:
|
| | with st.chat_message(msg["role"]):
|
| | st.markdown(msg["content"])
|
| |
|
| | if prompt := st.chat_input("Ex: Why was John selected but Sarah wasn't?"):
|
| | st.session_state.messages.append({"role": "user", "content": prompt})
|
| | with st.chat_message("user"):
|
| | st.markdown(prompt)
|
| |
|
| |
|
| | all_evals = st.session_state.result_state['evaluated_results']['all_evaluated_candidates']
|
| | top_hired = [c['name'] for c in st.session_state.result_state['evaluated_results']['top_n_hired_list']]
|
| |
|
| |
|
| | knowledge_base = []
|
| | for eval_item in all_evals:
|
| | status = "SELECTED/TOP-TIER" if eval_item['name'] in top_hired else "DESELECTED/LOWER-RANKED"
|
| | knowledge_base.append({
|
| | "name": eval_item['name'],
|
| | "score": eval_item['score'],
|
| | "status": status,
|
| | "reasoning": eval_item['review']
|
| | })
|
| |
|
| |
|
| | chat_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key)
|
| |
|
| | context_message = f"""
|
| | You are an HR Analytics Bot. You have full access to the scoring results for ALL candidates.
|
| |
|
| | JOB DESCRIPTION:
|
| | {st.session_state.jd}
|
| |
|
| | CANDIDATE DATA (Scores and Status):
|
| | {json.dumps(knowledge_base, indent=2)}
|
| |
|
| | INSTRUCTIONS:
|
| | 1. Answer questions about specific candidates using the 'reasoning' and 'score' provided.
|
| | 2. If asked why someone was deselected, compare their score/reasoning to the higher-scoring candidates.
|
| | 3. Use Markdown tables if asked to compare multiple people.
|
| | """
|
| |
|
| | with st.chat_message("assistant"):
|
| |
|
| | response = chat_llm.invoke([
|
| | ("system", context_message),
|
| | ("user", prompt)
|
| | ])
|
| | st.markdown(response.content)
|
| | st.session_state.messages.append({"role": "assistant", "content": response.content})
|
| |
|
| |
|
| |
|