Spaces:

IET-DEV
/

HR-Bot-V1

Sleeping

App Files Files Community

Tarun-intellentech committed on Jan 16

Commit

2f174fb

verified ·

1 Parent(s): b6b2b14

Upload 3 files

Browse files

Files changed (3) hide show

.env +1 -0
app.py +306 -0
requirements.txt +10 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ MISTRAL_API_KEY = <REDACTED - never commit real keys; set this via an environment variable or a Space secret, and rotate the exposed key>

app.py ADDED Viewed

	@@ -0,0 +1,306 @@

+import os
+import json
+import re
+import gdown
+import shutil
+import streamlit as st
+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from typing import TypedDict, List
+from pydantic import BaseModel, Field
+# Mistral & LangGraph Imports
+from langchain_mistralai import ChatMistralAI
+from langgraph.graph import StateGraph, START, END
+# =================================================================
+# 1. SETUP & UI STYLING
+# =================================================================
+st.set_page_config(page_title="HR AI Agent", layout="wide", page_icon="👤")
+load_dotenv()
+# Use st.secrets for cloud or os.environ for local
+api_key = os.environ.get("MISTRAL_API_KEY") or st.secrets.get("MISTRAL_API_KEY")
+if not api_key:
+    st.error("🔑 Mistral API Key not found. Please set it in your environment variables or secrets.")
+    st.stop()
+# =================================================================
+# 2. DATA SCHEMAS
+# =================================================================
+class ScoredCandidate(BaseModel):  # Structured result requested from the LLM for one candidate (used with with_structured_output in rank_candidates_node).
+    name: str  # Candidate name as returned by the model.
+    score: float = Field(..., description="Objective score 0.00-100.00.")  # Required; the 0-100 range is only stated in the description, it is not validated.
+    review: str = Field(..., description="Exactly 2 lines of review comment.")  # Required; the two-line limit is likewise only a hint in the description.
+class AgentState(TypedDict):  # State dict handed from node to node in the graph; each node returns the keys it updates.
+    gdrive_link: str  # Google Drive folder link entered in the UI.
+    job_description: str  # Job description text entered in the UI.
+    num_to_hire: int  # Number of top-scoring candidates to shortlist.
+    raw_candidates: List[dict]  # Per-resume dicts from the parser node (LLM-extracted fields plus "resume_text").
+    evaluated_results: dict  # Written by the ranker node: keys "all_evaluated_candidates" and "top_n_hired_list".
+    final_report: str  # Markdown report written by the reporter node.
+# =================================================================
+# 3. HELPER FUNCTIONS
+# =================================================================
+def download_from_gdrive(url):
+    temp_dir = "temp_resumes"
+    if os.path.exists(temp_dir):
+        shutil.rmtree(temp_dir)
+    os.makedirs(temp_dir)
+    try:
+        # Note: GDrive folders must be "Anyone with the link"
+        gdown.download_folder(url, output=temp_dir, quiet=True, remaining_ok=True, use_cookies=False)
+        return temp_dir
+    except Exception as e:
+        st.error(f"Error downloading from Google Drive: {e}")
+        return None
+def process_pdfs_to_json(folder_path):
+    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
+    all_candidates_json = []
+    # Get all PDFs, including those in subfolders created by gdown
+    files = []
+    for root, dirs, filenames in os.walk(folder_path):
+        for f in filenames:
+            if f.lower().endswith(".pdf"):
+                files.append(os.path.join(root, f))
+    if not files:
+        st.warning("No PDF files found in the folder.")
+        return []
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+    for i, path in enumerate(files):
+        filename = os.path.basename(path)
+        status_text.text(f"🔍 Analyzing: {filename}")
+        try:
+            reader = PdfReader(path)
+            raw_text = "".join([page.extract_text() or "" for page in reader.pages])
+            if len(raw_text.strip()) < 50:
+                continue # Skip empty or scanned PDFs without OCR
+            prompt = f"Extract details from this resume into JSON (name, email, phone, skills, experience_years):\n{raw_text[:7000]}"
+            response = llm.invoke(prompt)
+            json_match = re.search(r"\{.*\}", response.content, re.DOTALL)
+            if json_match:
+                candidate_data = json.loads(json_match.group())
+                candidate_data["resume_text"] = raw_text
+                all_candidates_json.append(candidate_data)
+        except Exception:
+            pass
+        progress_bar.progress((i + 1) / len(files))
+    status_text.empty()
+    progress_bar.empty()
+    return all_candidates_json
+# =================================================================
+# 4. AGENT NODES
+# =================================================================
+def extract_resumes_node(state: AgentState):
+    st.write("---")
+    st.info("⚡ **Phase 1:** Fetching resumes from Google Drive...")
+    temp_path = download_from_gdrive(state['gdrive_link'])
+    if temp_path:
+        candidates = process_pdfs_to_json(temp_path)
+        shutil.rmtree(temp_path) # Cleanup
+        return {"raw_candidates": candidates}
+    return {"raw_candidates": []}
+def rank_candidates_node(state: AgentState):
+    """
+    Evaluates candidates using a strict weighted rubric and 0-temperature
+    to ensure deterministic and consistent scoring.
+    """
+    print("\n" + "="*50)
+    print("🚀 STEP 2: DETERMINISTIC SCORING ENGINE")
+    print("="*50)
+    # Initialize LLM with Temperature 0 for consistency
+    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
+    structured_llm = llm.with_structured_output(ScoredCandidate)
+    scored_list = []
+    for cand in state['raw_candidates']:
+        name = cand.get('name', 'Unknown Candidate')
+        print(f"🧠 Analyzing: {name}...")
+        # OPTIMIZED PROMPT: Using a Point-Based Rubric
+        prompt = f"""
+        YOU ARE AN EXPERT RECRUITER. Evaluate the candidate against the Job Description (JD).
+        ### JOB DESCRIPTION:
+        {state['job_description']}
+        ### CANDIDATE DATA:
+        {json.dumps(cand)}
+        ### SCORING RUBRIC (Strict 100-Point Scale):
+        1. Technical Skill Match (40 pts): Compare 'skills' in candidate data to JD requirements.
+        2. Experience Level (30 pts): Rate years of experience and seniority fit.
+        3. Industry Fit (20 pts): Does their previous experience align with this JD's industry?
+        4. Education/Certifications (10 pts): Does the candidate meet the degree requirements?
+        ### RULES:
+        - You must be OBJECTIVE. If a skill is not explicitly mentioned, do not award points for it.
+        - Temperature is set to 0; provide the most logical mathematical score.
+        - The 'review' must explain exactly why points were deducted.
+        - You must not make tie between candidates.
+        """
+        try:
+            # Mistral performs the evaluation based on the rubric above
+            result = structured_llm.invoke(prompt)
+            if result:
+                scored_list.append(result.model_dump())
+                print(f"✅ Scored {name}: {result.score}/100")
+            else:
+                scored_list.append({"name": name, "score": 0.0, "review": "Parsing error in AI output."})
+        except Exception as e:
+            print(f"⚠️ Error scoring {name}: {e}")
+            scored_list.append({"name": name, "score": 0.0, "review": f"Processing Error: {str(e)}"})
+    # SORTING: Ensures the list is ordered by score (highest first)
+    sorted_all = sorted(scored_list, key=lambda x: x['score'], reverse=True)
+    # OUTPUT: Returns the updated state to the LangGraph
+    return {
+        "evaluated_results": {
+            "all_evaluated_candidates": scored_list,
+            "top_n_hired_list": sorted_all[:state['num_to_hire']]
+        }
+    }
+def report_node(state: AgentState):
+    st.info("⚡ **Phase 3:** Compiling final report...")
+    evals = state['evaluated_results']['top_n_hired_list']
+    report = "\n".join([f"🏆 **{c['name']}** (Score: {c['score']})\n{c['review']}\n" for c in evals])
+    return {"final_report": report}
+# =================================================================
+# 5. GRAPH ORCHESTRATION
+# =================================================================
+workflow = StateGraph(AgentState)
+workflow.add_node("parser", extract_resumes_node)
+workflow.add_node("ranker", rank_candidates_node)
+workflow.add_node("reporter", report_node)
+workflow.add_edge(START, "parser")
+workflow.add_edge("parser", "ranker")
+workflow.add_edge("ranker", "reporter")
+workflow.add_edge("reporter", END)
+app = workflow.compile()
+# =================================================================
+# 6. UI LAYOUT
+# =================================================================
+st.title("🌟 AI HR Agent: Google Drive Edition")
+col1, col2 = st.columns([2, 1])
+with col1:
+    jd_input = st.text_area("📋 Job Description", placeholder="Paste the job requirements here...", height=200)
+with col2:
+    gdrive_link = st.text_input("🔗 Public GDrive Folder Link")
+    hire_count = st.number_input("Selection Count (Top N)", min_value=1, max_value=20, value=3)
+    analyze_btn = st.button("🚀 Run Analysis", type="primary", use_container_width=True)
+if analyze_btn:
+    if not jd_input or not gdrive_link:
+        st.warning("Please provide both a Job Description and a Google Drive Link.")
+    else:
+        inputs = {
+            "gdrive_link": gdrive_link,
+            "job_description": jd_input,
+            "num_to_hire": int(hire_count),
+            "raw_candidates": []
+        }
+        with st.status("AI Agent is working...", expanded=True) as status:
+            final_state = app.invoke(inputs)
+            status.update(label="Analysis Complete!", state="complete")
+        st.session_state.result_state = final_state
+        st.session_state.jd = jd_input
+        st.success("### 📋 Shortlisted Candidates")
+        st.markdown(final_state["final_report"])
+# =================================================================
+# 7. CHATBOT (FIXED: ACCESS TO ALL CANDIDATES)
+# =================================================================
+if "result_state" in st.session_state:
+    st.divider()
+    st.subheader("💬 Deep-Dive: Ask the HR Agent")
+    # Initialize chat history
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    # Display chat history
+    for msg in st.session_state.messages:
+        with st.chat_message(msg["role"]):
+            st.markdown(msg["content"])
+    if prompt := st.chat_input("Ex: Why was John selected but Sarah wasn't?"):
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        # 1. PREPARE LEAN DATA (Crucial: Removes heavy resume_text)
+        all_evals = st.session_state.result_state['evaluated_results']['all_evaluated_candidates']
+        top_hired = [c['name'] for c in st.session_state.result_state['evaluated_results']['top_n_hired_list']]
+        # Build a summarized list of EVERY candidate
+        knowledge_base = []
+        for eval_item in all_evals:
+            status = "SELECTED/TOP-TIER" if eval_item['name'] in top_hired else "DESELECTED/LOWER-RANKED"
+            knowledge_base.append({
+                "name": eval_item['name'],
+                "score": eval_item['score'],
+                "status": status,
+                "reasoning": eval_item['review']
+            })
+        # 2. SYSTEM INSTRUCTIONS FOR THE AI
+        chat_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key)
+        context_message = f"""
+        You are an HR Analytics Bot. You have full access to the scoring results for ALL candidates.
+        JOB DESCRIPTION:
+        {st.session_state.jd}
+        CANDIDATE DATA (Scores and Status):
+        {json.dumps(knowledge_base, indent=2)}
+        INSTRUCTIONS:
+        1. Answer questions about specific candidates using the 'reasoning' and 'score' provided.
+        2. If asked why someone was deselected, compare their score/reasoning to the higher-scoring candidates.
+        3. Use Markdown tables if asked to compare multiple people.
+        """
+        with st.chat_message("assistant"):
+            # Use a list of messages (System + User) for better steering
+            response = chat_llm.invoke([
+                ("system", context_message),
+                ("user", prompt)
+            ])
+            st.markdown(response.content)
+            st.session_state.messages.append({"role": "assistant", "content": response.content})

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+langchain-mistralai
+langgraph
+pydantic
+python-dotenv
+PyPDF2
+gdown
+langchain-chroma
+langchain-community
+langchain-text-splitters
+nest-asyncio