Tarun-intellentech committed on
Commit
2f174fb
·
verified ·
1 Parent(s): b6b2b14

Upload 3 files

Browse files
Files changed (3) hide show
  1. .env +1 -0
  2. app.py +306 -0
  3. requirements.txt +10 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ MISTRAL_API_KEY=<your-mistral-api-key>  # SECURITY: a real key was committed here — revoke it and never commit secrets; load via environment or Streamlit secrets
app.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import re
4
+ import gdown
5
+ import shutil
6
+ import streamlit as st
7
+ from dotenv import load_dotenv
8
+ from PyPDF2 import PdfReader
9
+ from typing import TypedDict, List
10
+ from pydantic import BaseModel, Field
11
+
12
+ # Mistral & LangGraph Imports
13
+ from langchain_mistralai import ChatMistralAI
14
+ from langgraph.graph import StateGraph, START, END
15
+
16
# =================================================================
# 1. SETUP & UI STYLING
# =================================================================
st.set_page_config(page_title="HR AI Agent", layout="wide", page_icon="πŸ‘€")
load_dotenv()

# Resolve the Mistral API key. Environment variables (populated locally via
# .env through load_dotenv) take precedence; Streamlit secrets are the
# fallback for cloud deployments.
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    # BUG FIX: st.secrets raises (FileNotFoundError /
    # StreamlitSecretNotFoundError depending on version) when no
    # secrets.toml exists at all, so an unguarded `or st.secrets.get(...)`
    # crashed purely-local runs. Guard the lookup instead.
    try:
        api_key = st.secrets.get("MISTRAL_API_KEY")
    except Exception:
        api_key = None

if not api_key:
    st.error("πŸ”‘ Mistral API Key not found. Please set it in your environment variables or secrets.")
    st.stop()
28
+
29
+ # =================================================================
30
+ # 2. DATA SCHEMAS
31
+ # =================================================================
32
class ScoredCandidate(BaseModel):
    """Structured-output schema for a single candidate's evaluation.

    Filled by the LLM via ``with_structured_output`` in the ranking node.
    """
    name: str
    # Rubric-based score; the prompt instructs a strict 0.00-100.00 scale.
    score: float = Field(..., description="Objective score 0.00-100.00.")
    # Short free-text justification explaining where points were deducted.
    review: str = Field(..., description="Exactly 2 lines of review comment.")
36
+
37
class AgentState(TypedDict):
    """Shared state threaded through the LangGraph pipeline."""
    gdrive_link: str  # public Google Drive folder URL supplied by the user
    job_description: str  # JD text pasted into the UI
    num_to_hire: int  # top-N cutoff for the shortlist
    raw_candidates: List[dict]  # parsed resume dicts produced by Phase 1
    evaluated_results: dict  # Phase 2 output: all scored candidates + top-N list
    final_report: str  # markdown report rendered by Phase 3
44
+
45
+ # =================================================================
46
+ # 3. HELPER FUNCTIONS
47
+ # =================================================================
48
def download_from_gdrive(url):
    """Mirror a public Google Drive folder into a fresh local directory.

    Returns the directory path on success, or None (after surfacing the
    error in the Streamlit UI) when the download fails.
    """
    target = "temp_resumes"

    # Start from a clean slate so stale resumes from a prior run never leak in.
    if os.path.exists(target):
        shutil.rmtree(target)
    os.makedirs(target)

    try:
        # Note: the GDrive folder must be shared as "Anyone with the link".
        gdown.download_folder(url, output=target, quiet=True, remaining_ok=True, use_cookies=False)
    except Exception as exc:
        st.error(f"Error downloading from Google Drive: {exc}")
        return None
    return target
61
+
62
def process_pdfs_to_json(folder_path):
    """Parse every PDF under *folder_path* into a structured candidate dict.

    Each PDF's text is extracted with PyPDF2 and handed to Mistral, which is
    prompted to return JSON fields (name, email, phone, skills,
    experience_years). The full raw resume text is attached under
    ``resume_text`` for later phases.

    Returns a list of candidate dicts (possibly empty).
    """
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    all_candidates_json = []

    # Get all PDFs, including those in subfolders created by gdown
    files = []
    for root, dirs, filenames in os.walk(folder_path):
        for f in filenames:
            if f.lower().endswith(".pdf"):
                files.append(os.path.join(root, f))

    if not files:
        st.warning("No PDF files found in the folder.")
        return []

    progress_bar = st.progress(0)
    status_text = st.empty()
    failed = []  # files that raised during parsing, reported once at the end

    for i, path in enumerate(files):
        filename = os.path.basename(path)
        # BUG FIX: the status line previously showed a hard-coded
        # "(unknown)" placeholder; show the file actually being analyzed.
        status_text.text(f"πŸ” Analyzing: {filename}")
        try:
            reader = PdfReader(path)
            raw_text = "".join([page.extract_text() or "" for page in reader.pages])

            if len(raw_text.strip()) < 50:
                continue  # Skip empty or scanned PDFs without OCR

            prompt = f"Extract details from this resume into JSON (name, email, phone, skills, experience_years):\n{raw_text[:7000]}"
            response = llm.invoke(prompt)
            # The model may wrap the JSON in prose; grab the outermost braces.
            json_match = re.search(r"\{.*\}", response.content, re.DOTALL)
            if json_match:
                candidate_data = json.loads(json_match.group())
                candidate_data["resume_text"] = raw_text
                all_candidates_json.append(candidate_data)
        except Exception:
            # Best-effort: one corrupt PDF or malformed LLM reply must not
            # abort the batch, but the user should still learn it was skipped.
            failed.append(filename)
        progress_bar.progress((i + 1) / len(files))

    status_text.empty()
    progress_bar.empty()
    if failed:
        st.warning(f"Skipped {len(failed)} unreadable file(s): {', '.join(failed)}")
    return all_candidates_json
104
+
105
+ # =================================================================
106
+ # 4. AGENT NODES
107
+ # =================================================================
108
def extract_resumes_node(state: AgentState):
    """Phase 1 node: pull resumes from Google Drive and parse them to JSON."""
    st.write("---")
    st.info("⚑ **Phase 1:** Fetching resumes from Google Drive...")

    folder = download_from_gdrive(state['gdrive_link'])
    if not folder:
        # Download failed (error already shown in the UI) — continue with
        # an empty candidate list so downstream nodes still run.
        return {"raw_candidates": []}

    parsed = process_pdfs_to_json(folder)
    shutil.rmtree(folder)  # Cleanup
    return {"raw_candidates": parsed}
117
+
118
+
119
+
120
def rank_candidates_node(state: AgentState):
    """Phase 2 node: score every parsed candidate against the JD.

    Uses a strict weighted rubric with temperature 0 so scoring is
    deterministic and consistent across runs. Returns ``evaluated_results``
    holding both the full scored list and the top-N shortlist.
    """
    # CONSISTENCY FIX: Phases 1 and 3 announce themselves via st.info while
    # this node only printed to the server console, leaving the UI silent
    # during the slowest phase. Keep the console log, add the UI message.
    st.info("⚑ **Phase 2:** Scoring candidates against the job description...")

    print("\n" + "="*50)
    print("πŸš€ STEP 2: DETERMINISTIC SCORING ENGINE")
    print("="*50)

    # Initialize LLM with Temperature 0 for consistency
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    structured_llm = llm.with_structured_output(ScoredCandidate)

    scored_list = []

    for cand in state['raw_candidates']:
        name = cand.get('name', 'Unknown Candidate')
        print(f"🧠 Analyzing: {name}...")

        # Point-based rubric keeps the model's scoring objective & comparable.
        prompt = f"""
        YOU ARE AN EXPERT RECRUITER. Evaluate the candidate against the Job Description (JD).

        ### JOB DESCRIPTION:
        {state['job_description']}

        ### CANDIDATE DATA:
        {json.dumps(cand)}

        ### SCORING RUBRIC (Strict 100-Point Scale):
        1. Technical Skill Match (40 pts): Compare 'skills' in candidate data to JD requirements.
        2. Experience Level (30 pts): Rate years of experience and seniority fit.
        3. Industry Fit (20 pts): Does their previous experience align with this JD's industry?
        4. Education/Certifications (10 pts): Does the candidate meet the degree requirements?

        ### RULES:
        - You must be OBJECTIVE. If a skill is not explicitly mentioned, do not award points for it.
        - Temperature is set to 0; provide the most logical mathematical score.
        - The 'review' must explain exactly why points were deducted.
        - You must not make tie between candidates.
        """

        try:
            # Mistral performs the evaluation based on the rubric above
            result = structured_llm.invoke(prompt)

            if result:
                scored_list.append(result.model_dump())
                print(f"βœ… Scored {name}: {result.score}/100")
            else:
                # Structured output came back empty — record a zero rather
                # than dropping the candidate silently.
                scored_list.append({"name": name, "score": 0.0, "review": "Parsing error in AI output."})

        except Exception as e:
            print(f"⚠️ Error scoring {name}: {e}")
            scored_list.append({"name": name, "score": 0.0, "review": f"Processing Error: {str(e)}"})

    # SORTING: highest score first; sorted() is stable, so equal scores keep
    # their original relative order.
    sorted_all = sorted(scored_list, key=lambda x: x['score'], reverse=True)

    # OUTPUT: Returns the updated state to the LangGraph
    return {
        "evaluated_results": {
            "all_evaluated_candidates": scored_list,
            "top_n_hired_list": sorted_all[:state['num_to_hire']]
        }
    }
186
+
187
+
188
def report_node(state: AgentState):
    """Phase 3 node: render the top-N shortlist as a markdown report."""
    st.info("⚑ **Phase 3:** Compiling final report...")

    sections = []
    for winner in state['evaluated_results']['top_n_hired_list']:
        sections.append(f"πŸ† **{winner['name']}** (Score: {winner['score']})\n{winner['review']}\n")
    return {"final_report": "\n".join(sections)}
193
+
194
+ # =================================================================
195
+ # 5. GRAPH ORCHESTRATION
196
+ # =================================================================
197
# Build the linear three-phase pipeline: parse -> rank -> report.
workflow = StateGraph(AgentState)
workflow.add_node("parser", extract_resumes_node)    # Phase 1: fetch + parse resumes
workflow.add_node("ranker", rank_candidates_node)    # Phase 2: rubric scoring
workflow.add_node("reporter", report_node)           # Phase 3: markdown report
workflow.add_edge(START, "parser")
workflow.add_edge("parser", "ranker")
workflow.add_edge("ranker", "reporter")
workflow.add_edge("reporter", END)
# Compiled graph; invoked once per button click by the UI below.
app = workflow.compile()
206
+
207
+ # =================================================================
208
+ # 6. UI LAYOUT
209
+ # =================================================================
210
st.title("🌟 AI HR Agent: Google Drive Edition")

# Wide column for the JD editor, narrow column for the run controls.
col1, col2 = st.columns([2, 1])

with col1:
    jd_input = st.text_area("πŸ“‹ Job Description", placeholder="Paste the job requirements here...", height=200)

with col2:
    gdrive_link = st.text_input("πŸ”— Public GDrive Folder Link")
    hire_count = st.number_input("Selection Count (Top N)", min_value=1, max_value=20, value=3)
    analyze_btn = st.button("πŸš€ Run Analysis", type="primary", use_container_width=True)

if analyze_btn:
    if not jd_input or not gdrive_link:
        st.warning("Please provide both a Job Description and a Google Drive Link.")
    else:
        # Seed the graph state; evaluated_results/final_report are filled
        # in by the ranker and reporter nodes.
        inputs = {
            "gdrive_link": gdrive_link,
            "job_description": jd_input,
            "num_to_hire": int(hire_count),
            "raw_candidates": []
        }

        # Run the whole pipeline synchronously inside a status container so
        # the per-phase st.info messages appear as live progress.
        with st.status("AI Agent is working...", expanded=True) as status:
            final_state = app.invoke(inputs)
            status.update(label="Analysis Complete!", state="complete")

        # Persist results across reruns so the chatbot section (below) can
        # answer follow-up questions about this analysis.
        st.session_state.result_state = final_state
        st.session_state.jd = jd_input

        st.success("### πŸ“‹ Shortlisted Candidates")
        st.markdown(final_state["final_report"])
242
+
243
# =================================================================
# 7. CHATBOT (FIXED: ACCESS TO ALL CANDIDATES)
# =================================================================
# Follow-up Q&A over the last analysis; only shown once results exist.
if "result_state" in st.session_state:
    st.divider()
    st.subheader("πŸ’¬ Deep-Dive: Ask the HR Agent")

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ex: Why was John selected but Sarah wasn't?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # 1. PREPARE LEAN DATA (Crucial: Removes heavy resume_text)
        all_evals = st.session_state.result_state['evaluated_results']['all_evaluated_candidates']
        top_hired = [c['name'] for c in st.session_state.result_state['evaluated_results']['top_n_hired_list']]

        # Build a summarized list of EVERY candidate
        knowledge_base = []
        for eval_item in all_evals:
            status = "SELECTED/TOP-TIER" if eval_item['name'] in top_hired else "DESELECTED/LOWER-RANKED"
            knowledge_base.append({
                "name": eval_item['name'],
                "score": eval_item['score'],
                "status": status,
                "reasoning": eval_item['review']
            })

        # 2. SYSTEM INSTRUCTIONS FOR THE AI
        chat_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key)

        context_message = f"""
        You are an HR Analytics Bot. You have full access to the scoring results for ALL candidates.

        JOB DESCRIPTION:
        {st.session_state.jd}

        CANDIDATE DATA (Scores and Status):
        {json.dumps(knowledge_base, indent=2)}

        INSTRUCTIONS:
        1. Answer questions about specific candidates using the 'reasoning' and 'score' provided.
        2. If asked why someone was deselected, compare their score/reasoning to the higher-scoring candidates.
        3. Use Markdown tables if asked to compare multiple people.
        """

        with st.chat_message("assistant"):
            # BUG FIX: only the latest user prompt used to be sent, so the
            # bot had no memory of the conversation rendered on screen.
            # Replay the visible history after the system message — the
            # current prompt is already its last entry.
            history = [(m["role"], m["content"]) for m in st.session_state.messages]
            response = chat_llm.invoke([("system", context_message)] + history)
            st.markdown(response.content)
            st.session_state.messages.append({"role": "assistant", "content": response.content})
305
+
306
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain-mistralai
2
+ langgraph
3
+ pydantic
4
+ python-dotenv
5
+ PyPDF2
6
+ gdown
7
+ langchain-chroma
8
+ langchain-community
9
+ langchain-text-splitters
10
+ nest-asyncio