meesamraza committed on
Commit
48e44c0
·
verified ·
1 Parent(s): 1c97216

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +452 -529
app.py CHANGED
@@ -18,8 +18,8 @@ import streamlit as st
18
  import pandas as pd
19
 
20
  # File parsing
21
- import fitz                   # PyMuPDF
22
- from docx import Document     # python-docx
23
 
24
  # Groq client
25
  from groq import Groq
@@ -37,553 +37,476 @@ ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
37
# Initialize Groq client (no API key -> UI warning but app still loads)
groq_client = None
if not GROQ_API_KEY:
    st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
else:
    # Key present: build the client; surface (but don't crash on) init errors.
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
46
 
47
# --- Session state defaults ---
# Column layout of the master results table (component scores + therapist fields).
_RESULT_COLUMNS = [
    'Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Phone',
    # NEW SCORE COLUMNS
    'Experience Score (40)', 'Skills Score (30)', 'Communication Score (20)', 'Certifications Score (10)',
    'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
    'Skills/Technologies', 'Certifications',
    # THERAPIST FIELDS
    'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
]

# Seed each key only when absent; factories keep the DataFrame lazy.
for _key, _default_factory in (
    ('is_admin_logged_in', lambda: False),
    ('analyzed_data', lambda: pd.DataFrame(columns=_RESULT_COLUMNS)),
    ('individual_analysis', list),
    ('run_analysis', lambda: False),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default_factory()
65
 
66
# --- Pydantic schema (the component scores are derived later, not stored here) ---
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume by the LLM."""

    name: str = "Unknown"
    email: str = ""
    phone: str = ""
    certifications: List[str] = Field(default_factory=list)
    experience_summary: str = ""
    education_summary: str = ""
    # 0-10 rating kept as a string ("8"), or "N/A" when unavailable.
    communication_skills: str = "N/A"
    technical_skills: List[str] = Field(default_factory=list)
    # Therapist-specific fields; remain "N/A" for non-therapist roles.
    aba_therapy_skills: Optional[str] = "N/A"
    rbt_bcba_certification: Optional[str] = "N/A"
    autism_care_experience_score: Optional[str] = "N/A"
80
# --- Helpers: file text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """Extract text from PDF or DOCX. Returns empty string on failure."""
    try:
        raw = uploaded_file.read()
        lowered = uploaded_file.name.lower()
        if lowered.endswith(".pdf") or raw[:5] == b"%PDF-":
            # PDF branch: page-by-page extraction via PyMuPDF.
            try:
                with fitz.open(stream=raw, filetype="pdf") as pdf:
                    return "".join(page.get_text() for page in pdf).strip()
            except Exception:
                return ""
        if lowered.endswith(".docx"):
            # DOCX branch: join the non-blank paragraphs.
            try:
                docx_doc = Document(io.BytesIO(raw))
                non_empty = (p.text for p in docx_doc.paragraphs if p.text and p.text.strip())
                return "\n".join(non_empty).strip()
            except Exception:
                return ""
        # Fallback: treat the upload as plain text bytes.
        try:
            return raw.decode('utf-8', errors='ignore')
        except Exception:
            return ""
    except Exception:
        return ""
111
# --- Groq call with streaming (collects chunks) ---
def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """
    Calls Groq with streaming enabled and collects the textual output.
    Returns the full model text, or None on failure.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None

    try:
        stream = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True,
        )

        pieces = []
        # SDK chunk shapes vary; probe object-style delta, then dict-style,
        # then a final message payload, and fall back to str(chunk).
        for chunk in stream:
            try:
                text = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None
                if text is None and isinstance(chunk, dict):
                    text = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
                if text:
                    pieces.append(text)
                else:
                    try:
                        final = getattr(chunk.choices[0].message, "content", None)
                        if final:
                            pieces.append(final)
                    except Exception:
                        pass
            except Exception:
                try:
                    pieces.append(str(chunk))
                except Exception:
                    pass

        return "".join(pieces).strip()
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return None
167
# --- Parsing model output safely to JSON ---
def extract_first_json(text: str) -> Optional[dict]:
    """
    Find the first JSON object embedded in *text* and parse it.

    Returns the parsed dict, or None when no JSON object can be recovered.

    Implementation note: the previous version attempted a recursive regex
    `(?R)`, which Python's `re` module does not support, so it always fell
    back to a greedy `(\\{.*\\})` that breaks whenever prose or a second
    object follows the JSON. Instead we try `json.JSONDecoder.raw_decode`
    at each '{' position, which correctly handles nested braces and
    trailing text.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(text, start)
            # Guard the declared contract: callers expect a dict.
            if isinstance(parsed, dict):
                return parsed
        except ValueError:
            pass
        start = text.find("{", start + 1)

    # Last resorts: parse the whole text as-is, then retry with single
    # quotes swapped for double quotes (a common LLM formatting slip).
    for candidate in (text, text.replace("'", '"')):
        try:
            parsed = json.loads(candidate)
            return parsed if isinstance(parsed, dict) else None
        except Exception:
            continue
    return None
199
# --- Analyze with Groq (cached by resume text + role) ---
def _failed_analysis() -> ResumeAnalysis:
    """Sentinel result used whenever extraction, parsing, or validation fails.

    All non-name fields rely on the schema defaults, which match the values
    the previous triplicated fallback constructors spelled out by hand.
    """
    return ResumeAnalysis(name="Extraction Failed")

@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Calls Groq (streaming) and returns a ResumeAnalysis instance.
    Uses caching to avoid duplicate calls for same resume_text+role.
    Returns the "Extraction Failed" sentinel on any model/parse/validation error.
    """
    # Role-specific guidance appended to the extraction prompt.
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

    system_user_prompt = (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )

    raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
    if not raw:
        return _failed_analysis()

    parsed = extract_first_json(raw)
    if not parsed:
        # Show raw output for debugging (admin panel will show too).
        st.warning("Failed to parse model JSON output. See raw output below for debugging.")
        st.text_area("Raw model output (debug)", raw, height=200)
        return _failed_analysis()

    # Fill in any keys the model omitted with schema defaults
    # (the dict is rebuilt per call, so list defaults are never shared).
    defaults = {
        "name": "Unknown", "email": "", "phone": "", "certifications": [],
        "experience_summary": "", "education_summary": "", "communication_skills": "N/A",
        "technical_skills": [], "aba_therapy_skills": "N/A",
        "rbt_bcba_certification": "N/A", "autism_care_experience_score": "N/A",
    }
    for key, default in defaults.items():
        parsed.setdefault(key, default)

    # Coerce rating-like fields to strings; "" / None collapse to "N/A".
    try:
        for key in ("communication_skills", "aba_therapy_skills",
                    "rbt_bcba_certification", "autism_care_experience_score"):
            parsed[key] = str(parsed.get(key) or "N/A")
    except Exception:
        pass

    # Validate via Pydantic
    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError as ve:
        st.error("Model output failed schema validation.")
        st.text_area("Raw model output (debug)", raw, height=200)
        st.exception(ve)
        return _failed_analysis()
306
# --- Scoring logic ---
def _first_number(value) -> Optional[float]:
    """Return the first decimal number found in str(value), or None.

    Shared by the communication and therapist-rating parsers, which
    previously duplicated this regex logic inline.
    """
    match = re.search(r"(\d+(\.\d+)?)", str(value))
    return float(match.group(1)) if match else None

def calculate_resume_score(analysis: "ResumeAnalysis", role: str) -> tuple[float, float, float, float, float]:
    """
    Calculates the overall score and the individual component scores.

    Components: experience (max 40), skills (max 30), communication (max 20),
    certifications (max 10), plus a therapist-specialization bonus of up to
    10 points; the overall total is capped at 100.

    Returns (final_score, exp_score, skills_score, comm_score, certs_score).
    """
    # 1. Experience: summary length saturates at 100 chars -> full 40 points.
    exp_score = round(min(len(analysis.experience_summary or "") / 100.0, 1.0) * 40.0)

    # 2. Skills: count saturates at 10 skills -> full 30 points.
    skills_score = round(min(len(analysis.technical_skills or []) / 10.0, 1.0) * 30.0)

    # 3. Communication: expects a 0-10 rating (string); default 5 when unparsable.
    try:
        comm_val = _first_number(analysis.communication_skills)
        if comm_val is None:
            comm_val = float(str(analysis.communication_skills))
        comm_val = max(0.0, min(10.0, comm_val))  # clamp to 0-10
    except Exception:
        comm_val = 5.0  # default if model extraction failed
    comm_score = round((comm_val / 10.0) * 20.0)

    # 4. Certifications: 1 point each, capped at 10.
    certs_score = min(len(analysis.certifications or []), 10) * 1.0

    total_score = exp_score + skills_score + comm_score + certs_score

    # 5. Therapist bonus: average of the two specialized 0-10 ratings,
    #    scaled to a max of 10 points; unparsable ratings count as 0.
    if role.lower() == "therapist":
        aba = _first_number(analysis.aba_therapy_skills) or 0.0
        autism = _first_number(analysis.autism_care_experience_score) or 0.0
        total_score += ((aba + autism) / 20.0) * 10.0

    final_score = round(min(total_score, 100))
    return (float(final_score), float(exp_score), float(skills_score), float(comm_score), float(certs_score))
363
# --- Append to DataFrame ---
def append_analysis_to_dataframe(job_role: str, analysis: "ResumeAnalysis", scores: tuple[float, float, float, float, float]):
    """Flatten one analysis plus its scores into a new row of st.session_state.analyzed_data."""
    final_score, exp_score, skills_score, comm_score, certs_score = scores
    fields = analysis.dict()

    def _text(key):
        # Missing/None text fields become "".
        return fields.get(key) or ""

    def _rating(key):
        # Missing/None ratings become the string "N/A".
        return str(fields.get(key) or "N/A")

    row = {
        'Name': _text("name"),
        'Job Role': job_role,
        'Resume Score (100)': final_score,
        'Shortlisted': 'No',
        'Email': _text("email"),
        'Phone': _text("phone"),
        # component scores
        'Experience Score (40)': exp_score,
        'Skills Score (30)': skills_score,
        'Communication Score (20)': comm_score,
        'Certifications Score (10)': certs_score,
        'Experience Summary': _text("experience_summary"),
        'Education Summary': _text("education_summary"),
        'Communication Rating (1-10)': _rating("communication_skills"),
        'Skills/Technologies': ", ".join(fields.get("technical_skills") or []),
        'Certifications': ", ".join(fields.get("certifications") or []),
        'ABA Skills (1-10)': _rating("aba_therapy_skills"),
        'RBT/BCBA Cert': _rating("rbt_bcba_certification"),
        'Autism-Care Exp (1-10)': _rating("autism_care_experience_score"),
    }
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])], ignore_index=True
    )
397
# --- Excel export helper ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize *df* to an in-memory .xlsx workbook and return its raw bytes."""
    buffer = io.BytesIO()
    # ExcelWriter closes (and finalizes) the workbook on context exit.
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return buffer.getvalue()
403
 
404
# --- UI Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

# Two top-level tabs: public uploader and password-protected dashboard.
_TAB_LABELS = ["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"]
tab_user, tab_admin = st.tabs(_TAB_LABELS)
408
 
409
# --- User Panel ---
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if uploaded_files:
            # Flag the work and rerun so the analysis loop below executes.
            st.session_state.run_analysis = True
            st.rerun()
        else:
            st.warning("Please upload one or more resume files to begin analysis.")

    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            with st.spinner("Processing resumes..."):
                for position, resume_file in enumerate(uploaded_files, start=1):
                    try:
                        st.write(f"Analyzing **{resume_file.name}**...")
                        resume_text = extract_text_from_file(resume_file)
                        if not resume_text:
                            st.error(f"Could not extract text from {resume_file.name}. Skipping.")
                            continue

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {resume_file.name}. See debug output.")
                            continue

                        scores = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, scores)
                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': scores[0],
                            'role': selected_role,
                            'file_name': resume_file.name,
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {resume_file.name}: {e}")
                        st.exception(traceback.format_exc())
                    finally:
                        # Advance the bar exactly once per file, success or skip.
                        progress.progress(position / total)

            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False

    # Display last results summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
480
-
481
# --- Admin Panel ---
with tab_admin:
    # Gate everything below behind the login flag; st.stop() halts rendering.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        candidates = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(candidates)}**")

        # Columns shown in the editor: identity, score breakdown, shortlist toggle.
        preferred_cols = [
            'Name',
            'Job Role',
            'Resume Score (100)',
            'Experience Score (40)',
            'Skills Score (30)',
            'Communication Score (20)',
            'Certifications Score (10)',
            'Shortlisted',
            'Email',
            'Skills/Technologies'
        ]
        # Safety check: keep only columns actually present in the table.
        visible_cols = [col for col in preferred_cols if col in candidates.columns]

        def _progress_col(label, cap):
            # All score columns share the same progress-bar styling.
            return st.column_config.ProgressColumn(label, format="%f", min_value=0, max_value=cap)

        edited = st.data_editor(
            candidates[visible_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                ),
                "Resume Score (100)": _progress_col("Total Score", 100),
                "Experience Score (40)": _progress_col("Experience (40)", 40),
                "Skills Score (30)": _progress_col("Skills (30)", 30),
                "Communication Score (20)": _progress_col("Comms (20)", 20),
                "Certifications Score (10)": _progress_col("Certs (10)", 10),
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist only the editable 'Shortlisted' column back to the master table.
        try:
            for col in edited.columns:
                if col in st.session_state.analyzed_data.columns and not edited[col].equals(st.session_state.analyzed_data[col]):
                    if col == 'Shortlisted':
                        st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited['Shortlisted'].values
        except Exception:
            # Fallback for index issues on data_editor changes
            for i, val in enumerate(edited.get('Shortlisted', []).tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("📥 Download Data")
        excel_bytes = df_to_excel_bytes(st.session_state.analyzed_data.copy())

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
588
-
589
- # --- End of file ---
 
18
  import pandas as pd
19
 
20
  # File parsing
21
+ import fitz # PyMuPDF
22
+ from docx import Document # python-docx
23
 
24
  # Groq client
25
  from groq import Groq
 
37
# Initialize Groq client (no API key -> UI warning but app still loads)
groq_client = None
if not GROQ_API_KEY:
    st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
else:
    # Build the client; report init failures without stopping the app.
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
46
 
47
# --- Session state defaults ---
# Column layout of the master results table (this revision drops the
# per-component score columns).
_RESULT_COLUMNS = [
    'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
    'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
    'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
    'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
]

# Seed each key only when absent; factories keep the DataFrame lazy.
for _key, _default_factory in (
    ('is_admin_logged_in', lambda: False),
    ('analyzed_data', lambda: pd.DataFrame(columns=_RESULT_COLUMNS)),
    ('individual_analysis', list),
    ('run_analysis', lambda: False),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default_factory()
62
 
63
# --- Pydantic schema ---
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume by the LLM."""

    name: str = "Unknown"
    email: str = ""
    phone: str = ""
    certifications: List[str] = Field(default_factory=list)
    experience_summary: str = ""
    education_summary: str = ""
    # 0-10 rating kept as a string ("8"), or "N/A" when unavailable.
    communication_skills: str = "N/A"
    technical_skills: List[str] = Field(default_factory=list)
    # Therapist-specific fields; remain "N/A" for non-therapist roles.
    aba_therapy_skills: Optional[str] = "N/A"
    rbt_bcba_certification: Optional[str] = "N/A"
    autism_care_experience_score: Optional[str] = "N/A"
77
# --- Helpers: file text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """Extract text from PDF or DOCX. Returns empty string on failure."""
    try:
        raw = uploaded_file.read()
        lowered = uploaded_file.name.lower()
        if lowered.endswith(".pdf") or raw[:5] == b"%PDF-":
            # PDF branch: page-by-page extraction via PyMuPDF.
            try:
                with fitz.open(stream=raw, filetype="pdf") as pdf:
                    return "".join(page.get_text() for page in pdf).strip()
            except Exception:
                return ""
        if lowered.endswith(".docx"):
            # DOCX branch: join the non-blank paragraphs.
            try:
                docx_doc = Document(io.BytesIO(raw))
                non_empty = (p.text for p in docx_doc.paragraphs if p.text and p.text.strip())
                return "\n".join(non_empty).strip()
            except Exception:
                return ""
        # Fallback: treat the upload as plain text bytes.
        try:
            return raw.decode('utf-8', errors='ignore')
        except Exception:
            return ""
    except Exception:
        return ""
108
# --- Groq call with streaming (collects chunks) ---
def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """
    Calls Groq with streaming enabled and collects the textual output.
    Returns the full model text, or None on failure.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None

    try:
        stream = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True,
        )

        pieces = []
        # SDK chunk shapes vary; probe object-style delta, then dict-style,
        # then a final message payload, and fall back to str(chunk).
        for chunk in stream:
            try:
                text = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None
                if text is None and isinstance(chunk, dict):
                    text = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
                if text:
                    pieces.append(text)
                else:
                    try:
                        final = getattr(chunk.choices[0].message, "content", None)
                        if final:
                            pieces.append(final)
                    except Exception:
                        pass
            except Exception:
                try:
                    pieces.append(str(chunk))
                except Exception:
                    pass

        return "".join(pieces).strip()
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return None
164
+ # --- Parsing model output safely to JSON ---
165
def extract_first_json(text: str) -> Optional[dict]:
    """
    Find and parse the first JSON object embedded in *text*.

    Scans for the first balanced ``{...}`` block (aware of string literals
    and escapes), then attempts ``json.loads`` on it — falling back to the
    whole text when no braces are found, and to a naive single-quote ->
    double-quote repair for almost-JSON model output.

    Returns:
        The parsed dict, or None when nothing parseable is found.
    """
    if not text:
        return None

    candidate = _first_balanced_braces(text)
    if candidate is None:
        # Maybe the model returned bare JSON (or JSON-like lines) with no braces match.
        candidate = text

    for attempt in (candidate, candidate.replace("'", '"')):
        try:
            parsed = json.loads(attempt)
        except Exception:
            continue
        # Enforce the declared contract: callers expect a dict, not a
        # bare list/number that json.loads would also accept.
        if isinstance(parsed, dict):
            return parsed
    return None


def _first_balanced_braces(text: str) -> Optional[str]:
    """
    Return the first balanced ``{...}`` substring of *text*, or None.

    Tracks JSON string literals and backslash escapes so braces inside
    quoted values do not confuse the depth count. (The previous regex used
    ``(?R)``, which Python's ``re`` module does not support — that branch
    always raised ``re.error`` and a greedy ``{.*}`` fallback ran instead,
    which grabs up to the LAST brace and breaks on JSON + trailing prose.)
    """
    start = text.find("{")
    if start == -1:
        return None
    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start:i + 1]
    return None
196
+ # --- Analyze with Groq (cached by resume text + role) ---
197
def _failed_resume_analysis() -> "ResumeAnalysis":
    """Sentinel ResumeAnalysis returned whenever extraction or parsing fails."""
    return ResumeAnalysis(
        name="Extraction Failed",
        email="",
        phone="",
        certifications=[],
        experience_summary="",
        education_summary="",
        communication_skills="N/A",
        technical_skills=[],
        aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A",
        autism_care_experience_score="N/A"
    )


@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Call Groq (streaming) on *resume_text* and return a validated ResumeAnalysis.

    Cached via st.cache_data, so identical (resume_text, job_role) pairs do
    not trigger duplicate API calls. On any failure (no client/empty response,
    unparseable JSON, schema violation) a sentinel analysis named
    "Extraction Failed" is returned instead of raising.
    (FIX: the identical 12-field fallback object was previously constructed
    inline three times; it is now a single helper.)
    """
    # Role-specific extraction guidance injected into the prompt.
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

    system_user_prompt = (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )

    raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)

    if not raw:
        return _failed_resume_analysis()

    parsed = extract_first_json(raw)
    if not parsed:
        # Surface the raw model text so a developer/admin can debug the prompt.
        st.warning("Failed to parse model JSON output. See raw output below for debugging.")
        st.text_area("Raw model output (debug)", raw, height=200)
        return _failed_resume_analysis()

    # Fill in any keys the model omitted so Pydantic validation cannot
    # fail on missing fields.
    defaults = {
        "name": "Unknown",
        "email": "",
        "phone": "",
        "certifications": [],
        "experience_summary": "",
        "education_summary": "",
        "communication_skills": "N/A",
        "technical_skills": [],
        "aba_therapy_skills": "N/A",
        "rbt_bcba_certification": "N/A",
        "autism_care_experience_score": "N/A",
    }
    for key, default in defaults.items():
        parsed.setdefault(key, default)

    # The schema expects these as strings; models often return bare numbers.
    for key in ("communication_skills", "aba_therapy_skills", "rbt_bcba_certification", "autism_care_experience_score"):
        try:
            parsed[key] = str(parsed.get(key) or "N/A")
        except Exception:
            pass

    # Validate via Pydantic; fall back to the sentinel on schema violations.
    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError as ve:
        st.error("Model output failed schema validation.")
        st.text_area("Raw model output (debug)", raw, height=200)
        st.exception(ve)
        return _failed_resume_analysis()
303
+ # --- Scoring logic ---
304
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    """
    Compute an overall 0-100 resume score from an analysis.

    Weighting: experience-summary length (max 40), number of technical
    skills (max 30), communication rating on a 0-10 scale (max 20), and
    certification count (max 10); the 'Therapist' role earns a
    specialisation bonus of up to 10 extra points. The total is rounded
    and capped at 100.
    """
    score = 0.0

    # Experience: up to 40 points, saturating at 100 characters of summary.
    summary_chars = len(analysis.experience_summary or "")
    score += 40.0 * min(summary_chars / 100.0, 1.0)

    # Skills: up to 30 points, saturating at 10 listed technologies.
    n_skills = len(analysis.technical_skills or [])
    score += 30.0 * min(n_skills / 10.0, 1.0)

    # Communication: first number found in the rating string, clamped
    # to 0-10, worth up to 20 points; neutral 5.0 when unparseable.
    try:
        found = re.search(r"(\d+(\.\d+)?)", str(analysis.communication_skills))
        rating = float(found.group(1)) if found else float(str(analysis.communication_skills))
        rating = min(10.0, max(0.0, rating))
    except Exception:
        rating = 5.0
    score += 20.0 * (rating / 10.0)

    # Certifications: one point each, capped at 10.
    score += float(min(len(analysis.certifications or []), 10))

    # Therapist specialisation bonus: up to 10 extra points from the two
    # therapist-specific sub-scores (each 0-10).
    if role.lower() == "therapist":
        def first_number(raw) -> float:
            try:
                found = re.search(r"(\d+(\.\d+)?)", str(raw))
                return float(found.group(1)) if found else 0.0
            except Exception:
                return 0.0

        bonus_basis = first_number(analysis.aba_therapy_skills) + first_number(analysis.autism_care_experience_score)
        score += 10.0 * (bonus_basis / 20.0)

    return float(round(min(score, 100)))
345
+ # --- Append to DataFrame ---
346
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """
    Append one analyzed candidate to the session-state results table.

    FIX: the session-state schema declares 'Experience Score (40)',
    'Skills Score (30)', 'Communication Score (20)' and 'Certifications
    Score (10)' columns, but the appended row never populated them, leaving
    permanent NaN columns in the dashboard/export. They are now derived
    here with the same weighting used by calculate_resume_score().
    """
    data = analysis.dict()
    tech = ", ".join(data.get("technical_skills") or [])
    certs = ", ".join(data.get("certifications") or [])

    # Derived per-category sub-scores (mirror calculate_resume_score weights).
    exp_score = round(min(len(data.get("experience_summary") or "") / 100.0, 1.0) * 40.0, 1)
    skills_score = round(min(len(data.get("technical_skills") or []) / 10.0, 1.0) * 30.0, 1)
    try:
        m = re.search(r"(\d+(\.\d+)?)", str(data.get("communication_skills")))
        comm_val = float(m.group(1)) if m else float(str(data.get("communication_skills")))
        comm_val = max(0.0, min(10.0, comm_val))
    except Exception:
        comm_val = 5.0  # neutral default when the rating is unparseable
    comm_score = round((comm_val / 10.0) * 20.0, 1)
    cert_score = float(min(len(data.get("certifications") or []), 10))

    row = {
        'Name': data.get("name") or "",
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get("email") or "",
        'Phone': data.get("phone") or "",
        'Shortlisted': 'No',
        'Experience Score (40)': exp_score,
        'Skills Score (30)': skills_score,
        'Communication Score (20)': comm_score,
        'Certifications Score (10)': cert_score,
        'Experience Summary': data.get("experience_summary") or "",
        'Education Summary': data.get("education_summary") or "",
        'Communication Rating (1-10)': str(data.get("communication_skills") or "N/A"),
        'Skills/Technologies': tech,
        'Certifications': certs,
        'ABA Skills (1-10)': str(data.get("aba_therapy_skills") or "N/A"),
        'RBT/BCBA Cert': str(data.get("rbt_bcba_certification") or "N/A"),
        'Autism-Care Exp (1-10)': str(data.get("autism_care_experience_score") or "N/A"),
    }
    new_df = pd.DataFrame([row])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
369
+ # --- Excel export helper ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize *df* to an in-memory .xlsx workbook and return its raw bytes."""
    buffer = io.BytesIO()
    # The context manager finalizes the workbook before we read the buffer.
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return buffer.getvalue()
375
 
376
# --- UI Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# --- User Panel ---
# Flow: selecting a role + uploading files arms the "Analyze" button; the
# button sets run_analysis and reruns the script, and the follow-up branch
# below performs the actual per-file extraction/analysis pass.
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            # Defer the heavy work to the next script run so the button
            # press itself stays responsive.
            st.session_state.run_analysis = True
            st.rerun()

    if st.session_state.get("run_analysis", False):
        # uploaded_files persists across the rerun via widget state.
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            # NOTE(review): the finally block updates progress
                            # again after this continue — harmless (same value)
                            # but redundant.
                            progress.progress(idx / total)
                            continue

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)

                        # Sentinel name signals extraction/parse failure upstream.
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            progress.progress(idx / total)
                            continue

                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)

                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # NOTE(review): st.exception expects an exception
                        # object; traceback.format_exc() returns a str —
                        # consider st.exception(e) or st.code(...) instead.
                        st.exception(traceback.format_exc())
                    finally:
                        progress.progress(idx / total)

            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False

    # Display last results summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
+ # --- Admin Panel ---
 
 
452
with tab_admin:
    # Gate the dashboard behind a simple password check; st.stop() halts
    # script execution so nothing below renders for anonymous visitors.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        # Work on a copy so edits flow through the data_editor, not the frame.
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Only a summary subset is shown/editable; the full table goes to export.
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist shortlist edits back into session state.
        # NOTE(review): both paths assume the editor preserves row order and
        # count relative to analyzed_data — confirm this holds if sorting or
        # row add/delete is ever enabled on the editor.
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            # Fallback: positional, row-by-row assignment.
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("📥 Download Data")
        # Export the FULL table (all extracted fields), not just display_cols.
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# --- End of file ---