meesamraza commited on
Commit
80e6947
·
verified ·
1 Parent(s): 320d29b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +377 -260
app.py CHANGED
@@ -1,372 +1,487 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import io
 
 
 
 
4
  import os
5
- import fitz
6
- import docx2txt
7
- import tempfile
8
- from groq import Groq
9
  from dotenv import load_dotenv
10
- from pydantic import BaseModel, Field, ValidationError
 
 
 
 
 
11
  from typing import Optional, List
12
 
13
- # --------------------
14
- # Config & Secrets
15
- # --------------------
16
- # Ensure page config is the very first Streamlit command (done here)
17
- st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
18
 
19
- # Load local .env if present (useful for local testing)
20
- load_dotenv()
 
21
 
22
- # Try multiple locations for the API key: environment variables, Streamlit secrets
23
- GROQ_API_KEY = os.getenv("GROQ_API_KEY") or os.getenv("GROQ_APIKEY")
24
- if not GROQ_API_KEY:
25
- # If deployed on Streamlit Cloud or similar, users might put secrets in st.secrets
26
- try:
27
- GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
28
- except Exception:
29
- GROQ_API_KEY = None
30
 
31
- # Admin password (for demo). In production, store this in secrets.
32
- ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
 
 
 
33
 
34
- # Initialize Groq Client (if key present)
 
 
 
 
35
  groq_client = None
36
- if GROQ_API_KEY:
 
 
 
37
  try:
38
  groq_client = Groq(api_key=GROQ_API_KEY)
39
  except Exception as e:
40
- st.warning(f"Warning: Failed to initialize Groq client: {e}")
41
  groq_client = None
42
- else:
43
- st.warning("GROQ_API_KEY not found in environment or Streamlit secrets. The app will run in fallback mode.")
44
 
45
- # --------------------
46
- # Session state init
47
- # --------------------
48
  if 'is_admin_logged_in' not in st.session_state:
49
  st.session_state.is_admin_logged_in = False
50
-
51
  if 'analyzed_data' not in st.session_state:
52
  initial_cols = [
53
  'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
54
- 'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
55
- 'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
56
  'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
57
  ]
58
  st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
 
 
 
 
 
59
 
60
- # --------------------
61
- # Pydantic Schema
62
- # --------------------
63
  class ResumeAnalysis(BaseModel):
64
  name: str = Field(description="Full name of the candidate.")
65
  email: str = Field(description="Professional email address.")
66
  phone: str = Field(description="Primary phone number.")
67
  certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
68
- experience_summary: str = Field(default="", description="A concise summary of the candidate's professional experience.")
69
- education_summary: str = Field(default="", description="A concise summary of the candidate's highest education.")
70
-
71
- communication_skills: str = Field(default="N/A", description="A score as a STRING (e.g., '8') or description of communication skills.")
72
- technical_skills: List[str] = Field(default_factory=list, description="List of technical skills/technologies mentioned.")
 
 
73
 
74
- aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7').")
75
- rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'.")
76
- autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9').")
77
-
78
- # --------------------
79
- # Helpers
80
- # --------------------
81
 
 
82
  def extract_text_from_file(uploaded_file) -> str:
83
- """Extract text from uploaded file safely by writing to a temp file."""
 
 
 
84
  try:
85
- suffix = os.path.splitext(uploaded_file.name)[1].lower()
86
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
87
- tmp.write(uploaded_file.read())
88
- tmp_path = tmp.name
89
-
90
- text = ""
91
- if suffix == '.pdf':
 
92
  try:
93
- doc = fitz.open(tmp_path)
94
- for page in doc:
95
- text += page.get_text()
96
- doc.close()
 
97
  except Exception as e:
98
- st.error(f"PDF extraction error for {uploaded_file.name}: {e}")
99
- text = ""
100
- elif suffix in ['.docx', '.doc']:
 
 
101
  try:
102
- text = docx2txt.process(tmp_path) or ""
 
 
103
  except Exception as e:
104
- st.error(f"DOCX extraction error for {uploaded_file.name}: {e}")
105
- text = ""
106
  else:
107
- st.warning(f"Unsupported file type: {suffix}")
108
-
109
- # Clean up temp file
110
- try:
111
- os.unlink(tmp_path)
112
- except Exception:
113
- pass
114
-
115
- return text
116
  except Exception as e:
117
- st.error(f"Failed to extract text: {e}")
118
  return ""
119
 
120
 
121
- @st.cache_data
122
- def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
123
- """Call Groq to extract structured data. If Groq is not available or returns invalid JSON,
124
- fall back to a lightweight heuristic parser.
 
125
  """
126
- # If no groq client, skip to fallback
127
  if not groq_client:
128
- return fallback_simple_extraction(resume_text, job_role)
 
129
 
130
- # Build role-specific instructions
131
  therapist_instructions = ""
132
  if job_role == "Therapist":
133
  therapist_instructions = (
134
- "Because the job role is 'Therapist', carefully look for ABA Therapy Skills, RBT/BCBA Certification, "
135
- "and Autism-Care Experience. Provide a score from 1-10 as a STRING (e.g., '7') for these fields. "
136
- "If not found, return 'N/A'."
137
  )
138
  else:
139
  therapist_instructions = (
140
- "Since the role is not 'Therapist', set specialized therapist fields to 'N/A' if not present."
 
141
  )
142
 
143
  system_prompt = (
144
- f"You are a professional Resume Analyzer. Extract fields exactly matching the JSON schema: name, email, phone, certifications (list), "
145
- f"experience_summary, education_summary, communication_skills (STRING), technical_skills (list), aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score. "
146
- f"The candidate is applying for '{job_role}'. {therapist_instructions} Return valid JSON only."
 
147
  )
148
 
 
 
149
  try:
150
- chat_completion = groq_client.chat.completions.create(
151
- model="mixtral-8x7b-32768",
152
  messages=[
153
  {"role": "system", "content": system_prompt},
154
- {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
155
  ],
156
- response_model={"type": "json_object", "schema": ResumeAnalysis.schema()},
157
- temperature=0.0
 
158
  )
159
-
160
- # Extract raw content (SDK may vary β€” keep defensive)
161
- raw = None
162
  try:
163
- raw = chat_completion.choices[0].message.content
164
  except Exception:
165
- raw = str(chat_completion)
 
 
 
 
166
 
167
- # Parse with Pydantic
168
- try:
169
- analysis = ResumeAnalysis.parse_raw(raw)
170
- except ValidationError as ve:
171
- st.warning(f"Groq returned invalid format; falling back to heuristic extraction. Details: {ve}")
172
- return fallback_simple_extraction(resume_text, job_role)
173
 
174
- # Ensure string coercions
175
- analysis.communication_skills = str(analysis.communication_skills or 'N/A')
176
- analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
177
- analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
178
- analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
179
 
180
- return analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  except Exception as e:
183
- st.warning(f"Groq API call failed: {e}. Using fallback extraction.")
184
- return fallback_simple_extraction(resume_text, job_role)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
 
 
 
 
 
 
186
 
187
- def fallback_simple_extraction(text: str, job_role: str) -> ResumeAnalysis:
188
- """A minimal, robust heuristic extractor used when the LLM call fails.
189
- It tries to find name/email/phone and picks up some keywords for skills and certifications.
190
- """
191
- import re
 
192
 
193
- # Very simple heuristics (intended as a fallback only)
194
- email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
195
- phone_match = re.search(r"(\+?\d[\d\-\s]{7,}\d)", text)
196
-
197
- name = "Unknown"
198
- # Heuristic: first line that looks like a name (two words, capitalized)
199
- lines = [l.strip() for l in text.splitlines() if l.strip()]
200
- if lines:
201
- for line in lines[:5]:
202
- if len(line.split()) <= 4 and any(ch.isalpha() for ch in line) and line[0].isupper():
203
- name = line
204
- break
205
-
206
- email = email_match.group(0) if email_match else ""
207
- phone = phone_match.group(0) if phone_match else ""
208
-
209
- # Skills: gather common programming / therapy keywords
210
- skills_candidates = []
211
- certifications = []
212
- keywords = ['python','java','c++','machine learning','deep learning','tensorflow','pytorch','rbt','bcba','aba','autism']
213
- lower_text = text.lower()
214
- for kw in keywords:
215
- if kw in lower_text:
216
- skills_candidates.append(kw)
217
- if kw in ['rbt','bcba']:
218
- certifications.append(kw.upper())
219
-
220
- experience_summary = ' '.join(lines[:4]) if lines else ''
221
- education_summary = ''
222
-
223
- # Therapist-specific small heuristics
224
- aba = 'N/A'
225
- rbt_cert = 'Yes' if 'rbt' in lower_text or 'registered behavior technician' in lower_text else 'N/A'
226
- autism_score = 'N/A'
227
-
228
- return ResumeAnalysis(
229
- name=name,
230
- email=email,
231
- phone=phone,
232
- certifications=certifications,
233
- experience_summary=experience_summary,
234
- education_summary=education_summary,
235
- communication_skills='5',
236
- technical_skills=list(set(skills_candidates)),
237
- aba_therapy_skills=aba,
238
- rbt_bcba_certification=rbt_cert,
239
- autism_care_experience_score=autism_score
240
- )
241
 
242
 
243
- def calculate_resume_score(analysis: ResumeAnalysis) -> float:
244
- """Calculates a weighted score out of 100 based on heuristics and extracted values."""
245
  total_score = 0.0
246
 
247
- # 1. Experience Score (Max 40)
248
  exp_len = len(analysis.experience_summary or "")
249
- exp_factor = min(exp_len / 100.0, 1.0)
250
  total_score += exp_factor * 40.0
251
 
252
- # 2. Skills Score (Max 30)
253
- skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
 
254
  total_score += skills_factor * 30.0
255
 
256
- # 3. Communication (Max 20)
257
  try:
258
- score_str = str(analysis.communication_skills).split('-')[0].strip()
259
- comm_rating = float(score_str)
 
 
 
 
260
  except Exception:
261
- comm_rating = 5.0
262
- total_score += (comm_rating / 10.0) * 20.0
263
 
264
- # 4. Certifications (Max 10)
265
- total_score += min(len(analysis.certifications), 10) * 1.0
 
266
 
267
- # Therapist bonus (max 10)
268
- if st.session_state.get('selected_role') == 'Therapist':
269
  try:
270
- aba = float(str(analysis.aba_therapy_skills)) if str(analysis.aba_therapy_skills).upper() not in ['N/A', 'NONE', ''] else 0.0
271
- autism = float(str(analysis.autism_care_experience_score)) if str(analysis.autism_care_experience_score).upper() not in ['N/A', 'NONE', ''] else 0.0
272
- total_score += ((aba + autism) / 20.0) * 10.0
 
 
 
 
 
 
 
 
273
  except Exception:
274
  pass
275
 
276
- final_score = round(min(total_score, 100))
277
- return float(final_score)
278
 
279
 
 
280
  def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
281
  data = analysis.dict()
282
- data['Job Role'] = job_role
283
- data['Resume Score'] = score
284
- data['Shortlisted'] = 'No'
285
-
286
- technical_skills_list = ", ".join(data.get('technical_skills', []))
287
- certifications_list = ", ".join(data.get('certifications', []))
288
 
289
  df_data = {
290
- 'Name': data.get('name', ''),
291
  'Job Role': job_role,
292
  'Resume Score (100)': score,
293
- 'Email': data.get('email', ''),
294
- 'Phone': data.get('phone', ''),
295
- 'Shortlisted': data.get('Shortlisted', 'No'),
296
- 'Experience Summary': data.get('experience_summary', ''),
297
- 'Education Summary': data.get('education_summary', ''),
298
- 'Communication Rating (1-10)': str(data.get('communication_skills', 'N/A')),
299
  'Skills/Technologies': technical_skills_list,
300
  'Certifications': certifications_list,
301
- 'ABA Skills (1-10)': str(data.get('aba_therapy_skills', 'N/A')),
302
- 'RBT/BCBA Cert': str(data.get('rbt_bcba_certification', 'N/A')),
303
- 'Autism-Care Exp (1-10)': str(data.get('autism_care_experience_score', 'N/A')),
304
  }
305
 
306
  new_df = pd.DataFrame([df_data])
307
  st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
308
 
309
- # --------------------
310
- # App layout
311
- # --------------------
 
 
 
 
 
 
 
312
  st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
313
 
314
  tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
315
 
 
 
 
316
  with tab_user:
317
  st.header("Upload Resumes for Analysis")
318
- st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key data. If the API key is missing, a fallback heuristic extractor will run.")
319
 
320
  job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
321
- selected_role = st.selectbox("**1. Select the Target Job Role**", options=job_role_options, key='selected_role')
322
 
323
- uploaded_files = st.file_uploader("**2. Upload Resumes** (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
 
 
324
 
 
325
  if st.button("πŸš€ Analyze All Uploaded Resumes"):
326
  if not uploaded_files:
327
  st.warning("Please upload one or more resume files to begin analysis.")
328
  else:
329
- total_files = len(uploaded_files)
330
- progress_bar = st.progress(0.0)
331
- st.session_state.individual_analysis = []
332
 
 
 
 
 
 
 
 
 
 
 
333
  with st.spinner("Processing resumes..."):
334
- for i, file in enumerate(uploaded_files):
335
- file_name = file.name
336
- st.write(f"Analyzing **{file_name}**...")
337
-
338
- resume_text = extract_text_from_file(file)
339
- if not resume_text:
340
- st.error(f"Could not extract text from {file_name}. Skipping.")
341
- continue
342
-
343
- analysis = analyze_resume_with_groq(resume_text, selected_role)
344
- if isinstance(analysis, ResumeAnalysis) and analysis.name == "Extraction Failed":
345
- st.error(f"Extraction failed for {file_name}. Skipping.")
346
- continue
347
-
348
- score = calculate_resume_score(analysis)
349
- append_analysis_to_dataframe(selected_role, analysis, score)
350
-
351
- st.session_state.individual_analysis.append({
352
- 'name': analysis.name,
353
- 'score': score,
354
- 'role': selected_role,
355
- 'file_name': file_name
356
- })
357
-
358
- progress_bar.progress((i + 1) / total_files)
359
-
360
- st.success(f"**βœ… Successfully processed {len(st.session_state.individual_analysis)} / {total_files} resumes.**")
361
-
362
- if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
 
 
 
 
 
 
 
 
 
363
  st.subheader("Last Analysis Summary")
364
  for item in st.session_state.individual_analysis:
365
  st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
366
-
367
  st.markdown("---")
368
- st.caption("All analyzed data is stored in the **Admin Dashboard**.")
369
 
 
 
 
370
  with tab_admin:
371
  if not st.session_state.is_admin_logged_in:
372
  st.header("Admin Login")
@@ -374,17 +489,16 @@ with tab_admin:
374
  if st.button("πŸ”‘ Login"):
375
  if password == ADMIN_PASSWORD:
376
  st.session_state.is_admin_logged_in = True
377
- st.experimental_rerun()
378
  else:
379
  st.error("Incorrect password.")
 
380
  st.stop()
381
 
382
  st.header("🎯 Recruitment Dashboard")
383
- st.markdown("---")
384
-
385
  if st.button("πŸšͺ Logout"):
386
  st.session_state.is_admin_logged_in = False
387
- st.experimental_rerun()
388
 
389
  if st.session_state.analyzed_data.empty:
390
  st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
@@ -395,6 +509,7 @@ with tab_admin:
395
 
396
  display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
397
 
 
398
  edited_df = st.data_editor(
399
  df[display_cols],
400
  column_config={
@@ -402,31 +517,33 @@ with tab_admin:
402
  "Shortlisted",
403
  help="Mark the candidate as Shortlisted or Rejected.",
404
  options=["No", "Yes"],
405
- required=True,
406
  )
407
  },
408
  key="dashboard_editor",
409
  hide_index=True
410
  )
411
 
412
- # Persist shortlist changes back to session state
413
- st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']
 
 
 
 
 
 
414
 
415
  st.markdown("---")
416
  st.subheader("πŸ“₯ Download Data")
417
-
418
  df_export = st.session_state.analyzed_data.copy()
419
- excel_buffer = io.BytesIO()
420
- with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
421
- df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
422
- excel_buffer.seek(0)
423
 
424
  st.download_button(
425
  label="πŸ’Ύ Download All Data as Excel (.xlsx)",
426
- data=excel_buffer.getvalue(),
427
  file_name="quantum_scrutiny_report.xlsx",
428
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
429
  help="Downloads the full table including all extracted fields and shortlist status."
430
  )
431
 
432
- # End of file
 
1
# app.py
"""
Quantum Scrutiny Platform | Groq-Powered
Single-file Streamlit app (refactored & debugged)

Module layout: config/secrets, Groq client init, Streamlit session-state
defaults, the ResumeAnalysis schema, extraction/analysis/scoring helpers,
then the two-tab UI (uploader + admin dashboard).
"""

# --- 0. Always set page config as the very first Streamlit command ---
import os
from dotenv import load_dotenv

load_dotenv()  # load local .env if present (during local dev)

import io
import base64
import traceback
# NOTE(review): base64 and traceback appear unused in this module — confirm before removing.
from typing import Optional, List

import streamlit as st
import pandas as pd

# resume parsing
import fitz  # PyMuPDF
from docx import Document  # python-docx

# Groq client (keep same import name as you used)
from groq import Groq

# Pydantic for schema validation
from pydantic import BaseModel, Field, ValidationError

# --- Streamlit UI config ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # set in environment or .env or deploy secrets
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")  # optional override via env; "admin" default is demo-only

# --- Initialize Groq client with safe error messaging ---
groq_client = None
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY not found. Please set GROQ_API_KEY as an environment variable or in Hugging Face secrets.")
    # We won't stop here to allow UI to display, but analysis will error if used.
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")
        groq_client = None


# --- Session state defaults ---
# Each key is created once per browser session; Streamlit re-runs the whole
# script on every interaction, so these guards keep state across reruns.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    # Column order here defines the admin-dashboard table layout.
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []
if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False
66
+
67
 
68
# --- Pydantic schema for Groq output ---
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume by the LLM (or fallback).

    Rating-like fields are deliberately STRINGS (e.g. '8' or 'N/A') so the
    model may return either a number or a short description; downstream
    scoring parses the leading number out of them.
    """

    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="Concise summary of experience.")
    education_summary: str = Field(default="", description="Concise summary of education.")
    communication_skills: str = Field(description="Communication score as a STRING ('8') or description.")
    technical_skills: List[str] = Field(default_factory=list, description="List of skills/technologies.")
    # Therapist-role-only fields; 'N/A' for all other roles.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="ABA Therapy score as STRING or 'N/A'.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="'Yes'/'No'/'N/A'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="Autism care experience score as STRING or 'N/A'.")
81
 
 
 
 
 
 
 
 
82
 
83
# --- Helper: File text extraction ---
def extract_text_from_file(uploaded_file) -> str:
    """
    Extract plain text from an uploaded resume file.

    Accepts a Streamlit ``UploadedFile`` (any file-like object exposing
    ``.name``, ``.seek`` and ``.read``). PDFs are parsed with PyMuPDF,
    DOCX with python-docx; anything else is decoded as UTF-8 text.
    Returns an empty string on any failure so callers can skip the file.
    """
    try:
        # Rewind before reading: Streamlit re-runs the script on every
        # interaction, so the buffer may already be consumed — a bare
        # read() would then return b"" and silently yield no text.
        try:
            uploaded_file.seek(0)
        except Exception:
            pass
        content = uploaded_file.read()
        if not content:
            return ""

        # Detect PDFs by extension or by the %PDF- magic header.
        name_lower = uploaded_file.name.lower()
        if name_lower.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                # fitz can open directly from an in-memory byte stream.
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "\n".join(page.get_text() for page in doc).strip()
            except Exception as e:
                st.warning(f"PDF extraction issue for {uploaded_file.name}: {e}")
                return ""
        elif name_lower.endswith(".docx"):
            # python-docx accepts a file-like object.
            try:
                doc = Document(io.BytesIO(content))
                paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(paragraphs).strip()
            except Exception as e:
                st.warning(f"DOCX extraction issue for {uploaded_file.name}: {e}")
                return ""
        else:
            # Best effort for text-like files.
            try:
                return content.decode('utf-8', errors='ignore')
            except Exception:
                return ""
    except Exception as e:
        st.error(f"Unexpected file extraction error: {e}")
        return ""
126
 
127
 
128
# --- Helper: call Groq (safe wrapper) ---
def call_groq_chat_system(resume_text: str, job_role: str) -> Optional[str]:
    """
    Call the Groq chat-completion API for one resume and return the raw model text.

    Returns None when the module-level ``groq_client`` is missing or the API
    call raises; errors are surfaced to the UI via st.error/st.exception
    rather than propagated, so callers can treat None as "no analysis".
    The prompt asks the model for strict JSON; parsing happens in the caller.
    """
    if not groq_client:
        st.error("Groq client is not initialized. Set GROQ_API_KEY in environment or secrets.")
        return None

    # role-specific instructions
    therapist_instructions = ""
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide a score from 1-10 as a STRING "
            "(e.g., '7') for the specialized fields. If any specialized field is not present, return 'N/A'."
        )
    else:
        therapist_instructions = (
            "This is NOT a Therapist role. Set 'aba_therapy_skills', 'autism_care_experience_score', "
            "and 'rbt_bcba_certification' to 'N/A' if not applicable."
        )

    system_prompt = (
        "You are a professional Resume Analyzer. Extract the requested fields and return a strict JSON object "
        "matching the schema: name, email, phone, certifications (array), experience_summary, education_summary, "
        "communication_skills (AS A STRING, e.g., '8'), technical_skills (array), aba_therapy_skills, "
        "rbt_bcba_certification, autism_care_experience_score. " + therapist_instructions
    )

    user_prompt = f"Analyze the following resume text and return a JSON object:\n\n---\n{resume_text}\n---\nReturn only valid JSON."

    try:
        result = groq_client.chat.completions.create(
            model="mixtral-8x7b-32768",  # keep your original model choice; adapt if needed
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.0,  # deterministic extraction
            max_tokens=2000,
            # Not all Groq SDK versions support response_model in the same way; we parse manually below.
        )
        # Depending on SDK, result structure varies; common: result.choices[0].message.content
        model_text = None
        try:
            model_text = result.choices[0].message.content
        except Exception:
            # try alternate (dict-style) structure before giving up
            try:
                model_text = result["choices"][0]["message"]["content"]
            except Exception:
                # last resort: stringified response, caller's JSON parse will decide
                model_text = str(result)

        return model_text
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        st.exception(e)
        return None
 
188
 
 
 
 
 
 
189
 
190
# --- Cached wrapper for analysis (cache by resume_text + role) ---
def _failed_analysis() -> "ResumeAnalysis":
    """Build the sentinel ResumeAnalysis returned whenever extraction fails."""
    return ResumeAnalysis(
        name="Extraction Failed",
        email="",
        phone="",
        certifications=[],
        experience_summary="",
        education_summary="",
        communication_skills="N/A",
        technical_skills=[],
        aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A",
        autism_care_experience_score="N/A"
    )


@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Calls Groq (or fallback) and returns a validated ResumeAnalysis Pydantic object.
    This function is cached to avoid repeated calls for identical text+role.

    Failure modes (no client, unparseable output, schema mismatch) all return
    the "Extraction Failed" sentinel so the caller can skip the file.
    """
    import json
    import re
    import ast

    raw_response = call_groq_chat_system(resume_text, job_role)
    if not raw_response:
        return _failed_analysis()

    # Attempt to parse JSON from the model text. The model might include
    # commentary, so extract the first {...} object in the string.
    json_text = None
    try:
        match = re.search(r"(\{.*\})", raw_response, re.DOTALL)
        # If no braces found, maybe the model returned bare JSON-like lines.
        json_text = match.group(1) if match else raw_response
        parsed = json.loads(json_text)
    except Exception:
        # Forgiving fallback: the model sometimes emits a Python-literal dict
        # (single quotes etc.). ast.literal_eval only evaluates literals —
        # unlike eval(), it cannot execute expressions from untrusted output.
        try:
            parsed = ast.literal_eval(json_text)
            if not isinstance(parsed, dict):
                raise ValueError("Parsed non-dict from model response fallback.")
        except Exception:
            st.warning("Failed to parse Groq output as JSON. Returning fallback extraction.")
            st.text_area("Raw model output (for debugging)", raw_response, height=200)
            return _failed_analysis()

    # Validate & coerce to Pydantic model (safe defaults applied)
    try:
        # Ensure list fields exist.
        parsed.setdefault("certifications", [])
        parsed.setdefault("technical_skills", [])
        # Schema requires communication_skills to be a string.
        if "communication_skills" in parsed and parsed["communication_skills"] is not None:
            parsed["communication_skills"] = str(parsed["communication_skills"])
        else:
            parsed["communication_skills"] = "N/A"

        # Therapist-specific fields default to "N/A" and are stringified.
        for k in ["aba_therapy_skills", "rbt_bcba_certification", "autism_care_experience_score"]:
            if k not in parsed or parsed[k] is None:
                parsed[k] = "N/A"
            else:
                parsed[k] = str(parsed[k])

        analysis = ResumeAnalysis.parse_obj(parsed)
        # Final coercions to guarantee string types for rating-like fields.
        analysis.communication_skills = str(analysis.communication_skills or "N/A")
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or "N/A")
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or "N/A")
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or "N/A")

        return analysis
    except ValidationError as ve:
        st.error("Model output failed schema validation. Returning fallback object.")
        st.text_area("Model raw response (for debugging)", raw_response, height=200)
        st.exception(ve)
        return _failed_analysis()
    except Exception as e:
        st.error("Unexpected error while validating model output.")
        st.exception(e)
        return _failed_analysis()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
 
314
# --- Scoring function ---
def calculate_resume_score(analysis: "ResumeAnalysis", role: str) -> float:
    """
    Compute a weighted resume score out of 100.

    Weights: experience-summary length up to 40, skill count up to 30,
    communication rating up to 20, certifications up to 10, plus a
    Therapist-only bonus of up to 10; the total is capped at 100.
    The annotation is a forward reference so this scorer has no hard
    import-order dependency on the ResumeAnalysis class.
    """
    import re  # hoisted: both the communication and therapist sections need it

    def _comm_rating(raw) -> float:
        # Accept '8', '8/10', '8 - good' etc.; fall back to 5.0 (midpoint).
        try:
            m = re.search(r"(\d+(\.\d+)?)", str(raw).strip())
            return float(m.group(1)) if m else float(str(raw).strip())
        except Exception:
            return 5.0

    def _spec_score(raw) -> float:
        # Therapist fields: first numeric token, else 0 (e.g. 'N/A').
        try:
            m = re.search(r"(\d+(\.\d+)?)", str(raw))
            return float(m.group(1)) if m else 0.0
        except Exception:
            return 0.0

    total_score = 0.0

    # Experience summary length -> up to 40 points (100+ chars = full credit)
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0

    # Skills count -> up to 30 points (10+ skills = full credit)
    skills_count = len(analysis.technical_skills or [])
    total_score += min(skills_count / 10.0, 1.0) * 30.0

    # Communication -> up to 20 points (expects 1-10, clamped)
    comm_val = max(0.0, min(10.0, _comm_rating(analysis.communication_skills)))
    total_score += (comm_val / 10.0) * 20.0

    # Certifications -> up to 10 points (1 point each up to 10)
    total_score += min(len(analysis.certifications or []), 10) * 1.0

    # Therapist-specific bonus up to 10 points (two 1-10 scores averaged)
    if role == "Therapist":
        aba = _spec_score(analysis.aba_therapy_skills)
        autism = _spec_score(analysis.autism_care_experience_score)
        total_score += ((aba + autism) / 20.0) * 10.0

    return float(round(min(total_score, 100)))
363
 
364
 
365
# --- Append to session DataFrame helper ---
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Flatten one analysis result into a single row and append it to the
    session-wide ``analyzed_data`` DataFrame.

    Args:
        job_role: Role the resume was evaluated against.
        analysis: The validated extraction result (pydantic model).
        score: Pre-computed 0-100 resume score.
    """
    fields = analysis.dict()

    def text_of(key, fallback=""):
        # Missing/None values collapse to the fallback so the table stays clean.
        return fields.get(key) or fallback

    row = {
        'Name': text_of('name'),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': text_of('email'),
        'Phone': text_of('phone'),
        'Shortlisted': 'No',  # default; admin flips this in the dashboard editor
        'Experience Summary': text_of('experience_summary'),
        'Education Summary': text_of('education_summary'),
        'Communication Rating (1-10)': str(text_of('communication_skills', "N/A")),
        'Skills/Technologies': ", ".join(fields.get('technical_skills') or []),
        'Certifications': ", ".join(fields.get('certifications') or []),
        'ABA Skills (1-10)': str(text_of('aba_therapy_skills', "N/A")),
        'RBT/BCBA Cert': str(text_of('rbt_bcba_certification', "N/A")),
        'Autism-Care Exp (1-10)': str(text_of('autism_care_experience_score', "N/A")),
    }

    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])],
        ignore_index=True,
    )
390
 
391
+
392
# --- Utility: Excel download as BytesIO for st.download_button ---
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize *df* into an in-memory .xlsx workbook and return the raw bytes."""
    buffer = io.BytesIO()
    writer = pd.ExcelWriter(buffer, engine='openpyxl')
    with writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return buffer.getvalue()
398
+
399
+
400
# --- App Layout ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])

# -------------------------
# User Panel
# -------------------------
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader(
        "2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True
    )

    # The button only sets a session flag and reruns; the heavy processing
    # happens on the following clean pass of the script.
    if st.button("πŸš€ Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    # If run_analysis flag is set, process uploads
    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            # No explicit progress update needed here: the
                            # `finally` block runs on `continue` as well.
                            continue

                        # Call cached analyze function
                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)

                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue

                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)

                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # BUG FIX: the old code called
                        # `st.exception(traceback.format_exc())`, but `traceback`
                        # is never imported at module level, so the handler itself
                        # raised NameError and masked the real failure.
                        # st.exception takes the exception object directly.
                        st.exception(e)
                    finally:
                        # Exactly one progress tick per file, on every path.
                        progress.progress(idx / total)

            st.success(f"βœ… Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False  # reset flag

    # Show last analysis summary (use .get so a fresh session without the key
    # renders nothing instead of raising).
    if st.session_state.get("individual_analysis"):
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

    st.markdown("---")
    st.caption("All analyzed data is stored in the Admin Dashboard.")
482
# -------------------------
# Admin Panel (Password Protected)
# -------------------------
with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        # NOTE(review): this input line was elided in the diff view and is
        # reconstructed from the `password` reference below — confirm the
        # exact label against the original source.
        password = st.text_input("Enter Admin Password", type="password")

        if st.button("πŸ”‘ Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # stop further admin rendering while not logged in
        st.stop()

    st.header("🎯 Recruitment Dashboard")

    if st.button("πŸšͺ Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
        # NOTE(review): reconstructed — the original's elided lines most
        # plausibly halted rendering here; confirm against the source.
        st.stop()

    # NOTE(review): reconstructed binding — `df` is referenced by the editor
    # below but its assignment was elided in the diff view.
    df = st.session_state.analyzed_data

    display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

    # data_editor with SelectboxColumn for 'Shortlisted'
    edited_df = st.data_editor(
        df[display_cols],
        column_config={
            # NOTE(review): the SelectboxColumn constructor line was elided in
            # the diff view; reconstructed from its keyword arguments.
            "Shortlisted": st.column_config.SelectboxColumn(
                "Shortlisted",
                help="Mark the candidate as Shortlisted or Rejected.",
                options=["No", "Yes"],
                required=True
            )
        },
        key="dashboard_editor",
        hide_index=True
    )

    # Propagate the 'Shortlisted' edits back to the session dataframe.
    # Positional assignment assumes the editor preserves row order.
    try:
        st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
    except Exception:
        # fallback for indexing mismatches
        for i, val in enumerate(edited_df['Shortlisted'].tolist()):
            if i < len(st.session_state.analyzed_data):
                st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

    st.markdown("---")
    st.subheader("πŸ“₯ Download Data")

    df_export = st.session_state.analyzed_data.copy()
    excel_bytes = df_to_excel_bytes(df_export)

    st.download_button(
        label="πŸ’Ύ Download All Data as Excel (.xlsx)",
        data=excel_bytes,
        file_name="quantum_scrutiny_report.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        help="Downloads the full table including all extracted fields and shortlist status."
    )

# --- End of app.py ---