meesamraza committed on
Commit
0bbe141
·
verified ·
1 Parent(s): 56ab15e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +227 -255
app.py CHANGED
@@ -1,52 +1,49 @@
1
  # app.py
2
  """
3
  Quantum Scrutiny Platform | Groq-Powered
4
- Single-file Streamlit app (refactored & debugged)
5
  """
6
 
7
- # --- 0. Always set page config as the first Streamlit command ---
8
  import os
9
- from dotenv import load_dotenv
10
-
11
- load_dotenv() # load local .env if present (during local dev)
12
-
13
  import io
 
 
14
  import base64
15
  import traceback
16
  from typing import Optional, List
17
 
 
 
 
18
  import streamlit as st
19
  import pandas as pd
20
 
21
- # resume parsing
22
- import fitz # PyMuPDF
23
- from docx import Document # python-docx
24
 
25
- # Groq client (keep same import name as you used)
26
  from groq import Groq
27
 
28
- # Pydantic for schema validation
29
  from pydantic import BaseModel, Field, ValidationError
30
 
31
- # --- Streamlit UI config ---
32
  st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
33
 
34
  # --- Config / Secrets ---
35
- GROQ_API_KEY = os.getenv("GROQ_API_KEY") # set in environment or .env or deploy secrets
36
- ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin") # optional override via env
37
 
38
- # --- Initialize Groq client with safe error messaging ---
39
  groq_client = None
40
- if not GROQ_API_KEY:
41
- st.error("GROQ_API_KEY not found. Please set GROQ_API_KEY as an environment variable or in Hugging Face secrets.")
42
- # We won't stop here to allow UI to display, but analysis will error if used.
43
- else:
44
  try:
45
  groq_client = Groq(api_key=GROQ_API_KEY)
46
  except Exception as e:
47
  st.error(f"Failed to initialize Groq client: {e}")
48
- groq_client = None
49
-
50
 
51
  # --- Session state defaults ---
52
  if 'is_admin_logged_in' not in st.session_state:
@@ -64,139 +61,173 @@ if 'individual_analysis' not in st.session_state:
64
  if 'run_analysis' not in st.session_state:
65
  st.session_state.run_analysis = False
66
 
67
-
68
- # --- Pydantic schema for Groq output ---
69
  class ResumeAnalysis(BaseModel):
70
- name: str = Field(description="Full name of the candidate.")
71
- email: str = Field(description="Professional email address.")
72
- phone: str = Field(description="Primary phone number.")
73
- certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
74
- experience_summary: str = Field(default="", description="Concise summary of experience.")
75
- education_summary: str = Field(default="", description="Concise summary of education.")
76
- communication_skills: str = Field(description="Communication score as a STRING ('8') or description.")
77
- technical_skills: List[str] = Field(default_factory=list, description="List of skills/technologies.")
78
- aba_therapy_skills: Optional[str] = Field(default="N/A", description="ABA Therapy score as STRING or 'N/A'.")
79
- rbt_bcba_certification: Optional[str] = Field(default="N/A", description="'Yes'/'No'/'N/A'.")
80
- autism_care_experience_score: Optional[str] = Field(default="N/A", description="Autism care experience score as STRING or 'N/A'.")
81
-
82
-
83
- # --- Helper: File text extraction ---
84
  def extract_text_from_file(uploaded_file) -> str:
85
- """
86
- Accepts a Streamlit UploadedFile object and returns extracted text.
87
- Supports PDF and DOCX. Returns empty string on failure.
88
- """
89
  try:
90
  content = uploaded_file.read()
91
- # Reset pointer if needed (Streamlit UploadedFile likely returns bytes; after read it's consumed)
92
- # We already consumed it into `content` so use BytesIO for downstream if needed.
93
-
94
- # detect PDF by mime or header bytes
95
- name_lower = uploaded_file.name.lower()
96
- if name_lower.endswith(".pdf") or content[:5] == b"%PDF-":
97
- # use fitz (PyMuPDF)
98
  try:
99
  with fitz.open(stream=content, filetype="pdf") as doc:
100
- pages_text = []
101
  for p in doc:
102
- pages_text.append(p.get_text())
103
- return "\n".join(pages_text).strip()
104
- except Exception as e:
105
- # fallback: try PyMuPDF alternative reading
106
- st.warning(f"PDF extraction issue for {uploaded_file.name}: {e}")
107
  return ""
108
- elif name_lower.endswith(".docx"):
109
- # python-docx can accept a file-like object
110
  try:
111
  doc = Document(io.BytesIO(content))
112
  paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
113
  return "\n".join(paragraphs).strip()
114
- except Exception as e:
115
- st.warning(f"DOCX extraction issue for {uploaded_file.name}: {e}")
116
  return ""
117
  else:
118
- # Try simple decode for text-like files
119
  try:
120
  return content.decode('utf-8', errors='ignore')
121
  except Exception:
122
  return ""
123
- except Exception as e:
124
- st.error(f"Unexpected file extraction error: {e}")
125
  return ""
126
 
127
 
128
- # --- Helper: call Groq (safe wrapper) ---
129
- def call_groq_chat_system(resume_text: str, job_role: str) -> Optional[str]:
130
  """
131
- Calls Groq chat completion. Returns model text content or None on error.
132
- Note: groq_client must be initialized.
133
  """
134
  if not groq_client:
135
- st.error("Groq client is not initialized. Set GROQ_API_KEY in environment or secrets.")
136
  return None
137
 
138
- # role-specific instructions
139
- therapist_instructions = ""
140
- if job_role == "Therapist":
141
- therapist_instructions = (
142
- "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, "
143
- "RBT/BCBA Certification, and Autism-Care Experience. Provide a score from 1-10 as a STRING "
144
- "(e.g., '7') for the specialized fields. If any specialized field is not present, return 'N/A'."
145
- )
146
- else:
147
- therapist_instructions = (
148
- "This is NOT a Therapist role. Set 'aba_therapy_skills', 'autism_care_experience_score', "
149
- "and 'rbt_bcba_certification' to 'N/A' if not applicable."
150
- )
151
-
152
- system_prompt = (
153
- "You are a professional Resume Analyzer. Extract the requested fields and return a strict JSON object "
154
- "matching the schema: name, email, phone, certifications (array), experience_summary, education_summary, "
155
- "communication_skills (AS A STRING, e.g., '8'), technical_skills (array), aba_therapy_skills, "
156
- "rbt_bcba_certification, autism_care_experience_score. " + therapist_instructions
157
- )
158
-
159
- user_prompt = f"Analyze the following resume text and return a JSON object:\n\n---\n{resume_text}\n---\nReturn only valid JSON."
160
-
161
  try:
162
- result = groq_client.chat.completions.create(
163
- model="mixtral-8x7b-32768", # keep your original model choice; adapt if needed
164
  messages=[
165
- {"role": "system", "content": system_prompt},
166
- {"role": "user", "content": user_prompt}
167
  ],
168
- temperature=0.0,
169
- max_tokens=2000,
170
- # Not all Groq SDK versions support response_model in the same way; we parse manually below.
 
171
  )
172
- # Depending on SDK, result structure varies; common: result.choices[0].message.content
173
- model_text = None
174
- try:
175
- model_text = result.choices[0].message.content
176
- except Exception:
177
- # try alternate structure
178
  try:
179
- model_text = result["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  except Exception:
181
- model_text = str(result)
 
 
 
 
182
 
183
- return model_text
184
  except Exception as e:
185
  st.error(f"Groq API call failed: {e}")
186
- st.exception(e)
187
  return None
188
 
189
 
190
- # --- Cached wrapper for analysis (cache by resume_text + role) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  @st.cache_data(show_spinner=False)
192
  def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
193
  """
194
- Calls Groq (or fallback) and returns a validated ResumeAnalysis Pydantic object.
195
- This function is cached to avoid repeated calls for identical text+role.
196
  """
197
- raw_response = call_groq_chat_system(resume_text, job_role)
198
- if not raw_response:
199
- # return safe failure object
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  return ResumeAnalysis(
201
  name="Extraction Failed",
202
  email="",
@@ -211,75 +242,11 @@ def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAn
211
  autism_care_experience_score="N/A"
212
  )
213
 
214
- # Attempt to parse JSON from the model text. The model might include commentary;
215
- # so we try to extract the first JSON object in the text.
216
- import json
217
- import re
218
-
219
- json_text = None
220
- try:
221
- # Find the first {...} JSON object in the string (greedy to closing brace)
222
- match = re.search(r"(\{.*\})", raw_response, re.DOTALL)
223
- if match:
224
- json_text = match.group(1)
225
- else:
226
- # if no braces found, maybe the model returned just JSON-like lines
227
- json_text = raw_response
228
- parsed = json.loads(json_text)
229
- except Exception as e:
230
- # Try to be forgiving: if the model returned Python dict-like, attempt eval safely
231
- try:
232
- parsed = eval(json_text, {"__builtins__": None}, {}) # limited eval fallback
233
- if not isinstance(parsed, dict):
234
- raise ValueError("Parsed non-dict from model response fallback.")
235
- except Exception as ex:
236
- # Failed to parse model output -> return failure object and log both
237
- st.warning("Failed to parse Groq output as JSON. Returning fallback extraction.")
238
- st.text_area("Raw model output (for debugging)", raw_response, height=200)
239
- return ResumeAnalysis(
240
- name="Extraction Failed",
241
- email="",
242
- phone="",
243
- certifications=[],
244
- experience_summary="",
245
- education_summary="",
246
- communication_skills="N/A",
247
- technical_skills=[],
248
- aba_therapy_skills="N/A",
249
- rbt_bcba_certification="N/A",
250
- autism_care_experience_score="N/A"
251
- )
252
-
253
- # Validate & coerce to Pydantic model (safe defaults applied)
254
- try:
255
- # Ensure lists exist
256
- parsed.setdefault("certifications", [])
257
- parsed.setdefault("technical_skills", [])
258
- # Ensure communication_skills is string
259
- if "communication_skills" in parsed and parsed["communication_skills"] is not None:
260
- parsed["communication_skills"] = str(parsed["communication_skills"])
261
- else:
262
- parsed["communication_skills"] = "N/A"
263
-
264
- # Safety: set therapist-specific fields default to "N/A" if missing
265
- for k in ["aba_therapy_skills", "rbt_bcba_certification", "autism_care_experience_score"]:
266
- if k not in parsed or parsed[k] is None:
267
- parsed[k] = "N/A"
268
- else:
269
- parsed[k] = str(parsed[k])
270
-
271
- analysis = ResumeAnalysis.parse_obj(parsed)
272
- # Final coercions to guarantee string types for some fields
273
- analysis.communication_skills = str(analysis.communication_skills or "N/A")
274
- analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or "N/A")
275
- analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or "N/A")
276
- analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or "N/A")
277
-
278
- return analysis
279
- except ValidationError as ve:
280
- st.error("Model output failed schema validation. Returning fallback object.")
281
- st.text_area("Model raw response (for debugging)", raw_response, height=200)
282
- st.exception(ve)
283
  return ResumeAnalysis(
284
  name="Extraction Failed",
285
  email="",
@@ -293,9 +260,37 @@ def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAn
293
  rbt_bcba_certification="N/A",
294
  autism_care_experience_score="N/A"
295
  )
296
- except Exception as e:
297
- st.error("Unexpected error while validating model output.")
298
- st.exception(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  return ResumeAnalysis(
300
  name="Extraction Failed",
301
  email="",
@@ -311,112 +306,97 @@ def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAn
311
  )
312
 
313
 
314
- # --- Scoring function ---
315
  def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
316
  total_score = 0.0
317
 
318
- # Experience summary length -> up to 40 points
319
  exp_len = len(analysis.experience_summary or "")
320
- exp_factor = min(exp_len / 100.0, 1.0) # 100 chars or more -> full points
321
  total_score += exp_factor * 40.0
322
 
323
- # Skills count -> up to 30 points
324
  skills_count = len(analysis.technical_skills or [])
325
  skills_factor = min(skills_count / 10.0, 1.0)
326
  total_score += skills_factor * 30.0
327
 
328
- # Communication -> up to 20 points (expects 1-10 in string)
329
  try:
330
- comm_raw = str(analysis.communication_skills).strip()
331
- # allow '8/10' or '8 - good' forms: extract leading number
332
- import re
333
- m = re.search(r"(\d+(\.\d+)?)", comm_raw)
334
- comm_val = float(m.group(1)) if m else float(comm_raw)
335
  comm_val = max(0.0, min(10.0, comm_val))
336
  except Exception:
337
  comm_val = 5.0
338
  total_score += (comm_val / 10.0) * 20.0
339
 
340
- # Certifications -> up to 10 points (1 point each up to 10)
341
- cert_points = min(len(analysis.certifications or []), 10) * 1.0
342
- total_score += cert_points
343
 
344
- # Therapist-specific bonus up to 10 points
345
- if role == "Therapist":
346
- try:
347
- def safe_score(x):
348
- try:
349
- m = re.search(r"(\d+(\.\d+)?)", str(x))
350
- return float(m.group(1)) if m else 0.0
351
- except Exception:
352
- return 0.0
353
-
354
- aba = safe_score(analysis.aba_therapy_skills)
355
- autism = safe_score(analysis.autism_care_experience_score)
356
- spec_bonus = ((aba + autism) / 20.0) * 10.0 # average scaled to 10
357
- total_score += spec_bonus
358
- except Exception:
359
- pass
360
 
361
  final = round(min(total_score, 100))
362
  return float(final)
363
 
364
 
365
- # --- Append to session DataFrame helper ---
366
  def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
367
  data = analysis.dict()
368
- technical_skills_list = ", ".join(data.get('technical_skills') or [])
369
- certifications_list = ", ".join(data.get('certifications') or [])
370
-
371
- df_data = {
372
- 'Name': data.get('name') or "",
373
  'Job Role': job_role,
374
  'Resume Score (100)': score,
375
- 'Email': data.get('email') or "",
376
- 'Phone': data.get('phone') or "",
377
  'Shortlisted': 'No',
378
- 'Experience Summary': data.get('experience_summary') or "",
379
- 'Education Summary': data.get('education_summary') or "",
380
- 'Communication Rating (1-10)': str(data.get('communication_skills') or "N/A"),
381
- 'Skills/Technologies': technical_skills_list,
382
- 'Certifications': certifications_list,
383
- 'ABA Skills (1-10)': str(data.get('aba_therapy_skills') or "N/A"),
384
- 'RBT/BCBA Cert': str(data.get('rbt_bcba_certification') or "N/A"),
385
- 'Autism-Care Exp (1-10)': str(data.get('autism_care_experience_score') or "N/A"),
386
  }
387
-
388
- new_df = pd.DataFrame([df_data])
389
  st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
390
 
391
 
392
- # --- Utility: Excel download as BytesIO for st.download_button ---
393
  def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
394
  output = io.BytesIO()
395
- with pd.ExcelWriter(output, engine='openpyxl') as writer:
396
  df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
397
  return output.getvalue()
398
 
399
 
400
- # --- App Layout ---
401
- st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
402
 
403
  tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
404
 
405
- # -------------------------
406
- # User Panel
407
- # -------------------------
408
  with tab_user:
409
  st.header("Upload Resumes for Analysis")
410
- st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key fields.")
411
 
412
  job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
413
  selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
414
 
415
- uploaded_files = st.file_uploader(
416
- "2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True
417
- )
418
 
419
- # Analyze button sets a session_state flag and reruns
420
  if st.button("πŸš€ Analyze All Uploaded Resumes"):
421
  if not uploaded_files:
422
  st.warning("Please upload one or more resume files to begin analysis.")
@@ -424,7 +404,6 @@ with tab_user:
424
  st.session_state.run_analysis = True
425
  st.rerun()
426
 
427
- # If run_analysis flag is set, process uploads
428
  if st.session_state.get("run_analysis", False):
429
  if not uploaded_files:
430
  st.warning("No files found. Upload files and try again.")
@@ -445,7 +424,6 @@ with tab_user:
445
  progress.progress(idx / total)
446
  continue
447
 
448
- # Call cached analyze function
449
  analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
450
 
451
  if analysis.name == "Extraction Failed":
@@ -469,9 +447,9 @@ with tab_user:
469
  progress.progress(idx / total)
470
 
471
  st.success(f"βœ… Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
472
- st.session_state.run_analysis = False # reset flag
473
 
474
- # Show last analysis summary
475
  if st.session_state.individual_analysis:
476
  st.subheader("Last Analysis Summary")
477
  for item in st.session_state.individual_analysis:
@@ -479,9 +457,7 @@ with tab_user:
479
  st.markdown("---")
480
  st.caption("All analyzed data is stored in the Admin Dashboard.")
481
 
482
- # -------------------------
483
- # Admin Panel (Password Protected)
484
- # -------------------------
485
  with tab_admin:
486
  if not st.session_state.is_admin_logged_in:
487
  st.header("Admin Login")
@@ -492,7 +468,6 @@ with tab_admin:
492
  st.rerun()
493
  else:
494
  st.error("Incorrect password.")
495
- # stop further admin rendering while not logged in
496
  st.stop()
497
 
498
  st.header("🎯 Recruitment Dashboard")
@@ -509,7 +484,6 @@ with tab_admin:
509
 
510
  display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
511
 
512
- # data_editor with SelectboxColumn for 'Shortlisted'
513
  edited_df = st.data_editor(
514
  df[display_cols],
515
  column_config={
@@ -524,11 +498,9 @@ with tab_admin:
524
  hide_index=True
525
  )
526
 
527
- # propagate the 'Shortlisted' edits back to session dataframe
528
  try:
529
  st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
530
  except Exception:
531
- # fallback for indexing mismatches
532
  for i, val in enumerate(edited_df['Shortlisted'].tolist()):
533
  if i < len(st.session_state.analyzed_data):
534
  st.session_state.analyzed_data.at[i, 'Shortlisted'] = val
@@ -546,4 +518,4 @@ with tab_admin:
546
  help="Downloads the full table including all extracted fields and shortlist status."
547
  )
548
 
549
- # --- End of app.py ---
 
1
  # app.py
2
  """
3
  Quantum Scrutiny Platform | Groq-Powered
4
+ Single-file Streamlit app (refactored, Groq streaming-compatible)
5
  """
6
 
 
7
  import os
 
 
 
 
8
  import io
9
+ import re
10
+ import json
11
  import base64
12
  import traceback
13
  from typing import Optional, List
14
 
15
+ from dotenv import load_dotenv
16
+ load_dotenv()
17
+
18
  import streamlit as st
19
  import pandas as pd
20
 
21
+ # File parsing
22
+ import fitz # PyMuPDF
23
+ from docx import Document # python-docx
24
 
25
+ # Groq client
26
  from groq import Groq
27
 
28
+ # Validation
29
  from pydantic import BaseModel, Field, ValidationError
30
 
31
+ # --- Page config ---
32
  st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
33
 
34
  # --- Config / Secrets ---
35
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
36
+ ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
37
 
38
+ # Initialize Groq client (no API key -> UI warning but app still loads)
39
  groq_client = None
40
+ if GROQ_API_KEY:
 
 
 
41
  try:
42
  groq_client = Groq(api_key=GROQ_API_KEY)
43
  except Exception as e:
44
  st.error(f"Failed to initialize Groq client: {e}")
45
+ else:
46
+ st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
47
 
48
  # --- Session state defaults ---
49
  if 'is_admin_logged_in' not in st.session_state:
 
61
  if 'run_analysis' not in st.session_state:
62
  st.session_state.run_analysis = False
63
 
64
+ # --- Pydantic schema ---
 
65
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume by the Groq model.

    Score-like fields are STRINGS (e.g. '8' or 'N/A') because the model is
    prompted to return them as strings and downstream code re-parses them
    with a regex. Restores the Field descriptions that were dropped in the
    rewrite, so the JSON schema stays self-documenting.
    """

    # Candidate identity / contact details.
    name: str = Field(default="Unknown", description="Full name of the candidate.")
    email: str = Field(default="", description="Professional email address.")
    phone: str = Field(default="", description="Primary phone number.")
    # General profile fields.
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="Concise summary of experience.")
    education_summary: str = Field(default="", description="Concise summary of education.")
    communication_skills: str = Field(default="N/A", description="Communication score as a STRING (e.g. '8').")
    technical_skills: List[str] = Field(default_factory=list, description="List of skills/technologies.")
    # Therapist-role-specific fields; 'N/A' when not applicable.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="ABA Therapy score as STRING or 'N/A'.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="'Yes'/'No'/'N/A'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="Autism care experience score as STRING or 'N/A'.")
77
+
78
+
79
+ # --- Helpers: file text extraction ---
80
def extract_text_from_file(uploaded_file) -> str:
    """
    Extract plain text from an uploaded resume file.

    Supports PDF (via PyMuPDF) and DOCX (via python-docx); any other file
    is treated as UTF-8 text. Returns "" on any extraction failure rather
    than raising, so one bad file cannot abort a batch run.

    Parameters:
        uploaded_file: A Streamlit UploadedFile-like object exposing
            ``.read()`` and ``.name``.

    Returns:
        The extracted text (stripped), or "" on failure.
    """
    try:
        content = uploaded_file.read()
        filename = uploaded_file.name.lower()

        # PDF: match by extension or by the %PDF- magic bytes.
        if filename.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    # Join once instead of `text +=` per page — the repeated
                    # concatenation was quadratic on large PDFs.
                    return "".join(page.get_text() for page in doc).strip()
            except Exception:
                return ""
        elif filename.endswith(".docx"):
            try:
                doc = Document(io.BytesIO(content))
                paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(paragraphs).strip()
            except Exception:
                return ""
        else:
            # Fallback: best-effort text decode for anything else.
            try:
                return content.decode('utf-8', errors='ignore')
            except Exception:
                return ""
    except Exception:
        # e.g. the UploadedFile itself failed to read
        return ""
109
 
110
 
111
+ # --- Groq call with streaming (collects chunks) ---
112
def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """
    Call Groq chat completions with streaming enabled and collect the output.

    Parameters:
        prompt: User-role message content sent to the model.
        model_name: Groq model identifier.
        temperature / max_completion_tokens / top_p: Sampling controls
            forwarded verbatim to the API.

    Returns:
        The full streamed text (stripped), or None when the client is not
        initialized or the API call fails.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None

    try:
        completion = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True
        )

        # Accumulate chunk texts in a list and join once at the end;
        # repeated `str +=` inside the loop is quadratic in output size.
        parts = []
        for chunk in completion:
            try:
                # Common SDK shape: chunk.choices[0].delta.content
                delta = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None
                if delta is None and isinstance(chunk, dict):
                    # Fallback for dict-like chunk objects.
                    delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
                if delta:
                    parts.append(delta)
                else:
                    # Some SDK versions deliver the final message whole in
                    # chunk.choices[0].message.content.
                    msg = getattr(getattr(chunk.choices[0], "message", None), "content", None) if hasattr(chunk, "choices") else None
                    if msg:
                        parts.append(msg)
            except Exception:
                # BUGFIX: the previous version appended str(chunk) here as a
                # last resort, which injected object reprs into text that is
                # later parsed as JSON. Unrecognized chunks are now skipped.
                continue

        return "".join(parts).strip()
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return None
166
 
167
 
168
+ # --- Parsing model output safely to JSON ---
169
def extract_first_json(text: str) -> Optional[dict]:
    """
    Locate and parse the first JSON object embedded in *text*.

    The model is asked to return pure JSON but may wrap it in commentary.
    A small scanner finds the first balanced {...} span (respecting string
    literals and escapes), which is then parsed. Falls back to parsing the
    whole text, then to a naive single->double quote repair, before giving up.

    Returns:
        The parsed dict, or None when no JSON object can be recovered.
    """
    if not text:
        return None

    candidate = _first_balanced_braces(text)
    json_text = candidate if candidate is not None else text

    for attempt in (json_text, json_text.replace("'", '"')):
        try:
            parsed = json.loads(attempt)
        except Exception:
            continue
        # Guard against the model returning a JSON array/scalar: callers
        # expect a dict they can .setdefault() into.
        return parsed if isinstance(parsed, dict) else None
    return None


def _first_balanced_braces(text: str) -> Optional[str]:
    """Return the first balanced {...} substring of *text*, or None.

    Tracks JSON string literals so braces inside quoted values are ignored.
    (The previous implementation used the regex recursion extension (?R),
    which Python's `re` module does not support — it always raised and fell
    back to a greedy {.*} match that over-captured whenever trailing text
    contained '}'.)
    """
    start = text.find("{")
    while start != -1:
        depth = 0
        in_string = False
        escaped = False
        for i in range(start, len(text)):
            ch = text[i]
            if in_string:
                if escaped:
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    return text[start:i + 1]
        start = text.find("{", start + 1)
    return None
199
+
200
+
201
+ # --- Analyze with Groq (cached by resume text + role) ---
202
  @st.cache_data(show_spinner=False)
203
  def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
204
  """
205
+ Calls Groq (streaming) and returns a ResumeAnalysis instance.
206
+ Uses caching to avoid duplicate calls for same resume_text+role.
207
  """
208
+ # Build prompt instructing JSON structure
209
+ therapist_instructions = ""
210
+ if job_role.lower() == "therapist":
211
+ therapist_instructions = (
212
+ "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
213
+ "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
214
+ )
215
+ else:
216
+ therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."
217
+
218
+ system_user_prompt = (
219
+ "Return a single JSON object with the following keys exactly: "
220
+ "name (string), email (string), phone (string), certifications (array of strings), "
221
+ "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
222
+ "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
223
+ "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
224
+ f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
225
+ )
226
+
227
+ raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
228
+
229
+ if not raw:
230
+ # fallback empty object
231
  return ResumeAnalysis(
232
  name="Extraction Failed",
233
  email="",
 
242
  autism_care_experience_score="N/A"
243
  )
244
 
245
+ parsed = extract_first_json(raw)
246
+ if not parsed:
247
+ # show raw output for debugging when developer runs app locally (admin panel will show too)
248
+ st.warning("Failed to parse model JSON output. See raw output below for debugging.")
249
+ st.text_area("Raw model output (debug)", raw, height=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  return ResumeAnalysis(
251
  name="Extraction Failed",
252
  email="",
 
260
  rbt_bcba_certification="N/A",
261
  autism_care_experience_score="N/A"
262
  )
263
+
264
+ # Ensure keys exist and coerce types
265
+ parsed.setdefault("name", "Unknown")
266
+ parsed.setdefault("email", "")
267
+ parsed.setdefault("phone", "")
268
+ parsed.setdefault("certifications", [])
269
+ parsed.setdefault("experience_summary", "")
270
+ parsed.setdefault("education_summary", "")
271
+ parsed.setdefault("communication_skills", "N/A")
272
+ parsed.setdefault("technical_skills", [])
273
+ parsed.setdefault("aba_therapy_skills", "N/A")
274
+ parsed.setdefault("rbt_bcba_certification", "N/A")
275
+ parsed.setdefault("autism_care_experience_score", "N/A")
276
+
277
+ # Ensure string coercions for some fields
278
+ try:
279
+ parsed["communication_skills"] = str(parsed.get("communication_skills") or "N/A")
280
+ parsed["aba_therapy_skills"] = str(parsed.get("aba_therapy_skills") or "N/A")
281
+ parsed["rbt_bcba_certification"] = str(parsed.get("rbt_bcba_certification") or "N/A")
282
+ parsed["autism_care_experience_score"] = str(parsed.get("autism_care_experience_score") or "N/A")
283
+ except Exception:
284
+ pass
285
+
286
+ # Validate via Pydantic
287
+ try:
288
+ analysis = ResumeAnalysis.parse_obj(parsed)
289
+ return analysis
290
+ except ValidationError as ve:
291
+ st.error("Model output failed schema validation.")
292
+ st.text_area("Raw model output (debug)", raw, height=200)
293
+ st.exception(ve)
294
  return ResumeAnalysis(
295
  name="Extraction Failed",
296
  email="",
 
306
  )
307
 
308
 
309
+ # --- Scoring logic ---
310
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    """Return an overall resume score on a 0-100 scale.

    Weighting: experience-summary length (up to 40), technical-skill count
    (up to 30), communication rating (up to 20), certification count (up to
    10), plus a therapist-specific bonus of up to 10 for the Therapist role.
    The total is capped at 100 and rounded to a whole number.
    """
    score = 0.0

    # Experience: 100+ characters of summary earns the full 40 points.
    summary_len = len(analysis.experience_summary or "")
    score += min(summary_len / 100.0, 1.0) * 40.0

    # Skills: 10+ listed technologies earns the full 30 points.
    n_skills = len(analysis.technical_skills or [])
    score += min(n_skills / 10.0, 1.0) * 30.0

    # Communication: expects a 0-10 rating embedded in a string like
    # '8', '8/10', or '8 - good'; neutral 5.0 when unparseable.
    try:
        raw = str(analysis.communication_skills)
        match = re.search(r"(\d+(\.\d+)?)", raw)
        rating = float(match.group(1)) if match else float(raw)
        rating = max(0.0, min(10.0, rating))
    except Exception:
        rating = 5.0
    score += rating * 2.0  # scale 0-10 rating onto 0-20 points

    # Certifications: one point each, at most 10.
    score += float(min(len(analysis.certifications or []), 10))

    # Therapist roles: bonus derived from the two specialty scores.
    if role.lower() == "therapist":
        def first_number(value):
            # Leading numeric token of a score-like string, 0.0 if absent.
            try:
                m = re.search(r"(\d+(\.\d+)?)", str(value))
                return float(m.group(1)) if m else 0.0
            except Exception:
                return 0.0

        specialty_sum = (first_number(analysis.aba_therapy_skills)
                         + first_number(analysis.autism_care_experience_score))
        score += (specialty_sum / 20.0) * 10.0  # average of two 0-10 scores

    return float(round(min(score, 100)))
350
 
351
 
352
+ # --- Append to DataFrame ---
353
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Append one analyzed resume as a row of st.session_state.analyzed_data.

    List-valued fields are flattened to comma-separated strings and the
    score-like fields are coerced to strings so the table renders uniformly.
    """
    fields = analysis.dict()

    def joined(key):
        # Flatten a list field to "a, b, c"; tolerate None.
        return ", ".join(fields.get(key) or [])

    def text(key):
        # String-coerce a score-ish field, defaulting to "N/A".
        return str(fields.get(key) or "N/A")

    record = {
        'Name': fields.get("name") or "",
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': fields.get("email") or "",
        'Phone': fields.get("phone") or "",
        'Shortlisted': 'No',
        'Experience Summary': fields.get("experience_summary") or "",
        'Education Summary': fields.get("education_summary") or "",
        'Communication Rating (1-10)': text("communication_skills"),
        'Skills/Technologies': joined("technical_skills"),
        'Certifications': joined("certifications"),
        'ABA Skills (1-10)': text("aba_therapy_skills"),
        'RBT/BCBA Cert': text("rbt_bcba_certification"),
        'Autism-Care Exp (1-10)': text("autism_care_experience_score"),
    }
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([record])],
        ignore_index=True,
    )
375
 
376
 
377
+ # --- Excel export helper ---
378
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize *df* to an in-memory .xlsx workbook and return the bytes.

    Used to feed st.download_button without touching the filesystem.
    """
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as xl_writer:
        df.to_excel(xl_writer, index=False, sheet_name="Resume Analysis Data")
    return buffer.getvalue()
383
 
384
 
385
+ # --- UI Layout ---
386
+ st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")
387
 
388
  tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
389
 
390
+ # --- User Panel ---
 
 
391
  with tab_user:
392
  st.header("Upload Resumes for Analysis")
393
+ st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")
394
 
395
  job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
396
  selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
397
 
398
+ uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
 
 
399
 
 
400
  if st.button("πŸš€ Analyze All Uploaded Resumes"):
401
  if not uploaded_files:
402
  st.warning("Please upload one or more resume files to begin analysis.")
 
404
  st.session_state.run_analysis = True
405
  st.rerun()
406
 
 
407
  if st.session_state.get("run_analysis", False):
408
  if not uploaded_files:
409
  st.warning("No files found. Upload files and try again.")
 
424
  progress.progress(idx / total)
425
  continue
426
 
 
427
  analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
428
 
429
  if analysis.name == "Extraction Failed":
 
447
  progress.progress(idx / total)
448
 
449
  st.success(f"βœ… Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
450
+ st.session_state.run_analysis = False
451
 
452
+ # Display last results summary
453
  if st.session_state.individual_analysis:
454
  st.subheader("Last Analysis Summary")
455
  for item in st.session_state.individual_analysis:
 
457
  st.markdown("---")
458
  st.caption("All analyzed data is stored in the Admin Dashboard.")
459
 
460
+ # --- Admin Panel ---
 
 
461
  with tab_admin:
462
  if not st.session_state.is_admin_logged_in:
463
  st.header("Admin Login")
 
468
  st.rerun()
469
  else:
470
  st.error("Incorrect password.")
 
471
  st.stop()
472
 
473
  st.header("🎯 Recruitment Dashboard")
 
484
 
485
  display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
486
 
 
487
  edited_df = st.data_editor(
488
  df[display_cols],
489
  column_config={
 
498
  hide_index=True
499
  )
500
 
 
501
  try:
502
  st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
503
  except Exception:
 
504
  for i, val in enumerate(edited_df['Shortlisted'].tolist()):
505
  if i < len(st.session_state.analyzed_data):
506
  st.session_state.analyzed_data.at[i, 'Shortlisted'] = val
 
518
  help="Downloads the full table including all extracted fields and shortlist status."
519
  )
520
 
521
+ # --- End of file ---