meesamraza commited on
Commit
b9548de
·
verified ·
1 Parent(s): 63dd931

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -374
app.py CHANGED
@@ -1,7 +1,7 @@
1
  # app.py
2
  """
3
- Quantum Scrutiny Platform | Groq-Powered
4
- Single-file Streamlit app (refactored, Groq streaming-compatible)
5
  """
6
 
7
  import os
@@ -12,6 +12,7 @@ import base64
12
  import traceback
13
  from typing import Optional, List
14
 
 
15
  from dotenv import load_dotenv
16
  load_dotenv()
17
 
@@ -19,8 +20,8 @@ import streamlit as st
19
  import pandas as pd
20
 
21
  # File parsing
22
- import fitz # PyMuPDF
23
- from docx import Document # python-docx
24
 
25
  # Groq client
26
  from groq import Groq
@@ -28,14 +29,22 @@ from groq import Groq
28
  # Validation
29
  from pydantic import BaseModel, Field, ValidationError
30
 
31
- # --- Page config ---
32
- st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
33
 
34
- # --- Config / Secrets ---
 
 
 
 
 
 
 
 
 
 
 
35
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
36
  ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
37
 
38
- # Initialize Groq client (no API key -> UI warning but app still loads)
39
  groq_client = None
40
  if GROQ_API_KEY:
41
  try:
@@ -43,25 +52,34 @@ if GROQ_API_KEY:
43
  except Exception as e:
44
  st.error(f"Failed to initialize Groq client: {e}")
45
  else:
46
- st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
 
47
 
48
- # --- Session state defaults ---
 
 
49
  if 'is_admin_logged_in' not in st.session_state:
50
  st.session_state.is_admin_logged_in = False
 
 
 
 
 
 
 
51
  if 'analyzed_data' not in st.session_state:
52
- initial_cols = [
53
  'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
54
  'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
55
  'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
56
  'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
57
  ]
58
- st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
59
- if 'individual_analysis' not in st.session_state:
60
- st.session_state.individual_analysis = []
61
- if 'run_analysis' not in st.session_state:
62
- st.session_state.run_analysis = False
63
 
64
- # --- Pydantic schema ---
 
 
65
  class ResumeAnalysis(BaseModel):
66
  name: str = Field(default="Unknown")
67
  email: str = Field(default="")
@@ -76,446 +94,300 @@ class ResumeAnalysis(BaseModel):
76
  autism_care_experience_score: Optional[str] = Field(default="N/A")
77
 
78
 
79
- # --- Helpers: file text extraction ---
 
 
80
  def extract_text_from_file(uploaded_file) -> str:
81
- """Extract text from PDF or DOCX. Returns empty string on failure."""
82
  try:
83
  content = uploaded_file.read()
84
- filename = uploaded_file.name.lower()
85
- if filename.endswith(".pdf") or content[:5] == b"%PDF-":
 
 
86
  try:
87
  with fitz.open(stream=content, filetype="pdf") as doc:
88
- text = ""
89
- for p in doc:
90
- text += p.get_text()
91
- return text.strip()
92
- except Exception:
93
  return ""
94
- elif filename.endswith(".docx"):
 
 
95
  try:
96
  doc = Document(io.BytesIO(content))
97
- paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
98
- return "\n".join(paragraphs).strip()
99
- except Exception:
100
  return ""
101
- else:
102
- # fallback: decode bytes as text
103
- try:
104
- return content.decode('utf-8', errors='ignore')
105
- except Exception:
106
- return ""
107
- except Exception:
108
  return ""
109
 
110
 
111
- # --- Groq call with streaming (collects chunks) ---
112
- def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
113
- """
114
- Calls Groq with streaming enabled and collects the textual output.
115
- Returns the full model text, or None on failure.
116
- """
117
  if not groq_client:
118
- st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
119
  return None
120
 
121
  try:
122
  completion = groq_client.chat.completions.create(
123
- model=model_name,
124
  messages=[
125
- {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
126
  {"role": "user", "content": prompt}
127
  ],
128
- temperature=temperature,
129
- max_completion_tokens=max_completion_tokens,
130
- top_p=top_p,
131
- stream=True
132
  )
133
 
134
- # completion is an iterator/streamable object; collect chunks
135
  collected = ""
136
- # some SDKs yield dict-like chunks, some objects; handle both
137
  for chunk in completion:
138
  try:
139
- # Common pattern: chunk.choices[0].delta.content
140
- delta = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None
141
- if delta is None:
142
- # fallback for dict-like object
143
- if isinstance(chunk, dict):
144
- delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
145
  if delta:
146
  collected += delta
147
- else:
148
- # Some SDKs return final message in chunk.choices[0].message.content
149
- try:
150
- msg = getattr(chunk.choices[0].message, "content", None)
151
- if msg:
152
- collected += msg
153
- except Exception:
154
- pass
155
- except Exception:
156
- # last-resort: append str(chunk)
157
- try:
158
- collected += str(chunk)
159
- except Exception:
160
- pass
161
-
162
- return collected.strip()
163
  except Exception as e:
164
- st.error(f"Groq API call failed: {e}")
165
  return None
166
 
167
 
168
- # --- Parsing model output safely to JSON ---
169
- def extract_first_json(text: str) -> Optional[dict]:
170
- """
171
- Find the first JSON object in text and parse it; return dict or None.
172
- """
173
  if not text:
174
  return None
175
- # find first balanced braces block
176
- # quick heuristic regex for {...}
177
- try:
178
- match = re.search(r"(\{(?:[^{}]|(?R))*\})", text, re.DOTALL)
179
- except re.error:
180
- # Python's re doesn't support (?R); fallback to simpler greedy
181
- match = re.search(r"(\{.*\})", text, re.DOTALL)
182
- if match:
183
- json_text = match.group(1)
184
- else:
185
- # maybe the model returned only JSON-like lines -> try to parse full text
186
- json_text = text
187
 
 
 
 
 
 
 
 
 
188
  try:
189
- parsed = json.loads(json_text)
190
- return parsed
191
- except Exception:
192
- # try to clean common issues: single quotes -> double quotes
193
  try:
194
- json_text_fixed = json_text.replace("'", '"')
195
- parsed = json.loads(json_text_fixed)
196
- return parsed
197
- except Exception:
198
  return None
199
 
200
 
201
- # --- Analyze with Groq (cached by resume text + role) ---
 
 
202
  @st.cache_data(show_spinner=False)
203
  def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
204
- """
205
- Calls Groq (streaming) and returns a ResumeAnalysis instance.
206
- Uses caching to avoid duplicate calls for same resume_text+role.
207
- """
208
- # Build prompt instructing JSON structure
209
- therapist_instructions = ""
210
- if job_role.lower() == "therapist":
211
- therapist_instructions = (
212
- "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
213
- "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
214
- )
215
- else:
216
- therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."
217
-
218
- system_user_prompt = (
219
- "Return a single JSON object with the following keys exactly: "
220
- "name (string), email (string), phone (string), certifications (array of strings), "
221
- "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
222
- "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
223
- "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
224
- f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
225
  )
226
 
227
- raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
228
-
229
- if not raw:
230
- # fallback empty object
231
- return ResumeAnalysis(
232
- name="Extraction Failed",
233
- email="",
234
- phone="",
235
- certifications=[],
236
- experience_summary="",
237
- education_summary="",
238
- communication_skills="N/A",
239
- technical_skills=[],
240
- aba_therapy_skills="N/A",
241
- rbt_bcba_certification="N/A",
242
- autism_care_experience_score="N/A"
243
- )
244
 
 
 
 
 
 
 
 
 
 
245
  parsed = extract_first_json(raw)
 
246
  if not parsed:
247
- # show raw output for debugging when developer runs app locally (admin panel will show too)
248
- st.warning("Failed to parse model JSON output. See raw output below for debugging.")
249
- st.text_area("Raw model output (debug)", raw, height=200)
250
- return ResumeAnalysis(
251
- name="Extraction Failed",
252
- email="",
253
- phone="",
254
- certifications=[],
255
- experience_summary="",
256
- education_summary="",
257
- communication_skills="N/A",
258
- technical_skills=[],
259
- aba_therapy_skills="N/A",
260
- rbt_bcba_certification="N/A",
261
- autism_care_experience_score="N/A"
262
- )
263
 
264
- # Ensure keys exist and coerce types
265
- parsed.setdefault("name", "Unknown")
266
- parsed.setdefault("email", "")
267
- parsed.setdefault("phone", "")
268
- parsed.setdefault("certifications", [])
269
- parsed.setdefault("experience_summary", "")
270
- parsed.setdefault("education_summary", "")
271
- parsed.setdefault("communication_skills", "N/A")
272
- parsed.setdefault("technical_skills", [])
273
- parsed.setdefault("aba_therapy_skills", "N/A")
274
- parsed.setdefault("rbt_bcba_certification", "N/A")
275
- parsed.setdefault("autism_care_experience_score", "N/A")
276
-
277
- # Ensure string coercions for some fields
278
  try:
279
- parsed["communication_skills"] = str(parsed.get("communication_skills") or "N/A")
280
- parsed["aba_therapy_skills"] = str(parsed.get("aba_therapy_skills") or "N/A")
281
- parsed["rbt_bcba_certification"] = str(parsed.get("rbt_bcba_certification") or "N/A")
282
- parsed["autism_care_experience_score"] = str(parsed.get("autism_care_experience_score") or "N/A")
283
- except Exception:
284
- pass
285
-
286
- # Validate via Pydantic
287
- try:
288
- analysis = ResumeAnalysis.parse_obj(parsed)
289
- return analysis
290
- except ValidationError as ve:
291
- st.error("Model output failed schema validation.")
292
- st.text_area("Raw model output (debug)", raw, height=200)
293
- st.exception(ve)
294
- return ResumeAnalysis(
295
- name="Extraction Failed",
296
- email="",
297
- phone="",
298
- certifications=[],
299
- experience_summary="",
300
- education_summary="",
301
- communication_skills="N/A",
302
- technical_skills=[],
303
- aba_therapy_skills="N/A",
304
- rbt_bcba_certification="N/A",
305
- autism_care_experience_score="N/A"
306
- )
307
 
308
 
309
- # --- Scoring logic ---
 
 
310
  def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
311
- total_score = 0.0
312
 
313
- # Experience summary: up to 40
314
- exp_len = len(analysis.experience_summary or "")
315
- exp_factor = min(exp_len / 100.0, 1.0)
316
- total_score += exp_factor * 40.0
317
 
318
- # Skills count: up to 30
319
- skills_count = len(analysis.technical_skills or [])
320
- skills_factor = min(skills_count / 10.0, 1.0)
321
- total_score += skills_factor * 30.0
322
 
323
- # Communication: up to 20 (expects 0-10 in string)
324
  try:
325
- m = re.search(r"(\d+(\.\d+)?)", str(analysis.communication_skills))
326
- comm_val = float(m.group(1)) if m else float(str(analysis.communication_skills))
327
- comm_val = max(0.0, min(10.0, comm_val))
328
- except Exception:
329
- comm_val = 5.0
330
- total_score += (comm_val / 10.0) * 20.0
331
 
332
- # Certifications: up to 10
333
- total_score += min(len(analysis.certifications or []), 10) * 1.0
334
 
335
- # Therapist bonus up to 10
336
  if role.lower() == "therapist":
337
- def safe_score(x):
338
- try:
339
- m = re.search(r"(\d+(\.\d+)?)", str(x))
340
- return float(m.group(1)) if m else 0.0
341
- except Exception:
342
- return 0.0
343
- aba = safe_score(analysis.aba_therapy_skills)
344
- autism = safe_score(analysis.autism_care_experience_score)
345
- spec_bonus = ((aba + autism) / 20.0) * 10.0
346
- total_score += spec_bonus
347
-
348
- final = round(min(total_score, 100))
349
- return float(final)
350
-
351
-
352
- # --- Append to DataFrame ---
353
- def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
354
- data = analysis.dict()
355
- tech = ", ".join(data.get("technical_skills") or [])
356
- certs = ", ".join(data.get("certifications") or [])
357
- row = {
358
- 'Name': data.get("name") or "",
359
- 'Job Role': job_role,
360
- 'Resume Score (100)': score,
361
- 'Email': data.get("email") or "",
362
- 'Phone': data.get("phone") or "",
363
- 'Shortlisted': 'No',
364
- 'Experience Summary': data.get("experience_summary") or "",
365
- 'Education Summary': data.get("education_summary") or "",
366
- 'Communication Rating (1-10)': str(data.get("communication_skills") or "N/A"),
367
- 'Skills/Technologies': tech,
368
- 'Certifications': certs,
369
- 'ABA Skills (1-10)': str(data.get("aba_therapy_skills") or "N/A"),
370
- 'RBT/BCBA Cert': str(data.get("rbt_bcba_certification") or "N/A"),
371
- 'Autism-Care Exp (1-10)': str(data.get("autism_care_experience_score") or "N/A"),
372
- }
373
- new_df = pd.DataFrame([row])
374
- st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
375
-
376
-
377
- # --- Excel export helper ---
378
- def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
379
  output = io.BytesIO()
380
- with pd.ExcelWriter(output, engine="openpyxl") as writer:
381
- df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
382
  return output.getvalue()
383
 
384
 
385
- # --- UI Layout ---
386
- st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")
 
 
387
 
388
- tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
 
 
 
389
 
390
- # --- User Panel ---
 
 
391
  with tab_user:
392
- st.header("Upload Resumes for Analysis")
393
- st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")
394
 
395
- job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
396
- selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
 
 
 
 
397
 
398
- uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
 
 
 
 
399
 
400
- if st.button("πŸš€ Analyze All Uploaded Resumes"):
401
- if not uploaded_files:
402
- st.warning("Please upload one or more resume files to begin analysis.")
403
  else:
404
  st.session_state.run_analysis = True
405
  st.rerun()
406
 
407
- if st.session_state.get("run_analysis", False):
408
- if not uploaded_files:
409
- st.warning("No files found. Upload files and try again.")
 
410
  st.session_state.run_analysis = False
 
411
  else:
412
- total = len(uploaded_files)
413
  progress = st.progress(0)
414
- st.session_state.individual_analysis = []
415
- idx = 0
416
- with st.spinner("Processing resumes..."):
417
- for f in uploaded_files:
418
- idx += 1
419
- try:
420
- st.write(f"Analyzing **{f.name}**...")
421
- resume_text = extract_text_from_file(f)
422
- if not resume_text:
423
- st.error(f"Could not extract text from {f.name}. Skipping.")
424
- progress.progress(idx / total)
425
- continue
426
-
427
- analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
428
-
429
- if analysis.name == "Extraction Failed":
430
- st.error(f"Extraction failed for {f.name}. See debug output.")
431
- progress.progress(idx / total)
432
- continue
433
-
434
- score = calculate_resume_score(analysis, selected_role)
435
- append_analysis_to_dataframe(selected_role, analysis, score)
436
-
437
- st.session_state.individual_analysis.append({
438
- 'name': analysis.name,
439
- 'score': score,
440
- 'role': selected_role,
441
- 'file_name': f.name
442
- })
443
- except Exception as e:
444
- st.error(f"Error analyzing {f.name}: {e}")
445
- st.exception(traceback.format_exc())
446
- finally:
447
- progress.progress(idx / total)
448
-
449
- st.success(f"βœ… Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
450
  st.session_state.run_analysis = False
451
 
452
- # Display last results summary
453
- if st.session_state.individual_analysis:
454
- st.subheader("Last Analysis Summary")
455
- for item in st.session_state.individual_analysis:
456
- st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
457
- st.markdown("---")
458
- st.caption("All analyzed data is stored in the Admin Dashboard.")
459
 
460
- # --- Admin Panel ---
 
 
461
  with tab_admin:
 
462
  if not st.session_state.is_admin_logged_in:
463
- st.header("Admin Login")
464
- password = st.text_input("Enter Admin Password", type="password")
465
- if st.button("πŸ”‘ Login"):
466
- if password == ADMIN_PASSWORD:
467
  st.session_state.is_admin_logged_in = True
468
  st.rerun()
469
  else:
470
  st.error("Incorrect password.")
471
- st.stop()
472
 
473
- st.header("🎯 Recruitment Dashboard")
474
- if st.button("πŸšͺ Logout"):
475
- st.session_state.is_admin_logged_in = False
476
- st.rerun()
477
-
478
- if st.session_state.analyzed_data.empty:
479
- st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
480
  else:
481
- df = st.session_state.analyzed_data.copy()
482
- st.subheader("Candidate Data Table")
483
- st.success(f"**Total Candidates Analyzed: {len(df)}**")
484
-
485
- display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
486
-
487
- edited_df = st.data_editor(
488
- df[display_cols],
489
- column_config={
490
- "Shortlisted": st.column_config.SelectboxColumn(
491
- "Shortlisted",
492
- help="Mark the candidate as Shortlisted or Rejected.",
493
- options=["No", "Yes"],
494
- required=True
495
- )
496
- },
497
- key="dashboard_editor",
498
- hide_index=True
499
- )
500
-
501
- try:
502
- st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
503
- except Exception:
504
- for i, val in enumerate(edited_df['Shortlisted'].tolist()):
505
- if i < len(st.session_state.analyzed_data):
506
- st.session_state.analyzed_data.at[i, 'Shortlisted'] = val
507
-
508
- st.markdown("---")
509
- st.subheader("πŸ“₯ Download Data")
510
- df_export = st.session_state.analyzed_data.copy()
511
- excel_bytes = df_to_excel_bytes(df_export)
512
-
513
- st.download_button(
514
- label="πŸ’Ύ Download All Data as Excel (.xlsx)",
515
- data=excel_bytes,
516
- file_name="quantum_scrutiny_report.xlsx",
517
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
518
- help="Downloads the full table including all extracted fields and shortlist status."
519
- )
520
-
521
- # --- End of file ---
 
1
  # app.py
2
  """
3
+ Quantum Scrutiny Platform β€” Groq-Powered Resume Analyzer
4
+ Fully updated + cleaned single-file Streamlit application
5
  """
6
 
7
  import os
 
12
  import traceback
13
  from typing import Optional, List
14
 
15
+ # Env
16
  from dotenv import load_dotenv
17
  load_dotenv()
18
 
 
20
  import pandas as pd
21
 
22
  # File parsing
23
+ import fitz # PyMuPDF
24
+ from docx import Document
25
 
26
  # Groq client
27
  from groq import Groq
 
29
  # Validation
30
  from pydantic import BaseModel, Field, ValidationError
31
 
 
 
32
 
33
+ # ---------------------------------------------------------
34
+ # Page config
35
+ # ---------------------------------------------------------
36
+ st.set_page_config(
37
+ page_title="Quantum Scrutiny Platform",
38
+ layout="wide"
39
+ )
40
+
41
+
42
+ # ---------------------------------------------------------
43
+ # Secrets
44
+ # ---------------------------------------------------------
45
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
46
  ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
47
 
 
48
  groq_client = None
49
  if GROQ_API_KEY:
50
  try:
 
52
  except Exception as e:
53
  st.error(f"Failed to initialize Groq client: {e}")
54
  else:
55
+ st.warning("GROQ_API_KEY not found β€” model calls disabled.")
56
+
57
 
58
# ---------------------------------------------------------
# Session State
# ---------------------------------------------------------
# Streamlit reruns the whole script on every interaction, so all
# cross-rerun state lives in st.session_state and is seeded exactly once.

# True after a successful admin login; gates the admin dashboard tab.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False

# Armed by the "Analyze" button and consumed (then reset) by the analysis
# loop on the rerun that follows st.rerun().
if 'run_analysis' not in st.session_state:
    st.session_state.run_analysis = False

# Per-run result summaries (list of dicts).
# NOTE(review): seeded here but not written to elsewhere in this revision —
# confirm it is still needed.
if 'individual_analysis' not in st.session_state:
    st.session_state.individual_analysis = []

# Accumulated candidate table displayed in the admin dashboard; one row per
# analyzed resume, columns fixed to this schema.
if 'analyzed_data' not in st.session_state:
    cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=cols)
78
+
 
 
 
79
 
80
+ # ---------------------------------------------------------
81
+ # Pydantic Schema
82
+ # ---------------------------------------------------------
83
  class ResumeAnalysis(BaseModel):
84
  name: str = Field(default="Unknown")
85
  email: str = Field(default="")
 
94
  autism_care_experience_score: Optional[str] = Field(default="N/A")
95
 
96
 
97
# ---------------------------------------------------------
# Text Extraction
# ---------------------------------------------------------
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF, DOCX, or plain-text file.

    Args:
        uploaded_file: A Streamlit ``UploadedFile`` (any object exposing
            ``.read()`` and ``.name`` works).

    Returns:
        The extracted text, or an empty string when nothing could be read.
        Extraction failures never raise — callers treat "" as "skip file".
    """
    try:
        content = uploaded_file.read()
        name = uploaded_file.name.lower()

        # PDF — detected by extension or by the %PDF- magic bytes.
        if name.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "".join(page.get_text() for page in doc).strip()
            except Exception:
                # Narrowed from a bare `except:` — corrupt or encrypted PDF.
                return ""

        # DOCX
        elif name.endswith(".docx"):
            try:
                doc = Document(io.BytesIO(content))
                return "\n".join(p.text for p in doc.paragraphs).strip()
            except Exception:
                return ""

        # Fallback: treat the payload as UTF-8 text.
        return content.decode("utf-8", errors="ignore")

    except Exception:
        # Defensive: a broken upload object must never crash the app.
        return ""
126
 
127
 
128
# ---------------------------------------------------------
# Groq Streaming Wrapper
# ---------------------------------------------------------
def call_groq_stream_collect(prompt: str) -> Optional[str]:
    """Send *prompt* to Groq with streaming and return the collected text.

    Args:
        prompt: The full user prompt to send.

    Returns:
        The concatenated streamed output (stripped), or ``None`` when the
        client is unavailable or the API call fails. Errors are surfaced
        via ``st.error`` rather than raised.
    """
    if not groq_client:
        st.error("Groq client not initialized.")
        return None

    try:
        completion = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": "You are an AI resume analyzer."},
                {"role": "user", "content": prompt}
            ],
            stream=True,
            temperature=0.0,           # deterministic output for JSON extraction
            max_completion_tokens=2048
        )

        # Collect incremental deltas; chunks without textual content
        # (e.g. role-only deltas) are skipped. A list + join avoids the
        # quadratic cost of repeated string concatenation.
        parts: List[str] = []
        for chunk in completion:
            delta = getattr(chunk.choices[0].delta, "content", None)
            if delta:
                parts.append(delta)
        # BUG FIX: previously a per-chunk bare `except: pass` silently
        # dropped malformed chunks and the result was not stripped.
        return "".join(parts).strip()
    except Exception as e:
        st.error(f"Groq API error: {e}")
        return None
162
 
163
 
164
# ---------------------------------------------------------
# JSON Extraction
# ---------------------------------------------------------
def extract_first_json(text: str) -> Optional[dict]:
    """Locate and parse the first JSON object embedded in *text*.

    Model output often wraps the JSON in prose; this grabs the outermost
    ``{...}`` span and decodes it.

    Args:
        text: Raw model output (may be ``None`` or empty).

    Returns:
        The decoded object, or ``None`` if nothing parseable is found.
    """
    if not text:
        return None

    # Greedy match keeps nested objects inside a single span.
    match = re.search(r"\{[\s\S]*\}", text)
    if not match:
        return None

    candidate = match.group(0)

    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # Models occasionally emit single-quoted pseudo-JSON. The naive
        # quote swap is lossy (apostrophes inside values break it) but it
        # salvages the common case; narrowed from bare `except:` clauses.
        try:
            return json.loads(candidate.replace("'", '"'))
        except json.JSONDecodeError:
            return None
186
 
187
 
188
# ---------------------------------------------------------
# Cached Analysis
# ---------------------------------------------------------
@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Analyze *resume_text* for *job_role* via Groq; cached per (text, role).

    The cache means re-analyzing an identical resume for the same role does
    not hit the API again.

    Returns:
        A validated ``ResumeAnalysis``. On any model or parsing failure a
        sentinel instance with ``name == "Extraction Failed"`` is returned,
        so callers can detect failure without exception handling.
    """
    therapist_instruction = (
        "If role is Therapist, extract ABA skills, BCBA/RBT, and Autism-care scores."
        if job_role.lower() == "therapist" else
        "For non-therapist roles, set therapist fields to 'N/A'."
    )

    prompt = f"""
Return a JSON object with keys:
name, email, phone, certifications, experience_summary,
education_summary, communication_skills, technical_skills,
aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score.

{therapist_instruction}

Resume Text:
{resume_text}

Return only JSON.
"""

    raw = call_groq_stream_collect(prompt)
    parsed = extract_first_json(raw)

    if not parsed:
        return ResumeAnalysis(name="Extraction Failed")

    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError:
        # Narrowed from a bare `except:` — only schema mismatches are an
        # expected failure mode here; anything else should surface loudly.
        return ResumeAnalysis(name="Extraction Failed")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
 
226
+ # ---------------------------------------------------------
227
+ # Scoring
228
+ # ---------------------------------------------------------
229
  def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
230
+ score = 0
231
 
232
+ # Experience length (40)
233
+ score += min(len(analysis.experience_summary) / 100, 1) * 40
 
 
234
 
235
+ # Skills count (30)
236
+ score += min(len(analysis.technical_skills) / 10, 1) * 30
 
 
237
 
238
+ # Communication (20)
239
  try:
240
+ c = float(re.findall(r"\d+", analysis.communication_skills)[0])
241
+ except:
242
+ c = 5
243
+ score += (min(c, 10) / 10) * 20
 
 
244
 
245
+ # Certifications (10)
246
+ score += min(len(analysis.certifications), 10)
247
 
248
+ # Therapist bonus (10)
249
  if role.lower() == "therapist":
250
+ try:
251
+ aba = float(re.findall(r"\d+", analysis.aba_therapy_skills)[0])
252
+ autism = float(re.findall(r"\d+", analysis.autism_care_experience_score)[0])
253
+ score += ((aba + autism) / 20) * 10
254
+ except:
255
+ pass
256
+
257
+ return float(round(min(score, 100)))
258
+
259
+
260
# ---------------------------------------------------------
# Add Row
# ---------------------------------------------------------
def append_analysis_to_dataframe(role, analysis: ResumeAnalysis, score: float):
    """Append one analyzed candidate to ``st.session_state.analyzed_data``.

    Args:
        role: The job role the resume was scored against.
        analysis: The extracted resume fields.
        score: The 0-100 score from ``calculate_resume_score``.

    A labelled dict row appended via ``pd.concat`` replaces the previous
    positional ``df.loc[len(df)]`` write, which silently depended on exact
    column order and breaks on a non-contiguous index (e.g. after rows are
    removed).
    """
    row = {
        'Name': analysis.name,
        'Job Role': role,
        'Resume Score (100)': score,
        'Email': analysis.email,
        'Phone': analysis.phone,
        'Shortlisted': "No",  # default; admin can shortlist later
        'Experience Summary': analysis.experience_summary,
        'Education Summary': analysis.education_summary,
        'Communication Rating (1-10)': analysis.communication_skills,
        # `or []` guards: optional model fields may come back as None.
        'Skills/Technologies': ", ".join(analysis.technical_skills or []),
        'Certifications': ", ".join(analysis.certifications or []),
        'ABA Skills (1-10)': analysis.aba_therapy_skills,
        'RBT/BCBA Cert': analysis.rbt_bcba_certification,
        'Autism-Care Exp (1-10)': analysis.autism_care_experience_score,
    }
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])],
        ignore_index=True,
    )
285
+
286
+
287
# ---------------------------------------------------------
# Excel Export
# ---------------------------------------------------------
def df_to_excel_bytes(df):
    """Serialize *df* into an in-memory .xlsx workbook and return its bytes."""
    buffer = io.BytesIO()
    # The context manager finalizes the workbook before we read the buffer.
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis")
    return buffer.getvalue()
295
 
296
 
297
# ---------------------------------------------------------
# UI
# ---------------------------------------------------------
st.title("🌌 Quantum Scrutiny Platform — AI Resume Analyzer")

# Two top-level views: a public uploader and a password-gated admin dashboard.
tab_user, tab_admin = st.tabs([
    "👤 User Resume Panel",
    "🔒 Admin Dashboard"
])
306
 
307
# ---------------------------------------------------------
# USER PANEL
# ---------------------------------------------------------
with tab_user:

    st.header("Upload Resumes")

    # Target role drives both the extraction prompt and the scoring bonus.
    job_role = st.selectbox(
        "Select Job Role",
        ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    )

    files = st.file_uploader(
        "Upload PDF or DOCX",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    # The button only arms the run; the work happens on the rerun triggered
    # by st.rerun() so the progress UI renders from a clean script pass.
    if st.button("🚀 Analyze All"):
        if not files:
            st.warning("Upload at least one file.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    if st.session_state.run_analysis:

        # Uploads can disappear between reruns (e.g. the user cleared the
        # uploader), so re-check before processing.
        if not files:
            st.error("No files found.")
            st.session_state.run_analysis = False

        else:
            total = len(files)
            progress = st.progress(0)

            for i, f in enumerate(files, 1):
                st.write(f"Analyzing **{f.name}**...")
                text = extract_text_from_file(f)

                # Unreadable files are skipped, but still advance the bar.
                if not text:
                    st.error(f"Could not extract text from {f.name}. Skipped.")
                    progress.progress(i / total)
                    continue

                # Cached per (text, role): identical resumes do not re-hit
                # the Groq API.
                analysis = analyze_resume_with_groq_cached(text, job_role)
                score = calculate_resume_score(analysis, job_role)

                append_analysis_to_dataframe(job_role, analysis, score)
                progress.progress(i / total)

            st.success("All files processed!")
            # Disarm so a later unrelated rerun does not re-append rows.
            st.session_state.run_analysis = False
359
 
 
 
 
 
 
 
 
360
 
361
# ---------------------------------------------------------
# ADMIN PANEL
# ---------------------------------------------------------
with tab_admin:

    if not st.session_state.is_admin_logged_in:

        # Simple shared-password gate; the password comes from the
        # ADMIN_PASSWORD env var (default "admin" — change in production).
        pwd = st.text_input("Admin Password", type="password")
        if st.button("Login"):
            if pwd == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")

    else:
        st.subheader("Admin Dashboard — Analyzed Data")

        df = st.session_state.analyzed_data
        st.dataframe(df, use_container_width=True)

        # BUG FIX: the download_button was previously nested inside a plain
        # st.button("Download Excel"). Clicking the download triggered a
        # rerun in which the outer button evaluated False, so the download
        # control vanished before it could be used. Render it directly.
        st.download_button(
            label="Download File",
            data=df_to_excel_bytes(df),
            file_name="resume_analysis.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )

        if st.button("Clear Database"):
            # iloc[0:0] keeps the column schema while dropping every row.
            st.session_state.analyzed_data = st.session_state.analyzed_data.iloc[0:0]
            st.success("Cleared.")