meesamraza commited on
Commit
ce2a8d8
·
verified ·
1 Parent(s): b9548de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +547 -351
app.py CHANGED
@@ -1,7 +1,6 @@
1
- # app.py
2
  """
3
- Quantum Scrutiny Platform Groq-Powered Resume Analyzer
4
- Fully updated + cleaned single-file Streamlit application
5
  """
6
 
7
  import os
@@ -12,7 +11,6 @@ import base64
12
  import traceback
13
  from typing import Optional, List
14
 
15
- # Env
16
  from dotenv import load_dotenv
17
  load_dotenv()
18
 
@@ -20,8 +18,8 @@ import streamlit as st
20
  import pandas as pd
21
 
22
  # File parsing
23
- import fitz # PyMuPDF
24
- from docx import Document
25
 
26
  # Groq client
27
  from groq import Groq
@@ -29,365 +27,563 @@ from groq import Groq
29
  # Validation
30
  from pydantic import BaseModel, Field, ValidationError
31
 
 
 
32
 
33
- # ---------------------------------------------------------
34
- # Page config
35
- # ---------------------------------------------------------
36
- st.set_page_config(
37
- page_title="Quantum Scrutiny Platform",
38
- layout="wide"
39
- )
40
-
41
-
42
- # ---------------------------------------------------------
43
- # Secrets
44
- # ---------------------------------------------------------
45
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
46
  ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
47
 
 
48
  groq_client = None
49
  if GROQ_API_KEY:
50
- try:
51
- groq_client = Groq(api_key=GROQ_API_KEY)
52
- except Exception as e:
53
- st.error(f"Failed to initialize Groq client: {e}")
54
  else:
55
- st.warning("GROQ_API_KEY not found model calls disabled.")
56
 
57
-
58
- # ---------------------------------------------------------
59
- # Session State
60
- # ---------------------------------------------------------
61
  if 'is_admin_logged_in' not in st.session_state:
62
- st.session_state.is_admin_logged_in = False
63
-
64
- if 'run_analysis' not in st.session_state:
65
- st.session_state.run_analysis = False
66
-
 
 
 
 
 
 
 
67
  if 'individual_analysis' not in st.session_state:
68
- st.session_state.individual_analysis = []
 
 
69
 
70
- if 'analyzed_data' not in st.session_state:
71
- cols = [
72
- 'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
73
- 'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
74
- 'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
75
- 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
76
- ]
77
- st.session_state.analyzed_data = pd.DataFrame(columns=cols)
78
-
79
-
80
- # ---------------------------------------------------------
81
- # Pydantic Schema
82
- # ---------------------------------------------------------
83
  class ResumeAnalysis(BaseModel):
84
- name: str = Field(default="Unknown")
85
- email: str = Field(default="")
86
- phone: str = Field(default="")
87
- certifications: List[str] = Field(default_factory=list)
88
- experience_summary: str = Field(default="")
89
- education_summary: str = Field(default="")
90
- communication_skills: str = Field(default="N/A")
91
- technical_skills: List[str] = Field(default_factory=list)
92
- aba_therapy_skills: Optional[str] = Field(default="N/A")
93
- rbt_bcba_certification: Optional[str] = Field(default="N/A")
94
- autism_care_experience_score: Optional[str] = Field(default="N/A")
95
-
96
-
97
- # ---------------------------------------------------------
98
- # Text Extraction
99
- # ---------------------------------------------------------
100
  def extract_text_from_file(uploaded_file) -> str:
101
- try:
102
- content = uploaded_file.read()
103
- name = uploaded_file.name.lower()
104
-
105
- # PDF
106
- if name.endswith(".pdf") or content[:5] == b"%PDF-":
107
- try:
108
- with fitz.open(stream=content, filetype="pdf") as doc:
109
- return "".join([p.get_text() for p in doc]).strip()
110
- except:
111
- return ""
112
-
113
- # DOCX
114
- elif name.endswith(".docx"):
115
- try:
116
- doc = Document(io.BytesIO(content))
117
- return "\n".join([p.text for p in doc.paragraphs]).strip()
118
- except:
119
- return ""
120
-
121
- # Fallback
122
- return content.decode("utf-8", errors="ignore")
123
-
124
- except:
125
- return ""
126
-
127
-
128
- # ---------------------------------------------------------
129
- # Groq Streaming Wrapper
130
- # ---------------------------------------------------------
131
- def call_groq_stream_collect(prompt: str) -> Optional[str]:
132
-
133
- if not groq_client:
134
- st.error("Groq client not initialized.")
135
- return None
136
-
137
- try:
138
- completion = groq_client.chat.completions.create(
139
- model="llama-3.3-70b-versatile",
140
- messages=[
141
- {"role": "system", "content": "You are an AI resume analyzer."},
142
- {"role": "user", "content": prompt}
143
- ],
144
- stream=True,
145
- temperature=0.0,
146
- max_completion_tokens=2048
147
- )
148
-
149
- collected = ""
150
- for chunk in completion:
151
- try:
152
- delta = getattr(chunk.choices[0].delta, "content", None)
153
- if delta:
154
- collected += delta
155
- except:
156
- pass
157
- return collected
158
-
159
- except Exception as e:
160
- st.error(f"Groq API error: {e}")
161
- return None
162
-
163
-
164
- # ---------------------------------------------------------
165
- # JSON Extraction
166
- # ---------------------------------------------------------
167
- def extract_first_json(text: str):
168
- if not text:
169
- return None
170
-
171
- # Try simple balanced regex
172
- match = re.search(r"\{[\s\S]*\}", text)
173
- if not match:
174
- return None
175
-
176
- raw_json = match.group(0)
177
-
178
- # Attempt parse
179
- try:
180
- return json.loads(raw_json)
181
- except:
182
- try:
183
- return json.loads(raw_json.replace("'", '"'))
184
- except:
185
- return None
186
-
187
-
188
- # ---------------------------------------------------------
189
- # Cached Analysis
190
- # ---------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  @st.cache_data(show_spinner=False)
192
  def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
193
-
194
- therapist_instruction = (
195
- "If role is Therapist, extract ABA skills, BCBA/RBT, and Autism-care scores."
196
- if job_role.lower() == "therapist" else
197
- "For non-therapist roles, set therapist fields to 'N/A'."
198
- )
199
-
200
- prompt = f"""
201
- Return a JSON object with keys:
202
- name, email, phone, certifications, experience_summary,
203
- education_summary, communication_skills, technical_skills,
204
- aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score.
205
-
206
- {therapist_instruction}
207
-
208
- Resume Text:
209
- {resume_text}
210
-
211
- Return only JSON.
212
- """
213
-
214
- raw = call_groq_stream_collect(prompt)
215
- parsed = extract_first_json(raw)
216
-
217
- if not parsed:
218
- return ResumeAnalysis(name="Extraction Failed")
219
-
220
- try:
221
- return ResumeAnalysis.parse_obj(parsed)
222
- except:
223
- return ResumeAnalysis(name="Extraction Failed")
224
-
225
-
226
- # ---------------------------------------------------------
227
- # Scoring
228
- # ---------------------------------------------------------
229
- def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
230
- score = 0
231
-
232
- # Experience length (40)
233
- score += min(len(analysis.experience_summary) / 100, 1) * 40
234
-
235
- # Skills count (30)
236
- score += min(len(analysis.technical_skills) / 10, 1) * 30
237
-
238
- # Communication (20)
239
- try:
240
- c = float(re.findall(r"\d+", analysis.communication_skills)[0])
241
- except:
242
- c = 5
243
- score += (min(c, 10) / 10) * 20
244
-
245
- # Certifications (10)
246
- score += min(len(analysis.certifications), 10)
247
-
248
- # Therapist bonus (10)
249
- if role.lower() == "therapist":
250
- try:
251
- aba = float(re.findall(r"\d+", analysis.aba_therapy_skills)[0])
252
- autism = float(re.findall(r"\d+", analysis.autism_care_experience_score)[0])
253
- score += ((aba + autism) / 20) * 10
254
- except:
255
- pass
256
-
257
- return float(round(min(score, 100)))
258
-
259
-
260
- # ---------------------------------------------------------
261
- # Add Row
262
- # ---------------------------------------------------------
263
- def append_analysis_to_dataframe(role, analysis: ResumeAnalysis, score: float):
264
-
265
- df = st.session_state.analyzed_data
266
-
267
- df.loc[len(df)] = [
268
- analysis.name,
269
- role,
270
- score,
271
- analysis.email,
272
- analysis.phone,
273
- "No",
274
- analysis.experience_summary,
275
- analysis.education_summary,
276
- analysis.communication_skills,
277
- ", ".join(analysis.technical_skills),
278
- ", ".join(analysis.certifications),
279
- analysis.aba_therapy_skills,
280
- analysis.rbt_bcba_certification,
281
- analysis.autism_care_experience_score
282
- ]
283
-
284
- st.session_state.analyzed_data = df
285
-
286
-
287
- # ---------------------------------------------------------
288
- # Excel Export
289
- # ---------------------------------------------------------
290
- def df_to_excel_bytes(df):
291
- output = io.BytesIO()
292
- with pd.ExcelWriter(output, engine="openpyxl") as w:
293
- df.to_excel(w, index=False, sheet_name="Resume Analysis")
294
- return output.getvalue()
295
-
296
-
297
- # ---------------------------------------------------------
298
- # UI
299
- # ---------------------------------------------------------
300
- st.title("🌌 Quantum Scrutiny Platform AI Resume Analyzer")
301
-
302
- tab_user, tab_admin = st.tabs([
303
- "👤 User Resume Panel",
304
- "🔒 Admin Dashboard"
305
- ])
306
-
307
- # ---------------------------------------------------------
308
- # USER PANEL
309
- # ---------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  with tab_user:
311
-
312
- st.header("Upload Resumes")
313
-
314
- job_role = st.selectbox(
315
- "Select Job Role",
316
- ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
317
- )
318
-
319
- files = st.file_uploader(
320
- "Upload PDF or DOCX",
321
- type=["pdf", "docx"],
322
- accept_multiple_files=True
323
- )
324
-
325
- if st.button("🚀 Analyze All"):
326
- if not files:
327
- st.warning("Upload at least one file.")
328
- else:
329
- st.session_state.run_analysis = True
330
- st.rerun()
331
-
332
- if st.session_state.run_analysis:
333
-
334
- if not files:
335
- st.error("No files found.")
336
- st.session_state.run_analysis = False
337
-
338
- else:
339
- total = len(files)
340
- progress = st.progress(0)
341
-
342
- for i, f in enumerate(files, 1):
343
- st.write(f"Analyzing **{f.name}**...")
344
- text = extract_text_from_file(f)
345
-
346
- if not text:
347
- st.error(f"Could not extract text from {f.name}. Skipped.")
348
- progress.progress(i / total)
349
- continue
350
-
351
- analysis = analyze_resume_with_groq_cached(text, job_role)
352
- score = calculate_resume_score(analysis, job_role)
353
-
354
- append_analysis_to_dataframe(job_role, analysis, score)
355
- progress.progress(i / total)
356
-
357
- st.success("All files processed!")
358
- st.session_state.run_analysis = False
359
-
360
-
361
- # ---------------------------------------------------------
362
- # ADMIN PANEL
363
- # ---------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  with tab_admin:
365
-
366
- if not st.session_state.is_admin_logged_in:
367
-
368
- pwd = st.text_input("Admin Password", type="password")
369
- if st.button("Login"):
370
- if pwd == ADMIN_PASSWORD:
371
- st.session_state.is_admin_logged_in = True
372
- st.rerun()
373
- else:
374
- st.error("Incorrect password.")
375
-
376
- else:
377
- st.subheader("Admin Dashboard — Analyzed Data")
378
-
379
- df = st.session_state.analyzed_data
380
- st.dataframe(df, use_container_width=True)
381
-
382
- if st.button("Download Excel"):
383
- xls = df_to_excel_bytes(df)
384
- st.download_button(
385
- label="Download File",
386
- data=xls,
387
- file_name="resume_analysis.xlsx",
388
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
389
- )
390
-
391
- if st.button("Clear Database"):
392
- st.session_state.analyzed_data = st.session_state.analyzed_data.iloc[0:0]
393
- st.success("Cleared.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Quantum Scrutiny Platform | Groq-Powered
3
+ Single-file Streamlit app (refactored, Groq streaming-compatible)
4
  """
5
 
6
  import os
 
11
  import traceback
12
  from typing import Optional, List
13
 
 
14
  from dotenv import load_dotenv
15
  load_dotenv()
16
 
 
18
  import pandas as pd
19
 
20
  # File parsing
21
+ import fitz                   # PyMuPDF
22
+ from docx import Document     # python-docx
23
 
24
  # Groq client
25
  from groq import Groq
 
27
  # Validation
28
  from pydantic import BaseModel, Field, ValidationError
29
 
30
# --- Page config (must be the first Streamlit call) ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- Config / Secrets ---
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Build the Groq client. Without an API key the app still renders; the
# user just sees a warning and model calls stay disabled.
groq_client = None
if not GROQ_API_KEY:
    st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Failed to initialize Groq client: {e}")

# --- Session state defaults ---
# Columns of the admin results table; order is what the dashboard shows.
_RESULT_COLUMNS = [
    'Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Phone',
    # Component-score columns
    'Experience Score (40)', 'Skills Score (30)', 'Communication Score (20)', 'Certifications Score (10)',
    'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
    'Skills/Technologies', 'Certifications',
    # Therapist-specific columns
    'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
]

# Seed each session key exactly once per browser session.
_STATE_DEFAULTS = {
    'is_admin_logged_in': False,
    'analyzed_data': pd.DataFrame(columns=_RESULT_COLUMNS),
    'individual_analysis': [],
    'run_analysis': False,
}
for _key, _default in _STATE_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
65
 
66
+ # --- Pydantic schema (No change needed here, as the new scores are derived) ---
 
 
 
 
 
 
 
 
 
 
 
 
67
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume by the model.

    Score-like fields are kept as strings (e.g. '8' or 'N/A') because the
    prompt asks the model for strings; the scoring code parses them
    defensively. All fields default to empty/'N/A' so a partially parsed
    model response still validates.
    """
    name: str = "Unknown"
    email: str = ""
    phone: str = ""
    certifications: List[str] = Field(default_factory=list)
    experience_summary: str = ""
    education_summary: str = ""
    communication_skills: str = "N/A"
    technical_skills: List[str] = Field(default_factory=list)
    # Therapist-role-only fields; left as 'N/A' for every other role.
    aba_therapy_skills: Optional[str] = "N/A"
    rbt_bcba_certification: Optional[str] = "N/A"
    autism_care_experience_score: Optional[str] = "N/A"
79
+
80
+ # --- Helpers: file text extraction (No change) ---
 
 
 
81
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF/DOCX, or decode as UTF-8.

    Returns "" on any failure instead of raising, so one unreadable file
    cannot abort a batch analysis run.
    """
    try:
        # Rewind first: Streamlit's UploadedFile keeps its read position
        # across reruns, so a second read() would otherwise return b"".
        try:
            uploaded_file.seek(0)
        except Exception:
            pass

        content = uploaded_file.read()
        filename = uploaded_file.name.lower()

        # PDF: trust either the extension or the %PDF- magic bytes.
        if filename.endswith(".pdf") or content[:5] == b"%PDF-":
            try:
                with fitz.open(stream=content, filetype="pdf") as doc:
                    return "".join(page.get_text() for page in doc).strip()
            except Exception:
                return ""
        elif filename.endswith(".docx"):
            try:
                doc = Document(io.BytesIO(content))
                lines = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
                return "\n".join(lines).strip()
            except Exception:
                return ""
        else:
            # Fallback: treat the payload as UTF-8 text, dropping bad bytes.
            try:
                return content.decode('utf-8', errors='ignore')
            except Exception:
                return ""
    except Exception:
        return ""
110
+
111
+ # --- Groq call with streaming (collects chunks) (No change) ---
112
def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """
    Send *prompt* to Groq with streaming enabled and collect the text output.

    Parameters mirror the Groq chat-completions API. Returns the full
    concatenated model text (stripped), or None when the client is missing
    or the API call fails.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None

    try:
        completion = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            # NOTE: this line previously contained mojibake bytes that made
            # the file a SyntaxError; restored to plain ASCII.
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True
        )

        # completion is a stream; accumulate each chunk's text payload.
        collected = ""
        for chunk in completion:
            collected += _chunk_text(chunk)
        return collected.strip()
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return None


def _chunk_text(chunk) -> str:
    """Best-effort extraction of the text payload from one streamed chunk.

    Handles object-style chunks (chunk.choices[0].delta.content), dict-style
    chunks, and SDKs that deliver the final text on choices[0].message.content.
    Falls back to str(chunk) when the shape is unrecognized, and never raises.
    """
    try:
        delta = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None
        if delta is None and isinstance(chunk, dict):
            delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
        if delta:
            return delta
        # Some SDKs return the final message instead of incremental deltas.
        try:
            msg = getattr(chunk.choices[0].message, "content", None)
            return msg or ""
        except Exception:
            return ""
    except Exception:
        # Last resort: keep something rather than silently dropping output.
        try:
            return str(chunk)
        except Exception:
            return ""
166
+
167
+ # --- Parsing model output safely to JSON (No change) ---
168
def extract_first_json(text: str) -> Optional[dict]:
    """
    Locate and parse the first balanced JSON object embedded in *text*.

    Scans from the first '{' tracking brace depth (string literals and
    escape sequences are respected) instead of the previous recursive-regex
    attempt: Python's `re` module does not support `(?R)`, so that path
    always raised `re.error` and fell back to an over-greedy `{.*}` match.
    If no balanced object is found, the whole text is tried, then a naive
    single-quote -> double-quote repair; returns None when nothing parses.
    """
    if not text:
        return None

    candidate = _first_balanced_braces(text)
    if candidate is None:
        # Maybe the model emitted bare JSON without surrounding braces/prose.
        candidate = text

    for attempt in (candidate, candidate.replace("'", '"')):
        try:
            return json.loads(attempt)
        except Exception:
            continue
    return None


def _first_balanced_braces(text: str) -> Optional[str]:
    """Return the first '{...}' span with balanced braces, or None."""
    start = text.find("{")
    if start == -1:
        return None
    depth = 0
    in_string = False
    escaped = False
    for idx in range(start, len(text)):
        ch = text[idx]
        if in_string:
            # Inside a JSON string: braces don't count toward depth.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start:idx + 1]
    return None
198
+
199
+ # --- Analyze with Groq (cached by resume text + role) (No change) ---
200
@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """
    Analyze one resume with Groq and return a validated ResumeAnalysis.

    Cached on (resume_text, job_role) so Streamlit reruns do not repeat
    identical model calls. Any failure — no model output, JSON parse
    failure, schema validation failure — yields a sentinel analysis whose
    name is 'Extraction Failed' (all other fields keep their defaults, so
    the previous triplicated 12-line fallback constructor was redundant).
    """
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

    system_user_prompt = (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )

    raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)

    if not raw:
        return ResumeAnalysis(name="Extraction Failed")

    parsed = extract_first_json(raw)
    if not parsed:
        # Surface the raw model output so a developer can see why parsing failed.
        st.warning("Failed to parse model JSON output. See raw output below for debugging.")
        st.text_area("Raw model output (debug)", raw, height=200)
        return ResumeAnalysis(name="Extraction Failed")

    # Fill in any keys the model omitted.
    defaults = {
        "name": "Unknown", "email": "", "phone": "", "certifications": [],
        "experience_summary": "", "education_summary": "", "communication_skills": "N/A",
        "technical_skills": [], "aba_therapy_skills": "N/A",
        "rbt_bcba_certification": "N/A", "autism_care_experience_score": "N/A",
    }
    for key, value in defaults.items():
        parsed.setdefault(key, value)

    # Coerce score-ish fields to strings so validation cannot fail when the
    # model returns bare numbers; empty/None collapses to 'N/A'.
    for key in ("communication_skills", "aba_therapy_skills",
                "rbt_bcba_certification", "autism_care_experience_score"):
        parsed[key] = str(parsed.get(key) or "N/A")

    try:
        return ResumeAnalysis.parse_obj(parsed)
    except ValidationError as ve:
        st.error("Model output failed schema validation.")
        st.text_area("Raw model output (debug)", raw, height=200)
        st.exception(ve)
        return ResumeAnalysis(name="Extraction Failed")
305
+
306
+ # --- Scoring logic (MODIFIED) ---
307
def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> tuple[float, float, float, float, float]:
    """
    Score a parsed resume for *role*.

    Components: experience (<=40), skills (<=30), communication (<=20),
    certifications (<=10); 'Therapist' roles can earn up to 10 bonus points.
    Returns (final, experience, skills, communication, certifications),
    with the final score capped at 100.
    """
    # Experience: proportional to summary length, saturating at 100 chars.
    experience_pts = round(min(len(analysis.experience_summary or "") / 100.0, 1.0) * 40.0)

    # Skills: proportional to listed technologies, saturating at 10 items.
    skill_pts = round(min(len(analysis.technical_skills or []) / 10.0, 1.0) * 30.0)

    # Communication: model-reported 0-10 rating scaled to 20 points.
    try:
        hit = re.search(r"(\d+(\.\d+)?)", str(analysis.communication_skills))
        rating = float(hit.group(1)) if hit else float(str(analysis.communication_skills))
        rating = max(0.0, min(10.0, rating))  # clamp to the 0-10 scale
    except Exception:
        rating = 5.0  # neutral default when the rating is missing/unparsable
    comm_pts = round((rating / 10.0) * 20.0)

    # Certifications: one point each, capped at 10.
    cert_pts = min(len(analysis.certifications or []), 10) * 1.0

    running = experience_pts + skill_pts + comm_pts + cert_pts

    # Therapist-only bonus: mean of ABA + autism-care scores, max 10 points.
    if role.lower() == "therapist":
        def _extract(value):
            try:
                found = re.search(r"(\d+(\.\d+)?)", str(value))
                return float(found.group(1)) if found else 0.0
            except Exception:
                return 0.0
        running += ((_extract(analysis.aba_therapy_skills)
                     + _extract(analysis.autism_care_experience_score)) / 20.0) * 10.0

    overall = round(min(running, 100))
    return (float(overall), float(experience_pts), float(skill_pts), float(comm_pts), float(cert_pts))
362
+
363
+ # --- Append to DataFrame (MODIFIED) ---
364
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, scores: tuple[float, float, float, float, float]):
    """Append one analyzed resume as a new row of st.session_state.analyzed_data."""
    overall, exp_pts, skill_pts, comm_pts, cert_pts = scores
    payload = analysis.dict()

    def _text(key):
        # Free-text fields: None collapses to "".
        return payload.get(key) or ""

    def _rating(key):
        # Score-ish fields: stored as strings, None collapses to "N/A".
        return str(payload.get(key) or "N/A")

    record = {
        'Name': _text("name"),
        'Job Role': job_role,
        'Resume Score (100)': overall,
        'Shortlisted': 'No',
        'Email': _text("email"),
        'Phone': _text("phone"),
        # Component-score columns
        'Experience Score (40)': exp_pts,
        'Skills Score (30)': skill_pts,
        'Communication Score (20)': comm_pts,
        'Certifications Score (10)': cert_pts,
        'Experience Summary': _text("experience_summary"),
        'Education Summary': _text("education_summary"),
        'Communication Rating (1-10)': _rating("communication_skills"),
        'Skills/Technologies': ", ".join(payload.get("technical_skills") or []),
        'Certifications': ", ".join(payload.get("certifications") or []),
        'ABA Skills (1-10)': _rating("aba_therapy_skills"),
        'RBT/BCBA Cert': _rating("rbt_bcba_certification"),
        'Autism-Care Exp (1-10)': _rating("autism_care_experience_score"),
    }

    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([record])],
        ignore_index=True,
    )
396
+
397
+ # --- Excel export helper (No change) ---
398
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize *df* to an in-memory .xlsx workbook and return the raw bytes."""
    buffer = io.BytesIO()
    # Context manager guarantees the workbook is finalized before reading back.
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
    return buffer.getvalue()
403
+
404
# --- UI layout: page title plus the two top-level navigation tabs ---
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

_tab_labels = ["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"]
tab_user, tab_admin = st.tabs(_tab_labels)
408
+
409
# --- User Panel (Minor change for scoring) ---
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

    # Role drives both the model prompt and the scoring rubric downstream.
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    # The button only arms the run via session state and reruns; the actual
    # processing happens in the block below on the next script pass, so the
    # spinner/progress UI renders outside the button's callback frame.
    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            # Files can disappear between reruns (e.g. user cleared the widget).
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            # Reset the per-run summary list before processing this batch.
            st.session_state.individual_analysis = []
            idx = 0
            with st.spinner("Processing resumes..."):
                for f in uploaded_files:
                    idx += 1
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            progress.progress(idx / total)
                            continue

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)

                        # Sentinel name set by the analyzer when the model/JSON
                        # parse failed — presumably defined in the helper above;
                        # verify it stays in sync with this string.
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            progress.progress(idx / total)
                            continue

                        # scores is a sequence whose first element is the total
                        # (out of 100); remaining elements feed the dataframe.
                        scores = calculate_resume_score(analysis, selected_role)
                        final_score = scores[0]

                        append_analysis_to_dataframe(selected_role, analysis, scores)

                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': final_score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # NOTE(review): st.exception expects an Exception object;
                        # passing the traceback *string* may not render as
                        # intended — confirm against the Streamlit version in use.
                        st.exception(traceback.format_exc())
                    finally:
                        # Advance the bar exactly once per file, on every path.
                        progress.progress(idx / total)

            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            # Disarm so a plain rerun doesn't reprocess the same batch.
            st.session_state.run_analysis = False

    # Display last results summary
    # NOTE(review): assumes st.session_state.individual_analysis was
    # initialized earlier in the file — confirm the session-state setup block.
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
481
# --- Admin Panel (MODIFIED for new columns) ---
with tab_admin:
    # Gate: render only the login form until the correct password is supplied.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # Halt rendering of the rest of this tab for unauthenticated viewers.
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Columns shown in the editor; 'Shortlisted' is the only editable one.
        display_cols = [
            'Name',
            'Job Role',
            'Resume Score (100)',
            'Experience Score (40)',
            'Skills Score (30)',
            'Communication Score (20)',
            'Certifications Score (10)',
            'Shortlisted',
            'Email',
            'Skills/Technologies'
        ]
        # Filter columns to only those present in the current dataframe (safety check)
        current_display_cols = [col for col in display_cols if col in df.columns]

        # FIX: ProgressColumn format changed from "%f" to "%.0f" — "%f"
        # renders each label with six decimal places (e.g. "85.000000").
        edited_df = st.data_editor(
            df[current_display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                ),
                "Resume Score (100)": st.column_config.ProgressColumn(
                    "Total Score",
                    format="%.0f",
                    min_value=0, max_value=100,
                ),
                "Experience Score (40)": st.column_config.ProgressColumn(
                    "Experience (40)",
                    format="%.0f",
                    min_value=0, max_value=40,
                ),
                "Skills Score (30)": st.column_config.ProgressColumn(
                    "Skills (30)",
                    format="%.0f",
                    min_value=0, max_value=30,
                ),
                "Communication Score (20)": st.column_config.ProgressColumn(
                    "Comms (20)",
                    format="%.0f",
                    min_value=0, max_value=20,
                ),
                "Certifications Score (10)": st.column_config.ProgressColumn(
                    "Certs (10)",
                    format="%.0f",
                    min_value=0, max_value=10,
                ),
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist the edited 'Shortlisted' column back into the master frame.
        try:
            # Update the master dataframe with the edited 'Shortlisted' column
            for col in edited_df.columns:
                if col in st.session_state.analyzed_data.columns and not edited_df[col].equals(st.session_state.analyzed_data[col]):
                    # Only update 'Shortlisted' which is the only editable field
                    if col == 'Shortlisted':
                        st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            # Fallback for index issues on data_editor changes.
            # FIX: the previous fallback used edited_df.get('Shortlisted', [])
            # and then called .tolist() on it — a plain list default has no
            # .tolist(), so the handler itself raised AttributeError whenever
            # the column was missing. Guard the lookup explicitly instead.
            if 'Shortlisted' in edited_df.columns:
                for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                    if i < len(st.session_state.analyzed_data):
                        st.session_state.analyzed_data.at[i, 'Shortlisted'] = val


        st.markdown("---")
        st.subheader("📥 Download Data")
        # Export the full master frame (all extracted fields), not just the
        # subset of columns shown in the editor.
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
588
+
589
+ # --- End of file ---