meesamraza committed on
Commit
48e44c0
·
verified ·
1 Parent(s): 1c97216

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +452 -529
app.py CHANGED
@@ -18,8 +18,8 @@ import streamlit as st
18
  import pandas as pd
19
 
20
  # File parsing
21
- import fitz Β  Β  Β  Β  Β  Β  Β  Β  Β  # PyMuPDF
22
- from docx import Document Β  Β  # python-docx
23
 
24
  # Groq client
25
  from groq import Groq
@@ -37,553 +37,476 @@ ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
37
  # Initialize Groq client (no API key -> UI warning but app still loads)
38
  groq_client = None
39
  if GROQ_API_KEY:
40
- Β  Β  try:
41
- Β  Β  Β  Β  groq_client = Groq(api_key=GROQ_API_KEY)
42
- Β  Β  except Exception as e:
43
- Β  Β  Β  Β  st.error(f"Failed to initialize Groq client: {e}")
44
  else:
45
- Β  Β  st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
46
 
47
  # --- Session state defaults ---
48
  if 'is_admin_logged_in' not in st.session_state:
49
- Β  Β  st.session_state.is_admin_logged_in = False
50
  if 'analyzed_data' not in st.session_state:
51
- Β  Β  initial_cols = [
52
- Β  Β  Β  Β  'Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Phone',
53
- Β  Β  Β  Β  # NEW SCORE COLUMNS
54
- Β  Β  Β  Β  'Experience Score (40)', 'Skills Score (30)', 'Communication Score (20)', 'Certifications Score (10)',
55
- Β  Β  Β  Β  'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
56
- Β  Β  Β  Β  'Skills/Technologies', 'Certifications',
57
- Β  Β  Β  Β  # THERAPIST FIELDS
58
- Β  Β  Β  Β  'ABA Skills (1-10)', 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
59
- Β  Β  ]
60
- Β  Β  st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
61
  if 'individual_analysis' not in st.session_state:
62
- Β  Β  st.session_state.individual_analysis = []
63
  if 'run_analysis' not in st.session_state:
64
- Β  Β  st.session_state.run_analysis = False
65
 
66
- # --- Pydantic schema (No change needed here, as the new scores are derived) ---
67
  class ResumeAnalysis(BaseModel):
68
- Β  Β  name: str = Field(default="Unknown")
69
- Β  Β  email: str = Field(default="")
70
- Β  Β  phone: str = Field(default="")
71
- Β  Β  certifications: List[str] = Field(default_factory=list)
72
- Β  Β  experience_summary: str = Field(default="")
73
- Β  Β  education_summary: str = Field(default="")
74
- Β  Β  communication_skills: str = Field(default="N/A")
75
- Β  Β  technical_skills: List[str] = Field(default_factory=list)
76
- Β  Β  aba_therapy_skills: Optional[str] = Field(default="N/A")
77
- Β  Β  rbt_bcba_certification: Optional[str] = Field(default="N/A")
78
- Β  Β  autism_care_experience_score: Optional[str] = Field(default="N/A")
79
-
80
- # --- Helpers: file text extraction (No change) ---
81
  def extract_text_from_file(uploaded_file) -> str:
82
- Β  Β  """Extract text from PDF or DOCX. Returns empty string on failure."""
83
- Β  Β  try:
84
- Β  Β  Β  Β  content = uploaded_file.read()
85
- Β  Β  Β  Β  filename = uploaded_file.name.lower()
86
- Β  Β  Β  Β  if filename.endswith(".pdf") or content[:5] == b"%PDF-":
87
- Β  Β  Β  Β  Β  Β  try:
88
- Β  Β  Β  Β  Β  Β  Β  Β  with fitz.open(stream=content, filetype="pdf") as doc:
89
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  text = ""
90
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  for p in doc:
91
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  text += p.get_text()
92
- Β  Β  Β  Β  Β  Β  Β  Β  return text.strip()
93
- Β  Β  Β  Β  Β  Β  except Exception:
94
- Β  Β  Β  Β  Β  Β  Β  Β  return ""
95
- Β  Β  Β  Β  elif filename.endswith(".docx"):
96
- Β  Β  Β  Β  Β  Β  try:
97
- Β  Β  Β  Β  Β  Β  Β  Β  doc = Document(io.BytesIO(content))
98
- Β  Β  Β  Β  Β  Β  Β  Β  paragraphs = [p.text for p in doc.paragraphs if p.text and p.text.strip()]
99
- Β  Β  Β  Β  Β  Β  Β  Β  return "\n".join(paragraphs).strip()
100
- Β  Β  Β  Β  Β  Β  except Exception:
101
- Β  Β  Β  Β  Β  Β  Β  Β  return ""
102
- Β  Β  Β  Β  else:
103
- Β  Β  Β  Β  Β  Β  # fallback: decode bytes as text
104
- Β  Β  Β  Β  Β  Β  try:
105
- Β  Β  Β  Β  Β  Β  Β  Β  return content.decode('utf-8', errors='ignore')
106
- Β  Β  Β  Β  Β  Β  except Exception:
107
- Β  Β  Β  Β  Β  Β  Β  Β  return ""
108
- Β  Β  except Exception:
109
- Β  Β  Β  Β  return ""
110
-
111
- # --- Groq call with streaming (collects chunks) (No change) ---
112
  def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
113
- Β  Β  """
114
- Β  Β  Calls Groq with streaming enabled and collects the textual output.
115
- Β  Β  Returns the full model text, or None on failure.
116
- Β  Β  """
117
- Β  Β  if not groq_client:
118
- Β  Β  Β  Β  st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
119
- Β  Β  Β  Β  return None
120
-
121
- Β  Β  try:
122
- Β  Β  Β  Β  completion = groq_client.chat.completions.create(
123
- Β  Β  Β  Β  Β  Β  model=model_name,
124
- Β  Β  Β  Β  Β  Β  messages=[
125
- Β  Β  Β  Β  Β  Β  Β  Β  {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
126
- Β  Β  Β  Β  Β  Β  Β  Β  {"role": "user", "content": prompt}
127
- Β  Β  Β  Β  Β  Β  ],
128
- Β  Β  Β  Β  Β  Β  temperature=temperature,
129
- Β  Β  Β  Β  Β  Β  max_completion_tokens=max_completion_tokens,
130
- Β  Β  Β  Β  Β  Β  top_p=top_p,
131
- Β  Β  Β  Β  Β  Β  stream=True
132
- Β  Β  Β  Β  )
133
-
134
- Β  Β  Β  Β  # completion is an iterator/streamable object; collect chunks
135
- Β  Β  Β  Β  collected = ""
136
- Β  Β  Β  Β  # some SDKs yield dict-like chunks, some objects; handle both
137
- Β  Β  Β  Β  for chunk in completion:
138
- Β  Β  Β  Β  Β  Β  try:
139
- Β  Β  Β  Β  Β  Β  Β  Β  # Common pattern: chunk.choices[0].delta.content
140
- Β  Β  Β  Β  Β  Β  Β  Β  delta = getattr(chunk.choices[0].delta, "content", None) if hasattr(chunk, "choices") else None
141
- Β  Β  Β  Β  Β  Β  Β  Β  if delta is None:
142
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  # fallback for dict-like object
143
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  if isinstance(chunk, dict):
144
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  delta = chunk.get("choices", [{}])[0].get("delta", {}).get("content")
145
- Β  Β  Β  Β  Β  Β  Β  Β  if delta:
146
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  collected += delta
147
- Β  Β  Β  Β  Β  Β  Β  Β  else:
148
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  # Some SDKs return final message in chunk.choices[0].message.content
149
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  try:
150
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  msg = getattr(chunk.choices[0].message, "content", None)
151
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  if msg:
152
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  collected += msg
153
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  except Exception:
154
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  pass
155
- Β  Β  Β  Β  Β  Β  except Exception:
156
- Β  Β  Β  Β  Β  Β  Β  Β  # last-resort: append str(chunk)
157
- Β  Β  Β  Β  Β  Β  Β  Β  try:
158
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  collected += str(chunk)
159
- Β  Β  Β  Β  Β  Β  Β  Β  except Exception:
160
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  pass
161
-
162
- Β  Β  Β  Β  return collected.strip()
163
- Β  Β  except Exception as e:
164
- Β  Β  Β  Β  st.error(f"Groq API call failed: {e}")
165
- Β  Β  Β  Β  return None
166
-
167
- # --- Parsing model output safely to JSON (No change) ---
168
  def extract_first_json(text: str) -> Optional[dict]:
169
- Β  Β  """
170
- Β  Β  Find the first JSON object in text and parse it; return dict or None.
171
- Β  Β  """
172
- Β  Β  if not text:
173
- Β  Β  Β  Β  return None
174
- Β  Β  # find first balanced braces block
175
- Β  Β  # quick heuristic regex for {...}
176
- Β  Β  try:
177
- Β  Β  Β  Β  match = re.search(r"(\{(?:[^{}]|(?R))*\})", text, re.DOTALL)
178
- Β  Β  except re.error:
179
- Β  Β  Β  Β  # Python's re doesn't support (?R); fallback to simpler greedy
180
- Β  Β  Β  Β  match = re.search(r"(\{.*\})", text, re.DOTALL)
181
- Β  Β  if match:
182
- Β  Β  Β  Β  json_text = match.group(1)
183
- Β  Β  else:
184
- Β  Β  Β  Β  # maybe the model returned only JSON-like lines -> try to parse full text
185
- Β  Β  Β  Β  json_text = text
186
-
187
- Β  Β  try:
188
- Β  Β  Β  Β  parsed = json.loads(json_text)
189
- Β  Β  Β  Β  return parsed
190
- Β  Β  except Exception:
191
- Β  Β  Β  Β  # try to clean common issues: single quotes -> double quotes
192
- Β  Β  Β  Β  try:
193
- Β  Β  Β  Β  Β  Β  json_text_fixed = json_text.replace("'", '"')
194
- Β  Β  Β  Β  Β  Β  parsed = json.loads(json_text_fixed)
195
- Β  Β  Β  Β  Β  Β  return parsed
196
- Β  Β  Β  Β  except Exception:
197
- Β  Β  Β  Β  Β  Β  return None
198
-
199
- # --- Analyze with Groq (cached by resume text + role) (No change) ---
200
  @st.cache_data(show_spinner=False)
201
  def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
202
- Β  Β  """
203
- Β  Β  Calls Groq (streaming) and returns a ResumeAnalysis instance.
204
- Β  Β  Uses caching to avoid duplicate calls for same resume_text+role.
205
- Β  Β  """
206
- Β  Β  # Build prompt instructing JSON structure
207
- Β  Β  therapist_instructions = ""
208
- Β  Β  if job_role.lower() == "therapist":
209
- Β  Β  Β  Β  therapist_instructions = (
210
- Β  Β  Β  Β  Β  Β  "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
211
- Β  Β  Β  Β  Β  Β  "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
212
- Β  Β  Β  Β  )
213
- Β  Β  else:
214
- Β  Β  Β  Β  therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."
215
-
216
- Β  Β  system_user_prompt = (
217
- Β  Β  Β  Β  "Return a single JSON object with the following keys exactly: "
218
- Β  Β  Β  Β  "name (string), email (string), phone (string), certifications (array of strings), "
219
- Β  Β  Β  Β  "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
220
- Β  Β  Β  Β  "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
221
- Β  Β  Β  Β  "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
222
- Β  Β  Β  Β  f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
223
- Β  Β  )
224
-
225
- Β  Β  raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
226
-
227
- Β  Β  if not raw:
228
- Β  Β  Β  Β  # fallback empty object
229
- Β  Β  Β  Β  return ResumeAnalysis(
230
- Β  Β  Β  Β  Β  Β  name="Extraction Failed",
231
- Β  Β  Β  Β  Β  Β  email="",
232
- Β  Β  Β  Β  Β  Β  phone="",
233
- Β  Β  Β  Β  Β  Β  certifications=[],
234
- Β  Β  Β  Β  Β  Β  experience_summary="",
235
- Β  Β  Β  Β  Β  Β  education_summary="",
236
- Β  Β  Β  Β  Β  Β  communication_skills="N/A",
237
- Β  Β  Β  Β  Β  Β  technical_skills=[],
238
- Β  Β  Β  Β  Β  Β  aba_therapy_skills="N/A",
239
- Β  Β  Β  Β  Β  Β  rbt_bcba_certification="N/A",
240
- Β  Β  Β  Β  Β  Β  autism_care_experience_score="N/A"
241
- Β  Β  Β  Β  )
242
-
243
- Β  Β  parsed = extract_first_json(raw)
244
- Β  Β  if not parsed:
245
- Β  Β  Β  Β  # show raw output for debugging when developer runs app locally (admin panel will show too)
246
- Β  Β  Β  Β  st.warning("Failed to parse model JSON output. See raw output below for debugging.")
247
- Β  Β  Β  Β  st.text_area("Raw model output (debug)", raw, height=200)
248
- Β  Β  Β  Β  return ResumeAnalysis(
249
- Β  Β  Β  Β  Β  Β  name="Extraction Failed",
250
- Β  Β  Β  Β  Β  Β  email="",
251
- Β  Β  Β  Β  Β  Β  phone="",
252
- Β  Β  Β  Β  Β  Β  certifications=[],
253
- Β  Β  Β  Β  Β  Β  experience_summary="",
254
- Β  Β  Β  Β  Β  Β  education_summary="",
255
- Β  Β  Β  Β  Β  Β  communication_skills="N/A",
256
- Β  Β  Β  Β  Β  Β  technical_skills=[],
257
- Β  Β  Β  Β  Β  Β  aba_therapy_skills="N/A",
258
- Β  Β  Β  Β  Β  Β  rbt_bcba_certification="N/A",
259
- Β  Β  Β  Β  Β  Β  autism_care_experience_score="N/A"
260
- Β  Β  Β  Β  )
261
-
262
- Β  Β  # Ensure keys exist and coerce types
263
- Β  Β  parsed.setdefault("name", "Unknown")
264
- Β  Β  parsed.setdefault("email", "")
265
- Β  Β  parsed.setdefault("phone", "")
266
- Β  Β  parsed.setdefault("certifications", [])
267
- Β  Β  parsed.setdefault("experience_summary", "")
268
- Β  Β  parsed.setdefault("education_summary", "")
269
- Β  Β  parsed.setdefault("communication_skills", "N/A")
270
- Β  Β  parsed.setdefault("technical_skills", [])
271
- Β  Β  parsed.setdefault("aba_therapy_skills", "N/A")
272
- Β  Β  parsed.setdefault("rbt_bcba_certification", "N/A")
273
- Β  Β  parsed.setdefault("autism_care_experience_score", "N/A")
274
-
275
- Β  Β  # Ensure string coercions for some fields
276
- Β  Β  try:
277
- Β  Β  Β  Β  parsed["communication_skills"] = str(parsed.get("communication_skills") or "N/A")
278
- Β  Β  Β  Β  parsed["aba_therapy_skills"] = str(parsed.get("aba_therapy_skills") or "N/A")
279
- Β  Β  Β  Β  parsed["rbt_bcba_certification"] = str(parsed.get("rbt_bcba_certification") or "N/A")
280
- Β  Β  Β  Β  parsed["autism_care_experience_score"] = str(parsed.get("autism_care_experience_score") or "N/A")
281
- Β  Β  except Exception:
282
- Β  Β  Β  Β  pass
283
-
284
- Β  Β  # Validate via Pydantic
285
- Β  Β  try:
286
- Β  Β  Β  Β  analysis = ResumeAnalysis.parse_obj(parsed)
287
- Β  Β  Β  Β  return analysis
288
- Β  Β  except ValidationError as ve:
289
- Β  Β  Β  Β  st.error("Model output failed schema validation.")
290
- Β  Β  Β  Β  st.text_area("Raw model output (debug)", raw, height=200)
291
- Β  Β  Β  Β  st.exception(ve)
292
- Β  Β  Β  Β  return ResumeAnalysis(
293
- Β  Β  Β  Β  Β  Β  name="Extraction Failed",
294
- Β  Β  Β  Β  Β  Β  email="",
295
- Β  Β  Β  Β  Β  Β  phone="",
296
- Β  Β  Β  Β  Β  Β  certifications=[],
297
- Β  Β  Β  Β  Β  Β  experience_summary="",
298
- Β  Β  Β  Β  Β  Β  education_summary="",
299
- Β  Β  Β  Β  Β  Β  communication_skills="N/A",
300
- Β  Β  Β  Β  Β  Β  technical_skills=[],
301
- Β  Β  Β  Β  Β  Β  aba_therapy_skills="N/A",
302
- Β  Β  Β  Β  Β  Β  rbt_bcba_certification="N/A",
303
- Β  Β  Β  Β  Β  Β  autism_care_experience_score="N/A"
304
- Β  Β  Β  Β  )
305
-
306
- # --- Scoring logic (MODIFIED) ---
307
- def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> tuple[float, float, float, float, float]:
308
- Β  Β  """
309
- Β  Β  Calculates the overall score and the individual component scores.
310
- Β  Β  Returns (final_score, exp_score, skills_score, comm_score, certs_score)
311
- Β  Β  """
312
- Β  Β  total_score = 0.0
313
-
314
- Β  Β  # 1. Experience score: up to 40 points
315
- Β  Β  exp_len = len(analysis.experience_summary or "")
316
- Β  Β  # Cap factor at 1.0 (e.g., 100+ chars = 1.0)
317
- Β  Β  exp_factor = min(exp_len / 100.0, 1.0)
318
- Β  Β  exp_score = round(exp_factor * 40.0)
319
- Β  Β  total_score += exp_score
320
-
321
- Β  Β  # 2. Skills score: up to 30 points
322
- Β  Β  skills_count = len(analysis.technical_skills or [])
323
- Β  Β  # Cap factor at 1.0 (e.g., 10+ skills = 1.0)
324
- Β  Β  skills_factor = min(skills_count / 10.0, 1.0)
325
- Β  Β  skills_score = round(skills_factor * 30.0)
326
- Β  Β  total_score += skills_score
327
-
328
- Β  Β  # 3. Communication score: up to 20 points (expects 0-10 in string)
329
- Β  Β  try:
330
- Β  Β  Β  Β  m = re.search(r"(\d+(\.\d+)?)", str(analysis.communication_skills))
331
- Β  Β  Β  Β  # Use regex match if available, otherwise try direct float conversion
332
- Β  Β  Β  Β  comm_val = float(m.group(1)) if m else float(str(analysis.communication_skills))
333
- Β  Β  Β  Β  comm_val = max(0.0, min(10.0, comm_val)) # Clamp to 0-10
334
- Β  Β  except Exception:
335
- Β  Β  Β  Β  comm_val = 5.0 # Default if model extraction failed
336
- Β  Β  comm_score = round((comm_val / 10.0) * 20.0)
337
- Β  Β  total_score += comm_score
338
-
339
- Β  Β  # 4. Certifications score: up to 10 points
340
- Β  Β  # Max 10 points for 10 or more certifications
341
- Β  Β  certs_score = min(len(analysis.certifications or []), 10) * 1.0
342
- Β  Β  total_score += certs_score
343
-
344
- Β  Β  # 5. Therapist bonus: up to 10 points (added to overall score if applicable)
345
- Β  Β  spec_bonus = 0.0
346
- Β  Β  if role.lower() == "therapist":
347
- Β  Β  Β  Β  def safe_score(x):
348
- Β  Β  Β  Β  Β  Β  try:
349
- Β  Β  Β  Β  Β  Β  Β  Β  m = re.search(r"(\d+(\.\d+)?)", str(x))
350
- Β  Β  Β  Β  Β  Β  Β  Β  return float(m.group(1)) if m else 0.0
351
- Β  Β  Β  Β  Β  Β  except Exception:
352
- Β  Β  Β  Β  Β  Β  Β  Β  return 0.0
353
- Β  Β  Β  Β  aba = safe_score(analysis.aba_therapy_skills)
354
- Β  Β  Β  Β  autism = safe_score(analysis.autism_care_experience_score)
355
- Β  Β  Β  Β  # Average of the two specialized scores, scaled to a max of 10 points
356
- Β  Β  Β  Β  spec_bonus = ((aba + autism) / 20.0) * 10.0
357
- Β  Β  Β  Β  total_score += spec_bonus
358
-
359
- Β  Β  final_score = round(min(total_score, 100))
360
- Β  Β 
361
- Β  Β  return (float(final_score), float(exp_score), float(skills_score), float(comm_score), float(certs_score))
362
-
363
- # --- Append to DataFrame (MODIFIED) ---
364
- def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, scores: tuple[float, float, float, float, float]):
365
- Β  Β  final_score, exp_score, skills_score, comm_score, certs_score = scores
366
- Β  Β 
367
- Β  Β  data = analysis.dict()
368
- Β  Β  tech = ", ".join(data.get("technical_skills") or [])
369
- Β  Β  certs = ", ".join(data.get("certifications") or [])
370
- Β  Β 
371
- Β  Β  row = {
372
- Β  Β  Β  Β  'Name': data.get("name") or "",
373
- Β  Β  Β  Β  'Job Role': job_role,
374
- Β  Β  Β  Β  'Resume Score (100)': final_score,
375
- Β  Β  Β  Β  'Shortlisted': 'No',
376
- Β  Β  Β  Β  'Email': data.get("email") or "",
377
- Β  Β  Β  Β  'Phone': data.get("phone") or "",
378
- Β  Β  Β  Β 
379
- Β  Β  Β  Β  # NEW SCORE COLUMNS
380
- Β  Β  Β  Β  'Experience Score (40)': exp_score,
381
- Β  Β  Β  Β  'Skills Score (30)': skills_score,
382
- Β  Β  Β  Β  'Communication Score (20)': comm_score,
383
- Β  Β  Β  Β  'Certifications Score (10)': certs_score,
384
- Β  Β  Β  Β 
385
- Β  Β  Β  Β  'Experience Summary': data.get("experience_summary") or "",
386
- Β  Β  Β  Β  'Education Summary': data.get("education_summary") or "",
387
- Β  Β  Β  Β  'Communication Rating (1-10)': str(data.get("communication_skills") or "N/A"),
388
- Β  Β  Β  Β  'Skills/Technologies': tech,
389
- Β  Β  Β  Β  'Certifications': certs,
390
- Β  Β  Β  Β  'ABA Skills (1-10)': str(data.get("aba_therapy_skills") or "N/A"),
391
- Β  Β  Β  Β  'RBT/BCBA Cert': str(data.get("rbt_bcba_certification") or "N/A"),
392
- Β  Β  Β  Β  'Autism-Care Exp (1-10)': str(data.get("autism_care_experience_score") or "N/A"),
393
- Β  Β  }
394
- Β  Β  new_df = pd.DataFrame([row])
395
- Β  Β  st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
396
-
397
- # --- Excel export helper (No change) ---
398
  def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
399
- Β  Β  output = io.BytesIO()
400
- Β  Β  with pd.ExcelWriter(output, engine="openpyxl") as writer:
401
- Β  Β  Β  Β  df.to_excel(writer, index=False, sheet_name="Resume Analysis Data")
402
- Β  Β  return output.getvalue()
403
 
404
  # --- UI Layout ---
405
  st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")
406
 
407
  tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])
408
 
409
- # --- User Panel (Minor change for scoring) ---
410
  with tab_user:
411
- Β  Β  st.header("Upload Resumes for Analysis")
412
- Β  Β  st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")
413
-
414
- Β  Β  job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
415
- Β  Β  selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")
416
-
417
- Β  Β  uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
418
-
419
- Β  Β  if st.button("πŸš€ Analyze All Uploaded Resumes"):
420
- Β  Β  Β  Β  if not uploaded_files:
421
- Β  Β  Β  Β  Β  Β  st.warning("Please upload one or more resume files to begin analysis.")
422
- Β  Β  Β  Β  else:
423
- Β  Β  Β  Β  Β  Β  st.session_state.run_analysis = True
424
- Β  Β  Β  Β  Β  Β  st.rerun()
425
-
426
- Β  Β  if st.session_state.get("run_analysis", False):
427
- Β  Β  Β  Β  if not uploaded_files:
428
- Β  Β  Β  Β  Β  Β  st.warning("No files found. Upload files and try again.")
429
- Β  Β  Β  Β  Β  Β  st.session_state.run_analysis = False
430
- Β  Β  Β  Β  else:
431
- Β  Β  Β  Β  Β  Β  total = len(uploaded_files)
432
- Β  Β  Β  Β  Β  Β  progress = st.progress(0)
433
- Β  Β  Β  Β  Β  Β  st.session_state.individual_analysis = []
434
- Β  Β  Β  Β  Β  Β  idx = 0
435
- Β  Β  Β  Β  Β  Β  with st.spinner("Processing resumes..."):
436
- Β  Β  Β  Β  Β  Β  Β  Β  for f in uploaded_files:
437
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  idx += 1
438
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  try:
439
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.write(f"Analyzing **{f.name}**...")
440
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  resume_text = extract_text_from_file(f)
441
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  if not resume_text:
442
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.error(f"Could not extract text from {f.name}. Skipping.")
443
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  progress.progress(idx / total)
444
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  continue
445
-
446
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
447
-
448
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  if analysis.name == "Extraction Failed":
449
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.error(f"Extraction failed for {f.name}. See debug output.")
450
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  progress.progress(idx / total)
451
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  continue
452
-
453
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  scores = calculate_resume_score(analysis, selected_role)
454
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  final_score = scores[0]
455
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β 
456
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  append_analysis_to_dataframe(selected_role, analysis, scores)
457
-
458
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.session_state.individual_analysis.append({
459
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  'name': analysis.name,
460
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  'score': final_score,
461
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  'role': selected_role,
462
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  'file_name': f.name
463
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  })
464
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  except Exception as e:
465
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.error(f"Error analyzing {f.name}: {e}")
466
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.exception(traceback.format_exc())
467
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  finally:
468
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  progress.progress(idx / total)
469
-
470
- Β  Β  Β  Β  Β  Β  st.success(f"βœ… Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
471
- Β  Β  Β  Β  Β  Β  st.session_state.run_analysis = False
472
-
473
- Β  Β  # Display last results summary
474
- Β  Β  if st.session_state.individual_analysis:
475
- Β  Β  Β  Β  st.subheader("Last Analysis Summary")
476
- Β  Β  Β  Β  for item in st.session_state.individual_analysis:
477
- Β  Β  Β  Β  Β  Β  st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
478
- Β  Β  Β  Β  st.markdown("---")
479
- Β  Β  Β  Β  st.caption("All analyzed data is stored in the Admin Dashboard.")
480
-
481
- # --- Admin Panel (MODIFIED for new columns) ---
482
  with tab_admin:
483
- Β  Β  if not st.session_state.is_admin_logged_in:
484
- Β  Β  Β  Β  st.header("Admin Login")
485
- Β  Β  Β  Β  password = st.text_input("Enter Admin Password", type="password")
486
- Β  Β  Β  Β  if st.button("πŸ”‘ Login"):
487
- Β  Β  Β  Β  Β  Β  if password == ADMIN_PASSWORD:
488
- Β  Β  Β  Β  Β  Β  Β  Β  st.session_state.is_admin_logged_in = True
489
- Β  Β  Β  Β  Β  Β  Β  Β  st.rerun()
490
- Β  Β  Β  Β  Β  Β  else:
491
- Β  Β  Β  Β  Β  Β  Β  Β  st.error("Incorrect password.")
492
- Β  Β  Β  Β  st.stop()
493
-
494
-     st.header("🎯 Recruitment Dashboard")
495
- Β  Β  if st.button("πŸšͺ Logout"):
496
- Β  Β  Β  Β  st.session_state.is_admin_logged_in = False
497
- Β  Β  Β  Β  st.rerun()
498
-
499
- Β  Β  if st.session_state.analyzed_data.empty:
500
- Β  Β  Β  Β  st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
501
- Β  Β  else:
502
- Β  Β  Β  Β  df = st.session_state.analyzed_data.copy()
503
- Β  Β  Β  Β  st.subheader("Candidate Data Table")
504
- Β  Β  Β  Β  st.success(f"**Total Candidates Analyzed: {len(df)}**")
505
-
506
- Β  Β  Β  Β  # Updated columns for display in the data editor
507
- Β  Β  Β  Β  display_cols = [
508
- Β  Β  Β  Β  Β  Β  'Name',
509
- Β  Β  Β  Β  Β  Β  'Job Role',
510
- Β  Β  Β  Β  Β  Β  'Resume Score (100)',
511
- Β  Β  Β  Β  Β  Β  'Experience Score (40)',
512
- Β  Β  Β  Β  Β  Β  'Skills Score (30)',
513
- Β  Β  Β  Β  Β  Β  'Communication Score (20)',
514
- Β  Β  Β  Β  Β  Β  'Certifications Score (10)',
515
- Β  Β  Β  Β  Β  Β  'Shortlisted',
516
- Β  Β  Β  Β  Β  Β  'Email',
517
- Β  Β  Β  Β  Β  Β  'Skills/Technologies'
518
- Β  Β  Β  Β  ]
519
- Β  Β  Β  Β  # Filter columns to only those present in the current dataframe (safety check)
520
- Β  Β  Β  Β  current_display_cols = [col for col in display_cols if col in df.columns]
521
-
522
- Β  Β  Β  Β  edited_df = st.data_editor(
523
- Β  Β  Β  Β  Β  Β  df[current_display_cols],
524
- Β  Β  Β  Β  Β  Β  column_config={
525
- Β  Β  Β  Β  Β  Β  Β  Β  "Shortlisted": st.column_config.SelectboxColumn(
526
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  "Shortlisted",
527
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  help="Mark the candidate as Shortlisted or Rejected.",
528
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  options=["No", "Yes"],
529
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  required=True
530
- Β  Β  Β  Β  Β  Β  Β  Β  ),
531
- Β  Β  Β  Β  Β  Β  Β  Β  "Resume Score (100)": st.column_config.ProgressColumn(
532
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  "Total Score",
533
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  format="%f",
534
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  min_value=0, max_value=100,
535
- Β  Β  Β  Β  Β  Β  Β  Β  ),
536
- Β  Β  Β  Β  Β  Β  Β  Β  "Experience Score (40)": st.column_config.ProgressColumn(
537
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  "Experience (40)",
538
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  format="%f",
539
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  min_value=0, max_value=40,
540
- Β  Β  Β  Β  Β  Β  Β  Β  ),
541
- Β  Β  Β  Β  Β  Β  Β  Β  "Skills Score (30)": st.column_config.ProgressColumn(
542
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  "Skills (30)",
543
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  format="%f",
544
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  min_value=0, max_value=30,
545
- Β  Β  Β  Β  Β  Β  Β  Β  ),
546
- Β  Β  Β  Β  Β  Β  Β  Β  "Communication Score (20)": st.column_config.ProgressColumn(
547
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  "Comms (20)",
548
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  format="%f",
549
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  min_value=0, max_value=20,
550
- Β  Β  Β  Β  Β  Β  Β  Β  ),
551
- Β  Β  Β  Β  Β  Β  Β  Β  "Certifications Score (10)": st.column_config.ProgressColumn(
552
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  "Certs (10)",
553
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  format="%f",
554
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  min_value=0, max_value=10,
555
- Β  Β  Β  Β  Β  Β  Β  Β  ),
556
- Β  Β  Β  Β  Β  Β  },
557
- Β  Β  Β  Β  Β  Β  key="dashboard_editor",
558
- Β  Β  Β  Β  Β  Β  hide_index=True
559
- Β  Β  Β  Β  )
560
-
561
- Β  Β  Β  Β  # The logic to update the session state with the edited 'Shortlisted' column remains the same
562
- Β  Β  Β  Β  try:
563
- Β  Β  Β  Β  Β  Β  # Update the master dataframe with the edited 'Shortlisted' column
564
- Β  Β  Β  Β  Β  Β  for col in edited_df.columns:
565
- Β  Β  Β  Β  Β  Β  Β  Β  if col in st.session_state.analyzed_data.columns and not edited_df[col].equals(st.session_state.analyzed_data[col]):
566
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  # Only update 'Shortlisted' which is the only editable field
567
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  if col == 'Shortlisted':
568
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
569
- Β  Β  Β  Β  except Exception:
570
- Β  Β  Β  Β  Β  Β  # Fallback for index issues on data_editor changes
571
- Β  Β  Β  Β  Β  Β  for i, val in enumerate(edited_df.get('Shortlisted', []).tolist()):
572
- Β  Β  Β  Β  Β  Β  Β  Β  if i < len(st.session_state.analyzed_data):
573
- Β  Β  Β  Β  Β  Β  Β  Β  Β  Β  st.session_state.analyzed_data.at[i, 'Shortlisted'] = val
574
-
575
-
576
- Β  Β  Β  Β  st.markdown("---")
577
- Β  Β  Β  Β  st.subheader("πŸ“₯ Download Data")
578
- Β  Β  Β  Β  df_export = st.session_state.analyzed_data.copy()
579
- Β  Β  Β  Β  excel_bytes = df_to_excel_bytes(df_export)
580
-
581
- Β  Β  Β  Β  st.download_button(
582
- Β  Β  Β  Β  Β  Β  label="πŸ’Ύ Download All Data as Excel (.xlsx)",
583
- Β  Β  Β  Β  Β  Β  data=excel_bytes,
584
- Β  Β  Β  Β  Β  Β  file_name="quantum_scrutiny_report.xlsx",
585
- Β  Β  Β  Β  Β  Β  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
586
- Β  Β  Β  Β  Β  Β  help="Downloads the full table including all extracted fields and shortlist status."
587
- Β  Β  Β  Β  )
588
-
589
- # --- End of file ---
 
18
  import pandas as pd
19
 
20
  # File parsing
21
+ import fitz # PyMuPDF
22
+ from docx import Document # python-docx
23
 
24
  # Groq client
25
  from groq import Groq
 
37
  # Initialize Groq client (no API key -> UI warning but app still loads)
38
  groq_client = None
39
  if GROQ_API_KEY:
40
+ try:
41
+ groq_client = Groq(api_key=GROQ_API_KEY)
42
+ except Exception as e:
43
+ st.error(f"Failed to initialize Groq client: {e}")
44
  else:
45
+ st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
46
 
47
  # --- Session state defaults ---
48
  if 'is_admin_logged_in' not in st.session_state:
49
+ st.session_state.is_admin_logged_in = False
50
  if 'analyzed_data' not in st.session_state:
51
+ initial_cols = [
52
+ 'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
53
+ 'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
54
+ 'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
55
+ 'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
56
+ ]
57
+ st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
 
 
 
58
  if 'individual_analysis' not in st.session_state:
59
+ st.session_state.individual_analysis = []
60
  if 'run_analysis' not in st.session_state:
61
+ st.session_state.run_analysis = False
62
 
63
+ # --- Pydantic schema ---
64
  class ResumeAnalysis(BaseModel):
65
+ name: str = Field(default="Unknown")
66
+ email: str = Field(default="")
67
+ phone: str = Field(default="")
68
+ certifications: List[str] = Field(default_factory=list)
69
+ experience_summary: str = Field(default="")
70
+ education_summary: str = Field(default="")
71
+ communication_skills: str = Field(default="N/A")
72
+ technical_skills: List[str] = Field(default_factory=list)
73
+ aba_therapy_skills: Optional[str] = Field(default="N/A")
74
+ rbt_bcba_certification: Optional[str] = Field(default="N/A")
75
+ autism_care_experience_score: Optional[str] = Field(default="N/A")
76
+
77
+ # --- Helpers: file text extraction ---
78
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        uploaded_file: A file-like object exposing ``name`` and either
            ``getvalue()`` or ``read()`` (for example Streamlit's uploaded-file
            object or an ``io.BytesIO``).

    Returns:
        The extracted text with surrounding whitespace removed, or an empty
        string when the file cannot be read or parsed. This function never
        raises; callers treat ``""`` as "skip this file".
    """
    try:
        # Prefer getvalue(): it returns the whole buffer regardless of the
        # current stream position. A plain read() on an already-consumed
        # stream would silently return b"" and the resume would be skipped.
        if hasattr(uploaded_file, "getvalue"):
            content = uploaded_file.getvalue()
        else:
            if hasattr(uploaded_file, "seek"):
                uploaded_file.seek(0)
            content = uploaded_file.read()

        filename = (getattr(uploaded_file, "name", "") or "").lower()

        # Detect PDFs by extension or by magic number, so a mis-named PDF is
        # still parsed correctly.
        if filename.endswith(".pdf") or content[:5] == b"%PDF-":
            with fitz.open(stream=content, filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc).strip()

        if filename.endswith(".docx"):
            document = Document(io.BytesIO(content))
            paragraphs = [
                p.text for p in document.paragraphs if p.text and p.text.strip()
            ]
            return "\n".join(paragraphs).strip()

        # Unknown type: best-effort decode as UTF-8 text.
        return content.decode("utf-8", errors="ignore").strip()
    except Exception:
        # Deliberate best-effort boundary: any parser or I/O failure means
        # "no text"; the caller reports the skipped file to the user.
        return ""
107
+
108
+ # --- Groq call with streaming (collects chunks) ---
109
def _stream_chunk_text(chunk) -> str:
    """Return the text carried by one streaming chunk, or '' if it has none.

    Handles both attribute-style chunks (``chunk.choices[0].delta.content``)
    and dict-style chunks. Chunks with no choices or no text contribute
    nothing, so their repr is never mixed into the model output.
    """
    if isinstance(chunk, dict):
        choices = chunk.get("choices") or []
        first = choices[0] if choices else {}
        if not isinstance(first, dict):
            return ""
        delta = first.get("delta") or {}
        message = first.get("message") or {}
        text = delta.get("content") or message.get("content")
    else:
        choices = getattr(chunk, "choices", None) or []
        if not choices:
            return ""
        first = choices[0]
        text = getattr(getattr(first, "delta", None), "content", None)
        if not text:
            # Some SDKs put the final text in choices[0].message.content.
            text = getattr(getattr(first, "message", None), "content", None)
    return text if isinstance(text, str) else ""


def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """Call Groq with streaming enabled and return the concatenated text.

    Args:
        prompt: User message sent to the model.
        model_name: Groq model identifier.
        temperature: Sampling temperature.
        max_completion_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling parameter.

    Returns:
        The full model text with surrounding whitespace removed (possibly an
        empty string), or ``None`` when the client is not configured or the
        API call fails. Failures are reported with ``st.error``.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None

    try:
        stream = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True,
        )

        # Collect pieces in a list and join once. Only real text is kept, so
        # the result stays parseable as JSON.
        pieces = []
        for chunk in stream:
            piece = _stream_chunk_text(chunk)
            if piece:
                pieces.append(piece)
        return "".join(pieces).strip()
    except Exception as e:
        # API boundary: surface the error in the UI and signal failure.
        st.error(f"Groq API call failed: {e}")
        return None
163
+
164
+ # --- Parsing model output safely to JSON ---
165
def extract_first_json(text: str) -> Optional[dict]:
    """Find and parse the first JSON object embedded in ``text``.

    The model may wrap its JSON in prose or Markdown code fences, or emit a
    Python-style dict with single quotes. Strategy:

    1. Decode a JSON object starting at the first ``{``. The decoder stops at
       the matching closing brace, so trailing text is ignored.
    2. If that fails, try the span from the first ``{`` to the last ``}`` as a
       Python literal (covers single-quoted keys and values).
    3. Otherwise try each later ``{`` as a JSON start.

    Args:
        text: Raw model output.

    Returns:
        The parsed object as a ``dict``, or ``None`` when no object can be
        recovered. Non-dict JSON values are never returned.
    """
    if not text:
        return None

    # Local import: only this lenient fallback needs ``ast``.
    import ast

    first = text.find("{")
    if first == -1:
        return None

    decoder = json.JSONDecoder()

    def _decode_at(pos: int) -> Optional[dict]:
        """Decode a JSON object beginning exactly at ``pos``; None on failure."""
        try:
            value, _end = decoder.raw_decode(text, pos)
        except (ValueError, RecursionError):
            return None
        return value if isinstance(value, dict) else None

    found = _decode_at(first)
    if found is not None:
        return found

    # Python-literal fallback. Unlike a blanket quote replacement, this keeps
    # apostrophes inside values intact.
    last = text.rfind("}")
    if last > first:
        try:
            literal = ast.literal_eval(text[first:last + 1])
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            literal = None
        if isinstance(literal, dict):
            return literal

    # The first brace may belong to prose ("see {below}"); try later ones.
    pos = text.find("{", first + 1)
    while pos != -1:
        found = _decode_at(pos)
        if found is not None:
            return found
        pos = text.find("{", pos + 1)
    return None
195
+
196
+ # --- Analyze with Groq (cached by resume text + role) ---
197
class _ResumeAnalysisError(Exception):
    """Raised by the cached worker so that failed analyses are never memoized."""

    def __init__(self, message: str, raw: str = ""):
        super().__init__(message)
        # Raw model text (may be empty) kept for the debug display.
        self.raw = raw


# Text fields and the value used when the model omits them or returns null.
_ANALYSIS_TEXT_DEFAULTS = {
    "name": "Unknown",
    "email": "",
    "phone": "",
    "experience_summary": "",
    "education_summary": "",
    "communication_skills": "N/A",
    "aba_therapy_skills": "N/A",
    "rbt_bcba_certification": "N/A",
    "autism_care_experience_score": "N/A",
}
_ANALYSIS_LIST_FIELDS = ("certifications", "technical_skills")


def _failed_resume_analysis() -> ResumeAnalysis:
    """Return the placeholder callers recognise by name == 'Extraction Failed'."""
    return ResumeAnalysis(name="Extraction Failed")


def _build_analysis_prompt(resume_text: str, job_role: str) -> str:
    """Build the extraction prompt, adding therapist guidance when relevant."""
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

    return (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )


def _normalise_analysis_payload(parsed: dict) -> dict:
    """Coerce the model's JSON into the types ResumeAnalysis expects.

    Missing, null or blank text fields get their default; other scalars are
    stringified (so a numeric score of 0 stays '0'). List fields accept a
    list, a comma/semicolon/newline separated string, or null.
    """
    payload = {}
    for key, default in _ANALYSIS_TEXT_DEFAULTS.items():
        value = parsed.get(key)
        if isinstance(value, (list, tuple)):
            value = ", ".join(str(item) for item in value if item is not None)
        if value is None or (isinstance(value, str) and not value.strip()):
            payload[key] = default
        else:
            payload[key] = str(value)

    for key in _ANALYSIS_LIST_FIELDS:
        value = parsed.get(key)
        if value is None:
            items = []
        elif isinstance(value, str):
            items = re.split(r"[,;\n]", value)
        elif isinstance(value, (list, tuple, set)):
            items = list(value)
        else:
            items = [value]
        payload[key] = [
            str(item).strip()
            for item in items
            if item is not None and str(item).strip()
        ]
    return payload


@st.cache_data(show_spinner=False)
def _cached_resume_analysis(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Cached worker: call Groq and validate the result, raising on failure.

    Raising means st.cache_data stores nothing for a failed call, so a
    transient API error is retried on the next attempt.
    """
    raw = call_groq_stream_collect(
        _build_analysis_prompt(resume_text, job_role),
        model_name="llama-3.3-70b-versatile",
        temperature=0.0,
        max_completion_tokens=2048,
    )
    if not raw:
        raise _ResumeAnalysisError("Model returned no output.")

    parsed = extract_first_json(raw)
    if not parsed or not isinstance(parsed, dict):
        raise _ResumeAnalysisError("Model output is not a JSON object.", raw)

    try:
        return ResumeAnalysis.parse_obj(_normalise_analysis_payload(parsed))
    except ValidationError as ve:
        raise _ResumeAnalysisError("Model output failed schema validation.", raw) from ve


def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Analyze a resume with Groq and return a ResumeAnalysis.

    Successful results are cached per (resume_text, job_role). Failures are
    not cached; they are reported in the UI and a placeholder with
    ``name == "Extraction Failed"`` is returned.

    Args:
        resume_text: Plain text extracted from the resume.
        job_role: Target role; 'Therapist' (case-insensitive) adds the
            therapist-specific extraction instructions.

    Returns:
        The validated analysis, or the 'Extraction Failed' placeholder.
    """
    try:
        return _cached_resume_analysis(resume_text, job_role)
    except _ResumeAnalysisError as failure:
        # Debug output is rendered here, outside the cached worker, and with
        # a non-widget element so several failures in one run cannot collide.
        if failure.raw:
            if failure.__cause__ is not None:
                st.error("Model output failed schema validation.")
            else:
                st.warning("Failed to parse model JSON output. See raw output below for debugging.")
            st.code(failure.raw)
            if failure.__cause__ is not None:
                st.exception(failure.__cause__)
        return _failed_resume_analysis()


# Keep the cache-clearing hook callers may expect on the public name.
analyze_resume_with_groq_cached.clear = _cached_resume_analysis.clear
302
+
303
+ # --- Scoring logic ---
304
def _rating_0_to_10(value, default: float) -> float:
    """Return the first number found in ``value`` clamped to 0-10, else ``default``."""
    try:
        match = re.search(r"\d+(?:\.\d+)?", str(value))
    except Exception:
        return default
    if not match:
        return default
    return max(0.0, min(10.0, float(match.group(0))))


def calculate_resume_score(analysis: ResumeAnalysis, role: str) -> float:
    """Compute a 0-100 resume score from the extracted fields.

    Components:
        * Experience: up to 40, proportional to summary length (cap 100 chars).
        * Skills: up to 30, proportional to skill count (cap 10).
        * Communication: up to 20, from a 0-10 rating; 5 when unparseable.
        * Certifications: 1 point each, up to 10.
        * Therapist role only: up to 10 bonus from the ABA and autism-care
          ratings, each clamped to 0-10 so the bonus cannot exceed its cap.

    Args:
        analysis: Extracted resume fields.
        role: Target job role; 'therapist' (case-insensitive) enables the bonus.

    Returns:
        The total, capped at 100 and rounded to a whole number, as a float.
    """
    total_score = 0.0

    # Experience: summary length is used as a proxy for depth.
    exp_factor = min(len(analysis.experience_summary or "") / 100.0, 1.0)
    total_score += exp_factor * 40.0

    skills_factor = min(len(analysis.technical_skills or []) / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # A missing or unparseable rating gets the neutral midpoint.
    comm_val = _rating_0_to_10(analysis.communication_skills, default=5.0)
    total_score += (comm_val / 10.0) * 20.0

    total_score += min(len(analysis.certifications or []), 10) * 1.0

    if (role or "").lower() == "therapist":
        aba = _rating_0_to_10(analysis.aba_therapy_skills, default=0.0)
        autism = _rating_0_to_10(analysis.autism_care_experience_score, default=0.0)
        total_score += ((aba + autism) / 20.0) * 10.0

    return float(round(min(total_score, 100.0)))
344
+
345
+ # --- Append to DataFrame ---
346
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Append one analyzed candidate as a new row of the session results table.

    The row always starts with 'Shortlisted' set to 'No'. List fields are
    flattened to comma-separated strings and missing ratings become 'N/A'.
    """
    fields = analysis.dict()

    def _text(key):
        return fields.get(key) or ""

    def _rating(key):
        return str(fields.get(key) or "N/A")

    def _joined(key):
        return ", ".join(fields.get(key) or [])

    record = {
        'Name': _text("name"),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': _text("email"),
        'Phone': _text("phone"),
        'Shortlisted': 'No',
        'Experience Summary': _text("experience_summary"),
        'Education Summary': _text("education_summary"),
        'Communication Rating (1-10)': _rating("communication_skills"),
        'Skills/Technologies': _joined("technical_skills"),
        'Certifications': _joined("certifications"),
        'ABA Skills (1-10)': _rating("aba_therapy_skills"),
        'RBT/BCBA Cert': _rating("rbt_bcba_certification"),
        'Autism-Care Exp (1-10)': _rating("autism_care_experience_score"),
    }

    existing = st.session_state.analyzed_data
    st.session_state.analyzed_data = pd.concat(
        [existing, pd.DataFrame([record])],
        ignore_index=True,
    )
368
+
369
+ # --- Excel export helper ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialize ``df`` to an in-memory .xlsx workbook and return its bytes.

    The data is written without the index to a single sheet named
    "Resume Analysis Data" using the openpyxl engine.
    """
    sink = io.BytesIO()
    # The workbook is only flushed into ``sink`` when the writer closes, so
    # the bytes are read after the ``with`` block.
    with pd.ExcelWriter(sink, engine="openpyxl") as book:
        df.to_excel(book, sheet_name="Resume Analysis Data", index=False)
    return sink.getvalue()
375
 
376
  # --- UI Layout ---
377
# Page title and the two top-level tabs. The emoji were mis-encoded
# (UTF-8 bytes rendered through a legacy code page) and are restored here.
st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])
380
 
381
+ # --- User Panel ---
382
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    # The button only raises a flag and reruns; the analysis itself happens on
    # the next pass, in the block below.
    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []
            with st.spinner("Processing resumes..."):
                for idx, f in enumerate(uploaded_files, start=1):
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            continue

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)

                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue

                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)

                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name
                        })
                    except Exception as e:
                        # One bad file must not abort the batch: report it
                        # and carry on with the next resume.
                        st.error(f"Error analyzing {f.name}: {e}")
                        # st.exception expects the exception object itself,
                        # not a formatted traceback string.
                        st.exception(e)
                    finally:
                        # Runs on success, on `continue` and on error, so the
                        # bar advances exactly once per file.
                        progress.progress(idx / total)

            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False

    # Display last results summary
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
450
+
451
+ # --- Admin Panel ---
 
 
452
# Imported here rather than at the top of the file so that this section is
# self-contained; used only for the admin password check.
import hmac

with tab_admin:
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            # Constant-time comparison; both sides are encoded to bytes
            # because compare_digest rejects non-ASCII str input.
            if hmac.compare_digest(password.encode("utf-8"), str(ADMIN_PASSWORD).encode("utf-8")):
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        # Nothing below may render for an unauthenticated visitor.
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Copy the edited shortlist flags back into the stored table. The
        # positional fallback covers the case where bulk assignment fails.
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < len(st.session_state.analyzed_data):
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("📥 Download Data")
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
511
+
512
+ # --- End of file ---