meesamraza committed on
Commit
320d29b
·
verified ·
1 Parent(s): 6e2aa61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +239 -185
app.py CHANGED
@@ -1,38 +1,53 @@
1
- # src/streamlit_app.py
2
-
3
  import streamlit as st
4
  import pandas as pd
5
  import io
6
  import os
7
  import fitz
8
  import docx2txt
 
9
  from groq import Groq
10
  from dotenv import load_dotenv
11
- from pydantic import BaseModel, Field, ValidationError # Added ValidationError
12
- from typing import Optional, List # Added Optional and List
13
 
14
- # --- 0. FIX: SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
 
 
 
15
  st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
16
- # FIX for Hugging Face Deployment: Read the key from the environment/Secrets.
17
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
18
 
19
- # Admin Password (as requested)
20
- ADMIN_PASSWORD = "admin"
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Initialize Groq Client
 
23
  if GROQ_API_KEY:
24
  try:
25
  groq_client = Groq(api_key=GROQ_API_KEY)
26
  except Exception as e:
27
- st.error(f"Error initializing Groq Client: {e}")
28
- st.stop()
29
  else:
30
- st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
31
- st.stop()
32
 
33
- # Initialize Session State
 
 
34
  if 'is_admin_logged_in' not in st.session_state:
35
  st.session_state.is_admin_logged_in = False
 
36
  if 'analyzed_data' not in st.session_state:
37
  initial_cols = [
38
  'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
@@ -42,83 +57,98 @@ if 'analyzed_data' not in st.session_state:
42
  ]
43
  st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
44
 
45
-
46
- # --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---
47
-
48
  class ResumeAnalysis(BaseModel):
49
- """Pydantic model for structured resume data extraction."""
50
  name: str = Field(description="Full name of the candidate.")
51
  email: str = Field(description="Professional email address.")
52
  phone: str = Field(description="Primary phone number.")
53
- certifications: List[str] = Field(description="List of professional certifications.")
54
- experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
55
- education_summary: str = Field(description="A concise summary of the candidate's highest education.")
56
-
57
- # --- CRITICAL FIX: Use str or Optional[str] and improve coercion ---
58
- # The Groq model is returning INT (8) instead of STR ('8') for communication_skills.
59
- # The most stable fix is to keep the field as str and rely on Groq's JSON mode
60
- # but improve the prompt guidance. We will also update the helper functions to be more robust.
61
- communication_skills: str = Field(description="A score as a STRING (e.g., '8') or description of communication skills.")
62
- technical_skills: List[str] = Field(description="List of technical skills/technologies mentioned.")
63
-
64
- # These fields can sometimes return None, so we make them Optional[str]
65
- # and default them to "N/A" in the final output in the analyze function if still None.
66
- aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7'). Use 'N/A' if not applicable.")
67
- rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'. Use 'N/A' if not applicable.")
68
- autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9'). Use 'N/A' if not applicable.")
69
-
70
-
71
- # --- 3. HELPER FUNCTIONS ---
72
-
73
- def extract_text_from_file(uploaded_file):
74
- """Extracts text from PDF or DOCX files."""
75
- file_type = uploaded_file.type
76
  try:
77
- if file_type == "application/pdf":
78
- with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
79
- text = ""
 
 
 
 
 
 
80
  for page in doc:
81
  text += page.get_text()
82
- return text
83
- elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
84
- return docx2txt.process(uploaded_file)
 
 
 
 
 
 
 
85
  else:
86
- return ""
 
 
 
 
 
 
 
 
87
  except Exception as e:
88
- print(f"Error extracting text: {e}")
89
  return ""
90
 
91
- @st.cache_data(show_spinner="Analyzing resume with Groq...")
 
92
  def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
93
- """Uses Groq and the Pydantic schema for structured extraction."""
94
-
95
- # Custom instructions for Therapist role
 
 
 
 
 
96
  therapist_instructions = ""
97
  if job_role == "Therapist":
98
  therapist_instructions = (
99
- "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
100
- "Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields. "
101
- "If any specialized therapist field is not found, you MUST return **null** or **N/A** for that field."
102
  )
103
  else:
104
- # For non-therapist roles, explicitly instruct the model to use 'null'
105
- # so Optional[str] handles it cleanly.
106
  therapist_instructions = (
107
- "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to **null** or **N/A**."
108
  )
109
 
110
- # System Prompt for Groq
111
  system_prompt = (
112
- f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
113
- f"The candidate is applying for the role of '{job_role}'. "
114
- f"Return a JSON object that strictly adheres to the provided Pydantic schema. "
115
- f"**CRITICAL:** Ensure 'communication_skills' is returned as a **STRING** value, even if it's a number (e.g., \"8\" NOT 8). " # <-- Re-emphasizing string output for the specific failing field
116
- f"{therapist_instructions}"
117
  )
118
 
119
  try:
120
  chat_completion = groq_client.chat.completions.create(
121
- model="mixtral-8x7b-32768",
122
  messages=[
123
  {"role": "system", "content": system_prompt},
124
  {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
@@ -126,167 +156,198 @@ def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
126
  response_model={"type": "json_object", "schema": ResumeAnalysis.schema()},
127
  temperature=0.0
128
  )
129
-
130
- # Parse the JSON response
131
- analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
132
 
133
- # Post-processing: Ensure Optional fields are strings for score calculation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
135
  analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
136
  analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
137
- analysis.communication_skills = str(analysis.communication_skills) # Coerce communication_skills to string just in case it passed validation as an int somehow
138
 
139
  return analysis
140
-
141
- except ValidationError as ve:
142
- st.error(f"Groq API Validation Error: The model returned incompatible data. Details: {ve}")
143
- print(f"Failed JSON: {chat_completion.choices[0].message.content}") # Print the bad JSON for debugging
144
- return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="N/A", technical_skills=[], aba_therapy_skills="N/A", rbt_bcba_certification="N/A", autism_care_experience_score="N/A")
145
  except Exception as e:
146
- st.error(f"Groq API Error: {e}")
147
- return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="N/A", technical_skills=[], aba_therapy_skills="N/A", rbt_bcba_certification="N/A", autism_care_experience_score="N/A")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
 
150
  def calculate_resume_score(analysis: ResumeAnalysis) -> float:
151
- """Calculates the weighted score out of 100."""
152
-
153
  total_score = 0.0
154
 
155
- # 1. Experience Score (Max 40 points)
156
- exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0)
 
157
  total_score += exp_factor * 40.0
158
 
159
- # 2. Skills Score (Max 30 points)
160
  skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
161
  total_score += skills_factor * 30.0
162
 
163
- # 3. Communication Score (Max 20 points)
164
  try:
165
- # Safely parse the communication score string, handling N/A or raw numbers
166
- score_str = str(analysis.communication_skills).split('-')[0].strip() # Use str() to handle if it somehow remained an int
167
  comm_rating = float(score_str)
168
- except (ValueError, IndexError):
169
- comm_rating = 5.0
170
-
171
- score_comm = (comm_rating / 10.0) * 20.0
172
- total_score += score_comm
173
-
174
- # 4. Certification Score (Max 10 points)
175
- score_cert = min(len(analysis.certifications), 10) * 1.0
176
- total_score += score_cert
177
-
178
- # --- Therapist-Specific Bonus Checks ---
179
- if st.session_state.get('selected_role') == "Therapist":
180
  try:
181
- # Safely parse specialized scores, handling 'N/A' or None
182
- aba_score = float(str(analysis.aba_therapy_skills).split('-')[0].strip()) if str(analysis.aba_therapy_skills).upper() not in ['N/A', 'NONE'] else 0.0
183
- autism_score = float(str(analysis.autism_care_experience_score).split('-')[0].strip()) if str(analysis.autism_care_experience_score).upper() not in ['N/A', 'NONE'] else 0.0
184
-
185
- # Add a bonus based on the average specialized scores (max 10 points)
186
- specialized_bonus = ((aba_score + autism_score) / 20.0) * 10.0
187
- total_score += specialized_bonus
188
- except (ValueError, IndexError, TypeError):
189
- pass # Ignore if specialized scores are still corrupted
190
-
191
-
192
  final_score = round(min(total_score, 100))
193
  return float(final_score)
194
 
195
 
196
  def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
197
- """Formats and appends the new analysis to the session state DataFrame."""
198
-
199
  data = analysis.dict()
200
  data['Job Role'] = job_role
201
  data['Resume Score'] = score
202
  data['Shortlisted'] = 'No'
203
-
204
- technical_skills_list = ", ".join(data['technical_skills'])
205
- certifications_list = ", ".join(data['certifications'])
206
-
207
- # Ensure fields that might have been None are now strings for the DataFrame
208
- comm_skills = str(data['communication_skills'] or 'N/A')
209
- aba_skills = str(data['aba_therapy_skills'] or 'N/A')
210
- rbt_cert = str(data['rbt_bcba_certification'] or 'N/A')
211
- autism_exp = str(data['autism_care_experience_score'] or 'N/A')
212
-
213
  df_data = {
214
- 'Name': data['name'],
215
  'Job Role': job_role,
216
  'Resume Score (100)': score,
217
- 'Email': data['email'],
218
- 'Phone': data['phone'],
219
- 'Shortlisted': data['Shortlisted'],
220
- 'Experience Summary': data['experience_summary'],
221
- 'Education Summary': data['education_summary'],
222
- 'Communication Rating (1-10)': comm_skills,
223
  'Skills/Technologies': technical_skills_list,
224
  'Certifications': certifications_list,
225
- 'ABA Skills (1-10)': aba_skills,
226
- 'RBT/BCBA Cert': rbt_cert,
227
- 'Autism-Care Exp (1-10)': autism_exp,
228
  }
229
 
230
  new_df = pd.DataFrame([df_data])
231
  st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
232
 
233
-
234
- # --- 4. APP LAYOUT AND LOGIC ---
235
-
236
  st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
237
 
238
  tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
239
 
240
- # =========================================================================
241
- # A. Resume Upload (User Panel)
242
- # =========================================================================
243
  with tab_user:
244
  st.header("Upload Resumes for Analysis")
245
- st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")
246
-
247
  job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
248
- selected_role = st.selectbox(
249
- "**1. Select the Target Job Role** (Influences analysis and scoring)",
250
- options=job_role_options,
251
- key='selected_role'
252
- )
253
 
254
- uploaded_files = st.file_uploader(
255
- "**2. Upload Resumes** (PDF or DOCX)",
256
- type=["pdf", "docx"],
257
- accept_multiple_files=True
258
- )
259
-
260
  if st.button("πŸš€ Analyze All Uploaded Resumes"):
261
  if not uploaded_files:
262
  st.warning("Please upload one or more resume files to begin analysis.")
263
  else:
264
  total_files = len(uploaded_files)
265
- progress_bar = st.progress(0)
266
-
267
  st.session_state.individual_analysis = []
268
-
269
- with st.status("Processing Resumes...", expanded=True) as status_box:
270
-
271
  for i, file in enumerate(uploaded_files):
272
  file_name = file.name
273
  st.write(f"Analyzing **{file_name}**...")
274
-
275
  resume_text = extract_text_from_file(file)
276
-
277
  if not resume_text:
278
  st.error(f"Could not extract text from {file_name}. Skipping.")
279
  continue
280
-
281
  analysis = analyze_resume_with_groq(resume_text, selected_role)
282
-
283
- if analysis.name == "Extraction Failed":
284
- st.error(f"Groq extraction failed for {file_name}. Skipping.")
285
- continue
286
-
287
  score = calculate_resume_score(analysis)
288
  append_analysis_to_dataframe(selected_role, analysis, score)
289
-
290
  st.session_state.individual_analysis.append({
291
  'name': analysis.name,
292
  'score': score,
@@ -295,52 +356,45 @@ with tab_user:
295
  })
296
 
297
  progress_bar.progress((i + 1) / total_files)
298
-
299
- status_box.update(label="Analysis Complete!", state="complete", expanded=False)
300
-
301
- st.success(f"**βœ… Successfully analyzed {total_files} resumes.**")
302
-
303
  if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
304
  st.subheader("Last Analysis Summary")
305
  for item in st.session_state.individual_analysis:
306
  st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
307
-
308
  st.markdown("---")
309
  st.caption("All analyzed data is stored in the **Admin Dashboard**.")
310
 
311
- # =========================================================================
312
- # B. Admin Panel (Password Protected)
313
- # =========================================================================
314
  with tab_admin:
315
-
316
  if not st.session_state.is_admin_logged_in:
317
  st.header("Admin Login")
318
  password = st.text_input("Enter Admin Password", type="password")
319
  if st.button("πŸ”‘ Login"):
320
  if password == ADMIN_PASSWORD:
321
  st.session_state.is_admin_logged_in = True
322
- st.rerun()
323
  else:
324
  st.error("Incorrect password.")
325
  st.stop()
326
-
327
  st.header("🎯 Recruitment Dashboard")
328
  st.markdown("---")
329
-
330
  if st.button("πŸšͺ Logout"):
331
  st.session_state.is_admin_logged_in = False
332
- st.rerun()
333
 
334
  if st.session_state.analyzed_data.empty:
335
  st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
336
  else:
337
  df = st.session_state.analyzed_data.copy()
338
-
339
  st.subheader("Candidate Data Table")
340
  st.success(f"**Total Candidates Analyzed: {len(df)}**")
341
 
342
  display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
343
-
344
  edited_df = st.data_editor(
345
  df[display_cols],
346
  column_config={
@@ -354,11 +408,11 @@ with tab_admin:
354
  key="dashboard_editor",
355
  hide_index=True
356
  )
357
-
 
358
  st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']
359
 
360
  st.markdown("---")
361
-
362
  st.subheader("πŸ“₯ Download Data")
363
 
364
  df_export = st.session_state.analyzed_data.copy()
@@ -369,10 +423,10 @@ with tab_admin:
369
 
370
  st.download_button(
371
  label="πŸ’Ύ Download All Data as Excel (.xlsx)",
372
- data=excel_buffer,
373
  file_name="quantum_scrutiny_report.xlsx",
374
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
375
  help="Downloads the full table including all extracted fields and shortlist status."
376
  )
377
 
378
- # --- End of src/streamlit_app.py ---
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import io
4
  import os
5
  import fitz
6
  import docx2txt
7
+ import tempfile
8
  from groq import Groq
9
  from dotenv import load_dotenv
10
+ from pydantic import BaseModel, Field, ValidationError
11
+ from typing import Optional, List
12
 
13
# --------------------
# Configuration & secret resolution
# --------------------
# st.set_page_config must be the very first Streamlit call in the script.
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# Pull in a local .env when present (local-development convenience).
load_dotenv()


def _resolve_groq_key():
    """Return the Groq API key from env vars, then Streamlit secrets, else None."""
    key = os.getenv("GROQ_API_KEY") or os.getenv("GROQ_APIKEY")
    if key:
        return key
    # Deployments on Streamlit Cloud and similar hosts expose secrets here.
    try:
        return st.secrets["GROQ_API_KEY"]
    except Exception:
        return None


GROQ_API_KEY = _resolve_groq_key()

# Demo-only credential; override via the ADMIN_PASSWORD env var in real deployments.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Build the Groq client when a key is available; otherwise the app runs in
# fallback (heuristic-extraction) mode.
groq_client = None
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.warning(f"Warning: Failed to initialize Groq client: {e}")
        groq_client = None
else:
    st.warning("GROQ_API_KEY not found in environment or Streamlit secrets. The app will run in fallback mode.")
 
44
 
45
+ # --------------------
46
+ # Session state init
47
+ # --------------------
48
  if 'is_admin_logged_in' not in st.session_state:
49
  st.session_state.is_admin_logged_in = False
50
+
51
  if 'analyzed_data' not in st.session_state:
52
  initial_cols = [
53
  'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
 
57
  ]
58
  st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
59
 
60
# --------------------
# Pydantic Schema
# --------------------
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume.

    Score-like fields are deliberately typed as strings so that whatever the
    LLM emits ('8', 'N/A', free text) validates without coercion errors.
    """

    # Contact details
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")

    # General profile
    certifications: List[str] = Field(default_factory=list, description="List of professional certifications.")
    experience_summary: str = Field(default="", description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(default="", description="A concise summary of the candidate's highest education.")

    communication_skills: str = Field(default="N/A", description="A score as a STRING (e.g., '8') or description of communication skills.")
    technical_skills: List[str] = Field(default_factory=list, description="List of technical skills/technologies mentioned.")

    # Therapist-specific fields; remain "N/A" when the role does not apply.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7').")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9').")
77
+
78
# --------------------
# Helpers
# --------------------

def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    The upload is spooled to a named temp file because docx2txt wants a real
    filesystem path. Returns "" on any failure (errors are surfaced via
    st.error/st.warning rather than raised).
    """
    tmp_path = None
    try:
        suffix = os.path.splitext(uploaded_file.name)[1].lower()
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded_file.read())
            tmp_path = tmp.name

        text = ""
        if suffix == '.pdf':
            try:
                # Context manager guarantees the document handle is closed
                # even if get_text() raises mid-document.
                with fitz.open(tmp_path) as doc:
                    for page in doc:
                        text += page.get_text()
            except Exception as e:
                st.error(f"PDF extraction error for {uploaded_file.name}: {e}")
                text = ""
        elif suffix in ['.docx', '.doc']:
            try:
                text = docx2txt.process(tmp_path) or ""
            except Exception as e:
                st.error(f"DOCX extraction error for {uploaded_file.name}: {e}")
                text = ""
        else:
            st.warning(f"Unsupported file type: {suffix}")

        return text
    except Exception as e:
        st.error(f"Failed to extract text: {e}")
        return ""
    finally:
        # FIX: clean up the temp file on every exit path. The original only
        # unlinked on the success path, leaking the file when an unexpected
        # exception fired before the unlink.
        if tmp_path:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
119
 
120
+
121
@st.cache_data
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Call Groq to extract structured data. If Groq is not available or returns
    invalid JSON, fall back to a lightweight heuristic parser.

    NOTE(review): results are cached by (resume_text, job_role); a fallback
    result produced while the client was unavailable stays cached even after a
    key is configured — clear the cache if that matters.
    """
    # No client configured -> heuristic extraction only.
    if not groq_client:
        return fallback_simple_extraction(resume_text, job_role)

    # Build role-specific instructions.
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', carefully look for ABA Therapy Skills, RBT/BCBA Certification, "
            "and Autism-Care Experience. Provide a score from 1-10 as a STRING (e.g., '7') for these fields. "
            "If not found, return 'N/A'."
        )
    else:
        therapist_instructions = (
            "Since the role is not 'Therapist', set specialized therapist fields to 'N/A' if not present."
        )

    system_prompt = (
        f"You are a professional Resume Analyzer. Extract fields exactly matching the JSON schema: name, email, phone, certifications (list), "
        f"experience_summary, education_summary, communication_skills (STRING), technical_skills (list), aba_therapy_skills, rbt_bcba_certification, autism_care_experience_score. "
        f"The candidate is applying for '{job_role}'. {therapist_instructions} Return valid JSON only."
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            # NOTE(review): confirm this model ID is still served by Groq.
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # FIX: the Groq SDK parameter is `response_format`, not
            # `response_model`. The unknown keyword raised TypeError, so the
            # Groq path could never succeed and every call silently used the
            # fallback extractor.
            response_format={"type": "json_object"},
            temperature=0.0
        )

        # Extract raw content (SDK response shape may vary — keep defensive).
        try:
            raw = chat_completion.choices[0].message.content
        except Exception:
            raw = str(chat_completion)

        # Validate against the Pydantic schema; fall back on mismatch.
        try:
            analysis = ResumeAnalysis.parse_raw(raw)
        except ValidationError as ve:
            st.warning(f"Groq returned invalid format; falling back to heuristic extraction. Details: {ve}")
            return fallback_simple_extraction(resume_text, job_role)

        # Coerce score-like fields to strings so downstream scoring is safe.
        analysis.communication_skills = str(analysis.communication_skills or 'N/A')
        analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
        analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
        analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')

        return analysis

    except Exception as e:
        st.warning(f"Groq API call failed: {e}. Using fallback extraction.")
        return fallback_simple_extraction(resume_text, job_role)
185
+
186
+
187
def fallback_simple_extraction(text: str, job_role: str) -> ResumeAnalysis:
    """A minimal, robust heuristic extractor used when the LLM call fails.

    Finds name/email/phone with simple regex/line heuristics and scans for a
    fixed keyword list to populate skills and certifications. Intended only as
    a degraded-mode fallback.
    """
    import re

    # Very simple heuristics (intended as a fallback only).
    email_match = re.search(r"[\w\.-]+@[\w\.-]+", text)
    phone_match = re.search(r"(\+?\d[\d\-\s]{7,}\d)", text)

    name = "Unknown"
    # Heuristic: first of the opening lines that looks like a name
    # (short, contains letters, starts with a capital).
    lines = [l.strip() for l in text.splitlines() if l.strip()]
    for line in lines[:5]:
        if len(line.split()) <= 4 and any(ch.isalpha() for ch in line) and line[0].isupper():
            name = line
            break

    email = email_match.group(0) if email_match else ""
    phone = phone_match.group(0) if phone_match else ""

    # Skills: gather common programming / therapy keywords.
    skills_candidates = []
    certifications = []
    keywords = ['python','java','c++','machine learning','deep learning','tensorflow','pytorch','rbt','bcba','aba','autism']
    lower_text = text.lower()
    for kw in keywords:
        if kw in lower_text:
            skills_candidates.append(kw)
            if kw in ['rbt','bcba']:
                certifications.append(kw.upper())

    experience_summary = ' '.join(lines[:4]) if lines else ''
    education_summary = ''

    # Therapist-specific small heuristics.
    aba = 'N/A'
    rbt_cert = 'Yes' if 'rbt' in lower_text or 'registered behavior technician' in lower_text else 'N/A'
    autism_score = 'N/A'

    return ResumeAnalysis(
        name=name,
        email=email,
        phone=phone,
        certifications=certifications,
        experience_summary=experience_summary,
        education_summary=education_summary,
        communication_skills='5',
        # FIX: list(set(...)) produced a nondeterministic ordering, so the
        # same resume could yield differently-ordered skill lists between
        # runs. dict.fromkeys deduplicates while preserving scan order.
        technical_skills=list(dict.fromkeys(skills_candidates)),
        aba_therapy_skills=aba,
        rbt_bcba_certification=rbt_cert,
        autism_care_experience_score=autism_score
    )
241
 
242
 
243
def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Return a weighted 0-100 score for one analyzed resume.

    Weights: experience 40, skills 30, communication 20, certifications 10,
    plus a therapist-specific bonus of up to 10; the total is capped at 100.
    """
    total_score = 0.0

    # 1. Experience (max 40): scales with summary length, saturating at 100 chars.
    exp_len = len(analysis.experience_summary or "")
    total_score += min(exp_len / 100.0, 1.0) * 40.0

    # 2. Skills (max 30): saturates at 10 listed technologies.
    total_score += min(len(analysis.technical_skills) / 10.0, 1.0) * 30.0

    # 3. Communication (max 20). Accepts '8' or a range like '7-9' (low end);
    # free text / 'N/A' falls back to a neutral 5.
    try:
        comm_rating = float(str(analysis.communication_skills).split('-')[0].strip())
    except Exception:
        comm_rating = 5.0
    # FIX: clamp to the 1-10 scale so an out-of-range value (e.g. '95')
    # cannot award more than the 20 points this component is worth.
    comm_rating = max(0.0, min(comm_rating, 10.0))
    total_score += (comm_rating / 10.0) * 20.0

    # 4. Certifications (max 10): one point each.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # Therapist bonus (max 10): average of the two specialized 1-10 scores.
    if st.session_state.get('selected_role') == 'Therapist':

        def _specialized(value) -> float:
            # FIX: restore range handling ('7-8' -> 7.0) that the previous
            # revision had; a plain float() rejected such values and the
            # bonus was silently dropped. Also clamp to 0-10.
            s = str(value).strip()
            if s.upper() in ['N/A', 'NONE', '']:
                return 0.0
            try:
                return max(0.0, min(float(s.split('-')[0].strip()), 10.0))
            except Exception:
                return 0.0

        aba = _specialized(analysis.aba_therapy_skills)
        autism = _specialized(analysis.autism_care_experience_score)
        total_score += ((aba + autism) / 20.0) * 10.0

    final_score = round(min(total_score, 100))
    return float(final_score)
278
 
279
 
280
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Format one analysis result as a row and append it to the session DataFrame.

    Columns mirror `initial_cols` used to seed st.session_state.analyzed_data.
    """
    data = analysis.dict()
    # FIX: dropped the dead writes the original made into `data`
    # ('Job Role', 'Resume Score') — the row below reads `job_role` and
    # `score` directly, so those mutations were never observed.

    df_data = {
        'Name': data.get('name', ''),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data.get('email', ''),
        'Phone': data.get('phone', ''),
        'Shortlisted': 'No',  # every new candidate starts un-shortlisted
        'Experience Summary': data.get('experience_summary', ''),
        'Education Summary': data.get('education_summary', ''),
        'Communication Rating (1-10)': str(data.get('communication_skills', 'N/A')),
        'Skills/Technologies': ", ".join(data.get('technical_skills', [])),
        'Certifications': ", ".join(data.get('certifications', [])),
        'ABA Skills (1-10)': str(data.get('aba_therapy_skills', 'N/A')),
        'RBT/BCBA Cert': str(data.get('rbt_bcba_certification', 'N/A')),
        'Autism-Care Exp (1-10)': str(data.get('autism_care_experience_score', 'N/A')),
    }

    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
308
 
309
+ # --------------------
310
+ # App layout
311
+ # --------------------
312
  st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")
313
 
314
  tab_user, tab_admin = st.tabs(["πŸ‘€ Resume Uploader (User Panel)", "πŸ”’ Admin Dashboard (Password Protected)"])
315
 
 
 
 
316
  with tab_user:
317
  st.header("Upload Resumes for Analysis")
318
+ st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score key data. If the API key is missing, a fallback heuristic extractor will run.")
319
+
320
  job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
321
+ selected_role = st.selectbox("**1. Select the Target Job Role**", options=job_role_options, key='selected_role')
322
+
323
+ uploaded_files = st.file_uploader("**2. Upload Resumes** (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)
 
 
324
 
 
 
 
 
 
 
325
  if st.button("πŸš€ Analyze All Uploaded Resumes"):
326
  if not uploaded_files:
327
  st.warning("Please upload one or more resume files to begin analysis.")
328
  else:
329
  total_files = len(uploaded_files)
330
+ progress_bar = st.progress(0.0)
 
331
  st.session_state.individual_analysis = []
332
+
333
+ with st.spinner("Processing resumes..."):
 
334
  for i, file in enumerate(uploaded_files):
335
  file_name = file.name
336
  st.write(f"Analyzing **{file_name}**...")
337
+
338
  resume_text = extract_text_from_file(file)
 
339
  if not resume_text:
340
  st.error(f"Could not extract text from {file_name}. Skipping.")
341
  continue
342
+
343
  analysis = analyze_resume_with_groq(resume_text, selected_role)
344
+ if isinstance(analysis, ResumeAnalysis) and analysis.name == "Extraction Failed":
345
+ st.error(f"Extraction failed for {file_name}. Skipping.")
346
+ continue
347
+
 
348
  score = calculate_resume_score(analysis)
349
  append_analysis_to_dataframe(selected_role, analysis, score)
350
+
351
  st.session_state.individual_analysis.append({
352
  'name': analysis.name,
353
  'score': score,
 
356
  })
357
 
358
  progress_bar.progress((i + 1) / total_files)
359
+
360
+ st.success(f"**βœ… Successfully processed {len(st.session_state.individual_analysis)} / {total_files} resumes.**")
361
+
 
 
362
  if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
363
  st.subheader("Last Analysis Summary")
364
  for item in st.session_state.individual_analysis:
365
  st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
366
+
367
  st.markdown("---")
368
  st.caption("All analyzed data is stored in the **Admin Dashboard**.")
369
 
 
 
 
370
  with tab_admin:
 
371
  if not st.session_state.is_admin_logged_in:
372
  st.header("Admin Login")
373
  password = st.text_input("Enter Admin Password", type="password")
374
  if st.button("πŸ”‘ Login"):
375
  if password == ADMIN_PASSWORD:
376
  st.session_state.is_admin_logged_in = True
377
+ st.experimental_rerun()
378
  else:
379
  st.error("Incorrect password.")
380
  st.stop()
381
+
382
  st.header("🎯 Recruitment Dashboard")
383
  st.markdown("---")
384
+
385
  if st.button("πŸšͺ Logout"):
386
  st.session_state.is_admin_logged_in = False
387
+ st.experimental_rerun()
388
 
389
  if st.session_state.analyzed_data.empty:
390
  st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
391
  else:
392
  df = st.session_state.analyzed_data.copy()
 
393
  st.subheader("Candidate Data Table")
394
  st.success(f"**Total Candidates Analyzed: {len(df)}**")
395
 
396
  display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']
397
+
398
  edited_df = st.data_editor(
399
  df[display_cols],
400
  column_config={
 
408
  key="dashboard_editor",
409
  hide_index=True
410
  )
411
+
412
+ # Persist shortlist changes back to session state
413
  st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']
414
 
415
  st.markdown("---")
 
416
  st.subheader("πŸ“₯ Download Data")
417
 
418
  df_export = st.session_state.analyzed_data.copy()
 
423
 
424
  st.download_button(
425
  label="πŸ’Ύ Download All Data as Excel (.xlsx)",
426
+ data=excel_buffer.getvalue(),
427
  file_name="quantum_scrutiny_report.xlsx",
428
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
429
  help="Downloads the full table including all extracted fields and shortlist status."
430
  )
431
 
432
+ # End of file