meesamraza committed on
Commit
5fe15a6
·
verified ·
1 Parent(s): d915eee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -35
app.py CHANGED
@@ -8,7 +8,8 @@ import fitz
8
  import docx2txt
9
  from groq import Groq
10
  from dotenv import load_dotenv
11
- from pydantic import BaseModel, Field
 
12
 
13
  # --- 0. FIX: SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
14
  st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
@@ -55,16 +56,22 @@ class ResumeAnalysis(BaseModel):
55
  name: str = Field(description="Full name of the candidate.")
56
  email: str = Field(description="Professional email address.")
57
  phone: str = Field(description="Primary phone number.")
58
- certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
59
  experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
60
  education_summary: str = Field(description="A concise summary of the candidate's highest education.")
61
 
62
- # --- FIX 1: Explicitly describe required STRING output format ---
63
- communication_skills: str = Field(description="A score as a STRING (e.g., '8') or brief description of communication skills.")
64
- technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
65
- aba_therapy_skills: str = Field(description="Specific mention or score as a STRING (e.g., '7') for ABA Therapy skills, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
66
- rbt_bcba_certification: str = Field(description="Indicate the STRING 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
67
- autism_care_experience_score: str = Field(description="A score as a STRING (e.g., '9') for Autism-Care Experience, ONLY if the role is 'Therapist'. Use the STRING 'N/A' if not applicable or found.")
 
 
 
 
 
 
68
 
69
 
70
  # --- 3. HELPER FUNCTIONS ---
@@ -95,30 +102,29 @@ def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
95
  therapist_instructions = ""
96
  if job_role == "Therapist":
97
  therapist_instructions = (
98
- "Because the job role is 'Therapist', you MUST carefully look for: "
99
- "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
100
- "2. Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
101
- "3. If any specialized therapist field is not found, you MUST use the **STRING** 'N/A'. "
102
- "4. Set 'rbt_bcba_certification' to the **STRING** 'Yes' or 'No'."
103
  )
104
  else:
105
- # For non-therapist roles, explicitly instruct the model to use 'N/A' for therapist fields
 
106
  therapist_instructions = (
107
- "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to the **STRING** 'N/A'."
108
  )
109
 
110
  # System Prompt for Groq
111
  system_prompt = (
112
  f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
113
  f"The candidate is applying for the role of '{job_role}'. "
114
- f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
115
- f"**IMPORTANT:** All values must be returned as the data type specified. Numerical scores must be enclosed in quotes to be treated as **STRING** types (e.g., \"8\"). "
116
- f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions}"
117
  )
118
 
119
  try:
120
  chat_completion = groq_client.chat.completions.create(
121
- model="mixtral-8x7b-32768", # Fast model suitable for this task
122
  messages=[
123
  {"role": "system", "content": system_prompt},
124
  {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
@@ -127,15 +133,23 @@ def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
127
  temperature=0.0
128
  )
129
 
130
- # The response is a JSON string, which we can parse into the Pydantic model
131
  analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
 
 
 
 
 
 
 
132
  return analysis
133
 
 
 
 
 
134
  except Exception as e:
135
- # This will now only catch errors related to the API connection or Pydantic structural errors
136
- # (e.g., list vs string), not the common type mismatches.
137
  st.error(f"Groq API Error: {e}")
138
- # Return an empty/default analysis object on failure
139
  return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="N/A", technical_skills=[], aba_therapy_skills="N/A", rbt_bcba_certification="N/A", autism_care_experience_score="N/A")
140
 
141
 
@@ -154,11 +168,11 @@ def calculate_resume_score(analysis: ResumeAnalysis) -> float:
154
 
155
  # 3. Communication Score (Max 20 points)
156
  try:
157
- # Safely parse the communication score string (e.g., '8' or '8-High')
158
- score_str = analysis.communication_skills.split('-')[0].strip()
159
  comm_rating = float(score_str)
160
  except (ValueError, IndexError):
161
- comm_rating = 5.0 # Default if unparsable
162
 
163
  score_comm = (comm_rating / 10.0) * 20.0
164
  total_score += score_comm
@@ -170,15 +184,15 @@ def calculate_resume_score(analysis: ResumeAnalysis) -> float:
170
  # --- Therapist-Specific Bonus Checks ---
171
  if st.session_state.get('selected_role') == "Therapist":
172
  try:
173
- # Safely parse specialized scores, handling 'N/A'
174
- aba_score = float(analysis.aba_therapy_skills.split('-')[0].strip()) if analysis.aba_therapy_skills != 'N/A' else 0.0
175
- autism_score = float(analysis.autism_care_experience_score.split('-')[0].strip()) if analysis.autism_care_experience_score != 'N/A' else 0.0
176
 
177
  # Add a bonus based on the average specialized scores (max 10 points)
178
  specialized_bonus = ((aba_score + autism_score) / 20.0) * 10.0
179
  total_score += specialized_bonus
180
- except (ValueError, IndexError):
181
- pass
182
 
183
 
184
  final_score = round(min(total_score, 100))
@@ -196,6 +210,12 @@ def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score:
196
  technical_skills_list = ", ".join(data['technical_skills'])
197
  certifications_list = ", ".join(data['certifications'])
198
 
 
 
 
 
 
 
199
  df_data = {
200
  'Name': data['name'],
201
  'Job Role': job_role,
@@ -205,12 +225,12 @@ def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score:
205
  'Shortlisted': data['Shortlisted'],
206
  'Experience Summary': data['experience_summary'],
207
  'Education Summary': data['education_summary'],
208
- 'Communication Rating (1-10)': data['communication_skills'],
209
  'Skills/Technologies': technical_skills_list,
210
  'Certifications': certifications_list,
211
- 'ABA Skills (1-10)': data['aba_therapy_skills'],
212
- 'RBT/BCBA Cert': data['rbt_bcba_certification'],
213
- 'Autism-Care Exp (1-10)': data['autism_care_experience_score'],
214
  }
215
 
216
  new_df = pd.DataFrame([df_data])
 
8
  import docx2txt
9
  from groq import Groq
10
  from dotenv import load_dotenv
11
+ from pydantic import BaseModel, Field, ValidationError # Added ValidationError
12
+ from typing import Optional, List # Added Optional and List
13
 
14
  # --- 0. FIX: SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
15
  st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")
 
56
  name: str = Field(description="Full name of the candidate.")
57
  email: str = Field(description="Professional email address.")
58
  phone: str = Field(description="Primary phone number.")
59
+ certifications: List[str] = Field(description="List of professional certifications.")
60
  experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
61
  education_summary: str = Field(description="A concise summary of the candidate's highest education.")
62
 
63
+ # --- CRITICAL FIX: Use str or Optional[str] and improve coercion ---
64
+ # The Groq model is returning INT (8) instead of STR ('8') for communication_skills.
65
+ # The most stable fix is to keep the field as str and rely on Groq's JSON mode
66
+ # but improve the prompt guidance. We will also update the helper functions to be more robust.
67
+ communication_skills: str = Field(description="A score as a STRING (e.g., '8') or description of communication skills.")
68
+ technical_skills: List[str] = Field(description="List of technical skills/technologies mentioned.")
69
+
70
  # The model may return null for these fields, so they are typed Optional[str]
71
+ # and are normalized to the string "N/A" in analyze_resume_with_groq if still None.
72
+ aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7'). Use 'N/A' if not applicable.")
73
+ rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'. Use 'N/A' if not applicable.")
74
+ autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9'). Use 'N/A' if not applicable.")
75
 
76
 
77
  # --- 3. HELPER FUNCTIONS ---
 
102
  therapist_instructions = ""
103
  if job_role == "Therapist":
104
  therapist_instructions = (
105
+ "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
106
+ "Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields. "
107
+ "If any specialized therapist field is not found, you MUST return **null** or **N/A** for that field."
 
 
108
  )
109
  else:
110
+ # For non-therapist roles, explicitly instruct the model to use 'null'
111
+ # so Optional[str] handles it cleanly.
112
  therapist_instructions = (
113
+ "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to **null** or **N/A**."
114
  )
115
 
116
  # System Prompt for Groq
117
  system_prompt = (
118
  f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
119
  f"The candidate is applying for the role of '{job_role}'. "
120
+ f"Return a JSON object that strictly adheres to the provided Pydantic schema. "
121
+ f"**CRITICAL:** Ensure 'communication_skills' is returned as a **STRING** value, even if it's a number (e.g., \"8\" NOT 8). " # <-- Re-emphasizing string output for the specific failing field
122
+ f"{therapist_instructions}"
123
  )
124
 
125
  try:
126
  chat_completion = groq_client.chat.completions.create(
127
+ model="mixtral-8x7b-32768",
128
  messages=[
129
  {"role": "system", "content": system_prompt},
130
  {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
 
133
  temperature=0.0
134
  )
135
 
136
+ # Parse the JSON response
137
  analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
138
+
139
+ # Post-processing: Ensure Optional fields are strings for score calculation
140
+ analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
141
+ analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
142
+ analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
143
+ analysis.communication_skills = str(analysis.communication_skills) # Coerce communication_skills to string just in case it passed validation as an int somehow
144
+
145
  return analysis
146
 
147
+ except ValidationError as ve:
148
+ st.error(f"Groq API Validation Error: The model returned incompatible data. Details: {ve}")
149
+ print(f"Failed JSON: {chat_completion.choices[0].message.content}") # Print the bad JSON for debugging
150
+ return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="N/A", technical_skills=[], aba_therapy_skills="N/A", rbt_bcba_certification="N/A", autism_care_experience_score="N/A")
151
  except Exception as e:
 
 
152
  st.error(f"Groq API Error: {e}")
 
153
  return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="N/A", technical_skills=[], aba_therapy_skills="N/A", rbt_bcba_certification="N/A", autism_care_experience_score="N/A")
154
 
155
 
 
168
 
169
  # 3. Communication Score (Max 20 points)
170
  try:
171
+ # Safely parse the communication score string, handling N/A or raw numbers
172
+ score_str = str(analysis.communication_skills).split('-')[0].strip() # Use str() to handle if it somehow remained an int
173
  comm_rating = float(score_str)
174
  except (ValueError, IndexError):
175
+ comm_rating = 5.0
176
 
177
  score_comm = (comm_rating / 10.0) * 20.0
178
  total_score += score_comm
 
184
  # --- Therapist-Specific Bonus Checks ---
185
  if st.session_state.get('selected_role') == "Therapist":
186
  try:
187
+ # Safely parse specialized scores, handling 'N/A' or None
188
+ aba_score = float(str(analysis.aba_therapy_skills).split('-')[0].strip()) if str(analysis.aba_therapy_skills).upper() not in ['N/A', 'NONE'] else 0.0
189
+ autism_score = float(str(analysis.autism_care_experience_score).split('-')[0].strip()) if str(analysis.autism_care_experience_score).upper() not in ['N/A', 'NONE'] else 0.0
190
 
191
  # Add a bonus based on the average specialized scores (max 10 points)
192
  specialized_bonus = ((aba_score + autism_score) / 20.0) * 10.0
193
  total_score += specialized_bonus
194
+ except (ValueError, IndexError, TypeError):
195
+ pass # Ignore if specialized scores are still corrupted
196
 
197
 
198
  final_score = round(min(total_score, 100))
 
210
  technical_skills_list = ", ".join(data['technical_skills'])
211
  certifications_list = ", ".join(data['certifications'])
212
 
213
+ # Ensure fields that might have been None are now strings for the DataFrame
214
+ comm_skills = str(data['communication_skills'] or 'N/A')
215
+ aba_skills = str(data['aba_therapy_skills'] or 'N/A')
216
+ rbt_cert = str(data['rbt_bcba_certification'] or 'N/A')
217
+ autism_exp = str(data['autism_care_experience_score'] or 'N/A')
218
+
219
  df_data = {
220
  'Name': data['name'],
221
  'Job Role': job_role,
 
225
  'Shortlisted': data['Shortlisted'],
226
  'Experience Summary': data['experience_summary'],
227
  'Education Summary': data['education_summary'],
228
+ 'Communication Rating (1-10)': comm_skills,
229
  'Skills/Technologies': technical_skills_list,
230
  'Certifications': certifications_list,
231
+ 'ABA Skills (1-10)': aba_skills,
232
+ 'RBT/BCBA Cert': rbt_cert,
233
+ 'Autism-Care Exp (1-10)': autism_exp,
234
  }
235
 
236
  new_df = pd.DataFrame([df_data])