meesamraza committed on
Commit
58b9e2b
·
verified ·
1 Parent(s): d68ff83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +383 -46
app.py CHANGED
@@ -1,56 +1,393 @@
 
 
 
 
 
1
  import os
 
 
2
  from groq import Groq
3
- import streamlit as st
4
  from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- # Load API key from .env file
7
- load_dotenv()
8
- api_key = os.getenv("GROQ_API_KEY")
9
-
10
- # Initialize the Groq client
11
- client = Groq(api_key=api_key)
12
-
13
- # Define the programming development topics for the chatbot
14
- developer_topics = [
15
- "best programming languages", "web development frameworks", "version control with Git",
16
- "debugging tips", "data structures and algorithms", "object-oriented programming",
17
- "functional programming", "software design patterns", "API design and development",
18
- "devops practices", "cloud computing", "front-end development", "back-end development",
19
- "machine learning", "deep learning", "software testing and QA", "agile methodologies",
20
- "CI/CD pipelines", "database design", "programming best practices", "security in development",
21
- "mobile app development", "project management for developers", "open source contribution",
22
- "developer tools and IDEs", "documentation and code commenting", "coding interview preparation"
23
- ]
24
-
25
- # Function to fetch chatbot completion from Groq API
26
- def get_response(query):
27
- completion = client.chat.completions.create(
28
- model="llama-3.3-70b-versatile",
29
- messages=[{"role": "user", "content": query}],
30
- temperature=0.7,
31
- max_completion_tokens=2024,
32
- top_p=1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  )
34
- response = completion.choices[0].message.content
35
- return response
36
 
37
- def main():
38
- st.title("Programming Developer Advisor Chatbot")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- # Let the user choose a developer-related topic or type a custom query
41
- topic = st.selectbox("Choose a programming topic", developer_topics)
42
- user_input = st.text_area("Or ask a programming-related question:", "")
43
 
44
- # If the user provides a query (not from audio), use that directly
45
- if user_input:
46
- query = user_input
47
- response = get_response(query)
48
- st.write("### Response:")
49
- st.write(response)
50
 
51
- # Handle unrelated queries
52
- if user_input and not any(topic in user_input.lower() for topic in developer_topics):
53
- st.write("Sorry, I can only answer programming-related questions.")
 
 
 
 
54
 
55
- if __name__ == "__main__":
56
- main()
 
1
# src/streamlit_app.py

import streamlit as st
import pandas as pd
import io
import os
import fitz  # PyMuPDF
import docx2txt
from groq import Groq
from dotenv import load_dotenv
from pydantic import BaseModel, Field

# --- 1. CONFIGURATION AND INITIALIZATION ---

# Load environment variables from the project root's .env file. The explicit
# path (one directory up from this file) keeps the lookup independent of the
# working directory the app is launched from.
load_dotenv(os.path.join(os.path.dirname(__file__), '..', '.env'))

GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Initialize the Groq client, halting the app with a clear message on failure.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the .env file is in the project root and contains your key.")
    st.stop()

# Admin password. FIX: read from the environment when provided so deployments
# can override it; falls back to the original hard-coded default for backward
# compatibility. NOTE(review): a plain-text password compared in code is not
# secure — consider st.secrets or a real auth layer.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Initialize session state: the admin-login flag and the accumulated table of
# analyzed resumes.
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    # Empty DataFrame with the full column layout so the dashboard/display/
    # export code can rely on the schema before any resume is analyzed.
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)
48
+
49
+
50
+ # --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---
51
 
52
class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction.

    The Field descriptions double as extraction instructions for the LLM
    (they are surfaced through the model's JSON schema). The last three
    fields are only meaningful when the target role is 'Therapist'.
    """
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: list[str] = Field(description="List of professional certifications (e.g., PMP, AWS Certified).")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")
    # Free-text on purpose: the scorer parses the leading number from
    # strings such as '7-High'.
    communication_skills: str = Field(description="A rating (1-10) or brief description of communication skills based on the resume language.")
    technical_skills: list[str] = Field(description="List of technical skills/technologies mentioned (e.g., Python, SQL, Docker).")
    # Therapist-only fields below.
    aba_therapy_skills: str = Field(description="Specific mention or score (1-10) for ABA Therapy skills, ONLY if the role is 'Therapist'.")
    rbt_bcba_certification: str = Field(description="Indicate 'Yes' or 'No' if RBT/BCBA certification is mentioned, ONLY if the role is 'Therapist'.")
    autism_care_experience_score: str = Field(description="A score (1-10) for Autism-Care Experience, ONLY if the role is 'Therapist'.")
65
+
66
+ # --- 3. HELPER FUNCTIONS ---
67
+
68
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX file.

    Unsupported file types yield an empty string. Extraction errors are
    reported via st.error and also yield an empty string (best-effort).
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    try:
        if uploaded_file.type == "application/pdf":
            # PyMuPDF: concatenate the text of every page.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc)
        if uploaded_file.type == docx_mime:
            # docx2txt accepts the file-like object directly.
            return docx2txt.process(uploaded_file)
        return ""
    except Exception as e:
        st.error(f"Error extracting text: {e}")
        return ""
87
+
88
@st.cache_data(show_spinner="Analyzing resume with Groq...")
def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Use Groq chat completions (JSON mode) to extract structured resume data.

    Returns a populated ResumeAnalysis on success, or a sentinel object with
    name == "Extraction Failed" when the API call or parsing fails.
    """

    # Extra instructions injected only for the Therapist role, which has three
    # specialized fields in the schema.
    therapist_instructions = ""
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for: "
            "1. ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "2. Provide a score from 1-10 for the specialized fields: 'aba_therapy_skills' and 'autism_care_experience_score'. "
            "3. Set 'rbt_bcba_certification' to 'Yes' or 'No'."
        )

    # JSON mode does not accept a schema parameter, so the Pydantic JSON
    # schema is embedded in the system prompt instead.
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Follow the instructions precisely and return a JSON object that strictly adheres to the provided Pydantic schema. "
        f"For skills, provide a list of 5-10 most relevant items. {therapist_instructions} "
        f"JSON schema: {ResumeAnalysis.schema()}"
    )

    try:
        chat_completion = groq_client.chat.completions.create(
            # NOTE(review): verify mixtral-8x7b-32768 is still served by Groq.
            model="mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
            ],
            # FIX: the Groq SDK parameter is `response_format`, not
            # `response_model`. The old unknown kwarg raised TypeError, so
            # every call fell straight into the failure branch below.
            response_format={"type": "json_object"},
            temperature=0.0
        )

        # The response content is a JSON string matching the schema above.
        analysis = ResumeAnalysis.parse_raw(chat_completion.choices[0].message.content)
        return analysis

    except Exception as e:
        st.error(f"Groq API Error: {e}")
        # Sentinel object signalling failure to the caller.
        return ResumeAnalysis(name="Extraction Failed", email="", phone="", certifications=[], experience_summary="", education_summary="", communication_skills="0", technical_skills=[], aba_therapy_skills="0", rbt_bcba_certification="No", autism_care_experience_score="0")
129
+
130
+
131
def calculate_resume_score(analysis: ResumeAnalysis) -> float:
    """Compute a weighted resume score out of 100.

    Weights: Experience 40%, Skills 30%, Communication 20%, Certifications
    10%, plus an up-to-10-point Therapist-specific bonus. The total is capped
    at 100 and rounded to a whole number (returned as float).
    """

    def _rating(text: str, default: float) -> float:
        """Parse the leading number from strings like '7-High'; clamp to 0-10."""
        try:
            value = float(text.split('-')[0].strip())
        except (ValueError, IndexError):
            return default  # Unparsable text from the LLM
        # FIX: clamp so a malformed value (e.g. '100') cannot blow up the
        # component weighting.
        return min(max(value, 0.0), 10.0)

    total_score = 0.0

    # 1. Experience (max 40 points). Heuristic: a longer summary suggests
    # more experience was found; 100 characters earns the full 40.
    exp_factor = min(len(analysis.experience_summary) / 100.0, 1.0)
    total_score += exp_factor * 40.0

    # 2. Skills (max 30 points): up to 10 relevant skills earn full credit.
    skills_factor = min(len(analysis.technical_skills) / 10.0, 1.0)
    total_score += skills_factor * 30.0

    # 3. Communication (max 20 points): scale the 1-10 LLM rating; default
    # to 5 when the text is unparsable.
    comm_rating = _rating(analysis.communication_skills, default=5.0)
    total_score += (comm_rating / 10.0) * 20.0

    # 4. Certifications (max 10 points): one point per certification.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # Therapist-specific bonus (max 10 points, not 5 as a stale comment once
    # claimed): average of the two specialized 1-10 scores; non-numeric
    # scores simply earn no bonus.
    if st.session_state.get('selected_role') == "Therapist":
        try:
            aba_score = float(analysis.aba_therapy_skills.split('-')[0].strip())
            autism_score = float(analysis.autism_care_experience_score.split('-')[0].strip())
        except (ValueError, IndexError):
            pass
        else:
            aba_score = min(max(aba_score, 0.0), 10.0)
            autism_score = min(max(autism_score, 0.0), 10.0)
            total_score += ((aba_score + autism_score) / 20.0) * 10.0

    # Final cleanup and capping
    final_score = round(min(total_score, 100))
    return float(final_score)
184
+
185
+
186
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Format one analysis result and append it to the session-state DataFrame."""

    # Pydantic model -> plain dict of the extracted fields.
    # (FIX: dropped the dead writes of 'Job Role'/'Resume Score'/'Shortlisted'
    # into this dict — the row below is built explicitly.)
    data = analysis.dict()

    # Flatten list fields into comma-separated strings for display/Excel.
    technical_skills_list = ", ".join(data['technical_skills'])
    certifications_list = ", ".join(data['certifications'])

    # Build the new row. 'Shortlisted' starts as 'No'; the admin flips it in
    # the dashboard's data editor.
    df_data = {
        'Name': data['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': data['email'],
        'Phone': data['phone'],
        'Shortlisted': 'No',
        'Experience Summary': data['experience_summary'],
        'Education Summary': data['education_summary'],
        'Communication Rating (1-10)': data['communication_skills'],
        'Skills/Technologies': technical_skills_list,
        'Certifications': certifications_list,
        'ABA Skills (1-10)': data['aba_therapy_skills'],
        'RBT/BCBA Cert': data['rbt_bcba_certification'],
        'Autism-Care Exp (1-10)': data['autism_care_experience_score'],
    }

    # Append as a single-row DataFrame via pd.concat (DataFrame.append was
    # removed in modern pandas).
    new_df = pd.DataFrame([df_data])
    st.session_state.analyzed_data = pd.concat([st.session_state.analyzed_data, new_df], ignore_index=True)
222
+
223
+
224
# --- 4. APP LAYOUT AND LOGIC ---

# NOTE(review): Streamlit requires st.set_page_config to be the FIRST
# Streamlit command of a run; the st.error/st.stop branches in the
# initialization section above may execute before it — confirm ordering.
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

# --- Tabs for User and Admin ---
# FIX: repaired mojibake-garbled emoji in the tab labels.
tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])
232
+
233
# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")

    # Job Role Selection — keyed into session state because
    # calculate_resume_score reads 'selected_role' for the Therapist bonus.
    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )

    # File Uploader
    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)

            # Reset the per-batch summary shown below.
            st.session_state.individual_analysis = []
            analyzed_count = 0  # files that made it all the way through

            with st.status("Processing Resumes...", expanded=True) as status_box:

                for i, file in enumerate(uploaded_files):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")

                    # FIX: advance the bar for every file, including skipped
                    # ones — previously `continue` bypassed the update and
                    # froze the progress bar.
                    progress_bar.progress((i + 1) / total_files)

                    # 1. Extract Text
                    resume_text = extract_text_from_file(file)
                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                        continue

                    # 2. Analyze with Groq
                    analysis = analyze_resume_with_groq(resume_text, selected_role)
                    if analysis.name == "Extraction Failed":
                        st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        continue

                    # 3. Calculate Score
                    score = calculate_resume_score(analysis)

                    # 4. Store Data
                    append_analysis_to_dataframe(selected_role, analysis, score)
                    analyzed_count += 1

                    # Record for the per-batch summary below.
                    st.session_state.individual_analysis.append({
                        'name': analysis.name,
                        'score': score,
                        'role': selected_role,
                        'file_name': file_name
                    })

                status_box.update(label="Analysis Complete!", state="complete", expanded=False)

            # FIX: report the count actually analyzed, not merely uploaded.
            st.success(f"**✅ Successfully analyzed {analyzed_count} of {total_files} resumes.**")

    # Display results of the last batch of analysis
    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

    st.markdown("---")
    st.caption("All analyzed data is stored in the **Admin Dashboard**.")
314
+
315
# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:

    # --- Login gate: everything below only renders once authenticated. ---
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()  # Halt this script run until logged in

    # --- Dashboard Content (Logged In) ---
    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")

    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()

        # --- 1. Shortlisting & Data Display ---
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Key columns for display
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        # Editable table: the admin can flip the 'Shortlisted' status.
        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Persist the edited shortlist status back into session state so the
        # change survives reruns (edited_df rows align by index with the
        # stored DataFrame).
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")

        # --- 2. Download Excel File ---
        st.subheader("📥 Download Data")

        # Export the full table (all columns), not just the display subset.
        df_export = st.session_state.analyzed_data.copy()

        # Write the workbook into an in-memory buffer. FIX: seek(0) is placed
        # after the writer context closes — openpyxl only flushes the
        # workbook bytes on close, so rewinding earlier is meaningless.
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )
392
 
393
+ # --- End of src/streamlit_app.py ---