scmlewis committed on
Commit
34cbce4
·
verified ·
1 Parent(s): edcaccf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -30
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- # Business-Focused Streamlit Application for AI Talent Screening
3
 
4
  import streamlit as st
5
  from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
@@ -21,7 +21,7 @@ st.set_page_config(
21
  initial_sidebar_state="expanded",
22
  )
23
 
24
- # --- CUSTOM PROFESSIONAL CSS OVERHAUL ---
25
  st.markdown("""
26
  <style>
27
  /* 0. GLOBAL CONFIG & LIGHT THEME */
@@ -98,7 +98,7 @@ st.markdown("""
98
  font-weight: bold;
99
  }
100
  .stSidebar {
101
- background-color: #E9ECEF; /* Slightly darker sidebar for contrast */
102
  border-right: 1px solid #DEE2E6;
103
  }
104
 
@@ -139,10 +139,12 @@ st.markdown("""
139
  </style>
140
  """, unsafe_allow_html=True)
141
 
142
-
143
  # --- (Model and Helper Functions - Core logic remains the same) ---
144
- # NOTE: The core ML logic and utility functions for PDF/DOCX parsing remain unchanged
145
- # as they are robust and purely functional.
 
 
 
146
  skills_list = [
147
  'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
148
  'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
@@ -159,14 +161,15 @@ skills_list = [
159
  ]
160
  skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
161
 
162
- # Utility functions (simplified for brevity here, but full code in final output)
163
  def extract_text_from_pdf(file):
164
  try:
165
  pdf_reader = PyPDF2.PdfReader(file)
166
  text = ""
167
  for page in pdf_reader.pages:
168
  page_text = page.extract_text()
169
- if page_text: text += page_text + "\n"
 
170
  return text.strip()
171
  except: return ""
172
 
@@ -178,7 +181,7 @@ def extract_text_from_docx(file):
178
  text += paragraph.text + "\n"
179
  return text.strip()
180
  except: return ""
181
-
182
  def extract_text_from_file(uploaded_file):
183
  if uploaded_file.name.endswith('.pdf'): return extract_text_from_pdf(uploaded_file)
184
  elif uploaded_file.name.endswith('.docx'): return extract_text_from_docx(uploaded_file)
@@ -190,7 +193,6 @@ def normalize_text(text):
190
  return text
191
 
192
  def check_experience_mismatch(resume, job_description):
193
- # ... (Experience mismatch logic remains) ...
194
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
195
  job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
196
  if resume_match and job_match:
@@ -212,7 +214,6 @@ def validate_input(text, is_resume=True):
212
 
213
  @st.cache_resource
214
  def load_models():
215
- # ... (Model loading logic remains) ...
216
  bert_model_path = 'scmlewis/bert-finetuned-isom5240'
217
  bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
218
  bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
@@ -227,7 +228,6 @@ def load_models():
227
 
228
  @st.cache_data
229
  def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
230
- # ... (Tokenization logic remains) ...
231
  job_description_norm = normalize_text(job_description)
232
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
233
  bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
@@ -248,7 +248,6 @@ def extract_skills(text):
248
 
249
  @st.cache_data
250
  def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input, _t5_tok, _job_skills_set):
251
- # ... (Inference and classification logic remains) ...
252
  _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
253
  timeout = 60
254
 
@@ -307,7 +306,6 @@ def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input,
307
 
308
  @st.cache_data
309
  def generate_skill_pie_chart(resumes):
310
- # ... (Pie chart logic remains, but with business colors) ...
311
  skill_counts = {}
312
  total_resumes = len([r for r in resumes if r.strip()])
313
  if total_resumes == 0: return None
@@ -328,10 +326,9 @@ def generate_skill_pie_chart(resumes):
328
  labels = list(top_skills.keys())
329
  sizes = [(count / sum(top_skills.values())) * 100 for count in top_skills.values()]
330
 
331
- # Use standard white background for a business report look
332
  plt.style.use('default')
333
  fig, ax = plt.subplots(figsize=(6, 4))
334
- colors = plt.cm.tab10(np.linspace(0, 1, len(labels))) # Professional color map
335
  plt.rcParams['text.color'] = 'black'
336
  wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': 'black'})
337
  ax.axis('equal')
@@ -339,10 +336,16 @@ def generate_skill_pie_chart(resumes):
339
  return fig
340
 
341
  def render_sidebar():
342
- """Render sidebar content with professional HR language."""
 
 
 
 
 
 
343
  with st.sidebar:
344
  st.markdown(f"""
345
- <h2 style='text-align: center; border-left: none; padding-left: 0; color: #007BFF;'>
346
  Talent Screening Assistant
347
  </h2>
348
  <p style='text-align: center; font-size: 14px; margin-top: 0; color: #6C757D;'>
@@ -367,10 +370,11 @@ def render_sidebar():
367
  """)
368
 
369
  with st.expander("🎯 Screening Outcomes Explained", expanded=False):
 
370
  st.markdown(f"""
371
- - **Relevant** (`{st.get_style_color("success")}`): Strong match across all criteria. Proceed to interview.
372
- - **Irrelevant** (`{st.get_style_color("danger")}`): Low skill overlap or poor fit. Pass on candidate.
373
- - **Requires Review** (`{st.get_style_color("warning")}`): **Flagged** due to Experience Mismatch or Lower AI confidence. Requires manual review by a hiring manager.
374
  """)
375
 
376
  def main():
@@ -499,8 +503,17 @@ def main():
499
 
500
  for i, resume in enumerate(valid_resumes):
501
  status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
502
- bert_tok_single = {'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0), 'attention_mask': bert_tokenized['attention_ids'][i].unsqueeze(0)}
503
- t5_tok_single = {'input_ids': t5_tokenized['input_ids'][i].unsqueeze(0), 'attention_mask': t5_tokenized['attention_mask'][i].unsqueeze(0)}
 
 
 
 
 
 
 
 
 
504
  result = classify_and_summarize_batch(resume, job_description, bert_tok_single, t5_inputs[i], t5_tok_single, job_skills_set)
505
  result["Profile ID"] = f"Candidate {i+1}"
506
  results.append(result)
@@ -527,37 +540,43 @@ def main():
527
 
528
  st.markdown(f"#### Overview: {total} Candidate Profiles Processed")
529
 
 
 
 
 
 
 
530
  col1, col2, col3, col4 = st.columns(4)
531
 
532
  with col1:
533
  st.markdown(f"""
534
  <div class='scorecard-block'>
535
  <div class='scorecard-label'>TOTAL PROFILES</div>
536
- <div class='scorecard-value' style='color:{st.get_style_color("primary")};'>{total}</div>
537
  </div>
538
  """, unsafe_allow_html=True)
539
 
540
  with col2:
541
  st.markdown(f"""
542
  <div class='scorecard-block block-relevant'>
543
- <div class='scorecard-label' style='color: {st.get_style_color("success")};'>RELEVANT MATCHES</div>
544
- <div class='scorecard-value' style='color: {st.get_style_color("success")};'>{relevant_count}</div>
545
  </div>
546
  """, unsafe_allow_html=True)
547
 
548
  with col3:
549
  st.markdown(f"""
550
  <div class='scorecard-block block-uncertain'>
551
- <div class='scorecard-label' style='color: {st.get_style_color("warning")};'>REQUIRES REVIEW</div>
552
- <div class='scorecard-value' style='color: {st.get_style_color("warning")};'>{review_count}</div>
553
  </div>
554
  """, unsafe_allow_html=True)
555
 
556
  with col4:
557
  st.markdown(f"""
558
  <div class='scorecard-block block-irrelevant'>
559
- <div class='scorecard-label' style='color: {st.get_style_color("danger")};'>IRRELEVANT / ERROR</div>
560
- <div class='scorecard-value' style='color: {st.get_style_color("danger")};'>{irrelevant_count}</div>
561
  </div>
562
  """, unsafe_allow_html=True)
563
 
 
1
  # app.py
2
+ # Business-Focused Streamlit Application for AI Talent Screening (FIXED)
3
 
4
  import streamlit as st
5
  from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
 
21
  initial_sidebar_state="expanded",
22
  )
23
 
24
+ # --- CUSTOM PROFESSIONAL CSS OVERHAUL (UNCHANGED) ---
25
  st.markdown("""
26
  <style>
27
  /* 0. GLOBAL CONFIG & LIGHT THEME */
 
98
  font-weight: bold;
99
  }
100
  .stSidebar {
101
+ background-color: #E9ECEF; /* Sidebar background color matching light theme */
102
  border-right: 1px solid #DEE2E6;
103
  }
104
 
 
139
  </style>
140
  """, unsafe_allow_html=True)
141
 
 
142
  # --- (Model and Helper Functions - Core logic remains the same) ---
143
+ # The helpers below (skills_list, skills_pattern, extract_text_from_pdf/docx/file, normalize_text, check_experience_mismatch, validate_input, load_models, tokenize_inputs, extract_skills, classify_and_summarize_batch, generate_skill_pie_chart) keep their existing logic.
144
+
145
+ # NOTE: This file is large; the only functional changes in this revision are in `render_sidebar`
146
+ # and the affected part of `main`. The rest of the file is the complete code, with the fixes applied.
147
+
148
  skills_list = [
149
  'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
150
  'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
 
161
  ]
162
  skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
163
 
164
+ # Helper functions for CV parsing
165
  def extract_text_from_pdf(file):
166
  try:
167
  pdf_reader = PyPDF2.PdfReader(file)
168
  text = ""
169
  for page in pdf_reader.pages:
170
  page_text = page.extract_text()
171
+ if page_text:
172
+ text += page_text + "\n"
173
  return text.strip()
174
  except: return ""
175
 
 
181
  text += paragraph.text + "\n"
182
  return text.strip()
183
  except: return ""
184
+
185
  def extract_text_from_file(uploaded_file):
186
  if uploaded_file.name.endswith('.pdf'): return extract_text_from_pdf(uploaded_file)
187
  elif uploaded_file.name.endswith('.docx'): return extract_text_from_docx(uploaded_file)
 
193
  return text
194
 
195
  def check_experience_mismatch(resume, job_description):
 
196
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
197
  job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
198
  if resume_match and job_match:
 
214
 
215
  @st.cache_resource
216
  def load_models():
 
217
  bert_model_path = 'scmlewis/bert-finetuned-isom5240'
218
  bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
219
  bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
 
228
 
229
  @st.cache_data
230
  def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
 
231
  job_description_norm = normalize_text(job_description)
232
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
233
  bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
 
248
 
249
  @st.cache_data
250
  def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input, _t5_tok, _job_skills_set):
 
251
  _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
252
  timeout = 60
253
 
 
306
 
307
  @st.cache_data
308
  def generate_skill_pie_chart(resumes):
 
309
  skill_counts = {}
310
  total_resumes = len([r for r in resumes if r.strip()])
311
  if total_resumes == 0: return None
 
326
  labels = list(top_skills.keys())
327
  sizes = [(count / sum(top_skills.values())) * 100 for count in top_skills.values()]
328
 
 
329
  plt.style.use('default')
330
  fig, ax = plt.subplots(figsize=(6, 4))
331
+ colors = plt.cm.tab10(np.linspace(0, 1, len(labels)))
332
  plt.rcParams['text.color'] = 'black'
333
  wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': 'black'})
334
  ax.axis('equal')
 
336
  return fig
337
 
338
  def render_sidebar():
339
+ """Render sidebar content with professional HR language. FIXED: Replaced st.get_style_color with hex codes."""
340
+ # Define hex colors to replace st.get_style_color() calls
341
+ SUCCESS_COLOR = "#28A745" # Corporate Green
342
+ WARNING_COLOR = "#FFC107" # Corporate Yellow
343
+ DANGER_COLOR = "#DC3545" # Corporate Red
344
+ PRIMARY_COLOR = "#007BFF" # Corporate Blue
345
+
346
  with st.sidebar:
347
  st.markdown(f"""
348
+ <h2 style='text-align: center; border-left: none; padding-left: 0; color: {PRIMARY_COLOR};'>
349
  Talent Screening Assistant
350
  </h2>
351
  <p style='text-align: center; font-size: 14px; margin-top: 0; color: #6C757D;'>
 
370
  """)
371
 
372
  with st.expander("🎯 Screening Outcomes Explained", expanded=False):
373
+ # FIXED: Replaced st.get_style_color with hex color codes
374
  st.markdown(f"""
375
+ - **Relevant** (`{SUCCESS_COLOR}`): Strong match across all criteria. Proceed to interview.
376
+ - **Irrelevant** (`{DANGER_COLOR}`): Low skill overlap or poor fit. Pass on candidate.
377
+ - **Requires Review** (`{WARNING_COLOR}`): **Flagged** due to Experience Mismatch or Lower AI confidence. Requires manual review by a hiring manager.
378
  """)
379
 
380
  def main():
 
503
 
504
  for i, resume in enumerate(valid_resumes):
505
  status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
506
+
507
+ # Create single-batch tensors for BERT and T5
508
+ bert_tok_single = {
509
+ 'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0),
510
+ 'attention_mask': bert_tokenized['attention_mask'][i].unsqueeze(0)
511
+ }
512
+ t5_tok_single = {
513
+ 'input_ids': t5_tokenized['input_ids'][i].unsqueeze(0),
514
+ 'attention_mask': t5_tokenized['attention_mask'][i].unsqueeze(0)
515
+ }
516
+
517
  result = classify_and_summarize_batch(resume, job_description, bert_tok_single, t5_inputs[i], t5_tok_single, job_skills_set)
518
  result["Profile ID"] = f"Candidate {i+1}"
519
  results.append(result)
 
540
 
541
  st.markdown(f"#### Overview: {total} Candidate Profiles Processed")
542
 
543
+ # Define hex colors again for the scorecard blocks
544
+ PRIMARY_COLOR = "#007BFF" # Corporate Blue
545
+ SUCCESS_COLOR = "#28A745" # Corporate Green
546
+ WARNING_COLOR = "#FFC107" # Corporate Yellow
547
+ DANGER_COLOR = "#DC3545" # Corporate Red
548
+
549
  col1, col2, col3, col4 = st.columns(4)
550
 
551
  with col1:
552
  st.markdown(f"""
553
  <div class='scorecard-block'>
554
  <div class='scorecard-label'>TOTAL PROFILES</div>
555
+ <div class='scorecard-value' style='color:{PRIMARY_COLOR};'>{total}</div>
556
  </div>
557
  """, unsafe_allow_html=True)
558
 
559
  with col2:
560
  st.markdown(f"""
561
  <div class='scorecard-block block-relevant'>
562
+ <div class='scorecard-label' style='color: {SUCCESS_COLOR};'>RELEVANT MATCHES</div>
563
+ <div class='scorecard-value' style='color: {SUCCESS_COLOR};'>{relevant_count}</div>
564
  </div>
565
  """, unsafe_allow_html=True)
566
 
567
  with col3:
568
  st.markdown(f"""
569
  <div class='scorecard-block block-uncertain'>
570
+ <div class='scorecard-label' style='color: {WARNING_COLOR};'>REQUIRES REVIEW</div>
571
+ <div class='scorecard-value' style='color: {WARNING_COLOR};'>{review_count}</div>
572
  </div>
573
  """, unsafe_allow_html=True)
574
 
575
  with col4:
576
  st.markdown(f"""
577
  <div class='scorecard-block block-irrelevant'>
578
+ <div class='scorecard-label' style='color: {DANGER_COLOR};'>IRRELEVANT / ERROR</div>
579
+ <div class='scorecard-value' style='color: {DANGER_COLOR};'>{irrelevant_count}</div>
580
  </div>
581
  """, unsafe_allow_html=True)
582