scmlewis committed on
Commit
e7d4b15
·
verified ·
1 Parent(s): cd4a730

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -55
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- # Modern Dark Mode Streamlit Application for AI Talent Screening (FIXED: Color Scheme & Sidebar Text)
3
 
4
  import streamlit as st
5
  from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
@@ -11,11 +11,11 @@ import matplotlib.pyplot as plt
11
  import PyPDF2
12
  from docx import Document
13
  import time
14
- import pandas as pd # pandas import for st.table (previously missing, added for completeness)
15
 
16
  # Set page config with modern dark theme and wide layout
17
  st.set_page_config(
18
- page_title="AI Talent Screening Tool",
19
  page_icon="🚀",
20
  layout="wide",
21
  initial_sidebar_state="expanded",
@@ -26,13 +26,13 @@ st.markdown("""
26
  <style>
27
  /* 0. GLOBAL CONFIG & DARK THEME */
28
  :root {
29
- --primary-color: #42A5F5; /* Vibrant Blue (Titles/Accent) */
30
  --accent-gradient-start: #4F46E5; /* Deep Purple-Blue */
31
  --accent-gradient-end: #3B82F6; /* Brighter Blue */
32
  --success-color: #4CAF50; /* Green (Good Match) */
33
  --warning-color: #FFC107; /* Amber/Yellow (Review) */
34
  --danger-color: #F44336; /* Red (Irrelevant/Error) */
35
- --background-color: #1A1C20; /* Very Dark, Deep Background (Like the Reference Image) */
36
  --container-background: #23272F; /* Slightly Lighter Container */
37
  --text-color: #F8F8F8; /* Light Text */
38
  --secondary-text-color: #B0B0B0; /* Muted Light Gray */
@@ -48,10 +48,13 @@ st.markdown("""
48
  background-color: var(--background-color);
49
  }
50
 
51
- /* 1. HEADER & TITLES */
52
  h1 {
53
  text-align: center;
54
- color: var(--primary-color);
 
 
 
55
  font-size: 2.8em;
56
  font-weight: 800;
57
  border-bottom: 3px solid rgba(66, 165, 245, 0.3);
@@ -60,8 +63,8 @@ st.markdown("""
60
  }
61
  h2, h3, h4 {
62
  color: var(--text-color);
63
- border-left: 5px solid var(--primary-color); /* Blue marker for clarity */
64
- padding-left: 15px;
65
  margin-top: 30px;
66
  font-weight: 600;
67
  }
@@ -84,14 +87,30 @@ st.markdown("""
84
  /* Primary Button with Gradient */
85
  .stButton>button[kind="primary"] {
86
  color: white !important;
87
- /* Applying subtle gradient to the primary button (Analyze/Run Screening) */
88
  background: linear-gradient(90deg, var(--accent-gradient-start) 0%, var(--accent-gradient-end) 100%) !important;
89
- border-color: var(--accent-gradient-start) !important;
90
  }
91
  .stButton>button[kind="primary"]:hover {
92
- background: linear-gradient(90deg, #3B82F6 0%, #4F46E5 100%) !important; /* Invert or darken gradient on hover */
93
- border-color: #3B82F6 !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  }
 
 
 
 
95
 
96
  /* 3. INPUTS, CONTAINERS, TABS & SIDEBAR */
97
  .stTextArea, .stTextInput, .stFileUploader {
@@ -107,21 +126,48 @@ st.markdown("""
107
  font-weight: bold;
108
  }
109
  .stSidebar {
110
- background-color: #23272F; /* Sidebar background */
111
  border-right: 1px solid #3A3A3A;
112
  color: var(--text-color);
113
  }
114
 
115
- /* Fix: Ensure text in sidebar expanders is visible */
116
  [data-testid="stSidebar"] p,
117
  [data-testid="stSidebar"] li,
118
  [data-testid="stSidebar"] [data-testid="stExpander"] {
119
  color: var(--secondary-text-color) !important;
120
  }
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  /* Alert/Info Boxes for dark theme contrast */
123
  [data-testid="stAlert"] {
124
- background-color: var(--container-background) !important; /* Match container background */
125
  color: var(--text-color) !important;
126
  border-left: 5px solid;
127
  }
@@ -144,8 +190,10 @@ st.markdown("""
144
 
145
 
146
  # --- (Model and Helper Functions - Core logic remains the same) ---
147
- # NOTE: The core ML logic and utility functions for PDF/DOCX parsing remain unchanged
148
- # as they are robust and purely functional.
 
 
149
  skills_list = [
150
  'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
151
  'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
@@ -160,6 +208,8 @@ skills_list = [
160
  'agile methodologies', 'communication', 'team leadership',
161
  'databricks', 'synapse', 'delta lake', 'streamlit', 'fastapi', 'graphql', 'mlflow', 'kedro'
162
  ]
 
 
163
  skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
164
 
165
  # Helper functions for CV parsing
@@ -186,12 +236,17 @@ def extract_text_from_docx(file):
186
  except Exception as e:
187
  st.error(f"Error extracting text from Word document: {str(e)}")
188
  return ""
189
-
190
  def extract_text_from_file(uploaded_file):
191
- if uploaded_file.name.endswith('.pdf'): return extract_text_from_pdf(uploaded_file)
192
- elif uploaded_file.name.endswith('.docx'): return extract_text_from_docx(uploaded_file)
193
- return ""
 
 
 
 
194
 
 
195
  def normalize_text(text):
196
  text = text.lower()
197
  text = re.sub(r'_|-|,\s*collaborated in agile teams|,\s*developed solutions for|,\s*led projects involving|,\s*designed applications with|,\s*built machine learning models for|,\s*implemented data pipelines for|,\s*deployed cloud-based solutions|,\s*optimized workflows for|,\s*contributed to data-driven projects', '', text)
@@ -201,25 +256,33 @@ def check_experience_mismatch(resume, job_description):
201
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
202
  job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
203
  if resume_match and job_match:
204
- resume_years_text = resume_match.group(0)
205
- job_years_text = job_match.group(0)
206
- if 'senior' in resume_years_text: resume_num = 10
207
- else: resume_num = int(resume_match.group(1))
208
- if 'senior+' in job_years_text: job_num = 10
209
- else: job_num = int(job_match.group(1))
210
- if resume_num < job_num: return f"Experience mismatch: Resume has {resume_years_text.strip()}, job requires {job_years_text.strip()}"
 
 
 
 
 
211
  return None
212
 
213
  def validate_input(text, is_resume=True):
214
- if not text.strip() or len(text.strip()) < 10: return "Input is too short (minimum 10 characters)."
 
215
  text_normalized = normalize_text(text)
216
- if is_resume and not skills_pattern.search(text_normalized): return "Please include at least one data/tech skill (e.g., python, sql, databricks)."
217
- if is_resume and not re.search(r'\d+\s*year(s)?|senior', text.lower()): return "Please include experience (e.g., '3 years experience' or 'senior')."
 
 
218
  return None
219
 
220
  @st.cache_resource
221
  def load_models():
222
- # Model loading logic (unchanged)
223
  bert_model_path = 'scmlewis/bert-finetuned-isom5240'
224
  bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
225
  bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
@@ -238,12 +301,14 @@ def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
238
  job_description_norm = normalize_text(job_description)
239
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
240
  bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
 
241
  t5_inputs = []
242
  for resume in resumes:
243
  prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
244
  prompt_normalized = normalize_text(prompt)
245
  t5_inputs.append(f"summarize: {prompt_normalized}")
246
  t5_tokenized = _t5_tokenizer(t5_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
 
247
  return bert_tokenized, t5_inputs, t5_tokenized
248
 
249
  @st.cache_data
@@ -256,7 +321,7 @@ def extract_skills(text):
256
 
257
  @st.cache_data
258
  def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_input, _t5_tokenized, _job_skills_set):
259
- # Classification and Summary logic (unchanged, but adjusted summary content)
260
  _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
261
  timeout = 60
262
 
@@ -264,6 +329,7 @@ def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_i
264
  bert_tokenized = {k: v.to(device) for k, v in _bert_tokenized.items()}
265
  with torch.no_grad():
266
  outputs = bert_model(**bert_tokenized)
 
267
  logits = outputs.logits
268
  probabilities = torch.softmax(logits, dim=1).cpu().numpy()
269
  predictions = np.argmax(probabilities, axis=1)
@@ -273,7 +339,16 @@ def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_i
273
 
274
  t5_tokenized = {k: v.to(device) for k, v in _t5_tokenized.items()}
275
  with torch.no_grad():
276
- t5_outputs = t5_model.generate(t5_tokenized['input_ids'], attention_mask=t5_tokenized['attention_mask'], max_length=30, min_length=8, num_beams=2, no_repeat_ngram_size=3, length_penalty=2.0, early_stopping=True)
 
 
 
 
 
 
 
 
 
277
  summaries = [t5_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in t5_outputs]
278
  summary_raw = re.sub(r'\s+', ' ', summaries[0]).strip()
279
 
@@ -288,7 +363,7 @@ def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_i
288
  suitability = "Irrelevant"
289
  warning = "Low skill match (<40%) with job requirements"
290
  elif exp_warning:
291
- suitability = "Uncertain" # Changed to Uncertain for HR friendly language
292
  warning = exp_warning
293
  elif prob[pred] < confidence_threshold:
294
  suitability = "Uncertain"
@@ -305,12 +380,12 @@ def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_i
305
  elif detected_skills: final_summary = f"Key Skills: {', '.join(detected_skills)}"
306
  else: final_summary = f"Experience: {exp_match.group(0) if exp_match else 'Unknown'}"
307
 
308
- # Color codes based on new theme
309
  if suitability == "Relevant": color = "#4CAF50"
310
  elif suitability == "Irrelevant": color = "#F44336"
311
  else: color = "#FFC107"
312
 
313
- return {"Suitability": suitability, "Data/Tech Related Skills Summary": final_summary, "Warning": warning or "None", "Suitability_Color": color}
314
  except Exception as e:
315
  return {"Suitability": "Error", "Data/Tech Related Skills Summary": "Failed to process profile", "Warning": str(e), "Suitability_Color": "#F44336"}
316
 
@@ -340,7 +415,7 @@ def generate_skill_pie_chart(resumes):
340
  # Use dark theme settings for the chart
341
  plt.style.use('dark_background')
342
  fig, ax = plt.subplots(figsize=(6, 4))
343
- colors = plt.cm.plasma(np.linspace(0.2, 0.9, len(labels))) # Changed color map to plasma for better contrast
344
  plt.rcParams['text.color'] = '#F8F8F8'
345
  wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': '#F8F8F8'})
346
  ax.axis('equal')
@@ -393,21 +468,23 @@ def main():
393
  """Main function to run the Streamlit app for resume screening."""
394
  render_sidebar()
395
 
396
- # Initialize session state (using existing functional logic)
397
  if 'resumes' not in st.session_state: st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience", "", ""]
398
  if 'input_job_description' not in st.session_state: st.session_state.input_job_description = "Data scientist requires python, machine learning, 3 years+"
399
  if 'results' not in st.session_state: st.session_state.results = []
400
  if 'valid_resumes' not in st.session_state: st.session_state.valid_resumes = []
401
  if 'models' not in st.session_state: st.session_state.models = None
402
 
403
- st.markdown("<h1>🚀 AI TALENT SCREENING TOOL</h1>", unsafe_allow_html=True)
 
404
 
405
  # HR-friendly Tab Names
406
  tab_setup, tab_resumes, tab_results = st.tabs(["1. Job Requirement Setup", "2. Candidate Profile Upload", "3. Screening Report & Analytics"])
407
 
408
  # --- TAB 1: Setup & Job Description ---
409
  with tab_setup:
410
- st.subheader("Define Job Requirements")
 
411
  st.info("Please enter the **Job Description** below. This is essential for the AI to accurately match skills and experience levels.")
412
 
413
  job_description = st.text_area(
@@ -425,7 +502,8 @@ def main():
425
 
426
  # --- TAB 2: Manage Resumes ---
427
  with tab_resumes:
428
- st.subheader(f"Upload Candidate Profiles ({len(st.session_state.resumes)}/5)")
 
429
  st.info("Upload or paste candidate text below. The AI requires **key technical skills and experience statements** to function.")
430
 
431
  # Manage resume inputs
@@ -476,7 +554,7 @@ def main():
476
  reset_clicked = st.button("♻️ Reset All Inputs", use_container_width=True)
477
  st.markdown("---")
478
 
479
- # Handle reset and analysis logic
480
  if reset_clicked:
481
  st.session_state.resumes = ["", "", ""]
482
  st.session_state.input_job_description = ""
@@ -516,7 +594,6 @@ def main():
516
  for i, resume in enumerate(valid_resumes):
517
  status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
518
 
519
- # Create single-batch tensors for BERT and T5
520
  bert_tok_single = {
521
  'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0),
522
  'attention_mask': bert_tokenized['attention_mask'][i].unsqueeze(0)
@@ -534,7 +611,7 @@ def main():
534
  t5_tok_single,
535
  job_skills_set
536
  )
537
- result["Resume"] = f"Candidate {i+1}" # Renamed from "Profile ID" to "Resume" to match results section.
538
  results.append(result)
539
  progress_bar.progress((i + 1) / total_steps)
540
  st.session_state.results = results
@@ -546,12 +623,13 @@ def main():
546
 
547
  # --- TAB 3: Results (The Professional Report) ---
548
  with tab_results:
549
- st.subheader("Screening Results Summary")
 
550
 
551
  if st.session_state.results:
552
 
553
  # --- Scorecard Metrics (Professional Tiles) ---
554
- results_df = pd.DataFrame(st.session_state.results) # Ensure pandas is imported at the top
555
  total = len(results_df)
556
  relevant_count = len(results_df[results_df['Suitability'] == 'Relevant'])
557
  review_count = len(results_df[results_df['Suitability'] == 'Uncertain'])
@@ -567,6 +645,7 @@ def main():
567
 
568
  col1, col2, col3, col4 = st.columns(4)
569
 
 
570
  with col1:
571
  st.markdown(f"""
572
  <div class='scorecard-block'>
@@ -602,10 +681,8 @@ def main():
602
  st.markdown("---")
603
 
604
  # --- Detailed Report Table ---
605
- st.subheader("Detailed Screening Results")
606
 
607
- # Display DataFrame
608
- # Renaming the column from 'Data/Tech Related Skills Summary' to 'PROFILE SUMMARY' for the final display
609
  display_df = results_df.drop(columns=['Suitability_Color']).rename(columns={'Data/Tech Related Skills Summary': 'PROFILE SUMMARY', 'Warning': 'FLAGGING REASON'})
610
 
611
  st.dataframe(
@@ -624,7 +701,6 @@ def main():
624
  col_dl, col_chart_expander = st.columns([1, 3])
625
 
626
  with col_dl:
627
- # Use the original result columns for CSV download
628
  csv_buffer = io.StringIO()
629
  results_df.drop(columns=['Suitability_Color']).to_csv(csv_buffer, index=False)
630
 
@@ -652,7 +728,4 @@ def main():
652
 
653
 
654
  if __name__ == "__main__":
655
- # Ensure pandas is available for the main function to run without errors
656
- if 'pd' not in globals():
657
- import pandas as pd
658
  main()
 
1
  # app.py
2
+ # Modern Dark Mode Streamlit Application for AI Talent Screening (FIXED: Scorecard, Strokes, Colors, Header)
3
 
4
  import streamlit as st
5
  from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
 
11
  import PyPDF2
12
  from docx import Document
13
  import time
14
+ import pandas as pd
15
 
16
  # Set page config with modern dark theme and wide layout
17
  st.set_page_config(
18
+ page_title="AI Data/Tech Talent Screening Tool",
19
  page_icon="🚀",
20
  layout="wide",
21
  initial_sidebar_state="expanded",
 
26
  <style>
27
  /* 0. GLOBAL CONFIG & DARK THEME */
28
  :root {
29
+ --primary-color: #42A5F5; /* Vibrant Blue (Accent) */
30
  --accent-gradient-start: #4F46E5; /* Deep Purple-Blue */
31
  --accent-gradient-end: #3B82F6; /* Brighter Blue */
32
  --success-color: #4CAF50; /* Green (Good Match) */
33
  --warning-color: #FFC107; /* Amber/Yellow (Review) */
34
  --danger-color: #F44336; /* Red (Irrelevant/Error) */
35
+ --background-color: #1A1C20; /* Very Dark, Deep Background */
36
  --container-background: #23272F; /* Slightly Lighter Container */
37
  --text-color: #F8F8F8; /* Light Text */
38
  --secondary-text-color: #B0B0B0; /* Muted Light Gray */
 
48
  background-color: var(--background-color);
49
  }
50
 
51
+ /* 1. HEADER & TITLES - NEW GRADIENT AND NO BLUE STROKE */
52
  h1 {
53
  text-align: center;
54
+ /* Applying Text Gradient to H1 */
55
+ background: linear-gradient(90deg, var(--accent-gradient-start) 0%, var(--accent-gradient-end) 100%);
56
+ -webkit-background-clip: text;
57
+ -webkit-text-fill-color: transparent;
58
  font-size: 2.8em;
59
  font-weight: 800;
60
  border-bottom: 3px solid rgba(66, 165, 245, 0.3);
 
63
  }
64
  h2, h3, h4 {
65
  color: var(--text-color);
66
+ border-left: none; /* REMOVED THE BLUE STROKE */
67
+ padding-left: 0;
68
  margin-top: 30px;
69
  font-weight: 600;
70
  }
 
87
  /* Primary Button with Gradient */
88
  .stButton>button[kind="primary"] {
89
  color: white !important;
 
90
  background: linear-gradient(90deg, var(--accent-gradient-start) 0%, var(--accent-gradient-end) 100%) !important;
 
91
  }
92
  .stButton>button[kind="primary"]:hover {
93
+ background: linear-gradient(90deg, #3B82F6 0%, #4F46E5 100%) !important;
94
+ }
95
+
96
+ /* FIX: Style for Add/Remove Candidate Buttons */
97
+ .st-emotion-cache-1jmveo5 > div:nth-child(1) > div > button,
98
+ .st-emotion-cache-1jmveo5 > div:nth-child(2) > div > button {
99
+ color: var(--text-color) !important;
100
+ background-color: var(--container-background) !important;
101
+ }
102
+ .st-emotion-cache-1jmveo5 > div:nth-child(1) > div > button:hover,
103
+ .st-emotion-cache-1jmveo5 > div:nth-child(2) > div > button:hover {
104
+ background-color: #404040 !important;
105
+ }
106
+ /* FIX: Color the + and - icons (Streamlit's default icon color is text color) */
107
+ .st-emotion-cache-1jmveo5 > div:nth-child(1) > div > button > svg {
108
+ color: var(--accent-gradient-start) !important;
109
  }
110
+ .st-emotion-cache-1jmveo5 > div:nth-child(2) > div > button > svg {
111
+ color: var(--accent-gradient-end) !important;
112
+ }
113
+
114
 
115
  /* 3. INPUTS, CONTAINERS, TABS & SIDEBAR */
116
  .stTextArea, .stTextInput, .stFileUploader {
 
126
  font-weight: bold;
127
  }
128
  .stSidebar {
129
+ background-color: #23272F;
130
  border-right: 1px solid #3A3A3A;
131
  color: var(--text-color);
132
  }
133
 
134
+ /* FIX: Ensure text in sidebar expanders is visible */
135
  [data-testid="stSidebar"] p,
136
  [data-testid="stSidebar"] li,
137
  [data-testid="stSidebar"] [data-testid="stExpander"] {
138
  color: var(--secondary-text-color) !important;
139
  }
140
 
141
+ /* Scorecard Style (Tiles from previous version) */
142
+ .scorecard-block {
143
+ border: 1px solid #3A3A3A;
144
+ border-radius: 12px;
145
+ padding: 20px;
146
+ margin: 5px 0;
147
+ background-color: #333333;
148
+ transition: all 0.3s;
149
+ box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2);
150
+ }
151
+ .scorecard-block:hover {
152
+ box-shadow: 0 6px 15px rgba(0, 0, 0, 0.4);
153
+ }
154
+ .scorecard-value {
155
+ font-size: 38px;
156
+ font-weight: 800;
157
+ color: var(--primary-color);
158
+ }
159
+ .scorecard-label {
160
+ font-size: 14px;
161
+ color: var(--secondary-text-color);
162
+ }
163
+ /* Color override for specific blocks */
164
+ .block-relevant { border-left: 5px solid var(--success-color); }
165
+ .block-uncertain { border-left: 5px solid var(--warning-color); }
166
+ .block-irrelevant { border-left: 5px solid var(--danger-color); }
167
+
168
  /* Alert/Info Boxes for dark theme contrast */
169
  [data-testid="stAlert"] {
170
+ background-color: var(--container-background) !important;
171
  color: var(--text-color) !important;
172
  border-left: 5px solid;
173
  }
 
190
 
191
 
192
  # --- (Model and Helper Functions - Core logic remains the same) ---
193
+ # NOTE: Keeping the functional code from the provided app.py for brevity,
194
+ # as the changes are mainly aesthetic/structural outside of function definitions.
195
+
196
+ # Skills list (79 skills from Application_Demo.ipynb)
197
  skills_list = [
198
  'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
199
  'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
 
208
  'agile methodologies', 'communication', 'team leadership',
209
  'databricks', 'synapse', 'delta lake', 'streamlit', 'fastapi', 'graphql', 'mlflow', 'kedro'
210
  ]
211
+
212
+ # Precompile regex for skills matching (optimized for single pass)
213
  skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
214
 
215
  # Helper functions for CV parsing
 
236
  except Exception as e:
237
  st.error(f"Error extracting text from Word document: {str(e)}")
238
  return ""
239
+
def extract_text_from_file(uploaded_file):
    """Route an uploaded CV to the parser that matches its file extension.

    Args:
        uploaded_file: Object exposing a ``name`` attribute holding the
            original filename; it is passed unchanged to the chosen parser.

    Returns:
        The result of ``extract_text_from_pdf`` for ``.pdf`` files, the
        result of ``extract_text_from_docx`` for ``.docx`` files, and an
        empty string for any other extension.
    """
    # Compare case-insensitively so names such as "Resume.PDF" or "cv.Docx"
    # reach a parser instead of silently producing an empty string.
    filename = uploaded_file.name.lower()
    if filename.endswith('.pdf'):
        return extract_text_from_pdf(uploaded_file)
    if filename.endswith('.docx'):
        return extract_text_from_docx(uploaded_file)
    # Unsupported extension: no parser applies, so there is no text to return.
    return ""
248
 
249
+ # Helper functions for analysis
250
  def normalize_text(text):
251
  text = text.lower()
252
  text = re.sub(r'_|-|,\s*collaborated in agile teams|,\s*developed solutions for|,\s*led projects involving|,\s*designed applications with|,\s*built machine learning models for|,\s*implemented data pipelines for|,\s*deployed cloud-based solutions|,\s*optimized workflows for|,\s*contributed to data-driven projects', '', text)
 
256
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
257
  job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
258
  if resume_match and job_match:
259
+ resume_years = resume_match.group(0)
260
+ job_years = job_match.group(0)
261
+ if 'senior' in resume_years:
262
+ resume_num = 10
263
+ else:
264
+ resume_num = int(resume_match.group(1))
265
+ if 'senior+' in job_years:
266
+ job_num = 10
267
+ else:
268
+ job_num = int(job_match.group(1))
269
+ if resume_num < job_num:
270
+ return f"Experience mismatch: Resume has {resume_years.strip()}, job requires {job_years.strip()}"
271
  return None
272
 
def validate_input(text, is_resume=True):
    """Check that a pasted resume or job description is usable.

    Args:
        text: Raw text entered by the user.
        is_resume: When true, additionally require a recognised skill and an
            experience statement; when false only the length is checked.

    Returns:
        A human-readable error message describing the first failed check,
        or ``None`` when the text passes every applicable check.
    """
    stripped = text.strip()
    # An empty string has length 0, so one length test covers both cases.
    if len(stripped) < 10:
        return "Input is too short (minimum 10 characters)."

    # Normalisation runs for every sufficiently long input, resume or not.
    text_normalized = normalize_text(text)
    if not is_resume:
        return None

    if skills_pattern.search(text_normalized) is None:
        return "Please include at least one data/tech skill (e.g., python, sql, databricks)."

    # Experience is looked up in the raw (lower-cased) text, not the normalised one.
    if re.search(r'\d+\s*year(s)?|senior', text.lower()) is None:
        return "Please include experience (e.g., '3 years experience' or 'senior')."

    return None
282
 
283
  @st.cache_resource
284
  def load_models():
285
+ # Load models (unchanged)
286
  bert_model_path = 'scmlewis/bert-finetuned-isom5240'
287
  bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
288
  bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
 
301
  job_description_norm = normalize_text(job_description)
302
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
303
  bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
304
+
305
  t5_inputs = []
306
  for resume in resumes:
307
  prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
308
  prompt_normalized = normalize_text(prompt)
309
  t5_inputs.append(f"summarize: {prompt_normalized}")
310
  t5_tokenized = _t5_tokenizer(t5_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
311
+
312
  return bert_tokenized, t5_inputs, t5_tokenized
313
 
314
  @st.cache_data
 
321
 
322
  @st.cache_data
323
  def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_input, _t5_tokenized, _job_skills_set):
324
+ """Process one resume at a time to reduce CPU load with a timeout."""
325
  _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
326
  timeout = 60
327
 
 
329
  bert_tokenized = {k: v.to(device) for k, v in _bert_tokenized.items()}
330
  with torch.no_grad():
331
  outputs = bert_model(**bert_tokenized)
332
+
333
  logits = outputs.logits
334
  probabilities = torch.softmax(logits, dim=1).cpu().numpy()
335
  predictions = np.argmax(probabilities, axis=1)
 
339
 
340
  t5_tokenized = {k: v.to(device) for k, v in _t5_tokenized.items()}
341
  with torch.no_grad():
342
+ t5_outputs = t5_model.generate(
343
+ t5_tokenized['input_ids'],
344
+ attention_mask=t5_tokenized['attention_mask'],
345
+ max_length=30,
346
+ min_length=8,
347
+ num_beams=2,
348
+ no_repeat_ngram_size=3,
349
+ length_penalty=2.0,
350
+ early_stopping=True
351
+ )
352
  summaries = [t5_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in t5_outputs]
353
  summary_raw = re.sub(r'\s+', ' ', summaries[0]).strip()
354
 
 
363
  suitability = "Irrelevant"
364
  warning = "Low skill match (<40%) with job requirements"
365
  elif exp_warning:
366
+ suitability = "Uncertain"
367
  warning = exp_warning
368
  elif prob[pred] < confidence_threshold:
369
  suitability = "Uncertain"
 
380
  elif detected_skills: final_summary = f"Key Skills: {', '.join(detected_skills)}"
381
  else: final_summary = f"Experience: {exp_match.group(0) if exp_match else 'Unknown'}"
382
 
383
+ # Color codes based on new theme (needed for scorecard in main logic)
384
  if suitability == "Relevant": color = "#4CAF50"
385
  elif suitability == "Irrelevant": color = "#F44336"
386
  else: color = "#FFC107"
387
 
388
+ return {"Suitability": suitability, "Data/Tech Related Skills Summary": final_summary, "Warning": warning, "Suitability_Color": color}
389
  except Exception as e:
390
  return {"Suitability": "Error", "Data/Tech Related Skills Summary": "Failed to process profile", "Warning": str(e), "Suitability_Color": "#F44336"}
391
 
 
415
  # Use dark theme settings for the chart
416
  plt.style.use('dark_background')
417
  fig, ax = plt.subplots(figsize=(6, 4))
418
+ colors = plt.cm.plasma(np.linspace(0.2, 0.9, len(labels)))
419
  plt.rcParams['text.color'] = '#F8F8F8'
420
  wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': '#F8F8F8'})
421
  ax.axis('equal')
 
468
  """Main function to run the Streamlit app for resume screening."""
469
  render_sidebar()
470
 
471
+ # Initialize session state
472
  if 'resumes' not in st.session_state: st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience", "", ""]
473
  if 'input_job_description' not in st.session_state: st.session_state.input_job_description = "Data scientist requires python, machine learning, 3 years+"
474
  if 'results' not in st.session_state: st.session_state.results = []
475
  if 'valid_resumes' not in st.session_state: st.session_state.valid_resumes = []
476
  if 'models' not in st.session_state: st.session_state.models = None
477
 
478
+ # NEW GRADIENT HEADER
479
+ st.markdown("<h1>🚀 AI DATA/TECH TALENT SCREENING TOOL</h1>", unsafe_allow_html=True)
480
 
481
  # HR-friendly Tab Names
482
  tab_setup, tab_resumes, tab_results = st.tabs(["1. Job Requirement Setup", "2. Candidate Profile Upload", "3. Screening Report & Analytics"])
483
 
484
  # --- TAB 1: Setup & Job Description ---
485
  with tab_setup:
486
+ # EMOJI ADDED
487
+ st.markdown("## 📋 Define Job Requirements")
488
  st.info("Please enter the **Job Description** below. This is essential for the AI to accurately match skills and experience levels.")
489
 
490
  job_description = st.text_area(
 
502
 
503
  # --- TAB 2: Manage Resumes ---
504
  with tab_resumes:
505
+ # EMOJI ADDED
506
+ st.markdown(f"## πŸ“ Upload Candidate Profiles ({len(st.session_state.resumes)}/5)")
507
  st.info("Upload or paste candidate text below. The AI requires **key technical skills and experience statements** to function.")
508
 
509
  # Manage resume inputs
 
554
  reset_clicked = st.button("♻️ Reset All Inputs", use_container_width=True)
555
  st.markdown("---")
556
 
557
+ # Handle reset and analysis logic (unchanged)
558
  if reset_clicked:
559
  st.session_state.resumes = ["", "", ""]
560
  st.session_state.input_job_description = ""
 
594
  for i, resume in enumerate(valid_resumes):
595
  status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
596
 
 
597
  bert_tok_single = {
598
  'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0),
599
  'attention_mask': bert_tokenized['attention_mask'][i].unsqueeze(0)
 
611
  t5_tok_single,
612
  job_skills_set
613
  )
614
+ result["Resume"] = f"Candidate {i+1}"
615
  results.append(result)
616
  progress_bar.progress((i + 1) / total_steps)
617
  st.session_state.results = results
 
623
 
624
  # --- TAB 3: Results (The Professional Report) ---
625
  with tab_results:
626
+ # EMOJI ADDED
627
+ st.markdown("## 📊 Screening Results Summary")
628
 
629
  if st.session_state.results:
630
 
631
  # --- Scorecard Metrics (Professional Tiles) ---
632
+ results_df = pd.DataFrame(st.session_state.results)
633
  total = len(results_df)
634
  relevant_count = len(results_df[results_df['Suitability'] == 'Relevant'])
635
  review_count = len(results_df[results_df['Suitability'] == 'Uncertain'])
 
645
 
646
  col1, col2, col3, col4 = st.columns(4)
647
 
648
+ # SCORECARD TILES REINSTATED
649
  with col1:
650
  st.markdown(f"""
651
  <div class='scorecard-block'>
 
681
  st.markdown("---")
682
 
683
  # --- Detailed Report Table ---
684
+ st.markdown("### 📋 Detailed Screening Results")
685
 
 
 
686
  display_df = results_df.drop(columns=['Suitability_Color']).rename(columns={'Data/Tech Related Skills Summary': 'PROFILE SUMMARY', 'Warning': 'FLAGGING REASON'})
687
 
688
  st.dataframe(
 
701
  col_dl, col_chart_expander = st.columns([1, 3])
702
 
703
  with col_dl:
 
704
  csv_buffer = io.StringIO()
705
  results_df.drop(columns=['Suitability_Color']).to_csv(csv_buffer, index=False)
706
 
 
728
 
729
 
730
  if __name__ == "__main__":
 
 
 
731
  main()