Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# app.py
|
| 2 |
-
# Optimized Streamlit Application for Resume Screening with Multiple Resumes
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
|
|
@@ -8,14 +8,39 @@ import numpy as np
|
|
| 8 |
import re
|
| 9 |
import io
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
-
import
|
| 12 |
-
|
| 13 |
|
| 14 |
# Set page config as the first Streamlit command
|
| 15 |
st.set_page_config(page_title="Resume Screening Assistant for Data/Tech", page_icon="π", layout="wide")
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
|
| 20 |
'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
|
| 21 |
'javascript', 'scala', 'go', 'ruby', 'pytorch', 'keras', 'deep learning', 'nlp',
|
|
@@ -28,118 +53,67 @@ skills_pattern = re.compile(r'\b(?:' + '|'.join(map(re.escape, [
|
|
| 28 |
'cybersecurity', 'project management', 'technical writing', 'business analysis',
|
| 29 |
'agile methodologies', 'communication', 'team leadership',
|
| 30 |
'databricks', 'synapse', 'delta lake', 'streamlit', 'fastapi', 'graphql', 'mlflow', 'kedro'
|
| 31 |
-
]
|
| 32 |
-
|
| 33 |
-
normalize_pattern = re.compile(r'_|-|,\s*collaborated in agile teams|,\s*developed solutions for|,\s*led projects involving|,\s*designed applications with|,\s*built machine learning models for|,\s*implemented data pipelines for|,\s*deployed cloud-based solutions|,\s*optimized workflows for|,\s*contributed to data-driven projects')
|
| 34 |
-
|
| 35 |
-
# Apply custom CSS for layout stability and element styling
|
| 36 |
-
st.markdown("""
|
| 37 |
-
<style>
|
| 38 |
-
/* Sidebar Styling */
|
| 39 |
-
[data-testid="stSidebar"] {
|
| 40 |
-
width: 350px !important;
|
| 41 |
-
min-width: 350px !important;
|
| 42 |
-
max-height: 100vh;
|
| 43 |
-
overflow-y: auto;
|
| 44 |
-
padding-bottom: 20px;
|
| 45 |
-
}
|
| 46 |
-
[data-testid="stSidebarCollapseButton"] {
|
| 47 |
-
display: none !important;
|
| 48 |
-
}
|
| 49 |
-
|
| 50 |
-
/* Main Content Styling */
|
| 51 |
-
.block-container {
|
| 52 |
-
margin-left: 350px;
|
| 53 |
-
}
|
| 54 |
-
h2, h3 {
|
| 55 |
-
color: var(--primaryColor);
|
| 56 |
-
font-weight: bold;
|
| 57 |
-
margin-top: 20px;
|
| 58 |
-
}
|
| 59 |
-
|
| 60 |
-
/* Input Fields */
|
| 61 |
-
.stTextInput > label {
|
| 62 |
-
font-weight: bold;
|
| 63 |
-
font-size: 16px;
|
| 64 |
-
}
|
| 65 |
-
.stTextInput > div > input {
|
| 66 |
-
border: 1px solid var(--secondaryBackgroundColor);
|
| 67 |
-
border-radius: 5px;
|
| 68 |
-
padding: 8px;
|
| 69 |
-
font-size: 14px;
|
| 70 |
-
}
|
| 71 |
-
.stTextInput > div > input::placeholder {
|
| 72 |
-
color: #888888;
|
| 73 |
-
}
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
border-radius: 5px;
|
| 78 |
-
padding: 10px 20px;
|
| 79 |
-
font-size: 16px;
|
| 80 |
-
border: none;
|
| 81 |
-
}
|
| 82 |
-
.stButton > button:hover {
|
| 83 |
-
border: 1px solid var(--textColor);
|
| 84 |
-
}
|
| 85 |
-
|
| 86 |
-
/* Results Table */
|
| 87 |
-
div[data-testid="stDataFrame"] {
|
| 88 |
-
border: 1px solid var(--secondaryBackgroundColor);
|
| 89 |
-
border-radius: 5px;
|
| 90 |
-
}
|
| 91 |
-
div[data-testid="stDataFrame"] table th {
|
| 92 |
-
background-color: var(--primaryColor);
|
| 93 |
-
color: var(--textColor);
|
| 94 |
-
font-weight: bold;
|
| 95 |
-
}
|
| 96 |
-
div[data-testid="stDataFrame"] table td {
|
| 97 |
-
color: var(--textColor);
|
| 98 |
-
}
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
"
|
| 112 |
-
|
| 113 |
-
# Theme toggle button
|
| 114 |
-
def toggle_theme():
|
| 115 |
-
current_theme = st.config.get_option("theme.base")
|
| 116 |
-
new_theme = "dark" if current_theme == "light" else "light"
|
| 117 |
-
st.config.set_option("theme.base", new_theme)
|
| 118 |
-
st.rerun()
|
| 119 |
-
|
| 120 |
-
# Place the toggle button in the main content area (top-right)
|
| 121 |
-
col1, col2 = st.columns([9, 1])
|
| 122 |
-
with col2:
|
| 123 |
-
if st.button(f"Switch to {'Dark' if st.config.get_option('theme.base') == 'light' else 'Light'} Mode", key="theme_toggle"):
|
| 124 |
-
toggle_theme()
|
| 125 |
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
def normalize_text(text):
|
| 128 |
text = text.lower()
|
| 129 |
-
|
|
|
|
|
|
|
| 130 |
|
| 131 |
def check_experience_mismatch(resume, job_description):
|
| 132 |
resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
|
| 133 |
-
|
|
|
|
| 134 |
if resume_match and job_match:
|
| 135 |
resume_years = resume_match.group(0)
|
| 136 |
job_years = job_match.group(0)
|
|
|
|
| 137 |
if 'senior' in resume_years:
|
| 138 |
resume_num = 10
|
| 139 |
else:
|
| 140 |
resume_num = int(resume_match.group(1))
|
|
|
|
| 141 |
if 'senior+' in job_years:
|
| 142 |
-
job_num = 10
|
| 143 |
else:
|
| 144 |
job_num = int(job_match.group(1))
|
| 145 |
if resume_num < job_num:
|
|
@@ -150,9 +124,7 @@ def validate_input(text, is_resume=True):
|
|
| 150 |
if not text.strip() or len(text.strip()) < 10:
|
| 151 |
return "Input is too short (minimum 10 characters)."
|
| 152 |
text_normalized = normalize_text(text)
|
| 153 |
-
|
| 154 |
-
found_skill = bool(skills_pattern.search(text_normalized))
|
| 155 |
-
if is_resume and not found_skill:
|
| 156 |
return "Please include at least one data/tech skill (e.g., python, sql, databricks)."
|
| 157 |
if is_resume and not re.search(r'\d+\s*year(s)?|senior', text.lower()):
|
| 158 |
return "Please include experience (e.g., '3 years experience' or 'senior')."
|
|
@@ -160,162 +132,128 @@ def validate_input(text, is_resume=True):
|
|
| 160 |
|
| 161 |
@st.cache_resource
|
| 162 |
def load_models():
|
| 163 |
-
start_time = time.time()
|
| 164 |
bert_model_path = 'scmlewis/bert-finetuned-isom5240'
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
| 170 |
-
except Exception as e:
|
| 171 |
-
st.error(f"Error loading models: {str(e)}")
|
| 172 |
-
raise e
|
| 173 |
device = torch.device('cpu') # CPU for lightweight deployment
|
| 174 |
bert_model.to(device)
|
| 175 |
t5_model.to(device)
|
| 176 |
bert_model.eval()
|
| 177 |
t5_model.eval()
|
| 178 |
-
st.session_state.load_models_time = time.time() - start_time
|
| 179 |
return bert_tokenizer, bert_model, t5_tokenizer, t5_model, device
|
| 180 |
|
| 181 |
@st.cache_data
|
| 182 |
-
def tokenize_inputs(resumes, job_description):
|
| 183 |
"""Precompute tokenized inputs for BERT and T5."""
|
| 184 |
-
bert_tokenizer, _, t5_tokenizer, _, _ = st.session_state.models
|
| 185 |
-
start_time = time.time()
|
| 186 |
-
|
| 187 |
job_description_norm = normalize_text(job_description)
|
| 188 |
bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
|
| 189 |
-
bert_tokenized =
|
| 190 |
|
| 191 |
t5_inputs = []
|
| 192 |
for resume in resumes:
|
| 193 |
prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
|
| 194 |
prompt_normalized = normalize_text(prompt)
|
| 195 |
t5_inputs.append(f"summarize: {prompt_normalized}")
|
| 196 |
-
t5_tokenized =
|
| 197 |
|
| 198 |
-
st.session_state.tokenize_time = time.time() - start_time
|
| 199 |
return bert_tokenized, t5_inputs, t5_tokenized
|
| 200 |
|
| 201 |
@st.cache_data
|
| 202 |
def extract_skills(text):
|
| 203 |
"""Extract skills from text in a single pass."""
|
| 204 |
-
start_time = time.time()
|
| 205 |
text_normalized = normalize_text(text)
|
| 206 |
text_normalized = re.sub(r'[,_-]', ' ', text_normalized)
|
| 207 |
found_skills = skills_pattern.findall(text_normalized)
|
| 208 |
-
st.session_state.extract_skills_time = time.time() - start_time
|
| 209 |
return set(found_skills)
|
| 210 |
|
| 211 |
@st.cache_data
|
| 212 |
-
def classify_and_summarize_batch(
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
start_time = time.time()
|
| 216 |
-
timeout = 60 # Timeout after 60 seconds
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
if time.time() - t5_start > timeout:
|
| 248 |
-
raise TimeoutError("T5 inference timed out")
|
| 249 |
-
summaries = [t5_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in t5_outputs]
|
| 250 |
-
summaries = [re.sub(r'\s+', ' ', summary).strip() for summary in summaries]
|
| 251 |
-
|
| 252 |
-
prob, pred, summary, t5_input = probabilities[0], predictions[0], summaries[0], _t5_input
|
| 253 |
resume_skills_set = extract_skills(resume)
|
| 254 |
skill_overlap = len(_job_skills_set.intersection(resume_skills_set)) / len(_job_skills_set) if _job_skills_set else 0
|
| 255 |
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
|
|
|
| 259 |
else:
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
| 263 |
else:
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
exp_warning = check_experience_mismatch(resume, job_description)
|
| 268 |
-
if exp_warning:
|
| 269 |
suitability = "Uncertain"
|
| 270 |
-
warning =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
-
|
|
|
|
| 273 |
exp_match = re.search(r'\d+\s*years?|senior', resume.lower())
|
| 274 |
if skills and exp_match:
|
| 275 |
summary = f"{', '.join(skills)} proficiency, {exp_match.group(0)} experience"
|
| 276 |
else:
|
| 277 |
summary = f"{exp_match.group(0) if exp_match else 'unknown'} experience"
|
| 278 |
|
| 279 |
-
|
|
|
|
| 280 |
"Suitability": suitability,
|
| 281 |
"Data/Tech Related Skills Summary": summary,
|
| 282 |
-
"Warning": warning or "None"
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
st.session_state.classify_summarize_time = time.time() - start_time
|
| 287 |
-
return result
|
| 288 |
-
except TimeoutError as e:
|
| 289 |
-
st.warning(f"Skipped processing for resume due to timeout: {str(e)}")
|
| 290 |
-
return {
|
| 291 |
-
"Suitability": "Error",
|
| 292 |
-
"Data/Tech Related Skills Summary": "Processing timed out",
|
| 293 |
-
"Warning": str(e),
|
| 294 |
-
"Inference Time": time.time() - start_time
|
| 295 |
-
}
|
| 296 |
-
except Exception as e:
|
| 297 |
-
st.error(f"Error during inference for resume: {str(e)}")
|
| 298 |
-
return {
|
| 299 |
-
"Suitability": "Error",
|
| 300 |
-
"Data/Tech Related Skills Summary": "Failed to process",
|
| 301 |
-
"Warning": str(e),
|
| 302 |
-
"Inference Time": time.time() - start_time
|
| 303 |
-
}
|
| 304 |
|
| 305 |
@st.cache_data
|
| 306 |
def generate_skill_pie_chart(resumes):
|
| 307 |
-
"""Generate a pie chart of skill frequency across resumes."""
|
| 308 |
-
start_time = time.time()
|
| 309 |
skill_counts = {}
|
| 310 |
total_resumes = len([r for r in resumes if r.strip()])
|
| 311 |
|
| 312 |
if total_resumes == 0:
|
| 313 |
return None
|
| 314 |
|
|
|
|
| 315 |
for resume in resumes:
|
| 316 |
if resume.strip():
|
| 317 |
resume_lower = normalize_text(resume)
|
| 318 |
-
resume_lower = re.sub(r'[,_-]', ' ', resume_lower)
|
| 319 |
found_skills = skills_pattern.findall(resume_lower)
|
| 320 |
for skill in found_skills:
|
| 321 |
skill_counts[skill] = skill_counts.get(skill, 0) + 1
|
|
@@ -330,226 +268,204 @@ def generate_skill_pie_chart(resumes):
|
|
| 330 |
colors = plt.cm.Blues(np.linspace(0.4, 0.8, len(labels)))
|
| 331 |
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10})
|
| 332 |
ax.axis('equal')
|
| 333 |
-
plt.title("Skill Frequency Across Resumes", fontsize=12, pad=10)
|
| 334 |
-
|
| 335 |
-
st.session_state.pie_chart_time = time.time() - start_time
|
| 336 |
return fig
|
| 337 |
|
| 338 |
-
def
|
| 339 |
-
"""
|
|
|
|
| 340 |
with st.sidebar:
|
| 341 |
st.markdown("""
|
| 342 |
-
<h1 style='text-align: center; font-size: 32px; margin-bottom: 10px;'>
|
| 343 |
<p style='text-align: center; font-size: 16px; margin-top: 0;'>
|
| 344 |
-
Welcome to our AI-powered resume screening tool, specialized for data science and tech roles! This app evaluates multiple resumes against a single job description
|
| 345 |
</p>
|
| 346 |
""", unsafe_allow_html=True)
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
if 'expander1' not in st.session_state:
|
| 350 |
-
st.session_state.expander1 = True
|
| 351 |
-
if 'expander2' not in st.session_state:
|
| 352 |
-
st.session_state.expander2 = False
|
| 353 |
-
if 'expander3' not in st.session_state:
|
| 354 |
-
st.session_state.expander3 = False
|
| 355 |
-
if 'expander4' not in st.session_state:
|
| 356 |
-
st.session_state.expander4 = False
|
| 357 |
-
|
| 358 |
-
with st.expander("How to Use the App", expanded=st.session_state.expander1):
|
| 359 |
-
st.session_state.expander1 = True
|
| 360 |
st.markdown("""
|
| 361 |
-
|
|
|
|
| 362 |
- Enter the job description, specifying required skills and experience (e.g., "Data engineer requires python, spark, 5 years+").
|
| 363 |
-
- Click
|
| 364 |
-
- Use
|
| 365 |
-
- Use the
|
| 366 |
-
-
|
| 367 |
-
- View the skill frequency pie chart to see
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
-
|
| 374 |
-
- Resume 1: "Expert in python, machine learning, tableau, 4 years experience"
|
| 375 |
-
- Resume 2: "Skilled in sql, pandas, 2 years experience"
|
| 376 |
-
- Resume 3: "Proficient in java, python, 5 years experience"
|
| 377 |
-
- Job Description: "Data scientist requires python, machine learning, 3 years+"
|
| 378 |
-
- **Test Case 2**:
|
| 379 |
-
- Resume 1: "Skilled in databricks, spark, python, 6 years experience"
|
| 380 |
-
- Resume 2: "Expert in sql, tableau, business intelligence, 3 years experience"
|
| 381 |
-
- Resume 3: "Proficient in rust, langchain, 2 years experience"
|
| 382 |
-
- Job Description: "Data engineer requires python, spark, 5 years+"
|
| 383 |
-
""")
|
| 384 |
-
|
| 385 |
-
with st.expander("Guidelines", expanded=st.session_state.expander3):
|
| 386 |
-
st.session_state.expander3 = True
|
| 387 |
-
st.markdown("""
|
| 388 |
-
- Use comma-separated skills from a comprehensive list including python, sql, databricks, etc. (79 skills supported, see Project Report for full list).
|
| 389 |
- Include experience in years (e.g., "3 years experience" or "1 year experience") or as "senior".
|
| 390 |
- Focus on data/tech skills for accurate summarization.
|
| 391 |
- Resumes with only irrelevant skills (e.g., sales, marketing) will be classified as "Irrelevant".
|
|
|
|
| 392 |
""")
|
| 393 |
-
|
| 394 |
-
with st.expander("Classification Criteria", expanded=st.session_state.expander4):
|
| 395 |
-
st.session_state.expander4 = True
|
| 396 |
st.markdown("""
|
| 397 |
-
|
| 398 |
-
- **Skill Overlap**: The resume
|
| 399 |
- **Model Confidence**: A finetuned BERT model evaluates skill relevance. If confidence is below 85%, the classification is "Uncertain".
|
| 400 |
-
- **Experience Match**: The resume
|
| 401 |
|
| 402 |
**Outcomes**:
|
| 403 |
-
- **Relevant**: Skill overlap β₯ 50%, sufficient experience, and high model confidence (β₯
|
| 404 |
- **Irrelevant**: Skill overlap < 40% or high confidence in low skill relevance.
|
| 405 |
-
- **Uncertain**: Skill overlap β₯ 50% but experience mismatch (e.g., resume has 2 years, job requires 5 years+), or low model confidence (<
|
| 406 |
|
| 407 |
-
**Note**: An experience mismatch warning is shown if the resume
|
| 408 |
""")
|
| 409 |
|
| 410 |
-
|
| 411 |
-
"
|
| 412 |
-
# Render sidebar
|
| 413 |
-
render_sidebar()
|
| 414 |
-
|
| 415 |
-
# Initialize session state
|
| 416 |
-
if 'models' not in st.session_state:
|
| 417 |
-
st.session_state.models = load_models()
|
| 418 |
if 'resumes' not in st.session_state:
|
| 419 |
-
st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience"
|
| 420 |
-
if '
|
| 421 |
-
st.session_state.
|
| 422 |
-
if 'job_description' not in st.session_state:
|
| 423 |
-
st.session_state.job_description = "Data scientist requires python, machine learning, 3 years+" # Prefill with Test Case 1
|
| 424 |
if 'results' not in st.session_state:
|
| 425 |
-
st.session_state.results =
|
| 426 |
-
if '
|
| 427 |
-
st.session_state.
|
| 428 |
-
if '
|
| 429 |
-
st.session_state.
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
st.session_state.
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
st.
|
| 453 |
-
|
| 454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
st.rerun()
|
| 456 |
-
with
|
| 457 |
-
if st.button("
|
| 458 |
-
st.session_state.
|
| 459 |
-
st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience"] + [""] * 4
|
| 460 |
-
st.session_state.job_description = "Data scientist requires python, machine learning, 3 years+"
|
| 461 |
-
st.session_state.results = None
|
| 462 |
-
st.session_state.pie_chart = None
|
| 463 |
st.rerun()
|
| 464 |
|
| 465 |
# Job description input
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
if
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
st.session_state.
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
st.session_state.
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
st.
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
|
| 530 |
# Display results
|
| 531 |
if st.session_state.results:
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
|
| 554 |
if __name__ == "__main__":
|
|
|
|
| 555 |
main()
|
|
|
|
| 1 |
# app.py
|
| 2 |
+
# Optimized Streamlit Application for Resume Screening with Multiple Resumes
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
|
|
|
|
| 8 |
import re
|
| 9 |
import io
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
+
import PyPDF2
|
| 12 |
+
from docx import Document
|
| 13 |
|
| 14 |
# Set page config as the first Streamlit command
|
| 15 |
st.set_page_config(page_title="Resume Screening Assistant for Data/Tech", page_icon="π", layout="wide")
|
| 16 |
|
| 17 |
+
# Set sidebar width and make uncollapsible
|
| 18 |
+
st.markdown("""
|
| 19 |
+
<style>
|
| 20 |
+
.css-1d391kg { /* Sidebar */
|
| 21 |
+
width: 350px !important;
|
| 22 |
+
}
|
| 23 |
+
[data-testid="stSidebarCollapseButton"] { /* Hide toggle button */
|
| 24 |
+
display: none !important;
|
| 25 |
+
}
|
| 26 |
+
.stSidebar { /* Ensure sidebar visibility */
|
| 27 |
+
min-width: 350px !important;
|
| 28 |
+
visibility: visible !important;
|
| 29 |
+
}
|
| 30 |
+
[data-testid="stExpander"] summary { /* Expander headers */
|
| 31 |
+
font-size: 26px !important;
|
| 32 |
+
font-weight: bold !important;
|
| 33 |
+
text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1) !important;
|
| 34 |
+
white-space: nowrap !important;
|
| 35 |
+
}
|
| 36 |
+
.st-expander-content p { /* Expander body text */
|
| 37 |
+
font-size: 12px !important;
|
| 38 |
+
}
|
| 39 |
+
</style>
|
| 40 |
+
""", unsafe_allow_html=True)
|
| 41 |
+
|
| 42 |
+
# Skills list (79 skills from Application_Demo.ipynb)
|
| 43 |
+
skills_list = [
|
| 44 |
'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
|
| 45 |
'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
|
| 46 |
'javascript', 'scala', 'go', 'ruby', 'pytorch', 'keras', 'deep learning', 'nlp',
|
|
|
|
| 53 |
'cybersecurity', 'project management', 'technical writing', 'business analysis',
|
| 54 |
'agile methodologies', 'communication', 'team leadership',
|
| 55 |
'databricks', 'synapse', 'delta lake', 'streamlit', 'fastapi', 'graphql', 'mlflow', 'kedro'
|
| 56 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
# Precompile regex for skills matching (optimized for single pass)
|
| 59 |
+
skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
+
# Helper functions for CV parsing
|
| 62 |
+
def extract_text_from_pdf(file):
|
| 63 |
+
try:
|
| 64 |
+
pdf_reader = PyPDF2.PdfReader(file)
|
| 65 |
+
text = ""
|
| 66 |
+
for page in pdf_reader.pages:
|
| 67 |
+
page_text = page.extract_text()
|
| 68 |
+
if page_text:
|
| 69 |
+
text += page_text + "\n"
|
| 70 |
+
return text.strip()
|
| 71 |
+
except Exception as e:
|
| 72 |
+
st.error(f"Error extracting text from PDF: {str(e)}")
|
| 73 |
+
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
def extract_text_from_docx(file):
|
| 76 |
+
try:
|
| 77 |
+
doc = Document(file)
|
| 78 |
+
text = ""
|
| 79 |
+
for paragraph in doc.paragraphs:
|
| 80 |
+
text += paragraph.text + "\n"
|
| 81 |
+
return text.strip()
|
| 82 |
+
except Exception as e:
|
| 83 |
+
st.error(f"Error extracting text from Word document: {str(e)}")
|
| 84 |
+
return ""
|
| 85 |
+
|
| 86 |
+
def extract_text_from_file(uploaded_file):
|
| 87 |
+
if uploaded_file.name.endswith('.pdf'):
|
| 88 |
+
return extract_text_from_pdf(uploaded_file)
|
| 89 |
+
elif uploaded_file.name.endswith('.docx'):
|
| 90 |
+
return extract_text_from_docx(uploaded_file)
|
| 91 |
+
else:
|
| 92 |
+
st.error("Unsupported file format. Please upload a PDF or Word (.docx) document.")
|
| 93 |
+
return ""
|
| 94 |
+
|
| 95 |
+
# Helper functions for analysis
|
| 96 |
def normalize_text(text):
|
| 97 |
text = text.lower()
|
| 98 |
+
# Remove underscores, hyphens, and specific phrases, replacing with empty string
|
| 99 |
+
text = re.sub(r'_|-|,\s*collaborated in agile teams|,\s*developed solutions for|,\s*led projects involving|,\s*designed applications with|,\s*built machine learning models for|,\s*implemented data pipelines for|,\s*deployed cloud-based solutions|,\s*optimized workflows for|,\s*contributed to data-driven projects', '', text)
|
| 100 |
+
return text
|
| 101 |
|
| 102 |
def check_experience_mismatch(resume, job_description):
|
| 103 |
resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
|
| 104 |
+
# Allow optional words like "experience" between "years" and "+"
|
| 105 |
+
job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
|
| 106 |
if resume_match and job_match:
|
| 107 |
resume_years = resume_match.group(0)
|
| 108 |
job_years = job_match.group(0)
|
| 109 |
+
# Handle resume years
|
| 110 |
if 'senior' in resume_years:
|
| 111 |
resume_num = 10
|
| 112 |
else:
|
| 113 |
resume_num = int(resume_match.group(1))
|
| 114 |
+
# Handle job years
|
| 115 |
if 'senior+' in job_years:
|
| 116 |
+
job_num = 10
|
| 117 |
else:
|
| 118 |
job_num = int(job_match.group(1))
|
| 119 |
if resume_num < job_num:
|
|
|
|
| 124 |
if not text.strip() or len(text.strip()) < 10:
|
| 125 |
return "Input is too short (minimum 10 characters)."
|
| 126 |
text_normalized = normalize_text(text)
|
| 127 |
+
if is_resume and not skills_pattern.search(text_normalized):
|
|
|
|
|
|
|
| 128 |
return "Please include at least one data/tech skill (e.g., python, sql, databricks)."
|
| 129 |
if is_resume and not re.search(r'\d+\s*year(s)?|senior', text.lower()):
|
| 130 |
return "Please include experience (e.g., '3 years experience' or 'senior')."
|
|
|
|
| 132 |
|
| 133 |
@st.cache_resource
|
| 134 |
def load_models():
|
|
|
|
| 135 |
bert_model_path = 'scmlewis/bert-finetuned-isom5240'
|
| 136 |
+
bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
|
| 137 |
+
bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
|
| 138 |
+
t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
|
| 139 |
+
t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
device = torch.device('cpu') # CPU for lightweight deployment
|
| 141 |
bert_model.to(device)
|
| 142 |
t5_model.to(device)
|
| 143 |
bert_model.eval()
|
| 144 |
t5_model.eval()
|
|
|
|
| 145 |
return bert_tokenizer, bert_model, t5_tokenizer, t5_model, device
|
| 146 |
|
| 147 |
@st.cache_data
|
| 148 |
+
def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
|
| 149 |
"""Precompute tokenized inputs for BERT and T5."""
|
|
|
|
|
|
|
|
|
|
| 150 |
job_description_norm = normalize_text(job_description)
|
| 151 |
bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
|
| 152 |
+
bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
| 153 |
|
| 154 |
t5_inputs = []
|
| 155 |
for resume in resumes:
|
| 156 |
prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
|
| 157 |
prompt_normalized = normalize_text(prompt)
|
| 158 |
t5_inputs.append(f"summarize: {prompt_normalized}")
|
| 159 |
+
t5_tokenized = _t5_tokenizer(t5_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
| 160 |
|
|
|
|
| 161 |
return bert_tokenized, t5_inputs, t5_tokenized
|
| 162 |
|
| 163 |
@st.cache_data
|
| 164 |
def extract_skills(text):
|
| 165 |
"""Extract skills from text in a single pass."""
|
|
|
|
| 166 |
text_normalized = normalize_text(text)
|
| 167 |
text_normalized = re.sub(r'[,_-]', ' ', text_normalized)
|
| 168 |
found_skills = skills_pattern.findall(text_normalized)
|
|
|
|
| 169 |
return set(found_skills)
|
| 170 |
|
| 171 |
@st.cache_data
|
| 172 |
+
def classify_and_summarize_batch(resumes, job_description, _bert_tokenized, _t5_inputs, _t5_tokenized, _job_skills_set):
|
| 173 |
+
bert_tokenizer, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
|
| 174 |
+
bert_tokenized = {k: v.to(device) for k, v in _bert_tokenized.items()}
|
|
|
|
|
|
|
| 175 |
|
| 176 |
+
# BERT inference (batched)
|
| 177 |
+
with torch.no_grad():
|
| 178 |
+
outputs = bert_model(**bert_tokenized)
|
| 179 |
+
|
| 180 |
+
logits = outputs.logits
|
| 181 |
+
probabilities = torch.softmax(logits, dim=1).cpu().numpy()
|
| 182 |
+
predictions = np.argmax(probabilities, axis=1)
|
| 183 |
+
|
| 184 |
+
confidence_threshold = 0.85
|
| 185 |
+
results = []
|
| 186 |
+
|
| 187 |
+
# Batch T5 inference for all resumes
|
| 188 |
+
t5_tokenized = {k: v.to(device) for k, v in _t5_tokenized.items()}
|
| 189 |
+
with torch.no_grad():
|
| 190 |
+
t5_outputs = t5_model.generate(
|
| 191 |
+
t5_tokenized['input_ids'],
|
| 192 |
+
attention_mask=t5_tokenized['attention_mask'],
|
| 193 |
+
max_length=30,
|
| 194 |
+
min_length=8,
|
| 195 |
+
num_beams=2,
|
| 196 |
+
no_repeat_ngram_size=3,
|
| 197 |
+
length_penalty=2.0,
|
| 198 |
+
early_stopping=True
|
| 199 |
+
)
|
| 200 |
+
summaries = [t5_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in t5_outputs]
|
| 201 |
+
summaries = [re.sub(r'\s+', ' ', summary).strip() for summary in summaries]
|
| 202 |
+
|
| 203 |
+
for i, (resume, prob, pred, summary, t5_input) in enumerate(zip(resumes, probabilities, predictions, summaries, _t5_inputs)):
|
| 204 |
+
# Compute skill overlap
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
resume_skills_set = extract_skills(resume)
|
| 206 |
skill_overlap = len(_job_skills_set.intersection(resume_skills_set)) / len(_job_skills_set) if _job_skills_set else 0
|
| 207 |
|
| 208 |
+
# Step 1: Check skill irrelevance
|
| 209 |
+
if skill_overlap < 0.4:
|
| 210 |
+
suitability = "Irrelevant"
|
| 211 |
+
warning = "Skills are irrelevant"
|
| 212 |
else:
|
| 213 |
+
# Step 2: Check experience mismatch (takes precedence)
|
| 214 |
+
exp_warning = check_experience_mismatch(resume, job_description)
|
| 215 |
+
if exp_warning:
|
| 216 |
+
suitability = "Uncertain"
|
| 217 |
+
warning = exp_warning
|
| 218 |
else:
|
| 219 |
+
# Step 3: Check model confidence
|
| 220 |
+
if prob[pred] < confidence_threshold:
|
|
|
|
|
|
|
|
|
|
| 221 |
suitability = "Uncertain"
|
| 222 |
+
warning = f"Low confidence: {prob[pred]:.4f}"
|
| 223 |
+
else:
|
| 224 |
+
# Step 4: Determine suitability based on skill overlap
|
| 225 |
+
suitability = "Relevant" if skill_overlap >= 0.5 else "Irrelevant"
|
| 226 |
+
warning = "Skills are not a strong match" if suitability == "Irrelevant" else None
|
| 227 |
|
| 228 |
+
# Post-process T5 summary for all resumes (Relevant, Uncertain, or Irrelevant)
|
| 229 |
+
skills = list(set(skills_pattern.findall(t5_input))) # Deduplicate skills
|
| 230 |
exp_match = re.search(r'\d+\s*years?|senior', resume.lower())
|
| 231 |
if skills and exp_match:
|
| 232 |
summary = f"{', '.join(skills)} proficiency, {exp_match.group(0)} experience"
|
| 233 |
else:
|
| 234 |
summary = f"{exp_match.group(0) if exp_match else 'unknown'} experience"
|
| 235 |
|
| 236 |
+
results.append({
|
| 237 |
+
"Resume": f"Resume {st.session_state.resumes.index(resume)+1}",
|
| 238 |
"Suitability": suitability,
|
| 239 |
"Data/Tech Related Skills Summary": summary,
|
| 240 |
+
"Warning": warning or "None"
|
| 241 |
+
})
|
| 242 |
+
|
| 243 |
+
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
|
| 245 |
@st.cache_data
|
| 246 |
def generate_skill_pie_chart(resumes):
|
|
|
|
|
|
|
| 247 |
skill_counts = {}
|
| 248 |
total_resumes = len([r for r in resumes if r.strip()])
|
| 249 |
|
| 250 |
if total_resumes == 0:
|
| 251 |
return None
|
| 252 |
|
| 253 |
+
# Count skills that appear in resumes
|
| 254 |
for resume in resumes:
|
| 255 |
if resume.strip():
|
| 256 |
resume_lower = normalize_text(resume)
|
|
|
|
| 257 |
found_skills = skills_pattern.findall(resume_lower)
|
| 258 |
for skill in found_skills:
|
| 259 |
skill_counts[skill] = skill_counts.get(skill, 0) + 1
|
|
|
|
| 268 |
colors = plt.cm.Blues(np.linspace(0.4, 0.8, len(labels)))
|
| 269 |
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10})
|
| 270 |
ax.axis('equal')
|
| 271 |
+
plt.title("Skill Frequency Across Resumes", fontsize=12, color='#007BFF', pad=10)
|
|
|
|
|
|
|
| 272 |
return fig
|
| 273 |
|
| 274 |
+
def main():
    """Main function to run the Streamlit app for resume screening.

    Renders the sidebar (intro, usage instructions, classification criteria),
    collects up to 5 resumes (uploaded PDF/DOCX or manual text) plus one job
    description, runs the BERT/T5 analysis pipeline on demand, and displays
    results with a CSV download and a skill-frequency pie chart.

    Relies on module-level helpers defined elsewhere in this file:
    extract_text_from_file, validate_input, load_models, tokenize_inputs,
    extract_skills, classify_and_summarize_batch, generate_skill_pie_chart.
    """
    import csv  # local import: used only for the results download below

    # ---- Sidebar: intro, instructions, and classification criteria ----
    with st.sidebar:
        st.markdown("""
        <h1 style='text-align: center; color: #007BFF; font-size: 32px; text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1); margin-bottom: 10px;'>π» Resume Screening Assistant for Data/Tech</h1>
        <p style='text-align: center; font-size: 16px; margin-top: 0;'>
        Welcome to our AI-powered resume screening tool, specialized for data science and tech roles! This app evaluates multiple resumes against a single job description to determine suitability, providing concise summaries of key data and tech skills and experience. Built with advanced natural language processing, it ensures accurate and efficient screening for technical positions. <br><br><strong>Note:</strong> Performance may vary due to server load on free CPU instances.
        </p>
        """, unsafe_allow_html=True)

        with st.expander("π How to Use the App", expanded=True):
            st.markdown("""
            **Instructions**:
            - Upload a PDF or Word (.docx) CV or manually enter up to 5 candidate resumes in the text boxes, listing data/tech skills and experience (e.g., "Expert in python, databricks, 6 years experience").
            - Enter the job description, specifying required skills and experience (e.g., "Data engineer requires python, spark, 5 years+").
            - Click **Analyze** to evaluate all non-empty resumes (at least one required).
            - Use **Add Resume** or **Remove Resume** to adjust the number of resume fields (1β5).
            - Use the **Reset** button to clear all inputs and results.
            - Download results as a CSV file for record-keeping.
            - View the skill frequency pie chart to see skill distribution across resumes.
            - Example test cases:
              - **Test Case 1**: Resumes like "Expert in python, machine learning, tableau, 4 years experience" against "Data scientist requires python, machine learning, 3 years+".
              - **Test Case 2**: Resumes like "Skilled in databricks, spark, python, 6 years experience" against "Data engineer requires python, spark, 5 years+".

            **Guidelines**:
            - Use comma-separated skills from a comprehensive list including python, sql, databricks, etc. (79 skills supported).
            - Include experience in years (e.g., "3 years experience" or "1 year experience") or as "senior".
            - Focus on data/tech skills for accurate summarization.
            - Resumes with only irrelevant skills (e.g., sales, marketing) will be classified as "Irrelevant".
            - If uploading a CV, ensure itβs a text-based PDF or Word document (scanned PDFs may not work).
            """)

        with st.expander("βΉοΈ Classification Criteria", expanded=True):
            st.markdown("""
            The app classifies resumes based on:
            - **Skill Overlap**: The resumeβs data/tech skills are compared to the jobβs requirements. A skill overlap below 40% results in an "Irrelevant" classification.
            - **Model Confidence**: A finetuned BERT model evaluates skill relevance. If confidence is below 85%, the classification is "Uncertain".
            - **Experience Match**: The resumeβs experience (in years or seniority) must meet or exceed the jobβs requirement.

            **Outcomes**:
            - **Relevant**: Skill overlap β₯ 50%, sufficient experience, and high model confidence (β₯85%).
            - **Irrelevant**: Skill overlap < 40% or high confidence in low skill relevance.
            - **Uncertain**: Skill overlap β₯ 50% but experience mismatch (e.g., resume has 2 years, job requires 5 years+), or low model confidence (<85%).

            **Note**: An experience mismatch warning is shown if the resumeβs experience is below the jobβs requirement, overriding the skill overlap and confidence to classify as Uncertain.
            """)

    # ---- Session-state initialization (one-time defaults per session) ----
    st.markdown("### π Enter Resumes")
    if 'resumes' not in st.session_state:
        st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience", "", ""]
    if 'input_job_description' not in st.session_state:
        st.session_state.input_job_description = "Data scientist requires python, machine learning, 3 years+"
    if 'results' not in st.session_state:
        st.session_state.results = []
    if 'valid_resumes' not in st.session_state:
        st.session_state.valid_resumes = []
    if 'models' not in st.session_state:
        # Models are loaded lazily on first Analyze click to keep startup fast.
        st.session_state.models = None

    # ---- Resume inputs: optional file upload feeding an editable text area ----
    for i in range(len(st.session_state.resumes)):
        st.markdown(f"**Resume {i+1}**")
        uploaded_file = st.file_uploader(f"Upload CV (PDF or Word) for Resume {i+1}", type=['pdf', 'docx'], key=f"file_upload_{i}")

        if uploaded_file is not None:
            # NOTE(review): an attached file re-extracts on every rerun and
            # overwrites manual edits until the upload is cleared.
            extracted_text = extract_text_from_file(uploaded_file)
            if extracted_text:
                st.session_state.resumes[i] = extracted_text
            else:
                st.session_state.resumes[i] = ""

        st.session_state.resumes[i] = st.text_area(
            "Enter or edit resume text",
            value=st.session_state.resumes[i],
            height=100,
            key=f"resume_{i}",
            placeholder="e.g., Expert in python, sql, 3 years experience"
        )
        validation_error = validate_input(st.session_state.resumes[i], is_resume=True)
        if validation_error and st.session_state.resumes[i].strip():
            st.warning(f"Resume {i+1}: {validation_error}")

    # ---- Add/Remove resume buttons (bounded to 1β5 fields) ----
    col_add, col_remove, _ = st.columns([1, 1, 3])
    with col_add:
        if st.button("Add Resume") and len(st.session_state.resumes) < 5:
            st.session_state.resumes.append("")
            st.rerun()
    with col_remove:
        if st.button("Remove Resume") and len(st.session_state.resumes) > 1:
            st.session_state.resumes.pop()
            st.rerun()

    # ---- Job description input ----
    st.markdown("### π Enter Job Description")
    job_description = st.text_area(
        "Job Description",
        value=st.session_state.input_job_description,
        height=100,
        key="job_description",
        placeholder="e.g., Data scientist requires python, sql, 3 years+"
    )
    validation_error = validate_input(job_description, is_resume=False)
    if validation_error and job_description.strip():
        st.warning(f"Job Description: {validation_error}")

    # ---- Analyze and Reset buttons ----
    col_btn1, col_btn2, _ = st.columns([1, 1, 3])
    with col_btn1:
        analyze_clicked = st.button("Analyze", type="primary")
    with col_btn2:
        reset_clicked = st.button("Reset")

    # ---- Reset: clear inputs and results, then rerun to redraw widgets ----
    if reset_clicked:
        st.session_state.resumes = ["", "", ""]
        st.session_state.input_job_description = ""
        st.session_state.results = []
        st.session_state.valid_resumes = []
        st.rerun()

    # ---- Analysis: validate early, lazy-load models, then run the pipeline ----
    if analyze_clicked:
        # Keep only non-empty resumes that pass validation; warn on the rest.
        valid_resumes = []
        for i, resume in enumerate(st.session_state.resumes):
            validation_error = validate_input(resume, is_resume=True)
            if not validation_error and resume.strip():
                valid_resumes.append(resume)
            elif validation_error and resume.strip():
                st.warning(f"Resume {i+1}: {validation_error}")

        validation_error = validate_input(job_description, is_resume=False)
        if validation_error and job_description.strip():
            st.warning(f"Job Description: {validation_error}")

        if valid_resumes and job_description.strip():
            # Load models only when needed (first analysis of the session).
            if st.session_state.models is None:
                with st.spinner("Loading models, please wait..."):
                    st.session_state.models = load_models()

            st.session_state.results = []
            st.session_state.valid_resumes = valid_resumes

            with st.spinner("Analyzing resumes..."):
                progress_bar = st.progress(0)
                status_text = st.empty()
                status_text.text("Preparing inputs...")

                # Unpack the cached model bundle.
                bert_tokenizer, bert_model, t5_tokenizer, t5_model, device = st.session_state.models

                # Precompute tokenized inputs and the job's skill set once.
                bert_tokenized, t5_inputs, t5_tokenized = tokenize_inputs(valid_resumes, job_description, bert_tokenizer, t5_tokenizer)
                job_skills_set = extract_skills(job_description)

                status_text.text("Classifying and summarizing resumes...")
                results = classify_and_summarize_batch(valid_resumes, job_description, bert_tokenized, t5_inputs, t5_tokenized, job_skills_set)
                progress_bar.progress(1.0)

                st.session_state.results = results

                status_text.empty()
                progress_bar.empty()
                st.success("Analysis completed! π")
        else:
            st.error("Please enter at least one valid resume and a job description.")

    # ---- Results table, CSV download, and skill-frequency chart ----
    if st.session_state.results:
        st.markdown("### π Results")
        st.table(st.session_state.results)

        # Build the CSV with the stdlib csv module so every field (including
        # Suitability/Summary/Warning, not just the free-text columns) is
        # correctly quoted/escaped; the hand-rolled writer it replaces only
        # escaped quotes in the resume and job-description text.
        csv_buffer = io.StringIO()
        writer = csv.writer(csv_buffer)
        writer.writerow(["Resume Number", "Resume Text", "Job Description", "Suitability", "Summary", "Warning"])
        for i, result in enumerate(st.session_state.results):
            writer.writerow([
                result["Resume"],
                st.session_state.valid_resumes[i].replace('\n', ' '),  # keep one row per resume
                job_description.replace('\n', ' '),
                result["Suitability"],
                result["Data/Tech Related Skills Summary"],
                result["Warning"],
            ])
        st.download_button("Download Results", csv_buffer.getvalue(), file_name="resume_analysis.csv", mime="text/csv")

        with st.expander("π Skill Frequency Across Resumes", expanded=False):
            if st.session_state.valid_resumes:
                fig = generate_skill_pie_chart(st.session_state.valid_resumes)
                if fig:
                    st.pyplot(fig)
                    plt.close(fig)  # release the figure to avoid leaking memory across reruns
                else:
                    st.write("No recognized data/tech skills found in the resumes.")
            else:
                st.write("No valid resumes to analyze.")
|
| 468 |
|
| 469 |
if __name__ == "__main__":
    # Entry point when this module is executed directly (e.g. `streamlit run app.py`).
    main()