Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# app.py
|
| 2 |
-
# Business-Focused Streamlit Application for AI Talent Screening
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
|
|
@@ -21,7 +21,7 @@ st.set_page_config(
|
|
| 21 |
initial_sidebar_state="expanded",
|
| 22 |
)
|
| 23 |
|
| 24 |
-
# --- CUSTOM PROFESSIONAL CSS OVERHAUL ---
|
| 25 |
st.markdown("""
|
| 26 |
<style>
|
| 27 |
/* 0. GLOBAL CONFIG & LIGHT THEME */
|
|
@@ -98,7 +98,7 @@ st.markdown("""
|
|
| 98 |
font-weight: bold;
|
| 99 |
}
|
| 100 |
.stSidebar {
|
| 101 |
-
background-color: #E9ECEF; /*
|
| 102 |
border-right: 1px solid #DEE2E6;
|
| 103 |
}
|
| 104 |
|
|
@@ -139,10 +139,12 @@ st.markdown("""
|
|
| 139 |
</style>
|
| 140 |
""", unsafe_allow_html=True)
|
| 141 |
|
| 142 |
-
|
| 143 |
# --- (Model and Helper Functions - Core logic remains the same) ---
|
| 144 |
-
#
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
| 146 |
skills_list = [
|
| 147 |
'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
|
| 148 |
'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
|
|
@@ -159,14 +161,15 @@ skills_list = [
|
|
| 159 |
]
|
| 160 |
skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
|
| 161 |
|
| 162 |
-
#
|
| 163 |
def extract_text_from_pdf(file):
|
| 164 |
try:
|
| 165 |
pdf_reader = PyPDF2.PdfReader(file)
|
| 166 |
text = ""
|
| 167 |
for page in pdf_reader.pages:
|
| 168 |
page_text = page.extract_text()
|
| 169 |
-
if page_text:
|
|
|
|
| 170 |
return text.strip()
|
| 171 |
except: return ""
|
| 172 |
|
|
@@ -178,7 +181,7 @@ def extract_text_from_docx(file):
|
|
| 178 |
text += paragraph.text + "\n"
|
| 179 |
return text.strip()
|
| 180 |
except: return ""
|
| 181 |
-
|
| 182 |
def extract_text_from_file(uploaded_file):
|
| 183 |
if uploaded_file.name.endswith('.pdf'): return extract_text_from_pdf(uploaded_file)
|
| 184 |
elif uploaded_file.name.endswith('.docx'): return extract_text_from_docx(uploaded_file)
|
|
@@ -190,7 +193,6 @@ def normalize_text(text):
|
|
| 190 |
return text
|
| 191 |
|
| 192 |
def check_experience_mismatch(resume, job_description):
|
| 193 |
-
# ... (Experience mismatch logic remains) ...
|
| 194 |
resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
|
| 195 |
job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
|
| 196 |
if resume_match and job_match:
|
|
@@ -212,7 +214,6 @@ def validate_input(text, is_resume=True):
|
|
| 212 |
|
| 213 |
@st.cache_resource
|
| 214 |
def load_models():
|
| 215 |
-
# ... (Model loading logic remains) ...
|
| 216 |
bert_model_path = 'scmlewis/bert-finetuned-isom5240'
|
| 217 |
bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
|
| 218 |
bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
|
|
@@ -227,7 +228,6 @@ def load_models():
|
|
| 227 |
|
| 228 |
@st.cache_data
|
| 229 |
def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
|
| 230 |
-
# ... (Tokenization logic remains) ...
|
| 231 |
job_description_norm = normalize_text(job_description)
|
| 232 |
bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
|
| 233 |
bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
|
@@ -248,7 +248,6 @@ def extract_skills(text):
|
|
| 248 |
|
| 249 |
@st.cache_data
|
| 250 |
def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input, _t5_tok, _job_skills_set):
|
| 251 |
-
# ... (Inference and classification logic remains) ...
|
| 252 |
_, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
|
| 253 |
timeout = 60
|
| 254 |
|
|
@@ -307,7 +306,6 @@ def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input,
|
|
| 307 |
|
| 308 |
@st.cache_data
|
| 309 |
def generate_skill_pie_chart(resumes):
|
| 310 |
-
# ... (Pie chart logic remains, but with business colors) ...
|
| 311 |
skill_counts = {}
|
| 312 |
total_resumes = len([r for r in resumes if r.strip()])
|
| 313 |
if total_resumes == 0: return None
|
|
@@ -328,10 +326,9 @@ def generate_skill_pie_chart(resumes):
|
|
| 328 |
labels = list(top_skills.keys())
|
| 329 |
sizes = [(count / sum(top_skills.values())) * 100 for count in top_skills.values()]
|
| 330 |
|
| 331 |
-
# Use standard white background for a business report look
|
| 332 |
plt.style.use('default')
|
| 333 |
fig, ax = plt.subplots(figsize=(6, 4))
|
| 334 |
-
colors = plt.cm.tab10(np.linspace(0, 1, len(labels)))
|
| 335 |
plt.rcParams['text.color'] = 'black'
|
| 336 |
wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': 'black'})
|
| 337 |
ax.axis('equal')
|
|
@@ -339,10 +336,16 @@ def generate_skill_pie_chart(resumes):
|
|
| 339 |
return fig
|
| 340 |
|
| 341 |
def render_sidebar():
|
| 342 |
-
"""Render sidebar content with professional HR language."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
with st.sidebar:
|
| 344 |
st.markdown(f"""
|
| 345 |
-
<h2 style='text-align: center; border-left: none; padding-left: 0; color:
|
| 346 |
Talent Screening Assistant
|
| 347 |
</h2>
|
| 348 |
<p style='text-align: center; font-size: 14px; margin-top: 0; color: #6C757D;'>
|
|
@@ -367,10 +370,11 @@ def render_sidebar():
|
|
| 367 |
""")
|
| 368 |
|
| 369 |
with st.expander("🎯 Screening Outcomes Explained", expanded=False):
|
|
|
|
| 370 |
st.markdown(f"""
|
| 371 |
-
- **Relevant** (`{
|
| 372 |
-
- **Irrelevant** (`{
|
| 373 |
-
- **Requires Review** (`{
|
| 374 |
""")
|
| 375 |
|
| 376 |
def main():
|
|
@@ -499,8 +503,17 @@ def main():
|
|
| 499 |
|
| 500 |
for i, resume in enumerate(valid_resumes):
|
| 501 |
status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
|
| 502 |
-
|
| 503 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
result = classify_and_summarize_batch(resume, job_description, bert_tok_single, t5_inputs[i], t5_tok_single, job_skills_set)
|
| 505 |
result["Profile ID"] = f"Candidate {i+1}"
|
| 506 |
results.append(result)
|
|
@@ -527,37 +540,43 @@ def main():
|
|
| 527 |
|
| 528 |
st.markdown(f"#### Overview: {total} Candidate Profiles Processed")
|
| 529 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
col1, col2, col3, col4 = st.columns(4)
|
| 531 |
|
| 532 |
with col1:
|
| 533 |
st.markdown(f"""
|
| 534 |
<div class='scorecard-block'>
|
| 535 |
<div class='scorecard-label'>TOTAL PROFILES</div>
|
| 536 |
-
<div class='scorecard-value' style='color:{
|
| 537 |
</div>
|
| 538 |
""", unsafe_allow_html=True)
|
| 539 |
|
| 540 |
with col2:
|
| 541 |
st.markdown(f"""
|
| 542 |
<div class='scorecard-block block-relevant'>
|
| 543 |
-
<div class='scorecard-label' style='color: {
|
| 544 |
-
<div class='scorecard-value' style='color: {
|
| 545 |
</div>
|
| 546 |
""", unsafe_allow_html=True)
|
| 547 |
|
| 548 |
with col3:
|
| 549 |
st.markdown(f"""
|
| 550 |
<div class='scorecard-block block-uncertain'>
|
| 551 |
-
<div class='scorecard-label' style='color: {
|
| 552 |
-
<div class='scorecard-value' style='color: {
|
| 553 |
</div>
|
| 554 |
""", unsafe_allow_html=True)
|
| 555 |
|
| 556 |
with col4:
|
| 557 |
st.markdown(f"""
|
| 558 |
<div class='scorecard-block block-irrelevant'>
|
| 559 |
-
<div class='scorecard-label' style='color: {
|
| 560 |
-
<div class='scorecard-value' style='color: {
|
| 561 |
</div>
|
| 562 |
""", unsafe_allow_html=True)
|
| 563 |
|
|
|
|
| 1 |
# app.py
|
| 2 |
+
# Business-Focused Streamlit Application for AI Talent Screening (FIXED)
|
| 3 |
|
| 4 |
import streamlit as st
|
| 5 |
from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
|
|
|
|
| 21 |
initial_sidebar_state="expanded",
|
| 22 |
)
|
| 23 |
|
| 24 |
+
# --- CUSTOM PROFESSIONAL CSS OVERHAUL (UNCHANGED) ---
|
| 25 |
st.markdown("""
|
| 26 |
<style>
|
| 27 |
/* 0. GLOBAL CONFIG & LIGHT THEME */
|
|
|
|
| 98 |
font-weight: bold;
|
| 99 |
}
|
| 100 |
.stSidebar {
|
| 101 |
+
background-color: #E9ECEF; /* Sidebar background color matching light theme */
|
| 102 |
border-right: 1px solid #DEE2E6;
|
| 103 |
}
|
| 104 |
|
|
|
|
| 139 |
</style>
|
| 140 |
""", unsafe_allow_html=True)
|
| 141 |
|
|
|
|
| 142 |
# --- (Model and Helper Functions - Core logic remains the same) ---
|
| 143 |
+
# ... (skills_list, skills_pattern, extract_text_from_pdf/docx/file, normalize_text, check_experience_mismatch, validate_input, load_models, tokenize_inputs, extract_skills, classify_and_summarize_batch, generate_skill_pie_chart functions remain unchanged) ...
|
| 144 |
+
|
| 145 |
+
# NOTE: The fixes in this revision are limited to the `render_sidebar` function
|
| 146 |
+
# and the affected part of `main`; the helper functions that follow are carried over from the previous version.
|
| 147 |
+
|
| 148 |
skills_list = [
|
| 149 |
'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
|
| 150 |
'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
|
|
|
|
| 161 |
]
|
| 162 |
skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
|
| 163 |
|
| 164 |
+
# Helper functions for CV parsing
|
| 165 |
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF on a best-effort basis.

    Args:
        file: Object handed straight to ``PyPDF2.PdfReader``
            (``extract_text_from_file`` passes the uploaded file here).

    Returns:
        The text of all pages that produced any text, each followed by a
        newline, with surrounding whitespace stripped. An empty string is
        returned if reading or parsing the PDF fails.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file)
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        # Pages that yield no text (None or "") are skipped.
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text).strip()
    except Exception:
        # Keep the original best-effort contract of returning "" on failure,
        # but catch Exception instead of using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate.
        return ""
|
| 175 |
|
|
|
|
| 181 |
text += paragraph.text + "\n"
|
| 182 |
return text.strip()
|
| 183 |
except: return ""
|
| 184 |
+
|
| 185 |
def extract_text_from_file(uploaded_file):
|
| 186 |
if uploaded_file.name.endswith('.pdf'): return extract_text_from_pdf(uploaded_file)
|
| 187 |
elif uploaded_file.name.endswith('.docx'): return extract_text_from_docx(uploaded_file)
|
|
|
|
| 193 |
return text
|
| 194 |
|
| 195 |
def check_experience_mismatch(resume, job_description):
|
|
|
|
| 196 |
resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
|
| 197 |
job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
|
| 198 |
if resume_match and job_match:
|
|
|
|
| 214 |
|
| 215 |
@st.cache_resource
|
| 216 |
def load_models():
|
|
|
|
| 217 |
bert_model_path = 'scmlewis/bert-finetuned-isom5240'
|
| 218 |
bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
|
| 219 |
bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
|
|
|
|
| 228 |
|
| 229 |
@st.cache_data
|
| 230 |
def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
|
|
|
|
| 231 |
job_description_norm = normalize_text(job_description)
|
| 232 |
bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
|
| 233 |
bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
|
|
|
|
| 248 |
|
| 249 |
@st.cache_data
|
| 250 |
def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input, _t5_tok, _job_skills_set):
|
|
|
|
| 251 |
_, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
|
| 252 |
timeout = 60
|
| 253 |
|
|
|
|
| 306 |
|
| 307 |
@st.cache_data
|
| 308 |
def generate_skill_pie_chart(resumes):
|
|
|
|
| 309 |
skill_counts = {}
|
| 310 |
total_resumes = len([r for r in resumes if r.strip()])
|
| 311 |
if total_resumes == 0: return None
|
|
|
|
| 326 |
labels = list(top_skills.keys())
|
| 327 |
sizes = [(count / sum(top_skills.values())) * 100 for count in top_skills.values()]
|
| 328 |
|
|
|
|
| 329 |
plt.style.use('default')
|
| 330 |
fig, ax = plt.subplots(figsize=(6, 4))
|
| 331 |
+
colors = plt.cm.tab10(np.linspace(0, 1, len(labels)))
|
| 332 |
plt.rcParams['text.color'] = 'black'
|
| 333 |
wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': 'black'})
|
| 334 |
ax.axis('equal')
|
|
|
|
| 336 |
return fig
|
| 337 |
|
| 338 |
def render_sidebar():
|
| 339 |
+
"""Render sidebar content with professional HR language. FIXED: Replaced st.get_style_color with hex codes."""
|
| 340 |
+
# Define hex colors to replace st.get_style_color() calls
|
| 341 |
+
SUCCESS_COLOR = "#28A745" # Corporate Green
|
| 342 |
+
WARNING_COLOR = "#FFC107" # Corporate Yellow
|
| 343 |
+
DANGER_COLOR = "#DC3545" # Corporate Red
|
| 344 |
+
PRIMARY_COLOR = "#007BFF" # Corporate Blue
|
| 345 |
+
|
| 346 |
with st.sidebar:
|
| 347 |
st.markdown(f"""
|
| 348 |
+
<h2 style='text-align: center; border-left: none; padding-left: 0; color: {PRIMARY_COLOR};'>
|
| 349 |
Talent Screening Assistant
|
| 350 |
</h2>
|
| 351 |
<p style='text-align: center; font-size: 14px; margin-top: 0; color: #6C757D;'>
|
|
|
|
| 370 |
""")
|
| 371 |
|
| 372 |
with st.expander("🎯 Screening Outcomes Explained", expanded=False):
|
| 373 |
+
# FIXED: Replaced st.get_style_color with hex color codes
|
| 374 |
st.markdown(f"""
|
| 375 |
+
- **Relevant** (`{SUCCESS_COLOR}`): Strong match across all criteria. Proceed to interview.
|
| 376 |
+
- **Irrelevant** (`{DANGER_COLOR}`): Low skill overlap or poor fit. Pass on candidate.
|
| 377 |
+
- **Requires Review** (`{WARNING_COLOR}`): **Flagged** due to Experience Mismatch or Lower AI confidence. Requires manual review by a hiring manager.
|
| 378 |
""")
|
| 379 |
|
| 380 |
def main():
|
|
|
|
| 503 |
|
| 504 |
for i, resume in enumerate(valid_resumes):
|
| 505 |
status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
|
| 506 |
+
|
| 507 |
+
# Create single-batch tensors for BERT and T5
|
| 508 |
+
bert_tok_single = {
|
| 509 |
+
'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0),
|
| 510 |
+
'attention_mask': bert_tokenized['attention_mask'][i].unsqueeze(0)
|
| 511 |
+
}
|
| 512 |
+
t5_tok_single = {
|
| 513 |
+
'input_ids': t5_tokenized['input_ids'][i].unsqueeze(0),
|
| 514 |
+
'attention_mask': t5_tokenized['attention_mask'][i].unsqueeze(0)
|
| 515 |
+
}
|
| 516 |
+
|
| 517 |
result = classify_and_summarize_batch(resume, job_description, bert_tok_single, t5_inputs[i], t5_tok_single, job_skills_set)
|
| 518 |
result["Profile ID"] = f"Candidate {i+1}"
|
| 519 |
results.append(result)
|
|
|
|
| 540 |
|
| 541 |
st.markdown(f"#### Overview: {total} Candidate Profiles Processed")
|
| 542 |
|
| 543 |
+
# Define hex colors again for the scorecard blocks
|
| 544 |
+
PRIMARY_COLOR = "#007BFF" # Corporate Blue
|
| 545 |
+
SUCCESS_COLOR = "#28A745" # Corporate Green
|
| 546 |
+
WARNING_COLOR = "#FFC107" # Corporate Yellow
|
| 547 |
+
DANGER_COLOR = "#DC3545" # Corporate Red
|
| 548 |
+
|
| 549 |
col1, col2, col3, col4 = st.columns(4)
|
| 550 |
|
| 551 |
with col1:
|
| 552 |
st.markdown(f"""
|
| 553 |
<div class='scorecard-block'>
|
| 554 |
<div class='scorecard-label'>TOTAL PROFILES</div>
|
| 555 |
+
<div class='scorecard-value' style='color:{PRIMARY_COLOR};'>{total}</div>
|
| 556 |
</div>
|
| 557 |
""", unsafe_allow_html=True)
|
| 558 |
|
| 559 |
with col2:
|
| 560 |
st.markdown(f"""
|
| 561 |
<div class='scorecard-block block-relevant'>
|
| 562 |
+
<div class='scorecard-label' style='color: {SUCCESS_COLOR};'>RELEVANT MATCHES</div>
|
| 563 |
+
<div class='scorecard-value' style='color: {SUCCESS_COLOR};'>{relevant_count}</div>
|
| 564 |
</div>
|
| 565 |
""", unsafe_allow_html=True)
|
| 566 |
|
| 567 |
with col3:
|
| 568 |
st.markdown(f"""
|
| 569 |
<div class='scorecard-block block-uncertain'>
|
| 570 |
+
<div class='scorecard-label' style='color: {WARNING_COLOR};'>REQUIRES REVIEW</div>
|
| 571 |
+
<div class='scorecard-value' style='color: {WARNING_COLOR};'>{review_count}</div>
|
| 572 |
</div>
|
| 573 |
""", unsafe_allow_html=True)
|
| 574 |
|
| 575 |
with col4:
|
| 576 |
st.markdown(f"""
|
| 577 |
<div class='scorecard-block block-irrelevant'>
|
| 578 |
+
<div class='scorecard-label' style='color: {DANGER_COLOR};'>IRRELEVANT / ERROR</div>
|
| 579 |
+
<div class='scorecard-value' style='color: {DANGER_COLOR};'>{irrelevant_count}</div>
|
| 580 |
</div>
|
| 581 |
""", unsafe_allow_html=True)
|
| 582 |
|