Spaces:

scmlewis
/

Resume_Screening_Assistant_for_Data_Tech

Sleeping

App Files Files Community

scmlewis committed on Oct 29, 2025

Commit

34cbce4

verified ·

1 Parent(s): edcaccf

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -30

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # app.py
-# Business-Focused Streamlit Application for AI Talent Screening
 import streamlit as st
 from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
@@ -21,7 +21,7 @@ st.set_page_config(
     initial_sidebar_state="expanded",
 )
-# --- CUSTOM PROFESSIONAL CSS OVERHAUL ---
 st.markdown("""
     <style>
     /* 0. GLOBAL CONFIG & LIGHT THEME */
@@ -98,7 +98,7 @@ st.markdown("""
         font-weight: bold;
     }
     .stSidebar {
-        background-color: #E9ECEF; /* Slightly darker sidebar for contrast */
         border-right: 1px solid #DEE2E6;
     }
@@ -139,10 +139,12 @@ st.markdown("""
     </style>
 """, unsafe_allow_html=True)
 # --- (Model and Helper Functions - Core logic remains the same) ---
-# NOTE: The core ML logic and utility functions for PDF/DOCX parsing remain unchanged
-# as they are robust and purely functional.
 skills_list = [
     'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
     'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
@@ -159,14 +161,15 @@ skills_list = [
 ]
 skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
-# Utility functions (simplified for brevity here, but full code in final output)
 def extract_text_from_pdf(file):
     try:
         pdf_reader = PyPDF2.PdfReader(file)
         text = ""
         for page in pdf_reader.pages:
             page_text = page.extract_text()
-            if page_text: text += page_text + "\n"
         return text.strip()
     except: return ""
@@ -178,7 +181,7 @@ def extract_text_from_docx(file):
             text += paragraph.text + "\n"
         return text.strip()
     except: return ""
 def extract_text_from_file(uploaded_file):
     if uploaded_file.name.endswith('.pdf'): return extract_text_from_pdf(uploaded_file)
     elif uploaded_file.name.endswith('.docx'): return extract_text_from_docx(uploaded_file)
@@ -190,7 +193,6 @@ def normalize_text(text):
     return text
 def check_experience_mismatch(resume, job_description):
-    # ... (Experience mismatch logic remains) ...
     resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
     job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
     if resume_match and job_match:
@@ -212,7 +214,6 @@ def validate_input(text, is_resume=True):
 @st.cache_resource
 def load_models():
-    # ... (Model loading logic remains) ...
     bert_model_path = 'scmlewis/bert-finetuned-isom5240'
     bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
     bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
@@ -227,7 +228,6 @@ def load_models():
 @st.cache_data
 def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
-    # ... (Tokenization logic remains) ...
     job_description_norm = normalize_text(job_description)
     bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
     bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
@@ -248,7 +248,6 @@ def extract_skills(text):
 @st.cache_data
 def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input, _t5_tok, _job_skills_set):
-    # ... (Inference and classification logic remains) ...
     _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
     timeout = 60
@@ -307,7 +306,6 @@ def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input,
 @st.cache_data
 def generate_skill_pie_chart(resumes):
-    # ... (Pie chart logic remains, but with business colors) ...
     skill_counts = {}
     total_resumes = len([r for r in resumes if r.strip()])
     if total_resumes == 0: return None
@@ -328,10 +326,9 @@ def generate_skill_pie_chart(resumes):
     labels = list(top_skills.keys())
     sizes = [(count / sum(top_skills.values())) * 100 for count in top_skills.values()]
-    # Use standard white background for a business report look
     plt.style.use('default')
     fig, ax = plt.subplots(figsize=(6, 4))
-    colors = plt.cm.tab10(np.linspace(0, 1, len(labels))) # Professional color map
     plt.rcParams['text.color'] = 'black'
     wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': 'black'})
     ax.axis('equal')
@@ -339,10 +336,16 @@ def generate_skill_pie_chart(resumes):
     return fig
 def render_sidebar():
-    """Render sidebar content with professional HR language."""
     with st.sidebar:
         st.markdown(f"""
-            <h2 style='text-align: center; border-left: none; padding-left: 0; color: #007BFF;'>
                 Talent Screening Assistant
             </h2>
             <p style='text-align: center; font-size: 14px; margin-top: 0; color: #6C757D;'>
@@ -367,10 +370,11 @@ def render_sidebar():
             """)
         with st.expander("🎯 Screening Outcomes Explained", expanded=False):
             st.markdown(f"""
-                - **Relevant** (`{st.get_style_color("success")}`): Strong match across all criteria. Proceed to interview.
-                - **Irrelevant** (`{st.get_style_color("danger")}`): Low skill overlap or poor fit. Pass on candidate.
-                - **Requires Review** (`{st.get_style_color("warning")}`): **Flagged** due to Experience Mismatch or Lower AI confidence. Requires manual review by a hiring manager.
             """)
 def main():
@@ -499,8 +503,17 @@ def main():
                 for i, resume in enumerate(valid_resumes):
                     status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
-                    bert_tok_single = {'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0), 'attention_mask': bert_tokenized['attention_ids'][i].unsqueeze(0)}
-                    t5_tok_single = {'input_ids': t5_tokenized['input_ids'][i].unsqueeze(0), 'attention_mask': t5_tokenized['attention_mask'][i].unsqueeze(0)}
                     result = classify_and_summarize_batch(resume, job_description, bert_tok_single, t5_inputs[i], t5_tok_single, job_skills_set)
                     result["Profile ID"] = f"Candidate {i+1}"
                     results.append(result)
@@ -527,37 +540,43 @@ def main():
             st.markdown(f"#### Overview: {total} Candidate Profiles Processed")
             col1, col2, col3, col4 = st.columns(4)
             with col1:
                 st.markdown(f"""
                     <div class='scorecard-block'>
                         <div class='scorecard-label'>TOTAL PROFILES</div>
-                        <div class='scorecard-value' style='color:{st.get_style_color("primary")};'>{total}</div>
                     </div>
                 """, unsafe_allow_html=True)
             with col2:
                 st.markdown(f"""
                     <div class='scorecard-block block-relevant'>
-                        <div class='scorecard-label' style='color: {st.get_style_color("success")};'>RELEVANT MATCHES</div>
-                        <div class='scorecard-value' style='color: {st.get_style_color("success")};'>{relevant_count}</div>
                     </div>
                 """, unsafe_allow_html=True)
             with col3:
                 st.markdown(f"""
                     <div class='scorecard-block block-uncertain'>
-                        <div class='scorecard-label' style='color: {st.get_style_color("warning")};'>REQUIRES REVIEW</div>
-                        <div class='scorecard-value' style='color: {st.get_style_color("warning")};'>{review_count}</div>
                     </div>
                 """, unsafe_allow_html=True)
             with col4:
                 st.markdown(f"""
                     <div class='scorecard-block block-irrelevant'>
-                        <div class='scorecard-label' style='color: {st.get_style_color("danger")};'>IRRELEVANT / ERROR</div>
-                        <div class='scorecard-value' style='color: {st.get_style_color("danger")};'>{irrelevant_count}</div>
                     </div>
                 """, unsafe_allow_html=True)

 # app.py
+# Business-Focused Streamlit Application for AI Talent Screening (FIXED)
 import streamlit as st
 from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
     initial_sidebar_state="expanded",
 )
+# --- CUSTOM PROFESSIONAL CSS OVERHAUL (UNCHANGED) ---
 st.markdown("""
     <style>
     /* 0. GLOBAL CONFIG & LIGHT THEME */
         font-weight: bold;
     }
     .stSidebar {
+        background-color: #E9ECEF; /* Sidebar background color matching light theme */
         border-right: 1px solid #DEE2E6;
     }
     </style>
 """, unsafe_allow_html=True)
 # --- (Model and Helper Functions - Core logic remains the same) ---
+# ... (skills_list, skills_pattern, extract_text_from_pdf/docx/file, normalize_text, check_experience_mismatch, validate_input, load_models, tokenize_inputs, extract_skills, classify_and_summarize_batch, generate_skill_pie_chart functions remain unchanged) ...
+# NOTE: The functional changes in this revision are confined to `render_sidebar`
+# and the affected part of `main`; the helper functions listed above keep their existing logic.
 skills_list = [
     'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
     'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
 ]
 skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
+# Helper functions for CV parsing
 def extract_text_from_pdf(file):
     try:
         pdf_reader = PyPDF2.PdfReader(file)
         text = ""
         for page in pdf_reader.pages:
             page_text = page.extract_text()
+            if page_text:
+                text += page_text + "\n"
         return text.strip()
     except: return ""
             text += paragraph.text + "\n"
         return text.strip()
     except: return ""
 def extract_text_from_file(uploaded_file):
     if uploaded_file.name.endswith('.pdf'): return extract_text_from_pdf(uploaded_file)
     elif uploaded_file.name.endswith('.docx'): return extract_text_from_docx(uploaded_file)
     return text
 def check_experience_mismatch(resume, job_description):
     resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
     job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
     if resume_match and job_match:
 @st.cache_resource
 def load_models():
     bert_model_path = 'scmlewis/bert-finetuned-isom5240'
     bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
     bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
 @st.cache_data
 def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
     job_description_norm = normalize_text(job_description)
     bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
     bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
 @st.cache_data
 def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input, _t5_tok, _job_skills_set):
     _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
     timeout = 60
 @st.cache_data
 def generate_skill_pie_chart(resumes):
     skill_counts = {}
     total_resumes = len([r for r in resumes if r.strip()])
     if total_resumes == 0: return None
     labels = list(top_skills.keys())
     sizes = [(count / sum(top_skills.values())) * 100 for count in top_skills.values()]
     plt.style.use('default')
     fig, ax = plt.subplots(figsize=(6, 4))
+    colors = plt.cm.tab10(np.linspace(0, 1, len(labels)))
     plt.rcParams['text.color'] = 'black'
     wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10, 'color': 'black'})
     ax.axis('equal')
     return fig
 def render_sidebar():
+    """Render sidebar content with professional HR language. FIXED: Replaced st.get_style_color with hex codes."""
+    # Define hex colors to replace st.get_style_color() calls
+    SUCCESS_COLOR = "#28A745"  # Corporate Green
+    WARNING_COLOR = "#FFC107"  # Corporate Yellow
+    DANGER_COLOR = "#DC3545"   # Corporate Red
+    PRIMARY_COLOR = "#007BFF"  # Corporate Blue
     with st.sidebar:
         st.markdown(f"""
+            <h2 style='text-align: center; border-left: none; padding-left: 0; color: {PRIMARY_COLOR};'>
                 Talent Screening Assistant
             </h2>
             <p style='text-align: center; font-size: 14px; margin-top: 0; color: #6C757D;'>
             """)
         with st.expander("🎯 Screening Outcomes Explained", expanded=False):
+            # FIXED: Replaced st.get_style_color with hex color codes
             st.markdown(f"""
+                - **Relevant** (`{SUCCESS_COLOR}`): Strong match across all criteria. Proceed to interview.
+                - **Irrelevant** (`{DANGER_COLOR}`): Low skill overlap or poor fit. Pass on candidate.
+                - **Requires Review** (`{WARNING_COLOR}`): **Flagged** due to Experience Mismatch or Lower AI confidence. Requires manual review by a hiring manager.
             """)
 def main():
                 for i, resume in enumerate(valid_resumes):
                     status_text.text(f"Status: Analyzing Profile {i+1} of {total_steps}...")
+                    # Create single-batch tensors for BERT and T5
+                    bert_tok_single = {
+                        'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0),
+                        'attention_mask': bert_tokenized['attention_mask'][i].unsqueeze(0)
+                    }
+                    t5_tok_single = {
+                        'input_ids': t5_tokenized['input_ids'][i].unsqueeze(0),
+                        'attention_mask': t5_tokenized['attention_mask'][i].unsqueeze(0)
+                    }
                     result = classify_and_summarize_batch(resume, job_description, bert_tok_single, t5_inputs[i], t5_tok_single, job_skills_set)
                     result["Profile ID"] = f"Candidate {i+1}"
                     results.append(result)
             st.markdown(f"#### Overview: {total} Candidate Profiles Processed")
+            # Define hex colors again for the scorecard blocks
+            PRIMARY_COLOR = "#007BFF"  # Corporate Blue
+            SUCCESS_COLOR = "#28A745"  # Corporate Green
+            WARNING_COLOR = "#FFC107"  # Corporate Yellow
+            DANGER_COLOR = "#DC3545"   # Corporate Red
             col1, col2, col3, col4 = st.columns(4)
             with col1:
                 st.markdown(f"""
                     <div class='scorecard-block'>
                         <div class='scorecard-label'>TOTAL PROFILES</div>
+                        <div class='scorecard-value' style='color:{PRIMARY_COLOR};'>{total}</div>
                     </div>
                 """, unsafe_allow_html=True)
             with col2:
                 st.markdown(f"""
                     <div class='scorecard-block block-relevant'>
+                        <div class='scorecard-label' style='color: {SUCCESS_COLOR};'>RELEVANT MATCHES</div>
+                        <div class='scorecard-value' style='color: {SUCCESS_COLOR};'>{relevant_count}</div>
                     </div>
                 """, unsafe_allow_html=True)
             with col3:
                 st.markdown(f"""
                     <div class='scorecard-block block-uncertain'>
+                        <div class='scorecard-label' style='color: {WARNING_COLOR};'>REQUIRES REVIEW</div>
+                        <div class='scorecard-value' style='color: {WARNING_COLOR};'>{review_count}</div>
                     </div>
                 """, unsafe_allow_html=True)
             with col4:
                 st.markdown(f"""
                     <div class='scorecard-block block-irrelevant'>
+                        <div class='scorecard-label' style='color: {DANGER_COLOR};'>IRRELEVANT / ERROR</div>
+                        <div class='scorecard-value' style='color: {DANGER_COLOR};'>{irrelevant_count}</div>
                     </div>
                 """, unsafe_allow_html=True)