scmlewis committed on
Commit
5fd9f8e
·
verified ·
1 Parent(s): f6d04de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +286 -162
app.py CHANGED
@@ -15,9 +15,10 @@ import time
15
  # Set page config as the first Streamlit command
16
  st.set_page_config(page_title="Resume Screening Assistant for Data/Tech", page_icon="πŸ“„", layout="wide")
17
 
18
- # Set sidebar width and make uncollapsible
19
  st.markdown("""
20
  <style>
 
21
  .css-1d391kg { /* Sidebar */
22
  width: 350px !important;
23
  }
@@ -28,14 +29,34 @@ st.markdown("""
28
  min-width: 350px !important;
29
  visibility: visible !important;
30
  }
 
31
  [data-testid="stExpander"] summary { /* Expander headers */
32
  font-size: 26px !important;
33
  font-weight: bold !important;
34
  text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1) !important;
35
  white-space: nowrap !important;
36
  }
37
- .st-expander-content p { /* Expander body text */
38
- font-size: 12px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
  </style>
41
  """, unsafe_allow_html=True)
@@ -59,7 +80,7 @@ skills_list = [
59
  # Precompile regex for skills matching (optimized for single pass)
60
  skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
61
 
62
- # Helper functions for CV parsing
63
  def extract_text_from_pdf(file):
64
  try:
65
  pdf_reader = PyPDF2.PdfReader(file)
@@ -90,10 +111,10 @@ def extract_text_from_file(uploaded_file):
90
  elif uploaded_file.name.endswith('.docx'):
91
  return extract_text_from_docx(uploaded_file)
92
  else:
93
- st.error("Unsupported file format. Please upload a PDF or Word (.docx) document.")
94
  return ""
95
 
96
- # Helper functions for analysis
97
  def normalize_text(text):
98
  text = text.lower()
99
  # Remove underscores, hyphens, and specific phrases, replacing with empty string
@@ -101,24 +122,28 @@ def normalize_text(text):
101
  return text
102
 
103
  def check_experience_mismatch(resume, job_description):
 
104
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
105
- # Allow optional words like "experience" between "years" and "+"
106
  job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
107
  if resume_match and job_match:
108
- resume_years = resume_match.group(0)
109
- job_years = job_match.group(0)
 
110
  # Handle resume years
111
- if 'senior' in resume_years:
112
  resume_num = 10
113
  else:
114
  resume_num = int(resume_match.group(1))
 
115
  # Handle job years
116
- if 'senior+' in job_years:
117
  job_num = 10
118
  else:
119
  job_num = int(job_match.group(1))
 
120
  if resume_num < job_num:
121
- return f"Experience mismatch: Resume has {resume_years}, job requires {job_years}"
122
  return None
123
 
124
  def validate_input(text, is_resume=True):
@@ -150,13 +175,16 @@ def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
150
  """Precompute tokenized inputs for BERT and T5."""
151
  job_description_norm = normalize_text(job_description)
152
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
 
153
  bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
154
 
155
  t5_inputs = []
156
  for resume in resumes:
 
157
  prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
158
  prompt_normalized = normalize_text(prompt)
159
  t5_inputs.append(f"summarize: {prompt_normalized}")
 
160
  t5_tokenized = _t5_tokenizer(t5_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
161
 
162
  return bert_tokenized, t5_inputs, t5_tokenized
@@ -167,19 +195,19 @@ def extract_skills(text):
167
  text_normalized = normalize_text(text)
168
  text_normalized = re.sub(r'[,_-]', ' ', text_normalized)
169
  found_skills = skills_pattern.findall(text_normalized)
170
- return set(found_skills)
171
 
172
  @st.cache_data
173
- def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_input, _t5_tokenized, _job_skills_set):
174
  """Process one resume at a time to reduce CPU load with a timeout."""
 
175
  _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
176
- start_time = time.time()
177
  timeout = 60 # Timeout after 60 seconds
178
 
179
  try:
180
- bert_tokenized = {k: v.to(device) for k, v in _bert_tokenized.items()}
 
181
  with torch.no_grad():
182
- # BERT inference
183
  bert_start = time.time()
184
  outputs = bert_model(**bert_tokenized)
185
  if time.time() - bert_start > timeout:
@@ -190,10 +218,11 @@ def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_i
190
  predictions = np.argmax(probabilities, axis=1)
191
 
192
  confidence_threshold = 0.85
193
-
194
- t5_tokenized = {k: v.to(device) for k, v in _t5_tokenized.items()}
 
 
195
  with torch.no_grad():
196
- # T5 inference
197
  t5_start = time.time()
198
  t5_outputs = t5_model.generate(
199
  t5_tokenized['input_ids'],
@@ -208,39 +237,44 @@ def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_i
208
  if time.time() - t5_start > timeout:
209
  raise TimeoutError("T5 inference timed out")
210
  summaries = [t5_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in t5_outputs]
211
- summaries = [re.sub(r'\s+', ' ', summary).strip() for summary in summaries]
212
 
213
- prob, pred, summary, t5_input = probabilities[0], predictions[0], summaries[0], _t5_input
214
  resume_skills_set = extract_skills(resume)
215
  skill_overlap = len(_job_skills_set.intersection(resume_skills_set)) / len(_job_skills_set) if _job_skills_set else 0
216
 
 
 
 
 
217
  if skill_overlap < 0.4:
218
  suitability = "Irrelevant"
219
- warning = "Skills are irrelevant"
220
- else:
221
- exp_warning = check_experience_mismatch(resume, job_description)
222
- if exp_warning:
223
- suitability = "Uncertain"
224
- warning = exp_warning
225
- else:
226
- if prob[pred] < confidence_threshold:
227
- suitability = "Uncertain"
228
- warning = f"Low confidence: {prob[pred]:.4f}"
229
- else:
230
- suitability = "Relevant" if skill_overlap >= 0.5 else "Irrelevant"
231
- warning = "Skills are not a strong match" if suitability == "Irrelevant" else None
232
 
233
- skills = list(set(skills_pattern.findall(t5_input))) # Deduplicate skills
 
234
  exp_match = re.search(r'\d+\s*years?|senior', resume.lower())
235
- if skills and exp_match:
236
- summary = f"{', '.join(skills)} proficiency, {exp_match.group(0)} experience"
 
 
 
237
  else:
238
- summary = f"{exp_match.group(0) if exp_match else 'unknown'} experience"
239
 
240
  result = {
241
  "Suitability": suitability,
242
- "Data/Tech Related Skills Summary": summary,
243
- "Warning": warning or "None"
244
  }
245
 
246
  return result
@@ -273,19 +307,42 @@ def generate_skill_pie_chart(resumes):
273
  resume_lower = normalize_text(resume)
274
  found_skills = skills_pattern.findall(resume_lower)
275
  for skill in found_skills:
276
- skill_counts[skill] = skill_counts.get(skill, 0) + 1
277
 
 
278
  if not skill_counts:
279
  return None
280
 
281
- labels = list(skill_counts.keys())
282
- sizes = [(count / sum(skill_counts.values())) * 100 for count in skill_counts.values()]
 
 
 
 
 
 
 
 
 
 
 
283
 
284
  fig, ax = plt.subplots(figsize=(6, 4))
285
- colors = plt.cm.Blues(np.linspace(0.4, 0.8, len(labels)))
286
- ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10})
287
- ax.axis('equal')
288
- plt.title("Skill Frequency Across Resumes", fontsize=12, color='#007BFF', pad=10)
 
 
 
 
 
 
 
 
 
 
 
289
  return fig
290
 
291
  def render_sidebar():
@@ -301,37 +358,24 @@ def render_sidebar():
301
  with st.expander("πŸ“‹ How to Use the App", expanded=True):
302
  st.markdown("""
303
  **Instructions**:
304
- - Upload a PDF or Word (.docx) CV or manually enter up to 5 candidate resumes in the text boxes, listing data/tech skills and experience (e.g., "Expert in python, databricks, 6 years experience").
305
- - Enter the job description, specifying required skills and experience (e.g., "Data engineer requires python, spark, 5 years+").
306
- - Click **Analyze** to evaluate all non-empty resumes (at least one required).
307
  - Use **Add Resume** or **Remove Resume** to adjust the number of resume fields (1–5).
308
- - Use the **Reset** button to clear all inputs and results.
309
- - Download results as a CSV file for record-keeping.
310
- - View the skill frequency pie chart to see skill distribution across resumes.
311
- - Example test cases:
312
- - **Test Case 1**: Resumes like "Expert in python, machine learning, tableau, 4 years experience" against "Data scientist requires python, machine learning, 3 years+".
313
- - **Test Case 2**: Resumes like "Skilled in databricks, spark, python, 6 years experience" against "Data engineer requires python, spark, 5 years+".
314
-
315
- **Guidelines**:
316
- - Use comma-separated skills from a comprehensive list including python, sql, databricks, etc. (79 skills supported).
317
- - Include experience in years (e.g., "3 years experience" or "1 year experience") or as "senior".
318
- - Focus on data/tech skills for accurate summarization.
319
- - Resumes with only irrelevant skills (e.g., sales, marketing) will be classified as "Irrelevant".
320
- - If uploading a CV, ensure it’s a text-based PDF or Word document (scanned PDFs may not work).
321
  """)
322
  with st.expander("ℹ️ Classification Criteria", expanded=True):
323
  st.markdown("""
324
  The app classifies resumes based on:
325
- - **Skill Overlap**: The resume’s data/tech skills are compared to the job’s requirements. A skill overlap below 40% results in an "Irrelevant" classification.
326
- - **Model Confidence**: A finetuned BERT model evaluates skill relevance. If confidence is below 85%, the classification is "Uncertain".
327
- - **Experience Match**: The resume’s experience (in years or seniority) must meet or exceed the job’s requirement.
328
 
329
  **Outcomes**:
330
- - **Relevant**: Skill overlap β‰₯ 50%, sufficient experience, and high model confidence (β‰₯85%).
331
- - **Irrelevant**: Skill overlap < 40% or high confidence in low skill relevance.
332
- - **Uncertain**: Skill overlap β‰₯ 50% but experience mismatch (e.g., resume has 2 years, job requires 5 years+), or low model confidence (<85%).
333
-
334
- **Note**: An experience mismatch warning is shown if the resume’s experience is below the job’s requirement, overriding the skill overlap and confidence to classify as Uncertain.
335
  """)
336
 
337
  def main():
@@ -341,6 +385,7 @@ def main():
341
 
342
  # Initialize session state
343
  if 'resumes' not in st.session_state:
 
344
  st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience", "", ""]
345
  if 'input_job_description' not in st.session_state:
346
  st.session_state.input_job_description = "Data scientist requires python, machine learning, 3 years+"
@@ -351,85 +396,118 @@ def main():
351
  if 'models' not in st.session_state:
352
  st.session_state.models = None
353
 
354
- # Resume inputs with file upload and manual text input
355
- st.markdown("### πŸ“ Enter Resumes")
356
- for i in range(len(st.session_state.resumes)):
357
- st.markdown(f"**Resume {i+1}**")
358
- uploaded_file = st.file_uploader(f"Upload CV (PDF or Word) for Resume {i+1}", type=['pdf', 'docx'], key=f"file_upload_{i}")
 
 
 
 
359
 
360
- if uploaded_file is not None:
361
- extracted_text = extract_text_from_file(uploaded_file)
362
- if extracted_text:
363
- st.session_state.resumes[i] = extracted_text
364
- else:
365
- st.session_state.resumes[i] = ""
366
-
367
- st.session_state.resumes[i] = st.text_area(
368
- f"Enter or edit resume text",
369
- value=st.session_state.resumes[i],
370
- height=100,
371
- key=f"resume_{i}",
372
- placeholder="e.g., Expert in python, sql, 3 years experience"
373
  )
374
- validation_error = validate_input(st.session_state.resumes[i], is_resume=True)
375
- if validation_error and st.session_state.resumes[i].strip():
376
- st.warning(f"Resume {i+1}: {validation_error}")
377
-
378
- # Add/Remove resume buttons
379
- col_add, col_remove, _ = st.columns([1, 1, 3])
380
- with col_add:
381
- if st.button("Add Resume") and len(st.session_state.resumes) < 5:
382
- st.session_state.resumes.append("")
383
- st.rerun()
384
- with col_remove:
385
- if st.button("Remove Resume") and len(st.session_state.resumes) > 1:
386
- st.session_state.resumes.pop()
387
- st.rerun()
388
-
389
- # Job description input
390
- st.markdown("### πŸ“‹ Enter Job Description")
391
- job_description = st.text_area(
392
- "Job Description",
393
- value=st.session_state.input_job_description,
394
- height=100,
395
- key="job_description",
396
- placeholder="e.g., Data scientist requires python, sql, 3 years+"
397
- )
398
- validation_error = validate_input(job_description, is_resume=False)
399
- if validation_error and job_description.strip():
400
- st.warning(f"Job Description: {validation_error}")
401
-
402
- # Analyze and Reset buttons
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  col_btn1, col_btn2, _ = st.columns([1, 1, 3])
404
  with col_btn1:
405
- analyze_clicked = st.button("Analyze", type="primary")
406
  with col_btn2:
407
- reset_clicked = st.button("Reset")
408
-
 
409
  # Handle reset
410
  if reset_clicked:
411
  st.session_state.resumes = ["", "", ""]
412
  st.session_state.input_job_description = ""
413
  st.session_state.results = []
414
  st.session_state.valid_resumes = []
 
415
  st.rerun()
416
 
417
  # Handle analysis with early validation and lazy model loading
418
  if analyze_clicked:
419
  # Early validation of inputs
420
  valid_resumes = []
 
 
421
  for i, resume in enumerate(st.session_state.resumes):
422
  validation_error = validate_input(resume, is_resume=True)
423
  if not validation_error and resume.strip():
424
  valid_resumes.append(resume)
425
  elif validation_error and resume.strip():
426
- st.warning(f"Resume {i+1}: {validation_error}")
427
-
428
- validation_error = validate_input(job_description, is_resume=False)
429
- if validation_error and job_description.strip():
430
- st.warning(f"Job Description: {validation_error}")
 
 
431
 
432
- if valid_resumes and job_description.strip():
433
  # Load models only when needed
434
  if st.session_state.models is None:
435
  with st.spinner("Loading models, please wait..."):
@@ -442,25 +520,29 @@ def main():
442
  with st.spinner("Analyzing resumes..."):
443
  progress_bar = st.progress(0)
444
  status_text = st.empty()
445
- status_text.text("Preparing inputs...")
446
 
447
  # Retrieve tokenizers from st.session_state.models
448
- bert_tokenizer, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
449
 
 
450
  # Precompute tokenized inputs and job skills
451
  bert_tokenized, t5_inputs, t5_tokenized = tokenize_inputs(valid_resumes, job_description, bert_tokenizer, t5_tokenizer)
452
  job_skills_set = extract_skills(job_description)
453
 
454
- status_text.text("Classifying and summarizing resumes...")
455
  results = []
456
- for i, (resume, bert_tok, t5_in, t5_tok) in enumerate(zip(valid_resumes, bert_tokenized['input_ids'], t5_inputs, t5_tokenized['input_ids'])):
457
- status_text.text(f"Processing Resume {i+1}/{total_steps}: {resume[:50]}...")
 
 
 
 
 
458
  result = classify_and_summarize_batch(
459
  resume,
460
  job_description,
461
- {'input_ids': bert_tok.unsqueeze(0), 'attention_mask': bert_tokenized['attention_mask'][i].unsqueeze(0)},
462
- t5_in,
463
- {'input_ids': t5_tok.unsqueeze(0), 'attention_mask': t5_tokenized['attention_mask'][i].unsqueeze(0)},
464
  job_skills_set
465
  )
466
  result["Resume"] = f"Resume {i+1}"
@@ -471,34 +553,76 @@ def main():
471
 
472
  status_text.empty()
473
  progress_bar.empty()
474
- st.success("Analysis completed! πŸŽ‰")
 
475
  else:
476
- st.error("Please enter at least one valid resume and a job description.")
477
 
478
- # Display results
479
- if st.session_state.results:
480
- st.markdown("### πŸ“Š Results")
481
- st.table(st.session_state.results)
482
-
483
- csv_buffer = io.StringIO()
484
- csv_buffer.write("Resume Number,Resume Text,Job Description,Suitability,Summary,Warning\n")
485
- for i, result in enumerate(st.session_state.results):
486
- resume_text = st.session_state.valid_resumes[i].replace('"', '""').replace('\n', ' ')
487
- job_text = job_description.replace('"', '""').replace('\n', ' ')
488
- csv_buffer.write(f'"{result["Resume"]}","{resume_text}","{job_text}","{result["Suitability"]}","{result["Data/Tech Related Skills Summary"]}","{result["Warning"]}"\n')
489
- st.download_button("Download Results", csv_buffer.getvalue(), file_name="resume_analysis.csv", mime="text/csv")
490
-
491
- with st.expander("πŸ“ˆ Skill Frequency Across Resumes", expanded=False):
492
- if st.session_state.valid_resumes:
493
- fig = generate_skill_pie_chart(st.session_state.valid_resumes)
494
- if fig:
495
- st.pyplot(fig)
496
- plt.close(fig)
497
- else:
498
- st.write("No recognized data/tech skills found in the resumes.")
499
- else:
500
- st.write("No valid resumes to analyze.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
 
502
  if __name__ == "__main__":
503
- # When this module is run directly, call the main function.
504
  main()
 
15
  # Set page config as the first Streamlit command
16
  st.set_page_config(page_title="Resume Screening Assistant for Data/Tech", page_icon="πŸ“„", layout="wide")
17
 
18
+ # --- CUSTOM CSS FOR UI/UX IMPROVEMENTS ---
19
  st.markdown("""
20
  <style>
21
+ /* 1. Sidebar Styling */
22
  .css-1d391kg { /* Sidebar */
23
  width: 350px !important;
24
  }
 
29
  min-width: 350px !important;
30
  visibility: visible !important;
31
  }
32
+ /* 2. Expander/Instructions Styling */
33
  [data-testid="stExpander"] summary { /* Expander headers */
34
  font-size: 26px !important;
35
  font-weight: bold !important;
36
  text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1) !important;
37
  white-space: nowrap !important;
38
  }
39
+ .st-expander-content p { /* Expander body text - made slightly larger for readability */
40
+ font-size: 14px !important;
41
+ line-height: 1.6;
42
+ }
43
+ /* 3. Main Title Styling */
44
+ h1 {
45
+ text-align: center;
46
+ color: #007BFF;
47
+ font-size: 40px;
48
+ }
49
+ /* 4. Tab Styling */
50
+ .stTabs [data-baseweb="tab-list"] {
51
+ gap: 24px;
52
+ }
53
+ .stTabs [data-baseweb="tab"] {
54
+ height: 50px;
55
+ white-space: nowrap;
56
+ border-radius: 4px 4px 0 0;
57
+ gap: 1px;
58
+ padding-top: 10px;
59
+ padding-bottom: 10px;
60
  }
61
  </style>
62
  """, unsafe_allow_html=True)
 
80
  # Precompile regex for skills matching (optimized for single pass)
81
  skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
82
 
83
+ # --- Helper functions for CV parsing ---
84
  def extract_text_from_pdf(file):
85
  try:
86
  pdf_reader = PyPDF2.PdfReader(file)
 
111
  elif uploaded_file.name.endswith('.docx'):
112
  return extract_text_from_docx(uploaded_file)
113
  else:
114
+ # st.error is now handled by the calling logic if the text is empty
115
  return ""
116
 
117
+ # --- Helper functions for analysis ---
118
  def normalize_text(text):
119
  text = text.lower()
120
  # Remove underscores, hyphens, and specific phrases, replacing with empty string
 
122
  return text
123
 
124
  def check_experience_mismatch(resume, job_description):
125
+ # Search for year numbers or 'senior' in resume
126
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
127
+ # Search for year numbers followed by '+' or 'senior+' in JD
128
  job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
129
  if resume_match and job_match:
130
+ resume_years_text = resume_match.group(0)
131
+ job_years_text = job_match.group(0)
132
+
133
  # Handle resume years
134
+ if 'senior' in resume_years_text:
135
  resume_num = 10
136
  else:
137
  resume_num = int(resume_match.group(1))
138
+
139
  # Handle job years
140
+ if 'senior+' in job_years_text:
141
  job_num = 10
142
  else:
143
  job_num = int(job_match.group(1))
144
+
145
  if resume_num < job_num:
146
+ return f"Experience mismatch: Resume has {resume_years_text.strip()}, job requires {job_years_text.strip()}"
147
  return None
148
 
149
  def validate_input(text, is_resume=True):
 
175
  """Precompute tokenized inputs for BERT and T5."""
176
  job_description_norm = normalize_text(job_description)
177
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
178
+ # BERT tokens must be padded/truncated consistently
179
  bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
180
 
181
  t5_inputs = []
182
  for resume in resumes:
183
+ # Prompt preparation for T5 summary
184
  prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
185
  prompt_normalized = normalize_text(prompt)
186
  t5_inputs.append(f"summarize: {prompt_normalized}")
187
+ # T5 tokens must be padded/truncated consistently
188
  t5_tokenized = _t5_tokenizer(t5_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
189
 
190
  return bert_tokenized, t5_inputs, t5_tokenized
 
195
  text_normalized = normalize_text(text)
196
  text_normalized = re.sub(r'[,_-]', ' ', text_normalized)
197
  found_skills = skills_pattern.findall(text_normalized)
198
+ return set(s.lower() for s in found_skills) # Ensure lower case for set intersection
199
 
200
  @st.cache_data
201
+ def classify_and_summarize_batch(resume, job_description, _bert_tok, _t5_input, _t5_tok, _job_skills_set):
202
  """Process one resume at a time to reduce CPU load with a timeout."""
203
+ # Note: We pass single-item dicts for inference to avoid re-tokenization outside of cache
204
  _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
 
205
  timeout = 60 # Timeout after 60 seconds
206
 
207
  try:
208
+ # --- BERT Inference (Classification) ---
209
+ bert_tokenized = {k: v.to(device) for k, v in _bert_tok.items()}
210
  with torch.no_grad():
 
211
  bert_start = time.time()
212
  outputs = bert_model(**bert_tokenized)
213
  if time.time() - bert_start > timeout:
 
218
  predictions = np.argmax(probabilities, axis=1)
219
 
220
  confidence_threshold = 0.85
221
+ prob, pred = probabilities[0], predictions[0]
222
+
223
+ # --- T5 Inference (Summarization) ---
224
+ t5_tokenized = {k: v.to(device) for k, v in _t5_tok.items()}
225
  with torch.no_grad():
 
226
  t5_start = time.time()
227
  t5_outputs = t5_model.generate(
228
  t5_tokenized['input_ids'],
 
237
  if time.time() - t5_start > timeout:
238
  raise TimeoutError("T5 inference timed out")
239
  summaries = [t5_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in t5_outputs]
240
+ summary = re.sub(r'\s+', ' ', summaries[0]).strip()
241
 
242
+ # --- Suitability Logic ---
243
  resume_skills_set = extract_skills(resume)
244
  skill_overlap = len(_job_skills_set.intersection(resume_skills_set)) / len(_job_skills_set) if _job_skills_set else 0
245
 
246
+ suitability = "Relevant"
247
+ warning = "None"
248
+ exp_warning = check_experience_mismatch(resume, job_description)
249
+
250
  if skill_overlap < 0.4:
251
  suitability = "Irrelevant"
252
+ warning = "Low skill overlap (<40%) with job requirements"
253
+ elif exp_warning:
254
+ suitability = "Uncertain"
255
+ warning = exp_warning
256
+ elif prob[pred] < confidence_threshold:
257
+ suitability = "Uncertain"
258
+ warning = f"Low model confidence: {prob[pred]:.2f}"
259
+ elif skill_overlap < 0.5:
260
+ suitability = "Irrelevant"
261
+ warning = "Skill overlap is acceptable but not a strong match (<50%)"
 
 
 
262
 
263
+ # --- Final Summary Formatting (Override T5 for clarity) ---
264
+ detected_skills = list(set(skills_pattern.findall(normalize_text(resume)))) # Deduplicate skills
265
  exp_match = re.search(r'\d+\s*years?|senior', resume.lower())
266
+
267
+ if detected_skills and exp_match:
268
+ final_summary = f"{', '.join(detected_skills)} proficiency, {exp_match.group(0)} experience"
269
+ elif detected_skills:
270
+ final_summary = f"{', '.join(detected_skills)} proficiency"
271
  else:
272
+ final_summary = f"{exp_match.group(0) if exp_match else 'unknown'} experience"
273
 
274
  result = {
275
  "Suitability": suitability,
276
+ "Data/Tech Related Skills Summary": final_summary,
277
+ "Warning": warning
278
  }
279
 
280
  return result
 
307
  resume_lower = normalize_text(resume)
308
  found_skills = skills_pattern.findall(resume_lower)
309
  for skill in found_skills:
310
+ skill_counts[skill.lower()] = skill_counts.get(skill.lower(), 0) + 1
311
 
312
+ # Filter for top N skills (e.g., top 8)
313
  if not skill_counts:
314
  return None
315
 
316
+ sorted_skills = sorted(skill_counts.items(), key=lambda item: item[1], reverse=True)
317
+ top_n = 8
318
+
319
+ # Aggregate "Other" skills
320
+ if len(sorted_skills) > top_n:
321
+ top_skills = dict(sorted_skills[:top_n-1])
322
+ other_count = sum(count for _, count in sorted_skills[top_n-1:])
323
+ top_skills["Other"] = other_count
324
+ else:
325
+ top_skills = dict(sorted_skills)
326
+
327
+ labels = list(top_skills.keys())
328
+ sizes = [(count / sum(top_skills.values())) * 100 for count in top_skills.values()]
329
 
330
  fig, ax = plt.subplots(figsize=(6, 4))
331
+ # Use a visually appealing color map
332
+ colors = plt.cm.tab20(np.linspace(0, 1, len(labels)))
333
+
334
+ # Draw pie chart with a shadow for depth
335
+ wedges, texts, autotexts = ax.pie(
336
+ sizes,
337
+ labels=labels,
338
+ autopct='%1.1f%%',
339
+ startangle=90,
340
+ colors=colors,
341
+ textprops={'fontsize': 10}
342
+ )
343
+ ax.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
344
+
345
+ plt.title("Top Skill Frequency Across Resumes", fontsize=12, color='#007BFF', pad=10)
346
  return fig
347
 
348
  def render_sidebar():
 
358
  with st.expander("πŸ“‹ How to Use the App", expanded=True):
359
  st.markdown("""
360
  **Instructions**:
361
+ - Use the **Setup** tab to input the Job Description.
362
+ - Use the **Resumes** tab to upload or paste up to 5 resumes, including skills and experience (e.g., "Expert in python, databricks, 6 years experience").
363
+ - Click **Run Analysis** to evaluate all valid resumes.
364
  - Use **Add Resume** or **Remove Resume** to adjust the number of resume fields (1–5).
365
+ - Use **Reset All** to clear all inputs and results.
366
+ - View the detailed table, download results, and check the skill frequency chart in the **Results** tab.
 
 
 
 
 
 
 
 
 
 
 
367
  """)
368
  with st.expander("ℹ️ Classification Criteria", expanded=True):
369
  st.markdown("""
370
  The app classifies resumes based on:
371
+ - **Skill Overlap**: Resume skills vs. Job skills. Overlap must be $\geq 40\%$.
372
+ - **Experience Match**: Resume's experience must meet or exceed the job's stated requirement (e.g., '5 years+').
373
+ - **Model Confidence**: BERT classification confidence $\geq 85\%$.
374
 
375
  **Outcomes**:
376
+ - **Relevant**: High skill overlap ($\geq 50\%$), sufficient experience, and high confidence ($\geq 85\%$).
377
+ - **Irrelevant**: Skill overlap $< 40\%$ or acceptable overlap but high confidence in low relevance.
378
+ - **Uncertain**: Experience mismatch, or model confidence $< 85\%$.
 
 
379
  """)
380
 
381
  def main():
 
385
 
386
  # Initialize session state
387
  if 'resumes' not in st.session_state:
388
+ # Start with a good example and two empty slots
389
  st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience", "", ""]
390
  if 'input_job_description' not in st.session_state:
391
  st.session_state.input_job_description = "Data scientist requires python, machine learning, 3 years+"
 
396
  if 'models' not in st.session_state:
397
  st.session_state.models = None
398
 
399
+ st.markdown("<h1>πŸ“„ AI-Powered Resume Screening</h1>", unsafe_allow_html=True)
400
+
401
+ # 🌟 NEW: Use Streamlit Tabs for better flow
402
+ tab_setup, tab_resumes, tab_results = st.tabs(["βš™οΈ 1. Setup & Job Description", "πŸ“ 2. Manage Resumes", "πŸ“Š 3. Analysis & Results"])
403
+
404
+ # --- TAB 1: Setup & Job Description ---
405
+ with tab_setup:
406
+ st.subheader("1. Enter Job Description")
407
+ st.info("Paste the job description here. It must include required skills and experience (e.g., 'Data engineer requires python, spark, 5 years+').")
408
 
409
+ job_description = st.text_area(
410
+ "Job Description Text",
411
+ value=st.session_state.input_job_description,
412
+ height=150,
413
+ key="job_description_tab",
414
+ placeholder="e.g., Data scientist requires python, sql, 3 years+"
 
 
 
 
 
 
 
415
  )
416
+ st.session_state.input_job_description = job_description
417
+
418
+ validation_error = validate_input(job_description, is_resume=False)
419
+ if validation_error and job_description.strip():
420
+ st.warning(f"Job Description Validation: {validation_error}")
421
+
422
+ # --- TAB 2: Manage Resumes ---
423
+ with tab_resumes:
424
+ st.subheader(f"2. Resume Inputs ({len(st.session_state.resumes)}/5)")
425
+ st.info("Upload PDF/DOCX or paste text for up to 5 resumes. Each must contain data/tech skills and experience.")
426
+
427
+ # Manage resume text areas and file uploads
428
+ for i in range(len(st.session_state.resumes)):
429
+ # Use an expander for each resume to keep the page clean
430
+ is_expanded = (i == 0) or (st.session_state.resumes[i].strip() != "")
431
+ with st.expander(f"**Resume {i+1}**", expanded=is_expanded):
432
+
433
+ # File uploader on the left
434
+ uploaded_file = st.file_uploader(
435
+ f"Upload CV (PDF or Word) for Resume {i+1}",
436
+ type=['pdf', 'docx'],
437
+ key=f"file_upload_{i}"
438
+ )
439
+
440
+ # File upload logic
441
+ if uploaded_file is not None:
442
+ extracted_text = extract_text_from_file(uploaded_file)
443
+ if extracted_text:
444
+ st.session_state.resumes[i] = extracted_text
445
+ else:
446
+ st.session_state.resumes[i] = ""
447
+
448
+ # Text area input
449
+ st.session_state.resumes[i] = st.text_area(
450
+ f"Paste or edit resume text",
451
+ value=st.session_state.resumes[i],
452
+ height=100,
453
+ key=f"resume_{i}_tab",
454
+ placeholder="e.g., Expert in python, sql, 3 years experience"
455
+ )
456
+
457
+ # Validation feedback
458
+ validation_error = validate_input(st.session_state.resumes[i], is_resume=True)
459
+ if validation_error and st.session_state.resumes[i].strip():
460
+ st.warning(f"Validation: {validation_error}")
461
+
462
+ # Add/Remove resume buttons outside the loop
463
+ col_add, col_remove, _ = st.columns([1, 1, 3])
464
+ with col_add:
465
+ if st.button("βž• Add Resume", use_container_width=True) and len(st.session_state.resumes) < 5:
466
+ st.session_state.resumes.append("")
467
+ st.rerun()
468
+ with col_remove:
469
+ if st.button("βž– Remove Resume", use_container_width=True) and len(st.session_state.resumes) > 1:
470
+ st.session_state.resumes.pop()
471
+ st.rerun()
472
+
473
+ # --- ACTION BUTTONS (Outside the tabs for prominence) ---
474
+ st.markdown("---")
475
  col_btn1, col_btn2, _ = st.columns([1, 1, 3])
476
  with col_btn1:
477
+ analyze_clicked = st.button("πŸš€ Run Analysis", type="primary", use_container_width=True)
478
  with col_btn2:
479
+ reset_clicked = st.button("πŸ”„ Reset All", use_container_width=True)
480
+ st.markdown("---")
481
+
482
  # Handle reset
483
  if reset_clicked:
484
  st.session_state.resumes = ["", "", ""]
485
  st.session_state.input_job_description = ""
486
  st.session_state.results = []
487
  st.session_state.valid_resumes = []
488
+ st.session_state.models = None # Also clear models to re-load fresh
489
  st.rerun()
490
 
491
# Handle analysis with early validation and lazy model loading.
# Flow: validate every resume and the job description first; only if all
# non-empty inputs pass does the (expensive) model loading and per-resume
# classification/summarization run.
if analyze_clicked:
    # Early validation of inputs: keep only resumes that are non-empty AND
    # pass validate_input; a non-empty resume that fails blocks the run.
    valid_resumes = []
    all_inputs_valid = True

    for i, resume in enumerate(st.session_state.resumes):
        validation_error = validate_input(resume, is_resume=True)
        if not validation_error and resume.strip():
            valid_resumes.append(resume)
        elif validation_error and resume.strip():
            # Empty slots are silently skipped; only filled-but-invalid
            # slots produce an error and veto the analysis.
            st.error(f"Cannot run analysis. Resume {i+1} failed validation: {validation_error}")
            all_inputs_valid = False

    # The job description is validated with is_resume=False (different rules).
    job_validation_error = validate_input(job_description, is_resume=False)
    if job_validation_error and job_description.strip():
        st.error(f"Cannot run analysis. Job Description failed validation: {job_validation_error}")
        all_inputs_valid = False

    if valid_resumes and job_description.strip() and all_inputs_valid:
        # Load models only when needed (lazy initialization on first analysis).
        if st.session_state.models is None:
            with st.spinner("Loading models, please wait..."):
            # NOTE(review): the model-loading body (original lines 514-519)
            # is omitted from this view.

        with st.spinner("Analyzing resumes..."):
            progress_bar = st.progress(0)
            status_text = st.empty()

            # Retrieve tokenizers from st.session_state.models
            # (5-tuple; the models themselves are unpacked as _ here).
            bert_tokenizer, _, t5_tokenizer, _, _ = st.session_state.models

            status_text.text("Preparing inputs: Tokenizing and extracting job skills...")
            # Precompute tokenized inputs and job skills once for the whole
            # batch, so the per-resume loop only slices tensors.
            bert_tokenized, t5_inputs, t5_tokenized = tokenize_inputs(valid_resumes, job_description, bert_tokenizer, t5_tokenizer)
            job_skills_set = extract_skills(job_description)

            results = []
            for i, resume in enumerate(valid_resumes):
                # NOTE(review): total_steps is defined outside this view —
                # presumably len(valid_resumes); confirm.
                status_text.text(f"Processing Resume {i+1}/{total_steps}: {resume[:30]}...")

                # Package single item inputs for the cached function call:
                # unsqueeze(0) re-adds the batch dimension expected downstream.
                bert_tok_single = {'input_ids': bert_tokenized['input_ids'][i].unsqueeze(0), 'attention_mask': bert_tokenized['attention_mask'][i].unsqueeze(0)}
                t5_tok_single = {'input_ids': t5_tokenized['input_ids'][i].unsqueeze(0), 'attention_mask': t5_tokenized['attention_mask'][i].unsqueeze(0)}

                result = classify_and_summarize_batch(
                    resume,
                    job_description,
                    bert_tok_single,
                    t5_inputs[i],
                    t5_tok_single,
                    job_skills_set
                )
                result["Resume"] = f"Resume {i+1}"
                # NOTE(review): results.append(...) and the progress_bar
                # update (original lines 549-553) are omitted from this view.

            status_text.empty()
            progress_bar.empty()
            st.success("Analysis completed! πŸŽ‰ Results are in the 'Analysis & Results' tab.")
            st.balloons()
    else:
        st.error("Please ensure at least one valid resume and a valid job description are provided.")
560
 
561
# --- TAB 3: Results ---
# Renders the screening results table, a CSV download, and an optional
# skill-frequency chart. Shows a hint instead when no analysis has run yet.
with tab_results:
    st.subheader("3. Screening Results")
    if st.session_state.results:
        st.success("Analysis complete. See the suitability assessment below.")

        # Sortable results table with per-column help text.
        st.dataframe(
            st.session_state.results,
            column_config={
                "Suitability": st.column_config.TextColumn(
                    "Suitability",
                    help="Model's assessment (Relevant, Irrelevant, Uncertain)",
                    width="small"
                ),
                "Warning": st.column_config.TextColumn(
                    "Warning",
                    help="Reason for non-Relevant status (e.g., experience mismatch, low confidence)",
                    width="medium"
                ),
                "Data/Tech Related Skills Summary": st.column_config.TextColumn(
                    "Skills/Exp Summary",
                    help="Concise summary of detected skills and experience",
                    width="large"
                ),
                "Resume": st.column_config.TextColumn(
                    "Resume",
                    width="small"
                )
            },
            use_container_width=True
        )

        # Download and Chart Section
        col_dl, col_chart_expander = st.columns([1, 3])

        with col_dl:
            # Use the stdlib csv module so EVERY field is correctly quoted
            # and escaped; the previous hand-rolled writer only escaped
            # double quotes in the Summary column, producing corrupt CSV
            # when Resume/Suitability/Warning contained quotes or newlines.
            import csv
            csv_buffer = io.StringIO()
            writer = csv.writer(csv_buffer)
            writer.writerow(["Resume Number", "Suitability", "Summary", "Warning"])
            # Exclude the full resume text from CSV to keep it clean and focused.
            for result in st.session_state.results:
                writer.writerow([
                    result["Resume"],
                    result["Suitability"],
                    result["Data/Tech Related Skills Summary"],
                    result["Warning"],
                ])

            st.download_button(
                "πŸ’Ύ Download Results CSV",
                csv_buffer.getvalue(),
                file_name="resume_analysis.csv",
                mime="text/csv",
                use_container_width=True
            )

        with col_chart_expander:
            with st.expander("πŸ“ˆ View Top Skill Frequency Across Resumes", expanded=False):
                if st.session_state.valid_resumes:
                    fig = generate_skill_pie_chart(st.session_state.valid_resumes)
                    if fig:
                        st.pyplot(fig)
                        plt.close(fig)  # free the figure; avoids memory growth across reruns
                    else:
                        st.info("No recognized data/tech skills found in the resumes for charting.")
                else:
                    st.info("No valid resumes to analyze.")
    else:
        st.info("Please complete the Setup and Resume tabs, then click 'Run Analysis' to see results.")
626
 
627
# Script entry point: run the app only when executed directly (not on import).
if __name__ == "__main__":
    main()