scmlewis committed on
Commit
a6d809c
·
verified ·
1 Parent(s): e536d23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -51
app.py CHANGED
@@ -1,68 +1,177 @@
1
  # app.py
2
- # Stage 5: Streamlit Application for Resume Screening
3
 
4
  import streamlit as st
5
- from transformers import BertTokenizer, BertForSequenceClassification, pipeline
6
  import torch
 
7
  import re
8
 
9
- # Set page configuration
10
- st.set_page_config(page_title="Resume Screening Application", page_icon="📄")
11
-
12
- # Title and description
13
- st.title("Resume Screening Application")
14
- st.markdown("""
15
- This application classifies a resume-job pair as **Relevant** or **Irrelevant** and generates a concise summary of the resume's skills.
16
-
17
- **Classification Criteria**:
18
- - **Skill Overlap**: At least 80% of the job's required skills must be in the resume.
19
- - **Experience Match**: The resume's experience must meet or exceed the job's requirement.
20
- - **Outcome**: Relevant if both conditions are met; otherwise, Irrelevant.
21
- """)
22
-
23
- # Load models
24
  @st.cache_resource
25
  def load_models():
26
  bert_model_path = 'scmlewis/bert-finetuned-isom5240'
27
- bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
28
  bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
29
- t5_generator = pipeline('text2text-generation', model='t5-small')
30
- return bert_tokenizer, bert_model, t5_generator
 
 
 
 
 
 
31
 
32
- bert_tokenizer, bert_model, t5_generator = load_models()
33
 
34
- # Input fields
35
- st.subheader("Enter Resume and Job Description")
36
- resume = st.text_area("Resume", placeholder="e.g., Skilled in Python, SQL, 3 years experience")
37
- job_description = st.text_area("Job Description", placeholder="e.g., Data analyst requires Python, SQL, 3 years+")
 
38
 
39
- # Process inputs
40
- if st.button("Screen Resume"):
41
- if resume and job_description:
42
- # Classification
43
- input_text = f"resume: {resume} [sep] job: {job_description}"
44
- inputs = bert_tokenizer(input_text, return_tensors='pt', padding=True, truncation=True, max_length=128)
45
- with torch.no_grad():
46
- outputs = bert_model(**inputs)
47
- suitability = "Relevant" if outputs.logits.argmax().item() == 1 else "Irrelevant"
48
-
49
- # Summary
50
- simplified_resume = re.sub(r'(versed in leveraging|designed applications for|created solutions with|led projects involving|collaborated in agile teams over)', 'proficient in', resume).strip()
51
- simplified_resume = re.sub(r'\s+', ' ', simplified_resume)
52
- prompt = f"summarize: {simplified_resume}"
53
- summary = t5_generator(
54
- prompt,
55
- max_length=20,
56
- min_length=5,
57
- num_beams=15,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  no_repeat_ngram_size=3,
59
- length_penalty=0.5,
60
  early_stopping=True
61
- )[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- # Display results
64
- st.subheader("Results")
65
- st.write(f"**Suitability**: {suitability}")
66
- st.write(f"**Summary**: {summary}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  else:
68
- st.error("Please enter both a resume and a job description.")
 
1
  # app.py
2
+ # Enhanced Streamlit Application for Resume Screening
3
 
4
  import streamlit as st
5
+ from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
6
  import torch
7
+ import numpy as np
8
  import re
9
 
10
# Initialize models
@st.cache_resource
def load_models():
    """Load the BERT classifier and the T5 model, with tokenizers, on CPU.

    The BERT tokenizer comes from ``bert-base-uncased`` while the
    classification weights come from ``scmlewis/bert-finetuned-isom5240``
    (two output labels). Both T5 pieces come from ``t5-small``. Each model
    is moved to the CPU device and put into evaluation mode before being
    returned. The function is wrapped in ``st.cache_resource``.

    Returns:
        tuple: ``(bert_tokenizer, bert_model, t5_tokenizer, t5_model, device)``.
    """
    cpu = torch.device('cpu')  # CPU for lightweight deployment
    classifier_repo = 'scmlewis/bert-finetuned-isom5240'

    tok_bert = BertTokenizer.from_pretrained('bert-base-uncased')
    classifier = BertForSequenceClassification.from_pretrained(classifier_repo, num_labels=2)
    tok_t5 = T5Tokenizer.from_pretrained('t5-small')
    summarizer = T5ForConditionalGeneration.from_pretrained('t5-small')

    # Place both networks on the target device first, then switch them to
    # inference mode.
    for network in (classifier, summarizer):
        network.to(cpu)
    for network in (classifier, summarizer):
        network.eval()

    return tok_bert, classifier, tok_t5, summarizer, cpu


bert_tokenizer, bert_model, t5_tokenizer, t5_model, device = load_models()
26
 
27
+ # Helper functions
28
def normalize_text(text):
    """Return *text* lowercased with known filler phrases stripped out.

    A filler phrase is only removed when it directly follows a comma
    (optionally separated by whitespace); the comma is removed with it.

    Args:
        text: Raw resume or job-description string.

    Returns:
        The lowercased string with every matching ", <phrase>" fragment
        deleted.
    """
    filler_pattern = r',\s*collaborated in agile teams|,\s*developed solutions for|,\s*led projects involving|,\s*designed applications with|,\s*built machine learning models for|,\s*implemented data pipelines for|,\s*deployed cloud-based solutions|,\s*optimized workflows for|,\s*contributed to data-driven projects'
    lowered = text.lower()
    return re.sub(filler_pattern, '', lowered)
32
 
33
def check_experience_mismatch(resume, job_description):
    """Report when the resume's experience is below the job's requirement.

    The resume is searched for the first "<N> year(s)" phrase or the word
    "senior"; the job description is searched for the first "<N> year(s)+"
    phrase or "senior+". "senior" is treated as 10 years. Matching is
    case-insensitive because both inputs are lowercased before searching.

    Args:
        resume: Resume text.
        job_description: Job description text.

    Returns:
        A message of the form
        ``"Experience mismatch: Resume has <x>, job requires <y>"`` when the
        resume's years are fewer than the job's, otherwise ``None`` (also
        when either text carries no recognisable experience phrase).
    """
    senior_years = 10  # numeric stand-in for the keyword "senior"

    resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
    job_match = re.search(r'(\d+)\s*years?\+|senior\+', job_description.lower())
    if not (resume_match and job_match):
        return None

    def years_of(match):
        # Group 1 holds the digits and is None when the "senior" alternative
        # matched. Reading the capture group (instead of splitting the whole
        # match on whitespace) keeps inputs such as "4years" or "3years+"
        # from raising ValueError in int().
        digits = match.group(1)
        return senior_years if digits is None else int(digits)

    if years_of(resume_match) < years_of(job_match):
        resume_years = resume_match.group(0)
        job_years = job_match.group(0)
        return f"Experience mismatch: Resume has {resume_years}, job requires {job_years}"
    return None
44
+
45
# Skills recognised in a resume. "c++" is handled separately from the
# word-like skills: a trailing word boundary can never follow "+", so the
# usual \b...\b form would silently miss "c++" before punctuation or at the
# end of the text.
_SKILL_PATTERN = re.compile(
    r'(?<!\w)(?:c\+\+|(?:python|sql|pandas|java|machine\s*learning|tableau)(?!\w))'
)


def _classify_pair(resume, job_description, confidence_threshold=0.95):
    """Run the BERT classifier and return ``(suitability, warning)``."""
    input_text = f"resume: {resume} [sep] job: {job_description}"

    inputs = bert_tokenizer(input_text, return_tensors='pt', padding=True, truncation=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = bert_model(**inputs)

    probabilities = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
    prediction = int(np.argmax(probabilities))
    confidence = probabilities[prediction]

    # Predictions below the threshold are reported as "Uncertain" instead of
    # committing to either label.
    if confidence < confidence_threshold:
        return "Uncertain", f"Low confidence: {confidence:.4f}"
    return ("Relevant" if prediction == 1 else "Irrelevant"), None


def _summarize_resume(resume):
    """Build the keyword-based summary line from normalized resume text."""
    text = resume.lower()
    # dict.fromkeys de-duplicates while keeping first-seen order.
    skills = list(dict.fromkeys(_SKILL_PATTERN.findall(text)))
    exp_match = re.search(r'\d+\s*years|senior', text)
    experience = exp_match.group(0) if exp_match else 'unknown'

    if skills:
        return f"{', '.join(skills)} proficiency, {experience} experience"
    return f"{experience} experience"


def classify_and_summarize(resume, job_description):
    """Classify a resume/job pair and produce a short skills summary.

    Both texts are normalized with ``normalize_text`` before being fed to the
    BERT classifier. The experience check receives the un-normalized resume
    and the normalized job description.

    Changes relative to the previous implementation:
      * The T5 generation step and its regex clean-ups were removed. Their
        result was always overwritten by the keyword-based summary, so the
        beam search only added latency; the returned summary is unaffected.
      * "c++" is now actually detected as a skill.
      * Detected skills are kept in the summary even when no experience
        phrase is found (reported as "unknown experience"), and each skill
        is listed once.
      * An experience mismatch is always reported in ``warning`` and is
        appended to a low-confidence warning when both apply. Previously
        the combining branch was unreachable and the mismatch was dropped
        unless the label was "Relevant".

    Args:
        resume: Resume text entered by the user.
        job_description: Job description text entered by the user.

    Returns:
        tuple: ``(suitability, summary, warning)`` where ``suitability`` is
        "Relevant", "Irrelevant" or "Uncertain", ``summary`` is a string, and
        ``warning`` is a string or ``None``.
    """
    original_resume = resume
    resume = normalize_text(resume)
    job_description = normalize_text(job_description)

    suitability, warning = _classify_pair(resume, job_description)

    exp_warning = check_experience_mismatch(original_resume, job_description)
    if exp_warning:
        # A confident "Relevant" is downgraded when the stated experience
        # falls short of the requirement.
        if suitability == "Relevant":
            suitability = "Uncertain"
        warning = f"{warning}; {exp_warning}" if warning else exp_warning

    summary = _summarize_resume(resume)
    return suitability, summary, warning
102
+
103
# Streamlit interface
# NOTE(review): Streamlit documents set_page_config as needing to be the
# first Streamlit command on a page, yet the cached load_models() call runs
# earlier in this file - confirm this is accepted by the deployed version.
st.set_page_config(page_title="Resume Screening App", page_icon="📄", layout="centered")

# Example inputs shown the first time the page is opened.
DEFAULT_RESUME = "Expert in python, machine learning, tableau, 4 years experience"
DEFAULT_JOB_DESCRIPTION = "Data scientist requires python, machine learning, 3 years+"


def _clear_inputs():
    """Empty both text areas; used as the Reset button's on_click callback.

    Callbacks run before the script re-executes, which is the point at which
    Streamlit allows state bound to a widget key to be changed. Assigning to
    these keys after the text areas were created (as the previous inline
    handler did) is rejected by Streamlit, and the explicit
    st.experimental_rerun() call it relied on is no longer available in
    recent Streamlit releases.
    """
    st.session_state.resume = ""
    st.session_state.job_description = ""


# Seed the widget state once so the text areas need no `value=` argument;
# mixing `value=` with state set through the session API triggers a
# Streamlit warning.
if "resume" not in st.session_state:
    st.session_state["resume"] = DEFAULT_RESUME
if "job_description" not in st.session_state:
    st.session_state["job_description"] = DEFAULT_JOB_DESCRIPTION

# Introduction
st.markdown("""
<h1 style='text-align: center; color: #2E4053;'>Resume Screening Application</h1>
<p style='text-align: center; color: #566573;'>
Welcome to our AI-powered resume screening tool! This app evaluates resumes against job descriptions to determine suitability, providing a concise summary of key skills and experience. Built with advanced natural language processing, it ensures accurate and efficient screening.
</p>
""", unsafe_allow_html=True)

# Instructions and Guidelines
with st.expander("📋 How to Use the App", expanded=False):
    st.markdown("""
    **Instructions**:
    - Enter the candidate's resume in the first text box, listing skills and experience (e.g., "Expert in python, machine learning, 4 years experience").
    - Enter the job description in the second text box, specifying required skills and experience (e.g., "Data scientist requires python, machine learning, 3 years+").
    - Click **Analyze** to get the suitability and summary.
    - Use the **Reset** button to clear inputs and start over.

    **Guidelines**:
    - Use clear, comma-separated lists for skills (e.g., "python, sql, pandas").
    - Include experience in years (e.g., "4 years experience") or as "senior" for senior-level roles.
    - Avoid ambiguous phrases; be specific about skills and requirements.
    """)

# Classification Criteria
with st.expander("ℹ️ Classification Criteria", expanded=False):
    st.markdown("""
    The app classifies resumes based on:
    - **Skill Overlap**: At least 70% of the job’s required skills must match the resume’s skills.
    - **Experience Match**: The resume’s experience (in years or seniority) must meet or exceed the job’s requirement.

    **Outcomes**:
    - **Relevant**: High skill overlap and sufficient experience, with strong confidence (≥95%).
    - **Irrelevant**: Low skill overlap or insufficient experience, with strong confidence.
    - **Uncertain**: Borderline confidence (<95%) or experience mismatch (e.g., resume has 2 years, job requires 3 years+).

    **Note**: An experience mismatch warning is shown if the resume’s experience is below the job’s requirement, even if skills match.
    """)

# Input form
st.markdown("### 📝 Enter Details")
col1, col2 = st.columns([1, 1])
with col1:
    resume = st.text_area("Resume", height=100, key="resume")
with col2:
    job_description = st.text_area("Job Description", height=100, key="job_description")

# Buttons
col_btn1, col_btn2, _ = st.columns([1, 1, 3])
with col_btn1:
    analyze_clicked = st.button("Analyze", type="primary")
with col_btn2:
    # Clearing happens inside the callback; the click itself already causes
    # the script to run again, so no manual rerun is needed.
    st.button("Reset", on_click=_clear_inputs)

# Handle analysis
if analyze_clicked:
    if resume.strip() and job_description.strip():
        with st.spinner("Analyzing resume..."):
            suitability, summary, warning = classify_and_summarize(resume, job_description)
        st.success("Analysis completed! 🎉")
        st.markdown("### 📊 Results")
        st.markdown(f"**Suitability**: {suitability}", unsafe_allow_html=True)
        st.markdown(f"**Summary**: {summary}", unsafe_allow_html=True)
        if warning:
            st.warning(f"**Warning**: {warning}")
    else:
        st.error("Please enter both a resume and job description.")