scmlewis commited on
Commit
d74b1fa
Β·
verified Β·
1 Parent(s): 4497360

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +308 -392
app.py CHANGED
@@ -1,5 +1,5 @@
1
  # app.py
2
- # Optimized Streamlit Application for Resume Screening with Multiple Resumes, Professional Theme, and Dark Mode Support
3
 
4
  import streamlit as st
5
  from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
@@ -8,14 +8,39 @@ import numpy as np
8
  import re
9
  import io
10
  import matplotlib.pyplot as plt
11
- import time
12
- import pandas as pd
13
 
14
  # Set page config as the first Streamlit command
15
  st.set_page_config(page_title="Resume Screening Assistant for Data/Tech", page_icon="πŸ“„", layout="wide")
16
 
17
- # Compile regex patterns once for efficiency
18
- skills_pattern = re.compile(r'\b(?:' + '|'.join(map(re.escape, [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
20
  'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
21
  'javascript', 'scala', 'go', 'ruby', 'pytorch', 'keras', 'deep learning', 'nlp',
@@ -28,118 +53,67 @@ skills_pattern = re.compile(r'\b(?:' + '|'.join(map(re.escape, [
28
  'cybersecurity', 'project management', 'technical writing', 'business analysis',
29
  'agile methodologies', 'communication', 'team leadership',
30
  'databricks', 'synapse', 'delta lake', 'streamlit', 'fastapi', 'graphql', 'mlflow', 'kedro'
31
- ])) + r')\b')
32
-
33
- normalize_pattern = re.compile(r'_|-|,\s*collaborated in agile teams|,\s*developed solutions for|,\s*led projects involving|,\s*designed applications with|,\s*built machine learning models for|,\s*implemented data pipelines for|,\s*deployed cloud-based solutions|,\s*optimized workflows for|,\s*contributed to data-driven projects')
34
-
35
- # Apply custom CSS for layout stability and element styling
36
- st.markdown("""
37
- <style>
38
- /* Sidebar Styling */
39
- [data-testid="stSidebar"] {
40
- width: 350px !important;
41
- min-width: 350px !important;
42
- max-height: 100vh;
43
- overflow-y: auto;
44
- padding-bottom: 20px;
45
- }
46
- [data-testid="stSidebarCollapseButton"] {
47
- display: none !important;
48
- }
49
-
50
- /* Main Content Styling */
51
- .block-container {
52
- margin-left: 350px;
53
- }
54
- h2, h3 {
55
- color: var(--primaryColor);
56
- font-weight: bold;
57
- margin-top: 20px;
58
- }
59
-
60
- /* Input Fields */
61
- .stTextInput > label {
62
- font-weight: bold;
63
- font-size: 16px;
64
- }
65
- .stTextInput > div > input {
66
- border: 1px solid var(--secondaryBackgroundColor);
67
- border-radius: 5px;
68
- padding: 8px;
69
- font-size: 14px;
70
- }
71
- .stTextInput > div > input::placeholder {
72
- color: #888888;
73
- }
74
 
75
- /* Buttons */
76
- .stButton > button {
77
- border-radius: 5px;
78
- padding: 10px 20px;
79
- font-size: 16px;
80
- border: none;
81
- }
82
- .stButton > button:hover {
83
- border: 1px solid var(--textColor);
84
- }
85
-
86
- /* Results Table */
87
- div[data-testid="stDataFrame"] {
88
- border: 1px solid var(--secondaryBackgroundColor);
89
- border-radius: 5px;
90
- }
91
- div[data-testid="stDataFrame"] table th {
92
- background-color: var(--primaryColor);
93
- color: var(--textColor);
94
- font-weight: bold;
95
- }
96
- div[data-testid="stDataFrame"] table td {
97
- color: var(--textColor);
98
- }
99
 
100
- /* Alerts */
101
- .stAlert {
102
- border-radius: 5px;
103
- }
104
-
105
- /* Pie Chart Section */
106
- .stPlotlyChart, .stImage {
107
- border-radius: 5px;
108
- padding: 10px;
109
- }
110
- </style>
111
- """, unsafe_allow_html=True)
112
-
113
- # Theme toggle button
114
- def toggle_theme():
115
- current_theme = st.config.get_option("theme.base")
116
- new_theme = "dark" if current_theme == "light" else "light"
117
- st.config.set_option("theme.base", new_theme)
118
- st.rerun()
119
-
120
- # Place the toggle button in the main content area (top-right)
121
- col1, col2 = st.columns([9, 1])
122
- with col2:
123
- if st.button(f"Switch to {'Dark' if st.config.get_option('theme.base') == 'light' else 'Light'} Mode", key="theme_toggle"):
124
- toggle_theme()
125
 
126
- # Helper functions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  def normalize_text(text):
128
  text = text.lower()
129
- return normalize_pattern.sub('', text)
 
 
130
 
131
  def check_experience_mismatch(resume, job_description):
132
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
133
- job_match = re.search(r'(\d+)\s*years?\+|senior\+', job_description.lower())
 
134
  if resume_match and job_match:
135
  resume_years = resume_match.group(0)
136
  job_years = job_match.group(0)
 
137
  if 'senior' in resume_years:
138
  resume_num = 10
139
  else:
140
  resume_num = int(resume_match.group(1))
 
141
  if 'senior+' in job_years:
142
- job_num = 10
143
  else:
144
  job_num = int(job_match.group(1))
145
  if resume_num < job_num:
@@ -150,9 +124,7 @@ def validate_input(text, is_resume=True):
150
  if not text.strip() or len(text.strip()) < 10:
151
  return "Input is too short (minimum 10 characters)."
152
  text_normalized = normalize_text(text)
153
- text_normalized = re.sub(r'[,_-]', ' ', text_normalized)
154
- found_skill = bool(skills_pattern.search(text_normalized))
155
- if is_resume and not found_skill:
156
  return "Please include at least one data/tech skill (e.g., python, sql, databricks)."
157
  if is_resume and not re.search(r'\d+\s*year(s)?|senior', text.lower()):
158
  return "Please include experience (e.g., '3 years experience' or 'senior')."
@@ -160,162 +132,128 @@ def validate_input(text, is_resume=True):
160
 
161
  @st.cache_resource
162
  def load_models():
163
- start_time = time.time()
164
  bert_model_path = 'scmlewis/bert-finetuned-isom5240'
165
- try:
166
- bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
167
- bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
168
- t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
169
- t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
170
- except Exception as e:
171
- st.error(f"Error loading models: {str(e)}")
172
- raise e
173
  device = torch.device('cpu') # CPU for lightweight deployment
174
  bert_model.to(device)
175
  t5_model.to(device)
176
  bert_model.eval()
177
  t5_model.eval()
178
- st.session_state.load_models_time = time.time() - start_time
179
  return bert_tokenizer, bert_model, t5_tokenizer, t5_model, device
180
 
181
  @st.cache_data
182
- def tokenize_inputs(resumes, job_description):
183
  """Precompute tokenized inputs for BERT and T5."""
184
- bert_tokenizer, _, t5_tokenizer, _, _ = st.session_state.models
185
- start_time = time.time()
186
-
187
  job_description_norm = normalize_text(job_description)
188
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
189
- bert_tokenized = bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=128)
190
 
191
  t5_inputs = []
192
  for resume in resumes:
193
  prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
194
  prompt_normalized = normalize_text(prompt)
195
  t5_inputs.append(f"summarize: {prompt_normalized}")
196
- t5_tokenized = t5_tokenizer(t5_inputs, return_tensors='pt', padding=True, truncation=True, max_length=128)
197
 
198
- st.session_state.tokenize_time = time.time() - start_time
199
  return bert_tokenized, t5_inputs, t5_tokenized
200
 
201
  @st.cache_data
202
  def extract_skills(text):
203
  """Extract skills from text in a single pass."""
204
- start_time = time.time()
205
  text_normalized = normalize_text(text)
206
  text_normalized = re.sub(r'[,_-]', ' ', text_normalized)
207
  found_skills = skills_pattern.findall(text_normalized)
208
- st.session_state.extract_skills_time = time.time() - start_time
209
  return set(found_skills)
210
 
211
  @st.cache_data
212
- def classify_and_summarize_batch(resume, job_description, _bert_tokenized, _t5_input, _t5_tokenized, _job_skills_set):
213
- """Process one resume at a time to reduce CPU load with a timeout."""
214
- _, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
215
- start_time = time.time()
216
- timeout = 60 # Timeout after 60 seconds
217
 
218
- try:
219
- bert_tokenized = {k: v.to(device) for k, v in _bert_tokenized.items()}
220
- with torch.no_grad():
221
- # BERT inference
222
- bert_start = time.time()
223
- outputs = bert_model(**bert_tokenized)
224
- if time.time() - bert_start > timeout:
225
- raise TimeoutError("BERT inference timed out")
226
-
227
- logits = outputs.logits
228
- probabilities = torch.softmax(logits, dim=1).cpu().numpy()
229
- predictions = np.argmax(probabilities, axis=1)
230
-
231
- confidence_threshold = 0.85
232
-
233
- t5_tokenized = {k: v.to(device) for k, v in _t5_tokenized.items()}
234
- with torch.no_grad():
235
- # T5 inference
236
- t5_start = time.time()
237
- t5_outputs = t5_model.generate(
238
- t5_tokenized['input_ids'],
239
- attention_mask=t5_tokenized['attention_mask'],
240
- max_length=30,
241
- min_length=8,
242
- num_beams=4,
243
- no_repeat_ngram_size=3,
244
- length_penalty=3.0,
245
- early_stopping=True
246
- )
247
- if time.time() - t5_start > timeout:
248
- raise TimeoutError("T5 inference timed out")
249
- summaries = [t5_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in t5_outputs]
250
- summaries = [re.sub(r'\s+', ' ', summary).strip() for summary in summaries]
251
-
252
- prob, pred, summary, t5_input = probabilities[0], predictions[0], summaries[0], _t5_input
253
  resume_skills_set = extract_skills(resume)
254
  skill_overlap = len(_job_skills_set.intersection(resume_skills_set)) / len(_job_skills_set) if _job_skills_set else 0
255
 
256
- if prob[pred] < confidence_threshold:
257
- suitability = "Uncertain"
258
- warning = f"Low confidence: {prob[pred]:.4f}"
 
259
  else:
260
- if skill_overlap < 0.4:
261
- suitability = "Irrelevant"
262
- warning = "Skills are irrelevant"
 
 
263
  else:
264
- suitability = "Relevant" if skill_overlap >= 0.5 else "Irrelevant"
265
- warning = "Skills are not a strong match" if suitability == "Irrelevant" else None
266
-
267
- exp_warning = check_experience_mismatch(resume, job_description)
268
- if exp_warning:
269
  suitability = "Uncertain"
270
- warning = exp_warning
 
 
 
 
271
 
272
- skills = list(set(skills_pattern.findall(t5_input)))
 
273
  exp_match = re.search(r'\d+\s*years?|senior', resume.lower())
274
  if skills and exp_match:
275
  summary = f"{', '.join(skills)} proficiency, {exp_match.group(0)} experience"
276
  else:
277
  summary = f"{exp_match.group(0) if exp_match else 'unknown'} experience"
278
 
279
- result = {
 
280
  "Suitability": suitability,
281
  "Data/Tech Related Skills Summary": summary,
282
- "Warning": warning or "None",
283
- "Inference Time": time.time() - start_time
284
- }
285
-
286
- st.session_state.classify_summarize_time = time.time() - start_time
287
- return result
288
- except TimeoutError as e:
289
- st.warning(f"Skipped processing for resume due to timeout: {str(e)}")
290
- return {
291
- "Suitability": "Error",
292
- "Data/Tech Related Skills Summary": "Processing timed out",
293
- "Warning": str(e),
294
- "Inference Time": time.time() - start_time
295
- }
296
- except Exception as e:
297
- st.error(f"Error during inference for resume: {str(e)}")
298
- return {
299
- "Suitability": "Error",
300
- "Data/Tech Related Skills Summary": "Failed to process",
301
- "Warning": str(e),
302
- "Inference Time": time.time() - start_time
303
- }
304
 
305
  @st.cache_data
306
  def generate_skill_pie_chart(resumes):
307
- """Generate a pie chart of skill frequency across resumes."""
308
- start_time = time.time()
309
  skill_counts = {}
310
  total_resumes = len([r for r in resumes if r.strip()])
311
 
312
  if total_resumes == 0:
313
  return None
314
 
 
315
  for resume in resumes:
316
  if resume.strip():
317
  resume_lower = normalize_text(resume)
318
- resume_lower = re.sub(r'[,_-]', ' ', resume_lower)
319
  found_skills = skills_pattern.findall(resume_lower)
320
  for skill in found_skills:
321
  skill_counts[skill] = skill_counts.get(skill, 0) + 1
@@ -330,226 +268,204 @@ def generate_skill_pie_chart(resumes):
330
  colors = plt.cm.Blues(np.linspace(0.4, 0.8, len(labels)))
331
  ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10})
332
  ax.axis('equal')
333
- plt.title("Skill Frequency Across Resumes", fontsize=12, pad=10)
334
-
335
- st.session_state.pie_chart_time = time.time() - start_time
336
  return fig
337
 
338
- def render_sidebar():
339
- """Render sidebar content."""
 
340
  with st.sidebar:
341
  st.markdown("""
342
- <h1 style='text-align: center; font-size: 32px; margin-bottom: 10px;'>πŸ“„ Resume Screening Assistant for Databricks</h1>
343
  <p style='text-align: center; font-size: 16px; margin-top: 0;'>
344
- Welcome to our AI-powered resume screening tool, specialized for data science and tech roles! This app evaluates multiple resumes against a single job description, providing suitability classifications, skill summaries, and a skill frequency visualization.
345
  </p>
346
  """, unsafe_allow_html=True)
347
-
348
- # Persist expander states
349
- if 'expander1' not in st.session_state:
350
- st.session_state.expander1 = True
351
- if 'expander2' not in st.session_state:
352
- st.session_state.expander2 = False
353
- if 'expander3' not in st.session_state:
354
- st.session_state.expander3 = False
355
- if 'expander4' not in st.session_state:
356
- st.session_state.expander4 = False
357
-
358
- with st.expander("How to Use the App", expanded=st.session_state.expander1):
359
- st.session_state.expander1 = True
360
  st.markdown("""
361
- - Enter up to 5 candidate resumes in the text boxes below, listing data/tech skills and experience (e.g., "Expert in python, databricks, 6 years experience").
 
362
  - Enter the job description, specifying required skills and experience (e.g., "Data engineer requires python, spark, 5 years+").
363
- - Click the "Analyze" button to evaluate all non-empty resumes (at least one resume required).
364
- - Use the "Add Resume" or "Remove Resume" buttons to adjust the number of resume fields (1-5).
365
- - Use the "Reset" button to clear all inputs and results.
366
- - Results can be downloaded as a CSV file for record-keeping.
367
- - View the skill frequency pie chart to see the distribution of skills across resumes.
368
- """)
369
-
370
- with st.expander("Example Test Cases", expanded=st.session_state.expander2):
371
- st.session_state.expander2 = True
372
- st.markdown("""
373
- - **Test Case 1**:
374
- - Resume 1: "Expert in python, machine learning, tableau, 4 years experience"
375
- - Resume 2: "Skilled in sql, pandas, 2 years experience"
376
- - Resume 3: "Proficient in java, python, 5 years experience"
377
- - Job Description: "Data scientist requires python, machine learning, 3 years+"
378
- - **Test Case 2**:
379
- - Resume 1: "Skilled in databricks, spark, python, 6 years experience"
380
- - Resume 2: "Expert in sql, tableau, business intelligence, 3 years experience"
381
- - Resume 3: "Proficient in rust, langchain, 2 years experience"
382
- - Job Description: "Data engineer requires python, spark, 5 years+"
383
- """)
384
-
385
- with st.expander("Guidelines", expanded=st.session_state.expander3):
386
- st.session_state.expander3 = True
387
- st.markdown("""
388
- - Use comma-separated skills from a comprehensive list including python, sql, databricks, etc. (79 skills supported, see Project Report for full list).
389
  - Include experience in years (e.g., "3 years experience" or "1 year experience") or as "senior".
390
  - Focus on data/tech skills for accurate summarization.
391
  - Resumes with only irrelevant skills (e.g., sales, marketing) will be classified as "Irrelevant".
 
392
  """)
393
-
394
- with st.expander("Classification Criteria", expanded=st.session_state.expander4):
395
- st.session_state.expander4 = True
396
  st.markdown("""
397
- Resumes are classified based on:
398
- - **Skill Overlap**: The resume's data/tech skills are compared to the job's requirements. A skill overlap below 40% results in an "Irrelevant" classification.
399
  - **Model Confidence**: A finetuned BERT model evaluates skill relevance. If confidence is below 85%, the classification is "Uncertain".
400
- - **Experience Match**: The resume's experience (in years or seniority) must meet or exceed the job's requirement.
401
 
402
  **Outcomes**:
403
- - **Relevant**: Skill overlap β‰₯ 50%, sufficient experience, and high model confidence (β‰₯ 85%).
404
  - **Irrelevant**: Skill overlap < 40% or high confidence in low skill relevance.
405
- - **Uncertain**: Skill overlap β‰₯ 50% but experience mismatch (e.g., resume has 2 years, job requires 5 years+), or low model confidence (< 85%).
406
 
407
- **Note**: An experience mismatch warning is shown if the resume's experience is below the job's requirement, overriding the skill overlap and confidence to classify as Uncertain.
408
  """)
409
 
410
- def main():
411
- """Main function to run the Streamlit app for resume screening."""
412
- # Render sidebar
413
- render_sidebar()
414
-
415
- # Initialize session state
416
- if 'models' not in st.session_state:
417
- st.session_state.models = load_models()
418
  if 'resumes' not in st.session_state:
419
- st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience"] + [""] * 4 # Prefill first resume with Test Case 1
420
- if 'num_resumes' not in st.session_state:
421
- st.session_state.num_resumes = 3 # Default to 3 textboxes
422
- if 'job_description' not in st.session_state:
423
- st.session_state.job_description = "Data scientist requires python, machine learning, 3 years+" # Prefill with Test Case 1
424
  if 'results' not in st.session_state:
425
- st.session_state.results = None
426
- if 'pie_chart' not in st.session_state:
427
- st.session_state.pie_chart = None
428
- if 'total_analyze_time' not in st.session_state:
429
- st.session_state.total_analyze_time = 0 # Initialize to avoid AttributeError
430
- if 'classify_summarize_time' not in st.session_state:
431
- st.session_state.classify_summarize_time = 0
432
-
433
- # Resume input fields
434
- with st.container():
435
- st.subheader("Candidate Resumes")
436
- num_resumes = st.session_state.num_resumes
437
- for i in range(num_resumes):
438
- placeholder = "e.g., 'Expert in python, databricks, 6 years experience'" if i == 0 else "Enter candidate resume (optional)"
439
- st.session_state.resumes[i] = st.text_input(f"Resume {i+1}", value=st.session_state.resumes[i], key=f"resume_{i}", placeholder=placeholder)
440
-
441
- # Buttons to add/remove resume fields
442
- col1, col2, col3 = st.columns([1, 1, 1])
443
- with col1:
444
- if st.button("Add Resume") and num_resumes < 5:
445
- st.session_state.num_resumes += 1
446
- st.session_state.results = None # Clear previous results
447
- st.session_state.pie_chart = None
448
- st.rerun()
449
- with col2:
450
- if st.button("Remove Resume") and num_resumes > 1:
451
- st.session_state.num_resumes -= 1
452
- st.session_state.resumes[num_resumes] = "" # Clear the removed field
453
- st.session_state.results = None # Clear previous results
454
- st.session_state.pie_chart = None
 
 
 
 
455
  st.rerun()
456
- with col3:
457
- if st.button("Reset"):
458
- st.session_state.num_resumes = 3
459
- st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience"] + [""] * 4
460
- st.session_state.job_description = "Data scientist requires python, machine learning, 3 years+"
461
- st.session_state.results = None
462
- st.session_state.pie_chart = None
463
  st.rerun()
464
 
465
  # Job description input
466
- with st.container():
467
- st.subheader("Job Description")
468
- st.session_state.job_description = st.text_input(
469
- "Enter the job description",
470
- value=st.session_state.job_description,
471
- placeholder="e.g., 'Data engineer requires python, spark, 5 years+'"
472
- )
473
-
474
- # Analyze button with loading spinner and global timeout
475
- if st.button("Analyze"):
476
- with st.spinner("Analyzing resumes... This may take a moment depending on server load."):
477
- start_time = time.time()
478
- global_timeout = 180 # Global timeout of 3 minutes for all resumes
479
- resumes = tuple(resume.strip() for resume in st.session_state.resumes[:num_resumes]) # Use tuple for cache stability
480
- job_description = st.session_state.job_description.strip()
481
-
482
- valid_resumes = []
483
- for i, resume in enumerate(resumes):
484
- validation_error = validate_input(resume, is_resume=True)
485
- if validation_error and resume:
486
- st.error(f"Resume {i+1}: {validation_error}")
487
- elif resume:
488
- valid_resumes.append(resume)
489
-
490
- validation_error = validate_input(job_description, is_resume=False)
491
- if validation_error and job_description:
492
- st.error(f"Job Description: {validation_error}")
493
-
494
- if valid_resumes and job_description:
495
- try:
496
- job_skills_set = extract_skills(job_description)
497
- results = []
498
- for i, resume in enumerate(valid_resumes):
499
- if time.time() - start_time > global_timeout:
500
- st.error("Analysis timed out after 3 minutes. Please try again or deploy on a different platform.")
501
- break
502
- st.write(f"Processing {resume[:50]}...") # Log progress
503
- bert_tokenized, t5_inputs, t5_tokenized = tokenize_inputs([resume], job_description)
504
- result = classify_and_summarize_batch(resume, job_description, bert_tokenized, t5_inputs[0], t5_tokenized, job_skills_set)
505
- result["Resume"] = f"Resume {i+1}"
506
- results.append(result)
507
- st.session_state.results = results
508
- pie_chart = generate_skill_pie_chart(valid_resumes)
509
- st.session_state.pie_chart = pie_chart
510
- except Exception as e:
511
- st.error(f"Failed to process resumes: {str(e)}")
512
- st.session_state.results = None
513
- st.session_state.pie_chart = None
514
-
515
- st.session_state.total_analyze_time = time.time() - start_time
516
- # Detailed timing logs
517
- st.write(f"Total Analyze Time: {st.session_state.total_analyze_time:.2f} seconds")
518
- st.write(f"Model Load Time: {getattr(st.session_state, 'load_models_time', 0):.2f} seconds")
519
- st.write(f"Tokenize Time: {getattr(st.session_state, 'tokenize_time', 0):.2f} seconds")
520
- st.write(f"Extract Skills Time: {getattr(st.session_state, 'extract_skills_time', 0):.2f} seconds")
521
- if st.session_state.results:
522
- for idx, result in enumerate(st.session_state.results):
523
- st.write(f"Inference Time for {result['Resume']}: {result['Inference Time']:.2f} seconds")
524
- st.write(f"Pie Chart Time: {getattr(st.session_state, 'pie_chart_time', 0):.2f} seconds")
525
-
526
- # Performance note
527
- if st.session_state.total_analyze_time > 60:
528
- st.warning("The runtime is longer than expected due to server load on Hugging Face Spaces. For a smoother experience, consider testing locally or deploying on a different platform (e.g., Streamlit Community Cloud or a personal server).")
 
 
 
 
 
 
 
 
 
 
 
 
529
 
530
  # Display results
531
  if st.session_state.results:
532
- with st.container():
533
- st.subheader("Results")
534
- df = pd.DataFrame(st.session_state.results)
535
- df = df[["Resume", "Suitability", "Data/Tech Related Skills Summary", "Warning"]] # Exclude Inference Time from display
536
- st.dataframe(df, use_container_width=True)
537
-
538
- csv = df.to_csv(index=False)
539
- st.download_button(
540
- label="Download Results as CSV",
541
- data=csv,
542
- file_name="resume_screening_results.csv",
543
- mime="text/csv",
544
- )
545
-
546
- # Display pie chart
547
- if st.session_state.pie_chart:
548
- with st.container():
549
- st.subheader("Skill Frequency Across Resumes")
550
- st.pyplot(st.session_state.pie_chart)
551
- elif st.session_state.results and not st.session_state.pie_chart:
552
- st.warning("No recognized data/tech skills found in the resumes to generate a pie chart.")
553
 
554
  if __name__ == "__main__":
 
555
  main()
 
1
  # app.py
2
+ # Optimized Streamlit Application for Resume Screening with Multiple Resumes
3
 
4
  import streamlit as st
5
  from transformers import BertTokenizer, BertForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
 
8
  import re
9
  import io
10
  import matplotlib.pyplot as plt
11
+ import PyPDF2
12
+ from docx import Document
13
 
14
  # Set page config as the first Streamlit command
15
  st.set_page_config(page_title="Resume Screening Assistant for Data/Tech", page_icon="πŸ“„", layout="wide")
16
 
17
+ # Set sidebar width and make uncollapsible
18
+ st.markdown("""
19
+ <style>
20
+ .css-1d391kg { /* Sidebar */
21
+ width: 350px !important;
22
+ }
23
+ [data-testid="stSidebarCollapseButton"] { /* Hide toggle button */
24
+ display: none !important;
25
+ }
26
+ .stSidebar { /* Ensure sidebar visibility */
27
+ min-width: 350px !important;
28
+ visibility: visible !important;
29
+ }
30
+ [data-testid="stExpander"] summary { /* Expander headers */
31
+ font-size: 26px !important;
32
+ font-weight: bold !important;
33
+ text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1) !important;
34
+ white-space: nowrap !important;
35
+ }
36
+ .st-expander-content p { /* Expander body text */
37
+ font-size: 12px !important;
38
+ }
39
+ </style>
40
+ """, unsafe_allow_html=True)
41
+
42
+ # Skills list (79 skills from Application_Demo.ipynb)
43
+ skills_list = [
44
  'python', 'sql', 'c++', 'java', 'tableau', 'machine learning', 'data analysis',
45
  'business intelligence', 'r', 'tensorflow', 'pandas', 'spark', 'scikit-learn', 'aws',
46
  'javascript', 'scala', 'go', 'ruby', 'pytorch', 'keras', 'deep learning', 'nlp',
 
53
  'cybersecurity', 'project management', 'technical writing', 'business analysis',
54
  'agile methodologies', 'communication', 'team leadership',
55
  'databricks', 'synapse', 'delta lake', 'streamlit', 'fastapi', 'graphql', 'mlflow', 'kedro'
56
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ # Precompile regex for skills matching (optimized for single pass)
59
+ skills_pattern = re.compile(r'\b(' + '|'.join(re.escape(skill) for skill in skills_list) + r')\b', re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ # Helper functions for CV parsing
62
+ def extract_text_from_pdf(file):
63
+ try:
64
+ pdf_reader = PyPDF2.PdfReader(file)
65
+ text = ""
66
+ for page in pdf_reader.pages:
67
+ page_text = page.extract_text()
68
+ if page_text:
69
+ text += page_text + "\n"
70
+ return text.strip()
71
+ except Exception as e:
72
+ st.error(f"Error extracting text from PDF: {str(e)}")
73
+ return ""
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ def extract_text_from_docx(file):
76
+ try:
77
+ doc = Document(file)
78
+ text = ""
79
+ for paragraph in doc.paragraphs:
80
+ text += paragraph.text + "\n"
81
+ return text.strip()
82
+ except Exception as e:
83
+ st.error(f"Error extracting text from Word document: {str(e)}")
84
+ return ""
85
+
86
+ def extract_text_from_file(uploaded_file):
87
+ if uploaded_file.name.endswith('.pdf'):
88
+ return extract_text_from_pdf(uploaded_file)
89
+ elif uploaded_file.name.endswith('.docx'):
90
+ return extract_text_from_docx(uploaded_file)
91
+ else:
92
+ st.error("Unsupported file format. Please upload a PDF or Word (.docx) document.")
93
+ return ""
94
+
95
+ # Helper functions for analysis
96
  def normalize_text(text):
97
  text = text.lower()
98
+ # Remove underscores, hyphens, and specific phrases, replacing with empty string
99
+ text = re.sub(r'_|-|,\s*collaborated in agile teams|,\s*developed solutions for|,\s*led projects involving|,\s*designed applications with|,\s*built machine learning models for|,\s*implemented data pipelines for|,\s*deployed cloud-based solutions|,\s*optimized workflows for|,\s*contributed to data-driven projects', '', text)
100
+ return text
101
 
102
  def check_experience_mismatch(resume, job_description):
103
  resume_match = re.search(r'(\d+)\s*years?|senior', resume.lower())
104
+ # Allow optional words like "experience" between "years" and "+"
105
+ job_match = re.search(r'(\d+)\s*years?(?:\s+\w+)*\+|senior\+', job_description.lower())
106
  if resume_match and job_match:
107
  resume_years = resume_match.group(0)
108
  job_years = job_match.group(0)
109
+ # Handle resume years
110
  if 'senior' in resume_years:
111
  resume_num = 10
112
  else:
113
  resume_num = int(resume_match.group(1))
114
+ # Handle job years
115
  if 'senior+' in job_years:
116
+ job_num = 10
117
  else:
118
  job_num = int(job_match.group(1))
119
  if resume_num < job_num:
 
124
  if not text.strip() or len(text.strip()) < 10:
125
  return "Input is too short (minimum 10 characters)."
126
  text_normalized = normalize_text(text)
127
+ if is_resume and not skills_pattern.search(text_normalized):
 
 
128
  return "Please include at least one data/tech skill (e.g., python, sql, databricks)."
129
  if is_resume and not re.search(r'\d+\s*year(s)?|senior', text.lower()):
130
  return "Please include experience (e.g., '3 years experience' or 'senior')."
 
132
 
133
  @st.cache_resource
134
  def load_models():
 
135
  bert_model_path = 'scmlewis/bert-finetuned-isom5240'
136
+ bert_tokenizer = BertTokenizer.from_pretrained(bert_model_path)
137
+ bert_model = BertForSequenceClassification.from_pretrained(bert_model_path, num_labels=2)
138
+ t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
139
+ t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
 
 
 
 
140
  device = torch.device('cpu') # CPU for lightweight deployment
141
  bert_model.to(device)
142
  t5_model.to(device)
143
  bert_model.eval()
144
  t5_model.eval()
 
145
  return bert_tokenizer, bert_model, t5_tokenizer, t5_model, device
146
 
147
  @st.cache_data
148
+ def tokenize_inputs(resumes, job_description, _bert_tokenizer, _t5_tokenizer):
149
  """Precompute tokenized inputs for BERT and T5."""
 
 
 
150
  job_description_norm = normalize_text(job_description)
151
  bert_inputs = [f"resume: {normalize_text(resume)} [sep] job: {job_description_norm}" for resume in resumes]
152
+ bert_tokenized = _bert_tokenizer(bert_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
153
 
154
  t5_inputs = []
155
  for resume in resumes:
156
  prompt = re.sub(r'\b[Cc]\+\+\b', 'c++', resume)
157
  prompt_normalized = normalize_text(prompt)
158
  t5_inputs.append(f"summarize: {prompt_normalized}")
159
+ t5_tokenized = _t5_tokenizer(t5_inputs, return_tensors='pt', padding=True, truncation=True, max_length=64)
160
 
 
161
  return bert_tokenized, t5_inputs, t5_tokenized
162
 
163
  @st.cache_data
164
  def extract_skills(text):
165
  """Extract skills from text in a single pass."""
 
166
  text_normalized = normalize_text(text)
167
  text_normalized = re.sub(r'[,_-]', ' ', text_normalized)
168
  found_skills = skills_pattern.findall(text_normalized)
 
169
  return set(found_skills)
170
 
171
@st.cache_data
def classify_and_summarize_batch(resumes, job_description, _bert_tokenized, _t5_inputs, _t5_tokenized, _job_skills_set):
    """Classify each resume against the job and build a per-resume summary row.

    Args:
        resumes: Non-empty resume texts, in the same relative order as
            st.session_state.resumes (used to label results by slot number).
        job_description: Raw job description text.
        _bert_tokenized: Pre-tokenized BERT inputs (underscore prefix so
            st.cache_data skips hashing the tensors).
        _t5_inputs: Raw "summarize: ..." prompts fed to T5.
        _t5_tokenized: Pre-tokenized T5 inputs.
        _job_skills_set: Skills extracted from the job description.

    Returns:
        list[dict]: One row per resume with keys "Resume", "Suitability",
        "Data/Tech Related Skills Summary" and "Warning".
    """
    bert_tokenizer, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
    bert_batch = {k: v.to(device) for k, v in _bert_tokenized.items()}

    # Batched BERT inference: one forward pass covers every resume.
    with torch.no_grad():
        outputs = bert_model(**bert_batch)
    probabilities = torch.softmax(outputs.logits, dim=1).cpu().numpy()
    predictions = np.argmax(probabilities, axis=1)

    confidence_threshold = 0.85
    results = []

    # Batched T5 inference. NOTE(review): these generated summaries are
    # currently superseded by the template built in the loop below; generation
    # is kept so the model text is available if the template is ever relaxed.
    t5_batch = {k: v.to(device) for k, v in _t5_tokenized.items()}
    with torch.no_grad():
        t5_outputs = t5_model.generate(
            t5_batch['input_ids'],
            attention_mask=t5_batch['attention_mask'],
            max_length=30,
            min_length=8,
            num_beams=2,
            no_repeat_ngram_size=3,
            length_penalty=2.0,
            early_stopping=True
        )
    summaries = [t5_tokenizer.decode(o, skip_special_tokens=True, clean_up_tokenization_spaces=True) for o in t5_outputs]
    summaries = [re.sub(r'\s+', ' ', s).strip() for s in summaries]

    # Forward-search cursor into the full session list so duplicate resume
    # texts map to distinct slot numbers (plain .index() always returns the
    # first occurrence, mislabelling duplicates).
    session_resumes = st.session_state.resumes
    search_from = 0

    for i, (resume, prob, pred, summary, t5_input) in enumerate(zip(resumes, probabilities, predictions, summaries, _t5_inputs)):
        # Fraction of required job skills present in the resume (0 if the job
        # lists none, avoiding a ZeroDivisionError).
        resume_skills_set = extract_skills(resume)
        skill_overlap = len(_job_skills_set.intersection(resume_skills_set)) / len(_job_skills_set) if _job_skills_set else 0

        # Step 1: Check skill irrelevance.
        if skill_overlap < 0.4:
            suitability = "Irrelevant"
            warning = "Skills are irrelevant"
        else:
            # Step 2: Check experience mismatch (takes precedence).
            exp_warning = check_experience_mismatch(resume, job_description)
            if exp_warning:
                suitability = "Uncertain"
                warning = exp_warning
            elif prob[pred] < confidence_threshold:
                # Step 3: Low model confidence.
                suitability = "Uncertain"
                warning = f"Low confidence: {prob[pred]:.4f}"
            else:
                # Step 4: Determine suitability based on skill overlap.
                suitability = "Relevant" if skill_overlap >= 0.5 else "Irrelevant"
                warning = "Skills are not a strong match" if suitability == "Irrelevant" else None

        # Template summary from the T5 prompt: deduplicated skills plus any
        # detected experience. Fix: previously a resume with skills but no
        # detectable experience dropped its skill list entirely.
        skills = list(set(skills_pattern.findall(t5_input)))
        exp_match = re.search(r'\d+\s*years?|senior', resume.lower())
        exp_text = exp_match.group(0) if exp_match else 'unknown'
        if skills:
            summary = f"{', '.join(skills)} proficiency, {exp_text} experience"
        else:
            summary = f"{exp_text} experience"

        # Label the row with the resume's slot number in the UI.
        try:
            slot = session_resumes.index(resume, search_from)
            search_from = slot + 1
        except ValueError:
            slot = i  # Fallback: position within the analyzed batch.

        results.append({
            "Resume": f"Resume {slot + 1}",
            "Suitability": suitability,
            "Data/Tech Related Skills Summary": summary,
            "Warning": warning or "None"
        })

    return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
  @st.cache_data
246
  def generate_skill_pie_chart(resumes):
 
 
247
  skill_counts = {}
248
  total_resumes = len([r for r in resumes if r.strip()])
249
 
250
  if total_resumes == 0:
251
  return None
252
 
253
+ # Count skills that appear in resumes
254
  for resume in resumes:
255
  if resume.strip():
256
  resume_lower = normalize_text(resume)
 
257
  found_skills = skills_pattern.findall(resume_lower)
258
  for skill in found_skills:
259
  skill_counts[skill] = skill_counts.get(skill, 0) + 1
 
268
  colors = plt.cm.Blues(np.linspace(0.4, 0.8, len(labels)))
269
  ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=colors, textprops={'fontsize': 10})
270
  ax.axis('equal')
271
+ plt.title("Skill Frequency Across Resumes", fontsize=12, color='#007BFF', pad=10)
 
 
272
  return fig
273
 
274
def _init_session_state():
    """Create every session-state key the app reads, on first run only."""
    if 'resumes' not in st.session_state:
        st.session_state.resumes = ["Expert in python, machine learning, tableau, 4 years experience", "", ""]
    if 'input_job_description' not in st.session_state:
        st.session_state.input_job_description = "Data scientist requires python, machine learning, 3 years+"
    if 'results' not in st.session_state:
        st.session_state.results = []
    if 'valid_resumes' not in st.session_state:
        st.session_state.valid_resumes = []
    if 'models' not in st.session_state:
        st.session_state.models = None  # Lazily loaded on first Analyze click.


def _render_sidebar():
    """Render the title, intro blurb, usage instructions and criteria."""
    with st.sidebar:
        st.markdown("""
<h1 style='text-align: center; color: #007BFF; font-size: 32px; text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1); margin-bottom: 10px;'>πŸ’» Resume Screening Assistant for Data/Tech</h1>
<p style='text-align: center; font-size: 16px; margin-top: 0;'>
Welcome to our AI-powered resume screening tool, specialized for data science and tech roles! This app evaluates multiple resumes against a single job description to determine suitability, providing concise summaries of key data and tech skills and experience. Built with advanced natural language processing, it ensures accurate and efficient screening for technical positions. <br><br><strong>Note:</strong> Performance may vary due to server load on free CPU instances.
</p>
""", unsafe_allow_html=True)

        with st.expander("πŸ“‹ How to Use the App", expanded=True):
            st.markdown("""
**Instructions**:
- Upload a PDF or Word (.docx) CV or manually enter up to 5 candidate resumes in the text boxes, listing data/tech skills and experience (e.g., "Expert in python, databricks, 6 years experience").
- Enter the job description, specifying required skills and experience (e.g., "Data engineer requires python, spark, 5 years+").
- Click **Analyze** to evaluate all non-empty resumes (at least one required).
- Use **Add Resume** or **Remove Resume** to adjust the number of resume fields (1–5).
- Use the **Reset** button to clear all inputs and results.
- Download results as a CSV file for record-keeping.
- View the skill frequency pie chart to see skill distribution across resumes.
- Example test cases:
  - **Test Case 1**: Resumes like "Expert in python, machine learning, tableau, 4 years experience" against "Data scientist requires python, machine learning, 3 years+".
  - **Test Case 2**: Resumes like "Skilled in databricks, spark, python, 6 years experience" against "Data engineer requires python, spark, 5 years+".

**Guidelines**:
- Use comma-separated skills from a comprehensive list including python, sql, databricks, etc. (79 skills supported).
- Include experience in years (e.g., "3 years experience" or "1 year experience") or as "senior".
- Focus on data/tech skills for accurate summarization.
- Resumes with only irrelevant skills (e.g., sales, marketing) will be classified as "Irrelevant".
- If uploading a CV, ensure it’s a text-based PDF or Word document (scanned PDFs may not work).
""")
        with st.expander("ℹ️ Classification Criteria", expanded=True):
            st.markdown("""
The app classifies resumes based on:
- **Skill Overlap**: The resume’s data/tech skills are compared to the job’s requirements. A skill overlap below 40% results in an "Irrelevant" classification.
- **Model Confidence**: A finetuned BERT model evaluates skill relevance. If confidence is below 85%, the classification is "Uncertain".
- **Experience Match**: The resume’s experience (in years or seniority) must meet or exceed the job’s requirement.

**Outcomes**:
- **Relevant**: Skill overlap β‰₯ 50%, sufficient experience, and high model confidence (β‰₯85%).
- **Irrelevant**: Skill overlap < 40% or high confidence in low skill relevance.
- **Uncertain**: Skill overlap β‰₯ 50% but experience mismatch (e.g., resume has 2 years, job requires 5 years+), or low model confidence (<85%).

**Note**: An experience mismatch warning is shown if the resume’s experience is below the job’s requirement, overriding the skill overlap and confidence to classify as Uncertain.
""")


def _render_resume_inputs():
    """Render one file uploader plus text area per resume slot, with validation."""
    st.markdown("### πŸ“ Enter Resumes")
    for i in range(len(st.session_state.resumes)):
        st.markdown(f"**Resume {i+1}**")
        uploaded_file = st.file_uploader(f"Upload CV (PDF or Word) for Resume {i+1}", type=['pdf', 'docx'], key=f"file_upload_{i}")

        # An upload replaces the slot's text; a failed extraction clears it.
        if uploaded_file is not None:
            extracted_text = extract_text_from_file(uploaded_file)
            st.session_state.resumes[i] = extracted_text if extracted_text else ""

        st.session_state.resumes[i] = st.text_area(
            "Enter or edit resume text",
            value=st.session_state.resumes[i],
            height=100,
            key=f"resume_{i}",
            placeholder="e.g., Expert in python, sql, 3 years experience"
        )
        validation_error = validate_input(st.session_state.resumes[i], is_resume=True)
        if validation_error and st.session_state.resumes[i].strip():
            st.warning(f"Resume {i+1}: {validation_error}")


def _render_resume_count_controls():
    """Render Add/Remove buttons that grow or shrink the slot list (1-5)."""
    col_add, col_remove, _ = st.columns([1, 1, 3])
    with col_add:
        if st.button("Add Resume") and len(st.session_state.resumes) < 5:
            st.session_state.resumes.append("")
            st.rerun()
    with col_remove:
        if st.button("Remove Resume") and len(st.session_state.resumes) > 1:
            st.session_state.resumes.pop()
            st.rerun()


def _run_analysis(job_description):
    """Validate inputs, lazily load the models, and run the batched pipeline."""
    # Keep only non-empty resumes that pass validation; warn about the rest.
    valid_resumes = []
    for i, resume in enumerate(st.session_state.resumes):
        validation_error = validate_input(resume, is_resume=True)
        if not validation_error and resume.strip():
            valid_resumes.append(resume)
        elif validation_error and resume.strip():
            st.warning(f"Resume {i+1}: {validation_error}")

    validation_error = validate_input(job_description, is_resume=False)
    if validation_error and job_description.strip():
        st.warning(f"Job Description: {validation_error}")

    if not (valid_resumes and job_description.strip()):
        st.error("Please enter at least one valid resume and a job description.")
        return

    # Models are heavy; load them only when the first analysis is requested.
    if st.session_state.models is None:
        with st.spinner("Loading models, please wait..."):
            st.session_state.models = load_models()

    st.session_state.results = []
    st.session_state.valid_resumes = valid_resumes

    with st.spinner("Analyzing resumes..."):
        progress_bar = st.progress(0)
        status_text = st.empty()
        status_text.text("Preparing inputs...")

        bert_tokenizer, bert_model, t5_tokenizer, t5_model, device = st.session_state.models
        bert_tokenized, t5_inputs, t5_tokenized = tokenize_inputs(valid_resumes, job_description, bert_tokenizer, t5_tokenizer)
        job_skills_set = extract_skills(job_description)

        status_text.text("Classifying and summarizing resumes...")
        st.session_state.results = classify_and_summarize_batch(valid_resumes, job_description, bert_tokenized, t5_inputs, t5_tokenized, job_skills_set)
        progress_bar.progress(1.0)

        status_text.empty()
        progress_bar.empty()
        st.success("Analysis completed! πŸŽ‰")


def _results_csv(job_description):
    """Serialize current results to CSV text.

    Uses csv.writer so every field is quoted/escaped correctly; the previous
    hand-rolled writer left quotes in Summary/Warning fields unescaped.
    """
    import csv  # Local import: only needed when results are downloaded.
    buf = io.StringIO()
    writer = csv.writer(buf, quoting=csv.QUOTE_ALL)
    writer.writerow(["Resume Number", "Resume Text", "Job Description", "Suitability", "Summary", "Warning"])
    job_text = job_description.replace('\n', ' ')
    for i, result in enumerate(st.session_state.results):
        resume_text = st.session_state.valid_resumes[i].replace('\n', ' ')
        writer.writerow([
            result["Resume"],
            resume_text,
            job_text,
            result["Suitability"],
            result["Data/Tech Related Skills Summary"],
            result["Warning"],
        ])
    return buf.getvalue()


def _render_results(job_description):
    """Show the results table, CSV download and skill-frequency chart."""
    st.markdown("### πŸ“Š Results")
    st.table(st.session_state.results)
    st.download_button("Download Results", _results_csv(job_description), file_name="resume_analysis.csv", mime="text/csv")

    with st.expander("πŸ“ˆ Skill Frequency Across Resumes", expanded=False):
        if st.session_state.valid_resumes:
            fig = generate_skill_pie_chart(st.session_state.valid_resumes)
            if fig:
                st.pyplot(fig)
                plt.close(fig)  # Release the figure; pyplot keeps it alive otherwise.
            else:
                st.write("No recognized data/tech skills found in the resumes.")
        else:
            st.write("No valid resumes to analyze.")


def main():
    """Run the Streamlit resume-screening app (one script rerun = one call)."""
    _render_sidebar()
    _init_session_state()

    _render_resume_inputs()
    _render_resume_count_controls()

    # Job description input.
    st.markdown("### πŸ“‹ Enter Job Description")
    job_description = st.text_area(
        "Job Description",
        value=st.session_state.input_job_description,
        height=100,
        key="job_description",
        placeholder="e.g., Data scientist requires python, sql, 3 years+"
    )
    validation_error = validate_input(job_description, is_resume=False)
    if validation_error and job_description.strip():
        st.warning(f"Job Description: {validation_error}")

    # Analyze and Reset buttons.
    col_btn1, col_btn2, _ = st.columns([1, 1, 3])
    with col_btn1:
        analyze_clicked = st.button("Analyze", type="primary")
    with col_btn2:
        reset_clicked = st.button("Reset")

    if reset_clicked:
        # Clear inputs and cached outputs, then redraw from scratch.
        st.session_state.resumes = ["", "", ""]
        st.session_state.input_job_description = ""
        st.session_state.results = []
        st.session_state.valid_resumes = []
        st.rerun()

    if analyze_clicked:
        _run_analysis(job_description)

    if st.session_state.results:
        _render_results(job_description)
469
if __name__ == "__main__":
    # When this module is run directly, call the main function.
    main()