Spaces:
Sleeping
Sleeping
File size: 18,711 Bytes
46917c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 |
# Import necessary libraries
import json
import os
from typing import List
import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge
from scripts import JobDescriptionProcessor, ResumeProcessor
from scripts.parsers import ParseJobDesc, ParseResume
from scripts.ReadPdf import read_single_pdf
from scripts.similarity.get_score import *
from scripts.utils import get_filenames_from_dir
# Page-level Streamlit settings: title, favicon, sidebar behaviour, and layout
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto",
    page_icon="Assets/img/favicon.ico",
    page_title="Resume Matcher",
)

# Resolve the working directory and the similarity configuration directory.
# NOTE(review): find_path is not imported by name in this file; presumably it
# arrives through the wildcard import from scripts.similarity.get_score - confirm.
cwd = find_path("Resume-Matcher")
config_path = os.path.join(cwd, "scripts", "similarity")

# Make sure the NLTK punkt_tab tokenizer data exists; download it when the lookup fails
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab")

# Display settings for the annotated_text library
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
parameters.PADDING = "0.5 0.25rem"
# Function to set session state variables
def update_session_state(key, val):
    """
    Store a value in the Streamlit session state.
    Args:
        key: Session state key to write.
        val: Value to store under that key.
    Returns:
        None
    """
    session = st.session_state
    session[key] = val
# Function to delete all files in a directory
def delete_from_dir(filepath: str) -> bool:
    """
    Delete every file directly inside a directory.
    Sub-directories are left in place (os.remove cannot delete them, and
    attempting to would abort the cleanup half-way through).
    Args:
        filepath (str): Path of the directory to clean.
    Returns:
        bool: True when the directory was scanned and all files were removed,
        False when an OSError occurred (the error is printed).
    """
    try:
        # The context manager closes the scandir iterator even if a removal fails.
        with os.scandir(filepath) as entries:
            for entry in entries:
                # Symlinks are not followed, so a link to a directory is
                # removed as a plain entry rather than skipped.
                if entry.is_dir(follow_symlinks=False):
                    continue
                os.remove(entry.path)
        return True
    except OSError as error:
        print(f"Exception: {error}")
        return False
# Function to create a star-shaped graph visualization
def create_star_graph(nodes_and_weights, title):
    """
    Create a star-shaped graph visualization and render it with Streamlit.
    A central "resume" node is connected to every given node; the figure is
    drawn with Plotly and shown via st.plotly_chart.
    Args:
        nodes_and_weights (list): List of tuples containing nodes and their weights.
        title (str): Title for the graph.
    Returns:
        None
    """
    # Build the graph: one hub node with an edge to each keyterm
    graph = nx.Graph()
    central_node = "resume"
    graph.add_node(central_node)
    for node, weight in nodes_and_weights:
        graph.add_node(node)
        graph.add_edge(central_node, node, weight=weight * 100)

    # Get position layout for nodes
    pos = nx.spring_layout(graph)

    # Edge trace: each edge is a two-point segment, separated by None
    edge_x = []
    edge_y = []
    for start, end in graph.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    # Node coordinates, connection counts, and hover text in a single pass
    node_x = []
    node_y = []
    node_adjacencies = []
    node_text = []
    for node in graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        connection_count = len(graph.adj[node])
        node_adjacencies.append(connection_count)
        node_text.append(f"{node}<br># of connections: {connection_count}")

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        text=node_text,
        marker=dict(
            showscale=True,
            colorscale="Rainbow",
            reversescale=True,
            # Color node points by number of connections
            color=node_adjacencies,
            size=10,
            colorbar=dict(
                thickness=15,
                # Nested title dict replaces the deprecated `titleside`
                # attribute, which recent Plotly releases no longer accept.
                title=dict(text="Node Connections", side="right"),
                xanchor="left",
            ),
            line_width=2,
        ),
    )

    # Create the figure
    figure = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # Nested title dict replaces the deprecated `titlefont` attribute.
            title=dict(text=title, font=dict(size=16)),
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )

    # Show the figure
    st.plotly_chart(figure, use_container_width=True)
# Function to create annotated text with highlighting
def create_annotated_text(
    input_string: str, word_list: List[str], annotation: str, color_code: str
):
    """
    Build the annotated_text payload for a piece of text.
    Args:
        input_string (str): Text to tokenize with nltk.word_tokenize.
        word_list (List[str]): Keywords whose exact token matches get highlighted.
        annotation (str): Label attached to each highlighted token.
        color_code (str): Color used for each highlighted token.
    Returns:
        List: One entry per token - a (token, annotation, color_code) tuple for
        keywords, the plain token string otherwise.
    """
    tokens = nltk.word_tokenize(input_string)
    # Set membership keeps the per-token lookup cheap
    keywords = set(word_list)
    return [
        (token, annotation, color_code) if token in keywords else token
        for token in tokens
    ]
# Function to read JSON data from a file
def read_json(filename):
    """
    Read JSON data from a file.
    Args:
        filename (str): The path to the JSON file.
    Returns:
        dict: The JSON data.
    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's
    # locale encoding, which would garble non-ASCII text on some systems.
    with open(filename, encoding="utf-8") as f:
        return json.load(f)
# Function to tokenize a string
def tokenize_string(input_string):
    """
    Split a string into tokens using nltk.word_tokenize.
    Args:
        input_string (str): The text to tokenize.
    Returns:
        List[str]: The tokens produced by the tokenizer.
    """
    return nltk.word_tokenize(input_string)
# Remove previously processed resume / job description files
for processed_subdir in ("Resumes", "JobDescription"):
    delete_from_dir(os.path.join(cwd, "Data", "Processed", processed_subdir))

# Seed the upload status and path entries on the first run
for status_key, path_key in (
    ("resumeUploaded", "resumePath"),
    ("jobDescriptionUploaded", "jobDescriptionPath"),
):
    if status_key not in st.session_state:
        update_session_state(status_key, "Pending")
        update_session_state(path_key, "")
# Main page title
st.title(":blue[Resume Matcher]")

# Sidebar: header image, project blurb, links, and badges
with st.sidebar:
    st.image("Assets/img/header_image.png")
    st.subheader("Free and Open Source ATS to help your resume pass the screening stage.")
    st.markdown("Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)")
    st.markdown("Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)")
    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown("For updates follow me on Twitter.")
    badge(type="twitter", name="_srbhr_")
    st.markdown(
        "If you like the project and would like to further help in development please consider 👇"
    )
    badge(type="buymeacoffee", name="srbhr")

# Separate the header area from the upload widgets
st.divider()
avs.add_vertical_space(1)
def _store_upload(uploaded_file, data_subdir, status_key, path_key):
    """
    Persist an uploaded PDF under Data/<data_subdir> and record it in session state.
    Args:
        uploaded_file: Value returned by st.file_uploader (None when nothing is selected).
        data_subdir (str): Sub-directory of "Data" that receives the file.
        status_key (str): Session state key holding "Pending" / "Uploaded".
        path_key (str): Session state key holding the saved file path.
    Returns:
        None
    """
    if uploaded_file is None:
        # Nothing selected (or the selection was cleared): reset the state
        update_session_state(status_key, "Pending")
        update_session_state(path_key, "")
        return

    # The file name is supplied by the client; keep only its final component
    # so it cannot point outside the target directory.
    safe_name = os.path.basename(uploaded_file.name)
    save_path = os.path.join(cwd, "Data", data_subdir, safe_name)

    # Skip the write on reruns for a file that is already stored, but save
    # again when the user swapped in a differently named file.
    already_saved = (
        st.session_state[status_key] == "Uploaded"
        and st.session_state[path_key] == save_path
    )
    if already_saved:
        return

    with open(save_path, mode="wb") as out_file:
        out_file.write(uploaded_file.getvalue())

    if os.path.exists(save_path):
        st.toast(f"File {uploaded_file.name} is successfully saved!", icon="✔️")
        update_session_state(status_key, "Uploaded")
        update_session_state(path_key, save_path)


# Upload widgets: resume on the left, job description on the right
with st.container():
    resumeCol, jobDescriptionCol = st.columns(2)
    with resumeCol:
        uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
        _store_upload(uploaded_Resume, "Resumes", "resumeUploaded", "resumePath")
    with jobDescriptionCol:
        uploaded_JobDescription = st.file_uploader(
            "Choose a Job Description", type="pdf"
        )
        _store_upload(
            uploaded_JobDescription,
            "JobDescription",
            "jobDescriptionUploaded",
            "jobDescriptionPath",
        )
def _show_keyterm_table(keyterms):
    """
    Render keyterms as a two-column Plotly table in the active Streamlit container.
    Args:
        keyterms: Iterable of (keyword, value) pairs; each value is multiplied
            by 100 before it is displayed.
    Returns:
        None
    """
    # A dict keeps one row per keyword (the last value wins on duplicates)
    scaled_terms = {keyword: value * 100 for keyword, value in keyterms}
    table = go.Figure(
        data=[
            go.Table(
                header=dict(
                    values=["Keyword", "Value"],
                    font=dict(size=12, color="white"),
                    fill_color="#1d2078",
                ),
                cells=dict(
                    values=[
                        list(scaled_terms.keys()),
                        list(scaled_terms.values()),
                    ],
                    line_color="darkslategray",
                    fill_color="#6DA9E4",
                ),
            )
        ]
    )
    st.plotly_chart(table, use_container_width=True)


# Parse both documents and render the analysis once both uploads are in place
with st.spinner("Please wait..."):
    if (
        uploaded_Resume is not None
        # Check the resume's own status flag (the job description flag was
        # previously tested twice, leaving the resume state unchecked).
        and st.session_state["resumeUploaded"] == "Uploaded"
        and uploaded_JobDescription is not None
        and st.session_state["jobDescriptionUploaded"] == "Uploaded"
    ):
        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
        jobDescriptionProcessor = ParseJobDesc(
            read_single_pdf(st.session_state["jobDescriptionPath"])
        )

        # Resume / JD output
        selected_file = resumeProcessor.get_JSON()
        selected_jd = jobDescriptionProcessor.get_JSON()

        # Add containers for each row to avoid overlap

        # Parsed data
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Parsed Resume Data"):
                    st.caption(
                        "This text is parsed from your resume. This is how it'll look like after getting parsed by an "
                        "ATS."
                    )
                    st.caption(
                        "Utilize this to understand how to make your resume ATS friendly."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_file["clean_data"])
            with jobDescriptionCol:
                with st.expander("Parsed Job Description"):
                    st.caption(
                        "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_jd["clean_data"])

        # Extracted keywords
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the resume."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_file["clean_data"],
                            selected_file["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )
            with jobDescriptionCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the job description."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_jd["clean_data"],
                            selected_jd["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )

        # Star graph visualization
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the resume."
                    )
                    create_star_graph(selected_file["keyterms"], "Entities from Resume")
            with jobDescriptionCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the job description."
                    )
                    create_star_graph(
                        selected_jd["keyterms"], "Entities from Job Description"
                    )

        # Keywords and values (the data frames are reused by the treemaps below)
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Keywords & Values"):
                    df1 = pd.DataFrame(
                        selected_file["keyterms"], columns=["keyword", "value"]
                    )
                    _show_keyterm_table(selected_file["keyterms"])
            with jobDescriptionCol:
                with st.expander("Keywords & Values"):
                    df2 = pd.DataFrame(
                        selected_jd["keyterms"], columns=["keyword", "value"]
                    )
                    _show_keyterm_table(selected_jd["keyterms"])

        # Treemaps
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df1,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from your Resume",
                    )
                    st.plotly_chart(fig, use_container_width=True)
            with jobDescriptionCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df2,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from Job Description",
                    )
                    st.plotly_chart(fig, use_container_width=True)

        avs.add_vertical_space(2)
        st.markdown("#### Similarity Score")
        print("Config file parsed successfully:")

        # Score the two keyword lists against each other
        resume_string = " ".join(selected_file["extracted_keywords"])
        jd_string = " ".join(selected_jd["extracted_keywords"])
        result = get_score(resume_string, jd_string)
        similarity_score = round(result[0].score * 100, 2)

        # Color bands: below 60 red, 60 to below 75 orange, otherwise green
        score_color = "green"
        if similarity_score < 60:
            score_color = "red"
        elif 60 <= similarity_score < 75:
            score_color = "orange"
        st.markdown(
            f"Similarity Score obtained for the resume and job description is "
            f'<span style="color:{score_color};font-size:24px; font-weight:Bold">{similarity_score}</span>',
            unsafe_allow_html=True,
        )

        avs.add_vertical_space(2)
        # Highlight job description keywords inside the resume text
        with st.expander("Common words between Resume and Job Description:"):
            annotated_text(
                create_annotated_text(
                    selected_file["clean_data"],
                    selected_jd["extracted_keywords"],
                    "JD",
                    "#F24C3D",
                )
            )

st.divider()

# Go back to top
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")
|