# resumematcher / streamlit_interactive.py
# (repo metadata: March, first, commit 46917c3)
# Import necessary libraries
import json
import os
from typing import List
import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge
from scripts import JobDescriptionProcessor, ResumeProcessor
from scripts.parsers import ParseJobDesc, ParseResume
from scripts.ReadPdf import read_single_pdf
from scripts.similarity.get_score import *
from scripts.utils import get_filenames_from_dir
# Set Streamlit page configuration (must run before any other st.* call).
st.set_page_config(
    page_title="Resume Matcher",
    page_icon="Assets/img/favicon.ico",
    initial_sidebar_state="auto",
    layout="wide",
)

# Find the project root and the similarity-config path.
# NOTE(review): `find_path` is not imported by name here — presumably it is
# brought in by the star import from scripts.similarity.get_score; verify.
cwd = find_path("Resume-Matcher")
config_path = os.path.join(cwd, "scripts", "similarity")

# Ensure the NLTK punkt_tab tokenizer data is available; download on first run.
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab")

# Visual parameters for the annotated_text keyword highlighting.
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
parameters.PADDING = "0.5 0.25rem"
# Helper to set a Streamlit session-state variable.
def update_session_state(key: str, val) -> None:
    """Store *val* under *key* in Streamlit's session state."""
    st.session_state[key] = val
# Helper to delete all files directly inside a directory.
def delete_from_dir(filepath: str) -> bool:
    """
    Delete every regular file directly inside *filepath*.

    Subdirectories are left untouched (previously a subdirectory would abort
    the whole cleanup with an OSError). The scandir iterator is closed via a
    context manager so the directory handle is not leaked.

    Args:
        filepath (str): Directory whose files should be removed.

    Returns:
        bool: True if the cleanup completed, False on any OS error
        (e.g. the directory does not exist).
    """
    try:
        with os.scandir(filepath) as entries:
            for entry in entries:
                # Skip subdirectories; only plain files are removed.
                if entry.is_file():
                    os.remove(entry.path)
        return True
    except OSError as error:
        # Best-effort cleanup: report and signal failure instead of raising.
        print(f"Exception: {error}")
        return False
# Helper to draw a star-shaped graph of keywords around a central node.
def create_star_graph(nodes_and_weights, title):
    """
    Draw a star-shaped keyword graph and render it via Streamlit.

    A central "resume" hub is connected to every entry in
    *nodes_and_weights*; each edge carries the weight scaled by 100.

    Args:
        nodes_and_weights (list): Tuples of (node_label, weight).
        title (str): Title displayed above the plot.

    Returns:
        None
    """
    # Build the star: one hub node plus a spoke per keyword.
    hub = "resume"
    graph = nx.Graph()
    graph.add_node(hub)
    for label, weight in nodes_and_weights:
        graph.add_node(label)
        graph.add_edge(hub, label, weight=weight * 100)

    # Force-directed layout assigns each node an (x, y) position.
    pos = nx.spring_layout(graph)

    # Edge coordinates; None entries make Plotly break the polyline
    # between consecutive edges.
    edge_x, edge_y = [], []
    for src, dst in graph.edges():
        x0, y0 = pos[src]
        x1, y1 = pos[dst]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    # Node scatter trace; colors are filled in below from node degrees.
    node_x = [pos[n][0] for n in graph.nodes()]
    node_y = [pos[n][1] for n in graph.nodes()]
    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        marker=dict(
            showscale=True,
            colorscale="Rainbow",
            reversescale=True,
            color=[],
            size=10,
            colorbar=dict(
                thickness=15,
                title="Node Connections",
                xanchor="left",
                titleside="right",
            ),
            line_width=2,
        ),
    )

    # Color each node by its neighbour count and attach hover text.
    degrees = [len(graph.adj[n]) for n in graph.nodes()]
    node_trace.marker.color = degrees
    node_trace.text = [
        f"{n}<br># of connections: {d}" for n, d in zip(graph.nodes(), degrees)
    ]

    # Assemble the figure and hand it to Streamlit.
    figure = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=title,
            titlefont=dict(size=16),
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )
    st.plotly_chart(figure, use_container_width=True)
# Helper to build highlighted text for the annotated_text component.
def create_annotated_text(
    input_string: str, word_list: List[str], annotation: str, color_code: str
):
    """
    Build an annotated-text payload with the given keywords highlighted.

    Args:
        input_string (str): The text to annotate.
        word_list (List[str]): Keywords to highlight.
        annotation (str): Label attached to each highlighted keyword.
        color_code (str): Highlight color for the keywords.

    Returns:
        List: Mix of plain tokens and (token, annotation, color) tuples.
    """
    # A set gives O(1) membership checks while scanning the tokens.
    keywords = set(word_list)
    return [
        (token, annotation, color_code) if token in keywords else token
        for token in nltk.word_tokenize(input_string)
    ]
# Helper to load a JSON document from disk.
def read_json(filename):
    """
    Load and return the JSON content of *filename*.

    Args:
        filename (str): Path to the JSON file.

    Returns:
        dict: The parsed JSON data.
    """
    with open(filename) as json_file:
        return json.load(json_file)
# Helper to split text into word tokens.
def tokenize_string(input_string):
    """
    Split *input_string* into word tokens.

    Args:
        input_string (str): The text to tokenize.

    Returns:
        List[str]: The word tokens produced by NLTK.
    """
    return nltk.word_tokenize(input_string)
# Clean up previously processed resumes / job descriptions so stale results
# from an earlier run are never shown.
delete_from_dir(os.path.join(cwd, "Data", "Processed", "Resumes"))
delete_from_dir(os.path.join(cwd, "Data", "Processed", "JobDescription"))

# Seed the session-state upload flags on the first run of the app.
# "Pending" means no file has been saved yet for that slot.
if "resumeUploaded" not in st.session_state.keys():
    update_session_state("resumeUploaded", "Pending")
    update_session_state("resumePath", "")
if "jobDescriptionUploaded" not in st.session_state.keys():
    update_session_state("jobDescriptionUploaded", "Pending")
    update_session_state("jobDescriptionPath", "")
# Main title and sidebar (project links, badges, support buttons).
st.title(":blue[Resume Matcher]")
with st.sidebar:
    st.image("Assets/img/header_image.png")
    st.subheader(
        "Free and Open Source ATS to help your resume pass the screening stage."
    )
    st.markdown(
        "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
    )
    st.markdown(
        "Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)"
    )
    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown("For updates follow me on Twitter.")
    badge(type="twitter", name="_srbhr_")
    st.markdown(
        "If you like the project and would like to further help in development please consider 👇"
    )
    badge(type="buymeacoffee", name="srbhr")
    st.divider()

avs.add_vertical_space(1)
# Upload row: resume on the left, job description on the right.
# Each slot is a small state machine: "Pending" -> save the uploaded file to
# disk -> "Uploaded"; removing the file resets the slot back to "Pending".
with st.container():
    resumeCol, jobDescriptionCol = st.columns(2)
    with resumeCol:
        uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
        if uploaded_Resume is not None:
            # Only save once per upload; "Uploaded" prevents re-saving on rerun.
            if st.session_state["resumeUploaded"] == "Pending":
                save_path_resume = os.path.join(
                    cwd, "Data", "Resumes", uploaded_Resume.name
                )
                with open(save_path_resume, mode="wb") as w:
                    w.write(uploaded_Resume.getvalue())
                if os.path.exists(save_path_resume):
                    st.toast(
                        f"File {uploaded_Resume.name} is successfully saved!", icon="✔️"
                    )
                    update_session_state("resumeUploaded", "Uploaded")
                    update_session_state("resumePath", save_path_resume)
        else:
            # File removed from the uploader: reset the slot.
            update_session_state("resumeUploaded", "Pending")
            update_session_state("resumePath", "")
    with jobDescriptionCol:
        uploaded_JobDescription = st.file_uploader(
            "Choose a Job Description", type="pdf"
        )
        if uploaded_JobDescription is not None:
            # Same save-once logic as the resume slot.
            if st.session_state["jobDescriptionUploaded"] == "Pending":
                save_path_jobDescription = os.path.join(
                    cwd, "Data", "JobDescription", uploaded_JobDescription.name
                )
                with open(save_path_jobDescription, mode="wb") as w:
                    w.write(uploaded_JobDescription.getvalue())
                if os.path.exists(save_path_jobDescription):
                    st.toast(
                        f"File {uploaded_JobDescription.name} is successfully saved!",
                        icon="✔️",
                    )
                    update_session_state("jobDescriptionUploaded", "Uploaded")
                    update_session_state("jobDescriptionPath", save_path_jobDescription)
        else:
            # File removed from the uploader: reset the slot.
            update_session_state("jobDescriptionUploaded", "Pending")
            update_session_state("jobDescriptionPath", "")
with st.spinner("Please wait..."):
    # Only run the matching pipeline once BOTH files are uploaded and saved.
    # Bug fix: the first session-state check previously tested
    # "jobDescriptionUploaded" twice, so the resume flag was never verified
    # and the pipeline could run before the resume file was saved.
    if (
        uploaded_Resume is not None
        and st.session_state["resumeUploaded"] == "Uploaded"
        and uploaded_JobDescription is not None
        and st.session_state["jobDescriptionUploaded"] == "Uploaded"
    ):
        # Parse the raw PDF text of both documents into structured JSON.
        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
        jobDescriptionProcessor = ParseJobDesc(
            read_single_pdf(st.session_state["jobDescriptionPath"])
        )

        # Resume / JD parsed output (dicts with clean_data, extracted_keywords,
        # keyterms, ... — see the Parse* classes for the exact schema).
        selected_file = resumeProcessor.get_JSON()
        selected_jd = jobDescriptionProcessor.get_JSON()

        # Each row lives in its own container to avoid layout overlap.

        # Row 1: raw parsed text.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Parsed Resume Data"):
                    st.caption(
                        "This text is parsed from your resume. This is how it'll look like after getting parsed by an "
                        "ATS."
                    )
                    st.caption(
                        "Utilize this to understand how to make your resume ATS friendly."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_file["clean_data"])
            with jobDescriptionCol:
                with st.expander("Parsed Job Description"):
                    st.caption(
                        "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_jd["clean_data"])

        # Row 2: extracted keywords, highlighted in context.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the resume."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_file["clean_data"],
                            selected_file["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )
            with jobDescriptionCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the job description."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_jd["clean_data"],
                            selected_jd["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )

        # Row 3: star-graph visualization of extracted entities.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the resume."
                    )
                    create_star_graph(selected_file["keyterms"], "Entities from Resume")
            with jobDescriptionCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the job description."
                    )
                    create_star_graph(
                        selected_jd["keyterms"], "Entities from Job Description"
                    )

        # Row 4: keyword/value tables. df1/df2 are reused by the treemaps below.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Keywords & Values"):
                    df1 = pd.DataFrame(
                        selected_file["keyterms"], columns=["keyword", "value"]
                    )
                    # Scale scores to percentages for display.
                    keyword_dict = {}
                    for keyword, value in selected_file["keyterms"]:
                        keyword_dict[keyword] = value * 100
                    fig = go.Figure(
                        data=[
                            go.Table(
                                header=dict(
                                    values=["Keyword", "Value"],
                                    font=dict(size=12, color="white"),
                                    fill_color="#1d2078",
                                ),
                                cells=dict(
                                    values=[
                                        list(keyword_dict.keys()),
                                        list(keyword_dict.values()),
                                    ],
                                    line_color="darkslategray",
                                    fill_color="#6DA9E4",
                                ),
                            )
                        ]
                    )
                    st.plotly_chart(fig, use_container_width=True)
            with jobDescriptionCol:
                with st.expander("Keywords & Values"):
                    df2 = pd.DataFrame(
                        selected_jd["keyterms"], columns=["keyword", "value"]
                    )
                    # Scale scores to percentages for display.
                    keyword_dict = {}
                    for keyword, value in selected_jd["keyterms"]:
                        keyword_dict[keyword] = value * 100
                    fig = go.Figure(
                        data=[
                            go.Table(
                                header=dict(
                                    values=["Keyword", "Value"],
                                    font=dict(size=12, color="white"),
                                    fill_color="#1d2078",
                                ),
                                cells=dict(
                                    values=[
                                        list(keyword_dict.keys()),
                                        list(keyword_dict.values()),
                                    ],
                                    line_color="darkslategray",
                                    fill_color="#6DA9E4",
                                ),
                            )
                        ]
                    )
                    st.plotly_chart(fig, use_container_width=True)

        # Row 5: treemaps of the key topics (built from df1/df2 above).
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df1,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from your Resume",
                    )
                    st.plotly_chart(fig, use_container_width=True)
            with jobDescriptionCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df2,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from Job Description",
                    )
                    st.plotly_chart(fig, use_container_width=True)

        # Similarity score between the two keyword sets.
        avs.add_vertical_space(2)
        st.markdown("#### Similarity Score")
        resume_string = " ".join(selected_file["extracted_keywords"])
        jd_string = " ".join(selected_jd["extracted_keywords"])
        result = get_score(resume_string, jd_string)
        similarity_score = round(result[0].score * 100, 2)

        # Color-code the score: < 60 red, 60-74 orange, otherwise green.
        score_color = "green"
        if similarity_score < 60:
            score_color = "red"
        elif 60 <= similarity_score < 75:
            score_color = "orange"
        st.markdown(
            f"Similarity Score obtained for the resume and job description is "
            f'<span style="color:{score_color};font-size:24px; font-weight:Bold">{similarity_score}</span>',
            unsafe_allow_html=True,
        )

        # Show JD keywords highlighted inside the resume text.
        avs.add_vertical_space(2)
        with st.expander("Common words between Resume and Job Description:"):
            annotated_text(
                create_annotated_text(
                    selected_file["clean_data"],
                    selected_jd["extracted_keywords"],
                    "JD",
                    "#F24C3D",
                )
            )
st.divider()
# Back-to-top link; the anchor targets the page title "Resume Matcher".
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")