# resumematcher / streamlit_interactive.py
# (repo metadata: March, first, commit 46917c3)
# Import necessary libraries
import json
import os
from typing import List
import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge
from scripts import JobDescriptionProcessor, ResumeProcessor
from scripts.parsers import ParseJobDesc, ParseResume
from scripts.ReadPdf import read_single_pdf
from scripts.similarity.get_score import *
from scripts.utils import get_filenames_from_dir
# Set Streamlit page configuration (must run before any other st.* call).
st.set_page_config(
    page_title="Resume Matcher",
    page_icon="Assets/img/favicon.ico",
    initial_sidebar_state="auto",
    layout="wide",
)

# Find the project root and the similarity-config path.
# NOTE(review): `find_path` is not imported by name here — presumably it is
# brought in by the star import from scripts.similarity.get_score; verify.
cwd = find_path("Resume-Matcher")
config_path = os.path.join(cwd, "scripts", "similarity")

# Ensure the NLTK punkt_tab tokenizer data is available; download on first run.
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab")

# Visual parameters for the annotated_text keyword highlighting.
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
parameters.PADDING = "0.5 0.25rem"
# Helper to set a Streamlit session-state variable.
def update_session_state(key: str, val) -> None:
    """Store *val* under *key* in Streamlit's session state."""
    st.session_state[key] = val
# Helper to delete all files directly inside a directory.
def delete_from_dir(filepath: str) -> bool:
    """
    Delete every regular file directly inside *filepath*.

    Subdirectories are left untouched (previously a subdirectory would abort
    the whole cleanup with an OSError). The scandir iterator is closed via a
    context manager so the directory handle is not leaked.

    Args:
        filepath (str): Directory whose files should be removed.

    Returns:
        bool: True if the cleanup completed, False on any OS error
        (e.g. the directory does not exist).
    """
    try:
        with os.scandir(filepath) as entries:
            for entry in entries:
                # Skip subdirectories; only plain files are removed.
                if entry.is_file():
                    os.remove(entry.path)
        return True
    except OSError as error:
        # Best-effort cleanup: report and signal failure instead of raising.
        print(f"Exception: {error}")
        return False
# Helper to draw a star-shaped graph of keywords around a central node.
def create_star_graph(nodes_and_weights, title):
    """
    Draw a star-shaped keyword graph and render it via Streamlit.

    A central "resume" hub is connected to every entry in
    *nodes_and_weights*; each edge carries the weight scaled by 100.

    Args:
        nodes_and_weights (list): Tuples of (node_label, weight).
        title (str): Title displayed above the plot.

    Returns:
        None
    """
    # Build the star: one hub node plus a spoke per keyword.
    hub = "resume"
    graph = nx.Graph()
    graph.add_node(hub)
    for label, weight in nodes_and_weights:
        graph.add_node(label)
        graph.add_edge(hub, label, weight=weight * 100)

    # Force-directed layout assigns each node an (x, y) position.
    pos = nx.spring_layout(graph)

    # Edge coordinates; None entries make Plotly break the polyline
    # between consecutive edges.
    edge_x, edge_y = [], []
    for src, dst in graph.edges():
        x0, y0 = pos[src]
        x1, y1 = pos[dst]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    # Node scatter trace; colors are filled in below from node degrees.
    node_x = [pos[n][0] for n in graph.nodes()]
    node_y = [pos[n][1] for n in graph.nodes()]
    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        marker=dict(
            showscale=True,
            colorscale="Rainbow",
            reversescale=True,
            color=[],
            size=10,
            colorbar=dict(
                thickness=15,
                title="Node Connections",
                xanchor="left",
                titleside="right",
            ),
            line_width=2,
        ),
    )

    # Color each node by its neighbour count and attach hover text.
    degrees = [len(graph.adj[n]) for n in graph.nodes()]
    node_trace.marker.color = degrees
    node_trace.text = [
        f"{n}<br># of connections: {d}" for n, d in zip(graph.nodes(), degrees)
    ]

    # Assemble the figure and hand it to Streamlit.
    figure = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=title,
            titlefont=dict(size=16),
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )
    st.plotly_chart(figure, use_container_width=True)
# Helper to build highlighted text for the annotated_text component.
def create_annotated_text(
    input_string: str, word_list: List[str], annotation: str, color_code: str
):
    """
    Build an annotated-text payload with the given keywords highlighted.

    Args:
        input_string (str): The text to annotate.
        word_list (List[str]): Keywords to highlight.
        annotation (str): Label attached to each highlighted keyword.
        color_code (str): Highlight color for the keywords.

    Returns:
        List: Mix of plain tokens and (token, annotation, color) tuples.
    """
    # A set gives O(1) membership checks while scanning the tokens.
    keywords = set(word_list)
    return [
        (token, annotation, color_code) if token in keywords else token
        for token in nltk.word_tokenize(input_string)
    ]
# Helper to load a JSON document from disk.
def read_json(filename):
    """
    Load and return the JSON content of *filename*.

    Args:
        filename (str): Path to the JSON file.

    Returns:
        dict: The parsed JSON data.
    """
    with open(filename) as json_file:
        return json.load(json_file)
# Helper to split text into word tokens.
def tokenize_string(input_string):
    """
    Split *input_string* into word tokens.

    Args:
        input_string (str): The text to tokenize.

    Returns:
        List[str]: The word tokens produced by NLTK.
    """
    return nltk.word_tokenize(input_string)
# Clean up previously processed resumes / job descriptions so stale results
# from an earlier run are never shown.
delete_from_dir(os.path.join(cwd, "Data", "Processed", "Resumes"))
delete_from_dir(os.path.join(cwd, "Data", "Processed", "JobDescription"))

# Seed the session-state upload flags on the first run of the app.
# "Pending" means no file has been saved yet for that slot.
if "resumeUploaded" not in st.session_state.keys():
    update_session_state("resumeUploaded", "Pending")
    update_session_state("resumePath", "")
if "jobDescriptionUploaded" not in st.session_state.keys():
    update_session_state("jobDescriptionUploaded", "Pending")
    update_session_state("jobDescriptionPath", "")
# Main title and sidebar (project links, badges, support buttons).
st.title(":blue[Resume Matcher]")
with st.sidebar:
    st.image("Assets/img/header_image.png")
    st.subheader(
        "Free and Open Source ATS to help your resume pass the screening stage."
    )
    st.markdown(
        "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
    )
    st.markdown(
        "Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)"
    )
    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown("For updates follow me on Twitter.")
    badge(type="twitter", name="_srbhr_")
    st.markdown(
        "If you like the project and would like to further help in development please consider 👇"
    )
    badge(type="buymeacoffee", name="srbhr")
    st.divider()

avs.add_vertical_space(1)
# Upload row: resume on the left, job description on the right.
# Each slot is a small state machine: "Pending" -> save the uploaded file to
# disk -> "Uploaded"; removing the file resets the slot back to "Pending".
with st.container():
    resumeCol, jobDescriptionCol = st.columns(2)
    with resumeCol:
        uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
        if uploaded_Resume is not None:
            # Only save once per upload; "Uploaded" prevents re-saving on rerun.
            if st.session_state["resumeUploaded"] == "Pending":
                save_path_resume = os.path.join(
                    cwd, "Data", "Resumes", uploaded_Resume.name
                )
                with open(save_path_resume, mode="wb") as w:
                    w.write(uploaded_Resume.getvalue())
                if os.path.exists(save_path_resume):
                    st.toast(
                        f"File {uploaded_Resume.name} is successfully saved!", icon="✔️"
                    )
                    update_session_state("resumeUploaded", "Uploaded")
                    update_session_state("resumePath", save_path_resume)
        else:
            # File removed from the uploader: reset the slot.
            update_session_state("resumeUploaded", "Pending")
            update_session_state("resumePath", "")
    with jobDescriptionCol:
        uploaded_JobDescription = st.file_uploader(
            "Choose a Job Description", type="pdf"
        )
        if uploaded_JobDescription is not None:
            # Same save-once logic as the resume slot.
            if st.session_state["jobDescriptionUploaded"] == "Pending":
                save_path_jobDescription = os.path.join(
                    cwd, "Data", "JobDescription", uploaded_JobDescription.name
                )
                with open(save_path_jobDescription, mode="wb") as w:
                    w.write(uploaded_JobDescription.getvalue())
                if os.path.exists(save_path_jobDescription):
                    st.toast(
                        f"File {uploaded_JobDescription.name} is successfully saved!",
                        icon="✔️",
                    )
                    update_session_state("jobDescriptionUploaded", "Uploaded")
                    update_session_state("jobDescriptionPath", save_path_jobDescription)
        else:
            # File removed from the uploader: reset the slot.
            update_session_state("jobDescriptionUploaded", "Pending")
            update_session_state("jobDescriptionPath", "")
with st.spinner("Please wait..."):
    # Only run the matching pipeline once BOTH files are uploaded and saved.
    # Bug fix: the first session-state check previously tested
    # "jobDescriptionUploaded" twice, so the resume flag was never verified
    # and the pipeline could run before the resume file was saved.
    if (
        uploaded_Resume is not None
        and st.session_state["resumeUploaded"] == "Uploaded"
        and uploaded_JobDescription is not None
        and st.session_state["jobDescriptionUploaded"] == "Uploaded"
    ):
        # Parse the raw PDF text of both documents into structured JSON.
        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
        jobDescriptionProcessor = ParseJobDesc(
            read_single_pdf(st.session_state["jobDescriptionPath"])
        )

        # Resume / JD parsed output (dicts with clean_data, extracted_keywords,
        # keyterms, ... — see the Parse* classes for the exact schema).
        selected_file = resumeProcessor.get_JSON()
        selected_jd = jobDescriptionProcessor.get_JSON()

        # Each row lives in its own container to avoid layout overlap.

        # Row 1: raw parsed text.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Parsed Resume Data"):
                    st.caption(
                        "This text is parsed from your resume. This is how it'll look like after getting parsed by an "
                        "ATS."
                    )
                    st.caption(
                        "Utilize this to understand how to make your resume ATS friendly."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_file["clean_data"])
            with jobDescriptionCol:
                with st.expander("Parsed Job Description"):
                    st.caption(
                        "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_jd["clean_data"])

        # Row 2: extracted keywords, highlighted in context.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the resume."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_file["clean_data"],
                            selected_file["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )
            with jobDescriptionCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the job description."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_jd["clean_data"],
                            selected_jd["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )

        # Row 3: star-graph visualization of extracted entities.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the resume."
                    )
                    create_star_graph(selected_file["keyterms"], "Entities from Resume")
            with jobDescriptionCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the job description."
                    )
                    create_star_graph(
                        selected_jd["keyterms"], "Entities from Job Description"
                    )

        # Row 4: keyword/value tables. df1/df2 are reused by the treemaps below.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Keywords & Values"):
                    df1 = pd.DataFrame(
                        selected_file["keyterms"], columns=["keyword", "value"]
                    )
                    # Scale scores to percentages for display.
                    keyword_dict = {}
                    for keyword, value in selected_file["keyterms"]:
                        keyword_dict[keyword] = value * 100
                    fig = go.Figure(
                        data=[
                            go.Table(
                                header=dict(
                                    values=["Keyword", "Value"],
                                    font=dict(size=12, color="white"),
                                    fill_color="#1d2078",
                                ),
                                cells=dict(
                                    values=[
                                        list(keyword_dict.keys()),
                                        list(keyword_dict.values()),
                                    ],
                                    line_color="darkslategray",
                                    fill_color="#6DA9E4",
                                ),
                            )
                        ]
                    )
                    st.plotly_chart(fig, use_container_width=True)
            with jobDescriptionCol:
                with st.expander("Keywords & Values"):
                    df2 = pd.DataFrame(
                        selected_jd["keyterms"], columns=["keyword", "value"]
                    )
                    # Scale scores to percentages for display.
                    keyword_dict = {}
                    for keyword, value in selected_jd["keyterms"]:
                        keyword_dict[keyword] = value * 100
                    fig = go.Figure(
                        data=[
                            go.Table(
                                header=dict(
                                    values=["Keyword", "Value"],
                                    font=dict(size=12, color="white"),
                                    fill_color="#1d2078",
                                ),
                                cells=dict(
                                    values=[
                                        list(keyword_dict.keys()),
                                        list(keyword_dict.values()),
                                    ],
                                    line_color="darkslategray",
                                    fill_color="#6DA9E4",
                                ),
                            )
                        ]
                    )
                    st.plotly_chart(fig, use_container_width=True)

        # Row 5: treemaps of the key topics (built from df1/df2 above).
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df1,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from your Resume",
                    )
                    st.plotly_chart(fig, use_container_width=True)
            with jobDescriptionCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df2,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from Job Description",
                    )
                    st.plotly_chart(fig, use_container_width=True)

        # Similarity score between the two keyword sets.
        avs.add_vertical_space(2)
        st.markdown("#### Similarity Score")
        resume_string = " ".join(selected_file["extracted_keywords"])
        jd_string = " ".join(selected_jd["extracted_keywords"])
        result = get_score(resume_string, jd_string)
        similarity_score = round(result[0].score * 100, 2)

        # Color-code the score: < 60 red, 60-74 orange, otherwise green.
        score_color = "green"
        if similarity_score < 60:
            score_color = "red"
        elif 60 <= similarity_score < 75:
            score_color = "orange"
        st.markdown(
            f"Similarity Score obtained for the resume and job description is "
            f'<span style="color:{score_color};font-size:24px; font-weight:Bold">{similarity_score}</span>',
            unsafe_allow_html=True,
        )

        # Show JD keywords highlighted inside the resume text.
        avs.add_vertical_space(2)
        with st.expander("Common words between Resume and Job Description:"):
            annotated_text(
                create_annotated_text(
                    selected_file["clean_data"],
                    selected_jd["extracted_keywords"],
                    "JD",
                    "#F24C3D",
                )
            )
st.divider()
# Back-to-top link; the anchor targets the page title "Resume Matcher".
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")