# Import necessary libraries
import json
import os
from typing import List

import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge

from scripts import JobDescriptionProcessor, ResumeProcessor
from scripts.parsers import ParseJobDesc, ParseResume
from scripts.ReadPdf import read_single_pdf
from scripts.similarity.get_score import *
from scripts.utils import get_filenames_from_dir

# Set page configuration
st.set_page_config(
    page_title="Resume Matcher",
    page_icon="Assets/img/favicon.ico",
    initial_sidebar_state="auto",
    layout="wide",
)

# Find the current working directory and configuration path.
# NOTE(review): `find_path` is provided by the star import from
# scripts.similarity.get_score — confirm it stays exported there.
cwd = find_path("Resume-Matcher")
config_path = os.path.join(cwd, "scripts", "similarity")

# Check if NLTK punkt_tab data is available; if not, download it once.
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab")

# Set some visualization parameters using the annotated_text library.
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
parameters.PADDING = "0.5 0.25rem"


def update_session_state(key, val):
    """Set a single Streamlit session-state variable."""
    st.session_state[key] = val


def delete_from_dir(filepath: str) -> bool:
    """Delete every file directly inside *filepath*.

    Best-effort cleanup for the processed-data folders: errors are
    printed rather than raised so a failed cleanup never blocks the app.

    Args:
        filepath (str): Directory whose files should be removed.

    Returns:
        bool: True if all files were removed, False on any OS error.
    """
    try:
        for entry in os.scandir(filepath):
            os.remove(entry.path)
        return True
    except OSError as error:
        print(f"Exception: {error}")
        return False


def create_star_graph(nodes_and_weights, title):
    """
    Create a star-shaped graph visualization.

    Every keyword node is attached to a central "resume" hub and the
    resulting network is rendered as a Plotly chart in Streamlit.

    Args:
        nodes_and_weights (list): List of (node, weight) tuples.
        title (str): Title for the graph.

    Returns:
        None
    """
    graph = nx.Graph()

    # Add the central hub node and connect every keyword to it.
    central_node = "resume"
    graph.add_node(central_node)
    for node, weight in nodes_and_weights:
        graph.add_node(node)
        # Weights are scaled by 100 so spring_layout spreads nodes visibly.
        graph.add_edge(central_node, node, weight=weight * 100)

    # Get position layout for nodes.
    pos = nx.spring_layout(graph)

    # Edge trace: each edge contributes (x0, x1, None) so Plotly draws
    # disconnected line segments.
    edge_x = []
    edge_y = []
    for edge in graph.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    # Node trace: markers colored by connection count (filled in below).
    node_x = []
    node_y = []
    for node in graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers",
        hoverinfo="text",
        marker=dict(
            showscale=True,
            colorscale="Rainbow",
            reversescale=True,
            color=[],
            size=10,
            colorbar=dict(
                thickness=15,
                title="Node Connections",
                xanchor="left",
                titleside="right",
            ),
            line_width=2,
        ),
    )

    # Color node points by number of connections.
    node_adjacencies = []
    node_text = []
    for node in graph.nodes():
        adjacencies = list(graph.adj[node])
        node_adjacencies.append(len(adjacencies))
        # <br> is Plotly's hover-text line break (a raw newline is not
        # rendered by Plotly hover labels).
        node_text.append(f"{node}<br># of connections: {len(adjacencies)}")

    node_trace.marker.color = node_adjacencies
    node_trace.text = node_text

    # Create the figure.
    figure = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=title,
            titlefont=dict(size=16),
            showlegend=False,
            hovermode="closest",
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        ),
    )

    # Show the figure.
    st.plotly_chart(figure, use_container_width=True)


def create_annotated_text(
    input_string: str, word_list: List[str], annotation: str, color_code: str
):
    """
    Create annotated text with highlighted keywords.

    Args:
        input_string (str): The input text.
        word_list (List[str]): List of keywords to be highlighted.
        annotation (str): Annotation label for highlighted keywords.
        color_code (str): Color code for highlighting.

    Returns:
        List: Mixed list of plain-string tokens and
        (token, annotation, color_code) tuples for annotated_text().
    """
    # Tokenize the input string.
    tokens = nltk.word_tokenize(input_string)

    # Convert the list to a set for O(1) lookups.
    word_set = set(word_list)

    # Tokens found in the keyword set become (token, label, color) tuples;
    # everything else passes through as a plain string.
    ret_annotated_text = []
    for token in tokens:
        if token in word_set:
            ret_annotated_text.append((token, annotation, color_code))
        else:
            ret_annotated_text.append(token)

    return ret_annotated_text


def read_json(filename):
    """
    Read JSON data from a file.

    Args:
        filename (str): The path to the JSON file.

    Returns:
        dict: The JSON data.
    """
    with open(filename) as f:
        data = json.load(f)
    return data


def tokenize_string(input_string):
    """
    Tokenize a string into words.

    Args:
        input_string (str): The input string.

    Returns:
        List[str]: List of tokens.
    """
    tokens = nltk.word_tokenize(input_string)
    return tokens


def _build_keyword_table(keyterms):
    """Build a Plotly table of keywords and their scores (scaled x100).

    Shared by the resume and job-description "Keywords & Values" panes,
    which previously duplicated this construction inline.

    Args:
        keyterms (list): List of (keyword, value) pairs.

    Returns:
        go.Figure: A styled two-column table figure.
    """
    keyword_dict = {keyword: value * 100 for keyword, value in keyterms}
    return go.Figure(
        data=[
            go.Table(
                header=dict(
                    values=["Keyword", "Value"],
                    font=dict(size=12, color="white"),
                    fill_color="#1d2078",
                ),
                cells=dict(
                    values=[
                        list(keyword_dict.keys()),
                        list(keyword_dict.values()),
                    ],
                    line_color="darkslategray",
                    fill_color="#6DA9E4",
                ),
            )
        ]
    )


# Cleanup processed resume / job descriptions from previous runs.
delete_from_dir(os.path.join(cwd, "Data", "Processed", "Resumes"))
delete_from_dir(os.path.join(cwd, "Data", "Processed", "JobDescription"))

# Set default session states for first run.
if "resumeUploaded" not in st.session_state.keys():
    update_session_state("resumeUploaded", "Pending")
    update_session_state("resumePath", "")
if "jobDescriptionUploaded" not in st.session_state.keys():
    update_session_state("jobDescriptionUploaded", "Pending")
    update_session_state("jobDescriptionPath", "")

# Display the main title and sidebar.
st.title(":blue[Resume Matcher]")
with st.sidebar:
    st.image("Assets/img/header_image.png")
    st.subheader(
        "Free and Open Source ATS to help your resume pass the screening stage."
    )
    st.markdown(
        "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)"
    )
    st.markdown(
        "Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)"
    )
    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown("For updates follow me on Twitter.")
    badge(type="twitter", name="_srbhr_")
    st.markdown(
        "If you like the project and would like to further help in development please consider 👇"
    )
    badge(type="buymeacoffee", name="srbhr")

st.divider()
avs.add_vertical_space(1)

# Upload widgets: save each PDF under Data/ and track status in session state.
with st.container():
    resumeCol, jobDescriptionCol = st.columns(2)
    with resumeCol:
        uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
        if uploaded_Resume is not None:
            # Only save once per upload; "Pending" means not yet persisted.
            if st.session_state["resumeUploaded"] == "Pending":
                save_path_resume = os.path.join(
                    cwd, "Data", "Resumes", uploaded_Resume.name
                )
                with open(save_path_resume, mode="wb") as w:
                    w.write(uploaded_Resume.getvalue())
                if os.path.exists(save_path_resume):
                    st.toast(
                        f"File {uploaded_Resume.name} is successfully saved!",
                        icon="✔️",
                    )
                    update_session_state("resumeUploaded", "Uploaded")
                    update_session_state("resumePath", save_path_resume)
        else:
            # File removed from the widget: reset state for the next upload.
            update_session_state("resumeUploaded", "Pending")
            update_session_state("resumePath", "")
    with jobDescriptionCol:
        uploaded_JobDescription = st.file_uploader(
            "Choose a Job Description", type="pdf"
        )
        if uploaded_JobDescription is not None:
            if st.session_state["jobDescriptionUploaded"] == "Pending":
                save_path_jobDescription = os.path.join(
                    cwd, "Data", "JobDescription", uploaded_JobDescription.name
                )
                with open(save_path_jobDescription, mode="wb") as w:
                    w.write(uploaded_JobDescription.getvalue())
                if os.path.exists(save_path_jobDescription):
                    st.toast(
                        f"File {uploaded_JobDescription.name} is successfully saved!",
                        icon="✔️",
                    )
                    update_session_state("jobDescriptionUploaded", "Uploaded")
                    update_session_state(
                        "jobDescriptionPath", save_path_jobDescription
                    )
        else:
            update_session_state("jobDescriptionUploaded", "Pending")
            update_session_state("jobDescriptionPath", "")

with st.spinner("Please wait..."):
    # BUG FIX: the first status check previously tested
    # jobDescriptionUploaded twice and never verified the resume's status,
    # so a failed resume save could still be processed.
    if (
        uploaded_Resume is not None
        and st.session_state["resumeUploaded"] == "Uploaded"
        and uploaded_JobDescription is not None
        and st.session_state["jobDescriptionUploaded"] == "Uploaded"
    ):
        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
        jobDescriptionProcessor = ParseJobDesc(
            read_single_pdf(st.session_state["jobDescriptionPath"])
        )

        # Resume / JD output.
        selected_file = resumeProcessor.get_JSON()
        selected_jd = jobDescriptionProcessor.get_JSON()

        # Add containers for each row to avoid overlap.
        # Parsed data
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Parsed Resume Data"):
                    st.caption(
                        "This text is parsed from your resume. This is how it'll look like after getting parsed by an "
                        "ATS."
                    )
                    st.caption(
                        "Utilize this to understand how to make your resume ATS friendly."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_file["clean_data"])
            with jobDescriptionCol:
                with st.expander("Parsed Job Description"):
                    st.caption(
                        "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste."
                    )
                    avs.add_vertical_space(3)
                    st.write(selected_jd["clean_data"])

        # Extracted keywords
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the resume."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_file["clean_data"],
                            selected_file["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )
            with jobDescriptionCol:
                with st.expander("Extracted Keywords"):
                    st.write(
                        "Now let's take a look at the extracted keywords from the job description."
                    )
                    annotated_text(
                        create_annotated_text(
                            selected_jd["clean_data"],
                            selected_jd["extracted_keywords"],
                            "KW",
                            "#0B666A",
                        )
                    )

        # Star graph visualization
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the resume."
                    )
                    create_star_graph(selected_file["keyterms"], "Entities from Resume")
            with jobDescriptionCol:
                with st.expander("Extracted Entities"):
                    st.write(
                        "Now let's take a look at the extracted entities from the job description."
                    )
                    create_star_graph(
                        selected_jd["keyterms"], "Entities from Job Description"
                    )

        # Keywords and values (tabular view).
        # df1/df2 are also consumed by the treemap containers below.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Keywords & Values"):
                    df1 = pd.DataFrame(
                        selected_file["keyterms"], columns=["keyword", "value"]
                    )
                    st.plotly_chart(
                        _build_keyword_table(selected_file["keyterms"]),
                        use_container_width=True,
                    )
            with jobDescriptionCol:
                with st.expander("Keywords & Values"):
                    df2 = pd.DataFrame(
                        selected_jd["keyterms"], columns=["keyword", "value"]
                    )
                    st.plotly_chart(
                        _build_keyword_table(selected_jd["keyterms"]),
                        use_container_width=True,
                    )

        # Treemaps
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df1,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from your Resume",
                    )
                    st.plotly_chart(fig, use_container_width=True)
            with jobDescriptionCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(
                        df2,
                        path=["keyword"],
                        values="value",
                        color_continuous_scale="Rainbow",
                        title="Key Terms/Topics Extracted from Job Description",
                    )
                    st.plotly_chart(fig, use_container_width=True)

        avs.add_vertical_space(2)
        st.markdown("#### Similarity Score")
        print("Config file parsed successfully:")

        # Score the keyword overlap between resume and job description.
        resume_string = " ".join(selected_file["extracted_keywords"])
        jd_string = " ".join(selected_jd["extracted_keywords"])
        result = get_score(resume_string, jd_string)
        similarity_score = round(result[0].score * 100, 2)

        # Color-code the score: red < 60 <= orange < 75 <= green.
        score_color = "green"
        if similarity_score < 60:
            score_color = "red"
        elif 60 <= similarity_score < 75:
            score_color = "orange"

        # BUG FIX: score_color was computed but never used in the rendered
        # markdown; unsafe_allow_html=True shows an HTML span was intended.
        st.markdown(
            f"Similarity Score obtained for the resume and job description is "
            f'<span style="color:{score_color};font-size:24px; font-weight:Bold">'
            f"{similarity_score}</span>",
            unsafe_allow_html=True,
        )

        avs.add_vertical_space(2)
        with st.expander("Common words between Resume and Job Description:"):
            annotated_text(
                create_annotated_text(
                    selected_file["clean_data"],
                    selected_jd["extracted_keywords"],
                    "JD",
                    "#F24C3D",
                )
            )
        st.divider()

# Go back to top
st.markdown("[:arrow_up: Back to Top](#resume-matcher)")