Abhishek Mathur imhalcyon committed on
Commit
3e31acb
·
unverified ·
1 Parent(s): a301826

Feature: Streamlit Interactive v1.0 (#184)

Browse files

* Initial commit

* Docs: ChatGPTd for comments and docstrings

* Fix: Type warning for title-font in create_star_graph()

* Format: Re-formatted as per PEP 8

* UI: Move intro to sidebar

* UI: Remove footer links, add Back to Top link

* UI: Add columns for resume and job description upload

* Fix: header_image extension updated in script

* Update: streamlit upgraded to 1.27.0, streamlit-extras upgraded to 0.3.2, and htbuilder upgraded to 0.6.2

* Misc: Formatting

* UI: Add components for each column and add cleanup of processed files

* UI: Add Favicon file

* Fix: Updated syntax for string comparison

* Cleanup: Delete unnecessary files

* Fix: Remove dependency on run_first. Add dir delete function. Include icon in st.toast implementation.

* Ignore: Add /Data/Processed/* in gitignore

* UI: Add containers to fix columns together for each row

* Fix: Reference issue for resume key topics. Also add wide layout as default.

* Revert "Cleanup: Delete unnecessary files"

This reverts commit cf3f1c73d8fa91f48b50eef2f669e68e9cbe80fa.

---------

Co-authored-by: imhalcyon <shake.aftermath@gmail.com>

Files changed (3) hide show
  1. .gitignore +3 -0
  2. Assets/img/favicon.ico +0 -0
  3. streamlit_interactive.py +414 -0
.gitignore CHANGED
@@ -141,4 +141,7 @@ scripts/similarity/config.yml
141
 
142
  # Personal Data / Secrets
143
  *.local.yml
 
 
 
144
  *.local.pdf
 
141
 
142
  # Personal Data / Secrets
143
  *.local.yml
144
+
145
+ # Processed or local files
146
+ /Data/Processed/*
147
  *.local.pdf
Assets/img/favicon.ico ADDED
streamlit_interactive.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import necessary libraries
2
+ import json
3
+ import os
4
+ from typing import List
5
+
6
+ import networkx as nx
7
+ import nltk
8
+ import pandas as pd
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ import streamlit as st
12
+ from annotated_text import annotated_text, parameters
13
+ from streamlit_extras import add_vertical_space as avs
14
+ from streamlit_extras.badges import badge
15
+
16
+ from scripts import ResumeProcessor, JobDescriptionProcessor
17
+ from scripts.ReadPdf import read_single_pdf
18
+ from scripts.similarity import get_similarity_score, find_path, read_config
19
+ from scripts.parsers import ParseResume
20
+ from scripts.parsers import ParseJobDesc
21
+ from scripts.utils import get_filenames_from_dir
22
+
23
+ # Set page configuration
24
st.set_page_config(
    page_title='Resume Matcher',
    page_icon="Assets/img/favicon.ico",
    initial_sidebar_state='auto',
    layout='wide',
)

# Locate the project root and the folder holding the similarity config.
cwd = find_path('Resume-Matcher')
config_path = os.path.join(cwd, "scripts", "similarity")

# word_tokenize needs the NLTK "punkt" data; download it only when the
# lookup fails.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Rendering options for the annotated_text component.
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
# NOTE(review): the first padding value has no unit ("0.5") - confirm this
# is intended and not meant to be "0.5rem".
parameters.PADDING = "0.5 0.25rem"
40
+
41
+
42
+ # Function to set session state variables
43
def update_session_state(key, val):
    """
    Store a value in Streamlit's session state.

    Args:
        key: Session-state key to write.
        val: Value to associate with ``key``.

    Returns:
        None
    """
    st.session_state[key] = val
45
+
46
+
47
+ # Function to delete all files in a directory
48
def delete_from_dir(filepath: str) -> bool:
    """
    Delete every entry directly inside a directory.

    Each entry is removed with ``os.remove``. An entry that cannot be removed
    (for example a sub-directory) is reported and skipped, so one failure no
    longer prevents the remaining files from being deleted.

    Args:
        filepath (str): Path of the directory to empty.

    Returns:
        bool: True if the directory could be listed and every entry was
        removed; False if listing failed or at least one entry could not be
        removed.
    """
    try:
        # The context manager closes the directory handle deterministically;
        # the listing is materialized first so removal never races the
        # iterator.
        with os.scandir(filepath) as iterator:
            entries = list(iterator)
    except OSError as error:
        print(f"Exception: {error}")
        return False

    all_removed = True
    for entry in entries:
        try:
            os.remove(entry.path)
        except OSError as error:
            print(f"Exception: {error}")
            all_removed = False

    return all_removed
57
+
58
+
59
+ # Function to create a star-shaped graph visualization
60
def create_star_graph(nodes_and_weights, title):
    """
    Create a star-shaped graph visualization and render it in the app.

    A hub node labelled "resume" is connected to every node listed in
    ``nodes_and_weights``. The graph is laid out with a spring layout, drawn
    with Plotly and displayed through ``st.plotly_chart``.

    Args:
        nodes_and_weights (list): List of ``(node, weight)`` tuples. Each
            weight is multiplied by 100 and stored as the edge's ``weight``
            attribute.
        title (str): Title for the graph.

    Returns:
        None
    """
    graph = nx.Graph()

    # Hub of the star.
    central_node = "resume"
    graph.add_node(central_node)

    # add_edge creates any endpoint that is not in the graph yet, so no
    # separate add_node call is needed for the spokes.
    for node, weight in nodes_and_weights:
        graph.add_edge(central_node, node, weight=weight * 100)

    # Get position layout for nodes
    pos = nx.spring_layout(graph)

    # Edge trace: each segment is (start, end, None); the None breaks the
    # line so consecutive edges are not joined together.
    edge_x = []
    edge_y = []
    for start, end in graph.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none', mode='lines')

    # Node coordinates, colours (number of neighbours) and hover text.
    node_x = []
    node_y = []
    node_adjacencies = []
    node_text = []
    for node in graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)
        neighbour_count = len(graph.adj[node])
        node_adjacencies.append(neighbour_count)
        node_text.append(f'{node}<br># of connections: {neighbour_count}')

    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers', hoverinfo='text',
        text=node_text,
        marker=dict(
            showscale=True, colorscale='Rainbow', reversescale=True,
            color=node_adjacencies, size=10,
            # colorbar.title.side replaces the deprecated `titleside`.
            colorbar=dict(thickness=15,
                          title=dict(text='Node Connections', side='right'),
                          xanchor='left'),
            line_width=2))

    # layout.title.font replaces the deprecated `titlefont`.
    figure = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=dict(text=title, font=dict(size=16)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

    # Show the figure
    st.plotly_chart(figure, use_container_width=True)
132
+
133
+
134
+ # Function to create annotated text with highlighting
135
def create_annotated_text(input_string: str, word_list: List[str], annotation: str, color_code: str):
    """
    Build the argument list for ``annotated_text`` with keywords highlighted.

    The input is tokenized with ``nltk.word_tokenize``. Tokens that appear in
    ``word_list`` (exact match) become ``(token, annotation, color_code)``
    tuples; all other tokens are kept as plain strings.

    Args:
        input_string (str): The input text.
        word_list (List[str]): Keywords to highlight.
        annotation (str): Label attached to each highlighted keyword.
        color_code (str): Colour used for the highlight.

    Returns:
        List: Tokens in their original order, highlighted where they match.
    """
    pieces = nltk.word_tokenize(input_string)

    # Set membership keeps the per-token check O(1).
    highlighted = set(word_list)

    return [
        (piece, annotation, color_code) if piece in highlighted else piece
        for piece in pieces
    ]
167
+
168
+
169
+ # Function to read JSON data from a file
170
def read_json(filename):
    """
    Read JSON data from a file.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default locale encoding.

    Args:
        filename (str): The path to the JSON file.

    Returns:
        dict: The parsed JSON data (whatever ``json.load`` produces for the
        file's top-level value).
    """
    with open(filename, encoding="utf-8") as f:
        return json.load(f)
183
+
184
+
185
+ # Function to tokenize a string
186
def tokenize_string(input_string):
    """
    Split a string into word tokens using ``nltk.word_tokenize``.

    Args:
        input_string (str): The text to tokenize.

    Returns:
        List[str]: The tokens, in order of appearance.
    """
    return nltk.word_tokenize(input_string)
198
+
199
+
200
+ # Cleanup processed resume / job descriptions
201
# Remove previously processed resumes / job descriptions on every run.
for _processed_subdir in ("Resumes", "JobDescription"):
    delete_from_dir(os.path.join(cwd, "Data", "Processed", _processed_subdir))

# Seed the upload-tracking session state the first time the script runs.
for _state_prefix in ("resume", "jobDescription"):
    if f"{_state_prefix}Uploaded" not in st.session_state:
        update_session_state(f"{_state_prefix}Uploaded", "Pending")
        update_session_state(f"{_state_prefix}Path", "")

# Page title; project intro and links live in the sidebar.
st.title(':blue[Resume Matcher]')
with st.sidebar:
    st.image('Assets/img/header_image.png')
    st.subheader('Free and Open Source ATS to help your resume pass the screening stage.')
    st.markdown('Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)')
    st.markdown('Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)')
    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown('For updates follow me on Twitter.')
    badge(type="twitter", name="_srbhr_")
    st.markdown('If you like the project and would like to further help in development please consider 👇')
    badge(type="buymeacoffee", name="srbhr")

st.divider()
avs.add_vertical_space(1)
227
+
228
# Upload row: resume on the left, job description on the right.
with st.container():
    resumeCol, jobDescriptionCol = st.columns(2)

    with resumeCol:
        uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
        if uploaded_Resume is None:
            # No file selected: reset the tracking state.
            update_session_state("resumeUploaded", "Pending")
            update_session_state("resumePath", "")
        elif st.session_state["resumeUploaded"] == "Pending":
            # First run with this upload: persist it under Data/Resumes.
            save_path_resume = os.path.join(cwd, "Data", "Resumes", uploaded_Resume.name)

            with open(save_path_resume, mode='wb') as w:
                w.write(uploaded_Resume.getvalue())

            if os.path.exists(save_path_resume):
                st.toast(f'File {uploaded_Resume.name} is successfully saved!', icon="✔️")
                update_session_state("resumeUploaded", "Uploaded")
                update_session_state("resumePath", save_path_resume)

    with jobDescriptionCol:
        uploaded_JobDescription = st.file_uploader("Choose a Job Description", type="pdf")
        if uploaded_JobDescription is None:
            # No file selected: reset the tracking state.
            update_session_state("jobDescriptionUploaded", "Pending")
            update_session_state("jobDescriptionPath", "")
        elif st.session_state["jobDescriptionUploaded"] == "Pending":
            # First run with this upload: persist it under Data/JobDescription.
            save_path_jobDescription = os.path.join(cwd, "Data", "JobDescription", uploaded_JobDescription.name)

            with open(save_path_jobDescription, mode='wb') as w:
                w.write(uploaded_JobDescription.getvalue())

            if os.path.exists(save_path_jobDescription):
                st.toast(f'File {uploaded_JobDescription.name} is successfully saved!', icon="✔️")
                update_session_state("jobDescriptionUploaded", "Uploaded")
                update_session_state("jobDescriptionPath", save_path_jobDescription)
263
+
264
with st.spinner('Please wait...'):
    # Run the analysis only when BOTH documents are present and saved.
    # (The resume's own upload flag is checked here; previously the job
    # description flag was tested twice and the resume flag never.)
    if (uploaded_Resume is not None and
            st.session_state["resumeUploaded"] == "Uploaded" and
            uploaded_JobDescription is not None and
            st.session_state["jobDescriptionUploaded"] == "Uploaded"):

        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
        jobDescriptionProcessor = ParseJobDesc(read_single_pdf(st.session_state["jobDescriptionPath"]))

        # Resume / JD output
        selected_file = resumeProcessor.get_JSON()
        selected_jd = jobDescriptionProcessor.get_JSON()

        # Each row lives in its own container so the two columns stay aligned.

        # Row 1: cleaned text of both documents.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Parsed Resume Data"):
                    st.caption(
                        "This text is parsed from your resume. This is how it'll look like after getting parsed by an "
                        "ATS.")
                    st.caption("Utilize this to understand how to make your resume ATS friendly.")
                    avs.add_vertical_space(3)
                    st.write(selected_file["clean_data"])

            with jobDescriptionCol:
                with st.expander("Parsed Job Description"):
                    st.caption(
                        "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste.")
                    avs.add_vertical_space(3)
                    st.write(selected_jd["clean_data"])

        # Row 2: each document with its own extracted keywords highlighted.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Keywords"):
                    st.write("Now let's take a look at the extracted keywords from the resume.")
                    annotated_text(create_annotated_text(
                        selected_file["clean_data"], selected_file["extracted_keywords"],
                        "KW", "#0B666A"))
            with jobDescriptionCol:
                with st.expander("Extracted Keywords"):
                    st.write("Now let's take a look at the extracted keywords from the job description.")
                    annotated_text(create_annotated_text(
                        selected_jd["clean_data"], selected_jd["extracted_keywords"],
                        "KW", "#0B666A"))

        # Row 3: star graphs built from the key terms.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Entities"):
                    st.write("Now let's take a look at the extracted entities from the resume.")

                    # Call the function with your data
                    create_star_graph(selected_file['keyterms'], "Entities from Resume")
            with jobDescriptionCol:
                with st.expander("Extracted Entities"):
                    st.write("Now let's take a look at the extracted entities from the job description.")

                    # Call the function with your data
                    create_star_graph(selected_jd['keyterms'], "Entities from Job Description")

        # Row 4: keyword/value tables. df1 and df2 are reused by the treemaps
        # in the next row.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Keywords & Values"):
                    df1 = pd.DataFrame(selected_file['keyterms'], columns=["keyword", "value"])

                    # Create the dictionary (values scaled by 100 for display)
                    keyword_dict = {}
                    for keyword, value in selected_file['keyterms']:
                        keyword_dict[keyword] = value * 100

                    fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
                                                               font=dict(size=12, color="white"),
                                                               fill_color='#1d2078'),
                                                   cells=dict(values=[list(keyword_dict.keys()),
                                                                      list(keyword_dict.values())],
                                                              line_color='darkslategray',
                                                              fill_color='#6DA9E4'))
                                          ])
                    st.plotly_chart(fig, use_container_width=True)
            with jobDescriptionCol:
                with st.expander("Keywords & Values"):
                    df2 = pd.DataFrame(selected_jd['keyterms'], columns=["keyword", "value"])

                    # Create the dictionary (values scaled by 100 for display)
                    keyword_dict = {}
                    for keyword, value in selected_jd['keyterms']:
                        keyword_dict[keyword] = value * 100

                    fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
                                                               font=dict(size=12, color="white"),
                                                               fill_color='#1d2078'),
                                                   cells=dict(values=[list(keyword_dict.keys()),
                                                                      list(keyword_dict.values())],
                                                              line_color='darkslategray',
                                                              fill_color='#6DA9E4'))
                                          ])
                    st.plotly_chart(fig, use_container_width=True)

        # Row 5: treemaps of the key terms.
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(df1, path=['keyword'], values='value',
                                     color_continuous_scale='Rainbow',
                                     title='Key Terms/Topics Extracted from your Resume')
                    st.plotly_chart(fig, use_container_width=True)

            with jobDescriptionCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(df2, path=['keyword'], values='value',
                                     color_continuous_scale='Rainbow',
                                     title='Key Terms/Topics Extracted from Job Description')
                    st.plotly_chart(fig, use_container_width=True)

        # Similarity score; computed only when the similarity config file
        # exists and parses to a truthy value.
        avs.add_vertical_space(2)
        config_file_path = os.path.join(config_path, "config.yml")
        if os.path.exists(config_file_path):
            config_data = read_config(config_file_path)
            if config_data:
                print("Config file parsed successfully:")
                resume_string = ' '.join(selected_file["extracted_keywords"])
                jd_string = ' '.join(selected_jd["extracted_keywords"])
                result = get_similarity_score(resume_string, jd_string)
                similarity_score = round(result[0]["score"] * 100, 2)

                # Colour bands: below 60 red, 60 to below 75 orange,
                # otherwise green.
                score_color = "green"
                if similarity_score < 60:
                    score_color = "red"
                elif 60 <= similarity_score < 75:
                    score_color = "orange"

                st.markdown(f'Similarity Score obtained for the resume and job description is '
                            f'<span style="color:{score_color};font-size:24px; font-weight:Bold">{similarity_score}</span>',
                            unsafe_allow_html=True)
        else:
            print("Config file does not exist.")

        # Resume text with the job description's keywords highlighted.
        avs.add_vertical_space(2)
        with st.expander("Common words between Resume and Job Description:"):
            annotated_text(create_annotated_text(
                selected_file["clean_data"], selected_jd["extracted_keywords"],
                "JD", "#F24C3D"))

st.divider()

# Go back to top
st.markdown('[:arrow_up: Back to Top](#resume-matcher)')