srbhr committed on
Commit
264e11c
·
1 Parent(s): 26c4bc1

Updated-the-parsing-algo

Browse files
Data/Processed/Resume-f493637b-0880-4eb9-ba16-a79cb254e94a.json ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bi_grams": "[Job Description, Description Java, Java Developer, experienced Java, Java developer, Java development, maintaining enterprise, enterprise level, level applications, possess e, e xcellent, xcellent communication, communication skills, collaborative mindset, fast paced, paced dynamic, dynamic team, team environment, Develop test, maintain Java, Java based, based applications, industry standard, Colla borate, cross functional, functional teams, address business, technical issues, architecture discussions, scalable efficient, secure applications, Write clean, clean efficient, welldocumented code, industry best, code reviews, reviews testing, deployment activities, Continuously improve, Java development, development including, emerging technologies, d frameworks, team members, understand business, deliver solutions, exceed expectations, Computer Science, Science Engineering, related field, lea st, Java development, strong proficiency, Java Spring, Hibernate frameworks, web development, development technologies, HTML CSS, database technologies, Oracle MySQL, SQ L, L Server, Agile development, development methodologies, Strong understanding, software development, development best, best practices, practices including, including object, oriented design, design principles, principles testing, version control, control systems, Excellent written, verbal communicatio, communicatio n, n skills, Strong problem, problem solving, critical thinking, thinking skills, work independently, team environment, Java development, cutting edge, edge technologies, competitive salary, salary comprehensive, comprehensive benefits, benefits package, dynamic work]",
3
+ "clean_data": "Job Description Java Developer \n \nWe are seeking an experienced Java developer to join our team The ideal \ncandidate should have at least 5 years of experience in Java development with a \nfocus on developing and maintaining enterprise level applications The candidate \nshould also possess e xcellent communication skills and a collaborative mindset \nwith an ability to work in a fast paced dynamic team environment \n \nResponsibilities \n \nDevelop test and maintain Java based applications using industry standard \nframeworks and technologies \nColla borate with cross functional teams to identify and address business \nrequirements and technical issues \nParticipate in design and architecture discussions to ensure the development of \nscalable efficient and secure applications \nWrite clean efficient and welldocumented code that adheres to industry best \npractices and standards \nParticipate in code reviews testing and deployment activities as required \nContinuously improve your knowledge and skills in Java development including \nemerging technologies an d frameworks \nCommunicate with team members and stakeholders to understand business \nrequirements and deliver solutions that meet or exceed expectations \nRequirements \n \nBachelor's or Master's degree in Computer Science Engineering or related field \nAt lea st 5 years of experience in Java development with strong proficiency in \nJava Spring and Hibernate frameworks Experience with web development technologies such as HTML CSS and \nJavaScript \nKnowledge of database technologies such as Oracle MySQL or SQ L Server \nExperience with Agile development methodologies \nStrong understanding of software development best practices including object \noriented design principles testing and version control systems such as Git \nExcellent written and verbal communicatio n skills \nStrong problem solving and critical thinking skills \nAbility to work independently and collaboratively in a team 
environment \nIf you are passionate about Java development and have a desire to work with \ncutting edge technologies and frameworks we encourage you to apply We offer a \ncompetitive salary comprehensive benefits package and a dynamic work \nenvironment with opportunities for growth and advancement ",
4
+ "entities": [
5
+ "Computer Science Engineering",
6
+ "HTML CSS",
7
+ "Oracle MySQL or",
8
+ "SQ L Server",
9
+ "Git \nExcellent"
10
+ ],
11
+ "extracted_keywords": [
12
+ "Job",
13
+ "Description",
14
+ "Java",
15
+ "Developer",
16
+ "seeking",
17
+ "experienced",
18
+ "Java",
19
+ "developer",
20
+ "join",
21
+ "team",
22
+ "ideal",
23
+ "candidate",
24
+ "have",
25
+ "years",
26
+ "experience",
27
+ "Java",
28
+ "development",
29
+ "focus",
30
+ "developing",
31
+ "maintaining",
32
+ "enterprise",
33
+ "level",
34
+ "applications",
35
+ "candidate",
36
+ "possess",
37
+ "e",
38
+ "xcellent",
39
+ "communication",
40
+ "skills",
41
+ "collaborative",
42
+ "mindset",
43
+ "ability",
44
+ "work",
45
+ "paced",
46
+ "dynamic",
47
+ "team",
48
+ "environment",
49
+ "Responsibilities",
50
+ "Develop",
51
+ "test",
52
+ "maintain",
53
+ "Java",
54
+ "based",
55
+ "applications",
56
+ "using",
57
+ "industry",
58
+ "standard",
59
+ "frameworks",
60
+ "technologies",
61
+ "Colla",
62
+ "borate",
63
+ "cross",
64
+ "functional",
65
+ "teams",
66
+ "identify",
67
+ "address",
68
+ "business",
69
+ "requirements",
70
+ "technical",
71
+ "issues",
72
+ "Participate",
73
+ "design",
74
+ "architecture",
75
+ "discussions",
76
+ "ensure",
77
+ "development",
78
+ "scalable",
79
+ "efficient",
80
+ "secure",
81
+ "applications",
82
+ "Write",
83
+ "clean",
84
+ "efficient",
85
+ "welldocumented",
86
+ "code",
87
+ "adheres",
88
+ "industry",
89
+ "best",
90
+ "practices",
91
+ "standards",
92
+ "Participate",
93
+ "code",
94
+ "reviews",
95
+ "testing",
96
+ "deployment",
97
+ "activities",
98
+ "required",
99
+ "improve",
100
+ "knowledge",
101
+ "skills",
102
+ "Java",
103
+ "development",
104
+ "including",
105
+ "emerging",
106
+ "technologies",
107
+ "d",
108
+ "frameworks",
109
+ "Communicate",
110
+ "team",
111
+ "members",
112
+ "stakeholders",
113
+ "understand",
114
+ "business",
115
+ "requirements",
116
+ "deliver",
117
+ "solutions",
118
+ "meet",
119
+ "exceed",
120
+ "expectations",
121
+ "Requirements",
122
+ "Bachelor",
123
+ "Master",
124
+ "degree",
125
+ "Computer",
126
+ "Science",
127
+ "Engineering",
128
+ "related",
129
+ "field",
130
+ "lea",
131
+ "st",
132
+ "years",
133
+ "experience",
134
+ "Java",
135
+ "development",
136
+ "strong",
137
+ "proficiency",
138
+ "Java",
139
+ "Spring",
140
+ "Hibernate",
141
+ "frameworks",
142
+ "Experience",
143
+ "web",
144
+ "development",
145
+ "technologies",
146
+ "such",
147
+ "HTML",
148
+ "CSS",
149
+ "JavaScript",
150
+ "Knowledge",
151
+ "database",
152
+ "technologies",
153
+ "such",
154
+ "Oracle",
155
+ "MySQL",
156
+ "SQ",
157
+ "L",
158
+ "Server",
159
+ "Experience",
160
+ "Agile",
161
+ "development",
162
+ "methodologies",
163
+ "Strong",
164
+ "understanding",
165
+ "software",
166
+ "development",
167
+ "best",
168
+ "practices",
169
+ "including",
170
+ "object",
171
+ "oriented",
172
+ "design",
173
+ "principles",
174
+ "testing",
175
+ "version",
176
+ "control",
177
+ "systems",
178
+ "such",
179
+ "Git",
180
+ "Excellent",
181
+ "written",
182
+ "verbal",
183
+ "communicatio",
184
+ "skills",
185
+ "Strong",
186
+ "problem",
187
+ "solving",
188
+ "critical",
189
+ "thinking",
190
+ "skills",
191
+ "Ability",
192
+ "work",
193
+ "team",
194
+ "environment",
195
+ "passionate",
196
+ "Java",
197
+ "development",
198
+ "have",
199
+ "desire",
200
+ "work",
201
+ "cutting",
202
+ "edge",
203
+ "technologies",
204
+ "frameworks",
205
+ "encourage",
206
+ "apply",
207
+ "offer",
208
+ "competitive",
209
+ "salary",
210
+ "comprehensive",
211
+ "benefits",
212
+ "package",
213
+ "dynamic",
214
+ "work",
215
+ "environment",
216
+ "opportunities",
217
+ "growth",
218
+ "advancement"
219
+ ],
220
+ "job_desc_data": "Job Description: Java Developer \n \nWe are seeking an experienced Java developer to join our team. The ideal \ncandidate should have at least 5 years of experience in Java development, with a \nfocus on developing and maintaining enterprise -level applications. The candidate \nshould also possess e xcellent communication skills and a collaborative mindset, \nwith an ability to work in a fast -paced, dynamic team environment. \n \nResponsibilities: \n \nDevelop, test, and maintain Java -based applications using industry -standard \nframeworks and technologies. \nColla borate with cross -functional teams to identify and address business \nrequirements and technical issues. \nParticipate in design and architecture discussions to ensure the development of \nscalable, efficient, and secure applications. \nWrite clean, efficient, and well-documented code that adheres to industry best \npractices and standards. \nParticipate in code reviews, testing, and deployment activities as required. \nContinuously improve your knowledge and skills in Java development, including \nemerging technologies an d frameworks. \nCommunicate with team members and stakeholders to understand business \nrequirements and deliver solutions that meet or exceed expectations. \nRequirements: \n \nBachelor's or Master's degree in Computer Science, Engineering, or related field. \nAt lea st 5 years of experience in Java development, with strong proficiency in \nJava, Spring, and Hibernate frameworks. Experience with web development technologies such as HTML, CSS, and \nJavaScript. \nKnowledge of database technologies such as Oracle, MySQL, or SQ L Server. \nExperience with Agile development methodologies. \nStrong understanding of software development best practices, including object -\noriented design principles, testing, and version control systems such as Git. \nExcellent written and verbal communicatio n skills. \nStrong problem -solving and critical -thinking skills. 
\nAbility to work independently and collaboratively in a team environment. \nIf you are passionate about Java development and have a desire to work with \ncutting -edge technologies and frameworks, we encourage you to apply. We offer a \ncompetitive salary, comprehensive benefits package, and a dynamic work \nenvironment with opportunities for growth and advancement. ",
221
+ "keyterms": [
222
+ [
223
+ "Java development",
224
+ 0.26919475322747016
225
+ ],
226
+ [
227
+ "team environment",
228
+ 0.09941934234511868
229
+ ],
230
+ [
231
+ "experience",
232
+ 0.02977637498685747
233
+ ],
234
+ [
235
+ "application",
236
+ 0.02379850592847604
237
+ ],
238
+ [
239
+ "skill",
240
+ 0.022497177455640515
241
+ ],
242
+ [
243
+ "technology",
244
+ 0.021616804020294696
245
+ ],
246
+ [
247
+ "framework",
248
+ 0.018326336126746803
249
+ ],
250
+ [
251
+ "year",
252
+ 0.01807172297101236
253
+ ],
254
+ [
255
+ "candidate",
256
+ 0.014562722583606963
257
+ ],
258
+ [
259
+ "ability",
260
+ 0.012715438755893347
261
+ ],
262
+ [
263
+ "requirement",
264
+ 0.012405073234017967
265
+ ],
266
+ [
267
+ "standard",
268
+ 0.011643052002888876
269
+ ],
270
+ [
271
+ "industry",
272
+ 0.011232996235539427
273
+ ],
274
+ [
275
+ "dynamic",
276
+ 0.010197178749718634
277
+ ],
278
+ [
279
+ "business",
280
+ 0.010027130262939828
281
+ ],
282
+ [
283
+ "design",
284
+ 0.009519266187661463
285
+ ],
286
+ [
287
+ "efficient",
288
+ 0.008186434398199447
289
+ ],
290
+ [
291
+ "good",
292
+ 0.007964469415171026
293
+ ],
294
+ [
295
+ "practice",
296
+ 0.007854481690515808
297
+ ],
298
+ [
299
+ "code",
300
+ 0.007498641755548127
301
+ ]
302
+ ],
303
+ "pos_frequencies": {
304
+ "ADJ": 27,
305
+ "ADP": 31,
306
+ "ADV": 7,
307
+ "AUX": 4,
308
+ "CCONJ": 29,
309
+ "DET": 13,
310
+ "NOUN": 104,
311
+ "NUM": 2,
312
+ "PART": 10,
313
+ "PRON": 9,
314
+ "PROPN": 37,
315
+ "SCONJ": 2,
316
+ "SPACE": 39,
317
+ "VERB": 39
318
+ },
319
+ "tri_grams": "[Job Description Java, Description Java Developer, seeking an experienced, experienced Java developer, developer to join, join our team, team The ideal, years of experience, experience in Java, focus on developing, developing and maintaining, maintaining enterprise level, enterprise level applications, applications The candidate, possess e xcellent, e xcellent communication, xcellent communication skills, ability to work, fast paced dynamic, paced dynamic team, dynamic team environment, test and maintain, maintain Java based, Java based applications, applications using industry, frameworks and technologies, borate with cross, cross functional teams, teams to identify, identify and address, requirements and technical, Participate in design, design and architecture, discussions to ensure, ensure the development, efficient and secure, Write clean efficient, code that adheres, adheres to industry, practices and standards, Participate in code, code reviews testing, testing and deployment, activities as required, improve your knowledge, knowledge and skills, skills in Java, Java development including, technologies an d, Communicate with team, members and stakeholders, stakeholders to understand, requirements and deliver, solutions that meet, meet or exceed, Master's degree, degree in Computer, Computer Science Engineering, Engineering or related, years of experience, experience in Java, development with strong, Spring and Hibernate, Experience with web, web development technologies, Knowledge of database, MySQL or SQ, SQ L Server, Experience with Agile, Agile development methodologies, understanding of software, software development best, development best practices, best practices including, practices including object, oriented design principles, design principles testing, testing and version, version control systems, written and verbal, verbal communicatio n, communicatio n skills, Strong problem solving, solving and critical, critical thinking skills, 
Ability to work, independently and collaboratively, passionate about Java, desire to work, cutting edge technologies, technologies and frameworks, apply We offer, competitive salary comprehensive, salary comprehensive benefits, comprehensive benefits package, environment with opportunities, opportunities for growth, growth and advancement]",
320
+ "unique_id": "f493637b-0880-4eb9-ba16-a79cb254e94a"
321
+ }
scripts/ReadPdf.py CHANGED
@@ -3,6 +3,22 @@ import glob
3
  from pypdf import PdfReader
4
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def read_multiple_pdf(file_path: str) -> list:
7
  """
8
  Read multiple PDF files from the specified file path and extract the text from each page.
@@ -19,10 +35,10 @@ def read_multiple_pdf(file_path: str) -> list:
19
  try:
20
  with open(file, 'rb') as f:
21
  pdf_reader = PdfReader(f)
22
- count = len(pdf_reader.pages)
23
  for i in range(count):
24
- page = pdf_reader.pages[i]
25
- output.append(page.extract_text())
26
  except Exception as e:
27
  print(f"Error reading file '{file}': {str(e)}")
28
  return output
 
3
  from pypdf import PdfReader
4
 
5
 
6
+ def get_pdf_files(file_path):
7
+ """
8
+ Get all PDF files from the specified file path.
9
+
10
+ Args:
11
+ file_path (str): The directory path containing the PDF files.
12
+
13
+ Returns:
14
+ list: A list containing the paths of all the PDF files in the directory.
15
+ """
16
+ if os.path.exists(file_path):
17
+ return glob.glob(os.path.join(file_path, '*.pdf'))
18
+ else:
19
+ return []
20
+
21
+
22
  def read_multiple_pdf(file_path: str) -> list:
23
  """
24
  Read multiple PDF files from the specified file path and extract the text from each page.
 
35
  try:
36
  with open(file, 'rb') as f:
37
  pdf_reader = PdfReader(f)
38
+ count = pdf_reader.getNumPages()
39
  for i in range(count):
40
+ page = pdf_reader.getPage(i)
41
+ output.append(page.extractText())
42
  except Exception as e:
43
  print(f"Error reading file '{file}': {str(e)}")
44
  return output
scripts/parsers/ParseJobDescToJson.py CHANGED
@@ -5,7 +5,7 @@ from scripts.utils.Utils import TextCleaner, CountFrequency, generate_unique_id
5
  from scripts.KeytermsExtraction import KeytermExtractor
6
  import os
7
 
8
- SAVE_DIRECTORY = "Data/Processed/"
9
 
10
 
11
  class ParseJobDesc:
 
5
  from scripts.KeytermsExtraction import KeytermExtractor
6
  import os
7
 
8
+ SAVE_DIRECTORY = "../../Data/Processed/"
9
 
10
 
11
  class ParseJobDesc:
scripts/similarity/qdrant_search.py ADDED
File without changes
scripts/utils/ExtraScripts.py ADDED
File without changes
streamlit_app.py CHANGED
@@ -9,7 +9,7 @@ import plotly.graph_objects as go
9
  import matplotlib.pyplot as plt
10
  import squarify
11
 
12
- st.title('Resume :blue[Ranker]')
13
  st.subheader('_AI Based Resume Analyzer & Ranker_')
14
 
15
 
@@ -155,8 +155,10 @@ def preprocess_text(text):
155
 
156
 
157
  # read the json file
158
- resume = read_json('resume.json')
159
- job_desc = read_json('Data/Processed/Job-Desc-a4f06ccb-8d5a-4d0b-9f02-3ba6d686472e.json')
 
 
160
  st.json(resume)
161
 
162
  st.json(job_desc)
@@ -212,7 +214,7 @@ fig = px.treemap(df2, path=['keyword'], values='value',
212
  title='Resume POS')
213
  st.write(fig)
214
 
215
- st.text(resume['resume_data'])
216
 
217
 
218
  fig = go.Figure(data=[go.Table(
@@ -249,9 +251,9 @@ fig = go.Figure(data=[go.Table(
249
 
250
  st.plotly_chart(figure_or_data=fig)
251
 
252
- resume_list = preprocess_text(resume['resume_data'])
253
 
254
- job_desc_list = preprocess_text(job_desc['job_desc_data'])
255
 
256
  df_data = find_intersection_of_lists(resume_list, job_desc_list)
257
 
@@ -263,7 +265,7 @@ st.write(df_data)
263
 
264
  st.write(data_length)
265
 
266
- st.write(px.data.tips())
267
 
268
  fig = px.pie(data_length, values='values', names='elements')
269
  st.write(fig)
 
9
  import matplotlib.pyplot as plt
10
  import squarify
11
 
12
+ st.title('Resume :blue[Matcher]')
13
  st.subheader('_AI Based Resume Analyzer & Ranker_')
14
 
15
 
 
155
 
156
 
157
  # read the json file
158
+ resume = read_json(
159
+ 'Data/Processed/Resume-d531571e-e4fa-45eb-ab6a-267cdeb6647e.json')
160
+ job_desc = read_json(
161
+ 'Data/Processed/Job-Desc-a4f06ccb-8d5a-4d0b-9f02-3ba6d686472e.json')
162
  st.json(resume)
163
 
164
  st.json(job_desc)
 
214
  title='Resume POS')
215
  st.write(fig)
216
 
217
+ st.text(resume['clean_data'])
218
 
219
 
220
  fig = go.Figure(data=[go.Table(
 
251
 
252
  st.plotly_chart(figure_or_data=fig)
253
 
254
+ resume_list = preprocess_text(resume['clean_data'])
255
 
256
+ job_desc_list = preprocess_text(job_desc['clean_data'])
257
 
258
  df_data = find_intersection_of_lists(resume_list, job_desc_list)
259
 
 
265
 
266
  st.write(data_length)
267
 
268
+ # st.write(px.data.tips())
269
 
270
  fig = px.pie(data_length, values='values', names='elements')
271
  st.write(fig)