srbhr committed on
Commit
fee6403
·
1 Parent(s): db81ee1

Updating Progress

Browse files
Progress/Similar.py CHANGED
@@ -1,14 +1,13 @@
1
  import textdistance as td
2
  import Cleaner
3
 
 
4
  def match(resume, job_des):
5
- j = td.jaccard.similarity(resume, job_des)*100
6
- s = td.sorensen_dice.similarity(resume, job_des)*100
7
- c = td.cosine.similarity(resume, job_des)*100
8
- o = td.overlap.normalized_similarity(resume, job_des)*100
9
  total = (j+s+c+o)/4
10
- return total
11
 
12
- # https://realpython.com/working-with-files-in-python/
13
 
14
- # https://support.dlink.ca/emulators/wbr2310/index.htm
 
1
  import textdistance as td
2
  import Cleaner
3
 
4
+
5
  def match(resume, job_des):
6
+ j = td.jaccard.similarity(resume, job_des)
7
+ s = td.sorensen_dice.similarity(resume, job_des)
8
+ c = td.cosine.similarity(resume, job_des)
9
+ o = td.overlap.normalized_similarity(resume, job_des)
10
  total = (j+s+c+o)/4
11
+ return total*100
12
 
 
13
 
 
Progress/app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Cleaner
2
+ import Similar
3
+ import textract as tx
4
+ import pandas as pd
5
+ import os
6
+ import streamlit as st
7
+
8
+ st.title("Naive Resume Matcher")
9
+ st.markdown(""" ### Ranking **Resumes** based on the Matching Skills as provided by the required job description. This uses a **Token, String and Word Embedding** based algorithm created to generate a match score that ranks a resume.""")
10
+
11
+
12
+ resume_dir = "Data/Resumes/"
13
+ job_desc_dir = "Data/JobDesc/"
14
+ resume_names = os.listdir(resume_dir)
15
+ document = []
16
+
17
+ for res in resume_names:
18
+ temp = []
19
+ temp.append(res)
20
+ text = tx.process(resume_dir+res, encoding='ascii')
21
+ text = str(text, 'utf-8')
22
+ temp.append(text)
23
+ document.append(temp)
24
+
25
+ df = pd.DataFrame(document, columns=['Name', 'Context'])
26
+
27
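+ # The job description must be in docx format; the code below reads the entry at index 1 of os.listdir(job_desc_dir), so the folder must contain at least two entries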
28
+ job_docs = os.listdir(job_desc_dir)
29
+ job_desc = tx.process(
30
+ job_desc_dir+job_docs[1], extension='docx', encoding='ascii')
31
+ job_desc = str(job_desc, 'utf-8')
32
+ job_des = Cleaner.Cleaner(job_desc)
33
+
34
+ st.subheader("Job Description")
35
+ st.markdown(" --- ")
36
+ st.write(job_desc)
37
+ st.markdown(" --- ")
38
+
39
+ scores = []
40
+ for text in df['Context']:
41
+ raw = Cleaner.Cleaner(text)
42
+ score = Similar.match(raw[2], job_des[2])
43
+ scores.append(score)
44
+ st.write(scores)
45
+ df['Scores'] = scores
46
+
47
+ st.dataframe(df)
48
+ df2 = df.sort_values(by=['Scores'], ascending=False)
49
+ st.dataframe(df2)
50
+ print(df2.iloc[0, 1])
Progress/compute.py CHANGED
@@ -23,7 +23,7 @@ df = pd.DataFrame(document, columns=['Name', 'Context'])
23
  # Only one Job Description should be present and in docx format
24
  job_docs = os.listdir(job_desc_dir)
25
  job_desc = tx.process(
26
- job_desc_dir+job_docs[0], extension='docx', encoding='ascii')
27
  job_desc = str(job_desc, 'utf-8')
28
  job_des = Cleaner.Cleaner(job_desc[0])
29
 
 
23
  # Only one Job Description should be present and in docx format
24
  job_docs = os.listdir(job_desc_dir)
25
  job_desc = tx.process(
26
+ job_desc_dir+job_docs[1], extension='docx', encoding='ascii')
27
  job_desc = str(job_desc, 'utf-8')
28
  job_des = Cleaner.Cleaner(job_desc[0])
29
 
Progress/testing_compute.ipynb ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import pandas as pd\n",
11
+ "import textract as tx\n",
12
+ "import Similar\n",
13
+ "import Cleaner"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "metadata": {},
20
+ "outputs": [],
21
+ "source": [
22
+ "resume_dir = \"Data/Resumes/\"\n",
23
+ "job_desc_dir = \"Data/JobDesc/\"\n",
24
+ "resume_names = os.listdir(resume_dir)\n",
25
+ "document = []"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 3,
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "data": {
35
+ "text/plain": [
36
+ "[]"
37
+ ]
38
+ },
39
+ "execution_count": 3,
40
+ "metadata": {},
41
+ "output_type": "execute_result"
42
+ }
43
+ ],
44
+ "source": [
45
+ "document "
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": 4,
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "for res in resume_names:\n",
55
+ " temp = []\n",
56
+ " temp.append(res)\n",
57
+ " text = tx.process(resume_dir+res, encoding='ascii')\n",
58
+ " text = str(text, 'utf-8')\n",
59
+ " temp.append(text)\n",
60
+ " document.append(temp)"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 5,
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "df = pd.DataFrame(document, columns=['Name', 'Context'])"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 6,
75
+ "metadata": {},
76
+ "outputs": [
77
+ {
78
+ "data": {
79
+ "text/html": [
80
+ "<div>\n",
81
+ "<style scoped>\n",
82
+ " .dataframe tbody tr th:only-of-type {\n",
83
+ " vertical-align: middle;\n",
84
+ " }\n",
85
+ "\n",
86
+ " .dataframe tbody tr th {\n",
87
+ " vertical-align: top;\n",
88
+ " }\n",
89
+ "\n",
90
+ " .dataframe thead th {\n",
91
+ " text-align: right;\n",
92
+ " }\n",
93
+ "</style>\n",
94
+ "<table border=\"1\" class=\"dataframe\">\n",
95
+ " <thead>\n",
96
+ " <tr style=\"text-align: right;\">\n",
97
+ " <th></th>\n",
98
+ " <th>Name</th>\n",
99
+ " <th>Context</th>\n",
100
+ " </tr>\n",
101
+ " </thead>\n",
102
+ " <tbody>\n",
103
+ " <tr>\n",
104
+ " <th>0</th>\n",
105
+ " <td>ALANKRIT NIRJHARPremium CV Featured Resume.docx</td>\n",
106
+ " <td>ALANKRIT NIRJHARPremium CV Featured Resume \\n...</td>\n",
107
+ " </tr>\n",
108
+ " <tr>\n",
109
+ " <th>1</th>\n",
110
+ " <td>Amarpreet SinghPremium CV Featured Resume.docx</td>\n",
111
+ " <td>Amarpreet SinghPremium CV Featured Resume \\n\\...</td>\n",
112
+ " </tr>\n",
113
+ " <tr>\n",
114
+ " <th>2</th>\n",
115
+ " <td>Shambhai MishraFeatured Resume.docx</td>\n",
116
+ " <td>Shambhai MishraFeatured Resume \\n\\n \\n\\nA har...</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>3</th>\n",
120
+ " <td>Amruta B.pdf</td>\n",
121
+ " <td>Amruta B. ThoratFeatured Resume \\n\\n \\n\\nJr...</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>4</th>\n",
125
+ " <td>Dhavakumar AmarntharajanFeatured Resume.docx</td>\n",
126
+ " <td>Dhavakumar AmarntharajanFeatured Resume \\n\\n ...</td>\n",
127
+ " </tr>\n",
128
+ " <tr>\n",
129
+ " <th>5</th>\n",
130
+ " <td>DIVYA PRAKASH SINGHFeatured Resume.docx</td>\n",
131
+ " <td>DIVYA PRAKASH SINGHFeatured Resume \\n\\n \\n\\nM...</td>\n",
132
+ " </tr>\n",
133
+ " <tr>\n",
134
+ " <th>6</th>\n",
135
+ " <td>Abhishek SharmaFeatured Resume.docx</td>\n",
136
+ " <td>Abhishek SharmaFeatured Resume \\n\\n \\n\\nDevel...</td>\n",
137
+ " </tr>\n",
138
+ " <tr>\n",
139
+ " <th>7</th>\n",
140
+ " <td>Manoj KumarFeatured Resume.docx</td>\n",
141
+ " <td>Manoj KumarFeatured Resume \\n\\n \\n\\nAPPLICATI...</td>\n",
142
+ " </tr>\n",
143
+ " <tr>\n",
144
+ " <th>8</th>\n",
145
+ " <td>Seshadri Sastry Kunapuli.docx</td>\n",
146
+ " <td>Seshadri Sastry Kunapuli \\n\\n \\n\\nManager ...</td>\n",
147
+ " </tr>\n",
148
+ " <tr>\n",
149
+ " <th>9</th>\n",
150
+ " <td>SNEHA SAHUFeatured Resume.docx</td>\n",
151
+ " <td>SNEHA SAHUFeatured Resume \\n\\n \\n\\nMachine Le...</td>\n",
152
+ " </tr>\n",
153
+ " <tr>\n",
154
+ " <th>10</th>\n",
155
+ " <td>Bijjula SahithiFeatured Resume.docx</td>\n",
156
+ " <td>Bijjula SahithiFeatured Resume \\n\\n \\n\\nProdu...</td>\n",
157
+ " </tr>\n",
158
+ " <tr>\n",
159
+ " <th>11</th>\n",
160
+ " <td>Prashant Bhat.pdf</td>\n",
161
+ " <td>Prashant Bhat \\n\\n \\n\\nStanford Machine ...</td>\n",
162
+ " </tr>\n",
163
+ " <tr>\n",
164
+ " <th>12</th>\n",
165
+ " <td>Suraj Chauhan.docx</td>\n",
166
+ " <td>Suraj Chauhan \\n\\n \\n\\nSenior Web Developer ...</td>\n",
167
+ " </tr>\n",
168
+ " <tr>\n",
169
+ " <th>13</th>\n",
170
+ " <td>Nandagopal HFeatured Resume.pdf</td>\n",
171
+ " <td>Nandagopal HFeatured Resume \\n\\n \\n\\nMYSQL,...</td>\n",
172
+ " </tr>\n",
173
+ " <tr>\n",
174
+ " <th>14</th>\n",
175
+ " <td>AJAY CHINNIFeatured Resume.docx</td>\n",
176
+ " <td>AJAY CHINNIFeatured Resume \\n\\n \\n\\nSeeking r...</td>\n",
177
+ " </tr>\n",
178
+ " <tr>\n",
179
+ " <th>15</th>\n",
180
+ " <td>Ashwani Kumar RajputPremium CV Featured Resume...</td>\n",
181
+ " <td>Ashwani Kumar RajputPremium CV Featured Resume...</td>\n",
182
+ " </tr>\n",
183
+ " <tr>\n",
184
+ " <th>16</th>\n",
185
+ " <td>RISHABH SHARMAFeatured Resume.docx</td>\n",
186
+ " <td>RISHABH SHARMAFeatured Resume \\n\\n \\n\\nJunior...</td>\n",
187
+ " </tr>\n",
188
+ " <tr>\n",
189
+ " <th>17</th>\n",
190
+ " <td>NARENDRA HINGE.docx</td>\n",
191
+ " <td>NARENDRA HINGE\\n\\n\\n\\n \\n\\nSenior Web Develope...</td>\n",
192
+ " </tr>\n",
193
+ " <tr>\n",
194
+ " <th>18</th>\n",
195
+ " <td>Kaustav SenFeatured Resume.docx</td>\n",
196
+ " <td>Kaustav SenFeatured Resume \\n\\n \\n\\nTech Lead...</td>\n",
197
+ " </tr>\n",
198
+ " <tr>\n",
199
+ " <th>19</th>\n",
200
+ " <td>khyati gandhi.docx</td>\n",
201
+ " <td>khyati gandhi \\n\\n \\n\\nWeb Developer seeking...</td>\n",
202
+ " </tr>\n",
203
+ " <tr>\n",
204
+ " <th>20</th>\n",
205
+ " <td>Jithin J NairFeatured Resume.docx</td>\n",
206
+ " <td>Jithin J NairFeatured Resume \\n\\n \\n\\nMachine...</td>\n",
207
+ " </tr>\n",
208
+ " </tbody>\n",
209
+ "</table>\n",
210
+ "</div>"
211
+ ],
212
+ "text/plain": [
213
+ " Name \\\n",
214
+ "0 ALANKRIT NIRJHARPremium CV Featured Resume.docx \n",
215
+ "1 Amarpreet SinghPremium CV Featured Resume.docx \n",
216
+ "2 Shambhai MishraFeatured Resume.docx \n",
217
+ "3 Amruta B.pdf \n",
218
+ "4 Dhavakumar AmarntharajanFeatured Resume.docx \n",
219
+ "5 DIVYA PRAKASH SINGHFeatured Resume.docx \n",
220
+ "6 Abhishek SharmaFeatured Resume.docx \n",
221
+ "7 Manoj KumarFeatured Resume.docx \n",
222
+ "8 Seshadri Sastry Kunapuli.docx \n",
223
+ "9 SNEHA SAHUFeatured Resume.docx \n",
224
+ "10 Bijjula SahithiFeatured Resume.docx \n",
225
+ "11 Prashant Bhat.pdf \n",
226
+ "12 Suraj Chauhan.docx \n",
227
+ "13 Nandagopal HFeatured Resume.pdf \n",
228
+ "14 AJAY CHINNIFeatured Resume.docx \n",
229
+ "15 Ashwani Kumar RajputPremium CV Featured Resume... \n",
230
+ "16 RISHABH SHARMAFeatured Resume.docx \n",
231
+ "17 NARENDRA HINGE.docx \n",
232
+ "18 Kaustav SenFeatured Resume.docx \n",
233
+ "19 khyati gandhi.docx \n",
234
+ "20 Jithin J NairFeatured Resume.docx \n",
235
+ "\n",
236
+ " Context \n",
237
+ "0 ALANKRIT NIRJHARPremium CV Featured Resume \\n... \n",
238
+ "1 Amarpreet SinghPremium CV Featured Resume \\n\\... \n",
239
+ "2 Shambhai MishraFeatured Resume \\n\\n \\n\\nA har... \n",
240
+ "3 Amruta B. ThoratFeatured Resume \\n\\n \\n\\nJr... \n",
241
+ "4 Dhavakumar AmarntharajanFeatured Resume \\n\\n ... \n",
242
+ "5 DIVYA PRAKASH SINGHFeatured Resume \\n\\n \\n\\nM... \n",
243
+ "6 Abhishek SharmaFeatured Resume \\n\\n \\n\\nDevel... \n",
244
+ "7 Manoj KumarFeatured Resume \\n\\n \\n\\nAPPLICATI... \n",
245
+ "8 Seshadri Sastry Kunapuli \\n\\n \\n\\nManager ... \n",
246
+ "9 SNEHA SAHUFeatured Resume \\n\\n \\n\\nMachine Le... \n",
247
+ "10 Bijjula SahithiFeatured Resume \\n\\n \\n\\nProdu... \n",
248
+ "11 Prashant Bhat \\n\\n \\n\\nStanford Machine ... \n",
249
+ "12 Suraj Chauhan \\n\\n \\n\\nSenior Web Developer ... \n",
250
+ "13 Nandagopal HFeatured Resume \\n\\n \\n\\nMYSQL,... \n",
251
+ "14 AJAY CHINNIFeatured Resume \\n\\n \\n\\nSeeking r... \n",
252
+ "15 Ashwani Kumar RajputPremium CV Featured Resume... \n",
253
+ "16 RISHABH SHARMAFeatured Resume \\n\\n \\n\\nJunior... \n",
254
+ "17 NARENDRA HINGE\\n\\n\\n\\n \\n\\nSenior Web Develope... \n",
255
+ "18 Kaustav SenFeatured Resume \\n\\n \\n\\nTech Lead... \n",
256
+ "19 khyati gandhi \\n\\n \\n\\nWeb Developer seeking... \n",
257
+ "20 Jithin J NairFeatured Resume \\n\\n \\n\\nMachine... "
258
+ ]
259
+ },
260
+ "execution_count": 6,
261
+ "metadata": {},
262
+ "output_type": "execute_result"
263
+ }
264
+ ],
265
+ "source": [
266
+ "df"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": 12,
272
+ "metadata": {},
273
+ "outputs": [],
274
+ "source": [
275
+ "def get_job_description():\n",
276
+ " job_docs = os.listdir(job_desc_dir)\n",
277
+ " job_desc = tx.process(\n",
278
+ " job_desc_dir+job_docs[1], extension='docx', encoding='ascii')\n",
279
+ " job_desc = str(job_desc, 'utf-8')\n",
280
+ " job_des = Cleaner.Cleaner(job_desc)"
281
+ ]
282
+ },
283
+ {
284
+ "cell_type": "code",
285
+ "execution_count": 8,
286
+ "metadata": {},
287
+ "outputs": [],
288
+ "source": [
289
+ "scores = []\n",
290
+ "def compute_score(x=2, y=2):\n",
291
+ " for text in df['Context']:\n",
292
+ " raw = Cleaner.Cleaner(text)\n",
293
+ " score = Similar.match(raw[2], job_des[2])\n",
294
+ " scores.append(score)"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": 9,
300
+ "metadata": {},
301
+ "outputs": [
302
+ {
303
+ "data": {
304
+ "text/plain": [
305
+ "[7.188133725864406,\n",
306
+ " 17.082715886282017,\n",
307
+ " 9.476762357802984,\n",
308
+ " 17.291130127664918,\n",
309
+ " 18.012687466241466,\n",
310
+ " 9.452611578580793,\n",
311
+ " 8.42476835081202,\n",
312
+ " 11.586667849627627,\n",
313
+ " 12.456231619031753,\n",
314
+ " 17.0072580417408,\n",
315
+ " 14.108484885761968,\n",
316
+ " 9.982352544008108,\n",
317
+ " 16.236249502169258,\n",
318
+ " 17.05720171632391,\n",
319
+ " 11.320461075067941,\n",
320
+ " 15.613431194443613,\n",
321
+ " 9.533217549290569,\n",
322
+ " 14.925508576697014,\n",
323
+ " 17.989739486512008,\n",
324
+ " 24.064673772279484,\n",
325
+ " 11.435471298914607]"
326
+ ]
327
+ },
328
+ "execution_count": 9,
329
+ "metadata": {},
330
+ "output_type": "execute_result"
331
+ }
332
+ ],
333
+ "source": [
334
+ "scores"
335
+ ]
336
+ },
337
+ {
338
+ "cell_type": "code",
339
+ "execution_count": 11,
340
+ "metadata": {},
341
+ "outputs": [],
342
+ "source": [
343
+ "df['Scores'] = scores\n",
344
+ "df2 = df.sort_values(by=['Scores'], ascending=False)\n",
345
+ "# print(df2.iloc[0, 1])"
346
+ ]
347
+ },
348
+ {
349
+ "cell_type": "code",
350
+ "execution_count": 17,
351
+ "metadata": {},
352
+ "outputs": [],
353
+ "source": [
354
+ "# import matplotlib.pyplot as plt"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": 16,
360
+ "metadata": {},
361
+ "outputs": [],
362
+ "source": [
363
+ "# plt.text(15, 30)"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "code",
368
+ "execution_count": null,
369
+ "metadata": {},
370
+ "outputs": [],
371
+ "source": []
372
+ }
373
+ ],
374
+ "metadata": {
375
+ "kernelspec": {
376
+ "display_name": "Python 3",
377
+ "language": "python",
378
+ "name": "python3"
379
+ },
380
+ "language_info": {
381
+ "codemirror_mode": {
382
+ "name": "ipython",
383
+ "version": 3
384
+ },
385
+ "file_extension": ".py",
386
+ "mimetype": "text/x-python",
387
+ "name": "python",
388
+ "nbconvert_exporter": "python",
389
+ "pygments_lexer": "ipython3",
390
+ "version": "3.8.2"
391
+ }
392
+ },
393
+ "nbformat": 4,
394
+ "nbformat_minor": 4
395
+ }
Progress/topic_modelling_1.ipynb ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 12,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import gensim\n",
10
+ "import gensim.corpora as corpora\n",
11
+ "from gensim.utils import simple_preprocess\n",
12
+ "from gensim.models import CoherenceModel\n",
13
+ "import re\n",
14
+ "import numpy as np\n",
15
+ "import pandas as pd\n",
16
+ "from pprint import pprint"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 13,
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "import Distill"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 14,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "from texts import text_2"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 15,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "text = Distill.remove_stopwords(Distill.tokenize(text_2))"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 16,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "text = Distill.remove_tags(text)"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 17,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "text = Distill.lemmatize(text)"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 18,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "# text = Distill._to_string(text)"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": 22,
76
+ "metadata": {},
77
+ "outputs": [],
78
+ "source": [
79
+ "tokens_ = []\n",
80
+ "for a in text:\n",
81
+ " tokens_.append([a])"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "execution_count": 24,
87
+ "metadata": {},
88
+ "outputs": [],
89
+ "source": [
90
+ "# tokens_"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 26,
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": [
99
+ "id2word = corpora.Dictionary(tokens_)"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 27,
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "def to_token(List):\n",
109
+ " # takes a simple list and breaks it into tokens of the form [[],[],[]]\n",
110
+ " token = [[a] for a in List]\n",
111
+ " return token"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 28,
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "saas = to_token(text)"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 30,
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "# saas"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "execution_count": 31,
135
+ "metadata": {},
136
+ "outputs": [],
137
+ "source": [
138
+ "corpus = [id2word.doc2bow(text) for text in tokens_]"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 39,
144
+ "metadata": {},
145
+ "outputs": [],
146
+ "source": [
147
+ "# Human readable format of corpus (term-frequency)\n",
148
+ "# [[(id2word[id], freq) for id, freq in cp] for cp in corpus[:]]"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 40,
154
+ "metadata": {},
155
+ "outputs": [],
156
+ "source": [
157
+ "# corpus[:]"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": 41,
163
+ "metadata": {},
164
+ "outputs": [],
165
+ "source": [
166
+ "# Build LDA model\n",
167
+ "lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,\n",
168
+ " id2word=id2word,\n",
169
+ " num_topics=20, \n",
170
+ " random_state=100,\n",
171
+ " update_every=1,\n",
172
+ " chunksize=100,\n",
173
+ " passes=10,\n",
174
+ " alpha='auto',\n",
175
+ " per_word_topics=True)"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 42,
181
+ "metadata": {},
182
+ "outputs": [
183
+ {
184
+ "name": "stdout",
185
+ "output_type": "stream",
186
+ "text": [
187
+ "[(0,\n",
188
+ " '0.087*\"property\" + 0.046*\"brake\" + 0.046*\"lead\" + 0.004*\"commonwealth\" + '\n",
189
+ " '0.004*\"compensate\" + 0.004*\"design\" + 0.004*\"decade\" + 0.004*\"range\" + '\n",
190
+ " '0.004*\"local\" + 0.004*\"mean\"'),\n",
191
+ " (1,\n",
192
+ " '0.160*\"damage\" + 0.040*\"reach\" + 0.040*\"expose\" + 0.040*\"contaminate\" + '\n",
193
+ " '0.004*\"range\" + 0.004*\"assessment\" + 0.004*\"design\" + 0.004*\"public\" + '\n",
194
+ " '0.004*\"mean\" + 0.004*\"reimbursement\"'),\n",
195
+ " (2,\n",
196
+ " '0.196*\"party\" + 0.070*\"private\" + 0.037*\"caustic\" + 0.037*\"action\" + '\n",
197
+ " '0.004*\"mean\" + 0.004*\"design\" + 0.004*\"public\" + 0.004*\"assessment\" + '\n",
198
+ " '0.004*\"commonwealth\" + 0.004*\"range\"'),\n",
199
+ " (3,\n",
200
+ " '0.120*\"habitat\" + 0.044*\"agree\" + 0.044*\"fabric\" + 0.004*\"party\" + '\n",
201
+ " '0.004*\"compensate\" + 0.004*\"design\" + 0.004*\"public\" + 0.004*\"assessment\" + '\n",
202
+ " '0.004*\"commonwealth\" + 0.004*\"encourage\"'),\n",
203
+ " (4,\n",
204
+ " '0.313*\"site\" + 0.143*\"ecological\" + 0.052*\"predecessor\" + 0.003*\"mean\" + '\n",
205
+ " '0.003*\"fws\" + 0.003*\"range\" + 0.003*\"compensate\" + 0.003*\"commonwealth\" + '\n",
206
+ " '0.003*\"reimbursement\" + 0.003*\"assessment\"'),\n",
207
+ " (5,\n",
208
+ " '0.194*\"trustee\" + 0.037*\"hazardous\" + 0.037*\"well\" + 0.037*\"perform\" + '\n",
209
+ " '0.037*\"doi\" + 0.004*\"settlement\" + 0.004*\"federal\" + 0.004*\"commonwealth\" + '\n",
210
+ " '0.004*\"mean\" + 0.004*\"range\"'),\n",
211
+ " (6,\n",
212
+ " '0.052*\"pay\" + 0.005*\"owner\" + 0.005*\"resource\" + 0.005*\"mean\" + '\n",
213
+ " '0.005*\"design\" + 0.005*\"range\" + 0.005*\"compensate\" + 0.005*\"fws\" + '\n",
214
+ " '0.005*\"commonwealth\" + 0.005*\"reimbursement\"'),\n",
215
+ " (7,\n",
216
+ " '0.204*\"federal\" + 0.038*\"liability\" + 0.038*\"century\" + 0.038*\"current\" + '\n",
217
+ " '0.004*\"range\" + 0.004*\"mean\" + 0.004*\"design\" + 0.004*\"assessment\" + '\n",
218
+ " '0.004*\"commonwealth\" + 0.004*\"fws\"'),\n",
219
+ " (8,\n",
220
+ " '0.075*\"owner\" + 0.075*\"lining\" + 0.075*\"asbestos\" + 0.039*\"former\" + '\n",
221
+ " '0.039*\"clean\" + 0.004*\"compensate\" + 0.004*\"public\" + 0.004*\"assessment\" + '\n",
222
+ " '0.004*\"commonwealth\" + 0.004*\"design\"'),\n",
223
+ " (9,\n",
224
+ " '0.175*\"state\" + 0.113*\"portion\" + 0.036*\"business\" + 0.036*\"involve\" + '\n",
225
+ " '0.004*\"private\" + 0.004*\"compensate\" + 0.004*\"design\" + 0.004*\"public\" + '\n",
226
+ " '0.004*\"mean\" + 0.004*\"encourage\"'),\n",
227
+ " (10,\n",
228
+ " '0.110*\"industrial\" + 0.090*\"require\" + 0.040*\"announce\" + 0.040*\"water\" + '\n",
229
+ " '0.004*\"portion\" + 0.004*\"design\" + 0.004*\"compensate\" + 0.004*\"decade\" + '\n",
230
+ " '0.004*\"range\" + 0.004*\"commonwealth\"'),\n",
231
+ " (11,\n",
232
+ " '0.048*\"operation\" + 0.048*\"solution\" + 0.048*\"claim\" + 0.005*\"include\" + '\n",
233
+ " '0.005*\"site\" + 0.005*\"today\" + 0.005*\"compensate\" + 0.005*\"design\" + '\n",
234
+ " '0.005*\"mean\" + 0.005*\"range\"'),\n",
235
+ " (12,\n",
236
+ " '0.146*\"wetland\" + 0.128*\"provide\" + 0.036*\"land\" + 0.036*\"facility\" + '\n",
237
+ " '0.004*\"asbestos\" + 0.004*\"state\" + 0.004*\"settlement\" + 0.004*\"mean\" + '\n",
238
+ " '0.004*\"design\" + 0.004*\"fws\"'),\n",
239
+ " (13,\n",
240
+ " '0.160*\"contamination\" + 0.040*\"operate\" + 0.040*\"estimate\" + 0.040*\"result\" '\n",
241
+ " '+ 0.004*\"public\" + 0.004*\"design\" + 0.004*\"fws\" + 0.004*\"commonwealth\" + '\n",
242
+ " '0.004*\"compensate\" + 0.004*\"range\"'),\n",
243
+ " (14,\n",
244
+ " '0.307*\"use\" + 0.132*\"decree\" + 0.132*\"consent\" + 0.062*\"cost\" + '\n",
245
+ " '0.018*\"manufacturing\" + 0.018*\"remedial\" + 0.018*\"arsenic\" + '\n",
246
+ " '0.002*\"restoration\" + 0.002*\"also\" + 0.002*\"project\"'),\n",
247
+ " (15,\n",
248
+ " '0.168*\"include\" + 0.042*\"19th\" + 0.042*\"resolve\" + 0.004*\"mean\" + '\n",
249
+ " '0.004*\"assessment\" + 0.004*\"range\" + 0.004*\"compensate\" + '\n",
250
+ " '0.004*\"reimbursement\" + 0.004*\"fws\" + 0.004*\"commonwealth\"'),\n",
251
+ " (16,\n",
252
+ " '0.584*\"settlement\" + 0.057*\"today\" + 0.018*\"manufacture\" + 0.018*\"acre\" + '\n",
253
+ " '0.002*\"restoration\" + 0.002*\"public\" + 0.002*\"MassDEPs\" + 0.002*\"work\" + '\n",
254
+ " '0.002*\"comment\" + 0.002*\"fund\"'),\n",
255
+ " (17,\n",
256
+ " '0.250*\"groundwater\" + 0.036*\"substance\" + 0.036*\"cotton\" + 0.036*\"various\" '\n",
257
+ " '+ 0.004*\"natural\" + 0.004*\"mean\" + 0.004*\"range\" + 0.004*\"compensate\" + '\n",
258
+ " '0.004*\"assessment\" + 0.004*\"commonwealth\"'),\n",
259
+ " (18,\n",
260
+ " '0.273*\"natural\" + 0.067*\"injury\" + 0.067*\"lodge\" + 0.030*\"clutch\" + '\n",
261
+ " '0.030*\"back\" + 0.003*\"range\" + 0.003*\"commonwealth\" + 0.003*\"design\" + '\n",
262
+ " '0.003*\"assessment\" + 0.003*\"mean\"'),\n",
263
+ " (19,\n",
264
+ " '0.315*\"resource\" + 0.031*\"large\" + 0.031*\"operator\" + 0.031*\"date\" + '\n",
265
+ " '0.031*\"1million\" + 0.003*\"property\" + 0.003*\"compensate\" + 0.003*\"design\" + '\n",
266
+ " '0.003*\"public\" + 0.003*\"organization\"')]\n"
267
+ ]
268
+ }
269
+ ],
270
+ "source": [
271
+ "# Print the keywords in each of the 20 topics\n",
272
+ "pprint(lda_model.print_topics())\n",
273
+ "doc_lda = lda_model[corpus]"
274
+ ]
275
+ },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": 44,
279
+ "metadata": {},
280
+ "outputs": [
281
+ {
282
+ "name": "stdout",
283
+ "output_type": "stream",
284
+ "text": [
285
+ "\n",
286
+ "Perplexity: -20.580518818099517\n",
287
+ "\n",
288
+ "Coherence Score: 0.8399551092841173\n"
289
+ ]
290
+ }
291
+ ],
292
+ "source": [
293
+ "# Compute Perplexity\n",
294
+ "print('\\nPerplexity: ', lda_model.log_perplexity(corpus)) # a measure of how good the model is. lower the better.\n",
295
+ "\n",
296
+ "# Compute Coherence Score\n",
297
+ "coherence_model_lda = CoherenceModel(model=lda_model, texts=tokens_, dictionary=id2word, coherence='c_v')\n",
298
+ "coherence_lda = coherence_model_lda.get_coherence()\n",
299
+ "print('\\nCoherence Score: ', coherence_lda)"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": 45,
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "# Plotting tools\n",
309
+ "import pyLDAvis\n",
310
+ "import pyLDAvis.gensim # don't skip this\n",
311
+ "import matplotlib.pyplot as plt\n",
312
+ "%matplotlib inline"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 1,
318
+ "metadata": {},
319
+ "outputs": [],
320
+ "source": [
321
+ "# # Visualize the topics\n",
322
+ "# pyLDAvis.enable_notebook()\n",
323
+ "# vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
324
+ "# vis"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": null,
330
+ "metadata": {},
331
+ "outputs": [],
332
+ "source": []
333
+ }
334
+ ],
335
+ "metadata": {
336
+ "kernelspec": {
337
+ "display_name": "Python 3",
338
+ "language": "python",
339
+ "name": "python3"
340
+ },
341
+ "language_info": {
342
+ "codemirror_mode": {
343
+ "name": "ipython",
344
+ "version": 3
345
+ },
346
+ "file_extension": ".py",
347
+ "mimetype": "text/x-python",
348
+ "name": "python",
349
+ "nbconvert_exporter": "python",
350
+ "pygments_lexer": "ipython3",
351
+ "version": "3.8.2"
352
+ }
353
+ },
354
+ "nbformat": 4,
355
+ "nbformat_minor": 4
356
+ }
Progress/topic_modelling_resumes.ipynb ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import gensim\n",
10
+ "import gensim.corpora as corpora\n",
11
+ "from gensim.utils import simple_preprocess\n",
12
+ "from gensim.models import CoherenceModel\n",
13
+ "import re\n",
14
+ "import numpy as np\n",
15
+ "import pandas as pd\n",
16
+ "from pprint import pprint\n",
17
+ "import Cleaner\n",
18
+ "import Similar"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 7,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "import os\n",
28
+ "import textract as tx"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 8,
34
+ "metadata": {},
35
+ "outputs": [],
36
+ "source": [
37
+ "resume_dir = \"Data/Resumes/\"\n",
38
+ "job_desc_dir = \"Data/JobDesc/\"\n",
39
+ "resume_names = os.listdir(resume_dir)"
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": 58,
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "document = []\n",
49
+ "for res in resume_names:\n",
50
+ " text = tx.process(resume_dir+res, encoding='ascii')\n",
51
+ " text = str(text, 'utf-8')\n",
52
+ " temp = Cleaner.Cleaner(text)\n",
53
+ " document.append(temp[1])"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": []
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 59,
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "id2word = corpora.Dictionary(document)"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": null,
75
+ "metadata": {},
76
+ "outputs": [],
77
+ "source": []
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 60,
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "corpus = [id2word.doc2bow(text) for text in document]"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 69,
91
+ "metadata": {},
92
+ "outputs": [],
93
+ "source": [
94
+ "lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,\n",
95
+ " id2word=id2word,\n",
96
+ " num_topics=2, \n",
97
+ " random_state=100,\n",
98
+ " update_every=1,\n",
99
+ " chunksize=100,\n",
100
+ " passes=50,\n",
101
+ " alpha='auto',\n",
102
+ " per_word_topics=True)"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 70,
108
+ "metadata": {},
109
+ "outputs": [
110
+ {
111
+ "name": "stdout",
112
+ "output_type": "stream",
113
+ "text": [
114
+ "[(0,\n",
115
+ " '0.002*\"role\" + 0.002*\"Experience\" + 0.002*\"Services\" + 0.002*\"Skills\" + '\n",
116
+ " '0.002*\"Skill\" + 0.002*\"Date\" + 0.002*\"skill\" + 0.002*\"back\" + 0.002*\"Area\" '\n",
117
+ " '+ 0.002*\"last\"'),\n",
118
+ " (1,\n",
119
+ " '0.003*\"IT\" + 0.003*\"Pref\" + 0.003*\"ID\" + 0.003*\"Active\" + 0.003*\"UG\" + '\n",
120
+ " '0.003*\"d\" + 0.003*\"Current\" + 0.003*\"experience\" + 0.003*\"Summary\" + '\n",
121
+ " '0.003*\"Months\"')]\n"
122
+ ]
123
+ }
124
+ ],
125
+ "source": [
126
+ "pprint(lda_model.print_topics())\n",
127
+ "doc_lda = lda_model[corpus]"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 71,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "import pyLDAvis\n",
137
+ "import pyLDAvis.gensim # don't skip this\n",
138
+ "import matplotlib.pyplot as plt\n",
139
+ "%matplotlib inline"
140
+ ]
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": 72,
145
+ "metadata": {},
146
+ "outputs": [
147
+ {
148
+ "data": {
149
+ "text/html": [
150
+ "\n",
151
+ "<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css\">\n",
152
+ "\n",
153
+ "\n",
154
+ "<div id=\"ldavis_el22281400102997868488005527649\"></div>\n",
155
+ "<script type=\"text/javascript\">\n",
156
+ "\n",
157
+ "var ldavis_el22281400102997868488005527649_data = {\"mdsDat\": {\"x\": [0.045266102999448776, -0.045266102999448776], \"y\": [0.0, 0.0], \"topics\": [1, 2], \"cluster\": [1, 1], \"Freq\": [50.73581314086914, 49.264190673828125]}, \"tinfo\": {\"Term\": [\"improve\", \"less\", \"Days\", \"program\", \"reduce\", \"Technologies\", \"Sep\", \"lead\", \"NCR\", \"SQL\", \"notice\", \"Leader\", \"detect\", \"duration\", \"extensive\", \"track\", \"prediction\", \"structure\", \"document\", \"MTech\", \"enhance\", \"apply\", \"Apr\", \"user\", \"May\", \"Jump\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"NCR\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"cloud\", \"text\", \"actively\", \"leadership\", \"MS\", \"MCA\", \"Days\", \"less\", \"look\", \"sequence\", \"GCP\", \"monitoring\", \"junior\", \"level\", \"June\", \"transfer\", \"LearningDeep\", \"JDK\", \"box\", \"JIRA\", \"fashion\", \"pretraine\", \"domain\", \"working\", \"accordingly\", \"Marathi\", \"Jquery\", \"SQL\", \"client\", \"perform\", \"Marital\", \"notice\", \"Jump\", \"singleunmarrie\", \"degree\", \"professional\", \"SectionWork\", \"Resume\", \"IT\", \"Modified\", \"industry\", \"Highest\", \"Pref\", \"d\", \"UG\", \"ID\", \"Active\", \"experience\", \"Current\", \"Summary\", \"Months\", \"Location\", \"Status\", \"Phone\", \"Period\", \"Education\", \"total\", \"Functional\", \"year\", \"Key\", \"Number\", \"Email\", \"it\", \"Top\", \"version\", \"use\", \"back\", \"Name\", \"skill\", \"Area\", \"last\", \"Date\", \"Skills\", \"improve\", \"reduce\", \"program\", \"Technologies\", \"Leader\", \"track\", \"prediction\", \"extensive\", \"duration\", \"detect\", \"structure\", \"document\", \"enhance\", \"MTech\", \"apply\", \"value\", \"many\", \"control\", \"artificial\", \"Till\", \"tuning\", \"coordinate\", \"Numpy\", \"index\", \"enable\", \"Entity\", \"suggest\", \"steering\", \"architecture\", \"OpenCV\", \"Gradient\", \"camera\", \"Sep\", \"lead\", \"information\", 
\"identification\", \"Apr\", \"user\", \"May\", \"code\", \"identify\", \"performance\", \"algorithm\", \"deliver\", \"website\", \"section\", \"component\", \"css\", \"Maintenance\", \"company\", \"Nov\", \"issue\", \"test\", \"new\", \"system\", \"design\", \"develop\", \"role\", \"datum\", \"Computer\", \"Experience\", \"Services\", \"Engineering\", \"application\", \"ITSoftwareSoftware\", \"marital\", \"Skills\", \"Skill\", \"Date\", \"Area\", \"back\", \"skill\", \"last\", \"Name\", \"it\", \"version\", \"use\", \"Top\", \"Number\", \"Email\", \"year\", \"Key\", \"Education\", \"Period\", \"total\", \"Functional\", \"Status\", \"Phone\", \"Months\", \"Location\", \"ID\", \"UG\", \"d\", \"experience\", \"Summary\", \"Active\", \"Pref\"], \"Freq\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.0877420902252197, 2.4015042781829834, 2.4015040397644043, 2.4015040397644043, 2.401503801345825, 2.401503801345825, 2.401503801345825, 2.401503562927246, 2.401503562927246, 2.401503562927246, 2.401503324508667, 5.146483421325684, 5.146483421325684, 1.7152669429779053, 1.7152669429779053, 1.7152669429779053, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 5.832734107971191, 3.7740161418914795, 3.7740161418914795, 3.774012565612793, 7.891523361206055, 8.57771110534668, 7.205255508422852, 5.832758903503418, 5.832754135131836, 7.891510009765625, 7.891506671905518, 9.263986587524414, 9.263972282409668, 6.519008636474609, 7.891479969024658, 9.263984680175781, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263980865478516, 9.263980865478516, 9.2639799118042, 9.2639799118042, 
9.2639799118042, 9.263978004455566, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.263976097106934, 9.2639741897583, 9.2639741897583, 8.577733993530273, 8.577733993530273, 8.577733993530273, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.577725410461426, 3.736862897872925, 3.057375192642212, 3.057375192642212, 3.057373523712158, 2.377887010574341, 2.3778867721557617, 2.3778867721557617, 2.3778867721557617, 2.377887010574341, 2.377887010574341, 2.3778862953186035, 2.3778860569000244, 2.3778860569000244, 2.3778860569000244, 2.377885580062866, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 4.416375637054443, 4.4163618087768555, 1.698400616645813, 1.698400616645813, 3.7368907928466797, 3.7368743419647217, 3.7368698120117188, 4.416360855102539, 3.0574069023132324, 3.0573978424072266, 3.05739688873291, 3.057391881942749, 3.0573902130126953, 3.0573861598968506, 3.0573785305023193, 3.0573790073394775, 3.0573790073394775, 3.7368814945220947, 3.7368786334991455, 3.7368733882904053, 3.7368719577789307, 3.7368710041046143, 3.736868143081665, 4.416351318359375, 5.09583044052124, 5.775294303894043, 4.416353225708008, 4.416348934173584, 5.775278568267822, 5.775277137756348, 4.416343688964844, 4.4163289070129395, 5.095806121826172, 5.095800399780273, 5.775272369384766, 5.775270938873291, 5.775267124176025, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265216827393, 5.775263786315918, 5.775264739990234, 5.775264739990234, 5.775262832641602, 5.775261878967285, 5.775261878967285, 5.775261402130127, 5.775261402130127, 
5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259494781494, 5.775259971618652, 5.7752580642700195, 5.7752580642700195, 5.7752556800842285, 5.775256156921387, 5.775256156921387, 5.775257110595703, 5.775257587432861, 5.775256156921387, 5.775253772735596], \"Total\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.427489757537842, 2.741250991821289, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.74125075340271, 2.74125075340271, 6.165385723114014, 6.165386199951172, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.055013418197632, 2.055013418197632, 7.531106948852539, 4.792906284332275, 4.792906761169434, 4.792906761169434, 10.948802947998047, 12.314531326293945, 10.262561798095703, 8.210594177246094, 8.210594177246094, 11.628292083740234, 11.628291130065918, 14.359748840332031, 14.359748840332031, 9.576322555541992, 12.307779312133789, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039236068725586, 15.039236068725586, 14.352997779846191, 14.352996826171875, 14.352998733520508, 14.352996826171875, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352998733520508, 14.352997779846191, 
4.079987525939941, 3.400498390197754, 3.400498628616333, 3.400498390197754, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210099697113037, 2.7210099697113037, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 5.445394039154053, 5.445394039154053, 2.041522264480591, 2.041522264480591, 4.765904903411865, 4.765905380249023, 4.765904903411865, 6.131625175476074, 4.086416244506836, 4.086416244506836, 4.086415767669678, 4.086415767669678, 4.086415767669678, 4.086416244506836, 4.086415767669678, 4.086416244506836, 4.086416244506836, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136516571045, 7.504101753234863, 9.556071281433105, 12.294279098510742, 8.19034194946289, 8.19034194946289, 13.666759490966797, 13.666759490966797, 8.19034194946289, 8.19034194946289, 10.928550720214844, 10.928550720214844, 14.352997779846191, 14.352998733520508, 14.352998733520508, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352998733520508, 14.352996826171875, 14.352996826171875, 15.039236068725586, 15.039236068725586, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219], \"Category\": [\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", 
\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", 
\"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\"], \"logprob\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, -6.811100006103516, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -6.30019998550415, -6.30019998550415, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -6.175000190734863, -6.610400199890137, -6.610400199890137, -6.610400199890137, -5.872700214385986, -5.789299964904785, -5.963699817657471, -6.175000190734863, -6.175000190734863, -5.872700214385986, -5.872700214385986, -5.712399959564209, -5.712399959564209, -6.063799858093262, -5.872700214385986, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.789299964904785, -5.789299964904785, -5.789299964904785, 
-5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -6.590799808502197, -6.791500091552734, -6.791500091552734, -6.791500091552734, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -6.423699855804443, -6.423699855804443, -7.37939977645874, -7.37939977645874, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.280600070953369, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.1554999351501465, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.280600070953369, -6.280600070953369, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, 
-6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465], \"loglift\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5741999745368958, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.49790000915527344, 0.49790000915527344, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.4230000078678131, 0.43950000405311584, 0.43950000405311584, 0.43950000405311584, 0.35109999775886536, 0.31690001487731934, 0.3248000144958496, 0.33660000562667847, 0.33660000562667847, 0.29089999198913574, 0.29089999198913574, 0.2401999980211258, 0.2401999980211258, 0.2939999997615814, 0.23409999907016754, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.6201000213623047, 0.6015999913215637, 0.6015999913215637, 0.6015999913215637, 
0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.4984999895095825, 0.4984999895095825, 0.5239999890327454, 0.5239999890327454, 0.46470001339912415, 0.46470001339912415, 0.46470001339912415, 0.3797999918460846, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.1777999997138977, 0.07919999957084656, -0.047600001096725464, 0.09030000120401382, 0.09030000120401382, -0.1534000039100647, -0.1534000039100647, 0.09030000120401382, 0.09030000120401382, -0.054999999701976776, -0.054999999701976776, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214]}, \"token.table\": {\"Topic\": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 
1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2], \"Freq\": [0.598434567451477, 0.39895638823509216, 0.20982374250888824, 0.839294970035553, 0.6270467042922974, 0.41803112626075745, 0.48838010430336, 0.48838010430336, 0.598434567451477, 0.39895638823509216, 0.6270466446876526, 0.41803109645843506, 0.8109792470932007, 0.1621958464384079, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895644783973694, 0.48838010430336, 0.48838010430336, 0.9796611070632935, 0.585361897945404, 0.43902140855789185, 0.5984346270561218, 0.39895641803741455, 0.9732297658920288, 0.9796611070632935, 0.6499953866004944, 0.3249976933002472, 0.598434567451477, 0.39895638823509216, 0.6267518997192383, 0.34819549322128296, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.9732297658920288, 0.9732296466827393, 0.7308439016342163, 0.32481950521469116, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7295939922332764, 0.7295939922332764, 0.7350212335586548, 0.2447132021188736, 0.734139621257782, 0.9732296466827393, 0.834566593170166, 0.2086416482925415, 0.20982374250888824, 0.839294970035553, 0.6267518997192383, 0.34819549322128296, 0.598434567451477, 0.39895638823509216, 0.8752761483192444, 0.6270467638969421, 0.41803115606307983, 0.36682870984077454, 0.7336574196815491, 0.5984346270561218, 0.39895644783973694, 0.979661226272583, 
0.9796611070632935, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895641803741455, 0.598434567451477, 0.39895638823509216, 0.6879772543907166, 0.3439886271953583, 0.7966956496238708, 0.2655652165412903, 0.6879771947860718, 0.3439885973930359, 0.1836414337158203, 0.7345657348632812, 0.585361897945404, 0.43902140855789185, 0.6270466446876526, 0.41803109645843506, 0.6270467042922974, 0.41803112626075745, 0.598434567451477, 0.39895638823509216, 0.598434567451477, 0.39895638823509216, 0.8822236061096191, 0.9796611070632935, 0.6270467638969421, 0.41803115606307983, 0.598434567451477, 0.39895638823509216, 0.9732297658920288, 0.7295939922332764, 0.24471323192119598, 0.7341396808624268, 0.48838010430336, 0.48838010430336, 0.7350212335586548, 0.9796611070632935, 0.9796611070632935, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.9796611070632935, 0.8345667123794556, 0.2086416780948639, 0.7295938730239868, 0.32617780566215515, 0.6523556113243103, 0.36682870984077454, 0.7336574196815491, 0.24471323192119598, 0.7341396808624268, 0.9796611070632935, 0.979661226272583, 0.2447132021188736, 0.734139621257782, 0.7295938730239868, 0.598434567451477, 0.39895638823509216, 0.48838010430336, 0.48838010430336, 0.7307631969451904, 0.24358773231506348, 0.24471323192119598, 0.7341396808624268, 0.39978134632110596, 0.5330418348312378, 0.73502117395401, 0.4185820519924164, 0.5232275724411011, 0.7350212335586548, 0.9732297658920288, 0.73502117395401, 0.979661226272583, 0.7350212335586548, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732298851013184, 0.7295939922332764, 0.9796611070632935, 0.2447132021188736, 0.734139621257782, 0.9803951382637024, 0.979661226272583, 0.7309695482254028, 0.3132726550102234, 0.9796611070632935, 0.36682870984077454, 0.7336574196815491, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.6270467042922974, 0.41803112626075745, 0.1836414337158203, 0.7345657348632812, 0.7295938730239868, 0.8109791874885559, 
0.1621958464384079, 0.9732298851013184, 0.9732297658920288, 0.979661226272583, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.36682870984077454, 0.7336574196815491, 0.7306734919548035, 0.2740025520324707, 0.834566593170166, 0.2086416482925415, 0.2447132021188736, 0.734139621257782, 0.7350212335586548, 0.9732297658920288, 0.7307631969451904, 0.24358773231506348, 0.8822235465049744, 0.8822236061096191, 0.5693705081939697, 0.48803186416625977, 0.2447132021188736, 0.734139621257782, 0.9732297658920288, 0.7295938730239868, 0.6820908784866333, 0.29232466220855713, 0.6270467042922974, 0.41803112626075745, 0.9796611070632935, 0.7350212335586548, 0.9796611070632935, 0.36682868003845215, 0.7336573600769043, 0.36682870984077454, 0.7336574196815491, 0.7295938730239868, 0.5984346270561218, 0.39895641803741455, 0.7350212335586548, 0.9732297658920288, 0.7295938730239868, 0.979661226272583, 0.6270467638969421, 0.41803115606307983, 0.20982372760772705, 0.8392949104309082, 0.979661226272583, 0.6270466446876526, 0.41803109645843506, 0.24471323192119598, 0.7341396808624268, 0.9732297658920288, 0.598434567451477, 0.39895638823509216], \"Term\": [\"Active\", \"Active\", \"Apr\", \"Apr\", \"Area\", \"Area\", \"Computer\", \"Computer\", \"Current\", \"Current\", \"Date\", \"Date\", \"Days\", \"Days\", \"Education\", \"Education\", \"Email\", \"Email\", \"Engineering\", \"Engineering\", \"Entity\", \"Experience\", \"Experience\", \"Functional\", \"Functional\", \"GCP\", \"Gradient\", \"Highest\", \"Highest\", \"ID\", \"ID\", \"IT\", \"IT\", \"ITSoftwareSoftware\", \"ITSoftwareSoftware\", \"JDK\", \"JIRA\", \"Jquery\", \"Jump\", \"Jump\", \"June\", \"Key\", \"Key\", \"Leader\", \"LearningDeep\", \"Location\", \"Location\", \"MCA\", \"MS\", \"MTech\", \"Maintenance\", \"Maintenance\", \"Marathi\", \"Marital\", \"Marital\", \"May\", \"May\", \"Modified\", \"Modified\", \"Months\", \"Months\", \"NCR\", \"Name\", \"Name\", \"Nov\", \"Nov\", \"Number\", \"Number\", \"Numpy\", 
\"OpenCV\", \"Period\", \"Period\", \"Phone\", \"Phone\", \"Pref\", \"Pref\", \"Resume\", \"Resume\", \"SQL\", \"SQL\", \"SectionWork\", \"SectionWork\", \"Sep\", \"Sep\", \"Services\", \"Services\", \"Skill\", \"Skill\", \"Skills\", \"Skills\", \"Status\", \"Status\", \"Summary\", \"Summary\", \"Technologies\", \"Till\", \"Top\", \"Top\", \"UG\", \"UG\", \"accordingly\", \"actively\", \"algorithm\", \"algorithm\", \"application\", \"application\", \"apply\", \"architecture\", \"artificial\", \"back\", \"back\", \"box\", \"camera\", \"client\", \"client\", \"cloud\", \"code\", \"code\", \"company\", \"company\", \"component\", \"component\", \"control\", \"coordinate\", \"css\", \"css\", \"cycle\", \"d\", \"d\", \"datum\", \"datum\", \"degree\", \"degree\", \"deliver\", \"deliver\", \"design\", \"design\", \"detect\", \"develop\", \"develop\", \"document\", \"domain\", \"duration\", \"enable\", \"enhance\", \"experience\", \"experience\", \"extensive\", \"fashion\", \"full\", \"identification\", \"identify\", \"identify\", \"improve\", \"index\", \"industry\", \"industry\", \"information\", \"issue\", \"issue\", \"it\", \"it\", \"junior\", \"last\", \"last\", \"lead\", \"lead\", \"leadership\", \"less\", \"less\", \"level\", \"look\", \"many\", \"marital\", \"marital\", \"monitoring\", \"new\", \"new\", \"notice\", \"notice\", \"perform\", \"perform\", \"performance\", \"performance\", \"prediction\", \"pretraine\", \"professional\", \"professional\", \"program\", \"reduce\", \"role\", \"role\", \"section\", \"section\", \"sequence\", \"service\", \"singleunmarrie\", \"singleunmarrie\", \"skill\", \"skill\", \"steering\", \"structure\", \"suggest\", \"system\", \"system\", \"test\", \"test\", \"text\", \"total\", \"total\", \"track\", \"transfer\", \"troubleshoot\", \"tuning\", \"use\", \"use\", \"user\", \"user\", \"value\", \"version\", \"version\", \"website\", \"website\", \"working\", \"year\", \"year\"]}, \"R\": 30, \"lambda.step\": 0.01, \"plot.opts\": 
{\"xlab\": \"PC1\", \"ylab\": \"PC2\"}, \"topic.order\": [2, 1]};\n",
158
+ "\n",
159
+ "function LDAvis_load_lib(url, callback){\n",
160
+ " var s = document.createElement('script');\n",
161
+ " s.src = url;\n",
162
+ " s.async = true;\n",
163
+ " s.onreadystatechange = s.onload = callback;\n",
164
+ " s.onerror = function(){console.warn(\"failed to load library \" + url);};\n",
165
+ " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
166
+ "}\n",
167
+ "\n",
168
+ "if(typeof(LDAvis) !== \"undefined\"){\n",
169
+ " // already loaded: just create the visualization\n",
170
+ " !function(LDAvis){\n",
171
+ " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
172
+ " }(LDAvis);\n",
173
+ "}else if(typeof define === \"function\" && define.amd){\n",
174
+ " // require.js is available: use it to load d3/LDAvis\n",
175
+ " require.config({paths: {d3: \"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\"}});\n",
176
+ " require([\"d3\"], function(d3){\n",
177
+ " window.d3 = d3;\n",
178
+ " LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
179
+ " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
180
+ " });\n",
181
+ " });\n",
182
+ "}else{\n",
183
+ " // require.js not available: dynamically load d3 & LDAvis\n",
184
+ " LDAvis_load_lib(\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js\", function(){\n",
185
+ " LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
186
+ " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
187
+ " })\n",
188
+ " });\n",
189
+ "}\n",
190
+ "</script>"
191
+ ],
192
+ "text/plain": [
193
+ "PreparedData(topic_coordinates= x y topics cluster Freq\n",
194
+ "topic \n",
195
+ "1 0.045266 0.0 1 1 50.735813\n",
196
+ "0 -0.045266 0.0 2 1 49.264191, topic_info= Term Freq Total Category logprob loglift\n",
197
+ "759 improve 4.000000 4.000000 Default 30.0000 30.0000\n",
198
+ "565 less 6.000000 6.000000 Default 29.0000 29.0000\n",
199
+ "520 Days 6.000000 6.000000 Default 28.0000 28.0000\n",
200
+ "781 program 3.000000 3.000000 Default 27.0000 27.0000\n",
201
+ "1734 reduce 3.000000 3.000000 Default 26.0000 26.0000\n",
202
+ "... ... ... ... ... ... ...\n",
203
+ "185 d 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
204
+ "197 experience 5.775257 15.039238 Topic2 -6.1555 -0.2491\n",
205
+ "152 Summary 5.775258 15.039238 Topic2 -6.1555 -0.2491\n",
206
+ "0 Active 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
207
+ "117 Pref 5.775254 15.039238 Topic2 -6.1555 -0.2491\n",
208
+ "\n",
209
+ "[205 rows x 6 columns], token_table= Topic Freq Term\n",
210
+ "term \n",
211
+ "0 1 0.598435 Active\n",
212
+ "0 2 0.398956 Active\n",
213
+ "1107 1 0.209824 Apr\n",
214
+ "1107 2 0.839295 Apr\n",
215
+ "7 1 0.627047 Area\n",
216
+ "... ... ... ...\n",
217
+ "671 1 0.244713 website\n",
218
+ "671 2 0.734140 website\n",
219
+ "515 1 0.973230 working\n",
220
+ "271 1 0.598435 year\n",
221
+ "271 2 0.398956 year\n",
222
+ "\n",
223
+ "[226 rows x 3 columns], R=30, lambda_step=0.01, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, topic_order=[2, 1])"
224
+ ]
225
+ },
226
+ "execution_count": 72,
227
+ "metadata": {},
228
+ "output_type": "execute_result"
229
+ }
230
+ ],
231
+ "source": [
232
+ "# Visualize the topics\n",
233
+ "pyLDAvis.enable_notebook()\n",
234
+ "vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
235
+ "vis"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": []
244
+ }
245
+ ],
246
+ "metadata": {
247
+ "kernelspec": {
248
+ "display_name": "Python 3",
249
+ "language": "python",
250
+ "name": "python3"
251
+ },
252
+ "language_info": {
253
+ "codemirror_mode": {
254
+ "name": "ipython",
255
+ "version": 3
256
+ },
257
+ "file_extension": ".py",
258
+ "mimetype": "text/x-python",
259
+ "name": "python",
260
+ "nbconvert_exporter": "python",
261
+ "pygments_lexer": "ipython3",
262
+ "version": "3.8.2"
263
+ }
264
+ },
265
+ "nbformat": 4,
266
+ "nbformat_minor": 4
267
+ }