Spaces:
Runtime error
Runtime error
Updating Progress
Browse files- Progress/Similar.py +6 -7
- Progress/app.py +50 -0
- Progress/compute.py +1 -1
- Progress/testing_compute.ipynb +395 -0
- Progress/topic_modelling_1.ipynb +356 -0
- Progress/topic_modelling_resumes.ipynb +267 -0
Progress/Similar.py
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
import textdistance as td
|
| 2 |
import Cleaner
|
| 3 |
|
|
|
|
| 4 |
def match(resume, job_des):
|
| 5 |
-
j = td.jaccard.similarity(resume, job_des)
|
| 6 |
-
s = td.sorensen_dice.similarity(resume, job_des)
|
| 7 |
-
c = td.cosine.similarity(resume, job_des)
|
| 8 |
-
o = td.overlap.normalized_similarity(resume, job_des)
|
| 9 |
total = (j+s+c+o)/4
|
| 10 |
-
return total
|
| 11 |
|
| 12 |
-
# https://realpython.com/working-with-files-in-python/
|
| 13 |
|
| 14 |
-
# https://support.dlink.ca/emulators/wbr2310/index.htm
|
|
|
|
| 1 |
import textdistance as td
|
| 2 |
import Cleaner
|
| 3 |
|
| 4 |
+
|
| 5 |
def match(resume, job_des):
|
| 6 |
+
j = td.jaccard.similarity(resume, job_des)
|
| 7 |
+
s = td.sorensen_dice.similarity(resume, job_des)
|
| 8 |
+
c = td.cosine.similarity(resume, job_des)
|
| 9 |
+
o = td.overlap.normalized_similarity(resume, job_des)
|
| 10 |
total = (j+s+c+o)/4
|
| 11 |
+
return total*100
|
| 12 |
|
|
|
|
| 13 |
|
|
|
Progress/app.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import Cleaner
|
| 2 |
+
import Similar
|
| 3 |
+
import textract as tx
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import os
|
| 6 |
+
import streamlit as st
|
| 7 |
+
|
| 8 |
+
st.title("Naive Resume Matcher")
|
| 9 |
+
st.markdown(""" ### Ranking **Resumes** based on the Matching Skills as provided by the required job description. This uses a **Token, String and Word Embedding** based algorithm created to generate a match score that ranks a resume.""")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
resume_dir = "Data/Resumes/"
|
| 13 |
+
job_desc_dir = "Data/JobDesc/"
|
| 14 |
+
resume_names = os.listdir(resume_dir)
|
| 15 |
+
document = []
|
| 16 |
+
|
| 17 |
+
for res in resume_names:
|
| 18 |
+
temp = []
|
| 19 |
+
temp.append(res)
|
| 20 |
+
text = tx.process(resume_dir+res, encoding='ascii')
|
| 21 |
+
text = str(text, 'utf-8')
|
| 22 |
+
temp.append(text)
|
| 23 |
+
document.append(temp)
|
| 24 |
+
|
| 25 |
+
df = pd.DataFrame(document, columns=['Name', 'Context'])
|
| 26 |
+
|
| 27 |
+
# Only one Job Description should be present and in docx format
|
| 28 |
+
job_docs = os.listdir(job_desc_dir)
|
| 29 |
+
job_desc = tx.process(
|
| 30 |
+
job_desc_dir+job_docs[1], extension='docx', encoding='ascii')
|
| 31 |
+
job_desc = str(job_desc, 'utf-8')
|
| 32 |
+
job_des = Cleaner.Cleaner(job_desc)
|
| 33 |
+
|
| 34 |
+
st.subheader("Job Description")
|
| 35 |
+
st.markdown(" --- ")
|
| 36 |
+
st.write(job_desc)
|
| 37 |
+
st.markdown(" --- ")
|
| 38 |
+
|
| 39 |
+
scores = []
|
| 40 |
+
for text in df['Context']:
|
| 41 |
+
raw = Cleaner.Cleaner(text)
|
| 42 |
+
score = Similar.match(raw[2], job_des[2])
|
| 43 |
+
scores.append(score)
|
| 44 |
+
st.write(scores)
|
| 45 |
+
df['Scores'] = scores
|
| 46 |
+
|
| 47 |
+
st.dataframe(df)
|
| 48 |
+
df2 = df.sort_values(by=['Scores'], ascending=False)
|
| 49 |
+
st.dataframe(df2)
|
| 50 |
+
print(df2.iloc[0, 1])
|
Progress/compute.py
CHANGED
|
@@ -23,7 +23,7 @@ df = pd.DataFrame(document, columns=['Name', 'Context'])
|
|
| 23 |
# Only one Job Description should be present and in docx format
|
| 24 |
job_docs = os.listdir(job_desc_dir)
|
| 25 |
job_desc = tx.process(
|
| 26 |
-
job_desc_dir+job_docs[
|
| 27 |
job_desc = str(job_desc, 'utf-8')
|
| 28 |
job_des = Cleaner.Cleaner(job_desc[0])
|
| 29 |
|
|
|
|
| 23 |
# Only one Job Description should be present and in docx format
|
| 24 |
job_docs = os.listdir(job_desc_dir)
|
| 25 |
job_desc = tx.process(
|
| 26 |
+
job_desc_dir+job_docs[1], extension='docx', encoding='ascii')
|
| 27 |
job_desc = str(job_desc, 'utf-8')
|
| 28 |
job_des = Cleaner.Cleaner(job_desc[0])
|
| 29 |
|
Progress/testing_compute.ipynb
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import os\n",
|
| 10 |
+
"import pandas as pd\n",
|
| 11 |
+
"import textract as tx\n",
|
| 12 |
+
"import Similar\n",
|
| 13 |
+
"import Cleaner"
|
| 14 |
+
]
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"cell_type": "code",
|
| 18 |
+
"execution_count": 2,
|
| 19 |
+
"metadata": {},
|
| 20 |
+
"outputs": [],
|
| 21 |
+
"source": [
|
| 22 |
+
"resume_dir = \"Data/Resumes/\"\n",
|
| 23 |
+
"job_desc_dir = \"Data/JobDesc/\"\n",
|
| 24 |
+
"resume_names = os.listdir(resume_dir)\n",
|
| 25 |
+
"document = []"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": 3,
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"outputs": [
|
| 33 |
+
{
|
| 34 |
+
"data": {
|
| 35 |
+
"text/plain": [
|
| 36 |
+
"[]"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
"execution_count": 3,
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"output_type": "execute_result"
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"source": [
|
| 45 |
+
"document "
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": 4,
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [],
|
| 53 |
+
"source": [
|
| 54 |
+
"for res in resume_names:\n",
|
| 55 |
+
" temp = []\n",
|
| 56 |
+
" temp.append(res)\n",
|
| 57 |
+
" text = tx.process(resume_dir+res, encoding='ascii')\n",
|
| 58 |
+
" text = str(text, 'utf-8')\n",
|
| 59 |
+
" temp.append(text)\n",
|
| 60 |
+
" document.append(temp)"
|
| 61 |
+
]
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"cell_type": "code",
|
| 65 |
+
"execution_count": 5,
|
| 66 |
+
"metadata": {},
|
| 67 |
+
"outputs": [],
|
| 68 |
+
"source": [
|
| 69 |
+
"df = pd.DataFrame(document, columns=['Name', 'Context'])"
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": 6,
|
| 75 |
+
"metadata": {},
|
| 76 |
+
"outputs": [
|
| 77 |
+
{
|
| 78 |
+
"data": {
|
| 79 |
+
"text/html": [
|
| 80 |
+
"<div>\n",
|
| 81 |
+
"<style scoped>\n",
|
| 82 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
| 83 |
+
" vertical-align: middle;\n",
|
| 84 |
+
" }\n",
|
| 85 |
+
"\n",
|
| 86 |
+
" .dataframe tbody tr th {\n",
|
| 87 |
+
" vertical-align: top;\n",
|
| 88 |
+
" }\n",
|
| 89 |
+
"\n",
|
| 90 |
+
" .dataframe thead th {\n",
|
| 91 |
+
" text-align: right;\n",
|
| 92 |
+
" }\n",
|
| 93 |
+
"</style>\n",
|
| 94 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
| 95 |
+
" <thead>\n",
|
| 96 |
+
" <tr style=\"text-align: right;\">\n",
|
| 97 |
+
" <th></th>\n",
|
| 98 |
+
" <th>Name</th>\n",
|
| 99 |
+
" <th>Context</th>\n",
|
| 100 |
+
" </tr>\n",
|
| 101 |
+
" </thead>\n",
|
| 102 |
+
" <tbody>\n",
|
| 103 |
+
" <tr>\n",
|
| 104 |
+
" <th>0</th>\n",
|
| 105 |
+
" <td>ALANKRIT NIRJHARPremium CV Featured Resume.docx</td>\n",
|
| 106 |
+
" <td>ALANKRIT NIRJHARPremium CV Featured Resume \\n...</td>\n",
|
| 107 |
+
" </tr>\n",
|
| 108 |
+
" <tr>\n",
|
| 109 |
+
" <th>1</th>\n",
|
| 110 |
+
" <td>Amarpreet SinghPremium CV Featured Resume.docx</td>\n",
|
| 111 |
+
" <td>Amarpreet SinghPremium CV Featured Resume \\n\\...</td>\n",
|
| 112 |
+
" </tr>\n",
|
| 113 |
+
" <tr>\n",
|
| 114 |
+
" <th>2</th>\n",
|
| 115 |
+
" <td>Shambhai MishraFeatured Resume.docx</td>\n",
|
| 116 |
+
" <td>Shambhai MishraFeatured Resume \\n\\n \\n\\nA har...</td>\n",
|
| 117 |
+
" </tr>\n",
|
| 118 |
+
" <tr>\n",
|
| 119 |
+
" <th>3</th>\n",
|
| 120 |
+
" <td>Amruta B.pdf</td>\n",
|
| 121 |
+
" <td>Amruta B. ThoratFeatured Resume \\n\\n \\n\\nJr...</td>\n",
|
| 122 |
+
" </tr>\n",
|
| 123 |
+
" <tr>\n",
|
| 124 |
+
" <th>4</th>\n",
|
| 125 |
+
" <td>Dhavakumar AmarntharajanFeatured Resume.docx</td>\n",
|
| 126 |
+
" <td>Dhavakumar AmarntharajanFeatured Resume \\n\\n ...</td>\n",
|
| 127 |
+
" </tr>\n",
|
| 128 |
+
" <tr>\n",
|
| 129 |
+
" <th>5</th>\n",
|
| 130 |
+
" <td>DIVYA PRAKASH SINGHFeatured Resume.docx</td>\n",
|
| 131 |
+
" <td>DIVYA PRAKASH SINGHFeatured Resume \\n\\n \\n\\nM...</td>\n",
|
| 132 |
+
" </tr>\n",
|
| 133 |
+
" <tr>\n",
|
| 134 |
+
" <th>6</th>\n",
|
| 135 |
+
" <td>Abhishek SharmaFeatured Resume.docx</td>\n",
|
| 136 |
+
" <td>Abhishek SharmaFeatured Resume \\n\\n \\n\\nDevel...</td>\n",
|
| 137 |
+
" </tr>\n",
|
| 138 |
+
" <tr>\n",
|
| 139 |
+
" <th>7</th>\n",
|
| 140 |
+
" <td>Manoj KumarFeatured Resume.docx</td>\n",
|
| 141 |
+
" <td>Manoj KumarFeatured Resume \\n\\n \\n\\nAPPLICATI...</td>\n",
|
| 142 |
+
" </tr>\n",
|
| 143 |
+
" <tr>\n",
|
| 144 |
+
" <th>8</th>\n",
|
| 145 |
+
" <td>Seshadri Sastry Kunapuli.docx</td>\n",
|
| 146 |
+
" <td>Seshadri Sastry Kunapuli \\n\\n \\n\\nManager ...</td>\n",
|
| 147 |
+
" </tr>\n",
|
| 148 |
+
" <tr>\n",
|
| 149 |
+
" <th>9</th>\n",
|
| 150 |
+
" <td>SNEHA SAHUFeatured Resume.docx</td>\n",
|
| 151 |
+
" <td>SNEHA SAHUFeatured Resume \\n\\n \\n\\nMachine Le...</td>\n",
|
| 152 |
+
" </tr>\n",
|
| 153 |
+
" <tr>\n",
|
| 154 |
+
" <th>10</th>\n",
|
| 155 |
+
" <td>Bijjula SahithiFeatured Resume.docx</td>\n",
|
| 156 |
+
" <td>Bijjula SahithiFeatured Resume \\n\\n \\n\\nProdu...</td>\n",
|
| 157 |
+
" </tr>\n",
|
| 158 |
+
" <tr>\n",
|
| 159 |
+
" <th>11</th>\n",
|
| 160 |
+
" <td>Prashant Bhat.pdf</td>\n",
|
| 161 |
+
" <td>Prashant Bhat \\n\\n \\n\\nStanford Machine ...</td>\n",
|
| 162 |
+
" </tr>\n",
|
| 163 |
+
" <tr>\n",
|
| 164 |
+
" <th>12</th>\n",
|
| 165 |
+
" <td>Suraj Chauhan.docx</td>\n",
|
| 166 |
+
" <td>Suraj Chauhan \\n\\n \\n\\nSenior Web Developer ...</td>\n",
|
| 167 |
+
" </tr>\n",
|
| 168 |
+
" <tr>\n",
|
| 169 |
+
" <th>13</th>\n",
|
| 170 |
+
" <td>Nandagopal HFeatured Resume.pdf</td>\n",
|
| 171 |
+
" <td>Nandagopal HFeatured Resume \\n\\n \\n\\nMYSQL,...</td>\n",
|
| 172 |
+
" </tr>\n",
|
| 173 |
+
" <tr>\n",
|
| 174 |
+
" <th>14</th>\n",
|
| 175 |
+
" <td>AJAY CHINNIFeatured Resume.docx</td>\n",
|
| 176 |
+
" <td>AJAY CHINNIFeatured Resume \\n\\n \\n\\nSeeking r...</td>\n",
|
| 177 |
+
" </tr>\n",
|
| 178 |
+
" <tr>\n",
|
| 179 |
+
" <th>15</th>\n",
|
| 180 |
+
" <td>Ashwani Kumar RajputPremium CV Featured Resume...</td>\n",
|
| 181 |
+
" <td>Ashwani Kumar RajputPremium CV Featured Resume...</td>\n",
|
| 182 |
+
" </tr>\n",
|
| 183 |
+
" <tr>\n",
|
| 184 |
+
" <th>16</th>\n",
|
| 185 |
+
" <td>RISHABH SHARMAFeatured Resume.docx</td>\n",
|
| 186 |
+
" <td>RISHABH SHARMAFeatured Resume \\n\\n \\n\\nJunior...</td>\n",
|
| 187 |
+
" </tr>\n",
|
| 188 |
+
" <tr>\n",
|
| 189 |
+
" <th>17</th>\n",
|
| 190 |
+
" <td>NARENDRA HINGE.docx</td>\n",
|
| 191 |
+
" <td>NARENDRA HINGE\\n\\n\\n\\n \\n\\nSenior Web Develope...</td>\n",
|
| 192 |
+
" </tr>\n",
|
| 193 |
+
" <tr>\n",
|
| 194 |
+
" <th>18</th>\n",
|
| 195 |
+
" <td>Kaustav SenFeatured Resume.docx</td>\n",
|
| 196 |
+
" <td>Kaustav SenFeatured Resume \\n\\n \\n\\nTech Lead...</td>\n",
|
| 197 |
+
" </tr>\n",
|
| 198 |
+
" <tr>\n",
|
| 199 |
+
" <th>19</th>\n",
|
| 200 |
+
" <td>khyati gandhi.docx</td>\n",
|
| 201 |
+
" <td>khyati gandhi \\n\\n \\n\\nWeb Developer seeking...</td>\n",
|
| 202 |
+
" </tr>\n",
|
| 203 |
+
" <tr>\n",
|
| 204 |
+
" <th>20</th>\n",
|
| 205 |
+
" <td>Jithin J NairFeatured Resume.docx</td>\n",
|
| 206 |
+
" <td>Jithin J NairFeatured Resume \\n\\n \\n\\nMachine...</td>\n",
|
| 207 |
+
" </tr>\n",
|
| 208 |
+
" </tbody>\n",
|
| 209 |
+
"</table>\n",
|
| 210 |
+
"</div>"
|
| 211 |
+
],
|
| 212 |
+
"text/plain": [
|
| 213 |
+
" Name \\\n",
|
| 214 |
+
"0 ALANKRIT NIRJHARPremium CV Featured Resume.docx \n",
|
| 215 |
+
"1 Amarpreet SinghPremium CV Featured Resume.docx \n",
|
| 216 |
+
"2 Shambhai MishraFeatured Resume.docx \n",
|
| 217 |
+
"3 Amruta B.pdf \n",
|
| 218 |
+
"4 Dhavakumar AmarntharajanFeatured Resume.docx \n",
|
| 219 |
+
"5 DIVYA PRAKASH SINGHFeatured Resume.docx \n",
|
| 220 |
+
"6 Abhishek SharmaFeatured Resume.docx \n",
|
| 221 |
+
"7 Manoj KumarFeatured Resume.docx \n",
|
| 222 |
+
"8 Seshadri Sastry Kunapuli.docx \n",
|
| 223 |
+
"9 SNEHA SAHUFeatured Resume.docx \n",
|
| 224 |
+
"10 Bijjula SahithiFeatured Resume.docx \n",
|
| 225 |
+
"11 Prashant Bhat.pdf \n",
|
| 226 |
+
"12 Suraj Chauhan.docx \n",
|
| 227 |
+
"13 Nandagopal HFeatured Resume.pdf \n",
|
| 228 |
+
"14 AJAY CHINNIFeatured Resume.docx \n",
|
| 229 |
+
"15 Ashwani Kumar RajputPremium CV Featured Resume... \n",
|
| 230 |
+
"16 RISHABH SHARMAFeatured Resume.docx \n",
|
| 231 |
+
"17 NARENDRA HINGE.docx \n",
|
| 232 |
+
"18 Kaustav SenFeatured Resume.docx \n",
|
| 233 |
+
"19 khyati gandhi.docx \n",
|
| 234 |
+
"20 Jithin J NairFeatured Resume.docx \n",
|
| 235 |
+
"\n",
|
| 236 |
+
" Context \n",
|
| 237 |
+
"0 ALANKRIT NIRJHARPremium CV Featured Resume \\n... \n",
|
| 238 |
+
"1 Amarpreet SinghPremium CV Featured Resume \\n\\... \n",
|
| 239 |
+
"2 Shambhai MishraFeatured Resume \\n\\n \\n\\nA har... \n",
|
| 240 |
+
"3 Amruta B. ThoratFeatured Resume \\n\\n \\n\\nJr... \n",
|
| 241 |
+
"4 Dhavakumar AmarntharajanFeatured Resume \\n\\n ... \n",
|
| 242 |
+
"5 DIVYA PRAKASH SINGHFeatured Resume \\n\\n \\n\\nM... \n",
|
| 243 |
+
"6 Abhishek SharmaFeatured Resume \\n\\n \\n\\nDevel... \n",
|
| 244 |
+
"7 Manoj KumarFeatured Resume \\n\\n \\n\\nAPPLICATI... \n",
|
| 245 |
+
"8 Seshadri Sastry Kunapuli \\n\\n \\n\\nManager ... \n",
|
| 246 |
+
"9 SNEHA SAHUFeatured Resume \\n\\n \\n\\nMachine Le... \n",
|
| 247 |
+
"10 Bijjula SahithiFeatured Resume \\n\\n \\n\\nProdu... \n",
|
| 248 |
+
"11 Prashant Bhat \\n\\n \\n\\nStanford Machine ... \n",
|
| 249 |
+
"12 Suraj Chauhan \\n\\n \\n\\nSenior Web Developer ... \n",
|
| 250 |
+
"13 Nandagopal HFeatured Resume \\n\\n \\n\\nMYSQL,... \n",
|
| 251 |
+
"14 AJAY CHINNIFeatured Resume \\n\\n \\n\\nSeeking r... \n",
|
| 252 |
+
"15 Ashwani Kumar RajputPremium CV Featured Resume... \n",
|
| 253 |
+
"16 RISHABH SHARMAFeatured Resume \\n\\n \\n\\nJunior... \n",
|
| 254 |
+
"17 NARENDRA HINGE\\n\\n\\n\\n \\n\\nSenior Web Develope... \n",
|
| 255 |
+
"18 Kaustav SenFeatured Resume \\n\\n \\n\\nTech Lead... \n",
|
| 256 |
+
"19 khyati gandhi \\n\\n \\n\\nWeb Developer seeking... \n",
|
| 257 |
+
"20 Jithin J NairFeatured Resume \\n\\n \\n\\nMachine... "
|
| 258 |
+
]
|
| 259 |
+
},
|
| 260 |
+
"execution_count": 6,
|
| 261 |
+
"metadata": {},
|
| 262 |
+
"output_type": "execute_result"
|
| 263 |
+
}
|
| 264 |
+
],
|
| 265 |
+
"source": [
|
| 266 |
+
"df"
|
| 267 |
+
]
|
| 268 |
+
},
|
| 269 |
+
{
|
| 270 |
+
"cell_type": "code",
|
| 271 |
+
"execution_count": 12,
|
| 272 |
+
"metadata": {},
|
| 273 |
+
"outputs": [],
|
| 274 |
+
"source": [
|
| 275 |
+
"def get_job_description():\n",
|
| 276 |
+
" job_docs = os.listdir(job_desc_dir)\n",
|
| 277 |
+
" job_desc = tx.process(\n",
|
| 278 |
+
" job_desc_dir+job_docs[1], extension='docx', encoding='ascii')\n",
|
| 279 |
+
" job_desc = str(job_desc, 'utf-8')\n",
|
| 280 |
+
" job_des = Cleaner.Cleaner(job_desc)"
|
| 281 |
+
]
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"cell_type": "code",
|
| 285 |
+
"execution_count": 8,
|
| 286 |
+
"metadata": {},
|
| 287 |
+
"outputs": [],
|
| 288 |
+
"source": [
|
| 289 |
+
"scores = []\n",
|
| 290 |
+
"def compute_score(x=2, y=2):\n",
|
| 291 |
+
" for text in df['Context']:\n",
|
| 292 |
+
" raw = Cleaner.Cleaner(text)\n",
|
| 293 |
+
" score = Similar.match(raw[2], job_des[2])\n",
|
| 294 |
+
" scores.append(score)"
|
| 295 |
+
]
|
| 296 |
+
},
|
| 297 |
+
{
|
| 298 |
+
"cell_type": "code",
|
| 299 |
+
"execution_count": 9,
|
| 300 |
+
"metadata": {},
|
| 301 |
+
"outputs": [
|
| 302 |
+
{
|
| 303 |
+
"data": {
|
| 304 |
+
"text/plain": [
|
| 305 |
+
"[7.188133725864406,\n",
|
| 306 |
+
" 17.082715886282017,\n",
|
| 307 |
+
" 9.476762357802984,\n",
|
| 308 |
+
" 17.291130127664918,\n",
|
| 309 |
+
" 18.012687466241466,\n",
|
| 310 |
+
" 9.452611578580793,\n",
|
| 311 |
+
" 8.42476835081202,\n",
|
| 312 |
+
" 11.586667849627627,\n",
|
| 313 |
+
" 12.456231619031753,\n",
|
| 314 |
+
" 17.0072580417408,\n",
|
| 315 |
+
" 14.108484885761968,\n",
|
| 316 |
+
" 9.982352544008108,\n",
|
| 317 |
+
" 16.236249502169258,\n",
|
| 318 |
+
" 17.05720171632391,\n",
|
| 319 |
+
" 11.320461075067941,\n",
|
| 320 |
+
" 15.613431194443613,\n",
|
| 321 |
+
" 9.533217549290569,\n",
|
| 322 |
+
" 14.925508576697014,\n",
|
| 323 |
+
" 17.989739486512008,\n",
|
| 324 |
+
" 24.064673772279484,\n",
|
| 325 |
+
" 11.435471298914607]"
|
| 326 |
+
]
|
| 327 |
+
},
|
| 328 |
+
"execution_count": 9,
|
| 329 |
+
"metadata": {},
|
| 330 |
+
"output_type": "execute_result"
|
| 331 |
+
}
|
| 332 |
+
],
|
| 333 |
+
"source": [
|
| 334 |
+
"scores"
|
| 335 |
+
]
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"cell_type": "code",
|
| 339 |
+
"execution_count": 11,
|
| 340 |
+
"metadata": {},
|
| 341 |
+
"outputs": [],
|
| 342 |
+
"source": [
|
| 343 |
+
"df['Scores'] = scores\n",
|
| 344 |
+
"df2 = df.sort_values(by=['Scores'], ascending=False)\n",
|
| 345 |
+
"# print(df2.iloc[0, 1])"
|
| 346 |
+
]
|
| 347 |
+
},
|
| 348 |
+
{
|
| 349 |
+
"cell_type": "code",
|
| 350 |
+
"execution_count": 17,
|
| 351 |
+
"metadata": {},
|
| 352 |
+
"outputs": [],
|
| 353 |
+
"source": [
|
| 354 |
+
"# import matplotlib.pyplot as plt"
|
| 355 |
+
]
|
| 356 |
+
},
|
| 357 |
+
{
|
| 358 |
+
"cell_type": "code",
|
| 359 |
+
"execution_count": 16,
|
| 360 |
+
"metadata": {},
|
| 361 |
+
"outputs": [],
|
| 362 |
+
"source": [
|
| 363 |
+
"# plt.text(15, 30)"
|
| 364 |
+
]
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"cell_type": "code",
|
| 368 |
+
"execution_count": null,
|
| 369 |
+
"metadata": {},
|
| 370 |
+
"outputs": [],
|
| 371 |
+
"source": []
|
| 372 |
+
}
|
| 373 |
+
],
|
| 374 |
+
"metadata": {
|
| 375 |
+
"kernelspec": {
|
| 376 |
+
"display_name": "Python 3",
|
| 377 |
+
"language": "python",
|
| 378 |
+
"name": "python3"
|
| 379 |
+
},
|
| 380 |
+
"language_info": {
|
| 381 |
+
"codemirror_mode": {
|
| 382 |
+
"name": "ipython",
|
| 383 |
+
"version": 3
|
| 384 |
+
},
|
| 385 |
+
"file_extension": ".py",
|
| 386 |
+
"mimetype": "text/x-python",
|
| 387 |
+
"name": "python",
|
| 388 |
+
"nbconvert_exporter": "python",
|
| 389 |
+
"pygments_lexer": "ipython3",
|
| 390 |
+
"version": "3.8.2"
|
| 391 |
+
}
|
| 392 |
+
},
|
| 393 |
+
"nbformat": 4,
|
| 394 |
+
"nbformat_minor": 4
|
| 395 |
+
}
|
Progress/topic_modelling_1.ipynb
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 12,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import gensim\n",
|
| 10 |
+
"import gensim.corpora as corpora\n",
|
| 11 |
+
"from gensim.utils import simple_preprocess\n",
|
| 12 |
+
"from gensim.models import CoherenceModel\n",
|
| 13 |
+
"import re\n",
|
| 14 |
+
"import numpy as np\n",
|
| 15 |
+
"import pandas as pd\n",
|
| 16 |
+
"from pprint import pprint"
|
| 17 |
+
]
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "code",
|
| 21 |
+
"execution_count": 13,
|
| 22 |
+
"metadata": {},
|
| 23 |
+
"outputs": [],
|
| 24 |
+
"source": [
|
| 25 |
+
"import Distill"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": 14,
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"outputs": [],
|
| 33 |
+
"source": [
|
| 34 |
+
"from texts import text_2"
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"cell_type": "code",
|
| 39 |
+
"execution_count": 15,
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"outputs": [],
|
| 42 |
+
"source": [
|
| 43 |
+
"text = Distill.remove_stopwords(Distill.tokenize(text_2))"
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"execution_count": 16,
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"text = Distill.remove_tags(text)"
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"cell_type": "code",
|
| 57 |
+
"execution_count": 17,
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [],
|
| 60 |
+
"source": [
|
| 61 |
+
"text = Distill.lemmatize(text)"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "code",
|
| 66 |
+
"execution_count": 18,
|
| 67 |
+
"metadata": {},
|
| 68 |
+
"outputs": [],
|
| 69 |
+
"source": [
|
| 70 |
+
"# text = Distill._to_string(text)"
|
| 71 |
+
]
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"cell_type": "code",
|
| 75 |
+
"execution_count": 22,
|
| 76 |
+
"metadata": {},
|
| 77 |
+
"outputs": [],
|
| 78 |
+
"source": [
|
| 79 |
+
"tokens_ = []\n",
|
| 80 |
+
"for a in text:\n",
|
| 81 |
+
" tokens_.append([a])"
|
| 82 |
+
]
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"cell_type": "code",
|
| 86 |
+
"execution_count": 24,
|
| 87 |
+
"metadata": {},
|
| 88 |
+
"outputs": [],
|
| 89 |
+
"source": [
|
| 90 |
+
"# tokens_"
|
| 91 |
+
]
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"cell_type": "code",
|
| 95 |
+
"execution_count": 26,
|
| 96 |
+
"metadata": {},
|
| 97 |
+
"outputs": [],
|
| 98 |
+
"source": [
|
| 99 |
+
"id2word = corpora.Dictionary(tokens_)"
|
| 100 |
+
]
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"cell_type": "code",
|
| 104 |
+
"execution_count": 27,
|
| 105 |
+
"metadata": {},
|
| 106 |
+
"outputs": [],
|
| 107 |
+
"source": [
|
| 108 |
+
"def to_token(List):\n",
|
| 109 |
+
" # takes a simple list and breaks it into tokens of the form [[],[],[]]\n",
|
| 110 |
+
" token = [[a] for a in List]\n",
|
| 111 |
+
" return token"
|
| 112 |
+
]
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"cell_type": "code",
|
| 116 |
+
"execution_count": 28,
|
| 117 |
+
"metadata": {},
|
| 118 |
+
"outputs": [],
|
| 119 |
+
"source": [
|
| 120 |
+
"saas = to_token(text)"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"cell_type": "code",
|
| 125 |
+
"execution_count": 30,
|
| 126 |
+
"metadata": {},
|
| 127 |
+
"outputs": [],
|
| 128 |
+
"source": [
|
| 129 |
+
"# saas"
|
| 130 |
+
]
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"cell_type": "code",
|
| 134 |
+
"execution_count": 31,
|
| 135 |
+
"metadata": {},
|
| 136 |
+
"outputs": [],
|
| 137 |
+
"source": [
|
| 138 |
+
"corpus = [id2word.doc2bow(text) for text in tokens_]"
|
| 139 |
+
]
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"cell_type": "code",
|
| 143 |
+
"execution_count": 39,
|
| 144 |
+
"metadata": {},
|
| 145 |
+
"outputs": [],
|
| 146 |
+
"source": [
|
| 147 |
+
"# Human readable format of corpus (term-frequency)\n",
|
| 148 |
+
"# [[(id2word[id], freq) for id, freq in cp] for cp in corpus[:]]"
|
| 149 |
+
]
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"cell_type": "code",
|
| 153 |
+
"execution_count": 40,
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [],
|
| 156 |
+
"source": [
|
| 157 |
+
"# corpus[:]"
|
| 158 |
+
]
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"cell_type": "code",
|
| 162 |
+
"execution_count": 41,
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [],
|
| 165 |
+
"source": [
|
| 166 |
+
"# Build LDA model\n",
|
| 167 |
+
"lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,\n",
|
| 168 |
+
" id2word=id2word,\n",
|
| 169 |
+
" num_topics=20, \n",
|
| 170 |
+
" random_state=100,\n",
|
| 171 |
+
" update_every=1,\n",
|
| 172 |
+
" chunksize=100,\n",
|
| 173 |
+
" passes=10,\n",
|
| 174 |
+
" alpha='auto',\n",
|
| 175 |
+
" per_word_topics=True)"
|
| 176 |
+
]
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"cell_type": "code",
|
| 180 |
+
"execution_count": 42,
|
| 181 |
+
"metadata": {},
|
| 182 |
+
"outputs": [
|
| 183 |
+
{
|
| 184 |
+
"name": "stdout",
|
| 185 |
+
"output_type": "stream",
|
| 186 |
+
"text": [
|
| 187 |
+
"[(0,\n",
|
| 188 |
+
" '0.087*\"property\" + 0.046*\"brake\" + 0.046*\"lead\" + 0.004*\"commonwealth\" + '\n",
|
| 189 |
+
" '0.004*\"compensate\" + 0.004*\"design\" + 0.004*\"decade\" + 0.004*\"range\" + '\n",
|
| 190 |
+
" '0.004*\"local\" + 0.004*\"mean\"'),\n",
|
| 191 |
+
" (1,\n",
|
| 192 |
+
" '0.160*\"damage\" + 0.040*\"reach\" + 0.040*\"expose\" + 0.040*\"contaminate\" + '\n",
|
| 193 |
+
" '0.004*\"range\" + 0.004*\"assessment\" + 0.004*\"design\" + 0.004*\"public\" + '\n",
|
| 194 |
+
" '0.004*\"mean\" + 0.004*\"reimbursement\"'),\n",
|
| 195 |
+
" (2,\n",
|
| 196 |
+
" '0.196*\"party\" + 0.070*\"private\" + 0.037*\"caustic\" + 0.037*\"action\" + '\n",
|
| 197 |
+
" '0.004*\"mean\" + 0.004*\"design\" + 0.004*\"public\" + 0.004*\"assessment\" + '\n",
|
| 198 |
+
" '0.004*\"commonwealth\" + 0.004*\"range\"'),\n",
|
| 199 |
+
" (3,\n",
|
| 200 |
+
" '0.120*\"habitat\" + 0.044*\"agree\" + 0.044*\"fabric\" + 0.004*\"party\" + '\n",
|
| 201 |
+
" '0.004*\"compensate\" + 0.004*\"design\" + 0.004*\"public\" + 0.004*\"assessment\" + '\n",
|
| 202 |
+
" '0.004*\"commonwealth\" + 0.004*\"encourage\"'),\n",
|
| 203 |
+
" (4,\n",
|
| 204 |
+
" '0.313*\"site\" + 0.143*\"ecological\" + 0.052*\"predecessor\" + 0.003*\"mean\" + '\n",
|
| 205 |
+
" '0.003*\"fws\" + 0.003*\"range\" + 0.003*\"compensate\" + 0.003*\"commonwealth\" + '\n",
|
| 206 |
+
" '0.003*\"reimbursement\" + 0.003*\"assessment\"'),\n",
|
| 207 |
+
" (5,\n",
|
| 208 |
+
" '0.194*\"trustee\" + 0.037*\"hazardous\" + 0.037*\"well\" + 0.037*\"perform\" + '\n",
|
| 209 |
+
" '0.037*\"doi\" + 0.004*\"settlement\" + 0.004*\"federal\" + 0.004*\"commonwealth\" + '\n",
|
| 210 |
+
" '0.004*\"mean\" + 0.004*\"range\"'),\n",
|
| 211 |
+
" (6,\n",
|
| 212 |
+
" '0.052*\"pay\" + 0.005*\"owner\" + 0.005*\"resource\" + 0.005*\"mean\" + '\n",
|
| 213 |
+
" '0.005*\"design\" + 0.005*\"range\" + 0.005*\"compensate\" + 0.005*\"fws\" + '\n",
|
| 214 |
+
" '0.005*\"commonwealth\" + 0.005*\"reimbursement\"'),\n",
|
| 215 |
+
" (7,\n",
|
| 216 |
+
" '0.204*\"federal\" + 0.038*\"liability\" + 0.038*\"century\" + 0.038*\"current\" + '\n",
|
| 217 |
+
" '0.004*\"range\" + 0.004*\"mean\" + 0.004*\"design\" + 0.004*\"assessment\" + '\n",
|
| 218 |
+
" '0.004*\"commonwealth\" + 0.004*\"fws\"'),\n",
|
| 219 |
+
" (8,\n",
|
| 220 |
+
" '0.075*\"owner\" + 0.075*\"lining\" + 0.075*\"asbestos\" + 0.039*\"former\" + '\n",
|
| 221 |
+
" '0.039*\"clean\" + 0.004*\"compensate\" + 0.004*\"public\" + 0.004*\"assessment\" + '\n",
|
| 222 |
+
" '0.004*\"commonwealth\" + 0.004*\"design\"'),\n",
|
| 223 |
+
" (9,\n",
|
| 224 |
+
" '0.175*\"state\" + 0.113*\"portion\" + 0.036*\"business\" + 0.036*\"involve\" + '\n",
|
| 225 |
+
" '0.004*\"private\" + 0.004*\"compensate\" + 0.004*\"design\" + 0.004*\"public\" + '\n",
|
| 226 |
+
" '0.004*\"mean\" + 0.004*\"encourage\"'),\n",
|
| 227 |
+
" (10,\n",
|
| 228 |
+
" '0.110*\"industrial\" + 0.090*\"require\" + 0.040*\"announce\" + 0.040*\"water\" + '\n",
|
| 229 |
+
" '0.004*\"portion\" + 0.004*\"design\" + 0.004*\"compensate\" + 0.004*\"decade\" + '\n",
|
| 230 |
+
" '0.004*\"range\" + 0.004*\"commonwealth\"'),\n",
|
| 231 |
+
" (11,\n",
|
| 232 |
+
" '0.048*\"operation\" + 0.048*\"solution\" + 0.048*\"claim\" + 0.005*\"include\" + '\n",
|
| 233 |
+
" '0.005*\"site\" + 0.005*\"today\" + 0.005*\"compensate\" + 0.005*\"design\" + '\n",
|
| 234 |
+
" '0.005*\"mean\" + 0.005*\"range\"'),\n",
|
| 235 |
+
" (12,\n",
|
| 236 |
+
" '0.146*\"wetland\" + 0.128*\"provide\" + 0.036*\"land\" + 0.036*\"facility\" + '\n",
|
| 237 |
+
" '0.004*\"asbestos\" + 0.004*\"state\" + 0.004*\"settlement\" + 0.004*\"mean\" + '\n",
|
| 238 |
+
" '0.004*\"design\" + 0.004*\"fws\"'),\n",
|
| 239 |
+
" (13,\n",
|
| 240 |
+
" '0.160*\"contamination\" + 0.040*\"operate\" + 0.040*\"estimate\" + 0.040*\"result\" '\n",
|
| 241 |
+
" '+ 0.004*\"public\" + 0.004*\"design\" + 0.004*\"fws\" + 0.004*\"commonwealth\" + '\n",
|
| 242 |
+
" '0.004*\"compensate\" + 0.004*\"range\"'),\n",
|
| 243 |
+
" (14,\n",
|
| 244 |
+
" '0.307*\"use\" + 0.132*\"decree\" + 0.132*\"consent\" + 0.062*\"cost\" + '\n",
|
| 245 |
+
" '0.018*\"manufacturing\" + 0.018*\"remedial\" + 0.018*\"arsenic\" + '\n",
|
| 246 |
+
" '0.002*\"restoration\" + 0.002*\"also\" + 0.002*\"project\"'),\n",
|
| 247 |
+
" (15,\n",
|
| 248 |
+
" '0.168*\"include\" + 0.042*\"19th\" + 0.042*\"resolve\" + 0.004*\"mean\" + '\n",
|
| 249 |
+
" '0.004*\"assessment\" + 0.004*\"range\" + 0.004*\"compensate\" + '\n",
|
| 250 |
+
" '0.004*\"reimbursement\" + 0.004*\"fws\" + 0.004*\"commonwealth\"'),\n",
|
| 251 |
+
" (16,\n",
|
| 252 |
+
" '0.584*\"settlement\" + 0.057*\"today\" + 0.018*\"manufacture\" + 0.018*\"acre\" + '\n",
|
| 253 |
+
" '0.002*\"restoration\" + 0.002*\"public\" + 0.002*\"MassDEPs\" + 0.002*\"work\" + '\n",
|
| 254 |
+
" '0.002*\"comment\" + 0.002*\"fund\"'),\n",
|
| 255 |
+
" (17,\n",
|
| 256 |
+
" '0.250*\"groundwater\" + 0.036*\"substance\" + 0.036*\"cotton\" + 0.036*\"various\" '\n",
|
| 257 |
+
" '+ 0.004*\"natural\" + 0.004*\"mean\" + 0.004*\"range\" + 0.004*\"compensate\" + '\n",
|
| 258 |
+
" '0.004*\"assessment\" + 0.004*\"commonwealth\"'),\n",
|
| 259 |
+
" (18,\n",
|
| 260 |
+
" '0.273*\"natural\" + 0.067*\"injury\" + 0.067*\"lodge\" + 0.030*\"clutch\" + '\n",
|
| 261 |
+
" '0.030*\"back\" + 0.003*\"range\" + 0.003*\"commonwealth\" + 0.003*\"design\" + '\n",
|
| 262 |
+
" '0.003*\"assessment\" + 0.003*\"mean\"'),\n",
|
| 263 |
+
" (19,\n",
|
| 264 |
+
" '0.315*\"resource\" + 0.031*\"large\" + 0.031*\"operator\" + 0.031*\"date\" + '\n",
|
| 265 |
+
" '0.031*\"1million\" + 0.003*\"property\" + 0.003*\"compensate\" + 0.003*\"design\" + '\n",
|
| 266 |
+
" '0.003*\"public\" + 0.003*\"organization\"')]\n"
|
| 267 |
+
]
|
| 268 |
+
}
|
| 269 |
+
],
|
| 270 |
+
"source": [
|
| 271 |
+
"# Print the Keyword in the 10 topics\n",
|
| 272 |
+
"pprint(lda_model.print_topics())\n",
|
| 273 |
+
"doc_lda = lda_model[corpus]"
|
| 274 |
+
]
|
| 275 |
+
},
|
| 276 |
+
{
|
| 277 |
+
"cell_type": "code",
|
| 278 |
+
"execution_count": 44,
|
| 279 |
+
"metadata": {},
|
| 280 |
+
"outputs": [
|
| 281 |
+
{
|
| 282 |
+
"name": "stdout",
|
| 283 |
+
"output_type": "stream",
|
| 284 |
+
"text": [
|
| 285 |
+
"\n",
|
| 286 |
+
"Perplexity: -20.580518818099517\n",
|
| 287 |
+
"\n",
|
| 288 |
+
"Coherence Score: 0.8399551092841173\n"
|
| 289 |
+
]
|
| 290 |
+
}
|
| 291 |
+
],
|
| 292 |
+
"source": [
|
| 293 |
+
"# Compute Perplexity\n",
|
| 294 |
+
"print('\\nPerplexity: ', lda_model.log_perplexity(corpus)) # a measure of how good the model is. lower the better.\n",
|
| 295 |
+
"\n",
|
| 296 |
+
"# Compute Coherence Score\n",
|
| 297 |
+
"coherence_model_lda = CoherenceModel(model=lda_model, texts=tokens_, dictionary=id2word, coherence='c_v')\n",
|
| 298 |
+
"coherence_lda = coherence_model_lda.get_coherence()\n",
|
| 299 |
+
"print('\\nCoherence Score: ', coherence_lda)"
|
| 300 |
+
]
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"cell_type": "code",
|
| 304 |
+
"execution_count": 45,
|
| 305 |
+
"metadata": {},
|
| 306 |
+
"outputs": [],
|
| 307 |
+
"source": [
|
| 308 |
+
"# Plotting tools\n",
|
| 309 |
+
"import pyLDAvis\n",
|
| 310 |
+
"import pyLDAvis.gensim # don't skip this\n",
|
| 311 |
+
"import matplotlib.pyplot as plt\n",
|
| 312 |
+
"%matplotlib inline"
|
| 313 |
+
]
|
| 314 |
+
},
|
| 315 |
+
{
|
| 316 |
+
"cell_type": "code",
|
| 317 |
+
"execution_count": 1,
|
| 318 |
+
"metadata": {},
|
| 319 |
+
"outputs": [],
|
| 320 |
+
"source": [
|
| 321 |
+
"# # Visualize the topics\n",
|
| 322 |
+
"# pyLDAvis.enable_notebook()\n",
|
| 323 |
+
"# vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
|
| 324 |
+
"# vis"
|
| 325 |
+
]
|
| 326 |
+
},
|
| 327 |
+
{
|
| 328 |
+
"cell_type": "code",
|
| 329 |
+
"execution_count": null,
|
| 330 |
+
"metadata": {},
|
| 331 |
+
"outputs": [],
|
| 332 |
+
"source": []
|
| 333 |
+
}
|
| 334 |
+
],
|
| 335 |
+
"metadata": {
|
| 336 |
+
"kernelspec": {
|
| 337 |
+
"display_name": "Python 3",
|
| 338 |
+
"language": "python",
|
| 339 |
+
"name": "python3"
|
| 340 |
+
},
|
| 341 |
+
"language_info": {
|
| 342 |
+
"codemirror_mode": {
|
| 343 |
+
"name": "ipython",
|
| 344 |
+
"version": 3
|
| 345 |
+
},
|
| 346 |
+
"file_extension": ".py",
|
| 347 |
+
"mimetype": "text/x-python",
|
| 348 |
+
"name": "python",
|
| 349 |
+
"nbconvert_exporter": "python",
|
| 350 |
+
"pygments_lexer": "ipython3",
|
| 351 |
+
"version": "3.8.2"
|
| 352 |
+
}
|
| 353 |
+
},
|
| 354 |
+
"nbformat": 4,
|
| 355 |
+
"nbformat_minor": 4
|
| 356 |
+
}
|
Progress/topic_modelling_resumes.ipynb
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 1,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import gensim\n",
|
| 10 |
+
"import gensim.corpora as corpora\n",
|
| 11 |
+
"from gensim.utils import simple_preprocess\n",
|
| 12 |
+
"from gensim.models import CoherenceModel\n",
|
| 13 |
+
"import re\n",
|
| 14 |
+
"import numpy as np\n",
|
| 15 |
+
"import pandas as pd\n",
|
| 16 |
+
"from pprint import pprint\n",
|
| 17 |
+
"import Cleaner\n",
|
| 18 |
+
"import Similar"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "code",
|
| 23 |
+
"execution_count": 7,
|
| 24 |
+
"metadata": {},
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"source": [
|
| 27 |
+
"import os\n",
|
| 28 |
+
"import textract as tx"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": 8,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"outputs": [],
|
| 36 |
+
"source": [
|
| 37 |
+
"resume_dir = \"Data/Resumes/\"\n",
|
| 38 |
+
"job_desc_dir = \"Data/JobDesc/\"\n",
|
| 39 |
+
"resume_names = os.listdir(resume_dir)"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "code",
|
| 44 |
+
"execution_count": 58,
|
| 45 |
+
"metadata": {},
|
| 46 |
+
"outputs": [],
|
| 47 |
+
"source": [
|
| 48 |
+
"document = []\n",
|
| 49 |
+
"for res in resume_names:\n",
|
| 50 |
+
" text = tx.process(resume_dir+res, encoding='ascii')\n",
|
| 51 |
+
" text = str(text, 'utf-8')\n",
|
| 52 |
+
" temp = Cleaner.Cleaner(text)\n",
|
| 53 |
+
" document.append(temp[1])"
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"cell_type": "code",
|
| 58 |
+
"execution_count": null,
|
| 59 |
+
"metadata": {},
|
| 60 |
+
"outputs": [],
|
| 61 |
+
"source": []
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"cell_type": "code",
|
| 65 |
+
"execution_count": 59,
|
| 66 |
+
"metadata": {},
|
| 67 |
+
"outputs": [],
|
| 68 |
+
"source": [
|
| 69 |
+
"id2word = corpora.Dictionary(document)"
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": null,
|
| 75 |
+
"metadata": {},
|
| 76 |
+
"outputs": [],
|
| 77 |
+
"source": []
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"cell_type": "code",
|
| 81 |
+
"execution_count": 60,
|
| 82 |
+
"metadata": {},
|
| 83 |
+
"outputs": [],
|
| 84 |
+
"source": [
|
| 85 |
+
"corpus = [id2word.doc2bow(text) for text in document]"
|
| 86 |
+
]
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"cell_type": "code",
|
| 90 |
+
"execution_count": 69,
|
| 91 |
+
"metadata": {},
|
| 92 |
+
"outputs": [],
|
| 93 |
+
"source": [
|
| 94 |
+
"lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,\n",
|
| 95 |
+
" id2word=id2word,\n",
|
| 96 |
+
" num_topics=2, \n",
|
| 97 |
+
" random_state=100,\n",
|
| 98 |
+
" update_every=1,\n",
|
| 99 |
+
" chunksize=100,\n",
|
| 100 |
+
" passes=50,\n",
|
| 101 |
+
" alpha='auto',\n",
|
| 102 |
+
" per_word_topics=True)"
|
| 103 |
+
]
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"cell_type": "code",
|
| 107 |
+
"execution_count": 70,
|
| 108 |
+
"metadata": {},
|
| 109 |
+
"outputs": [
|
| 110 |
+
{
|
| 111 |
+
"name": "stdout",
|
| 112 |
+
"output_type": "stream",
|
| 113 |
+
"text": [
|
| 114 |
+
"[(0,\n",
|
| 115 |
+
" '0.002*\"role\" + 0.002*\"Experience\" + 0.002*\"Services\" + 0.002*\"Skills\" + '\n",
|
| 116 |
+
" '0.002*\"Skill\" + 0.002*\"Date\" + 0.002*\"skill\" + 0.002*\"back\" + 0.002*\"Area\" '\n",
|
| 117 |
+
" '+ 0.002*\"last\"'),\n",
|
| 118 |
+
" (1,\n",
|
| 119 |
+
" '0.003*\"IT\" + 0.003*\"Pref\" + 0.003*\"ID\" + 0.003*\"Active\" + 0.003*\"UG\" + '\n",
|
| 120 |
+
" '0.003*\"d\" + 0.003*\"Current\" + 0.003*\"experience\" + 0.003*\"Summary\" + '\n",
|
| 121 |
+
" '0.003*\"Months\"')]\n"
|
| 122 |
+
]
|
| 123 |
+
}
|
| 124 |
+
],
|
| 125 |
+
"source": [
|
| 126 |
+
"pprint(lda_model.print_topics())\n",
|
| 127 |
+
"doc_lda = lda_model[corpus]"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"cell_type": "code",
|
| 132 |
+
"execution_count": 71,
|
| 133 |
+
"metadata": {},
|
| 134 |
+
"outputs": [],
|
| 135 |
+
"source": [
|
| 136 |
+
"import pyLDAvis\n",
|
| 137 |
+
"import pyLDAvis.gensim # don't skip this\n",
|
| 138 |
+
"import matplotlib.pyplot as plt\n",
|
| 139 |
+
"%matplotlib inline"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "code",
|
| 144 |
+
"execution_count": 72,
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"outputs": [
|
| 147 |
+
{
|
| 148 |
+
"data": {
|
| 149 |
+
"text/html": [
|
| 150 |
+
"\n",
|
| 151 |
+
"<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css\">\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"\n",
|
| 154 |
+
"<div id=\"ldavis_el22281400102997868488005527649\"></div>\n",
|
| 155 |
+
"<script type=\"text/javascript\">\n",
|
| 156 |
+
"\n",
|
| 157 |
+
"var ldavis_el22281400102997868488005527649_data = {\"mdsDat\": {\"x\": [0.045266102999448776, -0.045266102999448776], \"y\": [0.0, 0.0], \"topics\": [1, 2], \"cluster\": [1, 1], \"Freq\": [50.73581314086914, 49.264190673828125]}, \"tinfo\": {\"Term\": [\"improve\", \"less\", \"Days\", \"program\", \"reduce\", \"Technologies\", \"Sep\", \"lead\", \"NCR\", \"SQL\", \"notice\", \"Leader\", \"detect\", \"duration\", \"extensive\", \"track\", \"prediction\", \"structure\", \"document\", \"MTech\", \"enhance\", \"apply\", \"Apr\", \"user\", \"May\", \"Jump\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"NCR\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"cloud\", \"text\", \"actively\", \"leadership\", \"MS\", \"MCA\", \"Days\", \"less\", \"look\", \"sequence\", \"GCP\", \"monitoring\", \"junior\", \"level\", \"June\", \"transfer\", \"LearningDeep\", \"JDK\", \"box\", \"JIRA\", \"fashion\", \"pretraine\", \"domain\", \"working\", \"accordingly\", \"Marathi\", \"Jquery\", \"SQL\", \"client\", \"perform\", \"Marital\", \"notice\", \"Jump\", \"singleunmarrie\", \"degree\", \"professional\", \"SectionWork\", \"Resume\", \"IT\", \"Modified\", \"industry\", \"Highest\", \"Pref\", \"d\", \"UG\", \"ID\", \"Active\", \"experience\", \"Current\", \"Summary\", \"Months\", \"Location\", \"Status\", \"Phone\", \"Period\", \"Education\", \"total\", \"Functional\", \"year\", \"Key\", \"Number\", \"Email\", \"it\", \"Top\", \"version\", \"use\", \"back\", \"Name\", \"skill\", \"Area\", \"last\", \"Date\", \"Skills\", \"improve\", \"reduce\", \"program\", \"Technologies\", \"Leader\", \"track\", \"prediction\", \"extensive\", \"duration\", \"detect\", \"structure\", \"document\", \"enhance\", \"MTech\", \"apply\", \"value\", \"many\", \"control\", \"artificial\", \"Till\", \"tuning\", \"coordinate\", \"Numpy\", \"index\", \"enable\", \"Entity\", \"suggest\", \"steering\", \"architecture\", \"OpenCV\", \"Gradient\", \"camera\", \"Sep\", \"lead\", \"information\", \"identification\", \"Apr\", \"user\", \"May\", \"code\", \"identify\", \"performance\", \"algorithm\", \"deliver\", \"website\", \"section\", \"component\", \"css\", \"Maintenance\", \"company\", \"Nov\", \"issue\", \"test\", \"new\", \"system\", \"design\", \"develop\", \"role\", \"datum\", \"Computer\", \"Experience\", \"Services\", \"Engineering\", \"application\", \"ITSoftwareSoftware\", \"marital\", \"Skills\", \"Skill\", \"Date\", \"Area\", \"back\", \"skill\", \"last\", \"Name\", \"it\", \"version\", \"use\", \"Top\", \"Number\", \"Email\", \"year\", \"Key\", \"Education\", \"Period\", \"total\", \"Functional\", \"Status\", \"Phone\", \"Months\", \"Location\", \"ID\", \"UG\", \"d\", \"experience\", \"Summary\", \"Active\", \"Pref\"], \"Freq\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.0877420902252197, 2.4015042781829834, 2.4015040397644043, 2.4015040397644043, 2.401503801345825, 2.401503801345825, 2.401503801345825, 2.401503562927246, 2.401503562927246, 2.401503562927246, 2.401503324508667, 5.146483421325684, 5.146483421325684, 1.7152669429779053, 1.7152669429779053, 1.7152669429779053, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 5.832734107971191, 3.7740161418914795, 3.7740161418914795, 3.774012565612793, 7.891523361206055, 8.57771110534668, 7.205255508422852, 5.832758903503418, 5.832754135131836, 7.891510009765625, 7.891506671905518, 9.263986587524414, 9.263972282409668, 6.519008636474609, 7.891479969024658, 9.263984680175781, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263980865478516, 9.263980865478516, 9.2639799118042, 9.2639799118042, 9.2639799118042, 9.263978004455566, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.263976097106934, 9.2639741897583, 9.2639741897583, 8.577733993530273, 8.577733993530273, 8.577733993530273, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.577725410461426, 3.736862897872925, 3.057375192642212, 3.057375192642212, 3.057373523712158, 2.377887010574341, 2.3778867721557617, 2.3778867721557617, 2.3778867721557617, 2.377887010574341, 2.377887010574341, 2.3778862953186035, 2.3778860569000244, 2.3778860569000244, 2.3778860569000244, 2.377885580062866, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 4.416375637054443, 4.4163618087768555, 1.698400616645813, 1.698400616645813, 3.7368907928466797, 3.7368743419647217, 3.7368698120117188, 4.416360855102539, 3.0574069023132324, 3.0573978424072266, 3.05739688873291, 3.057391881942749, 3.0573902130126953, 3.0573861598968506, 3.0573785305023193, 3.0573790073394775, 3.0573790073394775, 3.7368814945220947, 3.7368786334991455, 3.7368733882904053, 3.7368719577789307, 3.7368710041046143, 3.736868143081665, 4.416351318359375, 5.09583044052124, 5.775294303894043, 4.416353225708008, 4.416348934173584, 5.775278568267822, 5.775277137756348, 4.416343688964844, 4.4163289070129395, 5.095806121826172, 5.095800399780273, 5.775272369384766, 5.775270938873291, 5.775267124176025, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265216827393, 5.775263786315918, 5.775264739990234, 5.775264739990234, 5.775262832641602, 5.775261878967285, 5.775261878967285, 5.775261402130127, 5.775261402130127, 5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259494781494, 5.775259971618652, 5.7752580642700195, 5.7752580642700195, 5.7752556800842285, 5.775256156921387, 5.775256156921387, 5.775257110595703, 5.775257587432861, 5.775256156921387, 5.775253772735596], \"Total\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.427489757537842, 2.741250991821289, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.74125075340271, 2.74125075340271, 6.165385723114014, 6.165386199951172, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.055013418197632, 2.055013418197632, 7.531106948852539, 4.792906284332275, 4.792906761169434, 4.792906761169434, 10.948802947998047, 12.314531326293945, 10.262561798095703, 8.210594177246094, 8.210594177246094, 11.628292083740234, 11.628291130065918, 14.359748840332031, 14.359748840332031, 9.576322555541992, 12.307779312133789, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039236068725586, 15.039236068725586, 14.352997779846191, 14.352996826171875, 14.352998733520508, 14.352996826171875, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352998733520508, 14.352997779846191, 4.079987525939941, 3.400498390197754, 3.400498628616333, 3.400498390197754, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210099697113037, 2.7210099697113037, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 5.445394039154053, 5.445394039154053, 2.041522264480591, 2.041522264480591, 4.765904903411865, 4.765905380249023, 4.765904903411865, 6.131625175476074, 4.086416244506836, 4.086416244506836, 4.086415767669678, 4.086415767669678, 4.086415767669678, 4.086416244506836, 4.086415767669678, 4.086416244506836, 4.086416244506836, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136516571045, 7.504101753234863, 9.556071281433105, 12.294279098510742, 8.19034194946289, 8.19034194946289, 13.666759490966797, 13.666759490966797, 8.19034194946289, 8.19034194946289, 10.928550720214844, 10.928550720214844, 14.352997779846191, 14.352998733520508, 14.352998733520508, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352998733520508, 14.352996826171875, 14.352996826171875, 15.039236068725586, 15.039236068725586, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219], \"Category\": [\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\"], \"logprob\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, -6.811100006103516, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -6.30019998550415, -6.30019998550415, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -6.175000190734863, -6.610400199890137, -6.610400199890137, -6.610400199890137, -5.872700214385986, -5.789299964904785, -5.963699817657471, -6.175000190734863, -6.175000190734863, -5.872700214385986, -5.872700214385986, -5.712399959564209, -5.712399959564209, -6.063799858093262, -5.872700214385986, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -6.590799808502197, -6.791500091552734, -6.791500091552734, -6.791500091552734, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -6.423699855804443, -6.423699855804443, -7.37939977645874, -7.37939977645874, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.280600070953369, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.1554999351501465, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.280600070953369, -6.280600070953369, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465], \"loglift\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5741999745368958, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.49790000915527344, 0.49790000915527344, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.4230000078678131, 0.43950000405311584, 0.43950000405311584, 0.43950000405311584, 0.35109999775886536, 0.31690001487731934, 0.3248000144958496, 0.33660000562667847, 0.33660000562667847, 0.29089999198913574, 0.29089999198913574, 0.2401999980211258, 0.2401999980211258, 0.2939999997615814, 0.23409999907016754, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.6201000213623047, 0.6015999913215637, 0.6015999913215637, 0.6015999913215637, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.4984999895095825, 0.4984999895095825, 0.5239999890327454, 0.5239999890327454, 0.46470001339912415, 0.46470001339912415, 0.46470001339912415, 0.3797999918460846, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.1777999997138977, 0.07919999957084656, -0.047600001096725464, 0.09030000120401382, 0.09030000120401382, -0.1534000039100647, -0.1534000039100647, 0.09030000120401382, 0.09030000120401382, -0.054999999701976776, -0.054999999701976776, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214]}, \"token.table\": {\"Topic\": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2], \"Freq\": [0.598434567451477, 0.39895638823509216, 0.20982374250888824, 0.839294970035553, 0.6270467042922974, 0.41803112626075745, 0.48838010430336, 0.48838010430336, 0.598434567451477, 0.39895638823509216, 0.6270466446876526, 0.41803109645843506, 0.8109792470932007, 0.1621958464384079, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895644783973694, 0.48838010430336, 0.48838010430336, 0.9796611070632935, 0.585361897945404, 0.43902140855789185, 0.5984346270561218, 0.39895641803741455, 0.9732297658920288, 0.9796611070632935, 0.6499953866004944, 0.3249976933002472, 0.598434567451477, 0.39895638823509216, 0.6267518997192383, 0.34819549322128296, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.9732297658920288, 0.9732296466827393, 0.7308439016342163, 0.32481950521469116, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7295939922332764, 0.7295939922332764, 0.7350212335586548, 0.2447132021188736, 0.734139621257782, 0.9732296466827393, 0.834566593170166, 0.2086416482925415, 0.20982374250888824, 0.839294970035553, 0.6267518997192383, 0.34819549322128296, 0.598434567451477, 0.39895638823509216, 0.8752761483192444, 0.6270467638969421, 0.41803115606307983, 0.36682870984077454, 0.7336574196815491, 0.5984346270561218, 0.39895644783973694, 0.979661226272583, 0.9796611070632935, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895641803741455, 0.598434567451477, 0.39895638823509216, 0.6879772543907166, 0.3439886271953583, 0.7966956496238708, 0.2655652165412903, 0.6879771947860718, 0.3439885973930359, 0.1836414337158203, 0.7345657348632812, 0.585361897945404, 0.43902140855789185, 0.6270466446876526, 0.41803109645843506, 0.6270467042922974, 0.41803112626075745, 0.598434567451477, 0.39895638823509216, 0.598434567451477, 0.39895638823509216, 0.8822236061096191, 0.9796611070632935, 0.6270467638969421, 0.41803115606307983, 0.598434567451477, 0.39895638823509216, 0.9732297658920288, 0.7295939922332764, 0.24471323192119598, 0.7341396808624268, 0.48838010430336, 0.48838010430336, 0.7350212335586548, 0.9796611070632935, 0.9796611070632935, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.9796611070632935, 0.8345667123794556, 0.2086416780948639, 0.7295938730239868, 0.32617780566215515, 0.6523556113243103, 0.36682870984077454, 0.7336574196815491, 0.24471323192119598, 0.7341396808624268, 0.9796611070632935, 0.979661226272583, 0.2447132021188736, 0.734139621257782, 0.7295938730239868, 0.598434567451477, 0.39895638823509216, 0.48838010430336, 0.48838010430336, 0.7307631969451904, 0.24358773231506348, 0.24471323192119598, 0.7341396808624268, 0.39978134632110596, 0.5330418348312378, 0.73502117395401, 0.4185820519924164, 0.5232275724411011, 0.7350212335586548, 0.9732297658920288, 0.73502117395401, 0.979661226272583, 0.7350212335586548, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732298851013184, 0.7295939922332764, 0.9796611070632935, 0.2447132021188736, 0.734139621257782, 0.9803951382637024, 0.979661226272583, 0.7309695482254028, 0.3132726550102234, 0.9796611070632935, 0.36682870984077454, 0.7336574196815491, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.6270467042922974, 0.41803112626075745, 0.1836414337158203, 0.7345657348632812, 0.7295938730239868, 0.8109791874885559, 0.1621958464384079, 0.9732298851013184, 0.9732297658920288, 0.979661226272583, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.36682870984077454, 0.7336574196815491, 0.7306734919548035, 0.2740025520324707, 0.834566593170166, 0.2086416482925415, 0.2447132021188736, 0.734139621257782, 0.7350212335586548, 0.9732297658920288, 0.7307631969451904, 0.24358773231506348, 0.8822235465049744, 0.8822236061096191, 0.5693705081939697, 0.48803186416625977, 0.2447132021188736, 0.734139621257782, 0.9732297658920288, 0.7295938730239868, 0.6820908784866333, 0.29232466220855713, 0.6270467042922974, 0.41803112626075745, 0.9796611070632935, 0.7350212335586548, 0.9796611070632935, 0.36682868003845215, 0.7336573600769043, 0.36682870984077454, 0.7336574196815491, 0.7295938730239868, 0.5984346270561218, 0.39895641803741455, 0.7350212335586548, 0.9732297658920288, 0.7295938730239868, 0.979661226272583, 0.6270467638969421, 0.41803115606307983, 0.20982372760772705, 0.8392949104309082, 0.979661226272583, 0.6270466446876526, 0.41803109645843506, 0.24471323192119598, 0.7341396808624268, 0.9732297658920288, 0.598434567451477, 0.39895638823509216], \"Term\": [\"Active\", \"Active\", \"Apr\", \"Apr\", \"Area\", \"Area\", \"Computer\", \"Computer\", \"Current\", \"Current\", \"Date\", \"Date\", \"Days\", \"Days\", \"Education\", \"Education\", \"Email\", \"Email\", \"Engineering\", \"Engineering\", \"Entity\", \"Experience\", \"Experience\", \"Functional\", \"Functional\", \"GCP\", \"Gradient\", \"Highest\", \"Highest\", \"ID\", \"ID\", \"IT\", \"IT\", \"ITSoftwareSoftware\", \"ITSoftwareSoftware\", \"JDK\", \"JIRA\", \"Jquery\", \"Jump\", \"Jump\", \"June\", \"Key\", \"Key\", \"Leader\", \"LearningDeep\", \"Location\", \"Location\", \"MCA\", \"MS\", \"MTech\", \"Maintenance\", \"Maintenance\", \"Marathi\", \"Marital\", \"Marital\", \"May\", \"May\", \"Modified\", \"Modified\", \"Months\", \"Months\", \"NCR\", \"Name\", \"Name\", \"Nov\", \"Nov\", \"Number\", \"Number\", \"Numpy\", \"OpenCV\", \"Period\", \"Period\", \"Phone\", \"Phone\", \"Pref\", \"Pref\", \"Resume\", \"Resume\", \"SQL\", \"SQL\", \"SectionWork\", \"SectionWork\", \"Sep\", \"Sep\", \"Services\", \"Services\", \"Skill\", \"Skill\", \"Skills\", \"Skills\", \"Status\", \"Status\", \"Summary\", \"Summary\", \"Technologies\", \"Till\", \"Top\", \"Top\", \"UG\", \"UG\", \"accordingly\", \"actively\", \"algorithm\", \"algorithm\", \"application\", \"application\", \"apply\", \"architecture\", \"artificial\", \"back\", \"back\", \"box\", \"camera\", \"client\", \"client\", \"cloud\", \"code\", \"code\", \"company\", \"company\", \"component\", \"component\", \"control\", \"coordinate\", \"css\", \"css\", \"cycle\", \"d\", \"d\", \"datum\", \"datum\", \"degree\", \"degree\", \"deliver\", \"deliver\", \"design\", \"design\", \"detect\", \"develop\", \"develop\", \"document\", \"domain\", \"duration\", \"enable\", \"enhance\", \"experience\", \"experience\", \"extensive\", \"fashion\", \"full\", \"identification\", \"identify\", \"identify\", \"improve\", \"index\", \"industry\", \"industry\", \"information\", \"issue\", \"issue\", \"it\", \"it\", \"junior\", \"last\", \"last\", \"lead\", \"lead\", \"leadership\", \"less\", \"less\", \"level\", \"look\", \"many\", \"marital\", \"marital\", \"monitoring\", \"new\", \"new\", \"notice\", \"notice\", \"perform\", \"perform\", \"performance\", \"performance\", \"prediction\", \"pretraine\", \"professional\", \"professional\", \"program\", \"reduce\", \"role\", \"role\", \"section\", \"section\", \"sequence\", \"service\", \"singleunmarrie\", \"singleunmarrie\", \"skill\", \"skill\", \"steering\", \"structure\", \"suggest\", \"system\", \"system\", \"test\", \"test\", \"text\", \"total\", \"total\", \"track\", \"transfer\", \"troubleshoot\", \"tuning\", \"use\", \"use\", \"user\", \"user\", \"value\", \"version\", \"version\", \"website\", \"website\", \"working\", \"year\", \"year\"]}, \"R\": 30, \"lambda.step\": 0.01, \"plot.opts\": {\"xlab\": \"PC1\", \"ylab\": \"PC2\"}, \"topic.order\": [2, 1]};\n",
|
| 158 |
+
"\n",
|
| 159 |
+
"function LDAvis_load_lib(url, callback){\n",
|
| 160 |
+
" var s = document.createElement('script');\n",
|
| 161 |
+
" s.src = url;\n",
|
| 162 |
+
" s.async = true;\n",
|
| 163 |
+
" s.onreadystatechange = s.onload = callback;\n",
|
| 164 |
+
" s.onerror = function(){console.warn(\"failed to load library \" + url);};\n",
|
| 165 |
+
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
|
| 166 |
+
"}\n",
|
| 167 |
+
"\n",
|
| 168 |
+
"if(typeof(LDAvis) !== \"undefined\"){\n",
|
| 169 |
+
" // already loaded: just create the visualization\n",
|
| 170 |
+
" !function(LDAvis){\n",
|
| 171 |
+
" new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
|
| 172 |
+
" }(LDAvis);\n",
|
| 173 |
+
"}else if(typeof define === \"function\" && define.amd){\n",
|
| 174 |
+
" // require.js is available: use it to load d3/LDAvis\n",
|
| 175 |
+
" require.config({paths: {d3: \"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\"}});\n",
|
| 176 |
+
" require([\"d3\"], function(d3){\n",
|
| 177 |
+
" window.d3 = d3;\n",
|
| 178 |
+
" LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
|
| 179 |
+
" new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
|
| 180 |
+
" });\n",
|
| 181 |
+
" });\n",
|
| 182 |
+
"}else{\n",
|
| 183 |
+
" // require.js not available: dynamically load d3 & LDAvis\n",
|
| 184 |
+
" LDAvis_load_lib(\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js\", function(){\n",
|
| 185 |
+
" LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
|
| 186 |
+
" new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
|
| 187 |
+
" })\n",
|
| 188 |
+
" });\n",
|
| 189 |
+
"}\n",
|
| 190 |
+
"</script>"
|
| 191 |
+
],
|
| 192 |
+
"text/plain": [
|
| 193 |
+
"PreparedData(topic_coordinates= x y topics cluster Freq\n",
|
| 194 |
+
"topic \n",
|
| 195 |
+
"1 0.045266 0.0 1 1 50.735813\n",
|
| 196 |
+
"0 -0.045266 0.0 2 1 49.264191, topic_info= Term Freq Total Category logprob loglift\n",
|
| 197 |
+
"759 improve 4.000000 4.000000 Default 30.0000 30.0000\n",
|
| 198 |
+
"565 less 6.000000 6.000000 Default 29.0000 29.0000\n",
|
| 199 |
+
"520 Days 6.000000 6.000000 Default 28.0000 28.0000\n",
|
| 200 |
+
"781 program 3.000000 3.000000 Default 27.0000 27.0000\n",
|
| 201 |
+
"1734 reduce 3.000000 3.000000 Default 26.0000 26.0000\n",
|
| 202 |
+
"... ... ... ... ... ... ...\n",
|
| 203 |
+
"185 d 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
|
| 204 |
+
"197 experience 5.775257 15.039238 Topic2 -6.1555 -0.2491\n",
|
| 205 |
+
"152 Summary 5.775258 15.039238 Topic2 -6.1555 -0.2491\n",
|
| 206 |
+
"0 Active 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
|
| 207 |
+
"117 Pref 5.775254 15.039238 Topic2 -6.1555 -0.2491\n",
|
| 208 |
+
"\n",
|
| 209 |
+
"[205 rows x 6 columns], token_table= Topic Freq Term\n",
|
| 210 |
+
"term \n",
|
| 211 |
+
"0 1 0.598435 Active\n",
|
| 212 |
+
"0 2 0.398956 Active\n",
|
| 213 |
+
"1107 1 0.209824 Apr\n",
|
| 214 |
+
"1107 2 0.839295 Apr\n",
|
| 215 |
+
"7 1 0.627047 Area\n",
|
| 216 |
+
"... ... ... ...\n",
|
| 217 |
+
"671 1 0.244713 website\n",
|
| 218 |
+
"671 2 0.734140 website\n",
|
| 219 |
+
"515 1 0.973230 working\n",
|
| 220 |
+
"271 1 0.598435 year\n",
|
| 221 |
+
"271 2 0.398956 year\n",
|
| 222 |
+
"\n",
|
| 223 |
+
"[226 rows x 3 columns], R=30, lambda_step=0.01, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, topic_order=[2, 1])"
|
| 224 |
+
]
|
| 225 |
+
},
|
| 226 |
+
"execution_count": 72,
|
| 227 |
+
"metadata": {},
|
| 228 |
+
"output_type": "execute_result"
|
| 229 |
+
}
|
| 230 |
+
],
|
| 231 |
+
"source": [
|
| 232 |
+
"# Visualize the topics\n",
|
| 233 |
+
"pyLDAvis.enable_notebook()\n",
|
| 234 |
+
"vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
|
| 235 |
+
"vis"
|
| 236 |
+
]
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"cell_type": "code",
|
| 240 |
+
"execution_count": null,
|
| 241 |
+
"metadata": {},
|
| 242 |
+
"outputs": [],
|
| 243 |
+
"source": []
|
| 244 |
+
}
|
| 245 |
+
],
|
| 246 |
+
"metadata": {
|
| 247 |
+
"kernelspec": {
|
| 248 |
+
"display_name": "Python 3",
|
| 249 |
+
"language": "python",
|
| 250 |
+
"name": "python3"
|
| 251 |
+
},
|
| 252 |
+
"language_info": {
|
| 253 |
+
"codemirror_mode": {
|
| 254 |
+
"name": "ipython",
|
| 255 |
+
"version": 3
|
| 256 |
+
},
|
| 257 |
+
"file_extension": ".py",
|
| 258 |
+
"mimetype": "text/x-python",
|
| 259 |
+
"name": "python",
|
| 260 |
+
"nbconvert_exporter": "python",
|
| 261 |
+
"pygments_lexer": "ipython3",
|
| 262 |
+
"version": "3.8.2"
|
| 263 |
+
}
|
| 264 |
+
},
|
| 265 |
+
"nbformat": 4,
|
| 266 |
+
"nbformat_minor": 4
|
| 267 |
+
}
|