srbhr commited on
Commit
cc44155
·
1 Parent(s): a44ae9a

Updating Progress

Browse files
Progress/Job_Data.csv CHANGED
@@ -1,4 +1,4 @@
1
- Name,Context,Cleaned,Selective,Selective_Reduced
2
  Data Scientist.docx,"Data Scientist/ ML Engineer
3
 
4
  Game Change Solutions
@@ -69,7 +69,7 @@ work: 1 year (Preferred)
69
 
70
  Education:
71
 
72
- Bachelor's (Preferred)","['Data', 'scientist', 'ML', 'Engineer', 'Game', 'Change', 'Solutions', 'Gurgaon', 'Haryana', 'temporarily', 'remote', 'year', 'look', 'data', 'scientist', 'help', 'discover', 'information', 'hide', 'vast', 'amount', 'datum', 'help', 'make', 'smart', 'decision', 'deliver', 'even', 'well', 'product', 'primary', 'focus', 'apply', 'datum', 'mining', 'technique', 'statistical', 'analysis', 'build', 'high', 'quality', 'prediction', 'system', 'integrate', 'product', 'Data', 'Scientist', 'GameChange', 'must', 'energetic', 'selfstarter', 'quickly', 'grasp', 'companys', 'vision', 'develop', 'specific', 'tactical', 'plan', 'begin', 'implementation', 'appropriate', 'approval', 'candidate', 'must', 'resourceful', 'able', 'deliver', 'plan', 'define', 'Responsibilities', 'Data', 'Crunching', 'datum', 'massage', 'structured', 'unstructured', 'datum', 'include', 'Numerical', 'Text', 'Audio', 'Video', 'datum', 'implementationfinetune', 'machine', 'learn', 'algorithm', 'Conducting', 'Data', 'sciencemachine', 'learning', 'experiment', 'present', 'insight', 'build', 'innovative', 'datum', 'product', 'use', 'cut', 'edge', 'tool', 'technology', 'conduct', 'datum', 'analysis', 'independently', 'limit', 'supervision', 'generate', 'business', 'insight', 'conduct', 'diagnostic', 'datum', 'identify', 'improvement', 'opportunity', 'design', 'databacke', 'performance', 'improvement', 'strategy', 'liaise', 'internal', 'team', 'Record', 'keep', 'datum', 'warehousing', 'enable', 'Advanced', 'Analytics', 'operation', 'generate', 'meaningful', 'business', 'insight', 'develop', 'high', 'quality', 'client', 'deliverable', 'r', 'model', 'Python', 'script', 'Skills', 'require', 'excellent', 'understanding', 'machine', 'learning', 'technique', 'algorithm', 'kNN', 'Naive', 'Bayes', 'SVM', 'Decision', 'Forests', 'CNN', 'rnn', 'LSTM', 'experience', 'common', 'datum', 'science', 'toolkit', 'R', 'Weka', 'numpy', 'OpenCV', 'MatLab', 'great', 'communication', 'skill', 'experience', 
'data', 'visualisation', 'tool', 'D3js', 'GGplot', 'Proficiency', 'use', 'query', 'language', 'SQL', 'Hive', 'Pig', 'Experience', 'NoSQL', 'database', 'mongodb', 'Cassandra', 'HBase', 'good', 'applied', 'statistic', 'skill', 'distribution', 'statistical', 'testing', 'regression', 'good', 'scripting', 'programming', 'skill', 'Job', 'Type', 'Fulltime', 'Salary', 'year', 'experience', 'Data', 'Science', 'year', 'prefer', 'work', 'year', 'Preferred', 'Education', 'Bachelors', 'prefer']","['Data', 'scientist', 'ML', 'Engineer', 'Game', 'Change', 'Solutions', 'Gurgaon', 'Haryana', 'temporarily', 'remote', 'year', 'look', 'data', 'scientist', 'help', 'discover', 'information', 'hide', 'vast', 'amount', 'datum', 'help', 'make', 'smart', 'decision', 'deliver', 'even', 'well', 'product', 'primary', 'focus', 'apply', 'datum', 'mining', 'technique', 'statistical', 'analysis', 'build', 'high', 'quality', 'prediction', 'system', 'integrate', 'product', 'Data', 'Scientist', 'GameChange', 'must', 'energetic', 'selfstarter', 'quickly', 'grasp', 'companys', 'vision', 'develop', 'specific', 'tactical', 'plan', 'begin', 'implementation', 'appropriate', 'approval', 'candidate', 'must', 'resourceful', 'able', 'deliver', 'plan', 'define', 'Responsibilities', 'Data', 'Crunching', 'datum', 'massage', 'structured', 'unstructured', 'datum', 'include', 'Numerical', 'Text', 'Audio', 'Video', 'datum', 'implementationfinetune', 'machine', 'learn', 'algorithm', 'Conducting', 'Data', 'sciencemachine', 'learning', 'experiment', 'present', 'insight', 'build', 'innovative', 'datum', 'product', 'use', 'cut', 'edge', 'tool', 'technology', 'conduct', 'datum', 'analysis', 'independently', 'limit', 'supervision', 'generate', 'business', 'insight', 'conduct', 'diagnostic', 'datum', 'identify', 'improvement', 'opportunity', 'design', 'databacke', 'performance', 'improvement', 'strategy', 'liaise', 'internal', 'team', 'Record', 'keep', 'datum', 'warehousing', 'enable', 'Advanced', 'Analytics', 'operation', 
'generate', 'meaningful', 'business', 'insight', 'develop', 'high', 'quality', 'client', 'deliverable', 'r', 'model', 'Python', 'script', 'Skills', 'require', 'excellent', 'understanding', 'machine', 'learning', 'technique', 'algorithm', 'kNN', 'Naive', 'Bayes', 'SVM', 'Decision', 'Forests', 'CNN', 'rnn', 'LSTM', 'experience', 'common', 'datum', 'science', 'toolkit', 'R', 'Weka', 'numpy', 'OpenCV', 'MatLab', 'great', 'communication', 'skill', 'experience', 'data', 'visualisation', 'tool', 'D3js', 'GGplot', 'Proficiency', 'use', 'query', 'language', 'SQL', 'Hive', 'Pig', 'Experience', 'NoSQL', 'database', 'mongodb', 'Cassandra', 'HBase', 'good', 'applied', 'statistic', 'skill', 'distribution', 'statistical', 'testing', 'regression', 'good', 'scripting', 'programming', 'skill', 'Job', 'Type', 'Fulltime', 'Salary', 'year', 'experience', 'Data', 'Science', 'year', 'prefer', 'work', 'year', 'Preferred', 'Education', 'Bachelors', 'prefer']","['Data', 'scientist', 'ML', 'Engineer', 'Game', 'Change', 'Solutions', 'Gurgaon', 'Haryana', 'temporarily', 'remote', 'year', 'look', 'data', 'scientist', 'help', 'discover', 'information', 'hide', 'vast', 'amount', 'datum', 'help', 'make', 'smart', 'decision', 'deliver', 'even', 'well', 'product', 'primary', 'focus', 'apply', 'datum', 'mining', 'technique', 'statistical', 'analysis', 'build', 'high', 'quality', 'prediction', 'system', 'integrate', 'product', 'Data', 'Scientist', 'GameChange', 'must', 'energetic', 'selfstarter', 'quickly', 'grasp', 'companys', 'vision', 'develop', 'specific', 'tactical', 'plan', 'begin', 'implementation', 'appropriate', 'approval', 'candidate', 'must', 'resourceful', 'able', 'deliver', 'plan', 'define', 'Responsibilities', 'Data', 'Crunching', 'datum', 'massage', 'structured', 'unstructured', 'datum', 'include', 'Numerical', 'Text', 'Audio', 'Video', 'datum', 'implementationfinetune', 'machine', 'learn', 'algorithm', 'Conducting', 'Data', 'sciencemachine', 'learning', 'experiment', 'present', 
'insight', 'build', 'innovative', 'datum', 'product', 'use', 'cut', 'edge', 'tool', 'technology', 'conduct', 'datum', 'analysis', 'independently', 'limit', 'supervision', 'generate', 'business', 'insight', 'conduct', 'diagnostic', 'datum', 'identify', 'improvement', 'opportunity', 'design', 'databacke', 'performance', 'improvement', 'strategy', 'liaise', 'internal', 'team', 'Record', 'keep', 'datum', 'warehousing', 'enable', 'Advanced', 'Analytics', 'operation', 'generate', 'meaningful', 'business', 'insight', 'develop', 'high', 'quality', 'client', 'deliverable', 'r', 'model', 'Python', 'script', 'Skills', 'require', 'excellent', 'understanding', 'machine', 'learning', 'technique', 'algorithm', 'kNN', 'Naive', 'Bayes', 'SVM', 'Decision', 'Forests', 'CNN', 'rnn', 'LSTM', 'experience', 'common', 'datum', 'science', 'toolkit', 'R', 'Weka', 'numpy', 'OpenCV', 'MatLab', 'great', 'communication', 'skill', 'experience', 'data', 'visualisation', 'tool', 'D3js', 'GGplot', 'Proficiency', 'use', 'query', 'language', 'SQL', 'Hive', 'Pig', 'Experience', 'NoSQL', 'database', 'mongodb', 'Cassandra', 'HBase', 'good', 'applied', 'statistic', 'skill', 'distribution', 'statistical', 'testing', 'regression', 'good', 'scripting', 'programming', 'skill', 'Job', 'Type', 'Fulltime', 'Salary', 'year', 'experience', 'Data', 'Science', 'year', 'prefer', 'work', 'year', 'Preferred', 'Education', 'Bachelors', 'prefer']"
73
  Web_dev_job.docx,"The position holder will be responsible for creation and implementation of a wide variety of Web-based products using PHP, JavaScript, MySQL and AJAX.
74
 
75
 
@@ -130,4 +130,4 @@ Understand Severity and Priority needs
130
 
131
  Time Management
132
 
133
- Quick decision making","['position', 'holder', 'responsible', 'creation', 'implementation', 'wide', 'variety', 'webbase', 'product', 'use', 'PHP', 'JavaScript', 'MySQL', 'AJAX', 'Key', 'Responsibility', 'Areas', 'create', 'design', 'modify', 'website', 'suit', 'requirement', 'client', 'PHP', 'Developers', 'need', 'thorough', 'knowledge', 'develop', 'cross', 'platform', 'compatible', 'web', 'mobile', 'web', 'application', 'sound', 'knowledge', 'working', 'experience', 'PHP', 'shall', 'hand', 'database', 'programming', 'experience', 'additional', 'skillset', 'oops', 'concept', 'PHP', 'LAMP', 'technology', 'xml', 'html', 'css', 'javascript', 'JQuery', 'Ajax', 'good', 'understanding', 'Oracle', 'PLSQL', 'relevant', 'software', 'architecture', 'software', 'development', 'software', 'testing', 'experience', 'Should', 'ability', 'work', 'team', 'business', 'owner', 'developer', 'designer', 'tester', 'Qulification', 'B', 'Tech', 'minimum', '10th', '12th', 'minimum', 'experience', 'year', 'PHP', 'development', 'experience', 'Skill', 'Sets', 'requirement', 'Experience', 'PHP', 'program', 'open', 'source', 'tool', 'implementation', 'ecommerce', 'project', 'good', 'command', 'AJAX', 'Database', 'SQL', 'programming', 'experience', 'PHP', 'program', 'open', 'Source', 'Tools', 'Knowledge', 'PHP', 'HTML5', 'css', 'Javascript', 'AJAX', 'Good', 'Knowledge', 'object', 'orient', 'programming', 'Knowledge', 'java', 'script', 'library', 'Jquery', 'Prototype', 'effective', 'communication', 'comprehension', 'skill', 'Understand', 'Severity', 'Priority', 'need', 'Time', 'Management', 'Quick', 'decision', 'making']","['position', 'holder', 'responsible', 'creation', 'implementation', 'wide', 'variety', 'webbase', 'product', 'use', 'PHP', 'JavaScript', 'MySQL', 'AJAX', 'Key', 'Responsibility', 'Areas', 'create', 'design', 'modify', 'website', 'suit', 'requirement', 'client', 'PHP', 'Developers', 'need', 'thorough', 'knowledge', 'develop', 'cross', 'platform', 'compatible', 'web', 'mobile', 
'web', 'application', 'sound', 'knowledge', 'working', 'experience', 'PHP', 'shall', 'hand', 'database', 'programming', 'experience', 'additional', 'skillset', 'oops', 'concept', 'PHP', 'LAMP', 'technology', 'xml', 'html', 'css', 'javascript', 'JQuery', 'Ajax', 'good', 'understanding', 'Oracle', 'PLSQL', 'relevant', 'software', 'architecture', 'software', 'development', 'software', 'testing', 'experience', 'Should', 'ability', 'work', 'team', 'business', 'owner', 'developer', 'designer', 'tester', 'Qulification', 'B', 'Tech', 'minimum', '10th', '12th', 'minimum', 'experience', 'year', 'PHP', 'development', 'experience', 'Skill', 'Sets', 'requirement', 'Experience', 'PHP', 'program', 'open', 'source', 'tool', 'implementation', 'ecommerce', 'project', 'good', 'command', 'AJAX', 'Database', 'SQL', 'programming', 'experience', 'PHP', 'program', 'open', 'Source', 'Tools', 'Knowledge', 'PHP', 'HTML5', 'css', 'Javascript', 'AJAX', 'Good', 'Knowledge', 'object', 'orient', 'programming', 'Knowledge', 'java', 'script', 'library', 'Jquery', 'Prototype', 'effective', 'communication', 'comprehension', 'skill', 'Understand', 'Severity', 'Priority', 'need', 'Time', 'Management', 'Quick', 'decision', 'making']","['position', 'holder', 'responsible', 'creation', 'implementation', 'wide', 'variety', 'webbase', 'product', 'use', 'PHP', 'JavaScript', 'MySQL', 'AJAX', 'Key', 'Responsibility', 'Areas', 'create', 'design', 'modify', 'website', 'suit', 'requirement', 'client', 'PHP', 'Developers', 'need', 'thorough', 'knowledge', 'develop', 'cross', 'platform', 'compatible', 'web', 'mobile', 'web', 'application', 'sound', 'knowledge', 'working', 'experience', 'PHP', 'shall', 'hand', 'database', 'programming', 'experience', 'additional', 'skillset', 'oops', 'concept', 'PHP', 'LAMP', 'technology', 'xml', 'html', 'css', 'javascript', 'JQuery', 'Ajax', 'good', 'understanding', 'Oracle', 'PLSQL', 'relevant', 'software', 'architecture', 'software', 'development', 'software', 'testing', 
'experience', 'Should', 'ability', 'work', 'team', 'business', 'owner', 'developer', 'designer', 'tester', 'Qulification', 'B', 'Tech', 'minimum', '10th', '12th', 'minimum', 'experience', 'year', 'PHP', 'development', 'experience', 'Skill', 'Sets', 'requirement', 'Experience', 'PHP', 'program', 'open', 'source', 'tool', 'implementation', 'ecommerce', 'project', 'good', 'command', 'AJAX', 'Database', 'SQL', 'programming', 'experience', 'PHP', 'program', 'open', 'Source', 'Tools', 'Knowledge', 'PHP', 'HTML5', 'css', 'Javascript', 'AJAX', 'Good', 'Knowledge', 'object', 'orient', 'programming', 'Knowledge', 'java', 'script', 'library', 'Jquery', 'Prototype', 'effective', 'communication', 'comprehension', 'skill', 'Understand', 'Severity', 'Priority', 'need', 'Time', 'Management', 'Quick', 'decision', 'making']"
 
1
+ Name,Context,Cleaned,Selective,Selective_Reduced,TF_Based
2
  Data Scientist.docx,"Data Scientist/ ML Engineer
3
 
4
  Game Change Solutions
 
69
 
70
  Education:
71
 
72
+ Bachelor's (Preferred)",Data scientist ML Engineer Game Change Solutions Gurgaon Haryana temporarily remote year look data scientist help discover information hide vast amount datum help make smart decision deliver even well product primary focus apply datum mining technique statistical analysis build high quality prediction system integrate product Data Scientist GameChange must energetic selfstarter quickly grasp companys vision develop specific tactical plan begin implementation appropriate approval candidate must resourceful able deliver plan define Responsibilities Data Crunching datum massage structured unstructured datum include Numerical Text Audio Video datum implementationfinetune machine learn algorithm Conducting Data sciencemachine learning experiment present insight build innovative datum product use cut edge tool technology conduct datum analysis independently limit supervision generate business insight conduct diagnostic datum identify improvement opportunity design databacke performance improvement strategy liaise internal team Record keep datum warehousing enable Advanced Analytics operation generate meaningful business insight develop high quality client deliverable r model Python script Skills require excellent understanding machine learning technique algorithm kNN Naive Bayes SVM Decision Forests CNN rnn LSTM experience common datum science toolkit R Weka numpy OpenCV MatLab great communication skill experience data visualisation tool D3js GGplot Proficiency use query language SQL Hive Pig Experience NoSQL database mongodb Cassandra HBase good applied statistic skill distribution statistical testing regression good scripting programming skill Job Type Fulltime Salary year experience Data Science year prefer work year Preferred Education Bachelors prefer,meaningful begin conduct temporarily experience Salary team tactical database datum science candidate Cassandra deliver applied Text model GGplot look present enable Education toolkit Weka 
supervision Scientist primary include keep grasp learning independently machine Conducting Experience Pig require make remote technique analysis experiment data diagnostic CNN edge approval plan HBase apply high good innovative NoSQL internal product tool ML script must selfstarter technology OpenCV kNN information Change strategy quality hide r Type Haryana energetic work programming Skills Proficiency Bachelors Preferred Naive communication MatLab Solutions rnn LSTM deliverable Bayes mining databacke language liaise Data Numerical Crunching cut vast Game regression develop warehousing statistical build Decision companys discover quickly testing amount Science operation business Video Job generate help SQL Advanced Engineer learn numpy design Forests R prefer Analytics great distribution implementation visualisation GameChange implementationfinetune mongodb identify performance scientist vision able Python appropriate Fulltime Audio opportunity insight improvement resourceful focus sciencemachine well understanding query client SVM specific prediction statistic Gurgaon define smart structured Record excellent D3js limit Hive decision scripting year use algorithm even unstructured common skill Responsibilities system integrate massage,meaningful begin Salary team database datum science candidate Cassandra Text model GGplot enable Education toolkit Weka supervision Scientist grasp machine Conducting Experience Pig technique analysis experiment CNN edge approval plan HBase NoSQL product tool ML script technology OpenCV kNN information Change strategy quality hide r Type Haryana work Proficiency Bachelors Preferred Naive communication MatLab LSTM Bayes mining language liaise Data Numerical Crunching Game regression build Decision companys amount Science operation business Video Job generate help SQL Advanced Engineer design Analytics distribution implementation visualisation GameChange mongodb performance scientist vision Python Fulltime Audio opportunity insight 
improvement focus sciencemachine query client SVM prediction statistic Gurgaon define Record D3js limit Hive decision scripting year use algorithm skill system massage,able advanced algorithm amount analysis analytics applied apply appropriate approval audio bachelors bayes begin build business candidate cassandra change client cnn common communication companys conduct conducting crunching cut d3js data databacke database datum decision define deliver deliverable design develop diagnostic discover distribution edge education enable energetic engineer even excellent experience experiment focus forests fulltime game gamechange generate ggplot good grasp great gurgaon haryana hbase help hide high hive identify implementation implementationfinetune improvement include independently information innovative insight integrate internal job keep knn language learn learning liaise limit look lstm machine make massage matlab meaningful mining ml model mongodb must naive nosql numerical numpy opencv operation opportunity performance pig plan prediction prefer preferred present primary product proficiency programming python quality query quickly record regression remote require resourceful responsibilities rnn salary science sciencemachine scientist script scripting selfstarter skill skills smart solutions specific sql statistic statistical strategy structured supervision svm system tactical team technique technology temporarily testing text tool toolkit type understanding unstructured use vast video vision visualisation warehousing weka well work year
73
  Web_dev_job.docx,"The position holder will be responsible for creation and implementation of a wide variety of Web-based products using PHP, JavaScript, MySQL and AJAX.
74
 
75
 
 
130
 
131
  Time Management
132
 
133
+ Quick decision making",position holder responsible creation implementation wide variety webbase product use PHP JavaScript MySQL AJAX Key Responsibility Areas create design modify website suit requirement client PHP Developers need thorough knowledge develop cross platform compatible web mobile web application sound knowledge working experience PHP shall hand database programming experience additional skillset oops concept PHP LAMP technology xml html css javascript JQuery Ajax good understanding Oracle PLSQL relevant software architecture software development software testing experience Should ability work team business owner developer designer tester Qulification B Tech minimum 10th 12th minimum experience year PHP development experience Skill Sets requirement Experience PHP program open source tool implementation ecommerce project good command AJAX Database SQL programming experience PHP program open Source Tools Knowledge PHP HTML5 css Javascript AJAX Good Knowledge object orient programming Knowledge java script library Jquery Prototype effective communication comprehension skill Understand Severity Priority need Time Management Quick decision making,Sets AJAX ability experience team Javascript database comprehension Skill owner hand open Source working library web position Management create website creation Experience Developers cross LAMP Areas application 12th source good Jquery effective skillset product tool Responsibility Good script Severity tester technology webbase project suit software ecommerce mobile PLSQL holder minimum compatible orient work variety programming Time Tools responsible communication developer Quick html Qulification Oracle designer architecture knowledge develop oops making sound javascript JQuery testing Tech JavaScript business object SQL Priority Database command design B Knowledge modify relevant implementation additional need Understand Should css understanding client concept requirement HTML5 Prototype xml 10th decision 
MySQL program wide development year use Ajax Key java shall skill platform thorough PHP,AJAX ability experience team Javascript database comprehension Skill owner hand Source library web position Management website creation Experience LAMP Areas application source Jquery skillset product tool Responsibility script Severity tester technology webbase project suit software ecommerce PLSQL holder orient work variety programming Time Tools communication developer Quick html Qulification Oracle designer architecture knowledge sound javascript JQuery testing Tech JavaScript business object SQL Priority Database command design B Knowledge implementation need Understand client concept requirement Prototype xml decision MySQL program development year use Ajax Key java platform PHP,10th 12th ability additional ajax application architecture areas business client command communication compatible comprehension concept create creation cross css database decision design designer develop developer developers development ecommerce effective experience good hand holder html html5 implementation java javascript jquery key knowledge lamp library making management minimum mobile modify mysql need object oops open oracle orient owner platform plsql position priority product program programming project prototype quick qulification relevant requirement responsibility responsible script sets severity shall should skill skillset software sound source sql suit team tech technology tester testing thorough time tool tools understand understanding use variety web webbase website wide work working xml year
Progress/Resume_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Progress/Word Vector Model.ipynb ADDED
@@ -0,0 +1,379 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "## Checking out doc2Vec and Word Vectors to find out Similarities between documents. "
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import pandas as pd"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 2,
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "from sklearn.feature_extraction.text import TfidfVectorizer"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 18,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "from sklearn.metrics import pairwise_distances"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 3,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "from sklearn.decomposition import TruncatedSVD"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "from scipy.spatial import distance"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 33,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "import numpy as np"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 44,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "import spacy\n",
71
+ "nlp=spacy.load('en_vectors_web_lg')"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "code",
76
+ "execution_count": 6,
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "Resume = pd.read_csv('Resume_data.csv')"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 7,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "Jobs = pd.read_csv('Job_Data.csv')"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 8,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# tfidf = TfidfVectorizer(max_features=100, max_df=3)"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": 14,
104
+ "metadata": {},
105
+ "outputs": [],
106
+ "source": [
107
+ "token = Resume['Cleaned'][14].split(\" \")\n",
108
+ "tfidf = TfidfVectorizer(max_df=0.05, min_df=0.001)\n",
109
+ "words = tfidf.fit_transform(token)\n",
110
+ "ss1 = \" \".join(tfidf.get_feature_names())"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 15,
116
+ "metadata": {},
117
+ "outputs": [],
118
+ "source": [
119
+ "job_token = Jobs['Cleaned'][0].split(\" \")\n",
120
+ "tfidf2 = TfidfVectorizer(max_df=0.05, min_df=0.001)\n",
121
+ "jwords = tfidf2.fit_transform(job_token)\n",
122
+ "ss2 = \" \".join(tfidf2.get_feature_names())"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 45,
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "doc1 = nlp(ss1)\n",
132
+ "doc2 = nlp(ss2)\n"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 53,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "# for a in doc1:\n",
142
+ "# print(a.has_vector)"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "metadata": {},
149
+ "outputs": [],
150
+ "source": [
151
+ "# def check_vector(doc):\n",
152
+ "# for token in doc:\n",
153
+ "# if token.has_vector\n",
154
+ " "
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": null,
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": []
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 65,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "# def compute_similarity(document,reference):\n",
171
+ "# scores = []\n",
172
+ "# for a in document:\n",
173
+ "# similars = []\n",
174
+ "# for b in reference:\n",
175
+ "# similars.append(a.similarity(b))\n",
176
+ "# scores.append(similars)\n",
177
+ "# return scores"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": [
186
+ "# scores = compute_similarity(doc1, doc2)"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": []
195
+ },
196
+ {
197
+ "cell_type": "code",
198
+ "execution_count": null,
199
+ "metadata": {},
200
+ "outputs": [],
201
+ "source": []
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": null,
206
+ "metadata": {},
207
+ "outputs": [],
208
+ "source": []
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": null,
213
+ "metadata": {},
214
+ "outputs": [],
215
+ "source": []
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": null,
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": []
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "execution_count": null,
227
+ "metadata": {},
228
+ "outputs": [],
229
+ "source": []
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": null,
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": []
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": []
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": null,
248
+ "metadata": {},
249
+ "outputs": [],
250
+ "source": []
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": null,
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": []
258
+ },
259
+ {
260
+ "cell_type": "code",
261
+ "execution_count": null,
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": []
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": null,
269
+ "metadata": {},
270
+ "outputs": [],
271
+ "source": []
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "execution_count": null,
276
+ "metadata": {},
277
+ "outputs": [],
278
+ "source": []
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": null,
283
+ "metadata": {},
284
+ "outputs": [],
285
+ "source": []
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": null,
290
+ "metadata": {},
291
+ "outputs": [],
292
+ "source": []
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "execution_count": null,
297
+ "metadata": {},
298
+ "outputs": [],
299
+ "source": []
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": null,
304
+ "metadata": {},
305
+ "outputs": [],
306
+ "source": []
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": null,
311
+ "metadata": {},
312
+ "outputs": [],
313
+ "source": []
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": null,
318
+ "metadata": {},
319
+ "outputs": [],
320
+ "source": []
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": null,
325
+ "metadata": {},
326
+ "outputs": [],
327
+ "source": []
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": null,
332
+ "metadata": {},
333
+ "outputs": [],
334
+ "source": []
335
+ },
336
+ {
337
+ "cell_type": "code",
338
+ "execution_count": null,
339
+ "metadata": {},
340
+ "outputs": [],
341
+ "source": []
342
+ },
343
+ {
344
+ "cell_type": "code",
345
+ "execution_count": null,
346
+ "metadata": {},
347
+ "outputs": [],
348
+ "source": []
349
+ },
350
+ {
351
+ "cell_type": "code",
352
+ "execution_count": null,
353
+ "metadata": {},
354
+ "outputs": [],
355
+ "source": []
356
+ }
357
+ ],
358
+ "metadata": {
359
+ "kernelspec": {
360
+ "display_name": "Python 3",
361
+ "language": "python",
362
+ "name": "python3"
363
+ },
364
+ "language_info": {
365
+ "codemirror_mode": {
366
+ "name": "ipython",
367
+ "version": 3
368
+ },
369
+ "file_extension": ".py",
370
+ "mimetype": "text/x-python",
371
+ "name": "python",
372
+ "nbconvert_exporter": "python",
373
+ "pygments_lexer": "ipython3",
374
+ "version": "3.8.2"
375
+ }
376
+ },
377
+ "nbformat": 4,
378
+ "nbformat_minor": 4
379
+ }
Progress/WordCloud.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Progress/Word_relevance.ipynb ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 17,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "Data = pd.read_csv('Resume_data.csv')"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 18,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "from sklearn.feature_extraction.text import TfidfVectorizer \n",
28
+ " \n",
29
+ "def get_scores(token):\n",
30
+ " tfidf_vectorizer=TfidfVectorizer(max_df=0.05, min_df=0.001) \n",
31
+ " tfidf_vectorizer_vectors=tfidf_vectorizer.fit_transform(token)\n",
32
+ " first_vector_tfidfvectorizer=tfidf_vectorizer_vectors[0]\n",
33
+ " df = pd.DataFrame(first_vector_tfidfvectorizer.T.todense(), index=tfidf_vectorizer.get_feature_names(), columns=[\"tfidf\"])\n",
34
+ " df = df.sort_values(by=[\"tfidf\"],ascending=False)\n",
35
+ " return df"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 19,
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "# get the first vector out (for the first document) \n",
45
+ "sdf = get_scores(Data['Cleaned'][0].split(\" \"))\n",
46
+ " \n",
47
+ "# place tf-idf values in a pandas data frame \n",
48
+ "\n"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 21,
54
+ "metadata": {},
55
+ "outputs": [
56
+ {
57
+ "data": {
58
+ "text/plain": [
59
+ "0 accuracy achieve acquire active actively alank...\n",
60
+ "1 730aca854657000679last accuracy active activel...\n",
61
+ "2 6c4b0bb78deb4912bd26b1a8a5f0218blast active al...\n",
62
+ "3 12c 2nd accord active amruta android api app a...\n",
63
+ "4 09e93b7a617e329a87last 9sep20 active agile all...\n",
64
+ "5 1sep20 accuracy active ambedkar analyse analys...\n",
65
+ "6 abhishek active airflow area article automate ...\n",
66
+ "7 active application architect area aspnet aug a...\n",
67
+ "8 0196ef52240b916a74last active adabooste adapti...\n",
68
+ "9 11sep20last access accordinglly active activit...\n",
69
+ "10 11sep20last active administrationperformance a...\n",
70
+ "11 accuracy aerospace ai algorithm algorithms ana...\n",
71
+ "12 21c5d05765c7180c58last 45xx 7sep20 active ad a...\n",
72
+ "13 accuracy active activity administration admini...\n",
73
+ "14 863c41556ec744d7b178c3c114e76e35last 9sep20 ac...\n",
74
+ "15 04a3bcbf1231f98382last accessibility active aj...\n",
75
+ "16 11sep20last 72beef2a10740d8848last access acco...\n",
76
+ "17 accordingly active adobe ajax ambedkar angular...\n",
77
+ "18 11sep20last abul access active actively activi...\n",
78
+ "19 active adsense ajax analytic apache applicatio...\n",
79
+ "20 07year 1providing accuracy active address afte...\n",
80
+ "Name: TF_Based, dtype: object"
81
+ ]
82
+ },
83
+ "execution_count": 21,
84
+ "metadata": {},
85
+ "output_type": "execute_result"
86
+ }
87
+ ],
88
+ "source": [
89
+ "Data['TF_Based']"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": []
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 20,
102
+ "metadata": {},
103
+ "outputs": [
104
+ {
105
+ "data": {
106
+ "text/html": [
107
+ "<div>\n",
108
+ "<style scoped>\n",
109
+ " .dataframe tbody tr th:only-of-type {\n",
110
+ " vertical-align: middle;\n",
111
+ " }\n",
112
+ "\n",
113
+ " .dataframe tbody tr th {\n",
114
+ " vertical-align: top;\n",
115
+ " }\n",
116
+ "\n",
117
+ " .dataframe thead th {\n",
118
+ " text-align: right;\n",
119
+ " }\n",
120
+ "</style>\n",
121
+ "<table border=\"1\" class=\"dataframe\">\n",
122
+ " <thead>\n",
123
+ " <tr style=\"text-align: right;\">\n",
124
+ " <th></th>\n",
125
+ " <th>tfidf</th>\n",
126
+ " </tr>\n",
127
+ " </thead>\n",
128
+ " <tbody>\n",
129
+ " <tr>\n",
130
+ " <th>alankrit</th>\n",
131
+ " <td>1.0</td>\n",
132
+ " </tr>\n",
133
+ " <tr>\n",
134
+ " <th>accuracy</th>\n",
135
+ " <td>0.0</td>\n",
136
+ " </tr>\n",
137
+ " <tr>\n",
138
+ " <th>r2</th>\n",
139
+ " <td>0.0</td>\n",
140
+ " </tr>\n",
141
+ " <tr>\n",
142
+ " <th>phone</th>\n",
143
+ " <td>0.0</td>\n",
144
+ " </tr>\n",
145
+ " <tr>\n",
146
+ " <th>practitioner</th>\n",
147
+ " <td>0.0</td>\n",
148
+ " </tr>\n",
149
+ " <tr>\n",
150
+ " <th>precision</th>\n",
151
+ " <td>0.0</td>\n",
152
+ " </tr>\n",
153
+ " <tr>\n",
154
+ " <th>prediction</th>\n",
155
+ " <td>0.0</td>\n",
156
+ " </tr>\n",
157
+ " <tr>\n",
158
+ " <th>pref</th>\n",
159
+ " <td>0.0</td>\n",
160
+ " </tr>\n",
161
+ " <tr>\n",
162
+ " <th>preprocesse</th>\n",
163
+ " <td>0.0</td>\n",
164
+ " </tr>\n",
165
+ " <tr>\n",
166
+ " <th>pretraine</th>\n",
167
+ " <td>0.0</td>\n",
168
+ " </tr>\n",
169
+ " <tr>\n",
170
+ " <th>processing</th>\n",
171
+ " <td>0.0</td>\n",
172
+ " </tr>\n",
173
+ " <tr>\n",
174
+ " <th>proficiency</th>\n",
175
+ " <td>0.0</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>project</th>\n",
179
+ " <td>0.0</td>\n",
180
+ " </tr>\n",
181
+ " <tr>\n",
182
+ " <th>projects</th>\n",
183
+ " <td>0.0</td>\n",
184
+ " </tr>\n",
185
+ " <tr>\n",
186
+ " <th>python</th>\n",
187
+ " <td>0.0</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>qualificationscertificationsprogram</th>\n",
191
+ " <td>0.0</td>\n",
192
+ " </tr>\n",
193
+ " <tr>\n",
194
+ " <th>read</th>\n",
195
+ " <td>0.0</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>period</th>\n",
199
+ " <td>0.0</td>\n",
200
+ " </tr>\n",
201
+ " <tr>\n",
202
+ " <th>realtime</th>\n",
203
+ " <td>0.0</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>recall</th>\n",
207
+ " <td>0.0</td>\n",
208
+ " </tr>\n",
209
+ " </tbody>\n",
210
+ "</table>\n",
211
+ "</div>"
212
+ ],
213
+ "text/plain": [
214
+ " tfidf\n",
215
+ "alankrit 1.0\n",
216
+ "accuracy 0.0\n",
217
+ "r2 0.0\n",
218
+ "phone 0.0\n",
219
+ "practitioner 0.0\n",
220
+ "precision 0.0\n",
221
+ "prediction 0.0\n",
222
+ "pref 0.0\n",
223
+ "preprocesse 0.0\n",
224
+ "pretraine 0.0\n",
225
+ "processing 0.0\n",
226
+ "proficiency 0.0\n",
227
+ "project 0.0\n",
228
+ "projects 0.0\n",
229
+ "python 0.0\n",
230
+ "qualificationscertificationsprogram 0.0\n",
231
+ "read 0.0\n",
232
+ "period 0.0\n",
233
+ "realtime 0.0\n",
234
+ "recall 0.0"
235
+ ]
236
+ },
237
+ "execution_count": 20,
238
+ "metadata": {},
239
+ "output_type": "execute_result"
240
+ }
241
+ ],
242
+ "source": [
243
+ "sdf.head(20)"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "metadata": {},
250
+ "outputs": [],
251
+ "source": []
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": []
259
+ }
260
+ ],
261
+ "metadata": {
262
+ "kernelspec": {
263
+ "display_name": "Python 3",
264
+ "language": "python",
265
+ "name": "python3"
266
+ },
267
+ "language_info": {
268
+ "codemirror_mode": {
269
+ "name": "ipython",
270
+ "version": 3
271
+ },
272
+ "file_extension": ".py",
273
+ "mimetype": "text/x-python",
274
+ "name": "python",
275
+ "nbconvert_exporter": "python",
276
+ "pygments_lexer": "ipython3",
277
+ "version": "3.8.2"
278
+ }
279
+ },
280
+ "nbformat": 4,
281
+ "nbformat_minor": 4
282
+ }
Progress/app.py CHANGED
@@ -2,11 +2,7 @@ from wordcloud import STOPWORDS
2
  from operator import index
3
  from wordcloud import WordCloud
4
  from pandas._config.config import options
5
- import Cleaner
6
- import Similar
7
- import textract as tx
8
  import pandas as pd
9
- import os
10
  import streamlit as st
11
  import plotly.express as px
12
  import plotly.graph_objects as go
@@ -29,35 +25,9 @@ Algorihms used:-
29
  Total Score calculate is the overall average of the 4 mentioned token based algorithms and string based.
30
  """)
31
 
32
- resume_dir = "Data/Resumes/"
33
- job_desc_dir = "Data/JobDesc/"
34
- resume_names = os.listdir(resume_dir)
35
- job_description_names = os.listdir(job_desc_dir)
36
-
37
- document = []
38
-
39
- st.write("Total Resumes found : ", len(resume_names))
40
- st.write("Total Job Descriptions found : ", len(job_description_names))
41
 
42
  # to read all the resumes in the directory as provided by the user
43
 
44
-
45
- def read_resumes(list_of_resumes, resume_directory):
46
- placeholder = []
47
- for res in list_of_resumes:
48
- temp = []
49
- temp.append(res)
50
- text = tx.process(resume_directory+res, encoding='ascii')
51
- text = str(text, 'utf-8')
52
- temp.append(text)
53
- placeholder.append(temp)
54
- return placeholder
55
-
56
-
57
- document = read_resumes(resume_names, resume_dir)
58
-
59
- df = pd.DataFrame(document, columns=['Name', 'Context'])
60
-
61
  if len(job_description_names) <= 1:
62
  st.write("There is only ", len(job_description_names),
63
  "present. It will be used to create scores.")
@@ -75,15 +45,6 @@ index = st.slider("Which JD to select ? : ", 0,
75
  len(job_description_names)-1, 1)
76
 
77
 
78
- def read_job_description(n, list_of_job_files, job_description_directory):
79
- job_desc = tx.process(
80
- job_description_directory+list_of_job_files[n], extension='docx', encoding='ascii')
81
-
82
- job_desc = str(job_desc, 'utf-8')
83
- job_description = Cleaner.Cleaner(job_desc)
84
- return [job_desc, job_description]
85
-
86
-
87
  job = read_job_description(index, job_description_names, job_desc_dir)
88
 
89
  option_yn = st.selectbox("Show the Job Description ?", options=['NO', 'YES'])
@@ -149,12 +110,6 @@ def get_text_from_df(df_iter):
149
  return output
150
 
151
 
152
- text_wc = get_text_from_df(cleaned_df['Selective'])
153
-
154
- wordcloud = WordCloud(width=3000, height=2000, random_state=1, background_color='salmon',
155
- colormap='Pastel1', collocations=False, stopwords=STOPWORDS).generate(text_wc)
156
- st.write(plt.imshow(wordcloud))
157
-
158
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
159
  'NO', 'YES'])
160
 
 
2
  from operator import index
3
  from wordcloud import WordCloud
4
  from pandas._config.config import options
 
 
 
5
  import pandas as pd
 
6
  import streamlit as st
7
  import plotly.express as px
8
  import plotly.graph_objects as go
 
25
  Total Score calculate is the overall average of the 4 mentioned token based algorithms and string based.
26
  """)
27
 
 
 
 
 
 
 
 
 
 
28
 
29
  # to read all the resumes in the directory as provided by the user
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  if len(job_description_names) <= 1:
32
  st.write("There is only ", len(job_description_names),
33
  "present. It will be used to create scores.")
 
45
  len(job_description_names)-1, 1)
46
 
47
 
 
 
 
 
 
 
 
 
 
48
  job = read_job_description(index, job_description_names, job_desc_dir)
49
 
50
  option_yn = st.selectbox("Show the Job Description ?", options=['NO', 'YES'])
 
110
  return output
111
 
112
 
 
 
 
 
 
 
113
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
114
  'NO', 'YES'])
115
 
Progress/app2.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wordcloud import STOPWORDS
2
+ from operator import index
3
+ from wordcloud import WordCloud
4
+ from pandas._config.config import options
5
+ import pandas as pd
6
+ import streamlit as st
7
+ import plotly.express as px
8
+ import plotly.graph_objects as go
9
+ import matplotlib.pyplot as plt
10
+ import Similar
11
+
12
+ # Reading the CSV files prepared by the fileReader.py
13
+ Resumes = pd.read_csv('Resume_data.csv')
14
+ Jobs = pd.read_csv('Job_Data.csv')
15
+
16
+
17
+ # Checking for Multiple Job Descriptions
18
+ # If more than one Job Descriptions are available, it asks user to select one as well.
19
+
20
+ if len(Jobs['Name']) <= 1:
21
+ st.write(
22
+ "There is only 1 Job Description present. It will be used to create scores.")
23
+ else:
24
+ st.write("There are ", len(Jobs['Name']),
25
+ "Job Descriptions available. Please select one.")
26
+
27
+
28
+ # Asking to Print the Job Desciption Names
29
+ if len(Jobs['Name']) > 1:
30
+ option_yn = st.selectbox(
31
+ "Show the Job Description Names?", options=['NO', 'YES'])
32
+ if option_yn == 'YES':
33
+ index = [a for a in range(len(Jobs['Name']))]
34
+ fig = go.Figure(data=[go.Table(header=dict(values=["Job No.", "Job Desc. Name"], line_color='darkslategray',
35
+ fill_color='lightskyblue'),
36
+ cells=dict(values=[index, Jobs['Name']], line_color='darkslategray',
37
+ fill_color='cyan'))
38
+ ])
39
+ st.write(fig)
40
+
41
+
42
+ # Asking to chose the Job Description
43
+ index = st.slider("Which JD to select ? : ", 0,
44
+ len(Jobs['Name'])-1, 1)
45
+
46
+
47
+ option_yn = st.selectbox("Show the Job Description ?", options=['NO', 'YES'])
48
+ if option_yn == 'YES':
49
+ st.markdown("---")
50
+ st.markdown("### Job Description :")
51
+ st.text(Jobs['Context'][index])
52
+ st.markdown("---")
53
+
54
+
55
+ # def calculate_scores(resumes, job_description, x=5, y=5):
56
+ # scores = []
57
+ # for text in resumes:
58
+ # score = Similar.match(esumes['TF_Based'][x], Jobs[])
59
+ # scores.append(score)
60
+ # return scores
61
+
62
+ def Scoring()
Progress/app_testing.ipynb CHANGED
@@ -19,7 +19,8 @@
19
  "import Similar\n",
20
  "import textract as tx\n",
21
  "import pandas as pd\n",
22
- "import os"
 
23
  ]
24
  },
25
  {
@@ -72,22 +73,31 @@
72
  },
73
  {
74
  "cell_type": "code",
75
- "execution_count": 19,
 
 
 
 
 
 
 
76
  "metadata": {},
77
  "outputs": [],
78
  "source": [
79
  "def get_cleaned_words(document):\n",
80
  " for i in range(len(document)):\n",
81
  " raw = Cleaner.Cleaner(document[i][1])\n",
82
- " document[i].append(raw[0])\n",
83
- " document[i].append(raw[0])\n",
84
- " document[i].append(raw[0])\n",
 
 
85
  " return document"
86
  ]
87
  },
88
  {
89
  "cell_type": "code",
90
- "execution_count": 20,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
@@ -96,16 +106,23 @@
96
  },
97
  {
98
  "cell_type": "code",
99
- "execution_count": 26,
100
  "metadata": {},
101
  "outputs": [],
102
  "source": [
103
- "Database = pd.DataFrame(document,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\"])"
104
  ]
105
  },
106
  {
107
  "cell_type": "code",
108
- "execution_count": 28,
 
 
 
 
 
 
 
109
  "metadata": {},
110
  "outputs": [],
111
  "source": [
@@ -114,7 +131,7 @@
114
  },
115
  {
116
  "cell_type": "code",
117
- "execution_count": 29,
118
  "metadata": {},
119
  "outputs": [],
120
  "source": [
@@ -132,7 +149,7 @@
132
  },
133
  {
134
  "cell_type": "code",
135
- "execution_count": 30,
136
  "metadata": {},
137
  "outputs": [],
138
  "source": [
@@ -141,7 +158,7 @@
141
  },
142
  {
143
  "cell_type": "code",
144
- "execution_count": 31,
145
  "metadata": {},
146
  "outputs": [],
147
  "source": [
@@ -150,16 +167,16 @@
150
  },
151
  {
152
  "cell_type": "code",
153
- "execution_count": 34,
154
  "metadata": {},
155
  "outputs": [],
156
  "source": [
157
- "jd_database = pd.DataFrame(Jd,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\"])"
158
  ]
159
  },
160
  {
161
  "cell_type": "code",
162
- "execution_count": 38,
163
  "metadata": {},
164
  "outputs": [],
165
  "source": [
@@ -168,17 +185,43 @@
168
  },
169
  {
170
  "cell_type": "code",
171
- "execution_count": null,
172
  "metadata": {},
173
- "outputs": [],
174
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  },
176
  {
177
  "cell_type": "code",
178
- "execution_count": null,
179
  "metadata": {},
180
- "outputs": [],
181
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  },
183
  {
184
  "cell_type": "code",
 
19
  "import Similar\n",
20
  "import textract as tx\n",
21
  "import pandas as pd\n",
22
+ "import os\n",
23
+ "import tf_idf"
24
  ]
25
  },
26
  {
 
73
  },
74
  {
75
  "cell_type": "code",
76
+ "execution_count": null,
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": []
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 6,
84
  "metadata": {},
85
  "outputs": [],
86
  "source": [
87
  "def get_cleaned_words(document):\n",
88
  " for i in range(len(document)):\n",
89
  " raw = Cleaner.Cleaner(document[i][1])\n",
90
+ " document[i].append(\" \".join(raw[0]))\n",
91
+ " document[i].append(\" \".join(raw[1]))\n",
92
+ " document[i].append(\" \".join(raw[2]))\n",
93
+ " sentence = tf_idf.do_tfidf(document[i][2].split(\" \"))\n",
94
+ " document[i].append(sentence)\n",
95
  " return document"
96
  ]
97
  },
98
  {
99
  "cell_type": "code",
100
+ "execution_count": 7,
101
  "metadata": {},
102
  "outputs": [],
103
  "source": [
 
106
  },
107
  {
108
  "cell_type": "code",
109
+ "execution_count": 8,
110
  "metadata": {},
111
  "outputs": [],
112
  "source": [
113
+ "Database = pd.DataFrame(document,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\",\"TF_Based\"])"
114
  ]
115
  },
116
  {
117
  "cell_type": "code",
118
+ "execution_count": null,
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": []
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 9,
126
  "metadata": {},
127
  "outputs": [],
128
  "source": [
 
131
  },
132
  {
133
  "cell_type": "code",
134
+ "execution_count": 10,
135
  "metadata": {},
136
  "outputs": [],
137
  "source": [
 
149
  },
150
  {
151
  "cell_type": "code",
152
+ "execution_count": 11,
153
  "metadata": {},
154
  "outputs": [],
155
  "source": [
 
158
  },
159
  {
160
  "cell_type": "code",
161
+ "execution_count": 12,
162
  "metadata": {},
163
  "outputs": [],
164
  "source": [
 
167
  },
168
  {
169
  "cell_type": "code",
170
+ "execution_count": 13,
171
  "metadata": {},
172
  "outputs": [],
173
  "source": [
174
+ "jd_database = pd.DataFrame(Jd,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\",\"TF_Based\"])"
175
  ]
176
  },
177
  {
178
  "cell_type": "code",
179
+ "execution_count": 14,
180
  "metadata": {},
181
  "outputs": [],
182
  "source": [
 
185
  },
186
  {
187
  "cell_type": "code",
188
+ "execution_count": 15,
189
  "metadata": {},
190
+ "outputs": [
191
+ {
192
+ "data": {
193
+ "text/plain": [
194
+ "'able advanced algorithm amount analysis analytics applied apply appropriate approval audio bachelors bayes begin build business candidate cassandra change client cnn common communication companys conduct conducting crunching cut d3js data databacke database datum decision define deliver deliverable design develop diagnostic discover distribution edge education enable energetic engineer even excellent experience experiment focus forests fulltime game gamechange generate ggplot good grasp great gurgaon haryana hbase help hide high hive identify implementation implementationfinetune improvement include independently information innovative insight integrate internal job keep knn language learn learning liaise limit look lstm machine make massage matlab meaningful mining ml model mongodb must naive nosql numerical numpy opencv operation opportunity performance pig plan prediction prefer preferred present primary product proficiency programming python quality query quickly record regression remote require resourceful responsibilities rnn salary science sciencemachine scientist script scripting selfstarter skill skills smart solutions specific sql statistic statistical strategy structured supervision svm system tactical team technique technology temporarily testing text tool toolkit type understanding unstructured use vast video vision visualisation warehousing weka well work year'"
195
+ ]
196
+ },
197
+ "execution_count": 15,
198
+ "metadata": {},
199
+ "output_type": "execute_result"
200
+ }
201
+ ],
202
+ "source": [
203
+ "jd_database['TF_Based'][0]"
204
+ ]
205
  },
206
  {
207
  "cell_type": "code",
208
+ "execution_count": 16,
209
  "metadata": {},
210
+ "outputs": [
211
+ {
212
+ "data": {
213
+ "text/plain": [
214
+ "'accuracy achieve acquire active actively alankrit analysisdeep analysismachine analysissql analyst analystdata analytics area artificial aug augment authorization automated automation ba56b4f594cd449891db291ae8e04206last back bangaloredelhi bidirectional bilaspur binary brightness btech business capgemini category certification classification clean client cnn college commonly company compute computer computing context current cv data dataset datasets date datum day dce deep degree delhi dense designation details detecting detection develop duration education email embedding employment encode engineer engineering english epoch excel exist experience expert featured feeding field filter fire flatten follow foundation framework full functional glove google high hindi id image imagedatagenerator images incoming indiabengaluru industry initial integrated intelligence it join julsep jump kaggle kera keras key know kpo l2 lakh language languages last layer layers learn learning learningartificial learningdata library linux location look lstm machine manner marital mathematics mean ml model modeling models modified monitoring month months msexcel mttr name natural nature ncr networks neural nirjharpremium nlp notice number obtain occur offsite onsite other output padded percent perform period pg phone practitioner precision prediction pref preprocesse pretraine processing proficiency project projects python qualificationscertificationsprogram r2 read realtime recall record regex regression regularisation remote remove research resolution resume ridge role rotation rows sciencedata scientist score sectionwork self senior sentence sep sequence sequential server show singleunmarrie size skill skills smoke smokefire smokefiresafe speak sql stack statistical status stopword subcategory summary switch team technical tensorflow term test text ticket time title tokenize tool tools top total train trained training transfer ug unsafe use verified version vision 
visualizationsqlpythonmsexcelstatistical windows word word2vec work workfromhome write year years zoom'"
215
+ ]
216
+ },
217
+ "execution_count": 16,
218
+ "metadata": {},
219
+ "output_type": "execute_result"
220
+ }
221
+ ],
222
+ "source": [
223
+ "Database['TF_Based'][0]"
224
+ ]
225
  },
226
  {
227
  "cell_type": "code",
Progress/fileReader.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from operator import index
2
+ from pandas._config.config import options
3
+ import Cleaner
4
+ import Similar
5
+ import textract as tx
6
+ import pandas as pd
7
+ import os
8
+ import tf_idf
9
+
10
+ resume_dir = "Data/Resumes/"
11
+ job_desc_dir = "Data/JobDesc/"
12
+ resume_names = os.listdir(resume_dir)
13
+ job_description_names = os.listdir(job_desc_dir)
14
+
15
+ document = []
16
+
17
def read_resumes(list_of_resumes, resume_directory):
    """Extract raw text from every resume file.

    Parameters
    ----------
    list_of_resumes : list[str]
        File names (e.g. from ``os.listdir``) to process.
    resume_directory : str
        Directory that contains the resume files.

    Returns
    -------
    list[list]
        One ``[name, text]`` pair per resume, in input order.
    """
    placeholder = []
    for res in list_of_resumes:
        # os.path.join is robust to a missing trailing separator on the
        # directory path, unlike the previous plain string concatenation.
        raw = tx.process(os.path.join(resume_directory, res), encoding='ascii')
        # textract returns bytes; decode to a str for downstream cleaning.
        placeholder.append([res, str(raw, 'utf-8')])
    return placeholder
27
+
28
+ document = read_resumes(resume_names, resume_dir)
29
+
30
def get_cleaned_words(document):
    """Extend each ``[name, raw_text]`` record with cleaned-text columns.

    For every record, appends three space-joined token sentences produced by
    ``Cleaner.Cleaner`` and then a TF-IDF keyword sentence built from the
    first cleaned column (these become the "Cleaned", "Selective",
    "Selective_Reduced" and "TF_Based" DataFrame columns).
    Mutates ``document`` in place and returns it.
    """
    for record in document:
        cleaned = Cleaner.Cleaner(record[1])
        # Three token lists from the cleaner, each flattened to one sentence.
        record.append(" ".join(cleaned[0]))
        record.append(" ".join(cleaned[1]))
        record.append(" ".join(cleaned[2]))
        # TF-IDF keywords are derived from the "Cleaned" column (index 2).
        keyword_sentence = tf_idf.do_tfidf(record[2].split(" "))
        record.append(keyword_sentence)
    return document
39
+
40
+ Doc=get_cleaned_words(document)
41
+
42
+ Database = pd.DataFrame(document,columns=["Name","Context","Cleaned","Selective","Selective_Reduced","TF_Based"])
43
+
44
+ Database.to_csv("Resume_data.csv", index=False)
45
+
46
def read_jobdescriptions(job_description_names, job_desc_dir):
    """Extract raw text from every job-description file.

    Parameters
    ----------
    job_description_names : list[str]
        File names (e.g. from ``os.listdir``) to process.
    job_desc_dir : str
        Directory that contains the job-description files.

    Returns
    -------
    list[list]
        One ``[name, text]`` pair per job description, in input order.
    """
    records = []
    for filename in job_description_names:
        # textract returns bytes; decode to a str for downstream cleaning.
        raw_bytes = tx.process(job_desc_dir + filename, encoding='ascii')
        records.append([filename, str(raw_bytes, 'utf-8')])
    return records
56
+
57
+ job_document = read_jobdescriptions(job_description_names, job_desc_dir)
58
+
59
+ Jd=get_cleaned_words(job_document)
60
+
61
+ jd_database = pd.DataFrame(Jd,columns=["Name","Context","Cleaned","Selective","Selective_Reduced","TF_Based"])
62
+
63
+ jd_database.to_csv("Job_Data.csv",index=False)
Progress/generate_wordcloud.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wordcloud import WordCloud
2
+ import matplotlib.pyplot as plt
3
+
4
+
5
def generate_wordcloud(text):
    """Render ``text`` as an 800x800 word cloud in a matplotlib window.

    Side effect only: opens a figure via ``plt.show``; returns nothing.
    """
    cloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        colormap='viridis',
        collocations=False,
        min_font_size=10,
    ).generate(text)

    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(cloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()
Progress/performLDA.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gensim
2
+ import gensim.corpora as corpora
3
+
4
+
5
def get_list_of_words(document):
    """Tokenise each string in ``document`` by splitting on single spaces.

    Returns a list of token lists, one per input string, in input order.
    """
    return [text.split(" ") for text in document]
11
+
12
+
13
def LDA(document):
    """Fit a 5-topic LDA model over ``document`` (a list of token lists).

    Returns the fitted model applied back to the corpus, i.e. the
    per-document topic information (with per-word topics enabled).
    """
    dictionary = corpora.Dictionary(document)
    bow_corpus = [dictionary.doc2bow(tokens) for tokens in document]
    # Fixed random_state keeps topic assignments reproducible across runs.
    model = gensim.models.ldamodel.LdaModel(
        corpus=bow_corpus,
        id2word=dictionary,
        num_topics=5,
        random_state=100,
        update_every=1,
        chunksize=100,
        passes=50,
        alpha='auto',
        per_word_topics=True,
    )
    return model[bow_corpus]
19
+
20
+
21
+ def format_topics_sentences(ldamodel=None, corpus=corpus, texts=Document):
22
+ sent_topics_df = []
23
+ for i, row_list in enumerate(ldamodel[corpus]):
24
+ row = row_list[0] if ldamodel.per_word_topics else row_list
25
+ row = sorted(row, key=lambda x: (x[1]), reverse=True)
26
+ for j, (topic_num, prop_topic) in enumerate(row):
27
+ if j == 0:
28
+ wp = ldamodel.show_topic(topic_num)
29
+ topic_keywords = ", ".join([word for word, prop in wp])
30
+ sent_topics_df.append(
31
+ [i, int(topic_num), round(prop_topic, 4)*100, topic_keywords])
32
+ else:
33
+ break
34
+
35
+ return(sent_topics_df)
Progress/tf_idf.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+
3
def do_tfidf(token):
    """Return the TF-IDF vocabulary of ``token`` as one space-joined sentence.

    Parameters
    ----------
    token : list[str]
        Token strings, each treated as a "document" by the vectorizer.

    Returns
    -------
    str
        The surviving vocabulary words joined by single spaces. Words above
        the max_df or below the min_df document-frequency thresholds are
        dropped by the vectorizer.
    """
    tfidf = TfidfVectorizer(max_df=0.05, min_df=0.001)
    # Only the fitted vocabulary is needed; the returned matrix is unused.
    tfidf.fit_transform(token)
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer its replacement get_feature_names_out() when available.
    if hasattr(tfidf, "get_feature_names_out"):
        vocabulary = tfidf.get_feature_names_out()
    else:
        vocabulary = tfidf.get_feature_names()
    return " ".join(vocabulary)
Progress/topic_modeller_pd_returner.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Progress/topic_modelling_resumes.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -20,7 +20,7 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": 7,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
@@ -30,7 +30,7 @@
30
  },
31
  {
32
  "cell_type": "code",
33
- "execution_count": 8,
34
  "metadata": {},
35
  "outputs": [],
36
  "source": [
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "cell_type": "code",
44
- "execution_count": 58,
45
  "metadata": {},
46
  "outputs": [],
47
  "source": [
@@ -62,7 +62,7 @@
62
  },
63
  {
64
  "cell_type": "code",
65
- "execution_count": 59,
66
  "metadata": {},
67
  "outputs": [],
68
  "source": [
@@ -78,7 +78,7 @@
78
  },
79
  {
80
  "cell_type": "code",
81
- "execution_count": 60,
82
  "metadata": {},
83
  "outputs": [],
84
  "source": [
@@ -87,7 +87,7 @@
87
  },
88
  {
89
  "cell_type": "code",
90
- "execution_count": 69,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "cell_type": "code",
107
- "execution_count": 70,
108
  "metadata": {},
109
  "outputs": [
110
  {
@@ -129,7 +129,7 @@
129
  },
130
  {
131
  "cell_type": "code",
132
- "execution_count": 71,
133
  "metadata": {},
134
  "outputs": [],
135
  "source": [
@@ -141,98 +141,14 @@
141
  },
142
  {
143
  "cell_type": "code",
144
- "execution_count": 72,
145
  "metadata": {},
146
- "outputs": [
147
- {
148
- "data": {
149
- "text/html": [
150
- "\n",
151
- "<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css\">\n",
152
- "\n",
153
- "\n",
154
- "<div id=\"ldavis_el22281400102997868488005527649\"></div>\n",
155
- "<script type=\"text/javascript\">\n",
156
- "\n",
157
- "var ldavis_el22281400102997868488005527649_data = {\"mdsDat\": {\"x\": [0.045266102999448776, -0.045266102999448776], \"y\": [0.0, 0.0], \"topics\": [1, 2], \"cluster\": [1, 1], \"Freq\": [50.73581314086914, 49.264190673828125]}, \"tinfo\": {\"Term\": [\"improve\", \"less\", \"Days\", \"program\", \"reduce\", \"Technologies\", \"Sep\", \"lead\", \"NCR\", \"SQL\", \"notice\", \"Leader\", \"detect\", \"duration\", \"extensive\", \"track\", \"prediction\", \"structure\", \"document\", \"MTech\", \"enhance\", \"apply\", \"Apr\", \"user\", \"May\", \"Jump\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"NCR\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"cloud\", \"text\", \"actively\", \"leadership\", \"MS\", \"MCA\", \"Days\", \"less\", \"look\", \"sequence\", \"GCP\", \"monitoring\", \"junior\", \"level\", \"June\", \"transfer\", \"LearningDeep\", \"JDK\", \"box\", \"JIRA\", \"fashion\", \"pretraine\", \"domain\", \"working\", \"accordingly\", \"Marathi\", \"Jquery\", \"SQL\", \"client\", \"perform\", \"Marital\", \"notice\", \"Jump\", \"singleunmarrie\", \"degree\", \"professional\", \"SectionWork\", \"Resume\", \"IT\", \"Modified\", \"industry\", \"Highest\", \"Pref\", \"d\", \"UG\", \"ID\", \"Active\", \"experience\", \"Current\", \"Summary\", \"Months\", \"Location\", \"Status\", \"Phone\", \"Period\", \"Education\", \"total\", \"Functional\", \"year\", \"Key\", \"Number\", \"Email\", \"it\", \"Top\", \"version\", \"use\", \"back\", \"Name\", \"skill\", \"Area\", \"last\", \"Date\", \"Skills\", \"improve\", \"reduce\", \"program\", \"Technologies\", \"Leader\", \"track\", \"prediction\", \"extensive\", \"duration\", \"detect\", \"structure\", \"document\", \"enhance\", \"MTech\", \"apply\", \"value\", \"many\", \"control\", \"artificial\", \"Till\", \"tuning\", \"coordinate\", \"Numpy\", \"index\", \"enable\", \"Entity\", \"suggest\", \"steering\", \"architecture\", \"OpenCV\", \"Gradient\", \"camera\", \"Sep\", \"lead\", \"information\", 
\"identification\", \"Apr\", \"user\", \"May\", \"code\", \"identify\", \"performance\", \"algorithm\", \"deliver\", \"website\", \"section\", \"component\", \"css\", \"Maintenance\", \"company\", \"Nov\", \"issue\", \"test\", \"new\", \"system\", \"design\", \"develop\", \"role\", \"datum\", \"Computer\", \"Experience\", \"Services\", \"Engineering\", \"application\", \"ITSoftwareSoftware\", \"marital\", \"Skills\", \"Skill\", \"Date\", \"Area\", \"back\", \"skill\", \"last\", \"Name\", \"it\", \"version\", \"use\", \"Top\", \"Number\", \"Email\", \"year\", \"Key\", \"Education\", \"Period\", \"total\", \"Functional\", \"Status\", \"Phone\", \"Months\", \"Location\", \"ID\", \"UG\", \"d\", \"experience\", \"Summary\", \"Active\", \"Pref\"], \"Freq\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.0877420902252197, 2.4015042781829834, 2.4015040397644043, 2.4015040397644043, 2.401503801345825, 2.401503801345825, 2.401503801345825, 2.401503562927246, 2.401503562927246, 2.401503562927246, 2.401503324508667, 5.146483421325684, 5.146483421325684, 1.7152669429779053, 1.7152669429779053, 1.7152669429779053, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 5.832734107971191, 3.7740161418914795, 3.7740161418914795, 3.774012565612793, 7.891523361206055, 8.57771110534668, 7.205255508422852, 5.832758903503418, 5.832754135131836, 7.891510009765625, 7.891506671905518, 9.263986587524414, 9.263972282409668, 6.519008636474609, 7.891479969024658, 9.263984680175781, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263980865478516, 9.263980865478516, 9.2639799118042, 9.2639799118042, 
9.2639799118042, 9.263978004455566, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.263976097106934, 9.2639741897583, 9.2639741897583, 8.577733993530273, 8.577733993530273, 8.577733993530273, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.577725410461426, 3.736862897872925, 3.057375192642212, 3.057375192642212, 3.057373523712158, 2.377887010574341, 2.3778867721557617, 2.3778867721557617, 2.3778867721557617, 2.377887010574341, 2.377887010574341, 2.3778862953186035, 2.3778860569000244, 2.3778860569000244, 2.3778860569000244, 2.377885580062866, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 4.416375637054443, 4.4163618087768555, 1.698400616645813, 1.698400616645813, 3.7368907928466797, 3.7368743419647217, 3.7368698120117188, 4.416360855102539, 3.0574069023132324, 3.0573978424072266, 3.05739688873291, 3.057391881942749, 3.0573902130126953, 3.0573861598968506, 3.0573785305023193, 3.0573790073394775, 3.0573790073394775, 3.7368814945220947, 3.7368786334991455, 3.7368733882904053, 3.7368719577789307, 3.7368710041046143, 3.736868143081665, 4.416351318359375, 5.09583044052124, 5.775294303894043, 4.416353225708008, 4.416348934173584, 5.775278568267822, 5.775277137756348, 4.416343688964844, 4.4163289070129395, 5.095806121826172, 5.095800399780273, 5.775272369384766, 5.775270938873291, 5.775267124176025, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265216827393, 5.775263786315918, 5.775264739990234, 5.775264739990234, 5.775262832641602, 5.775261878967285, 5.775261878967285, 5.775261402130127, 5.775261402130127, 
5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259494781494, 5.775259971618652, 5.7752580642700195, 5.7752580642700195, 5.7752556800842285, 5.775256156921387, 5.775256156921387, 5.775257110595703, 5.775257587432861, 5.775256156921387, 5.775253772735596], \"Total\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.427489757537842, 2.741250991821289, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.74125075340271, 2.74125075340271, 6.165385723114014, 6.165386199951172, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.055013418197632, 2.055013418197632, 7.531106948852539, 4.792906284332275, 4.792906761169434, 4.792906761169434, 10.948802947998047, 12.314531326293945, 10.262561798095703, 8.210594177246094, 8.210594177246094, 11.628292083740234, 11.628291130065918, 14.359748840332031, 14.359748840332031, 9.576322555541992, 12.307779312133789, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039236068725586, 15.039236068725586, 14.352997779846191, 14.352996826171875, 14.352998733520508, 14.352996826171875, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352998733520508, 14.352997779846191, 
4.079987525939941, 3.400498390197754, 3.400498628616333, 3.400498390197754, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210099697113037, 2.7210099697113037, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 5.445394039154053, 5.445394039154053, 2.041522264480591, 2.041522264480591, 4.765904903411865, 4.765905380249023, 4.765904903411865, 6.131625175476074, 4.086416244506836, 4.086416244506836, 4.086415767669678, 4.086415767669678, 4.086415767669678, 4.086416244506836, 4.086415767669678, 4.086416244506836, 4.086416244506836, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136516571045, 7.504101753234863, 9.556071281433105, 12.294279098510742, 8.19034194946289, 8.19034194946289, 13.666759490966797, 13.666759490966797, 8.19034194946289, 8.19034194946289, 10.928550720214844, 10.928550720214844, 14.352997779846191, 14.352998733520508, 14.352998733520508, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352998733520508, 14.352996826171875, 14.352996826171875, 15.039236068725586, 15.039236068725586, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219], \"Category\": [\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", 
\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", 
\"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\"], \"logprob\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, -6.811100006103516, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -6.30019998550415, -6.30019998550415, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -6.175000190734863, -6.610400199890137, -6.610400199890137, -6.610400199890137, -5.872700214385986, -5.789299964904785, -5.963699817657471, -6.175000190734863, -6.175000190734863, -5.872700214385986, -5.872700214385986, -5.712399959564209, -5.712399959564209, -6.063799858093262, -5.872700214385986, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.789299964904785, -5.789299964904785, -5.789299964904785, 
-5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -6.590799808502197, -6.791500091552734, -6.791500091552734, -6.791500091552734, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -6.423699855804443, -6.423699855804443, -7.37939977645874, -7.37939977645874, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.280600070953369, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.1554999351501465, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.280600070953369, -6.280600070953369, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, 
-6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465], \"loglift\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5741999745368958, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.49790000915527344, 0.49790000915527344, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.4230000078678131, 0.43950000405311584, 0.43950000405311584, 0.43950000405311584, 0.35109999775886536, 0.31690001487731934, 0.3248000144958496, 0.33660000562667847, 0.33660000562667847, 0.29089999198913574, 0.29089999198913574, 0.2401999980211258, 0.2401999980211258, 0.2939999997615814, 0.23409999907016754, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.6201000213623047, 0.6015999913215637, 0.6015999913215637, 0.6015999913215637, 
0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.4984999895095825, 0.4984999895095825, 0.5239999890327454, 0.5239999890327454, 0.46470001339912415, 0.46470001339912415, 0.46470001339912415, 0.3797999918460846, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.1777999997138977, 0.07919999957084656, -0.047600001096725464, 0.09030000120401382, 0.09030000120401382, -0.1534000039100647, -0.1534000039100647, 0.09030000120401382, 0.09030000120401382, -0.054999999701976776, -0.054999999701976776, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214]}, \"token.table\": {\"Topic\": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 
1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2], \"Freq\": [0.598434567451477, 0.39895638823509216, 0.20982374250888824, 0.839294970035553, 0.6270467042922974, 0.41803112626075745, 0.48838010430336, 0.48838010430336, 0.598434567451477, 0.39895638823509216, 0.6270466446876526, 0.41803109645843506, 0.8109792470932007, 0.1621958464384079, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895644783973694, 0.48838010430336, 0.48838010430336, 0.9796611070632935, 0.585361897945404, 0.43902140855789185, 0.5984346270561218, 0.39895641803741455, 0.9732297658920288, 0.9796611070632935, 0.6499953866004944, 0.3249976933002472, 0.598434567451477, 0.39895638823509216, 0.6267518997192383, 0.34819549322128296, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.9732297658920288, 0.9732296466827393, 0.7308439016342163, 0.32481950521469116, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7295939922332764, 0.7295939922332764, 0.7350212335586548, 0.2447132021188736, 0.734139621257782, 0.9732296466827393, 0.834566593170166, 0.2086416482925415, 0.20982374250888824, 0.839294970035553, 0.6267518997192383, 0.34819549322128296, 0.598434567451477, 0.39895638823509216, 0.8752761483192444, 0.6270467638969421, 0.41803115606307983, 0.36682870984077454, 0.7336574196815491, 0.5984346270561218, 0.39895644783973694, 0.979661226272583, 
0.9796611070632935, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895641803741455, 0.598434567451477, 0.39895638823509216, 0.6879772543907166, 0.3439886271953583, 0.7966956496238708, 0.2655652165412903, 0.6879771947860718, 0.3439885973930359, 0.1836414337158203, 0.7345657348632812, 0.585361897945404, 0.43902140855789185, 0.6270466446876526, 0.41803109645843506, 0.6270467042922974, 0.41803112626075745, 0.598434567451477, 0.39895638823509216, 0.598434567451477, 0.39895638823509216, 0.8822236061096191, 0.9796611070632935, 0.6270467638969421, 0.41803115606307983, 0.598434567451477, 0.39895638823509216, 0.9732297658920288, 0.7295939922332764, 0.24471323192119598, 0.7341396808624268, 0.48838010430336, 0.48838010430336, 0.7350212335586548, 0.9796611070632935, 0.9796611070632935, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.9796611070632935, 0.8345667123794556, 0.2086416780948639, 0.7295938730239868, 0.32617780566215515, 0.6523556113243103, 0.36682870984077454, 0.7336574196815491, 0.24471323192119598, 0.7341396808624268, 0.9796611070632935, 0.979661226272583, 0.2447132021188736, 0.734139621257782, 0.7295938730239868, 0.598434567451477, 0.39895638823509216, 0.48838010430336, 0.48838010430336, 0.7307631969451904, 0.24358773231506348, 0.24471323192119598, 0.7341396808624268, 0.39978134632110596, 0.5330418348312378, 0.73502117395401, 0.4185820519924164, 0.5232275724411011, 0.7350212335586548, 0.9732297658920288, 0.73502117395401, 0.979661226272583, 0.7350212335586548, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732298851013184, 0.7295939922332764, 0.9796611070632935, 0.2447132021188736, 0.734139621257782, 0.9803951382637024, 0.979661226272583, 0.7309695482254028, 0.3132726550102234, 0.9796611070632935, 0.36682870984077454, 0.7336574196815491, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.6270467042922974, 0.41803112626075745, 0.1836414337158203, 0.7345657348632812, 0.7295938730239868, 0.8109791874885559, 
0.1621958464384079, 0.9732298851013184, 0.9732297658920288, 0.979661226272583, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.36682870984077454, 0.7336574196815491, 0.7306734919548035, 0.2740025520324707, 0.834566593170166, 0.2086416482925415, 0.2447132021188736, 0.734139621257782, 0.7350212335586548, 0.9732297658920288, 0.7307631969451904, 0.24358773231506348, 0.8822235465049744, 0.8822236061096191, 0.5693705081939697, 0.48803186416625977, 0.2447132021188736, 0.734139621257782, 0.9732297658920288, 0.7295938730239868, 0.6820908784866333, 0.29232466220855713, 0.6270467042922974, 0.41803112626075745, 0.9796611070632935, 0.7350212335586548, 0.9796611070632935, 0.36682868003845215, 0.7336573600769043, 0.36682870984077454, 0.7336574196815491, 0.7295938730239868, 0.5984346270561218, 0.39895641803741455, 0.7350212335586548, 0.9732297658920288, 0.7295938730239868, 0.979661226272583, 0.6270467638969421, 0.41803115606307983, 0.20982372760772705, 0.8392949104309082, 0.979661226272583, 0.6270466446876526, 0.41803109645843506, 0.24471323192119598, 0.7341396808624268, 0.9732297658920288, 0.598434567451477, 0.39895638823509216], \"Term\": [\"Active\", \"Active\", \"Apr\", \"Apr\", \"Area\", \"Area\", \"Computer\", \"Computer\", \"Current\", \"Current\", \"Date\", \"Date\", \"Days\", \"Days\", \"Education\", \"Education\", \"Email\", \"Email\", \"Engineering\", \"Engineering\", \"Entity\", \"Experience\", \"Experience\", \"Functional\", \"Functional\", \"GCP\", \"Gradient\", \"Highest\", \"Highest\", \"ID\", \"ID\", \"IT\", \"IT\", \"ITSoftwareSoftware\", \"ITSoftwareSoftware\", \"JDK\", \"JIRA\", \"Jquery\", \"Jump\", \"Jump\", \"June\", \"Key\", \"Key\", \"Leader\", \"LearningDeep\", \"Location\", \"Location\", \"MCA\", \"MS\", \"MTech\", \"Maintenance\", \"Maintenance\", \"Marathi\", \"Marital\", \"Marital\", \"May\", \"May\", \"Modified\", \"Modified\", \"Months\", \"Months\", \"NCR\", \"Name\", \"Name\", \"Nov\", \"Nov\", \"Number\", \"Number\", \"Numpy\", 
\"OpenCV\", \"Period\", \"Period\", \"Phone\", \"Phone\", \"Pref\", \"Pref\", \"Resume\", \"Resume\", \"SQL\", \"SQL\", \"SectionWork\", \"SectionWork\", \"Sep\", \"Sep\", \"Services\", \"Services\", \"Skill\", \"Skill\", \"Skills\", \"Skills\", \"Status\", \"Status\", \"Summary\", \"Summary\", \"Technologies\", \"Till\", \"Top\", \"Top\", \"UG\", \"UG\", \"accordingly\", \"actively\", \"algorithm\", \"algorithm\", \"application\", \"application\", \"apply\", \"architecture\", \"artificial\", \"back\", \"back\", \"box\", \"camera\", \"client\", \"client\", \"cloud\", \"code\", \"code\", \"company\", \"company\", \"component\", \"component\", \"control\", \"coordinate\", \"css\", \"css\", \"cycle\", \"d\", \"d\", \"datum\", \"datum\", \"degree\", \"degree\", \"deliver\", \"deliver\", \"design\", \"design\", \"detect\", \"develop\", \"develop\", \"document\", \"domain\", \"duration\", \"enable\", \"enhance\", \"experience\", \"experience\", \"extensive\", \"fashion\", \"full\", \"identification\", \"identify\", \"identify\", \"improve\", \"index\", \"industry\", \"industry\", \"information\", \"issue\", \"issue\", \"it\", \"it\", \"junior\", \"last\", \"last\", \"lead\", \"lead\", \"leadership\", \"less\", \"less\", \"level\", \"look\", \"many\", \"marital\", \"marital\", \"monitoring\", \"new\", \"new\", \"notice\", \"notice\", \"perform\", \"perform\", \"performance\", \"performance\", \"prediction\", \"pretraine\", \"professional\", \"professional\", \"program\", \"reduce\", \"role\", \"role\", \"section\", \"section\", \"sequence\", \"service\", \"singleunmarrie\", \"singleunmarrie\", \"skill\", \"skill\", \"steering\", \"structure\", \"suggest\", \"system\", \"system\", \"test\", \"test\", \"text\", \"total\", \"total\", \"track\", \"transfer\", \"troubleshoot\", \"tuning\", \"use\", \"use\", \"user\", \"user\", \"value\", \"version\", \"version\", \"website\", \"website\", \"working\", \"year\", \"year\"]}, \"R\": 30, \"lambda.step\": 0.01, \"plot.opts\": 
{\"xlab\": \"PC1\", \"ylab\": \"PC2\"}, \"topic.order\": [2, 1]};\n",
158
- "\n",
159
- "function LDAvis_load_lib(url, callback){\n",
160
- " var s = document.createElement('script');\n",
161
- " s.src = url;\n",
162
- " s.async = true;\n",
163
- " s.onreadystatechange = s.onload = callback;\n",
164
- " s.onerror = function(){console.warn(\"failed to load library \" + url);};\n",
165
- " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
166
- "}\n",
167
- "\n",
168
- "if(typeof(LDAvis) !== \"undefined\"){\n",
169
- " // already loaded: just create the visualization\n",
170
- " !function(LDAvis){\n",
171
- " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
172
- " }(LDAvis);\n",
173
- "}else if(typeof define === \"function\" && define.amd){\n",
174
- " // require.js is available: use it to load d3/LDAvis\n",
175
- " require.config({paths: {d3: \"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\"}});\n",
176
- " require([\"d3\"], function(d3){\n",
177
- " window.d3 = d3;\n",
178
- " LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
179
- " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
180
- " });\n",
181
- " });\n",
182
- "}else{\n",
183
- " // require.js not available: dynamically load d3 & LDAvis\n",
184
- " LDAvis_load_lib(\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js\", function(){\n",
185
- " LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
186
- " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
187
- " })\n",
188
- " });\n",
189
- "}\n",
190
- "</script>"
191
- ],
192
- "text/plain": [
193
- "PreparedData(topic_coordinates= x y topics cluster Freq\n",
194
- "topic \n",
195
- "1 0.045266 0.0 1 1 50.735813\n",
196
- "0 -0.045266 0.0 2 1 49.264191, topic_info= Term Freq Total Category logprob loglift\n",
197
- "759 improve 4.000000 4.000000 Default 30.0000 30.0000\n",
198
- "565 less 6.000000 6.000000 Default 29.0000 29.0000\n",
199
- "520 Days 6.000000 6.000000 Default 28.0000 28.0000\n",
200
- "781 program 3.000000 3.000000 Default 27.0000 27.0000\n",
201
- "1734 reduce 3.000000 3.000000 Default 26.0000 26.0000\n",
202
- "... ... ... ... ... ... ...\n",
203
- "185 d 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
204
- "197 experience 5.775257 15.039238 Topic2 -6.1555 -0.2491\n",
205
- "152 Summary 5.775258 15.039238 Topic2 -6.1555 -0.2491\n",
206
- "0 Active 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
207
- "117 Pref 5.775254 15.039238 Topic2 -6.1555 -0.2491\n",
208
- "\n",
209
- "[205 rows x 6 columns], token_table= Topic Freq Term\n",
210
- "term \n",
211
- "0 1 0.598435 Active\n",
212
- "0 2 0.398956 Active\n",
213
- "1107 1 0.209824 Apr\n",
214
- "1107 2 0.839295 Apr\n",
215
- "7 1 0.627047 Area\n",
216
- "... ... ... ...\n",
217
- "671 1 0.244713 website\n",
218
- "671 2 0.734140 website\n",
219
- "515 1 0.973230 working\n",
220
- "271 1 0.598435 year\n",
221
- "271 2 0.398956 year\n",
222
- "\n",
223
- "[226 rows x 3 columns], R=30, lambda_step=0.01, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, topic_order=[2, 1])"
224
- ]
225
- },
226
- "execution_count": 72,
227
- "metadata": {},
228
- "output_type": "execute_result"
229
- }
230
- ],
231
  "source": [
232
  "# Visualize the topics\n",
233
- "pyLDAvis.enable_notebook()\n",
234
- "vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
235
- "vis"
236
  ]
237
  },
238
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 3,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
 
30
  },
31
  {
32
  "cell_type": "code",
33
+ "execution_count": 4,
34
  "metadata": {},
35
  "outputs": [],
36
  "source": [
 
41
  },
42
  {
43
  "cell_type": "code",
44
+ "execution_count": 5,
45
  "metadata": {},
46
  "outputs": [],
47
  "source": [
 
62
  },
63
  {
64
  "cell_type": "code",
65
+ "execution_count": 6,
66
  "metadata": {},
67
  "outputs": [],
68
  "source": [
 
78
  },
79
  {
80
  "cell_type": "code",
81
+ "execution_count": 7,
82
  "metadata": {},
83
  "outputs": [],
84
  "source": [
 
87
  },
88
  {
89
  "cell_type": "code",
90
+ "execution_count": 8,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
 
104
  },
105
  {
106
  "cell_type": "code",
107
+ "execution_count": 9,
108
  "metadata": {},
109
  "outputs": [
110
  {
 
129
  },
130
  {
131
  "cell_type": "code",
132
+ "execution_count": 10,
133
  "metadata": {},
134
  "outputs": [],
135
  "source": [
 
141
  },
142
  {
143
  "cell_type": "code",
144
+ "execution_count": 11,
145
  "metadata": {},
146
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  "source": [
148
  "# Visualize the topics\n",
149
+ "# pyLDAvis.enable_notebook()\n",
150
+ "# vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
151
+ "# vis"
152
  ]
153
  },
154
  {
Progress/untitled.md ADDED
File without changes