srbhr commited on
Commit
cc44155
·
1 Parent(s): a44ae9a

Updating Progress

Browse files
Progress/Job_Data.csv CHANGED
@@ -1,4 +1,4 @@
1
- Name,Context,Cleaned,Selective,Selective_Reduced
2
  Data Scientist.docx,"Data Scientist/ ML Engineer
3
 
4
  Game Change Solutions
@@ -69,7 +69,7 @@ work: 1 year (Preferred)
69
 
70
  Education:
71
 
72
- Bachelor's (Preferred)","['Data', 'scientist', 'ML', 'Engineer', 'Game', 'Change', 'Solutions', 'Gurgaon', 'Haryana', 'temporarily', 'remote', 'year', 'look', 'data', 'scientist', 'help', 'discover', 'information', 'hide', 'vast', 'amount', 'datum', 'help', 'make', 'smart', 'decision', 'deliver', 'even', 'well', 'product', 'primary', 'focus', 'apply', 'datum', 'mining', 'technique', 'statistical', 'analysis', 'build', 'high', 'quality', 'prediction', 'system', 'integrate', 'product', 'Data', 'Scientist', 'GameChange', 'must', 'energetic', 'selfstarter', 'quickly', 'grasp', 'companys', 'vision', 'develop', 'specific', 'tactical', 'plan', 'begin', 'implementation', 'appropriate', 'approval', 'candidate', 'must', 'resourceful', 'able', 'deliver', 'plan', 'define', 'Responsibilities', 'Data', 'Crunching', 'datum', 'massage', 'structured', 'unstructured', 'datum', 'include', 'Numerical', 'Text', 'Audio', 'Video', 'datum', 'implementationfinetune', 'machine', 'learn', 'algorithm', 'Conducting', 'Data', 'sciencemachine', 'learning', 'experiment', 'present', 'insight', 'build', 'innovative', 'datum', 'product', 'use', 'cut', 'edge', 'tool', 'technology', 'conduct', 'datum', 'analysis', 'independently', 'limit', 'supervision', 'generate', 'business', 'insight', 'conduct', 'diagnostic', 'datum', 'identify', 'improvement', 'opportunity', 'design', 'databacke', 'performance', 'improvement', 'strategy', 'liaise', 'internal', 'team', 'Record', 'keep', 'datum', 'warehousing', 'enable', 'Advanced', 'Analytics', 'operation', 'generate', 'meaningful', 'business', 'insight', 'develop', 'high', 'quality', 'client', 'deliverable', 'r', 'model', 'Python', 'script', 'Skills', 'require', 'excellent', 'understanding', 'machine', 'learning', 'technique', 'algorithm', 'kNN', 'Naive', 'Bayes', 'SVM', 'Decision', 'Forests', 'CNN', 'rnn', 'LSTM', 'experience', 'common', 'datum', 'science', 'toolkit', 'R', 'Weka', 'numpy', 'OpenCV', 'MatLab', 'great', 'communication', 'skill', 'experience', 
'data', 'visualisation', 'tool', 'D3js', 'GGplot', 'Proficiency', 'use', 'query', 'language', 'SQL', 'Hive', 'Pig', 'Experience', 'NoSQL', 'database', 'mongodb', 'Cassandra', 'HBase', 'good', 'applied', 'statistic', 'skill', 'distribution', 'statistical', 'testing', 'regression', 'good', 'scripting', 'programming', 'skill', 'Job', 'Type', 'Fulltime', 'Salary', 'year', 'experience', 'Data', 'Science', 'year', 'prefer', 'work', 'year', 'Preferred', 'Education', 'Bachelors', 'prefer']","['Data', 'scientist', 'ML', 'Engineer', 'Game', 'Change', 'Solutions', 'Gurgaon', 'Haryana', 'temporarily', 'remote', 'year', 'look', 'data', 'scientist', 'help', 'discover', 'information', 'hide', 'vast', 'amount', 'datum', 'help', 'make', 'smart', 'decision', 'deliver', 'even', 'well', 'product', 'primary', 'focus', 'apply', 'datum', 'mining', 'technique', 'statistical', 'analysis', 'build', 'high', 'quality', 'prediction', 'system', 'integrate', 'product', 'Data', 'Scientist', 'GameChange', 'must', 'energetic', 'selfstarter', 'quickly', 'grasp', 'companys', 'vision', 'develop', 'specific', 'tactical', 'plan', 'begin', 'implementation', 'appropriate', 'approval', 'candidate', 'must', 'resourceful', 'able', 'deliver', 'plan', 'define', 'Responsibilities', 'Data', 'Crunching', 'datum', 'massage', 'structured', 'unstructured', 'datum', 'include', 'Numerical', 'Text', 'Audio', 'Video', 'datum', 'implementationfinetune', 'machine', 'learn', 'algorithm', 'Conducting', 'Data', 'sciencemachine', 'learning', 'experiment', 'present', 'insight', 'build', 'innovative', 'datum', 'product', 'use', 'cut', 'edge', 'tool', 'technology', 'conduct', 'datum', 'analysis', 'independently', 'limit', 'supervision', 'generate', 'business', 'insight', 'conduct', 'diagnostic', 'datum', 'identify', 'improvement', 'opportunity', 'design', 'databacke', 'performance', 'improvement', 'strategy', 'liaise', 'internal', 'team', 'Record', 'keep', 'datum', 'warehousing', 'enable', 'Advanced', 'Analytics', 'operation', 
'generate', 'meaningful', 'business', 'insight', 'develop', 'high', 'quality', 'client', 'deliverable', 'r', 'model', 'Python', 'script', 'Skills', 'require', 'excellent', 'understanding', 'machine', 'learning', 'technique', 'algorithm', 'kNN', 'Naive', 'Bayes', 'SVM', 'Decision', 'Forests', 'CNN', 'rnn', 'LSTM', 'experience', 'common', 'datum', 'science', 'toolkit', 'R', 'Weka', 'numpy', 'OpenCV', 'MatLab', 'great', 'communication', 'skill', 'experience', 'data', 'visualisation', 'tool', 'D3js', 'GGplot', 'Proficiency', 'use', 'query', 'language', 'SQL', 'Hive', 'Pig', 'Experience', 'NoSQL', 'database', 'mongodb', 'Cassandra', 'HBase', 'good', 'applied', 'statistic', 'skill', 'distribution', 'statistical', 'testing', 'regression', 'good', 'scripting', 'programming', 'skill', 'Job', 'Type', 'Fulltime', 'Salary', 'year', 'experience', 'Data', 'Science', 'year', 'prefer', 'work', 'year', 'Preferred', 'Education', 'Bachelors', 'prefer']","['Data', 'scientist', 'ML', 'Engineer', 'Game', 'Change', 'Solutions', 'Gurgaon', 'Haryana', 'temporarily', 'remote', 'year', 'look', 'data', 'scientist', 'help', 'discover', 'information', 'hide', 'vast', 'amount', 'datum', 'help', 'make', 'smart', 'decision', 'deliver', 'even', 'well', 'product', 'primary', 'focus', 'apply', 'datum', 'mining', 'technique', 'statistical', 'analysis', 'build', 'high', 'quality', 'prediction', 'system', 'integrate', 'product', 'Data', 'Scientist', 'GameChange', 'must', 'energetic', 'selfstarter', 'quickly', 'grasp', 'companys', 'vision', 'develop', 'specific', 'tactical', 'plan', 'begin', 'implementation', 'appropriate', 'approval', 'candidate', 'must', 'resourceful', 'able', 'deliver', 'plan', 'define', 'Responsibilities', 'Data', 'Crunching', 'datum', 'massage', 'structured', 'unstructured', 'datum', 'include', 'Numerical', 'Text', 'Audio', 'Video', 'datum', 'implementationfinetune', 'machine', 'learn', 'algorithm', 'Conducting', 'Data', 'sciencemachine', 'learning', 'experiment', 'present', 
'insight', 'build', 'innovative', 'datum', 'product', 'use', 'cut', 'edge', 'tool', 'technology', 'conduct', 'datum', 'analysis', 'independently', 'limit', 'supervision', 'generate', 'business', 'insight', 'conduct', 'diagnostic', 'datum', 'identify', 'improvement', 'opportunity', 'design', 'databacke', 'performance', 'improvement', 'strategy', 'liaise', 'internal', 'team', 'Record', 'keep', 'datum', 'warehousing', 'enable', 'Advanced', 'Analytics', 'operation', 'generate', 'meaningful', 'business', 'insight', 'develop', 'high', 'quality', 'client', 'deliverable', 'r', 'model', 'Python', 'script', 'Skills', 'require', 'excellent', 'understanding', 'machine', 'learning', 'technique', 'algorithm', 'kNN', 'Naive', 'Bayes', 'SVM', 'Decision', 'Forests', 'CNN', 'rnn', 'LSTM', 'experience', 'common', 'datum', 'science', 'toolkit', 'R', 'Weka', 'numpy', 'OpenCV', 'MatLab', 'great', 'communication', 'skill', 'experience', 'data', 'visualisation', 'tool', 'D3js', 'GGplot', 'Proficiency', 'use', 'query', 'language', 'SQL', 'Hive', 'Pig', 'Experience', 'NoSQL', 'database', 'mongodb', 'Cassandra', 'HBase', 'good', 'applied', 'statistic', 'skill', 'distribution', 'statistical', 'testing', 'regression', 'good', 'scripting', 'programming', 'skill', 'Job', 'Type', 'Fulltime', 'Salary', 'year', 'experience', 'Data', 'Science', 'year', 'prefer', 'work', 'year', 'Preferred', 'Education', 'Bachelors', 'prefer']"
73
  Web_dev_job.docx,"The position holder will be responsible for creation and implementation of a wide variety of Web-based products using PHP, JavaScript, MySQL and AJAX.
74
 
75
 
@@ -130,4 +130,4 @@ Understand Severity and Priority needs
130
 
131
  Time Management
132
 
133
- Quick decision making","['position', 'holder', 'responsible', 'creation', 'implementation', 'wide', 'variety', 'webbase', 'product', 'use', 'PHP', 'JavaScript', 'MySQL', 'AJAX', 'Key', 'Responsibility', 'Areas', 'create', 'design', 'modify', 'website', 'suit', 'requirement', 'client', 'PHP', 'Developers', 'need', 'thorough', 'knowledge', 'develop', 'cross', 'platform', 'compatible', 'web', 'mobile', 'web', 'application', 'sound', 'knowledge', 'working', 'experience', 'PHP', 'shall', 'hand', 'database', 'programming', 'experience', 'additional', 'skillset', 'oops', 'concept', 'PHP', 'LAMP', 'technology', 'xml', 'html', 'css', 'javascript', 'JQuery', 'Ajax', 'good', 'understanding', 'Oracle', 'PLSQL', 'relevant', 'software', 'architecture', 'software', 'development', 'software', 'testing', 'experience', 'Should', 'ability', 'work', 'team', 'business', 'owner', 'developer', 'designer', 'tester', 'Qulification', 'B', 'Tech', 'minimum', '10th', '12th', 'minimum', 'experience', 'year', 'PHP', 'development', 'experience', 'Skill', 'Sets', 'requirement', 'Experience', 'PHP', 'program', 'open', 'source', 'tool', 'implementation', 'ecommerce', 'project', 'good', 'command', 'AJAX', 'Database', 'SQL', 'programming', 'experience', 'PHP', 'program', 'open', 'Source', 'Tools', 'Knowledge', 'PHP', 'HTML5', 'css', 'Javascript', 'AJAX', 'Good', 'Knowledge', 'object', 'orient', 'programming', 'Knowledge', 'java', 'script', 'library', 'Jquery', 'Prototype', 'effective', 'communication', 'comprehension', 'skill', 'Understand', 'Severity', 'Priority', 'need', 'Time', 'Management', 'Quick', 'decision', 'making']","['position', 'holder', 'responsible', 'creation', 'implementation', 'wide', 'variety', 'webbase', 'product', 'use', 'PHP', 'JavaScript', 'MySQL', 'AJAX', 'Key', 'Responsibility', 'Areas', 'create', 'design', 'modify', 'website', 'suit', 'requirement', 'client', 'PHP', 'Developers', 'need', 'thorough', 'knowledge', 'develop', 'cross', 'platform', 'compatible', 'web', 'mobile', 
'web', 'application', 'sound', 'knowledge', 'working', 'experience', 'PHP', 'shall', 'hand', 'database', 'programming', 'experience', 'additional', 'skillset', 'oops', 'concept', 'PHP', 'LAMP', 'technology', 'xml', 'html', 'css', 'javascript', 'JQuery', 'Ajax', 'good', 'understanding', 'Oracle', 'PLSQL', 'relevant', 'software', 'architecture', 'software', 'development', 'software', 'testing', 'experience', 'Should', 'ability', 'work', 'team', 'business', 'owner', 'developer', 'designer', 'tester', 'Qulification', 'B', 'Tech', 'minimum', '10th', '12th', 'minimum', 'experience', 'year', 'PHP', 'development', 'experience', 'Skill', 'Sets', 'requirement', 'Experience', 'PHP', 'program', 'open', 'source', 'tool', 'implementation', 'ecommerce', 'project', 'good', 'command', 'AJAX', 'Database', 'SQL', 'programming', 'experience', 'PHP', 'program', 'open', 'Source', 'Tools', 'Knowledge', 'PHP', 'HTML5', 'css', 'Javascript', 'AJAX', 'Good', 'Knowledge', 'object', 'orient', 'programming', 'Knowledge', 'java', 'script', 'library', 'Jquery', 'Prototype', 'effective', 'communication', 'comprehension', 'skill', 'Understand', 'Severity', 'Priority', 'need', 'Time', 'Management', 'Quick', 'decision', 'making']","['position', 'holder', 'responsible', 'creation', 'implementation', 'wide', 'variety', 'webbase', 'product', 'use', 'PHP', 'JavaScript', 'MySQL', 'AJAX', 'Key', 'Responsibility', 'Areas', 'create', 'design', 'modify', 'website', 'suit', 'requirement', 'client', 'PHP', 'Developers', 'need', 'thorough', 'knowledge', 'develop', 'cross', 'platform', 'compatible', 'web', 'mobile', 'web', 'application', 'sound', 'knowledge', 'working', 'experience', 'PHP', 'shall', 'hand', 'database', 'programming', 'experience', 'additional', 'skillset', 'oops', 'concept', 'PHP', 'LAMP', 'technology', 'xml', 'html', 'css', 'javascript', 'JQuery', 'Ajax', 'good', 'understanding', 'Oracle', 'PLSQL', 'relevant', 'software', 'architecture', 'software', 'development', 'software', 'testing', 
'experience', 'Should', 'ability', 'work', 'team', 'business', 'owner', 'developer', 'designer', 'tester', 'Qulification', 'B', 'Tech', 'minimum', '10th', '12th', 'minimum', 'experience', 'year', 'PHP', 'development', 'experience', 'Skill', 'Sets', 'requirement', 'Experience', 'PHP', 'program', 'open', 'source', 'tool', 'implementation', 'ecommerce', 'project', 'good', 'command', 'AJAX', 'Database', 'SQL', 'programming', 'experience', 'PHP', 'program', 'open', 'Source', 'Tools', 'Knowledge', 'PHP', 'HTML5', 'css', 'Javascript', 'AJAX', 'Good', 'Knowledge', 'object', 'orient', 'programming', 'Knowledge', 'java', 'script', 'library', 'Jquery', 'Prototype', 'effective', 'communication', 'comprehension', 'skill', 'Understand', 'Severity', 'Priority', 'need', 'Time', 'Management', 'Quick', 'decision', 'making']"
 
1
+ Name,Context,Cleaned,Selective,Selective_Reduced,TF_Based
2
  Data Scientist.docx,"Data Scientist/ ML Engineer
3
 
4
  Game Change Solutions
 
69
 
70
  Education:
71
 
72
+ Bachelor's (Preferred)",Data scientist ML Engineer Game Change Solutions Gurgaon Haryana temporarily remote year look data scientist help discover information hide vast amount datum help make smart decision deliver even well product primary focus apply datum mining technique statistical analysis build high quality prediction system integrate product Data Scientist GameChange must energetic selfstarter quickly grasp companys vision develop specific tactical plan begin implementation appropriate approval candidate must resourceful able deliver plan define Responsibilities Data Crunching datum massage structured unstructured datum include Numerical Text Audio Video datum implementationfinetune machine learn algorithm Conducting Data sciencemachine learning experiment present insight build innovative datum product use cut edge tool technology conduct datum analysis independently limit supervision generate business insight conduct diagnostic datum identify improvement opportunity design databacke performance improvement strategy liaise internal team Record keep datum warehousing enable Advanced Analytics operation generate meaningful business insight develop high quality client deliverable r model Python script Skills require excellent understanding machine learning technique algorithm kNN Naive Bayes SVM Decision Forests CNN rnn LSTM experience common datum science toolkit R Weka numpy OpenCV MatLab great communication skill experience data visualisation tool D3js GGplot Proficiency use query language SQL Hive Pig Experience NoSQL database mongodb Cassandra HBase good applied statistic skill distribution statistical testing regression good scripting programming skill Job Type Fulltime Salary year experience Data Science year prefer work year Preferred Education Bachelors prefer,meaningful begin conduct temporarily experience Salary team tactical database datum science candidate Cassandra deliver applied Text model GGplot look present enable Education toolkit Weka 
supervision Scientist primary include keep grasp learning independently machine Conducting Experience Pig require make remote technique analysis experiment data diagnostic CNN edge approval plan HBase apply high good innovative NoSQL internal product tool ML script must selfstarter technology OpenCV kNN information Change strategy quality hide r Type Haryana energetic work programming Skills Proficiency Bachelors Preferred Naive communication MatLab Solutions rnn LSTM deliverable Bayes mining databacke language liaise Data Numerical Crunching cut vast Game regression develop warehousing statistical build Decision companys discover quickly testing amount Science operation business Video Job generate help SQL Advanced Engineer learn numpy design Forests R prefer Analytics great distribution implementation visualisation GameChange implementationfinetune mongodb identify performance scientist vision able Python appropriate Fulltime Audio opportunity insight improvement resourceful focus sciencemachine well understanding query client SVM specific prediction statistic Gurgaon define smart structured Record excellent D3js limit Hive decision scripting year use algorithm even unstructured common skill Responsibilities system integrate massage,meaningful begin Salary team database datum science candidate Cassandra Text model GGplot enable Education toolkit Weka supervision Scientist grasp machine Conducting Experience Pig technique analysis experiment CNN edge approval plan HBase NoSQL product tool ML script technology OpenCV kNN information Change strategy quality hide r Type Haryana work Proficiency Bachelors Preferred Naive communication MatLab LSTM Bayes mining language liaise Data Numerical Crunching Game regression build Decision companys amount Science operation business Video Job generate help SQL Advanced Engineer design Analytics distribution implementation visualisation GameChange mongodb performance scientist vision Python Fulltime Audio opportunity insight 
improvement focus sciencemachine query client SVM prediction statistic Gurgaon define Record D3js limit Hive decision scripting year use algorithm skill system massage,able advanced algorithm amount analysis analytics applied apply appropriate approval audio bachelors bayes begin build business candidate cassandra change client cnn common communication companys conduct conducting crunching cut d3js data databacke database datum decision define deliver deliverable design develop diagnostic discover distribution edge education enable energetic engineer even excellent experience experiment focus forests fulltime game gamechange generate ggplot good grasp great gurgaon haryana hbase help hide high hive identify implementation implementationfinetune improvement include independently information innovative insight integrate internal job keep knn language learn learning liaise limit look lstm machine make massage matlab meaningful mining ml model mongodb must naive nosql numerical numpy opencv operation opportunity performance pig plan prediction prefer preferred present primary product proficiency programming python quality query quickly record regression remote require resourceful responsibilities rnn salary science sciencemachine scientist script scripting selfstarter skill skills smart solutions specific sql statistic statistical strategy structured supervision svm system tactical team technique technology temporarily testing text tool toolkit type understanding unstructured use vast video vision visualisation warehousing weka well work year
73
  Web_dev_job.docx,"The position holder will be responsible for creation and implementation of a wide variety of Web-based products using PHP, JavaScript, MySQL and AJAX.
74
 
75
 
 
130
 
131
  Time Management
132
 
133
+ Quick decision making",position holder responsible creation implementation wide variety webbase product use PHP JavaScript MySQL AJAX Key Responsibility Areas create design modify website suit requirement client PHP Developers need thorough knowledge develop cross platform compatible web mobile web application sound knowledge working experience PHP shall hand database programming experience additional skillset oops concept PHP LAMP technology xml html css javascript JQuery Ajax good understanding Oracle PLSQL relevant software architecture software development software testing experience Should ability work team business owner developer designer tester Qulification B Tech minimum 10th 12th minimum experience year PHP development experience Skill Sets requirement Experience PHP program open source tool implementation ecommerce project good command AJAX Database SQL programming experience PHP program open Source Tools Knowledge PHP HTML5 css Javascript AJAX Good Knowledge object orient programming Knowledge java script library Jquery Prototype effective communication comprehension skill Understand Severity Priority need Time Management Quick decision making,Sets AJAX ability experience team Javascript database comprehension Skill owner hand open Source working library web position Management create website creation Experience Developers cross LAMP Areas application 12th source good Jquery effective skillset product tool Responsibility Good script Severity tester technology webbase project suit software ecommerce mobile PLSQL holder minimum compatible orient work variety programming Time Tools responsible communication developer Quick html Qulification Oracle designer architecture knowledge develop oops making sound javascript JQuery testing Tech JavaScript business object SQL Priority Database command design B Knowledge modify relevant implementation additional need Understand Should css understanding client concept requirement HTML5 Prototype xml 10th decision 
MySQL program wide development year use Ajax Key java shall skill platform thorough PHP,AJAX ability experience team Javascript database comprehension Skill owner hand Source library web position Management website creation Experience LAMP Areas application source Jquery skillset product tool Responsibility script Severity tester technology webbase project suit software ecommerce PLSQL holder orient work variety programming Time Tools communication developer Quick html Qulification Oracle designer architecture knowledge sound javascript JQuery testing Tech JavaScript business object SQL Priority Database command design B Knowledge implementation need Understand client concept requirement Prototype xml decision MySQL program development year use Ajax Key java platform PHP,10th 12th ability additional ajax application architecture areas business client command communication compatible comprehension concept create creation cross css database decision design designer develop developer developers development ecommerce effective experience good hand holder html html5 implementation java javascript jquery key knowledge lamp library making management minimum mobile modify mysql need object oops open oracle orient owner platform plsql position priority product program programming project prototype quick qulification relevant requirement responsibility responsible script sets severity shall should skill skillset software sound source sql suit team tech technology tester testing thorough time tool tools understand understanding use variety web webbase website wide work working xml year
Progress/Resume_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Progress/Word Vector Model.ipynb ADDED
@@ -0,0 +1,379 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "## Checking out doc2Vec and Word Vectors to find out Similarities between documents. "
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "import pandas as pd"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 2,
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "from sklearn.feature_extraction.text import TfidfVectorizer"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 18,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "from sklearn.metrics import pairwise_distances"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 3,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "from sklearn.decomposition import TruncatedSVD"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 4,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": [
52
+ "from scipy.spatial import distance"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 33,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "import numpy as np"
62
+ ]
63
+ },
64
+ {
65
+ "cell_type": "code",
66
+ "execution_count": 44,
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "import spacy\n",
71
+ "nlp=spacy.load('en_vectors_web_lg')"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "code",
76
+ "execution_count": 6,
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "Resume = pd.read_csv('Resume_data.csv')"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 7,
86
+ "metadata": {},
87
+ "outputs": [],
88
+ "source": [
89
+ "Jobs = pd.read_csv('Job_Data.csv')"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 8,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# tfidf = TfidfVectorizer(max_features=100, max_df=3)"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": 14,
104
+ "metadata": {},
105
+ "outputs": [],
106
+ "source": [
107
+ "token = Resume['Cleaned'][14].split(\" \")\n",
108
+ "tfidf = TfidfVectorizer(max_df=0.05, min_df=0.001)\n",
109
+ "words = tfidf.fit_transform(token)\n",
110
+ "ss1 = \" \".join(tfidf.get_feature_names())"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "execution_count": 15,
116
+ "metadata": {},
117
+ "outputs": [],
118
+ "source": [
119
+ "job_token = Jobs['Cleaned'][0].split(\" \")\n",
120
+ "tfidf2 = TfidfVectorizer(max_df=0.05, min_df=0.001)\n",
121
+ "jwords = tfidf2.fit_transform(job_token)\n",
122
+ "ss2 = \" \".join(tfidf2.get_feature_names())"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 45,
128
+ "metadata": {},
129
+ "outputs": [],
130
+ "source": [
131
+ "doc1 = nlp(ss1)\n",
132
+ "doc2 = nlp(ss2)\n"
133
+ ]
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": 53,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": [
141
+ "# for a in doc1:\n",
142
+ "# print(a.has_vector)"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "metadata": {},
149
+ "outputs": [],
150
+ "source": [
151
+ "# def check_vector(doc):\n",
152
+ "# for token in doc:\n",
153
+ "# if token.has_vector\n",
154
+ " "
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": null,
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": []
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 65,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "# def compute_similarity(document,reference):\n",
171
+ "# scores = []\n",
172
+ "# for a in document:\n",
173
+ "# similars = []\n",
174
+ "# for b in reference:\n",
175
+ "# similars.append(a.similarity(b))\n",
176
+ "# scores.append(similars)\n",
177
+ "# return scores"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": [
186
+ "# scores = compute_similarity(doc1, doc2)"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": []
195
+ },
196
+ {
197
+ "cell_type": "code",
198
+ "execution_count": null,
199
+ "metadata": {},
200
+ "outputs": [],
201
+ "source": []
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": null,
206
+ "metadata": {},
207
+ "outputs": [],
208
+ "source": []
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": null,
213
+ "metadata": {},
214
+ "outputs": [],
215
+ "source": []
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": null,
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": []
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "execution_count": null,
227
+ "metadata": {},
228
+ "outputs": [],
229
+ "source": []
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": null,
234
+ "metadata": {},
235
+ "outputs": [],
236
+ "source": []
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": []
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": null,
248
+ "metadata": {},
249
+ "outputs": [],
250
+ "source": []
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": null,
255
+ "metadata": {},
256
+ "outputs": [],
257
+ "source": []
258
+ },
259
+ {
260
+ "cell_type": "code",
261
+ "execution_count": null,
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": []
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": null,
269
+ "metadata": {},
270
+ "outputs": [],
271
+ "source": []
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "execution_count": null,
276
+ "metadata": {},
277
+ "outputs": [],
278
+ "source": []
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": null,
283
+ "metadata": {},
284
+ "outputs": [],
285
+ "source": []
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": null,
290
+ "metadata": {},
291
+ "outputs": [],
292
+ "source": []
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "execution_count": null,
297
+ "metadata": {},
298
+ "outputs": [],
299
+ "source": []
300
+ },
301
+ {
302
+ "cell_type": "code",
303
+ "execution_count": null,
304
+ "metadata": {},
305
+ "outputs": [],
306
+ "source": []
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": null,
311
+ "metadata": {},
312
+ "outputs": [],
313
+ "source": []
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": null,
318
+ "metadata": {},
319
+ "outputs": [],
320
+ "source": []
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": null,
325
+ "metadata": {},
326
+ "outputs": [],
327
+ "source": []
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": null,
332
+ "metadata": {},
333
+ "outputs": [],
334
+ "source": []
335
+ },
336
+ {
337
+ "cell_type": "code",
338
+ "execution_count": null,
339
+ "metadata": {},
340
+ "outputs": [],
341
+ "source": []
342
+ },
343
+ {
344
+ "cell_type": "code",
345
+ "execution_count": null,
346
+ "metadata": {},
347
+ "outputs": [],
348
+ "source": []
349
+ },
350
+ {
351
+ "cell_type": "code",
352
+ "execution_count": null,
353
+ "metadata": {},
354
+ "outputs": [],
355
+ "source": []
356
+ }
357
+ ],
358
+ "metadata": {
359
+ "kernelspec": {
360
+ "display_name": "Python 3",
361
+ "language": "python",
362
+ "name": "python3"
363
+ },
364
+ "language_info": {
365
+ "codemirror_mode": {
366
+ "name": "ipython",
367
+ "version": 3
368
+ },
369
+ "file_extension": ".py",
370
+ "mimetype": "text/x-python",
371
+ "name": "python",
372
+ "nbconvert_exporter": "python",
373
+ "pygments_lexer": "ipython3",
374
+ "version": "3.8.2"
375
+ }
376
+ },
377
+ "nbformat": 4,
378
+ "nbformat_minor": 4
379
+ }
Progress/WordCloud.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
Progress/Word_relevance.ipynb ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 17,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "Data = pd.read_csv('Resume_data.csv')"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": 18,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "from sklearn.feature_extraction.text import TfidfVectorizer \n",
28
+ " \n",
29
+ "def get_scores(token):\n",
30
+ " tfidf_vectorizer=TfidfVectorizer(max_df=0.05, min_df=0.001) \n",
31
+ " tfidf_vectorizer_vectors=tfidf_vectorizer.fit_transform(token)\n",
32
+ " first_vector_tfidfvectorizer=tfidf_vectorizer_vectors[0]\n",
33
+ " df = pd.DataFrame(first_vector_tfidfvectorizer.T.todense(), index=tfidf_vectorizer.get_feature_names(), columns=[\"tfidf\"])\n",
34
+ " df = df.sort_values(by=[\"tfidf\"],ascending=False)\n",
35
+ " return df"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 19,
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "# get the first vector out (for the first document) \n",
45
+ "sdf = get_scores(Data['Cleaned'][0].split(\" \"))\n",
46
+ " \n",
47
+ "# place tf-idf values in a pandas data frame \n",
48
+ "\n"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 21,
54
+ "metadata": {},
55
+ "outputs": [
56
+ {
57
+ "data": {
58
+ "text/plain": [
59
+ "0 accuracy achieve acquire active actively alank...\n",
60
+ "1 730aca854657000679last accuracy active activel...\n",
61
+ "2 6c4b0bb78deb4912bd26b1a8a5f0218blast active al...\n",
62
+ "3 12c 2nd accord active amruta android api app a...\n",
63
+ "4 09e93b7a617e329a87last 9sep20 active agile all...\n",
64
+ "5 1sep20 accuracy active ambedkar analyse analys...\n",
65
+ "6 abhishek active airflow area article automate ...\n",
66
+ "7 active application architect area aspnet aug a...\n",
67
+ "8 0196ef52240b916a74last active adabooste adapti...\n",
68
+ "9 11sep20last access accordinglly active activit...\n",
69
+ "10 11sep20last active administrationperformance a...\n",
70
+ "11 accuracy aerospace ai algorithm algorithms ana...\n",
71
+ "12 21c5d05765c7180c58last 45xx 7sep20 active ad a...\n",
72
+ "13 accuracy active activity administration admini...\n",
73
+ "14 863c41556ec744d7b178c3c114e76e35last 9sep20 ac...\n",
74
+ "15 04a3bcbf1231f98382last accessibility active aj...\n",
75
+ "16 11sep20last 72beef2a10740d8848last access acco...\n",
76
+ "17 accordingly active adobe ajax ambedkar angular...\n",
77
+ "18 11sep20last abul access active actively activi...\n",
78
+ "19 active adsense ajax analytic apache applicatio...\n",
79
+ "20 07year 1providing accuracy active address afte...\n",
80
+ "Name: TF_Based, dtype: object"
81
+ ]
82
+ },
83
+ "execution_count": 21,
84
+ "metadata": {},
85
+ "output_type": "execute_result"
86
+ }
87
+ ],
88
+ "source": [
89
+ "Data['TF_Based']"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": null,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": []
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 20,
102
+ "metadata": {},
103
+ "outputs": [
104
+ {
105
+ "data": {
106
+ "text/html": [
107
+ "<div>\n",
108
+ "<style scoped>\n",
109
+ " .dataframe tbody tr th:only-of-type {\n",
110
+ " vertical-align: middle;\n",
111
+ " }\n",
112
+ "\n",
113
+ " .dataframe tbody tr th {\n",
114
+ " vertical-align: top;\n",
115
+ " }\n",
116
+ "\n",
117
+ " .dataframe thead th {\n",
118
+ " text-align: right;\n",
119
+ " }\n",
120
+ "</style>\n",
121
+ "<table border=\"1\" class=\"dataframe\">\n",
122
+ " <thead>\n",
123
+ " <tr style=\"text-align: right;\">\n",
124
+ " <th></th>\n",
125
+ " <th>tfidf</th>\n",
126
+ " </tr>\n",
127
+ " </thead>\n",
128
+ " <tbody>\n",
129
+ " <tr>\n",
130
+ " <th>alankrit</th>\n",
131
+ " <td>1.0</td>\n",
132
+ " </tr>\n",
133
+ " <tr>\n",
134
+ " <th>accuracy</th>\n",
135
+ " <td>0.0</td>\n",
136
+ " </tr>\n",
137
+ " <tr>\n",
138
+ " <th>r2</th>\n",
139
+ " <td>0.0</td>\n",
140
+ " </tr>\n",
141
+ " <tr>\n",
142
+ " <th>phone</th>\n",
143
+ " <td>0.0</td>\n",
144
+ " </tr>\n",
145
+ " <tr>\n",
146
+ " <th>practitioner</th>\n",
147
+ " <td>0.0</td>\n",
148
+ " </tr>\n",
149
+ " <tr>\n",
150
+ " <th>precision</th>\n",
151
+ " <td>0.0</td>\n",
152
+ " </tr>\n",
153
+ " <tr>\n",
154
+ " <th>prediction</th>\n",
155
+ " <td>0.0</td>\n",
156
+ " </tr>\n",
157
+ " <tr>\n",
158
+ " <th>pref</th>\n",
159
+ " <td>0.0</td>\n",
160
+ " </tr>\n",
161
+ " <tr>\n",
162
+ " <th>preprocesse</th>\n",
163
+ " <td>0.0</td>\n",
164
+ " </tr>\n",
165
+ " <tr>\n",
166
+ " <th>pretraine</th>\n",
167
+ " <td>0.0</td>\n",
168
+ " </tr>\n",
169
+ " <tr>\n",
170
+ " <th>processing</th>\n",
171
+ " <td>0.0</td>\n",
172
+ " </tr>\n",
173
+ " <tr>\n",
174
+ " <th>proficiency</th>\n",
175
+ " <td>0.0</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>project</th>\n",
179
+ " <td>0.0</td>\n",
180
+ " </tr>\n",
181
+ " <tr>\n",
182
+ " <th>projects</th>\n",
183
+ " <td>0.0</td>\n",
184
+ " </tr>\n",
185
+ " <tr>\n",
186
+ " <th>python</th>\n",
187
+ " <td>0.0</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>qualificationscertificationsprogram</th>\n",
191
+ " <td>0.0</td>\n",
192
+ " </tr>\n",
193
+ " <tr>\n",
194
+ " <th>read</th>\n",
195
+ " <td>0.0</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>period</th>\n",
199
+ " <td>0.0</td>\n",
200
+ " </tr>\n",
201
+ " <tr>\n",
202
+ " <th>realtime</th>\n",
203
+ " <td>0.0</td>\n",
204
+ " </tr>\n",
205
+ " <tr>\n",
206
+ " <th>recall</th>\n",
207
+ " <td>0.0</td>\n",
208
+ " </tr>\n",
209
+ " </tbody>\n",
210
+ "</table>\n",
211
+ "</div>"
212
+ ],
213
+ "text/plain": [
214
+ " tfidf\n",
215
+ "alankrit 1.0\n",
216
+ "accuracy 0.0\n",
217
+ "r2 0.0\n",
218
+ "phone 0.0\n",
219
+ "practitioner 0.0\n",
220
+ "precision 0.0\n",
221
+ "prediction 0.0\n",
222
+ "pref 0.0\n",
223
+ "preprocesse 0.0\n",
224
+ "pretraine 0.0\n",
225
+ "processing 0.0\n",
226
+ "proficiency 0.0\n",
227
+ "project 0.0\n",
228
+ "projects 0.0\n",
229
+ "python 0.0\n",
230
+ "qualificationscertificationsprogram 0.0\n",
231
+ "read 0.0\n",
232
+ "period 0.0\n",
233
+ "realtime 0.0\n",
234
+ "recall 0.0"
235
+ ]
236
+ },
237
+ "execution_count": 20,
238
+ "metadata": {},
239
+ "output_type": "execute_result"
240
+ }
241
+ ],
242
+ "source": [
243
+ "sdf.head(20)"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "metadata": {},
250
+ "outputs": [],
251
+ "source": []
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": []
259
+ }
260
+ ],
261
+ "metadata": {
262
+ "kernelspec": {
263
+ "display_name": "Python 3",
264
+ "language": "python",
265
+ "name": "python3"
266
+ },
267
+ "language_info": {
268
+ "codemirror_mode": {
269
+ "name": "ipython",
270
+ "version": 3
271
+ },
272
+ "file_extension": ".py",
273
+ "mimetype": "text/x-python",
274
+ "name": "python",
275
+ "nbconvert_exporter": "python",
276
+ "pygments_lexer": "ipython3",
277
+ "version": "3.8.2"
278
+ }
279
+ },
280
+ "nbformat": 4,
281
+ "nbformat_minor": 4
282
+ }
Progress/app.py CHANGED
@@ -2,11 +2,7 @@ from wordcloud import STOPWORDS
2
  from operator import index
3
  from wordcloud import WordCloud
4
  from pandas._config.config import options
5
- import Cleaner
6
- import Similar
7
- import textract as tx
8
  import pandas as pd
9
- import os
10
  import streamlit as st
11
  import plotly.express as px
12
  import plotly.graph_objects as go
@@ -29,35 +25,9 @@ Algorihms used:-
29
  Total Score calculate is the overall average of the 4 mentioned token based algorithms and string based.
30
  """)
31
 
32
- resume_dir = "Data/Resumes/"
33
- job_desc_dir = "Data/JobDesc/"
34
- resume_names = os.listdir(resume_dir)
35
- job_description_names = os.listdir(job_desc_dir)
36
-
37
- document = []
38
-
39
- st.write("Total Resumes found : ", len(resume_names))
40
- st.write("Total Job Descriptions found : ", len(job_description_names))
41
 
42
  # to read all the resumes in the directory as provided by the user
43
 
44
-
45
- def read_resumes(list_of_resumes, resume_directory):
46
- placeholder = []
47
- for res in list_of_resumes:
48
- temp = []
49
- temp.append(res)
50
- text = tx.process(resume_directory+res, encoding='ascii')
51
- text = str(text, 'utf-8')
52
- temp.append(text)
53
- placeholder.append(temp)
54
- return placeholder
55
-
56
-
57
- document = read_resumes(resume_names, resume_dir)
58
-
59
- df = pd.DataFrame(document, columns=['Name', 'Context'])
60
-
61
  if len(job_description_names) <= 1:
62
  st.write("There is only ", len(job_description_names),
63
  "present. It will be used to create scores.")
@@ -75,15 +45,6 @@ index = st.slider("Which JD to select ? : ", 0,
75
  len(job_description_names)-1, 1)
76
 
77
 
78
- def read_job_description(n, list_of_job_files, job_description_directory):
79
- job_desc = tx.process(
80
- job_description_directory+list_of_job_files[n], extension='docx', encoding='ascii')
81
-
82
- job_desc = str(job_desc, 'utf-8')
83
- job_description = Cleaner.Cleaner(job_desc)
84
- return [job_desc, job_description]
85
-
86
-
87
  job = read_job_description(index, job_description_names, job_desc_dir)
88
 
89
  option_yn = st.selectbox("Show the Job Description ?", options=['NO', 'YES'])
@@ -149,12 +110,6 @@ def get_text_from_df(df_iter):
149
  return output
150
 
151
 
152
- text_wc = get_text_from_df(cleaned_df['Selective'])
153
-
154
- wordcloud = WordCloud(width=3000, height=2000, random_state=1, background_color='salmon',
155
- colormap='Pastel1', collocations=False, stopwords=STOPWORDS).generate(text_wc)
156
- st.write(plt.imshow(wordcloud))
157
-
158
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
159
  'NO', 'YES'])
160
 
 
2
  from operator import index
3
  from wordcloud import WordCloud
4
  from pandas._config.config import options
 
 
 
5
  import pandas as pd
 
6
  import streamlit as st
7
  import plotly.express as px
8
  import plotly.graph_objects as go
 
25
  Total Score calculate is the overall average of the 4 mentioned token based algorithms and string based.
26
  """)
27
 
 
 
 
 
 
 
 
 
 
28
 
29
  # to read all the resumes in the directory as provided by the user
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  if len(job_description_names) <= 1:
32
  st.write("There is only ", len(job_description_names),
33
  "present. It will be used to create scores.")
 
45
  len(job_description_names)-1, 1)
46
 
47
 
 
 
 
 
 
 
 
 
 
48
  job = read_job_description(index, job_description_names, job_desc_dir)
49
 
50
  option_yn = st.selectbox("Show the Job Description ?", options=['NO', 'YES'])
 
110
  return output
111
 
112
 
 
 
 
 
 
 
113
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
114
  'NO', 'YES'])
115
 
Progress/app2.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wordcloud import STOPWORDS
2
+ from operator import index
3
+ from wordcloud import WordCloud
4
+ from pandas._config.config import options
5
+ import pandas as pd
6
+ import streamlit as st
7
+ import plotly.express as px
8
+ import plotly.graph_objects as go
9
+ import matplotlib.pyplot as plt
10
+ import Similar
11
+
12
+ # Reading the CSV files prepared by the fileReader.py
13
+ Resumes = pd.read_csv('Resume_data.csv')
14
+ Jobs = pd.read_csv('Job_Data.csv')
15
+
16
+
17
+ # Checking for Multiple Job Descriptions
18
+ # If more than one Job Descriptions are available, it asks user to select one as well.
19
+
20
+ if len(Jobs['Name']) <= 1:
21
+ st.write(
22
+ "There is only 1 Job Description present. It will be used to create scores.")
23
+ else:
24
+ st.write("There are ", len(Jobs['Name']),
25
+ "Job Descriptions available. Please select one.")
26
+
27
+
28
+ # Asking to Print the Job Desciption Names
29
+ if len(Jobs['Name']) > 1:
30
+ option_yn = st.selectbox(
31
+ "Show the Job Description Names?", options=['NO', 'YES'])
32
+ if option_yn == 'YES':
33
+ index = [a for a in range(len(Jobs['Name']))]
34
+ fig = go.Figure(data=[go.Table(header=dict(values=["Job No.", "Job Desc. Name"], line_color='darkslategray',
35
+ fill_color='lightskyblue'),
36
+ cells=dict(values=[index, Jobs['Name']], line_color='darkslategray',
37
+ fill_color='cyan'))
38
+ ])
39
+ st.write(fig)
40
+
41
+
42
+ # Asking to chose the Job Description
43
+ index = st.slider("Which JD to select ? : ", 0,
44
+ len(Jobs['Name'])-1, 1)
45
+
46
+
47
+ option_yn = st.selectbox("Show the Job Description ?", options=['NO', 'YES'])
48
+ if option_yn == 'YES':
49
+ st.markdown("---")
50
+ st.markdown("### Job Description :")
51
+ st.text(Jobs['Context'][index])
52
+ st.markdown("---")
53
+
54
+
55
+ # def calculate_scores(resumes, job_description, x=5, y=5):
56
+ # scores = []
57
+ # for text in resumes:
58
+ # score = Similar.match(esumes['TF_Based'][x], Jobs[])
59
+ # scores.append(score)
60
+ # return scores
61
+
62
+ def Scoring()
Progress/app_testing.ipynb CHANGED
@@ -19,7 +19,8 @@
19
  "import Similar\n",
20
  "import textract as tx\n",
21
  "import pandas as pd\n",
22
- "import os"
 
23
  ]
24
  },
25
  {
@@ -72,22 +73,31 @@
72
  },
73
  {
74
  "cell_type": "code",
75
- "execution_count": 19,
 
 
 
 
 
 
 
76
  "metadata": {},
77
  "outputs": [],
78
  "source": [
79
  "def get_cleaned_words(document):\n",
80
  " for i in range(len(document)):\n",
81
  " raw = Cleaner.Cleaner(document[i][1])\n",
82
- " document[i].append(raw[0])\n",
83
- " document[i].append(raw[0])\n",
84
- " document[i].append(raw[0])\n",
 
 
85
  " return document"
86
  ]
87
  },
88
  {
89
  "cell_type": "code",
90
- "execution_count": 20,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
@@ -96,16 +106,23 @@
96
  },
97
  {
98
  "cell_type": "code",
99
- "execution_count": 26,
100
  "metadata": {},
101
  "outputs": [],
102
  "source": [
103
- "Database = pd.DataFrame(document,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\"])"
104
  ]
105
  },
106
  {
107
  "cell_type": "code",
108
- "execution_count": 28,
 
 
 
 
 
 
 
109
  "metadata": {},
110
  "outputs": [],
111
  "source": [
@@ -114,7 +131,7 @@
114
  },
115
  {
116
  "cell_type": "code",
117
- "execution_count": 29,
118
  "metadata": {},
119
  "outputs": [],
120
  "source": [
@@ -132,7 +149,7 @@
132
  },
133
  {
134
  "cell_type": "code",
135
- "execution_count": 30,
136
  "metadata": {},
137
  "outputs": [],
138
  "source": [
@@ -141,7 +158,7 @@
141
  },
142
  {
143
  "cell_type": "code",
144
- "execution_count": 31,
145
  "metadata": {},
146
  "outputs": [],
147
  "source": [
@@ -150,16 +167,16 @@
150
  },
151
  {
152
  "cell_type": "code",
153
- "execution_count": 34,
154
  "metadata": {},
155
  "outputs": [],
156
  "source": [
157
- "jd_database = pd.DataFrame(Jd,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\"])"
158
  ]
159
  },
160
  {
161
  "cell_type": "code",
162
- "execution_count": 38,
163
  "metadata": {},
164
  "outputs": [],
165
  "source": [
@@ -168,17 +185,43 @@
168
  },
169
  {
170
  "cell_type": "code",
171
- "execution_count": null,
172
  "metadata": {},
173
- "outputs": [],
174
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  },
176
  {
177
  "cell_type": "code",
178
- "execution_count": null,
179
  "metadata": {},
180
- "outputs": [],
181
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  },
183
  {
184
  "cell_type": "code",
 
19
  "import Similar\n",
20
  "import textract as tx\n",
21
  "import pandas as pd\n",
22
+ "import os\n",
23
+ "import tf_idf"
24
  ]
25
  },
26
  {
 
73
  },
74
  {
75
  "cell_type": "code",
76
+ "execution_count": null,
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": []
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 6,
84
  "metadata": {},
85
  "outputs": [],
86
  "source": [
87
  "def get_cleaned_words(document):\n",
88
  " for i in range(len(document)):\n",
89
  " raw = Cleaner.Cleaner(document[i][1])\n",
90
+ " document[i].append(\" \".join(raw[0]))\n",
91
+ " document[i].append(\" \".join(raw[1]))\n",
92
+ " document[i].append(\" \".join(raw[2]))\n",
93
+ " sentence = tf_idf.do_tfidf(document[i][2].split(\" \"))\n",
94
+ " document[i].append(sentence)\n",
95
  " return document"
96
  ]
97
  },
98
  {
99
  "cell_type": "code",
100
+ "execution_count": 7,
101
  "metadata": {},
102
  "outputs": [],
103
  "source": [
 
106
  },
107
  {
108
  "cell_type": "code",
109
+ "execution_count": 8,
110
  "metadata": {},
111
  "outputs": [],
112
  "source": [
113
+ "Database = pd.DataFrame(document,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\",\"TF_Based\"])"
114
  ]
115
  },
116
  {
117
  "cell_type": "code",
118
+ "execution_count": null,
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": []
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 9,
126
  "metadata": {},
127
  "outputs": [],
128
  "source": [
 
131
  },
132
  {
133
  "cell_type": "code",
134
+ "execution_count": 10,
135
  "metadata": {},
136
  "outputs": [],
137
  "source": [
 
149
  },
150
  {
151
  "cell_type": "code",
152
+ "execution_count": 11,
153
  "metadata": {},
154
  "outputs": [],
155
  "source": [
 
158
  },
159
  {
160
  "cell_type": "code",
161
+ "execution_count": 12,
162
  "metadata": {},
163
  "outputs": [],
164
  "source": [
 
167
  },
168
  {
169
  "cell_type": "code",
170
+ "execution_count": 13,
171
  "metadata": {},
172
  "outputs": [],
173
  "source": [
174
+ "jd_database = pd.DataFrame(Jd,columns=[\"Name\",\"Context\",\"Cleaned\",\"Selective\",\"Selective_Reduced\",\"TF_Based\"])"
175
  ]
176
  },
177
  {
178
  "cell_type": "code",
179
+ "execution_count": 14,
180
  "metadata": {},
181
  "outputs": [],
182
  "source": [
 
185
  },
186
  {
187
  "cell_type": "code",
188
+ "execution_count": 15,
189
  "metadata": {},
190
+ "outputs": [
191
+ {
192
+ "data": {
193
+ "text/plain": [
194
+ "'able advanced algorithm amount analysis analytics applied apply appropriate approval audio bachelors bayes begin build business candidate cassandra change client cnn common communication companys conduct conducting crunching cut d3js data databacke database datum decision define deliver deliverable design develop diagnostic discover distribution edge education enable energetic engineer even excellent experience experiment focus forests fulltime game gamechange generate ggplot good grasp great gurgaon haryana hbase help hide high hive identify implementation implementationfinetune improvement include independently information innovative insight integrate internal job keep knn language learn learning liaise limit look lstm machine make massage matlab meaningful mining ml model mongodb must naive nosql numerical numpy opencv operation opportunity performance pig plan prediction prefer preferred present primary product proficiency programming python quality query quickly record regression remote require resourceful responsibilities rnn salary science sciencemachine scientist script scripting selfstarter skill skills smart solutions specific sql statistic statistical strategy structured supervision svm system tactical team technique technology temporarily testing text tool toolkit type understanding unstructured use vast video vision visualisation warehousing weka well work year'"
195
+ ]
196
+ },
197
+ "execution_count": 15,
198
+ "metadata": {},
199
+ "output_type": "execute_result"
200
+ }
201
+ ],
202
+ "source": [
203
+ "jd_database['TF_Based'][0]"
204
+ ]
205
  },
206
  {
207
  "cell_type": "code",
208
+ "execution_count": 16,
209
  "metadata": {},
210
+ "outputs": [
211
+ {
212
+ "data": {
213
+ "text/plain": [
214
+ "'accuracy achieve acquire active actively alankrit analysisdeep analysismachine analysissql analyst analystdata analytics area artificial aug augment authorization automated automation ba56b4f594cd449891db291ae8e04206last back bangaloredelhi bidirectional bilaspur binary brightness btech business capgemini category certification classification clean client cnn college commonly company compute computer computing context current cv data dataset datasets date datum day dce deep degree delhi dense designation details detecting detection develop duration education email embedding employment encode engineer engineering english epoch excel exist experience expert featured feeding field filter fire flatten follow foundation framework full functional glove google high hindi id image imagedatagenerator images incoming indiabengaluru industry initial integrated intelligence it join julsep jump kaggle kera keras key know kpo l2 lakh language languages last layer layers learn learning learningartificial learningdata library linux location look lstm machine manner marital mathematics mean ml model modeling models modified monitoring month months msexcel mttr name natural nature ncr networks neural nirjharpremium nlp notice number obtain occur offsite onsite other output padded percent perform period pg phone practitioner precision prediction pref preprocesse pretraine processing proficiency project projects python qualificationscertificationsprogram r2 read realtime recall record regex regression regularisation remote remove research resolution resume ridge role rotation rows sciencedata scientist score sectionwork self senior sentence sep sequence sequential server show singleunmarrie size skill skills smoke smokefire smokefiresafe speak sql stack statistical status stopword subcategory summary switch team technical tensorflow term test text ticket time title tokenize tool tools top total train trained training transfer ug unsafe use verified version vision 
visualizationsqlpythonmsexcelstatistical windows word word2vec work workfromhome write year years zoom'"
215
+ ]
216
+ },
217
+ "execution_count": 16,
218
+ "metadata": {},
219
+ "output_type": "execute_result"
220
+ }
221
+ ],
222
+ "source": [
223
+ "Database['TF_Based'][0]"
224
+ ]
225
  },
226
  {
227
  "cell_type": "code",
Progress/fileReader.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from operator import index
2
+ from pandas._config.config import options
3
+ import Cleaner
4
+ import Similar
5
+ import textract as tx
6
+ import pandas as pd
7
+ import os
8
+ import tf_idf
9
+
10
+ resume_dir = "Data/Resumes/"
11
+ job_desc_dir = "Data/JobDesc/"
12
+ resume_names = os.listdir(resume_dir)
13
+ job_description_names = os.listdir(job_desc_dir)
14
+
15
+ document = []
16
+
17
def read_resumes(list_of_resumes, resume_directory):
    """Extract raw text from every resume file.

    Parameters
    ----------
    list_of_resumes : list[str]
        File names (e.g. from ``os.listdir``) to process.
    resume_directory : str
        Directory that contains the resume files.

    Returns
    -------
    list[list]
        One ``[name, text]`` pair per resume, in input order.
    """
    placeholder = []
    for res in list_of_resumes:
        # os.path.join is robust to a missing trailing separator on the
        # directory path, unlike the previous plain string concatenation.
        raw = tx.process(os.path.join(resume_directory, res), encoding='ascii')
        # textract returns bytes; decode to a str for downstream cleaning.
        placeholder.append([res, str(raw, 'utf-8')])
    return placeholder
27
+
28
+ document = read_resumes(resume_names, resume_dir)
29
+
30
def get_cleaned_words(document):
    """Extend each ``[name, raw_text]`` record with cleaned-text columns.

    For every record, appends three space-joined token sentences produced by
    ``Cleaner.Cleaner`` and then a TF-IDF keyword sentence built from the
    first cleaned column (these become the "Cleaned", "Selective",
    "Selective_Reduced" and "TF_Based" DataFrame columns).
    Mutates ``document`` in place and returns it.
    """
    for record in document:
        cleaned = Cleaner.Cleaner(record[1])
        # Three token lists from the cleaner, each flattened to one sentence.
        record.append(" ".join(cleaned[0]))
        record.append(" ".join(cleaned[1]))
        record.append(" ".join(cleaned[2]))
        # TF-IDF keywords are derived from the "Cleaned" column (index 2).
        keyword_sentence = tf_idf.do_tfidf(record[2].split(" "))
        record.append(keyword_sentence)
    return document
39
+
40
+ Doc=get_cleaned_words(document)
41
+
42
+ Database = pd.DataFrame(document,columns=["Name","Context","Cleaned","Selective","Selective_Reduced","TF_Based"])
43
+
44
+ Database.to_csv("Resume_data.csv", index=False)
45
+
46
def read_jobdescriptions(job_description_names, job_desc_dir):
    """Extract raw text from every job-description file.

    Parameters
    ----------
    job_description_names : list[str]
        File names (e.g. from ``os.listdir``) to process.
    job_desc_dir : str
        Directory that contains the job-description files.

    Returns
    -------
    list[list]
        One ``[name, text]`` pair per job description, in input order.
    """
    records = []
    for filename in job_description_names:
        # textract returns bytes; decode to a str for downstream cleaning.
        raw_bytes = tx.process(job_desc_dir + filename, encoding='ascii')
        records.append([filename, str(raw_bytes, 'utf-8')])
    return records
56
+
57
+ job_document = read_jobdescriptions(job_description_names, job_desc_dir)
58
+
59
+ Jd=get_cleaned_words(job_document)
60
+
61
+ jd_database = pd.DataFrame(Jd,columns=["Name","Context","Cleaned","Selective","Selective_Reduced","TF_Based"])
62
+
63
+ jd_database.to_csv("Job_Data.csv",index=False)
Progress/generate_wordcloud.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from wordcloud import WordCloud
2
+ import matplotlib.pyplot as plt
3
+
4
+
5
def generate_wordcloud(text):
    """Render ``text`` as an 800x800 word cloud in a matplotlib window.

    Side effect only: opens a figure via ``plt.show``; returns nothing.
    """
    cloud = WordCloud(
        width=800,
        height=800,
        background_color='white',
        colormap='viridis',
        collocations=False,
        min_font_size=10,
    ).generate(text)

    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(cloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()
Progress/performLDA.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gensim
2
+ import gensim.corpora as corpora
3
+
4
+
5
def get_list_of_words(document):
    """Tokenise each string in ``document`` by splitting on single spaces.

    Returns a list of token lists, one per input string, in input order.
    """
    return [text.split(" ") for text in document]
11
+
12
+
13
def LDA(document):
    """Fit a 5-topic LDA model over ``document`` (a list of token lists).

    Returns the fitted model applied back to the corpus, i.e. the
    per-document topic information (with per-word topics enabled).
    """
    dictionary = corpora.Dictionary(document)
    bow_corpus = [dictionary.doc2bow(tokens) for tokens in document]
    # Fixed random_state keeps topic assignments reproducible across runs.
    model = gensim.models.ldamodel.LdaModel(
        corpus=bow_corpus,
        id2word=dictionary,
        num_topics=5,
        random_state=100,
        update_every=1,
        chunksize=100,
        passes=50,
        alpha='auto',
        per_word_topics=True,
    )
    return model[bow_corpus]
19
+
20
+
21
+ def format_topics_sentences(ldamodel=None, corpus=corpus, texts=Document):
22
+ sent_topics_df = []
23
+ for i, row_list in enumerate(ldamodel[corpus]):
24
+ row = row_list[0] if ldamodel.per_word_topics else row_list
25
+ row = sorted(row, key=lambda x: (x[1]), reverse=True)
26
+ for j, (topic_num, prop_topic) in enumerate(row):
27
+ if j == 0:
28
+ wp = ldamodel.show_topic(topic_num)
29
+ topic_keywords = ", ".join([word for word, prop in wp])
30
+ sent_topics_df.append(
31
+ [i, int(topic_num), round(prop_topic, 4)*100, topic_keywords])
32
+ else:
33
+ break
34
+
35
+ return(sent_topics_df)
Progress/tf_idf.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+
3
def do_tfidf(token):
    """Return the TF-IDF vocabulary of ``token`` as one space-joined sentence.

    Parameters
    ----------
    token : list[str]
        Token strings, each treated as a "document" by the vectorizer.

    Returns
    -------
    str
        The surviving vocabulary words joined by single spaces. Words above
        the max_df or below the min_df document-frequency thresholds are
        dropped by the vectorizer.
    """
    tfidf = TfidfVectorizer(max_df=0.05, min_df=0.001)
    # Only the fitted vocabulary is needed; the returned matrix is unused.
    tfidf.fit_transform(token)
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer its replacement get_feature_names_out() when available.
    if hasattr(tfidf, "get_feature_names_out"):
        vocabulary = tfidf.get_feature_names_out()
    else:
        vocabulary = tfidf.get_feature_names()
    return " ".join(vocabulary)
Progress/topic_modeller_pd_returner.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Progress/topic_modelling_resumes.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -20,7 +20,7 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": 7,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
@@ -30,7 +30,7 @@
30
  },
31
  {
32
  "cell_type": "code",
33
- "execution_count": 8,
34
  "metadata": {},
35
  "outputs": [],
36
  "source": [
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "cell_type": "code",
44
- "execution_count": 58,
45
  "metadata": {},
46
  "outputs": [],
47
  "source": [
@@ -62,7 +62,7 @@
62
  },
63
  {
64
  "cell_type": "code",
65
- "execution_count": 59,
66
  "metadata": {},
67
  "outputs": [],
68
  "source": [
@@ -78,7 +78,7 @@
78
  },
79
  {
80
  "cell_type": "code",
81
- "execution_count": 60,
82
  "metadata": {},
83
  "outputs": [],
84
  "source": [
@@ -87,7 +87,7 @@
87
  },
88
  {
89
  "cell_type": "code",
90
- "execution_count": 69,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "cell_type": "code",
107
- "execution_count": 70,
108
  "metadata": {},
109
  "outputs": [
110
  {
@@ -129,7 +129,7 @@
129
  },
130
  {
131
  "cell_type": "code",
132
- "execution_count": 71,
133
  "metadata": {},
134
  "outputs": [],
135
  "source": [
@@ -141,98 +141,14 @@
141
  },
142
  {
143
  "cell_type": "code",
144
- "execution_count": 72,
145
  "metadata": {},
146
- "outputs": [
147
- {
148
- "data": {
149
- "text/html": [
150
- "\n",
151
- "<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css\">\n",
152
- "\n",
153
- "\n",
154
- "<div id=\"ldavis_el22281400102997868488005527649\"></div>\n",
155
- "<script type=\"text/javascript\">\n",
156
- "\n",
157
- "var ldavis_el22281400102997868488005527649_data = {\"mdsDat\": {\"x\": [0.045266102999448776, -0.045266102999448776], \"y\": [0.0, 0.0], \"topics\": [1, 2], \"cluster\": [1, 1], \"Freq\": [50.73581314086914, 49.264190673828125]}, \"tinfo\": {\"Term\": [\"improve\", \"less\", \"Days\", \"program\", \"reduce\", \"Technologies\", \"Sep\", \"lead\", \"NCR\", \"SQL\", \"notice\", \"Leader\", \"detect\", \"duration\", \"extensive\", \"track\", \"prediction\", \"structure\", \"document\", \"MTech\", \"enhance\", \"apply\", \"Apr\", \"user\", \"May\", \"Jump\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"NCR\", \"service\", \"cycle\", \"troubleshoot\", \"full\", \"cloud\", \"text\", \"actively\", \"leadership\", \"MS\", \"MCA\", \"Days\", \"less\", \"look\", \"sequence\", \"GCP\", \"monitoring\", \"junior\", \"level\", \"June\", \"transfer\", \"LearningDeep\", \"JDK\", \"box\", \"JIRA\", \"fashion\", \"pretraine\", \"domain\", \"working\", \"accordingly\", \"Marathi\", \"Jquery\", \"SQL\", \"client\", \"perform\", \"Marital\", \"notice\", \"Jump\", \"singleunmarrie\", \"degree\", \"professional\", \"SectionWork\", \"Resume\", \"IT\", \"Modified\", \"industry\", \"Highest\", \"Pref\", \"d\", \"UG\", \"ID\", \"Active\", \"experience\", \"Current\", \"Summary\", \"Months\", \"Location\", \"Status\", \"Phone\", \"Period\", \"Education\", \"total\", \"Functional\", \"year\", \"Key\", \"Number\", \"Email\", \"it\", \"Top\", \"version\", \"use\", \"back\", \"Name\", \"skill\", \"Area\", \"last\", \"Date\", \"Skills\", \"improve\", \"reduce\", \"program\", \"Technologies\", \"Leader\", \"track\", \"prediction\", \"extensive\", \"duration\", \"detect\", \"structure\", \"document\", \"enhance\", \"MTech\", \"apply\", \"value\", \"many\", \"control\", \"artificial\", \"Till\", \"tuning\", \"coordinate\", \"Numpy\", \"index\", \"enable\", \"Entity\", \"suggest\", \"steering\", \"architecture\", \"OpenCV\", \"Gradient\", \"camera\", \"Sep\", \"lead\", \"information\", 
\"identification\", \"Apr\", \"user\", \"May\", \"code\", \"identify\", \"performance\", \"algorithm\", \"deliver\", \"website\", \"section\", \"component\", \"css\", \"Maintenance\", \"company\", \"Nov\", \"issue\", \"test\", \"new\", \"system\", \"design\", \"develop\", \"role\", \"datum\", \"Computer\", \"Experience\", \"Services\", \"Engineering\", \"application\", \"ITSoftwareSoftware\", \"marital\", \"Skills\", \"Skill\", \"Date\", \"Area\", \"back\", \"skill\", \"last\", \"Name\", \"it\", \"version\", \"use\", \"Top\", \"Number\", \"Email\", \"year\", \"Key\", \"Education\", \"Period\", \"total\", \"Functional\", \"Status\", \"Phone\", \"Months\", \"Location\", \"ID\", \"UG\", \"d\", \"experience\", \"Summary\", \"Active\", \"Pref\"], \"Freq\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.0877420902252197, 2.4015042781829834, 2.4015040397644043, 2.4015040397644043, 2.401503801345825, 2.401503801345825, 2.401503801345825, 2.401503562927246, 2.401503562927246, 2.401503562927246, 2.401503324508667, 5.146483421325684, 5.146483421325684, 1.7152669429779053, 1.7152669429779053, 1.7152669429779053, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 1.7152668237686157, 1.7152665853500366, 1.7152668237686157, 1.7152668237686157, 5.832734107971191, 3.7740161418914795, 3.7740161418914795, 3.774012565612793, 7.891523361206055, 8.57771110534668, 7.205255508422852, 5.832758903503418, 5.832754135131836, 7.891510009765625, 7.891506671905518, 9.263986587524414, 9.263972282409668, 6.519008636474609, 7.891479969024658, 9.263984680175781, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263981819152832, 9.263980865478516, 9.263980865478516, 9.2639799118042, 9.2639799118042, 
9.2639799118042, 9.263978004455566, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.26397705078125, 9.263976097106934, 9.2639741897583, 9.2639741897583, 8.577733993530273, 8.577733993530273, 8.577733993530273, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.57773208618164, 8.577725410461426, 3.736862897872925, 3.057375192642212, 3.057375192642212, 3.057373523712158, 2.377887010574341, 2.3778867721557617, 2.3778867721557617, 2.3778867721557617, 2.377887010574341, 2.377887010574341, 2.3778862953186035, 2.3778860569000244, 2.3778860569000244, 2.3778860569000244, 2.377885580062866, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984007358551025, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.6984003782272339, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 1.698400616645813, 4.416375637054443, 4.4163618087768555, 1.698400616645813, 1.698400616645813, 3.7368907928466797, 3.7368743419647217, 3.7368698120117188, 4.416360855102539, 3.0574069023132324, 3.0573978424072266, 3.05739688873291, 3.057391881942749, 3.0573902130126953, 3.0573861598968506, 3.0573785305023193, 3.0573790073394775, 3.0573790073394775, 3.7368814945220947, 3.7368786334991455, 3.7368733882904053, 3.7368719577789307, 3.7368710041046143, 3.736868143081665, 4.416351318359375, 5.09583044052124, 5.775294303894043, 4.416353225708008, 4.416348934173584, 5.775278568267822, 5.775277137756348, 4.416343688964844, 4.4163289070129395, 5.095806121826172, 5.095800399780273, 5.775272369384766, 5.775270938873291, 5.775267124176025, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265693664551, 5.775265216827393, 5.775263786315918, 5.775264739990234, 5.775264739990234, 5.775262832641602, 5.775261878967285, 5.775261878967285, 5.775261402130127, 5.775261402130127, 
5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259971618652, 5.775259494781494, 5.775259971618652, 5.7752580642700195, 5.7752580642700195, 5.7752556800842285, 5.775256156921387, 5.775256156921387, 5.775257110595703, 5.775257587432861, 5.775256156921387, 5.775253772735596], \"Total\": [4.0, 6.0, 6.0, 3.0, 3.0, 3.0, 5.0, 5.0, 3.0, 7.0, 10.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 12.0, 2.0, 2.0, 2.0, 2.0, 3.427489757537842, 2.741250991821289, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.741250991821289, 2.74125075340271, 2.741250991821289, 2.74125075340271, 2.74125075340271, 6.165385723114014, 6.165386199951172, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550129413604736, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.0550131797790527, 2.055013418197632, 2.055013418197632, 7.531106948852539, 4.792906284332275, 4.792906761169434, 4.792906761169434, 10.948802947998047, 12.314531326293945, 10.262561798095703, 8.210594177246094, 8.210594177246094, 11.628292083740234, 11.628291130065918, 14.359748840332031, 14.359748840332031, 9.576322555541992, 12.307779312133789, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039236068725586, 15.039236068725586, 14.352997779846191, 14.352996826171875, 14.352998733520508, 14.352996826171875, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352998733520508, 14.352997779846191, 
4.079987525939941, 3.400498390197754, 3.400498628616333, 3.400498390197754, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210099697113037, 2.7210099697113037, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.7210097312927246, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.0415220260620117, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 2.041522264480591, 5.445394039154053, 5.445394039154053, 2.041522264480591, 2.041522264480591, 4.765904903411865, 4.765905380249023, 4.765904903411865, 6.131625175476074, 4.086416244506836, 4.086416244506836, 4.086415767669678, 4.086415767669678, 4.086415767669678, 4.086416244506836, 4.086415767669678, 4.086416244506836, 4.086416244506836, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136039733887, 5.452136516571045, 7.504101753234863, 9.556071281433105, 12.294279098510742, 8.19034194946289, 8.19034194946289, 13.666759490966797, 13.666759490966797, 8.19034194946289, 8.19034194946289, 10.928550720214844, 10.928550720214844, 14.352997779846191, 14.352998733520508, 14.352998733520508, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352997779846191, 14.352996826171875, 14.352997779846191, 14.352998733520508, 14.352996826171875, 14.352996826171875, 15.039236068725586, 15.039236068725586, 15.039237976074219, 15.039237976074219, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237022399902, 15.039237976074219, 15.039237022399902, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219, 15.039237976074219], \"Category\": [\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", 
\"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Default\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic1\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", 
\"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\", \"Topic2\"], \"logprob\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, -6.811100006103516, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -7.062399864196777, -6.30019998550415, -6.30019998550415, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -7.398900032043457, -6.175000190734863, -6.610400199890137, -6.610400199890137, -6.610400199890137, -5.872700214385986, -5.789299964904785, -5.963699817657471, -6.175000190734863, -6.175000190734863, -5.872700214385986, -5.872700214385986, -5.712399959564209, -5.712399959564209, -6.063799858093262, -5.872700214385986, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.712399959564209, -5.789299964904785, -5.789299964904785, -5.789299964904785, 
-5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -5.789299964904785, -6.590799808502197, -6.791500091552734, -6.791500091552734, -6.791500091552734, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.042900085449219, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -7.37939977645874, -6.423699855804443, -6.423699855804443, -7.37939977645874, -7.37939977645874, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.791500091552734, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.590799808502197, -6.423799991607666, -6.280600070953369, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.1554999351501465, -6.1554999351501465, -6.423799991607666, -6.423799991607666, -6.280600070953369, -6.280600070953369, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, 
-6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465, -6.1554999351501465], \"loglift\": [30.0, 29.0, 28.0, 27.0, 26.0, 25.0, 24.0, 23.0, 22.0, 21.0, 20.0, 19.0, 18.0, 17.0, 16.0, 15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5741999745368958, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.5461999773979187, 0.49790000915527344, 0.49790000915527344, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.49779999256134033, 0.4230000078678131, 0.43950000405311584, 0.43950000405311584, 0.43950000405311584, 0.35109999775886536, 0.31690001487731934, 0.3248000144958496, 0.33660000562667847, 0.33660000562667847, 0.29089999198913574, 0.29089999198913574, 0.2401999980211258, 0.2401999980211258, 0.2939999997615814, 0.23409999907016754, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.1940000057220459, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.16369999945163727, 0.6201000213623047, 0.6015999913215637, 0.6015999913215637, 0.6015999913215637, 
0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.573199987411499, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.5239999890327454, 0.4984999895095825, 0.4984999895095825, 0.5239999890327454, 0.5239999890327454, 0.46470001339912415, 0.46470001339912415, 0.46470001339912415, 0.3797999918460846, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.4178999960422516, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.3301999866962433, 0.1777999997138977, 0.07919999957084656, -0.047600001096725464, 0.09030000120401382, 0.09030000120401382, -0.1534000039100647, -0.1534000039100647, 0.09030000120401382, 0.09030000120401382, -0.054999999701976776, -0.054999999701976776, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.20239999890327454, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214, -0.249099999666214]}, \"token.table\": {\"Topic\": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 
1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2], \"Freq\": [0.598434567451477, 0.39895638823509216, 0.20982374250888824, 0.839294970035553, 0.6270467042922974, 0.41803112626075745, 0.48838010430336, 0.48838010430336, 0.598434567451477, 0.39895638823509216, 0.6270466446876526, 0.41803109645843506, 0.8109792470932007, 0.1621958464384079, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895644783973694, 0.48838010430336, 0.48838010430336, 0.9796611070632935, 0.585361897945404, 0.43902140855789185, 0.5984346270561218, 0.39895641803741455, 0.9732297658920288, 0.9796611070632935, 0.6499953866004944, 0.3249976933002472, 0.598434567451477, 0.39895638823509216, 0.6267518997192383, 0.34819549322128296, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.9732297658920288, 0.9732296466827393, 0.7308439016342163, 0.32481950521469116, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732297658920288, 0.598434567451477, 0.39895638823509216, 0.7295939922332764, 0.7295939922332764, 0.7350212335586548, 0.2447132021188736, 0.734139621257782, 0.9732296466827393, 0.834566593170166, 0.2086416482925415, 0.20982374250888824, 0.839294970035553, 0.6267518997192383, 0.34819549322128296, 0.598434567451477, 0.39895638823509216, 0.8752761483192444, 0.6270467638969421, 0.41803115606307983, 0.36682870984077454, 0.7336574196815491, 0.5984346270561218, 0.39895644783973694, 0.979661226272583, 
0.9796611070632935, 0.5984346270561218, 0.39895641803741455, 0.5984346270561218, 0.39895641803741455, 0.598434567451477, 0.39895638823509216, 0.6879772543907166, 0.3439886271953583, 0.7966956496238708, 0.2655652165412903, 0.6879771947860718, 0.3439885973930359, 0.1836414337158203, 0.7345657348632812, 0.585361897945404, 0.43902140855789185, 0.6270466446876526, 0.41803109645843506, 0.6270467042922974, 0.41803112626075745, 0.598434567451477, 0.39895638823509216, 0.598434567451477, 0.39895638823509216, 0.8822236061096191, 0.9796611070632935, 0.6270467638969421, 0.41803115606307983, 0.598434567451477, 0.39895638823509216, 0.9732297658920288, 0.7295939922332764, 0.24471323192119598, 0.7341396808624268, 0.48838010430336, 0.48838010430336, 0.7350212335586548, 0.9796611070632935, 0.9796611070632935, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.9796611070632935, 0.8345667123794556, 0.2086416780948639, 0.7295938730239868, 0.32617780566215515, 0.6523556113243103, 0.36682870984077454, 0.7336574196815491, 0.24471323192119598, 0.7341396808624268, 0.9796611070632935, 0.979661226272583, 0.2447132021188736, 0.734139621257782, 0.7295938730239868, 0.598434567451477, 0.39895638823509216, 0.48838010430336, 0.48838010430336, 0.7307631969451904, 0.24358773231506348, 0.24471323192119598, 0.7341396808624268, 0.39978134632110596, 0.5330418348312378, 0.73502117395401, 0.4185820519924164, 0.5232275724411011, 0.7350212335586548, 0.9732297658920288, 0.73502117395401, 0.979661226272583, 0.7350212335586548, 0.598434567451477, 0.39895638823509216, 0.7350212335586548, 0.9732298851013184, 0.7295939922332764, 0.9796611070632935, 0.2447132021188736, 0.734139621257782, 0.9803951382637024, 0.979661226272583, 0.7309695482254028, 0.3132726550102234, 0.9796611070632935, 0.36682870984077454, 0.7336574196815491, 0.6270467042922974, 0.41803112626075745, 0.9732297658920288, 0.6270467042922974, 0.41803112626075745, 0.1836414337158203, 0.7345657348632812, 0.7295938730239868, 0.8109791874885559, 
0.1621958464384079, 0.9732298851013184, 0.9732297658920288, 0.979661226272583, 0.5490206480026245, 0.45751720666885376, 0.9732297658920288, 0.36682870984077454, 0.7336574196815491, 0.7306734919548035, 0.2740025520324707, 0.834566593170166, 0.2086416482925415, 0.2447132021188736, 0.734139621257782, 0.7350212335586548, 0.9732297658920288, 0.7307631969451904, 0.24358773231506348, 0.8822235465049744, 0.8822236061096191, 0.5693705081939697, 0.48803186416625977, 0.2447132021188736, 0.734139621257782, 0.9732297658920288, 0.7295938730239868, 0.6820908784866333, 0.29232466220855713, 0.6270467042922974, 0.41803112626075745, 0.9796611070632935, 0.7350212335586548, 0.9796611070632935, 0.36682868003845215, 0.7336573600769043, 0.36682870984077454, 0.7336574196815491, 0.7295938730239868, 0.5984346270561218, 0.39895641803741455, 0.7350212335586548, 0.9732297658920288, 0.7295938730239868, 0.979661226272583, 0.6270467638969421, 0.41803115606307983, 0.20982372760772705, 0.8392949104309082, 0.979661226272583, 0.6270466446876526, 0.41803109645843506, 0.24471323192119598, 0.7341396808624268, 0.9732297658920288, 0.598434567451477, 0.39895638823509216], \"Term\": [\"Active\", \"Active\", \"Apr\", \"Apr\", \"Area\", \"Area\", \"Computer\", \"Computer\", \"Current\", \"Current\", \"Date\", \"Date\", \"Days\", \"Days\", \"Education\", \"Education\", \"Email\", \"Email\", \"Engineering\", \"Engineering\", \"Entity\", \"Experience\", \"Experience\", \"Functional\", \"Functional\", \"GCP\", \"Gradient\", \"Highest\", \"Highest\", \"ID\", \"ID\", \"IT\", \"IT\", \"ITSoftwareSoftware\", \"ITSoftwareSoftware\", \"JDK\", \"JIRA\", \"Jquery\", \"Jump\", \"Jump\", \"June\", \"Key\", \"Key\", \"Leader\", \"LearningDeep\", \"Location\", \"Location\", \"MCA\", \"MS\", \"MTech\", \"Maintenance\", \"Maintenance\", \"Marathi\", \"Marital\", \"Marital\", \"May\", \"May\", \"Modified\", \"Modified\", \"Months\", \"Months\", \"NCR\", \"Name\", \"Name\", \"Nov\", \"Nov\", \"Number\", \"Number\", \"Numpy\", 
\"OpenCV\", \"Period\", \"Period\", \"Phone\", \"Phone\", \"Pref\", \"Pref\", \"Resume\", \"Resume\", \"SQL\", \"SQL\", \"SectionWork\", \"SectionWork\", \"Sep\", \"Sep\", \"Services\", \"Services\", \"Skill\", \"Skill\", \"Skills\", \"Skills\", \"Status\", \"Status\", \"Summary\", \"Summary\", \"Technologies\", \"Till\", \"Top\", \"Top\", \"UG\", \"UG\", \"accordingly\", \"actively\", \"algorithm\", \"algorithm\", \"application\", \"application\", \"apply\", \"architecture\", \"artificial\", \"back\", \"back\", \"box\", \"camera\", \"client\", \"client\", \"cloud\", \"code\", \"code\", \"company\", \"company\", \"component\", \"component\", \"control\", \"coordinate\", \"css\", \"css\", \"cycle\", \"d\", \"d\", \"datum\", \"datum\", \"degree\", \"degree\", \"deliver\", \"deliver\", \"design\", \"design\", \"detect\", \"develop\", \"develop\", \"document\", \"domain\", \"duration\", \"enable\", \"enhance\", \"experience\", \"experience\", \"extensive\", \"fashion\", \"full\", \"identification\", \"identify\", \"identify\", \"improve\", \"index\", \"industry\", \"industry\", \"information\", \"issue\", \"issue\", \"it\", \"it\", \"junior\", \"last\", \"last\", \"lead\", \"lead\", \"leadership\", \"less\", \"less\", \"level\", \"look\", \"many\", \"marital\", \"marital\", \"monitoring\", \"new\", \"new\", \"notice\", \"notice\", \"perform\", \"perform\", \"performance\", \"performance\", \"prediction\", \"pretraine\", \"professional\", \"professional\", \"program\", \"reduce\", \"role\", \"role\", \"section\", \"section\", \"sequence\", \"service\", \"singleunmarrie\", \"singleunmarrie\", \"skill\", \"skill\", \"steering\", \"structure\", \"suggest\", \"system\", \"system\", \"test\", \"test\", \"text\", \"total\", \"total\", \"track\", \"transfer\", \"troubleshoot\", \"tuning\", \"use\", \"use\", \"user\", \"user\", \"value\", \"version\", \"version\", \"website\", \"website\", \"working\", \"year\", \"year\"]}, \"R\": 30, \"lambda.step\": 0.01, \"plot.opts\": 
{\"xlab\": \"PC1\", \"ylab\": \"PC2\"}, \"topic.order\": [2, 1]};\n",
158
- "\n",
159
- "function LDAvis_load_lib(url, callback){\n",
160
- " var s = document.createElement('script');\n",
161
- " s.src = url;\n",
162
- " s.async = true;\n",
163
- " s.onreadystatechange = s.onload = callback;\n",
164
- " s.onerror = function(){console.warn(\"failed to load library \" + url);};\n",
165
- " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
166
- "}\n",
167
- "\n",
168
- "if(typeof(LDAvis) !== \"undefined\"){\n",
169
- " // already loaded: just create the visualization\n",
170
- " !function(LDAvis){\n",
171
- " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
172
- " }(LDAvis);\n",
173
- "}else if(typeof define === \"function\" && define.amd){\n",
174
- " // require.js is available: use it to load d3/LDAvis\n",
175
- " require.config({paths: {d3: \"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\"}});\n",
176
- " require([\"d3\"], function(d3){\n",
177
- " window.d3 = d3;\n",
178
- " LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
179
- " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
180
- " });\n",
181
- " });\n",
182
- "}else{\n",
183
- " // require.js not available: dynamically load d3 & LDAvis\n",
184
- " LDAvis_load_lib(\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js\", function(){\n",
185
- " LDAvis_load_lib(\"https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.js\", function(){\n",
186
- " new LDAvis(\"#\" + \"ldavis_el22281400102997868488005527649\", ldavis_el22281400102997868488005527649_data);\n",
187
- " })\n",
188
- " });\n",
189
- "}\n",
190
- "</script>"
191
- ],
192
- "text/plain": [
193
- "PreparedData(topic_coordinates= x y topics cluster Freq\n",
194
- "topic \n",
195
- "1 0.045266 0.0 1 1 50.735813\n",
196
- "0 -0.045266 0.0 2 1 49.264191, topic_info= Term Freq Total Category logprob loglift\n",
197
- "759 improve 4.000000 4.000000 Default 30.0000 30.0000\n",
198
- "565 less 6.000000 6.000000 Default 29.0000 29.0000\n",
199
- "520 Days 6.000000 6.000000 Default 28.0000 28.0000\n",
200
- "781 program 3.000000 3.000000 Default 27.0000 27.0000\n",
201
- "1734 reduce 3.000000 3.000000 Default 26.0000 26.0000\n",
202
- "... ... ... ... ... ... ...\n",
203
- "185 d 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
204
- "197 experience 5.775257 15.039238 Topic2 -6.1555 -0.2491\n",
205
- "152 Summary 5.775258 15.039238 Topic2 -6.1555 -0.2491\n",
206
- "0 Active 5.775256 15.039238 Topic2 -6.1555 -0.2491\n",
207
- "117 Pref 5.775254 15.039238 Topic2 -6.1555 -0.2491\n",
208
- "\n",
209
- "[205 rows x 6 columns], token_table= Topic Freq Term\n",
210
- "term \n",
211
- "0 1 0.598435 Active\n",
212
- "0 2 0.398956 Active\n",
213
- "1107 1 0.209824 Apr\n",
214
- "1107 2 0.839295 Apr\n",
215
- "7 1 0.627047 Area\n",
216
- "... ... ... ...\n",
217
- "671 1 0.244713 website\n",
218
- "671 2 0.734140 website\n",
219
- "515 1 0.973230 working\n",
220
- "271 1 0.598435 year\n",
221
- "271 2 0.398956 year\n",
222
- "\n",
223
- "[226 rows x 3 columns], R=30, lambda_step=0.01, plot_opts={'xlab': 'PC1', 'ylab': 'PC2'}, topic_order=[2, 1])"
224
- ]
225
- },
226
- "execution_count": 72,
227
- "metadata": {},
228
- "output_type": "execute_result"
229
- }
230
- ],
231
  "source": [
232
  "# Visualize the topics\n",
233
- "pyLDAvis.enable_notebook()\n",
234
- "vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
235
- "vis"
236
  ]
237
  },
238
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 3,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
 
30
  },
31
  {
32
  "cell_type": "code",
33
+ "execution_count": 4,
34
  "metadata": {},
35
  "outputs": [],
36
  "source": [
 
41
  },
42
  {
43
  "cell_type": "code",
44
+ "execution_count": 5,
45
  "metadata": {},
46
  "outputs": [],
47
  "source": [
 
62
  },
63
  {
64
  "cell_type": "code",
65
+ "execution_count": 6,
66
  "metadata": {},
67
  "outputs": [],
68
  "source": [
 
78
  },
79
  {
80
  "cell_type": "code",
81
+ "execution_count": 7,
82
  "metadata": {},
83
  "outputs": [],
84
  "source": [
 
87
  },
88
  {
89
  "cell_type": "code",
90
+ "execution_count": 8,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
 
104
  },
105
  {
106
  "cell_type": "code",
107
+ "execution_count": 9,
108
  "metadata": {},
109
  "outputs": [
110
  {
 
129
  },
130
  {
131
  "cell_type": "code",
132
+ "execution_count": 10,
133
  "metadata": {},
134
  "outputs": [],
135
  "source": [
 
141
  },
142
  {
143
  "cell_type": "code",
144
+ "execution_count": 11,
145
  "metadata": {},
146
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  "source": [
148
  "# Visualize the topics\n",
149
+ "# pyLDAvis.enable_notebook()\n",
150
+ "# vis = pyLDAvis.gensim.prepare(lda_model, corpus, id2word)\n",
151
+ "# vis"
152
  ]
153
  },
154
  {
Progress/untitled.md ADDED
File without changes