srbhr committed on
Commit
c2d1e63
·
1 Parent(s): d3e7219
Files changed (6) hide show
  1. Job_Data.csv +0 -0
  2. Resume_Data.csv +0 -0
  3. Similar.py +1 -3
  4. app.py +4 -4
  5. fileReader.py +0 -1
  6. tf_idf.py +1 -1
Job_Data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Resume_Data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Similar.py CHANGED
@@ -1,5 +1,4 @@
1
  import textdistance as td
2
- import Cleaner
3
 
4
 
5
  def match(resume, job_des):
@@ -8,6 +7,5 @@ def match(resume, job_des):
8
  c = td.cosine.similarity(resume, job_des)
9
  o = td.overlap.normalized_similarity(resume, job_des)
10
  total = (j+s+c+o)/4
 
11
  return total*100
12
-
13
-
 
1
  import textdistance as td
 
2
 
3
 
4
  def match(resume, job_des):
 
7
  c = td.cosine.similarity(resume, job_des)
8
  o = td.overlap.normalized_similarity(resume, job_des)
9
  total = (j+s+c+o)/4
10
+ # total = (s+o)/2
11
  return total*100
 
 
app.py CHANGED
@@ -105,7 +105,7 @@ def calculate_scores(resumes, job_description):
105
  scores = []
106
  for x in range(resumes.shape[0]):
107
  score = Similar.match(
108
- resumes['TF_Based'][x], job_description['Selective_Reduced'][index])
109
  scores.append(score)
110
  return scores
111
 
@@ -160,7 +160,7 @@ document = get_list_of_words(Resumes['Cleaned'])
160
  id2word = corpora.Dictionary(document)
161
  corpus = [id2word.doc2bow(text) for text in document]
162
 
163
- lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=5, random_state=100,
164
  update_every=1, chunksize=100, passes=50, alpha='auto', per_word_topics=True)
165
 
166
  ################################### LDA CODE ##############################################
@@ -203,7 +203,7 @@ cloud = WordCloud(background_color='white',
203
 
204
  topics = lda_model.show_topics(formatted=False)
205
 
206
- fig, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)
207
 
208
  for i, ax in enumerate(axes.flatten()):
209
  fig.add_subplot(ax)
@@ -243,7 +243,7 @@ st.write(fig3)
243
  ############################## RESUME PRINTING #############################
244
 
245
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
246
- 'NO', 'YES'])
247
  if option_2 == 'YES':
248
  indx = st.slider("Which resume to display ?:",
249
  1, Ranked_resumes.shape[0], 1)
 
105
  scores = []
106
  for x in range(resumes.shape[0]):
107
  score = Similar.match(
108
+ resumes['TF_Based'][x], job_description['TF_Based'][index])
109
  scores.append(score)
110
  return scores
111
 
 
160
  id2word = corpora.Dictionary(document)
161
  corpus = [id2word.doc2bow(text) for text in document]
162
 
163
+ lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=6, random_state=100,
164
  update_every=1, chunksize=100, passes=50, alpha='auto', per_word_topics=True)
165
 
166
  ################################### LDA CODE ##############################################
 
203
 
204
  topics = lda_model.show_topics(formatted=False)
205
 
206
+ fig, axes = plt.subplots(2, 3, figsize=(10, 10), sharex=True, sharey=True)
207
 
208
  for i, ax in enumerate(axes.flatten()):
209
  fig.add_subplot(ax)
 
243
  ############################## RESUME PRINTING #############################
244
 
245
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
246
+ 'YES', 'NO'])
247
  if option_2 == 'YES':
248
  indx = st.slider("Which resume to display ?:",
249
  1, Ranked_resumes.shape[0], 1)
fileReader.py CHANGED
@@ -1,7 +1,6 @@
1
  from operator import index
2
  from pandas._config.config import options
3
  import Cleaner
4
- import Similar
5
  import textract as tx
6
  import pandas as pd
7
  import os
 
1
  from operator import index
2
  from pandas._config.config import options
3
  import Cleaner
 
4
  import textract as tx
5
  import pandas as pd
6
  import os
tf_idf.py CHANGED
@@ -2,7 +2,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
2
 
3
 
4
  def do_tfidf(token):
5
- tfidf = TfidfVectorizer(max_df=0.07, min_df=0.001)
6
  words = tfidf.fit_transform(token)
7
  sentence = " ".join(tfidf.get_feature_names())
8
  return sentence
 
2
 
3
 
4
  def do_tfidf(token):
5
+ tfidf = TfidfVectorizer(max_df=0.05, min_df=0.002)
6
  words = tfidf.fit_transform(token)
7
  sentence = " ".join(tfidf.get_feature_names())
8
  return sentence