srbhr committed on
Commit
c2d1e63
·
1 Parent(s): d3e7219
Files changed (6) hide show
  1. Job_Data.csv +0 -0
  2. Resume_Data.csv +0 -0
  3. Similar.py +1 -3
  4. app.py +4 -4
  5. fileReader.py +0 -1
  6. tf_idf.py +1 -1
Job_Data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Resume_Data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
Similar.py CHANGED
@@ -1,5 +1,4 @@
1
  import textdistance as td
2
- import Cleaner
3
 
4
 
5
  def match(resume, job_des):
@@ -8,6 +7,5 @@ def match(resume, job_des):
8
  c = td.cosine.similarity(resume, job_des)
9
  o = td.overlap.normalized_similarity(resume, job_des)
10
  total = (j+s+c+o)/4
 
11
  return total*100
12
-
13
-
 
1
  import textdistance as td
 
2
 
3
 
4
  def match(resume, job_des):
 
7
  c = td.cosine.similarity(resume, job_des)
8
  o = td.overlap.normalized_similarity(resume, job_des)
9
  total = (j+s+c+o)/4
10
+ # total = (s+o)/2
11
  return total*100
 
 
app.py CHANGED
@@ -105,7 +105,7 @@ def calculate_scores(resumes, job_description):
105
  scores = []
106
  for x in range(resumes.shape[0]):
107
  score = Similar.match(
108
- resumes['TF_Based'][x], job_description['Selective_Reduced'][index])
109
  scores.append(score)
110
  return scores
111
 
@@ -160,7 +160,7 @@ document = get_list_of_words(Resumes['Cleaned'])
160
  id2word = corpora.Dictionary(document)
161
  corpus = [id2word.doc2bow(text) for text in document]
162
 
163
- lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=5, random_state=100,
164
  update_every=1, chunksize=100, passes=50, alpha='auto', per_word_topics=True)
165
 
166
  ################################### LDA CODE ##############################################
@@ -203,7 +203,7 @@ cloud = WordCloud(background_color='white',
203
 
204
  topics = lda_model.show_topics(formatted=False)
205
 
206
- fig, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)
207
 
208
  for i, ax in enumerate(axes.flatten()):
209
  fig.add_subplot(ax)
@@ -243,7 +243,7 @@ st.write(fig3)
243
  ############################## RESUME PRINTING #############################
244
 
245
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
246
- 'NO', 'YES'])
247
  if option_2 == 'YES':
248
  indx = st.slider("Which resume to display ?:",
249
  1, Ranked_resumes.shape[0], 1)
 
105
  scores = []
106
  for x in range(resumes.shape[0]):
107
  score = Similar.match(
108
+ resumes['TF_Based'][x], job_description['TF_Based'][index])
109
  scores.append(score)
110
  return scores
111
 
 
160
  id2word = corpora.Dictionary(document)
161
  corpus = [id2word.doc2bow(text) for text in document]
162
 
163
+ lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=6, random_state=100,
164
  update_every=1, chunksize=100, passes=50, alpha='auto', per_word_topics=True)
165
 
166
  ################################### LDA CODE ##############################################
 
203
 
204
  topics = lda_model.show_topics(formatted=False)
205
 
206
+ fig, axes = plt.subplots(2, 3, figsize=(10, 10), sharex=True, sharey=True)
207
 
208
  for i, ax in enumerate(axes.flatten()):
209
  fig.add_subplot(ax)
 
243
  ############################## RESUME PRINTING #############################
244
 
245
  option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
246
+ 'YES', 'NO'])
247
  if option_2 == 'YES':
248
  indx = st.slider("Which resume to display ?:",
249
  1, Ranked_resumes.shape[0], 1)
fileReader.py CHANGED
@@ -1,7 +1,6 @@
1
  from operator import index
2
  from pandas._config.config import options
3
  import Cleaner
4
- import Similar
5
  import textract as tx
6
  import pandas as pd
7
  import os
 
1
  from operator import index
2
  from pandas._config.config import options
3
  import Cleaner
 
4
  import textract as tx
5
  import pandas as pd
6
  import os
tf_idf.py CHANGED
@@ -2,7 +2,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
2
 
3
 
4
  def do_tfidf(token):
5
- tfidf = TfidfVectorizer(max_df=0.07, min_df=0.001)
6
  words = tfidf.fit_transform(token)
7
  sentence = " ".join(tfidf.get_feature_names())
8
  return sentence
 
2
 
3
 
4
  def do_tfidf(token):
5
+ tfidf = TfidfVectorizer(max_df=0.05, min_df=0.002)
6
  words = tfidf.fit_transform(token)
7
  sentence = " ".join(tfidf.get_feature_names())
8
  return sentence