Spaces:
Runtime error
Runtime error
Updates
Browse files
- Job_Data.csv +0 -0
- Resume_Data.csv +0 -0
- Similar.py +1 -3
- app.py +4 -4
- fileReader.py +0 -1
- tf_idf.py +1 -1
Job_Data.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Resume_Data.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Similar.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import textdistance as td
|
| 2 |
-
import Cleaner
|
| 3 |
|
| 4 |
|
| 5 |
def match(resume, job_des):
|
|
@@ -8,6 +7,5 @@ def match(resume, job_des):
|
|
| 8 |
c = td.cosine.similarity(resume, job_des)
|
| 9 |
o = td.overlap.normalized_similarity(resume, job_des)
|
| 10 |
total = (j+s+c+o)/4
|
|
|
|
| 11 |
return total*100
|
| 12 |
-
|
| 13 |
-
|
|
|
|
| 1 |
import textdistance as td
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
def match(resume, job_des):
|
|
|
|
| 7 |
c = td.cosine.similarity(resume, job_des)
|
| 8 |
o = td.overlap.normalized_similarity(resume, job_des)
|
| 9 |
total = (j+s+c+o)/4
|
| 10 |
+
# total = (s+o)/2
|
| 11 |
return total*100
|
|
|
|
|
|
app.py
CHANGED
|
@@ -105,7 +105,7 @@ def calculate_scores(resumes, job_description):
|
|
| 105 |
scores = []
|
| 106 |
for x in range(resumes.shape[0]):
|
| 107 |
score = Similar.match(
|
| 108 |
-
resumes['TF_Based'][x], job_description['
|
| 109 |
scores.append(score)
|
| 110 |
return scores
|
| 111 |
|
|
@@ -160,7 +160,7 @@ document = get_list_of_words(Resumes['Cleaned'])
|
|
| 160 |
id2word = corpora.Dictionary(document)
|
| 161 |
corpus = [id2word.doc2bow(text) for text in document]
|
| 162 |
|
| 163 |
-
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=
|
| 164 |
update_every=1, chunksize=100, passes=50, alpha='auto', per_word_topics=True)
|
| 165 |
|
| 166 |
################################### LDA CODE ##############################################
|
|
@@ -203,7 +203,7 @@ cloud = WordCloud(background_color='white',
|
|
| 203 |
|
| 204 |
topics = lda_model.show_topics(formatted=False)
|
| 205 |
|
| 206 |
-
fig, axes = plt.subplots(2,
|
| 207 |
|
| 208 |
for i, ax in enumerate(axes.flatten()):
|
| 209 |
fig.add_subplot(ax)
|
|
@@ -243,7 +243,7 @@ st.write(fig3)
|
|
| 243 |
############################## RESUME PRINTING #############################
|
| 244 |
|
| 245 |
option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
|
| 246 |
-
'
|
| 247 |
if option_2 == 'YES':
|
| 248 |
indx = st.slider("Which resume to display ?:",
|
| 249 |
1, Ranked_resumes.shape[0], 1)
|
|
|
|
| 105 |
scores = []
|
| 106 |
for x in range(resumes.shape[0]):
|
| 107 |
score = Similar.match(
|
| 108 |
+
resumes['TF_Based'][x], job_description['TF_Based'][index])
|
| 109 |
scores.append(score)
|
| 110 |
return scores
|
| 111 |
|
|
|
|
| 160 |
id2word = corpora.Dictionary(document)
|
| 161 |
corpus = [id2word.doc2bow(text) for text in document]
|
| 162 |
|
| 163 |
+
lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=id2word, num_topics=6, random_state=100,
|
| 164 |
update_every=1, chunksize=100, passes=50, alpha='auto', per_word_topics=True)
|
| 165 |
|
| 166 |
################################### LDA CODE ##############################################
|
|
|
|
| 203 |
|
| 204 |
topics = lda_model.show_topics(formatted=False)
|
| 205 |
|
| 206 |
+
fig, axes = plt.subplots(2, 3, figsize=(10, 10), sharex=True, sharey=True)
|
| 207 |
|
| 208 |
for i, ax in enumerate(axes.flatten()):
|
| 209 |
fig.add_subplot(ax)
|
|
|
|
| 243 |
############################## RESUME PRINTING #############################
|
| 244 |
|
| 245 |
option_2 = st.selectbox("Show the Best Matching Resumes?", options=[
|
| 246 |
+
'YES', 'NO'])
|
| 247 |
if option_2 == 'YES':
|
| 248 |
indx = st.slider("Which resume to display ?:",
|
| 249 |
1, Ranked_resumes.shape[0], 1)
|
fileReader.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
from operator import index
|
| 2 |
from pandas._config.config import options
|
| 3 |
import Cleaner
|
| 4 |
-
import Similar
|
| 5 |
import textract as tx
|
| 6 |
import pandas as pd
|
| 7 |
import os
|
|
|
|
| 1 |
from operator import index
|
| 2 |
from pandas._config.config import options
|
| 3 |
import Cleaner
|
|
|
|
| 4 |
import textract as tx
|
| 5 |
import pandas as pd
|
| 6 |
import os
|
tf_idf.py
CHANGED
|
@@ -2,7 +2,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
| 2 |
|
| 3 |
|
| 4 |
def do_tfidf(token):
|
| 5 |
-
tfidf = TfidfVectorizer(max_df=0.
|
| 6 |
words = tfidf.fit_transform(token)
|
| 7 |
sentence = " ".join(tfidf.get_feature_names())
|
| 8 |
return sentence
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
def do_tfidf(token):
|
| 5 |
+
tfidf = TfidfVectorizer(max_df=0.05, min_df=0.002)
|
| 6 |
words = tfidf.fit_transform(token)
|
| 7 |
sentence = " ".join(tfidf.get_feature_names())
|
| 8 |
return sentence
|