Update app.py
Browse files
app.py
CHANGED
|
@@ -123,24 +123,33 @@ def get_recommendations_TFIDF(abstract):
|
|
| 123 |
tfidf_vectorizer = TfidfVectorizer()
|
| 124 |
# Generate the tf-idf vectors for the corpus
|
| 125 |
tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
|
|
|
|
| 126 |
# Compute the cosine similarity matrix
|
| 127 |
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
|
| 128 |
|
| 129 |
# Get the pairwise similarity scores
|
| 130 |
sim_scores = list(enumerate(cosine_sim[-1]))
|
| 131 |
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
title = train_df['title'].iloc[paper_indices]
|
| 135 |
categories = train_df['categories'].iloc[paper_indices]
|
| 136 |
abstract = train_df['abstract'].iloc[paper_indices]
|
| 137 |
-
|
| 138 |
return title, categories, abstract, similarity
|
| 139 |
|
| 140 |
-
get_recommendations_TFIDF('''
|
| 141 |
-
In this paper we consider permutations of sequences of partitions, obtaining\na result which parallels von Neumann's theorem on permutations of dense\nsequences and uniformly distributed sequences of points.\n
|
| 142 |
-
''')
|
| 143 |
-
|
| 144 |
"""# Doc2Vec"""
|
| 145 |
|
| 146 |
import time
|
|
|
|
| 123 |
tfidf_vectorizer = TfidfVectorizer()
|
| 124 |
# Generate the tf-idf vectors for the corpus
|
| 125 |
tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
|
| 126 |
+
|
| 127 |
# Compute the cosine similarity matrix
|
| 128 |
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
|
| 129 |
|
| 130 |
# Get the pairwise similarity scores
|
| 131 |
sim_scores = list(enumerate(cosine_sim[-1]))
|
| 132 |
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
|
| 133 |
+
|
| 134 |
+
# If the two top-ranked entries both match the input abstract word-for-word, skip them and use the third-ranked entry
|
| 135 |
+
if corpus[int(sim_scores[0][0])].split() == abstract.split() and corpus[int(sim_scores[1][0])].split() == abstract.split():
|
| 136 |
+
print(corpus[int(sim_scores[0][0])].split() == abstract.split())
|
| 137 |
+
print(corpus[int(sim_scores[1][0])].split() == abstract.split())
|
| 138 |
+
paper_indices = int(sim_scores[2][0])
|
| 139 |
+
similarity = "{:.2f}%".format(sim_scores[2][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
|
| 140 |
+
elif sim_scores[0][0] == 500:
|
| 141 |
+
paper_indices = int(sim_scores[1][0])
|
| 142 |
+
similarity = "{:.2f}%".format(sim_scores[1][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
|
| 143 |
+
else:
|
| 144 |
+
paper_indices = int(sim_scores[0][0])
|
| 145 |
+
similarity = "{:.2f}%".format(sim_scores[0][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
|
| 146 |
|
| 147 |
title = train_df['title'].iloc[paper_indices]
|
| 148 |
categories = train_df['categories'].iloc[paper_indices]
|
| 149 |
abstract = train_df['abstract'].iloc[paper_indices]
|
| 150 |
+
|
| 151 |
return title, categories, abstract, similarity
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
"""# Doc2Vec"""
|
| 154 |
|
| 155 |
import time
|