Spaces:
Running
Running
Change TF-IDF search scoring from dot product to cosine similarity
Browse files- src/do_pca_on_tfidf.py +2 -2
src/do_pca_on_tfidf.py
CHANGED
|
@@ -47,11 +47,11 @@ def query_worker(query, dtm_svd, dtm_svd_mat, vocab_norm, concentration = 10 ):
|
|
| 47 |
|
| 48 |
# calculate the average TF-IDF score of the query over topics:
|
| 49 |
#mean_query_score = np.sum(np.mean(query_weights, axis=0) * dtm_svd_mat, axis=1)
|
| 50 |
-
mean_query_score = cosine_similarity(np.reshape(query_weights, shape = (1, -1)), dtm_svd_mat)
|
| 51 |
|
| 52 |
sorted_df = pl.DataFrame(
|
| 53 |
{
|
| 54 |
-
'score-tfidf':
|
| 55 |
'file':my_files
|
| 56 |
}).sort("score-tfidf", descending = True).with_columns(pl.Series("rank-tfidf", [i + 1 for i in range(len(mean_query_score))]))
|
| 57 |
|
|
|
|
| 47 |
|
| 48 |
# calculate the average TF-IDF score of the query over topics:
|
| 49 |
#mean_query_score = np.sum(np.mean(query_weights, axis=0) * dtm_svd_mat, axis=1)
|
| 50 |
+
mean_query_score = np.reshape(cosine_similarity(np.reshape(query_weights, shape = (1, -1)), dtm_svd_mat), shape=-1)
|
| 51 |
|
| 52 |
sorted_df = pl.DataFrame(
|
| 53 |
{
|
| 54 |
+
'score-tfidf': mean_query_score,
|
| 55 |
'file':my_files
|
| 56 |
}).sort("score-tfidf", descending = True).with_columns(pl.Series("rank-tfidf", [i + 1 for i in range(len(mean_query_score))]))
|
| 57 |
|