wbrooks committed on
Commit
ab4ff40
·
1 Parent(s): 1310186

Changes TF-IDF search scoring from dot product to cosine similarity

Browse files
Files changed (1) hide show
  1. src/do_pca_on_tfidf.py +2 -2
src/do_pca_on_tfidf.py CHANGED
@@ -47,11 +47,11 @@ def query_worker(query, dtm_svd, dtm_svd_mat, vocab_norm, concentration = 10 ):
47
 
48
  # calculate the average TF-IDF score of the query over topics:
49
  #mean_query_score = np.sum(np.mean(query_weights, axis=0) * dtm_svd_mat, axis=1)
50
- mean_query_score = cosine_similarity(np.reshape(query_weights, shape = (1, -1)), dtm_svd_mat)
51
 
52
  sorted_df = pl.DataFrame(
53
  {
54
- 'score-tfidf': np.reshape(mean_query_score, shape=-1),
55
  'file':my_files
56
  }).sort("score-tfidf", descending = True).with_columns(pl.Series("rank-tfidf", [i + 1 for i in range(len(mean_query_score))]))
57
 
 
47
 
48
  # calculate the average TF-IDF score of the query over topics:
49
  #mean_query_score = np.sum(np.mean(query_weights, axis=0) * dtm_svd_mat, axis=1)
50
+ mean_query_score = np.reshape(cosine_similarity(np.reshape(query_weights, shape = (1, -1)), dtm_svd_mat), shape=-1)
51
 
52
  sorted_df = pl.DataFrame(
53
  {
54
+ 'score-tfidf': mean_query_score,
55
  'file':my_files
56
  }).sort("score-tfidf", descending = True).with_columns(pl.Series("rank-tfidf", [i + 1 for i in range(len(mean_query_score))]))
57