knguyen471 committed on
Commit
d1b23d2
·
verified ·
1 Parent(s): 1895a09

Upload tfidf_similarity.py

Browse files
Files changed (1) hide show
  1. utils/tfidf_similarity.py +2 -11
utils/tfidf_similarity.py CHANGED
@@ -6,8 +6,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
6
 
7
  class TFIDF_Vectorizer():
8
  def __init__(self, load_vectorizer=None, stop_words='english', min_df=2):
9
- self.vectorizer_path = "tfidf_vectorizer.pkl"
10
- self.tfidf_matrix_path = "tfidf_matrix.npz"
11
 
12
  if load_vectorizer:
13
  with open(self.vectorizer_path, 'rb') as file:
@@ -16,15 +15,7 @@ class TFIDF_Vectorizer():
16
  self.vectorizer = TfidfVectorizer(stop_words=stop_words, min_df=min_df)
17
 
18
  def compute_tfidf_matrix(self, texts):
19
- features = self.vectorizer.fit_transform(texts)
20
-
21
- # save vectorizer
22
- with open(self.vectorizer_path, 'wb') as file:
23
- pickle.dump(self.vectorizer, file)
24
-
25
- # save tfidf matrix
26
- save_npz(self.tfidf_matrix_path, features)
27
- return features
28
 
29
  def transform(self, texts: list) -> any:
30
  return self.vectorizer.transform(texts)
 
6
 
7
  class TFIDF_Vectorizer():
8
  def __init__(self, load_vectorizer=None, stop_words='english', min_df=2):
9
+ self.vectorizer_path = "data/tfidf_vectorizer.pkl"
 
10
 
11
  if load_vectorizer:
12
  with open(self.vectorizer_path, 'rb') as file:
 
15
  self.vectorizer = TfidfVectorizer(stop_words=stop_words, min_df=min_df)
16
 
17
  def compute_tfidf_matrix(self, texts):
18
+ return self.vectorizer.fit_transform(texts)
 
 
 
 
 
 
 
 
19
 
20
  def transform(self, texts: list) -> any:
21
  return self.vectorizer.transform(texts)