File size: 1,003 Bytes
888aba6
 
 
 
 
 
 
 
d1b23d2
888aba6
 
 
 
 
 
 
 
d1b23d2
888aba6
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import pickle
from typing import Any, Optional

from scipy.sparse import save_npz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class TFIDF_Vectorizer:
    """Thin wrapper around scikit-learn's ``TfidfVectorizer``.

    The wrapped vectorizer is either created fresh or restored from a pickle
    file, and is exposed as ``self.vectorizer``. The remaining methods fit or
    apply it and score a query against pre-computed TF-IDF features using
    cosine similarity.
    """

    # Pickle location used when the caller does not supply one.
    DEFAULT_VECTORIZER_PATH = "data/tfidf_vectorizer.pkl"

    def __init__(
        self,
        load_vectorizer: Optional[bool] = None,
        stop_words='english',
        min_df=2,
        *,
        vectorizer_path: str = DEFAULT_VECTORIZER_PATH,
    ) -> None:
        """Create a new vectorizer or restore a pickled one.

        Args:
            load_vectorizer: When truthy, unpickle the vectorizer from
                ``vectorizer_path`` instead of building a new one.
            stop_words: Forwarded to ``TfidfVectorizer`` (ignored when
                loading).
            min_df: Forwarded to ``TfidfVectorizer`` (ignored when loading).
            vectorizer_path: Path of the pickle file to load from. Defaults
                to ``data/tfidf_vectorizer.pkl``.

        Raises:
            OSError: If ``load_vectorizer`` is truthy and the file cannot be
                opened (e.g. ``FileNotFoundError``).
        """
        self.vectorizer_path = vectorizer_path

        if load_vectorizer:
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file. Only load vectorizer files produced by a trusted source.
            with open(self.vectorizer_path, 'rb') as file:
                self.vectorizer = pickle.load(file)
        else:
            self.vectorizer = TfidfVectorizer(stop_words=stop_words, min_df=min_df)

    def compute_tfidf_matrix(self, texts) -> Any:
        """Fit the vectorizer on ``texts`` and return their TF-IDF matrix.

        This calls ``fit_transform``, so the wrapped vectorizer is (re)fitted
        on ``texts`` as a side effect.
        """
        return self.vectorizer.fit_transform(texts)

    def transform(self, texts: list) -> Any:
        """Return the TF-IDF features of ``texts`` without refitting."""
        return self.vectorizer.transform(texts)

    def compute_tfidf_scores(self, query: str, restaurant_tfidf_features: Any) -> Any:
        """Score ``query`` against every row of ``restaurant_tfidf_features``.

        Args:
            query: Free-text query; vectorized with the wrapped vectorizer.
            restaurant_tfidf_features: TF-IDF features to compare against,
                presumably produced by this same vectorizer (verify against
                the caller).

        Returns:
            The first (and only) row of the cosine-similarity matrix: one
            score per row of ``restaurant_tfidf_features``.
        """
        # The query is wrapped in a list because transform expects a
        # collection of documents; the result therefore has a single row.
        query_tfidf_features = self.vectorizer.transform([query])
        return cosine_similarity(query_tfidf_features, restaurant_tfidf_features)[0]