import pickle
from typing import Any

import numpy as np
from scipy.sparse import save_npz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


class TFIDF_Vectorizer():
    """Thin wrapper around scikit-learn's ``TfidfVectorizer``.

    The wrapped vectorizer is either created fresh or restored from a pickle
    file, and is then used to build TF-IDF features and to score a query
    against pre-computed features with cosine similarity.
    """

    # Location used when the caller does not supply ``vectorizer_path``.
    DEFAULT_VECTORIZER_PATH = "data/tfidf_vectorizer.pkl"

    def __init__(self, load_vectorizer=None, stop_words='english', min_df=2,
                 vectorizer_path=DEFAULT_VECTORIZER_PATH):
        """Create a new vectorizer or load a pickled one.

        Args:
            load_vectorizer: When truthy, unpickle the vectorizer from
                ``vectorizer_path`` instead of constructing a new one. In
                that case ``stop_words`` and ``min_df`` are not used.
            stop_words: Forwarded to ``TfidfVectorizer(stop_words=...)``.
            min_df: Forwarded to ``TfidfVectorizer(min_df=...)``.
            vectorizer_path: Path of the pickle file holding a vectorizer.

        Raises:
            OSError: If ``load_vectorizer`` is truthy and the pickle file
                cannot be opened (e.g. ``FileNotFoundError``).
        """
        self.vectorizer_path = vectorizer_path
        if load_vectorizer:
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file. Only load vectorizer files that come from a trusted source.
            with open(self.vectorizer_path, 'rb') as file:
                self.vectorizer = pickle.load(file)
        else:
            self.vectorizer = TfidfVectorizer(stop_words=stop_words, min_df=min_df)

    def compute_tfidf_matrix(self, texts: list) -> Any:
        """Fit the vectorizer on ``texts`` and return their TF-IDF matrix.

        This calls ``fit_transform``, so the wrapped vectorizer is (re)fitted
        on ``texts`` as a side effect.

        Args:
            texts: Documents to fit on and transform.

        Returns:
            The document-term matrix produced by ``fit_transform``.
        """
        return self.vectorizer.fit_transform(texts)

    def transform(self, texts: list) -> Any:
        """Transform ``texts`` with the already-fitted vectorizer.

        Args:
            texts: Documents to transform.

        Returns:
            The document-term matrix produced by ``transform``.
        """
        return self.vectorizer.transform(texts)

    def compute_tfidf_scores(self, query: str, restaurant_tfidf_features: Any) -> np.ndarray:
        """Score ``query`` against pre-computed TF-IDF features.

        Args:
            query: Query text; it is transformed with the fitted vectorizer.
            restaurant_tfidf_features: TF-IDF feature matrix to compare the
                query against (presumably one row per restaurant, produced by
                this same vectorizer -- confirm against the caller).

        Returns:
            The first (and only) row of the cosine-similarity matrix between
            the query and ``restaurant_tfidf_features``: one score per row of
            ``restaurant_tfidf_features``.
        """
        # The query is wrapped in a list because ``transform`` expects a
        # collection of documents, not a single string.
        query_tfidf_features = self.vectorizer.transform([query])
        return cosine_similarity(query_tfidf_features, restaurant_tfidf_features)[0]