Spaces:
Sleeping
Sleeping
import pickle
from typing import Any

from scipy.sparse import save_npz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class TFIDF_Vectorizer():
    """Thin wrapper around a scikit-learn ``TfidfVectorizer`` with persistence.

    The wrapper either builds a fresh vectorizer or restores a previously
    pickled one, and writes both the fitted vectorizer and the resulting
    TF-IDF matrix to disk whenever ``compute_tfidf_matrix`` is called.
    """

    def __init__(self, load_vectorizer=None, stop_words='english', min_df=2,
                 *, vectorizer_path="tfidf_vectorizer.pkl",
                 tfidf_matrix_path="tfidf_matrix.npz"):
        """Create a new vectorizer or load a pickled one from disk.

        Args:
            load_vectorizer: When truthy, unpickle the vectorizer stored at
                ``vectorizer_path`` instead of building a new one. In that
                case ``stop_words`` and ``min_df`` are ignored.
            stop_words: Forwarded to ``TfidfVectorizer`` when a new
                vectorizer is built.
            min_df: Forwarded to ``TfidfVectorizer`` when a new vectorizer
                is built.
            vectorizer_path: File the pickled vectorizer is read from and
                written to. Defaults to the original hard-coded location.
            tfidf_matrix_path: File the TF-IDF matrix is written to by
                ``compute_tfidf_matrix``. Defaults to the original
                hard-coded location.

        Raises:
            FileNotFoundError: If ``load_vectorizer`` is truthy and
                ``vectorizer_path`` does not exist.
        """
        self.vectorizer_path = vectorizer_path
        self.tfidf_matrix_path = tfidf_matrix_path
        if load_vectorizer:
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file. Only point vectorizer_path at files this application
            # wrote itself; never at user-supplied or downloaded content.
            with open(self.vectorizer_path, 'rb') as file:
                self.vectorizer = pickle.load(file)
        else:
            self.vectorizer = TfidfVectorizer(stop_words=stop_words, min_df=min_df)

    def compute_tfidf_matrix(self, texts):
        """Fit the vectorizer on ``texts`` and persist the results.

        Side effects: overwrites ``self.vectorizer_path`` with the pickled
        fitted vectorizer and writes the feature matrix to
        ``self.tfidf_matrix_path`` via ``scipy.sparse.save_npz`` (which
        appends ``.npz`` to a string path that lacks that extension).

        Args:
            texts: Iterable of raw documents to fit on.

        Returns:
            The TF-IDF feature matrix produced by ``fit_transform``.
        """
        features = self.vectorizer.fit_transform(texts)
        # Persist the fitted vectorizer so later processes can reuse the
        # same vocabulary and IDF weights without refitting.
        with open(self.vectorizer_path, 'wb') as file:
            pickle.dump(self.vectorizer, file)
        # Persist the document-term matrix alongside the vectorizer.
        save_npz(self.tfidf_matrix_path, features)
        return features

    def transform(self, texts: list) -> Any:
        """Project ``texts`` into the TF-IDF space of the current vectorizer.

        Args:
            texts: Raw documents to transform.

        Returns:
            Whatever ``self.vectorizer.transform`` returns for ``texts``.
        """
        return self.vectorizer.transform(texts)

    def compute_tfidf_scores(self, query: str, restaurant_tfidf_features: Any) -> Any:
        """Score one query against precomputed TF-IDF features.

        Args:
            query: Raw query text.
            restaurant_tfidf_features: TF-IDF features to compare against;
                presumably produced by this same vectorizer so the feature
                columns line up (verify against the caller).

        Returns:
            The first row of the cosine-similarity matrix between the query
            and ``restaurant_tfidf_features``: one similarity value per row
            of ``restaurant_tfidf_features``. This is an array, not a
            Python ``list``.
        """
        # Wrap the query in a list because the vectorizer expects a
        # collection of documents; the result therefore has a single row.
        query_tfidf_features = self.transform([query])
        return cosine_similarity(query_tfidf_features, restaurant_tfidf_features)[0]