Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from scipy.sparse.linalg import svds | |
| from sklearn.preprocessing import MinMaxScaler | |
| import random | |
| import sys | |
| import os | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) | |
| import response.ResponseRecommender as res_rcm | |
| from function.recommender import get_data | |
class HybridRecommender:
    """Recommend products by blending three per-product signals.

    * collaborative: a truncated-SVD reconstruction of the user x product
      rating matrix (see ``train_svd``);
    * content: similarity to the products found in the user's favourites,
      order history and cart (see ``get_content_score``);
    * history: the ``time_weight`` column of the user's order history.

    All frames are expected to expose ``user_id`` / ``product_id`` columns
    (``reviews`` additionally ``rating``); ``cosine_sim`` is indexed by the
    position of each product in ``products["product_id"].unique()``.
    """

    # Blend used by hybrid_recommend when the caller does not pass weights.
    _DEFAULT_WEIGHTS = {'collab': 0.2, 'content': 0.1, 'history': 0.7}

    def __init__(self, cosine_sim, products, reviews, favourite_items, order_history, new_item_cart,
                 num_factors=3, regularization=0.02):
        """Store the input frames and build the id <-> index maps and rating matrix.

        Args:
            cosine_sim: square product-to-product similarity matrix.
            products: frame with a ``product_id`` column.
            reviews: frame with ``user_id``, ``product_id`` and ``rating``.
            favourite_items: frame with ``user_id`` and ``product_id``.
            order_history: frame with ``user_id`` and ``product_id``;
                ``time_weight`` is used when present.
            new_item_cart: frame with ``user_id`` and ``product_id``.
            num_factors: number of singular values requested from ``svds``.
            regularization: stored but not used by any method of this class.
        """
        self.cosine_sim = cosine_sim
        self.products = products
        self.reviews = reviews
        self.favourite_items = favourite_items
        self.order_history = order_history
        self.new_item_cart = new_item_cart
        self.num_factors = num_factors
        self.regularization = regularization
        self.user_ids = reviews["user_id"].unique()
        self.product_ids = products["product_id"].unique()
        self.user_to_index = {uid: i for i, uid in enumerate(self.user_ids)}
        self.product_to_index = {pid: i for i, pid in enumerate(self.product_ids)}
        self.index_to_product = {i: pid for pid, i in self.product_to_index.items()}
        self.ratings_matrix = self._create_ratings_matrix()
        self.global_mean = self.reviews['rating'].mean()
        # Filled in by train_svd(); hybrid_recommend() trains lazily if it is
        # still None so that call order cannot cause an AttributeError.
        self.predicted_ratings = None

    def _create_ratings_matrix(self):
        """Return a dense (users x products) matrix of ratings, 0 meaning "no rating".

        Reviews that point at a product missing from ``products`` are skipped
        instead of raising ``KeyError``. When a user reviewed the same product
        several times, the last review in ``reviews`` wins.
        """
        matrix = np.zeros((len(self.user_ids), len(self.product_ids)))
        review_columns = zip(
            self.reviews["user_id"], self.reviews["product_id"], self.reviews["rating"]
        )
        for user_id, product_id, rating in review_columns:
            p_idx = self.product_to_index.get(product_id)
            if p_idx is None:
                continue  # review for a product outside the catalogue
            matrix[self.user_to_index[user_id], p_idx] = rating
        return matrix

    def train_svd(self):
        """Compute ``self.predicted_ratings`` from a truncated SVD of the ratings.

        Missing ratings (zeros) are replaced by the global mean rating, the
        matrix is approximated with at most ``num_factors`` singular values,
        and the result is min-max rescaled to the range 1..5 over the whole
        matrix (not per column).
        """
        ratings_filled = np.where(self.ratings_matrix == 0, self.global_mean, self.ratings_matrix)
        if ratings_filled.size == 0:
            # Nothing to factorise or scale; keep an empty matrix of the right shape.
            self.predicted_ratings = ratings_filled
            return

        # svds needs k strictly below the smaller matrix dimension; clamp so a
        # tiny data set degrades gracefully instead of raising.
        k = min(self.num_factors, min(ratings_filled.shape) - 1)
        if k >= 1:
            U, sigma, Vt = svds(ratings_filled, k=k)
            approximation = np.dot(np.dot(U, np.diag(sigma)), Vt)
        else:
            # A single row or column cannot be factorised with svds.
            approximation = ratings_filled

        scaler = MinMaxScaler(feature_range=(1, 5))
        # Flatten to one column so the scaling is global across all cells.
        self.predicted_ratings = scaler.fit_transform(
            approximation.reshape(-1, 1)
        ).reshape(self.ratings_matrix.shape)

    def get_content_score(self, user_id, top_n=3):
        """Score products by similarity to the user's favourites, orders and cart.

        For every seed product known to the catalogue, the ``top_n`` most
        similar products (per ``cosine_sim``; the seed itself is a candidate)
        have their similarity added to their running total.

        Returns:
            dict mapping product id -> accumulated similarity.
        """
        seed_items = set()
        for frame in (self.favourite_items, self.order_history, self.new_item_cart):
            seed_items.update(frame[frame['user_id'] == user_id]['product_id'])

        content_scores = {}
        for item in seed_items:
            idx = self.product_to_index.get(item)
            if idx is None:
                continue  # seed product is not part of the catalogue
            ranked = sorted(enumerate(self.cosine_sim[idx]), key=lambda x: x[1], reverse=True)
            for sim_idx, score in ranked[:top_n]:
                prod_id = self.index_to_product[sim_idx]
                content_scores[prod_id] = content_scores.get(prod_id, 0) + score
        return content_scores

    def _popular_fill(self, exclude, count):
        """Return up to ``count`` ``(product_id, mean_rating)`` pairs, best rated first.

        Only catalogue products are returned, and anything in ``exclude`` is
        left out.
        """
        popular = self.reviews.groupby('product_id')['rating'].mean().sort_values(ascending=False)
        in_catalogue = popular.index.isin(self.product_ids)
        excluded = popular.index.isin(list(exclude))
        return list(popular[in_catalogue & ~excluded].head(count).items())

    def hybrid_recommend(self, user_id, top_n=10, weights=None, randomness=0.1):
        """Return up to ``top_n`` ``(product_id, score)`` pairs for ``user_id``.

        Args:
            user_id: user to recommend for.
            top_n: maximum number of recommendations.
            weights: dict with ``collab``, ``content`` and ``history`` keys;
                defaults to 0.2 / 0.1 / 0.7.
            randomness: each score is perturbed by a uniform factor in
                ``[-randomness, +randomness]``.

        Users without any review get content-based results only. In both
        paths the list is topped up with the best-rated catalogue products
        when it is shorter than ``top_n``; those carry a mean rating rather
        than a blended score.
        """
        if weights is None:
            # Fresh copy per call: avoids a shared mutable default argument.
            weights = dict(self._DEFAULT_WEIGHTS)

        if user_id not in self.user_to_index:
            print(f"User {user_id} chưa có đánh giá nào. Kiểm tra dữ liệu khác...")
            content_scores = self.get_content_score(user_id)
            recommendations = sorted(content_scores.items(), key=lambda x: x[1], reverse=True)
            # Not enough products: top up from the cold-start list (popular products).
            if len(recommendations) < top_n:
                recommendations.extend(
                    self._popular_fill(content_scores.keys(), top_n - len(recommendations))
                )
            return recommendations[:top_n]

        if self.predicted_ratings is None:
            self.train_svd()

        user_idx = self.user_to_index[user_id]
        collab_scores = {
            self.index_to_product[i]: s for i, s in enumerate(self.predicted_ratings[user_idx])
        }
        content_scores = self.get_content_score(user_id)

        user_history = self.order_history[self.order_history['user_id'] == user_id]
        purchased = set(user_history['product_id'])
        history_scores = {}
        # Callers that did not compute time_weight simply get no history signal.
        if 'time_weight' in user_history.columns:
            history_scores = dict(zip(user_history['product_id'], user_history['time_weight']))

        final_scores = {}
        for prod_id in self.product_ids:
            final_score = 0
            if prod_id in collab_scores:
                final_score += weights['collab'] * collab_scores[prod_id]
            if prod_id in content_scores:
                final_score += weights['content'] * content_scores[prod_id]
            if prod_id in history_scores:
                final_score += weights['history'] * history_scores[prod_id]
            if final_score > 0:
                noise = random.uniform(-randomness, randomness) * final_score
                final_scores[prod_id] = final_score + noise

        # NOTE(review): history scores exist only for purchased products, and
        # those are dropped here, so the 'history' weight never changes the
        # output. Confirm whether that is intended.
        final_scores = {k: v for k, v in final_scores.items() if k not in purchased}
        top_candidates = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)
        if len(top_candidates) < top_n:
            # Never top up with something the user already bought or that is
            # already in the list.
            already_used = set(final_scores) | purchased
            top_candidates.extend(
                self._popular_fill(already_used, top_n - len(top_candidates))
            )
        return sorted(top_candidates[:top_n], key=lambda x: x[1], reverse=True)
async def recommend(user_id: int, number: int):
    """Build up to ``number`` product recommendations for ``user_id``.

    NOTE: this module defines ``recommend`` a second time further down; that
    later definition rebinds the name, so this one is not what importers get.

    The data returned by ``get_data.get_data_recommend()`` is read
    positionally: favourites, reviews, products, order history, cart.

    Returns:
        ``res_rcm.ListItemRecommend`` holding one ``ItemRecommend`` per
        recommended product that exists in the product table.
    """
    data = await get_data.get_data_recommend()
    # Materialise once and unpack, rather than calling list(data) per table.
    favourite_items, reviews, products, order_history, new_item_cart = (
        pd.DataFrame(part) for part in list(data)[:5]
    )

    products['description'] = (products['name'] + ' ' + products['category']).str.lower()

    # hybrid_recommend reads order_history['time_weight']; derive it the same
    # way as the other recommend() in this module so known users do not hit a
    # KeyError. NOTE(review): the timestamps are synthetic (one per row,
    # daily from 2024-10-01), not read from the data.
    order_history['timestamp'] = pd.date_range(start='2024-10-01', periods=len(order_history), freq='D')
    age_in_days = (pd.Timestamp.now() - order_history['timestamp']).dt.days
    order_history['time_weight'] = (1 - age_in_days / 365).clip(lower=0.0, upper=1.0)

    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=1000)
    tfidf_matrix = tfidf.fit_transform(products['description'].fillna(''))
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    recommender = HybridRecommender(cosine_sim, products, reviews, favourite_items, order_history, new_item_cart)
    recommender.train_svd()
    recommendations = recommender.hybrid_recommend(user_id, top_n=number)

    # One id -> name map (first occurrence wins) instead of a frame scan per
    # item; ids missing from the product table are skipped rather than
    # raising IndexError.
    name_by_id = products.drop_duplicates('product_id').set_index('product_id')['name'].to_dict()
    items = [
        res_rcm.ItemRecommend(pro_id=pid, product_name=name_by_id[pid])
        for pid, _ in recommendations
        if pid in name_by_id
    ]
    return res_rcm.ListItemRecommend(user_id=user_id, total=len(items), list_item=items)
async def recommend(user_id: int, number: int):
    """Build up to ``number`` product recommendations for ``user_id``.

    Steps: load the five tables from ``get_data.get_data_recommend()``
    (read positionally as favourites, reviews, products, order history,
    cart), derive a text description and a time weight, compute TF-IDF
    cosine similarity between products, then ask ``HybridRecommender`` for
    the ranked list.

    Returns:
        ``res_rcm.ListItemRecommend`` holding one ``ItemRecommend`` per
        recommended product that exists in the product table.
    """
    data = await get_data.get_data_recommend()
    # Materialise once and unpack, rather than calling list(data) per table.
    favourite_items, reviews, products, order_history, new_item_cart = (
        pd.DataFrame(part) for part in list(data)[:5]
    )

    products['description'] = products['name'] + ' ' + products['category']
    products['description'] = products['description'].str.lower()

    # Add a time weight to the purchase history.
    # NOTE(review): the timestamps are synthetic (one per row, daily from
    # 2024-10-01), not read from the data.
    order_history['timestamp'] = pd.date_range(start='2024-10-01', periods=len(order_history), freq='D')
    age_in_days = (pd.Timestamp.now() - order_history['timestamp']).dt.days
    # Clip so orders older than a year do not get a negative weight and
    # future-dated rows do not exceed 1.
    order_history['time_weight'] = (1 - age_in_days / 365).clip(lower=0.0, upper=1.0)

    # Improved content-based filtering: uni- and bi-gram TF-IDF over name + category.
    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=1000)
    tfidf_matrix = tfidf.fit_transform(products['description'].fillna(''))
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    recommender = HybridRecommender(cosine_sim, products, reviews, favourite_items, order_history, new_item_cart)
    recommender.train_svd()
    recommendations = recommender.hybrid_recommend(user_id, top_n=number, randomness=0.1)

    # One id -> name map (first occurrence wins) instead of a frame scan per
    # item; ids missing from the product table are skipped rather than
    # raising IndexError.
    name_by_id = products.drop_duplicates('product_id').set_index('product_id')['name'].to_dict()
    items = [
        res_rcm.ItemRecommend(pro_id=product_id, product_name=name_by_id[product_id])
        for product_id, _ in recommendations
        if product_id in name_by_id
    ]
    return res_rcm.ListItemRecommend(user_id=user_id, total=len(items), list_item=items)
if __name__ == "__main__":
    # Manual smoke run: print the top 10 recommendations for user 4.
    import asyncio

    demo_result = asyncio.run(recommend(user_id=4, number=10))
    print(demo_result)