import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
from sklearn.preprocessing import MinMaxScaler
import random
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
import response.ResponseRecommender as res_rcm
from function.recommender import get_data


class HybridRecommender:
    """Recommend products by blending collaborative, content and history signals.

    * collaborative: ratings predicted by a truncated SVD of the user x product
      rating matrix (see ``train_svd``);
    * content: item-to-item similarity read from ``cosine_sim`` for the products
      a user has favourited, ordered or put in the cart;
    * history: the ``time_weight`` column of ``order_history``.

    Constructor arguments:
        cosine_sim: 2-D array indexed as ``cosine_sim[i][j]``.
            NOTE(review): row/column ``i`` is assumed to correspond to the i-th
            distinct ``product_id`` of ``products``; confirm that ``products``
            has no duplicate ids, otherwise the indices drift apart.
        products: DataFrame with a ``product_id`` column.
        reviews: DataFrame with ``user_id``, ``product_id`` and ``rating``.
        favourite_items, order_history, new_item_cart: DataFrames with
            ``user_id`` and ``product_id``; a frame without those columns
            (e.g. built from an empty result set) is treated as "no rows".
            ``order_history`` may also carry ``time_weight``.
        num_factors: number of singular values kept by ``train_svd``.
        regularization: stored on the instance but not used by this class.
    """

    # Blend weights applied by ``hybrid_recommend`` when the caller passes none.
    DEFAULT_WEIGHTS = {'collab': 0.2, 'content': 0.1, 'history': 0.7}

    def __init__(self, cosine_sim, products, reviews, favourite_items, order_history, new_item_cart,
                 num_factors=3, regularization=0.02):
        self.cosine_sim = cosine_sim
        self.products = products
        self.reviews = reviews
        self.favourite_items = favourite_items
        self.order_history = order_history
        self.new_item_cart = new_item_cart
        self.num_factors = num_factors
        self.regularization = regularization

        # Only users that wrote at least one review get a row in the matrix.
        self.user_ids = reviews["user_id"].unique()
        self.product_ids = products["product_id"].unique()
        self.user_to_index = {uid: i for i, uid in enumerate(self.user_ids)}
        self.product_to_index = {pid: i for i, pid in enumerate(self.product_ids)}
        self.index_to_product = {i: pid for pid, i in self.product_to_index.items()}

        # Filled in by train_svd(); hybrid_recommend() trains lazily if needed.
        self.predicted_ratings = None
        self.ratings_matrix = self._create_ratings_matrix()
        self.global_mean = self.reviews['rating'].mean()

    def _create_ratings_matrix(self):
        """Return the dense user x product rating matrix (0 means "not rated").

        Reviews of products that are not in the catalogue are skipped instead of
        raising ``KeyError``. When a user reviewed a product more than once the
        last review wins.
        """
        matrix = np.zeros((len(self.user_ids), len(self.product_ids)))
        rows = zip(self.reviews["user_id"], self.reviews["product_id"], self.reviews["rating"])
        for user_id, product_id, rating in rows:
            p_idx = self.product_to_index.get(product_id)
            if p_idx is None:
                continue
            matrix[self.user_to_index[user_id], p_idx] = rating
        return matrix

    def train_svd(self):
        """Compute ``self.predicted_ratings`` from a truncated SVD.

        Unrated cells (zeros) are replaced by the global mean rating, the matrix
        is approximated with at most ``num_factors`` singular values and the
        result is rescaled as a whole to the range [1, 5].
        """
        matrix = self.ratings_matrix
        if matrix.size == 0:
            # No users or no products: nothing to factorise or to scale.
            self.predicted_ratings = np.zeros(matrix.shape)
            return

        ratings_filled = np.where(matrix == 0, self.global_mean, matrix)
        # svds (default solver) needs 1 <= k < min(matrix.shape).
        k = min(self.num_factors, min(matrix.shape) - 1)
        if k >= 1:
            U, sigma, Vt = svds(ratings_filled, k=k)
            # Same product as U @ diag(sigma) @ Vt without building the diagonal.
            approximation = np.dot(U * sigma, Vt)
        else:
            # A single row or column cannot be factorised; use the filled matrix.
            approximation = ratings_filled

        scaler = MinMaxScaler(feature_range=(1, 5))
        # Flatten so the min/max are taken over the whole matrix, not per column.
        self.predicted_ratings = scaler.fit_transform(
            approximation.reshape(-1, 1)
        ).reshape(matrix.shape)

    @staticmethod
    def _products_for_user(frame, user_id):
        """Return the set of product ids ``frame`` lists for ``user_id``."""
        if "user_id" not in frame.columns or "product_id" not in frame.columns:
            return set()
        return set(frame.loc[frame["user_id"] == user_id, "product_id"])

    def _history_scores(self, user_id):
        """Map each product the user ordered to its ``time_weight`` (last row wins)."""
        orders = self.order_history
        if not {"user_id", "product_id", "time_weight"}.issubset(orders.columns):
            return {}
        own_orders = orders[orders["user_id"] == user_id]
        return dict(zip(own_orders["product_id"], own_orders["time_weight"]))

    def _popular_fallback(self, excluded, count):
        """Return up to ``count`` ``(product_id, mean_rating)`` pairs, best rated first.

        Only catalogue products are considered and ids in ``excluded`` are left out.
        """
        if count <= 0:
            return []
        popular = self.reviews.groupby('product_id')['rating'].mean().sort_values(ascending=False)
        allowed = popular.index.isin(self.product_ids) & ~popular.index.isin(list(excluded))
        return list(popular[allowed].head(count).items())

    def get_content_score(self, user_id, top_n=3):
        """Score products by similarity to what the user already interacted with.

        For every catalogue product the user favourited, ordered or put in the
        cart, the ``top_n`` most similar products (the product itself included)
        receive their similarity value; values are summed per product.

        Returns:
            dict mapping product_id to the accumulated similarity.
        """
        relevant_items = (
            self._products_for_user(self.favourite_items, user_id)
            | self._products_for_user(self.order_history, user_id)
            | self._products_for_user(self.new_item_cart, user_id)
        )
        content_scores = {}
        for item in relevant_items:
            idx = self.product_to_index.get(item)
            if idx is None:
                continue
            ranked = sorted(enumerate(self.cosine_sim[idx]), key=lambda x: x[1], reverse=True)
            for sim_idx, score in ranked[:top_n]:
                prod_id = self.index_to_product[sim_idx]
                content_scores[prod_id] = content_scores.get(prod_id, 0) + score

        return content_scores

    def hybrid_recommend(self, user_id, top_n=10, weights=None, randomness=0.1):
        """Return up to ``top_n`` ``(product_id, score)`` pairs for ``user_id``.

        Args:
            user_id: user to recommend for.
            top_n: maximum number of recommendations.
            weights: dict with any of the keys ``'collab'``, ``'content'`` and
                ``'history'``; missing keys (or ``None``) fall back to
                ``DEFAULT_WEIGHTS``.
            randomness: each blended score is perturbed by a uniform factor in
                ``[-randomness, +randomness]``; pass 0 for deterministic output.

        Users without any review take the cold-start path: content scores
        first, then the best rated products. For all other users the blended
        scores are ranked, products the user already ordered are removed, and
        the best rated products that were not ordered fill any remaining slots.
        Fill-in items carry their mean rating as score and are appended after
        the personalised ones, because the two scales are not comparable.
        """
        # Copy so neither the class default nor the caller's dict is shared or mutated.
        weights = {**self.DEFAULT_WEIGHTS, **(weights or {})}

        if user_id not in self.user_to_index:
            print(f"User {user_id} chưa có đánh giá nào. Kiểm tra dữ liệu khác...")
            content_scores = self.get_content_score(user_id)
            recommendations = sorted(content_scores.items(), key=lambda x: x[1], reverse=True)

            # Not enough products yet: top up with popular (best rated) ones.
            recommendations.extend(
                self._popular_fallback(content_scores, top_n - len(recommendations))
            )
            return recommendations[:top_n]

        if self.predicted_ratings is None:
            self.train_svd()

        user_idx = self.user_to_index[user_id]
        collab_scores = {
            self.index_to_product[i]: s for i, s in enumerate(self.predicted_ratings[user_idx])
        }
        content_scores = self.get_content_score(user_id)
        # NOTE(review): history scores only exist for ordered products, and those
        # are removed below, so this term cannot change the returned ranking.
        history_scores = self._history_scores(user_id)

        final_scores = {}
        for prod_id in self.product_ids:
            final_score = 0
            if prod_id in collab_scores:
                final_score += weights['collab'] * collab_scores[prod_id]
            if prod_id in content_scores:
                final_score += weights['content'] * content_scores[prod_id]
            if prod_id in history_scores:
                final_score += weights['history'] * history_scores[prod_id]
            if final_score > 0:
                noise = random.uniform(-randomness, randomness) * final_score
                final_scores[prod_id] = final_score + noise

        purchased = self._products_for_user(self.order_history, user_id)
        final_scores = {k: v for k, v in final_scores.items() if k not in purchased}

        top_candidates = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)

        # Purchased products must stay excluded from the fill-in as well.
        top_candidates.extend(
            self._popular_fallback(set(final_scores) | purchased, top_n - len(top_candidates))
        )

        return top_candidates[:top_n]

async def recommend(user_id: int, number: int):
    """Recommend ``number`` products to ``user_id`` from freshly loaded data.

    NOTE: a second ``recommend`` defined further down in this file rebinds the
    name at import time, so callers get that later definition, not this one.

    The data set returned by ``get_data.get_data_recommend()`` is read by
    position: 0 favourites, 1 reviews, 2 products, 3 order history, 4 cart.

    Returns:
        ``res_rcm.ListItemRecommend`` holding one ``ItemRecommend`` per product.
    """
    # Materialise once instead of rebuilding the list for every frame.
    data = list(await get_data.get_data_recommend())
    favourite_items = pd.DataFrame(data[0])
    reviews = pd.DataFrame(data[1])
    products = pd.DataFrame(data[2])
    order_history = pd.DataFrame(data[3])
    new_item_cart = pd.DataFrame(data[4])

    products['description'] = (products['name'] + ' ' + products['category']).str.lower()

    # hybrid_recommend reads order_history['time_weight']; without this column
    # it fails for every user that has orders.
    # NOTE(review): timestamps are synthesised (one per day from 2024-10-01),
    # matching the later ``recommend`` definition, not read from the order data.
    order_history['timestamp'] = pd.date_range(start='2024-10-01', periods=len(order_history), freq='D')
    age_in_days = (pd.Timestamp.now() - order_history['timestamp']).dt.days
    order_history['time_weight'] = (1 - age_in_days / 365).clip(lower=0, upper=1)

    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=1000)
    tfidf_matrix = tfidf.fit_transform(products['description'].fillna(''))
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    recommender = HybridRecommender(cosine_sim, products, reviews, favourite_items, order_history, new_item_cart)
    recommender.train_svd()

    recommendations = recommender.hybrid_recommend(user_id, top_n=number)

    # One id -> name table instead of filtering the frame per recommendation;
    # the first row wins for a duplicated product_id, as `.iloc[0]` did.
    catalogue = products.drop_duplicates('product_id')
    name_by_id = dict(zip(catalogue['product_id'], catalogue['name']))
    items = [res_rcm.ItemRecommend(pro_id=pid, product_name=name_by_id[pid])
             for pid, _ in recommendations]

    return res_rcm.ListItemRecommend(user_id=user_id, total=len(items), list_item=items)






async def recommend(user_id:int, number:int):
    """Recommend ``number`` products to ``user_id`` from freshly loaded data.

    Loads the data set with ``get_data.get_data_recommend()`` (read by
    position: 0 favourites, 1 reviews, 2 products, 3 order history, 4 cart),
    builds a TF-IDF similarity matrix over product name + category, trains a
    ``HybridRecommender`` and converts its ranking into response objects.

    Returns:
        ``res_rcm.ListItemRecommend`` holding one ``ItemRecommend`` per product.
    """
    # Materialise once instead of rebuilding the list for every frame.
    data = list(await get_data.get_data_recommend())
    favourite_items = pd.DataFrame(data[0])
    reviews = pd.DataFrame(data[1])
    products = pd.DataFrame(data[2])
    order_history = pd.DataFrame(data[3])
    new_item_cart = pd.DataFrame(data[4])

    products['description'] = (products['name'] + ' ' + products['category']).str.lower()

    # Add a time weight to the purchase history.
    # NOTE(review): timestamps are synthesised (one per day from 2024-10-01)
    # rather than read from the order data; confirm this is intended.
    order_history['timestamp'] = pd.date_range(start='2024-10-01', periods=len(order_history), freq='D')
    age_in_days = (pd.Timestamp.now() - order_history['timestamp']).dt.days
    # Keep the weight in [0, 1]: orders older than a year would otherwise go
    # negative and future-dated rows would exceed 1.
    order_history['time_weight'] = (1 - age_in_days / 365).clip(lower=0, upper=1)

    # Improved content-based filtering: TF-IDF over uni- and bigrams.
    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2), max_features=1000)
    tfidf_matrix = tfidf.fit_transform(products['description'].fillna(''))
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

    recommender = HybridRecommender(cosine_sim, products, reviews, favourite_items, order_history, new_item_cart)
    recommender.train_svd()
    recommendations = recommender.hybrid_recommend(user_id, top_n=number, randomness=0.1)

    # One id -> name table instead of filtering the frame per recommendation;
    # the first row wins for a duplicated product_id, as `.iloc[0]` did.
    catalogue = products.drop_duplicates('product_id')
    name_by_id = dict(zip(catalogue['product_id'], catalogue['name']))
    items = [
        res_rcm.ItemRecommend(pro_id=product_id, product_name=name_by_id[product_id])
        for product_id, _ in recommendations
    ]

    return res_rcm.ListItemRecommend(user_id=user_id, total=len(items), list_item=items)
if __name__ == "__main__":
    # Manual smoke test: print ten recommendations for user 4.
    import asyncio

    demo_result = asyncio.run(recommend(4, 10))
    print(demo_result)