import gradio as gr
import pickle
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import os
from scipy.sparse import csr_matrix

class ItemBasedCF:
    """Item-based collaborative-filtering rating predictor.

    The instance only stores state; ``item_similarity`` and
    ``user_item_matrix`` start as ``None`` and must be assigned before
    :meth:`predict` is called. ``predict`` calls ``.toarray()`` on single rows
    of both, so they are presumably SciPy sparse matrices -- confirm against
    the training code. ``n_neighbors`` is stored but not read by ``predict``.
    """

    def __init__(self, n_neighbors=20):
        self.n_neighbors = n_neighbors
        self.item_similarity = None
        self.user_item_matrix = None

    def predict(self, user_idx, movie_idx):
        """Predict the rating of ``movie_idx`` for ``user_idx``.

        The prediction is the similarity-weighted average of the ratings the
        user has already given, clipped to the range [1, 5].

        Args:
            user_idx: Row index into ``user_item_matrix``.
            movie_idx: Row index into ``item_similarity``.

        Returns:
            The clipped prediction, or the neutral value 2.5 when the user has
            no positive ratings or the similarities to the rated items sum to
            zero.
        """
        neutral_rating = 2.5

        ratings_row = self.user_item_matrix[user_idx].toarray().flatten()
        has_rating = ratings_row > 0
        if not has_rating.any():
            # Nothing rated yet, so there is nothing to average over.
            return neutral_rating

        # Keep only the similarities towards items this user actually rated.
        similarity_row = self.item_similarity[movie_idx].toarray().flatten()
        masked_similarity = similarity_row * has_rating
        total_weight = masked_similarity.sum()
        if total_weight == 0:
            # Avoid dividing by zero when no rated item is similar.
            return neutral_rating

        weighted_average = (masked_similarity * ratings_row).sum() / total_weight
        return np.clip(weighted_average, 1, 5)


class SVDRecommender:
    """Matrix-factorization rating predictor.

    Stores per-user and per-item factor tables (both ``None`` until assigned
    by code outside this class) plus a global mean that defaults to 3.5.
    ``n_factors`` is stored but not read by ``predict``.
    """

    def __init__(self, n_factors=50):
        self.n_factors = n_factors
        self.user_factors = None
        self.item_factors = None
        self.global_mean = 3.5

    def predict(self, user_idx, movie_idx):
        """Return the predicted rating for a user/movie index pair.

        The score is the global mean plus the dot product of the user's and
        the movie's factor vectors, clipped to the range [1, 5].
        """
        user_vector = self.user_factors[user_idx]
        item_vector = self.item_factors[movie_idx]
        interaction = np.dot(user_vector, item_vector)
        return np.clip(self.global_mean + interaction, 1, 5)


class NeuralCF(nn.Module):
    """Neural collaborative filtering: user/movie embeddings fed to an MLP.

    Args:
        n_users: Number of rows in the user embedding table.
        n_movies: Number of rows in the movie embedding table.
        embedding_dim: Size of each embedding vector.
        hidden_layers: Widths of the hidden ``Linear`` layers, in order. Any
            iterable of ints is accepted; the default is an immutable tuple so
            that it cannot be shared and mutated across instances.
    """

    def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=(64, 32, 16)):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)

        # The MLP consumes the concatenated user and movie embeddings.
        layers = []
        input_dim = embedding_dim * 2
        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.2))
            input_dim = hidden_dim
        # Final projection down to a single rating score.
        layers.append(nn.Linear(input_dim, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, user_ids, movie_ids):
        """Score a batch of (user, movie) index pairs.

        Args:
            user_ids: 1-D integer tensor of user indices.
            movie_ids: 1-D integer tensor of movie indices, same length.

        Returns:
            The MLP output with all size-1 dimensions squeezed away: shape
            ``(batch,)`` for a batch larger than one, and a 0-d tensor for a
            batch of exactly one.
        """
        user_emb = self.user_embedding(user_ids)
        movie_emb = self.movie_embedding(movie_ids)
        x = torch.cat([user_emb, movie_emb], dim=1)
        output = self.mlp(x)
        return output.squeeze()

    def predict(self, user_idx, movie_idx, device='cpu'):
        """Predict a single rating as a Python float clamped to [1, 5].

        Switches the module to eval mode (it is not switched back) and runs
        without gradient tracking. ``device`` is where the index tensors are
        created; it must match the device holding the model's parameters.
        """
        self.eval()
        with torch.no_grad():
            user_tensor = torch.tensor([user_idx], dtype=torch.long, device=device)
            movie_tensor = torch.tensor([movie_idx], dtype=torch.long, device=device)
            prediction = self.forward(user_tensor, movie_tensor)
            return torch.clamp(prediction, 1, 5).item()


class HybridRecommender:
    """Weighted blend of three rating predictors.

    ``item_cf``, ``svd`` and ``ncf`` start as ``None`` and must be assigned
    objects exposing ``predict(user_idx, movie_idx)``. ``item_cf`` must also
    expose ``user_item_matrix``, which is used to find the movies a user has
    already rated.
    """

    def __init__(self, n_users, n_movies):
        self.n_users = n_users
        self.n_movies = n_movies
        self.item_cf = None
        self.svd = None
        self.ncf = None
        # Blend weights applied to each sub-model's prediction.
        self.weights = {
            'item_cf': 0.3,
            'svd': 0.4,
            'ncf': 0.3
        }

    def predict(self, user_idx, movie_idx):
        """Return the weighted sum of the sub-model predictions, clipped to [1, 5]."""
        cf_pred = self.item_cf.predict(user_idx, movie_idx)
        svd_pred = self.svd.predict(user_idx, movie_idx)
        ncf_pred = self.ncf.predict(user_idx, movie_idx)

        prediction = (
            self.weights['item_cf'] * cf_pred +
            self.weights['svd'] * svd_pred +
            self.weights['ncf'] * ncf_pred
        )

        return np.clip(prediction, 1, 5)

    def recommend_movies(self, user_id, N=10, user_id_map=None, reverse_movie_map=None, movies_df=None):
        """Return the top-``N`` unrated movies for a user, best first.

        Args:
            user_id: External user id, or the internal row index when
                ``user_id_map`` is ``None``.
            N: Maximum number of recommendations. ``None`` returns every
                unrated movie; zero or a negative value returns ``[]``.
            user_id_map: Optional mapping from external user id to row index.
            reverse_movie_map: Optional mapping from movie index to external
                movie id. When omitted the index itself is reported.
            movies_df: Optional DataFrame with ``movie_id`` and ``title``
                columns used to resolve titles.

        Returns:
            A list of ``(movie_id, title, score)`` tuples. The list is empty
            when ``user_id`` is not in ``user_id_map``. A movie with no row in
            ``movies_df`` gets the placeholder title ``"Movie <id>"``.

        Raises:
            KeyError: If ``reverse_movie_map`` lacks a recommended index.
        """
        if user_id_map is not None:
            if user_id not in user_id_map:
                return []
            user_idx = user_id_map[user_id]
        else:
            user_idx = user_id

        # A negative N would slice from the wrong end (scores[:-1] is almost
        # the whole list), so treat every non-positive request as "nothing".
        if N is not None and N <= 0:
            return []

        user_row = self.item_cf.user_item_matrix[user_idx].toarray().flatten()
        rated_movies = set(np.flatnonzero(user_row > 0).tolist())

        scores = [
            (movie_idx, self.predict(user_idx, movie_idx))
            for movie_idx in range(self.n_movies)
            if movie_idx not in rated_movies
        ]
        scores.sort(key=lambda pair: pair[1], reverse=True)

        recommendations = []
        for movie_idx, score in scores[:N]:
            if reverse_movie_map is not None:
                original_movie_id = reverse_movie_map[movie_idx]
            else:
                original_movie_id = movie_idx
            title = self._lookup_title(movies_df, original_movie_id)
            recommendations.append((original_movie_id, title, score))

        return recommendations

    @staticmethod
    def _lookup_title(movies_df, movie_id):
        """Return the title for ``movie_id``, or ``"Movie <id>"`` if unavailable."""
        placeholder = f"Movie {movie_id}"
        if movies_df is None:
            return placeholder
        titles = movies_df.loc[movies_df['movie_id'] == movie_id, 'title'].values
        # An id missing from the catalogue must not abort the whole request.
        return titles[0] if len(titles) else placeholder


class MovieLensDataLoader:
    """Container for dataset paths and id/index lookup tables.

    Only the constructor is defined here; the four maps start empty and are
    populated by code outside this class. Judging by how the app's handlers
    use a loaded ``loader`` object, ``user_id_map`` presumably maps external
    user ids to matrix row indices and ``reverse_movie_map`` maps movie
    indices back to external movie ids -- confirm against the training code.
    """

    def __init__(self, ratings_path=None, movies_path=None):
        self.ratings_path, self.movies_path = ratings_path, movies_path
        # Each table gets its own fresh dict so instances never share state.
        self.user_id_map, self.movie_id_map = {}, {}
        self.reverse_user_map, self.reverse_movie_map = {}, {}


def load_model_and_data():
    """Load the pickled model, loader and movie table from ``model_artifacts/``.

    Diagnostic information (working directory, directory listings, file sizes
    and load progress) is printed along the way.

    Returns:
        ``(model, loader, movies, user_ids)`` on success, where ``user_ids`` is
        the sorted list of keys of ``loader.user_id_map``. On any failure the
        error is printed and ``(None, None, None, [])`` is returned instead of
        raising.
    """
    artifact_dir = 'model_artifacts'

    print("Checking for files...")
    print(f"Current directory: {os.getcwd()}")
    print(f"Files in current directory: {os.listdir('.')}")

    if not os.path.exists(artifact_dir):
        print("ERROR: model_artifacts/ folder does not exist!")
    else:
        print(f"Files in model_artifacts/: {os.listdir(artifact_dir)}")

    try:
        artifact_paths = (
            'model_artifacts/hybrid_model.pkl',
            'model_artifacts/loader.pkl',
            'model_artifacts/movies.pkl',
        )

        # Report on every artifact first so the log shows all missing files,
        # not just the first one that fails to open.
        for artifact in artifact_paths:
            if os.path.exists(artifact):
                size_mb = os.path.getsize(artifact) / (1024*1024)
                print(f"Found: {artifact} ({size_mb:.2f} MB)")
            else:
                print(f"ERROR: Missing file: {artifact}")

        # NOTE: pickle.load can execute arbitrary code from the file, so these
        # artifacts must come from a trusted source.
        loaded = []
        for artifact in artifact_paths:
            with open(artifact, 'rb') as handle:
                loaded.append(pickle.load(handle))
            print(f"Loaded {os.path.basename(artifact)}")
        model, loader, movies = loaded

        user_ids = sorted(loader.user_id_map.keys())
        print(f"Model loaded successfully! {len(user_ids)} users available")

        return model, loader, movies, user_ids
    except FileNotFoundError as e:
        print(f"ERROR: File not found - {e}")
        print("Make sure all pkl files are in the model_artifacts/ folder")
        return None, None, None, []
    except Exception as e:
        # Any other failure (corrupt pickle, missing class, ...) is logged
        # with a traceback and reported to the caller as "nothing loaded".
        print(f"ERROR loading model: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        return None, None, None, []


# Load the artifacts once at import time; the UI callbacks below read these
# module-level names. A failed load leaves model/loader as None, in which case
# the callbacks return an error message instead of results.
print("Loading model and data...")
model, loader, movies_df, user_ids = load_model_and_data()
if model is None or loader is None:
    # Do not claim success when the loader reported a failure.
    print("WARNING: Model not loaded; requests will return an error until the artifacts are fixed.")
else:
    print(f"Model loaded! Available users: {len(user_ids)}")


def get_recommendations(user_id, num_recommendations):
    """Format the top movie recommendations for a user as display text.

    Args:
        user_id: User id from the UI; anything ``int()`` accepts.
        num_recommendations: Requested number of recommendations; anything
            ``int()`` accepts.

    Returns:
        A multi-line string listing the recommendations, or a human-readable
        error message. This function never raises, so it is safe to use
        directly as a UI callback.
    """
    if model is None or loader is None:
        return "Error: Model not loaded properly. Please check the model files."

    # Validate the inputs on their own so that only genuinely bad input gets
    # the "valid numbers" message. TypeError covers a cleared field (None),
    # ValueError covers non-numeric text and NaN, OverflowError covers inf.
    try:
        user_id = int(user_id)
        num_recommendations = int(num_recommendations)
    except (TypeError, ValueError, OverflowError):
        return "Error: Please enter valid numbers for User ID and Number of Recommendations"

    if num_recommendations < 1:
        return "Error: Number of Recommendations must be at least 1"

    try:
        if user_id not in loader.user_id_map:
            return f"User ID {user_id} not found! Please select a valid user ID."

        recommendations = model.recommend_movies(
            user_id=user_id,
            N=num_recommendations,
            user_id_map=loader.user_id_map,
            reverse_movie_map=loader.reverse_movie_map,
            movies_df=movies_df,
        )

        if not recommendations:
            return f"No recommendations found for User {user_id}"

        parts = [
            f"Top {num_recommendations} Movie Recommendations for User {user_id}\n\n",
            "=" * 60 + "\n\n",
        ]
        for rank, (movie_id, title, score) in enumerate(recommendations, 1):
            # One asterisk per whole rating point.
            stars = "*" * int(score)
            parts.append(f"{rank}. {title}\n")
            parts.append(f"   Predicted Rating: {score:.2f}/5.00 {stars}\n")
            parts.append(f"   Movie ID: {movie_id}\n\n")

        return "".join(parts)
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"Error generating recommendations: {str(e)}"


def get_user_history(user_id):
    """Format a user's rating history as display text.

    The summary shows the number of rated movies, the average rating and the
    ten highest-rated titles. A rated movie that has no row in ``movies_df``
    is listed as ``"Movie <id>"`` rather than aborting the whole request.

    Args:
        user_id: User id from the UI; anything ``int()`` accepts.

    Returns:
        The formatted history, or a human-readable error message. This
        function never raises, so it is safe to use as a UI callback.
    """
    if model is None or loader is None:
        return "Error: Model not loaded properly."

    # A cleared field arrives as None (TypeError); text, NaN and inf are
    # rejected with ValueError / OverflowError.
    try:
        user_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        return "Error: Please enter a valid User ID"

    try:
        if user_id not in loader.user_id_map:
            return f"User ID {user_id} not found!"

        user_idx = loader.user_id_map[user_id]

        user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
        rated_indices = np.flatnonzero(user_ratings > 0)

        if len(rated_indices) == 0:
            return f"No rating history found for User {user_id}"

        # Build the id -> title lookup once instead of scanning the whole
        # DataFrame for every rated movie. drop_duplicates keeps the first
        # row per id, matching a "first match" lookup.
        if movies_df is None:
            title_by_id = {}
        else:
            title_by_id = (
                movies_df.drop_duplicates('movie_id')
                .set_index('movie_id')['title']
                .to_dict()
            )

        history = []
        for movie_idx in rated_indices:
            original_movie_id = loader.reverse_movie_map[movie_idx]
            title = title_by_id.get(original_movie_id, f"Movie {original_movie_id}")
            history.append((title, user_ratings[movie_idx]))

        # Highest ratings first; the sort is stable, so ties keep index order.
        history.sort(key=lambda entry: entry[1], reverse=True)

        parts = [
            f"Rating History for User {user_id}\n\n",
            f"Total movies rated: {len(history)}\n",
            f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n",
            "=" * 60 + "\n\n",
            "Top 10 Highest Rated Movies:\n\n",
        ]
        for rank, (title, rating) in enumerate(history[:10], 1):
            stars = "*" * int(rating)
            parts.append(f"{rank}. {title} - {rating:.1f}/5 {stars}\n")

        return "".join(parts)
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"Error: {str(e)}"


def get_movie_info(movie_title_search):
    """Search movie titles for a substring and format the matches as text.

    The search is a case-insensitive *literal* substring match: characters
    such as ``(``, ``.`` or ``*`` in the query are matched as typed rather
    than being interpreted as a regular expression. Surrounding whitespace in
    the query is ignored.

    Args:
        movie_title_search: Text typed by the user; ``None`` or blank input
            yields a prompt instead of matching every movie.

    Returns:
        Up to 20 matching titles with their ids plus a count of the rest, or
        a human-readable message when nothing matches or something fails.
    """
    if movies_df is None:
        return "Error: Movies data not loaded"

    query = "" if movie_title_search is None else str(movie_title_search).strip()
    if not query:
        return "Please enter a movie title to search for."

    try:
        # regex=False: user input is plain text, not a pattern, so titles
        # like "M*A*S*H" or a lone "(" neither mis-match nor raise.
        is_match = movies_df['title'].str.contains(query, case=False, na=False, regex=False)
        matches = movies_df[is_match]

        if len(matches) == 0:
            return f"No movies found matching '{query}'"

        parts = [
            f"Search Results for '{query}'\n\n",
            f"Found {len(matches)} movie(s):\n\n",
            "=" * 60 + "\n\n",
        ]
        for rank, (_, row) in enumerate(matches.head(20).iterrows(), 1):
            parts.append(f"{rank}. {row['title']} (ID: {row['movie_id']})\n")

        if len(matches) > 20:
            parts.append(f"\n... and {len(matches) - 20} more results")

        return "".join(parts)
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing.
        return f"Error: {str(e)}"


# Declarative Gradio layout. Components are created in the order the
# statements run inside the nested `with` blocks, so statement order here is
# the on-screen order. The finished app is bound to the module-level `demo`.
with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
    
    # Page header.
    gr.Markdown("""
    # Hybrid Movie Recommendation System
    ### DataSynthis Job Task - Powered by AI
    
    This system combines Collaborative Filtering, SVD Matrix Factorization, and Neural Networks 
    to provide personalized movie recommendations from the MovieLens 1M dataset.
    """)
    
    with gr.Tabs():
        
        # Tab 1: user id + count on the left, recommendation text on the right.
        with gr.Tab("Get Recommendations"):
            gr.Markdown("### Get personalized movie recommendations for any user")
            
            with gr.Row():
                with gr.Column(scale=1):
                    # NOTE(review): the 1-6040 bounds are hard-coded here and
                    # are not derived from the loaded `user_ids`.
                    user_id_input = gr.Number(
                        label="User ID",
                        value=1,
                        minimum=1,
                        maximum=6040,
                        step=1,
                        info=f"Enter a user ID (1-6040)"
                    )
                    
                    num_recs_input = gr.Slider(
                        label="Number of Recommendations",
                        minimum=5,
                        maximum=20,
                        value=10,
                        step=1
                    )
                    
                    recommend_btn = gr.Button("Get Recommendations", variant="primary")
                
                with gr.Column(scale=2):
                    recommendations_output = gr.Textbox(
                        label="Recommendations",
                        lines=20,
                        max_lines=30
                    )
            
            # Clicking the button passes both inputs to get_recommendations
            # and shows the returned string in the textbox.
            recommend_btn.click(
                fn=get_recommendations,
                inputs=[user_id_input, num_recs_input],
                outputs=recommendations_output
            )
            
            gr.Markdown("""
            **How it works:**
            - Enter a User ID (between 1 and 6040)
            - Choose how many recommendations you want
            - Click "Get Recommendations" to see personalized movie suggestions
            """)
        
        # Tab 2: rating history for a single user.
        with gr.Tab("User History"):
            gr.Markdown("### View a user's rating history")
            
            with gr.Row():
                with gr.Column(scale=1):
                    user_id_history = gr.Number(
                        label="User ID",
                        value=1,
                        minimum=1,
                        maximum=6040,
                        step=1
                    )
                    
                    history_btn = gr.Button("View History", variant="primary")
                
                with gr.Column(scale=2):
                    history_output = gr.Textbox(
                        label="Rating History",
                        lines=20,
                        max_lines=30
                    )
            
            # Clicking the button passes the user id to get_user_history.
            history_btn.click(
                fn=get_user_history,
                inputs=user_id_history,
                outputs=history_output
            )
        
        # Tab 3: title search over the movie table.
        with gr.Tab("Search Movies"):
            gr.Markdown("### Search for movies in the database")
            
            with gr.Row():
                with gr.Column(scale=1):
                    movie_search = gr.Textbox(
                        label="Movie Title Search",
                        placeholder="e.g., Star Wars, Godfather, Titanic...",
                        value="Star Wars"
                    )
                    
                    search_btn = gr.Button("Search", variant="primary")
                
                with gr.Column(scale=2):
                    search_output = gr.Textbox(
                        label="Search Results",
                        lines=20,
                        max_lines=30
                    )
            
            # Clicking the button passes the search text to get_movie_info.
            search_btn.click(
                fn=get_movie_info,
                inputs=movie_search,
                outputs=search_output
            )
        
        # Tab 4: static description of the system; no callbacks.
        # NOTE(review): the metrics and dataset figures below are literal text
        # and are not computed from the loaded model or data.
        with gr.Tab("About"):
            gr.Markdown("""
            ## About This System
            
            ### Model Architecture
            This is a Hybrid Recommendation System that combines three powerful approaches:
            
            1. Item-Based Collaborative Filtering
               - Uses cosine similarity between movies
               - Recommends movies similar to what you've liked before
            
            2. SVD Matrix Factorization
               - Decomposes the user-movie rating matrix
               - Discovers latent factors that explain user preferences
            
            3. Neural Collaborative Filtering (NCF)
               - Deep learning model with user and movie embeddings
               - Learns complex non-linear patterns in user behavior
            
            ### Dataset
            - MovieLens 1M dataset
            - 1,000,209 ratings from 6,040 users on 3,900 movies
            - Ratings scale: 1-5 stars
            
            ### Performance Metrics
            - Precision@10: 26.77%
            - NDCG@10: 28.50%
            - Model improves recommendations by 40% vs baseline
            
            ### Created For
            DataSynthis Job Task
            
            ### Technologies Used
            - PyTorch (Neural Networks)
            - Scikit-learn (SVD, Similarity)
            - Pandas & NumPy (Data Processing)
            - Gradio (Web Interface)
            
            Note: This model is trained on the MovieLens 1M dataset. 
            User IDs range from 1 to 6040, and movie IDs range from 1 to 3952.
            """)
    
    # Page footer.
    gr.Markdown("""
    ---
    Hybrid Movie Recommendation System | Built for DataSynthis
    """)

if __name__ == "__main__":
    # Start the web server only when run as a script. It binds to every
    # network interface ("0.0.0.0") on port 7860, with sharing disabled.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)