import pickle
import warnings

import gradio as gr
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from surprise import SVD, KNNBasic

warnings.filterwarnings('ignore')


# ============================================================================
# NEURAL COLLABORATIVE FILTERING MODEL
# ============================================================================

class NeuralCollaborativeFiltering(nn.Module):
    """Two-branch rating model combining a GMF branch and an MLP branch.

    The GMF branch multiplies a user embedding and an item embedding
    element-wise. The MLP branch concatenates a separate pair of user/item
    embeddings and feeds them through ``Linear -> ReLU -> Dropout(0.2)``
    stages. Both branch outputs are concatenated and mapped to a single
    score by a final linear layer.

    Args:
        n_users: Number of rows in the user embedding tables.
        n_items: Number of rows in the item embedding tables.
        embedding_dim: Width of every embedding vector.
        hidden_layers: Output width of each MLP stage, in order. May be
            empty, in which case the MLP branch passes the concatenated
            embeddings through unchanged.
    """

    def __init__(self, n_users, n_items, embedding_dim=64,
                 hidden_layers=(128, 64, 32)):
        super().__init__()

        # GMF Embeddings
        self.gmf_user_embedding = nn.Embedding(n_users, embedding_dim)
        self.gmf_item_embedding = nn.Embedding(n_items, embedding_dim)

        # MLP Embeddings
        self.mlp_user_embedding = nn.Embedding(n_users, embedding_dim)
        self.mlp_item_embedding = nn.Embedding(n_items, embedding_dim)

        # MLP Layers
        mlp_layers = []
        input_size = embedding_dim * 2
        for hidden_size in hidden_layers:
            mlp_layers.append(nn.Linear(input_size, hidden_size))
            mlp_layers.append(nn.ReLU())
            mlp_layers.append(nn.Dropout(0.2))
            input_size = hidden_size
        self.mlp = nn.Sequential(*mlp_layers)

        # Final prediction layer. ``input_size`` now holds the MLP output
        # width (``hidden_layers[-1]`` when there is at least one stage), so
        # an empty ``hidden_layers`` is handled instead of raising IndexError.
        self.output = nn.Linear(embedding_dim + input_size, 1)

    def forward(self, user_ids, item_ids):
        """Score each (user, item) index pair.

        Args:
            user_ids: Integer tensor of user embedding indices.
            item_ids: Integer tensor of item embedding indices, same shape
                as ``user_ids``.

        Returns:
            Tensor of scores with all size-1 dimensions squeezed out (a
            0-d tensor when a single pair is scored).
        """
        gmf_user = self.gmf_user_embedding(user_ids)
        gmf_item = self.gmf_item_embedding(item_ids)
        gmf_vector = gmf_user * gmf_item

        mlp_user = self.mlp_user_embedding(user_ids)
        mlp_item = self.mlp_item_embedding(item_ids)
        mlp_vector = torch.cat([mlp_user, mlp_item], dim=-1)
        mlp_vector = self.mlp(mlp_vector)

        combined = torch.cat([gmf_vector, mlp_vector], dim=-1)
        output = self.output(combined)
        return output.squeeze()


# ============================================================================
# HYBRID RECOMMENDER CLASS
# ============================================================================

class HybridRecommender:
    """Blend NCF and SVD rating predictions into one weighted score.

    Args:
        ncf_model: Torch module called as ``ncf_model(user_idx, item_idx)``.
        svd_model: Object exposing ``predict(user_id, movie_id).est``.
        item_mapping: Mapping from NCF item index to movie id.
        reverse_item_mapping: Mapping from movie id to NCF item index.
        ratings: DataFrame with ``userId`` and ``movieId`` columns.
        movies: DataFrame with ``movieId``, ``title`` and ``genres`` columns.
        ncf_weight: Multiplier applied to the NCF prediction.
        svd_weight: Multiplier applied to the SVD prediction.
    """

    # Rating used when a model cannot score a (user, movie) pair.
    DEFAULT_RATING = 3.0

    def __init__(self, ncf_model, svd_model, item_mapping,
                 reverse_item_mapping, ratings, movies,
                 ncf_weight=0.65, svd_weight=0.35):
        self.ncf_model = ncf_model
        self.svd_model = svd_model
        self.item_mapping = item_mapping
        self.reverse_item_mapping = reverse_item_mapping
        self.ratings = ratings
        self.movies = movies
        self.ncf_weight = ncf_weight
        self.svd_weight = svd_weight
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu'
        )
        self.ncf_model.to(self.device)
        self.ncf_model.eval()

    def _ncf_predictions(self, user_id, movie_ids):
        """Return ``{movie_id: score}`` for the movies the NCF model knows.

        All known movies are scored in a single batched forward pass and
        each score is clamped to the [0.5, 5.0] rating range. Movies missing
        from ``reverse_item_mapping`` are omitted from the result.
        """
        known_ids = [
            mid for mid in movie_ids if mid in self.reverse_item_mapping
        ]
        if not known_ids:
            return {}

        item_tensor = torch.tensor(
            [self.reverse_item_mapping[mid] for mid in known_ids],
            dtype=torch.long,
            device=self.device,
        )
        # The user's embedding row is taken to be ``user_id - 1``
        # (i.e. user ids are assumed to be contiguous and 1-based).
        user_tensor = torch.full_like(item_tensor, user_id - 1)

        with torch.no_grad():
            # reshape(-1) undoes forward()'s squeeze() when only one movie
            # is scored and the result collapses to a 0-d tensor.
            scores = self.ncf_model(user_tensor, item_tensor).reshape(-1)
        scores = scores.clamp(0.5, 5.0).cpu().tolist()
        return dict(zip(known_ids, scores))

    def recommend_movies(self, user_id, N=10, min_rating=3.5):
        """Recommend up to ``N`` movies the user has not rated yet.

        Args:
            user_id: Id of the user, as it appears in ``ratings['userId']``.
            N: Maximum number of recommendations to return.
            min_rating: Minimum blended rating a movie needs to be kept.

        Returns:
            DataFrame with columns ``title``, ``genres``,
            ``predicted_rating``, ``ncf_rating`` and ``svd_rating`` (ratings
            rounded to 2 decimals), ordered by descending blended rating.
            An empty DataFrame when no movie reaches ``min_rating``.
        """
        all_movie_ids = self.movies['movieId'].unique()
        user_mask = self.ratings['userId'] == user_id
        # A set gives O(1) membership tests instead of scanning an array
        # once per candidate movie.
        rated_movies = set(self.ratings.loc[user_mask, 'movieId'])
        movies_to_predict = [
            mid for mid in all_movie_ids if mid not in rated_movies
        ]

        ncf_scores = self._ncf_predictions(user_id, movies_to_predict)

        predictions = []
        for movie_id in movies_to_predict:
            # NCF prediction (neutral default for movies unknown to NCF)
            ncf_pred = ncf_scores.get(movie_id, self.DEFAULT_RATING)

            # SVD prediction; best-effort, fall back to the neutral default
            try:
                svd_pred = self.svd_model.predict(user_id, movie_id).est
            except Exception:
                svd_pred = self.DEFAULT_RATING

            # Hybrid prediction
            hybrid_pred = (
                self.ncf_weight * ncf_pred + self.svd_weight * svd_pred
            )

            if hybrid_pred >= min_rating:
                predictions.append({
                    'movieId': movie_id,
                    'predicted_rating': hybrid_pred,
                    'ncf_rating': ncf_pred,
                    'svd_rating': svd_pred,
                })

        if not predictions:
            return pd.DataFrame()

        predictions_df = pd.DataFrame(predictions)
        # int() guards against UI widgets delivering the count as a float.
        predictions_df = predictions_df.sort_values(
            'predicted_rating', ascending=False
        ).head(int(N))

        recommendations = predictions_df.merge(self.movies, on='movieId')
        for column in ('predicted_rating', 'ncf_rating', 'svd_rating'):
            recommendations[column] = recommendations[column].round(2)

        return recommendations[
            ['title', 'genres', 'predicted_rating', 'ncf_rating', 'svd_rating']
        ]
# ============================================================================
# LOAD MODELS AND DATA
# ============================================================================

print("Loading models and data...")


def _load_pickle(path):
    """Load and return the object pickled at ``path``.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in
    the file. Only load artifacts produced by a trusted training pipeline.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


# Load saved models and data
svd_model = _load_pickle('svd_model.pkl')
item_based_cf = _load_pickle('item_based_cf.pkl')
user_based_cf = _load_pickle('user_based_cf.pkl')
movies = _load_pickle('movies.pkl')
ratings = _load_pickle('ratings.pkl')

# Load NCF model if exists
hybrid_recommender = None
try:
    # Prepare item mapping (NCF item index <-> movieId)
    ratings['movieId_cat'] = ratings['movieId'].astype('category')
    item_mapping = dict(enumerate(ratings['movieId_cat'].cat.categories))
    reverse_item_mapping = {v: k for k, v in item_mapping.items()}

    n_users = ratings['userId'].nunique()
    n_items = ratings['movieId'].nunique()

    ncf_model = NeuralCollaborativeFiltering(n_users, n_items)
    ncf_model.load_state_dict(
        torch.load('ncf_model_best.pth', map_location='cpu')
    )
    ncf_model.eval()

    # Create hybrid recommender
    hybrid_recommender = HybridRecommender(
        ncf_model=ncf_model,
        svd_model=svd_model,
        item_mapping=item_mapping,
        reverse_item_mapping=reverse_item_mapping,
        ratings=ratings,
        movies=movies,
    )
    use_hybrid = True
    print("✓ Hybrid model loaded successfully!")
except Exception as e:
    # Startup boundary: any failure here only disables the hybrid option.
    print(f"⚠ Could not load NCF model: {e}")
    print("Using SVD model only...")
    use_hybrid = False


# ============================================================================
# RECOMMENDATION FUNCTIONS
# ============================================================================

def get_user_history(user_id):
    """Return the user's ten highest-rated movies.

    Args:
        user_id: Id matched against ``ratings['userId']``.

    Returns:
        DataFrame with ``title``, ``genres``, ``rating`` and ``timestamp``
        columns sorted by descending rating, or a one-row ``Message``
        DataFrame when the user has no ratings.
    """
    user_ratings = ratings[ratings['userId'] == user_id].merge(
        movies, on='movieId'
    )
    user_ratings = user_ratings.sort_values('rating', ascending=False).head(10)

    if user_ratings.empty:
        return pd.DataFrame(
            {"Message": ["No rating history found for this user"]}
        )

    return user_ratings[['title', 'genres', 'rating', 'timestamp']]


def _recommend_with_model(model, user_id, n_recommendations, min_rating,
                          empty_message):
    """Rank the user's unrated movies with a ``predict``-style model.

    ``model.predict(user_id, movie_id).est`` is evaluated for every movie the
    user has not rated; movies whose prediction fails are skipped. Returns
    the top ``n_recommendations`` rows with an estimate of at least
    ``min_rating`` (columns ``title``, ``genres``, ``predicted_rating``), or
    a one-row ``Message`` DataFrame carrying ``empty_message`` when nothing
    qualifies.
    """
    all_movie_ids = movies['movieId'].unique()
    # A set gives O(1) membership tests while filtering candidates.
    rated_movies = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])

    predictions = []
    for movie_id in all_movie_ids:
        if movie_id in rated_movies:
            continue
        try:
            estimate = model.predict(user_id, movie_id).est
        except Exception:
            # Best-effort: one failing prediction must not abort the request.
            continue
        if estimate >= min_rating:
            predictions.append({
                'movieId': movie_id,
                'predicted_rating': estimate,
            })

    if not predictions:
        return pd.DataFrame({"Message": [empty_message]})

    predictions_df = pd.DataFrame(predictions)
    # int() guards against UI widgets delivering the count as a float.
    predictions_df = predictions_df.sort_values(
        'predicted_rating', ascending=False
    ).head(int(n_recommendations))

    recommendations = predictions_df.merge(movies, on='movieId')
    recommendations['predicted_rating'] = (
        recommendations['predicted_rating'].round(2)
    )
    return recommendations[['title', 'genres', 'predicted_rating']]


def recommend_with_svd(user_id, n_recommendations, min_rating):
    """Generate recommendations using SVD model"""
    return _recommend_with_model(
        svd_model,
        user_id,
        n_recommendations,
        min_rating,
        "No recommendations found with these criteria",
    )


def get_recommendations(user_id, n_recommendations, min_rating, model_type):
    """Main recommendation function.

    Args:
        user_id: User id as entered in the UI; must be convertible to int.
        n_recommendations: Maximum number of movies to return.
        min_rating: Minimum predicted rating a movie needs to be listed.
        model_type: One of the model labels offered in the UI dropdown. Any
            label other than the Hybrid, SVD and Item-Based ones selects
            User-Based CF.

    Returns:
        DataFrame of recommendations, or a one-row ``Message`` / ``Error``
        DataFrame describing why none could be produced. Never raises.
    """
    try:
        user_id = int(user_id)
    except (TypeError, ValueError, OverflowError):
        # Covers an empty field (None) as well as non-numeric input.
        return pd.DataFrame(
            {"Error": ["Please enter a valid user ID (integer)"]}
        )

    try:
        # Check if user exists
        if user_id not in ratings['userId'].values:
            return pd.DataFrame({"Error": [
                f"User ID {user_id} not found. "
                "Please enter a valid user ID (1-610)"
            ]})

        # Get recommendations based on model type
        if model_type == "Hybrid (NCF + SVD)":
            if use_hybrid:
                recommendations = hybrid_recommender.recommend_movies(
                    user_id,
                    N=n_recommendations,
                    min_rating=min_rating,
                )
            else:
                # NCF failed to load: use the SVD half of the hybrid rather
                # than silently switching to an unrelated algorithm.
                recommendations = recommend_with_svd(
                    user_id, n_recommendations, min_rating
                )
        elif model_type == "SVD (Matrix Factorization)":
            recommendations = recommend_with_svd(
                user_id, n_recommendations, min_rating
            )
        elif model_type == "Item-Based CF":
            recommendations = _recommend_with_model(
                item_based_cf,
                user_id,
                n_recommendations,
                min_rating,
                "No recommendations found",
            )
        else:  # User-Based CF
            recommendations = _recommend_with_model(
                user_based_cf,
                user_id,
                n_recommendations,
                min_rating,
                "No recommendations found",
            )

        if recommendations.empty:
            return pd.DataFrame({"Message": [
                "No recommendations found with these criteria. "
                "Try lowering the minimum rating."
            ]})

        return recommendations

    except Exception as e:
        # UI boundary: surface the failure in the results table.
        return pd.DataFrame({"Error": [f"An error occurred: {str(e)}"]})


def search_movies(query):
    """Search for movies by title.

    Args:
        query: Text to look for, matched literally and case-insensitively.
            An empty query lists the first 20 movies.

    Returns:
        DataFrame with ``movieId``, ``title`` and ``genres`` columns (at most
        20 rows), or a one-row ``Message`` DataFrame when nothing matches.
    """
    if not query:
        return movies[['movieId', 'title', 'genres']].head(20)

    # regex=False: treat the query as plain text so characters such as "("
    # (common in titles like "Toy Story (1995)") cannot raise re.error.
    mask = movies['title'].str.contains(
        query, case=False, na=False, regex=False
    )
    results = movies[mask][['movieId', 'title', 'genres']].head(20)

    if results.empty:
        return pd.DataFrame(
            {"Message": [f"No movies found matching '{query}'"]}
        )

    return results


# ============================================================================
# GRADIO INTERFACE
# ============================================================================

# Model options
model_options = ["SVD (Matrix Factorization)", "Item-Based CF", "User-Based CF"]
if use_hybrid:
    model_options.insert(0, "Hybrid (NCF + SVD)")

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(),
               title="MovieLens Recommender System") as demo:
    gr.Markdown(
        """
        # 🎬 MovieLens Movie Recommendation System

        Get personalized movie recommendations using state-of-the-art collaborative filtering algorithms!

        **Available Models:**
        - 🚀 **Hybrid (NCF + SVD)**: Combines Neural Collaborative Filtering with Matrix Factorization
        - 📊 **SVD**: Singular Value Decomposition (Matrix Factorization)
        - 🎯 **Item-Based CF**: Recommends based on similar movies
        - 👥 **User-Based CF**: Recommends based on similar users
        """
    )

    with gr.Tab("Get Recommendations"):
        with gr.Row():
            with gr.Column(scale=1):
                user_id_input = gr.Number(
                    label="User ID",
                    value=1,
                    precision=0,
                    info="Enter a user ID (1-610)"
                )

                model_selector = gr.Dropdown(
                    choices=model_options,
                    value=model_options[0],
                    label="Recommendation Model",
                    info="Choose the algorithm to generate recommendations"
                )

                n_recs = gr.Slider(
                    minimum=5,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Number of Recommendations",
                    info="How many movies to recommend"
                )

                min_rating_slider = gr.Slider(
                    minimum=0.5,
                    maximum=5.0,
                    value=3.5,
                    step=0.5,
                    label="Minimum Predicted Rating",
                    info="Only show movies with predicted rating above this threshold"
                )

                recommend_btn = gr.Button(
                    "🎬 Get Recommendations", variant="primary", size="lg"
                )

            with gr.Column(scale=2):
                recommendations_output = gr.Dataframe(
                    label="Recommended Movies",
                    wrap=True
                )

                gr.Markdown("### 📊 User's Rating History")
                user_history_output = gr.Dataframe(
                    label="Top Rated Movies by This User",
                    wrap=True
                )

        # Connect buttons
        recommend_btn.click(
            fn=get_recommendations,
            inputs=[user_id_input, n_recs, min_rating_slider, model_selector],
            outputs=recommendations_output
        )

        user_id_input.change(
            fn=get_user_history,
            inputs=user_id_input,
            outputs=user_history_output
        )

    with gr.Tab("Search Movies"):
        gr.Markdown("### 🔍 Search for Movies in Database")

        with gr.Row():
            search_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter movie title...",
                info="Search for movies by title"
            )
            search_btn = gr.Button("Search", variant="primary")

        search_output = gr.Dataframe(
            label="Search Results",
            wrap=True
        )

        search_btn.click(
            fn=search_movies,
            inputs=search_input,
            outputs=search_output
        )

        search_input.submit(
            fn=search_movies,
            inputs=search_input,
            outputs=search_output
        )

    with gr.Tab("About"):
        gr.Markdown(
            """
            ## 📖 About This System

            This recommendation system was built using the MovieLens dataset and implements multiple collaborative filtering algorithms:

            ### Models

            1. **Hybrid Model (NCF + SVD)** 🚀
               - Combines Neural Collaborative Filtering with SVD
               - Best performance: RMSE improvement over baseline
               - Uses deep learning to capture non-linear patterns

            2. **SVD (Singular Value Decomposition)** 📊
               - Matrix factorization technique
               - Learns latent factors for users and items
               - Excellent for sparse data

            3. **Item-Based Collaborative Filtering** 🎯
               - Recommends movies similar to what you've liked
               - Based on item-item similarity
               - Good for users with consistent preferences

            4. **User-Based Collaborative Filtering** 👥
               - Recommends based on users similar to you
               - User-user similarity approach
               - Effective for discovering diverse content

            ### Dataset
            - **MovieLens Small Dataset**: 100,000+ ratings
            - **610 users** and **9,724 movies**
            - Rating scale: 0.5 to 5.0 stars

            ### Performance Metrics
            The models were evaluated using:
            - RMSE (Root Mean Square Error)
            - Precision@10
            - Recall@10
            - NDCG@10 (Normalized Discounted Cumulative Gain)

            ### How to Use
            1. Enter a User ID (1-610)
            2. Select a recommendation model
            3. Choose number of recommendations
            4. Set minimum rating threshold
            5. Click "Get Recommendations"

            ---

            Built with ❤️ using Gradio, PyTorch, and Surprise
            """
        )

print("✓ Gradio interface ready!")

# Launch the app
if __name__ == "__main__":
    demo.launch(share=True)