Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import pickle | |
| import torch | |
| import torch.nn as nn | |
| from surprise import SVD, KNNBasic | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| # ============================================================================ | |
| # NEURAL COLLABORATIVE FILTERING MODEL | |
| # ============================================================================ | |
class NeuralCollaborativeFiltering(nn.Module):
    """Two-branch neural collaborative filtering model.

    A GMF branch multiplies user and item embeddings element-wise, an MLP
    branch feeds the concatenated user/item embeddings through a stack of
    ``Linear -> ReLU -> Dropout(0.2)`` layers, and a final linear layer maps
    the concatenation of both branch outputs to a single score.

    Args:
        n_users: Number of rows in the user embedding tables.
        n_items: Number of rows in the item embedding tables.
        embedding_dim: Width of every embedding vector.
        hidden_layers: Output width of each MLP layer, in order. May be any
            iterable of ints; an empty iterable disables the MLP stack and the
            raw concatenated embeddings are passed to the output layer.
    """

    def __init__(self, n_users, n_items, embedding_dim=64, hidden_layers=(128, 64, 32)):
        super().__init__()
        # Immutable default + local copy: the argument is never shared between
        # instances and callers may still pass a list.
        hidden_layers = list(hidden_layers)

        # GMF embeddings
        self.gmf_user_embedding = nn.Embedding(n_users, embedding_dim)
        self.gmf_item_embedding = nn.Embedding(n_items, embedding_dim)

        # MLP embeddings (separate tables from the GMF branch)
        self.mlp_user_embedding = nn.Embedding(n_users, embedding_dim)
        self.mlp_item_embedding = nn.Embedding(n_items, embedding_dim)

        # MLP layers: the first layer consumes the user and item embeddings
        # concatenated, hence embedding_dim * 2.
        mlp_layers = []
        input_size = embedding_dim * 2
        for hidden_size in hidden_layers:
            mlp_layers.append(nn.Linear(input_size, hidden_size))
            mlp_layers.append(nn.ReLU())
            mlp_layers.append(nn.Dropout(0.2))
            input_size = hidden_size
        self.mlp = nn.Sequential(*mlp_layers)

        # Final prediction layer. After the loop ``input_size`` is the width of
        # the MLP output (hidden_layers[-1], or embedding_dim * 2 when the
        # stack is empty), so an empty ``hidden_layers`` no longer raises.
        self.output = nn.Linear(embedding_dim + input_size, 1)

    def forward(self, user_ids, item_ids):
        """Score each (user, item) index pair.

        Args:
            user_ids: Integer tensor of user embedding indices.
            item_ids: Integer tensor of item embedding indices.

        Returns:
            The output-layer activations with all size-1 dimensions removed.
        """
        # GMF branch: element-wise product of the two embeddings.
        gmf_user = self.gmf_user_embedding(user_ids)
        gmf_item = self.gmf_item_embedding(item_ids)
        gmf_vector = gmf_user * gmf_item

        # MLP branch: concatenated embeddings through the hidden stack.
        mlp_user = self.mlp_user_embedding(user_ids)
        mlp_item = self.mlp_item_embedding(item_ids)
        mlp_vector = torch.cat([mlp_user, mlp_item], dim=-1)
        mlp_vector = self.mlp(mlp_vector)

        combined = torch.cat([gmf_vector, mlp_vector], dim=-1)
        output = self.output(combined)
        # NOTE: squeeze() with no dim also drops a batch dimension of size 1,
        # so a single-pair call yields a 0-d tensor. Kept for compatibility
        # with existing callers.
        return output.squeeze()
| # ============================================================================ | |
| # HYBRID RECOMMENDER CLASS | |
| # ============================================================================ | |
class HybridRecommender:
    """Blend NCF and SVD rating predictions into top-N movie recommendations.

    Args:
        ncf_model: Torch module called as ``ncf_model(user_tensor, item_tensor)``.
        svd_model: Object exposing ``predict(user_id, movie_id).est``.
        item_mapping: Stored as-is; not read by this class.
        reverse_item_mapping: Maps a movieId to the item index fed to the NCF model.
        ratings: DataFrame with at least ``userId`` and ``movieId`` columns.
        movies: DataFrame with at least ``movieId``, ``title`` and ``genres``.
        ncf_weight: Weight of the NCF prediction in the blended score.
        svd_weight: Weight of the SVD prediction in the blended score.
    """

    def __init__(self, ncf_model, svd_model, item_mapping, reverse_item_mapping,
                 ratings, movies, ncf_weight=0.65, svd_weight=0.35):
        self.ncf_model = ncf_model
        self.svd_model = svd_model
        self.item_mapping = item_mapping
        self.reverse_item_mapping = reverse_item_mapping
        self.ratings = ratings
        self.movies = movies
        self.ncf_weight = ncf_weight
        self.svd_weight = svd_weight
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Inference only: move to the chosen device and switch off dropout.
        self.ncf_model.to(self.device)
        self.ncf_model.eval()

    def _ncf_predictions(self, user_id, movie_ids):
        """Return ``{movie_id: clamped NCF rating}`` for movies the NCF model knows.

        All known movies are scored in one batched forward pass instead of one
        model call per movie.
        """
        known = [mid for mid in movie_ids if mid in self.reverse_item_mapping]
        if not known:
            return {}
        item_tensor = torch.LongTensor(
            [self.reverse_item_mapping[mid] for mid in known]
        ).to(self.device)
        # NOTE(review): ``user_id - 1`` presumes user ids are contiguous and
        # start at 1 so that they line up with the embedding rows - confirm
        # against the training code.
        user_tensor = torch.LongTensor([int(user_id) - 1] * len(known)).to(self.device)
        with torch.no_grad():
            # reshape(-1) normalises both a squeezed 0-d result (batch of one)
            # and a (batch,) result to a flat list of floats.
            raw_scores = self.ncf_model(user_tensor, item_tensor).reshape(-1).tolist()
        # Clamp into the 0.5-5.0 rating range.
        return {mid: max(0.5, min(5.0, score)) for mid, score in zip(known, raw_scores)}

    def recommend_movies(self, user_id, N=10, min_rating=3.5):
        """Recommend up to ``N`` movies the user has not rated yet.

        Args:
            user_id: Id matched against ``ratings['userId']``.
            N: Maximum number of rows to return.
            min_rating: Movies whose blended prediction is below this are dropped.

        Returns:
            DataFrame with columns ``title``, ``genres``, ``predicted_rating``,
            ``ncf_rating`` and ``svd_rating`` (ratings rounded to 2 decimals),
            sorted by blended prediction, or an empty DataFrame when no movie
            reaches ``min_rating``.
        """
        all_movie_ids = self.movies['movieId'].unique()
        # A set gives O(1) membership tests; scanning the NumPy array for each
        # candidate made this step quadratic.
        rated_movies = set(self.ratings.loc[self.ratings['userId'] == user_id, 'movieId'])
        movies_to_predict = [mid for mid in all_movie_ids if mid not in rated_movies]

        ncf_by_movie = self._ncf_predictions(user_id, movies_to_predict)

        predictions = []
        for movie_id in movies_to_predict:
            # Movies unknown to the NCF model get a neutral 3.0.
            ncf_pred = ncf_by_movie.get(movie_id, 3.0)

            # SVD prediction, best-effort: fall back to a neutral 3.0 on failure.
            try:
                svd_pred = self.svd_model.predict(user_id, movie_id).est
            except Exception:
                svd_pred = 3.0

            # Hybrid prediction: weighted sum of both models.
            hybrid_pred = self.ncf_weight * ncf_pred + self.svd_weight * svd_pred
            if hybrid_pred >= min_rating:
                predictions.append({
                    'movieId': movie_id,
                    'predicted_rating': hybrid_pred,
                    'ncf_rating': ncf_pred,
                    'svd_rating': svd_pred
                })

        if not predictions:
            return pd.DataFrame()

        predictions_df = pd.DataFrame(predictions)
        # int(): UI sliders may deliver the count as a float.
        predictions_df = predictions_df.sort_values('predicted_rating', ascending=False).head(int(N))
        recommendations = predictions_df.merge(self.movies, on='movieId')
        for column in ('predicted_rating', 'ncf_rating', 'svd_rating'):
            recommendations[column] = recommendations[column].round(2)
        return recommendations[['title', 'genres', 'predicted_rating', 'ncf_rating', 'svd_rating']]
| # ============================================================================ | |
| # LOAD MODELS AND DATA | |
| # ============================================================================ | |
print("Loading models and data...")


def _load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # SECURITY: pickle.load can execute arbitrary code from the file. Only
    # load artifacts produced by a trusted training run.
    with open(path, 'rb') as f:
        return pickle.load(f)


# Load saved models and data
svd_model = _load_pickle('svd_model.pkl')
item_based_cf = _load_pickle('item_based_cf.pkl')
user_based_cf = _load_pickle('user_based_cf.pkl')
movies = _load_pickle('movies.pkl')
ratings = _load_pickle('ratings.pkl')

# Bound up front so the name exists even when the NCF weights cannot be
# loaded; callers must still check ``use_hybrid`` before using it.
hybrid_recommender = None
use_hybrid = False

# Load NCF model if exists
try:
    # Prepare item mapping: position in the sorted movieId categories <-> movieId
    ratings['movieId_cat'] = ratings['movieId'].astype('category')
    item_mapping = dict(enumerate(ratings['movieId_cat'].cat.categories))
    reverse_item_mapping = {v: k for k, v in item_mapping.items()}

    # Embedding table sizes are taken from the ratings data.
    # NOTE(review): presumably these must equal the sizes used at training
    # time for the checkpoint to load - confirm.
    n_users = ratings['userId'].nunique()
    n_items = ratings['movieId'].nunique()

    ncf_model = NeuralCollaborativeFiltering(n_users, n_items)
    ncf_model.load_state_dict(torch.load('ncf_model_best.pth', map_location='cpu'))
    ncf_model.eval()

    # Create hybrid recommender
    hybrid_recommender = HybridRecommender(
        ncf_model=ncf_model,
        svd_model=svd_model,
        item_mapping=item_mapping,
        reverse_item_mapping=reverse_item_mapping,
        ratings=ratings,
        movies=movies
    )
    use_hybrid = True
    # NOTE(review): the leading symbol in the two status messages below looks
    # like a mis-decoded check/cross mark - confirm against the original file.
    print("β Hybrid model loaded successfully!")
except Exception as e:
    # Best-effort: any failure here disables the hybrid model only.
    print(f"β Could not load NCF model: {e}")
    print("Using SVD model only...")
    use_hybrid = False
| # ============================================================================ | |
| # RECOMMENDATION FUNCTIONS | |
| # ============================================================================ | |
def get_user_history(user_id):
    """Return the ten highest-rated movies of a user.

    Reads the module-level ``ratings`` and ``movies`` frames. The result has
    the columns ``title``, ``genres``, ``rating`` and ``timestamp``; when the
    user has no ratings a one-row ``Message`` frame is returned instead.
    """
    is_this_user = ratings['userId'] == user_id
    history = ratings[is_this_user].merge(movies, on='movieId')
    top_rated = history.sort_values('rating', ascending=False).head(10)

    if top_rated.empty:
        return pd.DataFrame({"Message": ["No rating history found for this user"]})

    shown_columns = ['title', 'genres', 'rating', 'timestamp']
    return top_rated[shown_columns]
def recommend_with_svd(user_id, n_recommendations, min_rating):
    """Generate recommendations using SVD model.

    Args:
        user_id: Id matched against the module-level ``ratings['userId']``.
        n_recommendations: Maximum number of rows to return.
        min_rating: Movies with a predicted rating below this are dropped.

    Returns:
        DataFrame with ``title``, ``genres`` and ``predicted_rating`` (rounded
        to 2 decimals) sorted by predicted rating, or a one-row ``Message``
        frame when no movie reaches ``min_rating``.
    """
    all_movie_ids = movies['movieId'].unique()
    # Set membership is O(1); testing against the NumPy array rescanned every
    # rated movie for every candidate.
    rated_movies = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])

    predictions = []
    for movie_id in all_movie_ids:
        if movie_id in rated_movies:
            continue
        # Best-effort: a movie the model cannot score is skipped, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            estimate = svd_model.predict(user_id, movie_id).est
        except Exception:
            continue
        if estimate >= min_rating:
            predictions.append({
                'movieId': movie_id,
                'predicted_rating': estimate
            })

    if not predictions:
        return pd.DataFrame({"Message": ["No recommendations found with these criteria"]})

    predictions_df = pd.DataFrame(predictions)
    # int(): UI sliders may deliver the count as a float.
    predictions_df = predictions_df.sort_values(
        'predicted_rating', ascending=False
    ).head(int(n_recommendations))
    recommendations = predictions_df.merge(movies, on='movieId')
    recommendations['predicted_rating'] = recommendations['predicted_rating'].round(2)
    return recommendations[['title', 'genres', 'predicted_rating']]
def _recommend_with_cf(model, user_id, n_recommendations, min_rating):
    """Rank the user's unrated movies with a model exposing ``predict(uid, iid).est``."""
    rated_movies = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])

    predictions = []
    for movie_id in movies['movieId'].unique():
        if movie_id in rated_movies:
            continue
        # Best-effort: movies the model cannot score are skipped.
        try:
            estimate = model.predict(user_id, movie_id).est
        except Exception:
            continue
        if estimate >= min_rating:
            predictions.append({
                'movieId': movie_id,
                'predicted_rating': estimate
            })

    if not predictions:
        return pd.DataFrame({"Message": ["No recommendations found"]})

    predictions_df = pd.DataFrame(predictions)
    predictions_df = predictions_df.sort_values(
        'predicted_rating', ascending=False
    ).head(n_recommendations)
    recommendations = predictions_df.merge(movies, on='movieId')
    recommendations['predicted_rating'] = recommendations['predicted_rating'].round(2)
    return recommendations[['title', 'genres', 'predicted_rating']]


def get_recommendations(user_id, n_recommendations, min_rating, model_type):
    """Main recommendation function.

    Validates the user id, dispatches to the model named by ``model_type`` and
    always returns a DataFrame: the recommendations, a one-row ``Message``
    frame when nothing qualifies, or a one-row ``Error`` frame on failure.

    Args:
        user_id: Value convertible to ``int``; must occur in ``ratings['userId']``.
        n_recommendations: Maximum number of rows to return.
        min_rating: Minimum predicted rating for a movie to be listed.
        model_type: One of the dropdown labels. ``"Hybrid (NCF + SVD)"`` uses
            the hybrid recommender (or SVD when the hybrid model is not
            loaded), ``"SVD (Matrix Factorization)"`` and ``"Item-Based CF"``
            select those models, and any other value selects User-Based CF.
    """
    # Convert the id on its own so that only a bad id - including None from a
    # cleared input - produces the "valid user ID" message. A ValueError
    # raised deeper in a model is reported as a generic error instead.
    try:
        user_id = int(user_id)
    except (TypeError, ValueError):
        return pd.DataFrame({"Error": ["Please enter a valid user ID (integer)"]})

    try:
        # UI sliders may deliver the count as a float.
        n_recommendations = int(n_recommendations)

        # Check if user exists
        if user_id not in ratings['userId'].values:
            return pd.DataFrame({"Error": [f"User ID {user_id} not found. Please enter a valid user ID (1-610)"]})

        # Get recommendations based on model type
        if model_type == "Hybrid (NCF + SVD)":
            if use_hybrid:
                recommendations = hybrid_recommender.recommend_movies(
                    user_id,
                    N=n_recommendations,
                    min_rating=min_rating
                )
            else:
                # Hybrid model unavailable: use SVD, as announced at startup,
                # rather than silently falling through to User-Based CF.
                recommendations = recommend_with_svd(user_id, n_recommendations, min_rating)
        elif model_type == "SVD (Matrix Factorization)":
            recommendations = recommend_with_svd(user_id, n_recommendations, min_rating)
        elif model_type == "Item-Based CF":
            recommendations = _recommend_with_cf(
                item_based_cf, user_id, n_recommendations, min_rating
            )
        else:  # User-Based CF
            recommendations = _recommend_with_cf(
                user_based_cf, user_id, n_recommendations, min_rating
            )

        # The hybrid recommender signals "nothing found" with an empty frame.
        if len(recommendations) == 0:
            return pd.DataFrame({"Message": ["No recommendations found with these criteria. Try lowering the minimum rating."]})
        return recommendations
    except Exception as e:
        # Top-level UI boundary: show the failure in the table instead of raising.
        return pd.DataFrame({"Error": [f"An error occurred: {str(e)}"]})
def search_movies(query):
    """Search for movies by title.

    Performs a case-insensitive literal substring match against the
    module-level ``movies['title']`` column.

    Args:
        query: Text typed by the user. A falsy value (empty string or None)
            lists the first 20 movies.

    Returns:
        Up to 20 rows with ``movieId``, ``title`` and ``genres``, or a one-row
        ``Message`` frame when nothing matches.
    """
    if not query:
        return movies[['movieId', 'title', 'genres']].head(20)

    # regex=False: the query is free text from the UI. Interpreted as a
    # regular expression, input such as "(" raises and "." matches every title.
    mask = movies['title'].str.contains(query, case=False, na=False, regex=False)
    results = movies[mask][['movieId', 'title', 'genres']].head(20)
    if len(results) == 0:
        return pd.DataFrame({"Message": [f"No movies found matching '{query}'"]})
    return results
| # ============================================================================ | |
| # GRADIO INTERFACE | |
| # ============================================================================ | |
# Dropdown choices: the hybrid model is listed first, and only when it loaded.
model_options = [
    *(["Hybrid (NCF + SVD)"] if use_hybrid else []),
    "SVD (Matrix Factorization)",
    "Item-Based CF",
    "User-Based CF",
]
# Create Gradio interface
# NOTE(review): several labels and headings below contain characters such as
# "π¬" that look like mis-decoded emoji. They are kept unchanged because the
# original characters cannot be recovered reliably - confirm against the
# original file.
with gr.Blocks(theme=gr.themes.Soft(), title="MovieLens Recommender System") as demo:
    gr.Markdown(
        """
        # π¬ MovieLens Movie Recommendation System
        Get personalized movie recommendations using state-of-the-art collaborative filtering algorithms!
        **Available Models:**
        - π **Hybrid (NCF + SVD)**: Combines Neural Collaborative Filtering with Matrix Factorization
        - π **SVD**: Singular Value Decomposition (Matrix Factorization)
        - π― **Item-Based CF**: Recommends based on similar movies
        - π₯ **User-Based CF**: Recommends based on similar users
        """
    )

    # Tab 1: pick a user and a model, show recommendations and rating history.
    with gr.Tab("Get Recommendations"):
        with gr.Row():
            with gr.Column(scale=1):
                user_id_input = gr.Number(
                    label="User ID",
                    value=1,
                    precision=0,
                    info="Enter a user ID (1-610)"
                )
                model_selector = gr.Dropdown(
                    choices=model_options,
                    value=model_options[0],
                    label="Recommendation Model",
                    info="Choose the algorithm to generate recommendations"
                )
                n_recs = gr.Slider(
                    minimum=5,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Number of Recommendations",
                    info="How many movies to recommend"
                )
                min_rating_slider = gr.Slider(
                    minimum=0.5,
                    maximum=5.0,
                    value=3.5,
                    step=0.5,
                    label="Minimum Predicted Rating",
                    info="Only show movies with predicted rating above this threshold"
                )
                recommend_btn = gr.Button("π¬ Get Recommendations", variant="primary", size="lg")
            with gr.Column(scale=2):
                recommendations_output = gr.Dataframe(
                    label="Recommended Movies",
                    wrap=True
                )
                gr.Markdown("### π User's Rating History")
                user_history_output = gr.Dataframe(
                    label="Top Rated Movies by This User",
                    wrap=True
                )

        # Connect buttons
        recommend_btn.click(
            fn=get_recommendations,
            inputs=[user_id_input, n_recs, min_rating_slider, model_selector],
            outputs=recommendations_output
        )
        user_id_input.change(
            fn=get_user_history,
            inputs=user_id_input,
            outputs=user_history_output
        )

    # Tab 2: title search over the movie catalogue.
    with gr.Tab("Search Movies"):
        gr.Markdown("### π Search for Movies in Database")
        with gr.Row():
            search_input = gr.Textbox(
                label="Search Query",
                placeholder="Enter movie title...",
                info="Search for movies by title"
            )
            search_btn = gr.Button("Search", variant="primary")
        search_output = gr.Dataframe(
            label="Search Results",
            wrap=True
        )
        # The button and pressing Enter in the textbox trigger the same search.
        search_btn.click(
            fn=search_movies,
            inputs=search_input,
            outputs=search_output
        )
        search_input.submit(
            fn=search_movies,
            inputs=search_input,
            outputs=search_output
        )

    # Tab 3: static description of the models and dataset.
    with gr.Tab("About"):
        gr.Markdown(
            """
            ## π About This System
            This recommendation system was built using the MovieLens dataset and implements multiple collaborative filtering algorithms:
            ### Models
            1. **Hybrid Model (NCF + SVD)** π
            - Combines Neural Collaborative Filtering with SVD
            - Best performance: RMSE improvement over baseline
            - Uses deep learning to capture non-linear patterns
            2. **SVD (Singular Value Decomposition)** π
            - Matrix factorization technique
            - Learns latent factors for users and items
            - Excellent for sparse data
            3. **Item-Based Collaborative Filtering** π―
            - Recommends movies similar to what you've liked
            - Based on item-item similarity
            - Good for users with consistent preferences
            4. **User-Based Collaborative Filtering** π₯
            - Recommends based on users similar to you
            - User-user similarity approach
            - Effective for discovering diverse content
            ### Dataset
            - **MovieLens Small Dataset**: 100,000+ ratings
            - **610 users** and **9,724 movies**
            - Rating scale: 0.5 to 5.0 stars
            ### Performance Metrics
            The models were evaluated using:
            - RMSE (Root Mean Square Error)
            - Precision@10
            - Recall@10
            - NDCG@10 (Normalized Discounted Cumulative Gain)
            ### How to Use
            1. Enter a User ID (1-610)
            2. Select a recommendation model
            3. Choose number of recommendations
            4. Set minimum rating threshold
            5. Click "Get Recommendations"
            ---
            Built with β€οΈ using Gradio, PyTorch, and Surprise
            """
        )

    # Fill the history table for the default user when the page opens; the
    # .change listener above only runs after the User ID value is edited.
    demo.load(
        fn=get_user_history,
        inputs=user_id_input,
        outputs=user_history_output
    )

print("β Gradio interface ready!")

# Launch the app
if __name__ == "__main__":
    # NOTE(review): share=True requests a public share link - confirm it is
    # wanted in the environment this app is deployed to.
    demo.launch(share=True)