"""Gradio web UI for a pickled hybrid MovieLens recommender.

Importing this module unpickles the model artifacts from ``model_artifacts/``
and builds the ``demo`` Blocks application; running it as a script launches
the server on port 7860.

NOTE: the original file contained this whole script twice, concatenated
without a separating newline (``demo.launch(...)import gradio ...``), which is
a syntax error.  Only one copy is kept here.
"""
import os
import pickle
import traceback

import gradio as gr
import numpy as np
import pandas as pd  # not referenced directly below; kept from the original file
import torch
import torch.nn as nn
from scipy.sparse import csr_matrix  # not referenced directly below; kept from the original file

# Location and names of the pickled artifacts read at start-up.
ARTIFACT_DIR = 'model_artifacts'
ARTIFACT_FILES = ('hybrid_model.pkl', 'loader.pkl', 'movies.pkl')

# Bounds offered by the user-ID inputs in the UI.
MIN_USER_ID = 1
MAX_USER_ID = 943

# Horizontal rule used in the plain-text result panes.
SEPARATOR = "=" * 60

HEADER_MD = """
# 🎬 Hybrid Movie Recommendation System
### DataSynthis Job Task - Powered by AI

This system combines **Collaborative Filtering**, **SVD Matrix Factorization**, and **Neural Networks**
to provide personalized movie recommendations from the MovieLens 100k dataset.

---
"""

HOW_IT_WORKS_MD = """
**How it works:**
- Enter a User ID (between 1 and 943)
- Choose how many recommendations you want
- Click "Get Recommendations" to see personalized movie suggestions
"""

ABOUT_MD = """
## About This System

### 🎯 Model Architecture

This is a **Hybrid Recommendation System** that combines three powerful approaches:

1. **Item-Based Collaborative Filtering**
   - Uses cosine similarity between movies
   - Recommends movies similar to what you've liked before

2. **SVD Matrix Factorization**
   - Decomposes the user-movie rating matrix
   - Discovers latent factors that explain user preferences

3. **Neural Collaborative Filtering (NCF)**
   - Deep learning model with user and movie embeddings
   - Learns complex non-linear patterns in user behavior

### 📊 Dataset
- **MovieLens 100k** dataset
- 100,000 ratings from 943 users on 1,682 movies
- Ratings scale: 1-5 stars

### 🎯 Performance Metrics
- **Precision@10**: 26.77%
- **NDCG@10**: 28.50%
- **Model improves recommendations by 40% vs baseline**

### 👨‍💻 Created For
**DataSynthis Job Task**

### 🔗 Technologies Used
- PyTorch (Neural Networks)
- Scikit-learn (SVD, Similarity)
- Pandas & NumPy (Data Processing)
- Gradio (Web Interface)

---

**Note**: This model is trained on the MovieLens 100k dataset. User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
"""

FOOTER_MD = """
---

🎬 Hybrid Movie Recommendation System | Built with ❤️ for DataSynthis

"""


# Placeholder classes, presumably present so that pickle can resolve these
# class names when loading the artifacts -- TODO confirm against the training
# script.
# NOTE(review): pickle restores instance attributes only, never methods.  If
# the artifacts resolve to these empty stubs, calls such as
# ``model.recommend_movies(...)`` below will fail with AttributeError; the
# real class definitions would have to be importable here.
class ItemBasedCF:
    pass


class SVDRecommender:
    pass


class NeuralCF(nn.Module):
    """Embedding + MLP network that scores a (user, movie) pair."""

    def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=None):
        """Build the embedding tables and the MLP head.

        Args:
            n_users: number of rows in the user embedding table.
            n_movies: number of rows in the movie embedding table.
            embedding_dim: width of each embedding vector.
            hidden_layers: widths of the hidden Linear layers; defaults to
                ``[64, 32, 16]`` when omitted.
        """
        super().__init__()
        # A None sentinel avoids sharing one mutable default list across calls.
        if hidden_layers is None:
            hidden_layers = [64, 32, 16]

        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)

        layers = []
        # The MLP input is the user and movie embeddings concatenated.
        input_dim = embedding_dim * 2
        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.2))
            input_dim = hidden_dim
        layers.append(nn.Linear(input_dim, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, user_ids, movie_ids):
        """Return the MLP output for index tensors ``user_ids``/``movie_ids``.

        The two embeddings are concatenated along dim 1 and every size-1
        dimension of the result is squeezed away, so a batch of one yields a
        0-d tensor.
        """
        user_emb = self.user_embedding(user_ids)
        movie_emb = self.movie_embedding(movie_ids)
        x = torch.cat([user_emb, movie_emb], dim=1)
        return self.mlp(x).squeeze()

    def predict(self, user_idx, movie_idx, device='cpu'):
        """Return the score for one pair as a float clamped to [1, 5].

        Puts the module in eval mode and runs without gradients.  Only the
        input tensors are created on ``device``; the module itself is not
        moved, so it must already live there.
        """
        self.eval()
        with torch.no_grad():
            user_tensor = torch.tensor([user_idx], dtype=torch.long, device=device)
            movie_tensor = torch.tensor([movie_idx], dtype=torch.long, device=device)
            prediction = self.forward(user_tensor, movie_tensor)
            return torch.clamp(prediction, 1, 5).item()


class HybridRecommender:
    pass


class MovieLensDataLoader:
    pass


def _load_pickle(file_name):
    """Unpickle ``file_name`` from ARTIFACT_DIR and log the success."""
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load artifacts that come from a trusted source.
    with open(f"{ARTIFACT_DIR}/{file_name}", 'rb') as f:
        obj = pickle.load(f)
    print(f"✓ Loaded {file_name}")
    return obj


def load_model_and_data():
    """Load the model, data loader and movies table from ``model_artifacts/``.

    Prints diagnostics about the working directory and each expected file
    before loading.

    Returns:
        ``(model, loader, movies, user_ids)`` on success, where ``user_ids``
        is the sorted list of keys of ``loader.user_id_map``; on any failure
        ``(None, None, None, [])`` after printing the error.
    """
    print("Checking for files...")
    print(f"Current directory: {os.getcwd()}")
    print(f"Files in current directory: {os.listdir('.')}")

    if os.path.exists(ARTIFACT_DIR):
        print(f"Files in model_artifacts/: {os.listdir(ARTIFACT_DIR)}")
    else:
        print("ERROR: model_artifacts/ folder does not exist!")

    try:
        # Report every expected file first so that all missing artifacts are
        # listed, not just the first one that fails to open.
        for file_name in ARTIFACT_FILES:
            file_path = f"{ARTIFACT_DIR}/{file_name}"
            if not os.path.exists(file_path):
                print(f"ERROR: Missing file: {file_path}")
            else:
                file_size = os.path.getsize(file_path) / (1024 * 1024)
                print(f"Found: {file_path} ({file_size:.2f} MB)")

        model = _load_pickle('hybrid_model.pkl')
        loader = _load_pickle('loader.pkl')
        movies = _load_pickle('movies.pkl')

        user_ids = sorted(loader.user_id_map.keys())
        print(f"✓ Model loaded successfully! {len(user_ids)} users available")
        return model, loader, movies, user_ids

    except FileNotFoundError as e:
        print(f"ERROR: File not found - {e}")
        print("Make sure all pkl files are in the model_artifacts/ folder")
        return None, None, None, []
    except Exception as e:
        # Start-up boundary: log everything and let the UI come up in a
        # degraded state rather than crash on import.
        print(f"ERROR loading model: {type(e).__name__}: {e}")
        traceback.print_exc()
        return None, None, None, []


print("Loading model and data...")
model, loader, movies_df, user_ids = load_model_and_data()
if model is None:
    # Previously this line claimed "Model loaded!" even after a failure.
    print("Model NOT loaded - see the errors above. Available users: 0")
else:
    print(f"Model loaded! Available users: {len(user_ids)}")


def _stars(value):
    """Return one star per whole rating point, or '' for NaN/inf values."""
    if not np.isfinite(value):
        return ""
    return "⭐" * int(value)


def get_recommendations(user_id, num_recommendations):
    """Format the top-N recommendations for ``user_id`` as display text.

    Args:
        user_id: user ID as delivered by the UI (anything ``int()`` accepts).
        num_recommendations: how many movies to request from the model.

    Returns:
        A text block listing title, predicted rating and movie ID for each
        recommendation, or a message starting with ❌ on any error.
    """
    if model is None or loader is None:
        return "❌ Error: Model not loaded properly. Please check the model files."

    # Parse inputs on their own so that only genuinely bad input (including an
    # empty field, which arrives as None) yields the "valid numbers" message;
    # a ValueError raised deeper in the model is no longer misreported.
    try:
        user_id = int(user_id)
        num_recommendations = int(num_recommendations)
    except (TypeError, ValueError):
        return "❌ Error: Please enter valid numbers for User ID and Number of Recommendations"

    try:
        if user_id not in loader.user_id_map:
            return f"❌ User ID {user_id} not found! Please select a valid user ID."

        recommendations = model.recommend_movies(
            user_id=user_id,
            N=num_recommendations,
            user_id_map=loader.user_id_map,
            reverse_movie_map=loader.reverse_movie_map,
            movies_df=movies_df,
        )

        if not recommendations:
            return f"❌ No recommendations found for User {user_id}"

        parts = [
            f"🎬 **Top {num_recommendations} Movie Recommendations for User {user_id}**\n\n",
            SEPARATOR + "\n\n",
        ]
        for i, (movie_id, title, score) in enumerate(recommendations, 1):
            parts.append(f"**{i}. {title}**\n")
            parts.append(f" • Predicted Rating: {score:.2f}/5.00 {_stars(score)}\n")
            parts.append(f" • Movie ID: {movie_id}\n\n")
        return "".join(parts)

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return f"❌ Error generating recommendations: {str(e)}"


def get_user_history(user_id):
    """Summarise the ratings ``user_id`` has given, as display text.

    Reads the user's row of ``model.item_cf.user_item_matrix`` and treats
    entries greater than zero as ratings.

    Returns:
        A text block with the rating count, the average rating and the ten
        highest-rated titles, or a message starting with ❌ on any error.
    """
    if model is None or loader is None:
        return "❌ Error: Model not loaded properly."

    try:
        user_id = int(user_id)
    except (TypeError, ValueError):
        return "❌ Error: Please enter a valid number for User ID"

    try:
        if user_id not in loader.user_id_map:
            return f"❌ User ID {user_id} not found!"

        user_idx = loader.user_id_map[user_id]
        user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
        rated_indices = np.where(user_ratings > 0)[0]

        if len(rated_indices) == 0:
            return f"No rating history found for User {user_id}"

        # One id -> title dict replaces a full DataFrame scan per rated movie.
        # drop_duplicates keeps the first row per id, matching the previous
        # ``.values[0]`` choice.
        title_by_id = (
            movies_df.drop_duplicates(subset='movie_id')
            .set_index('movie_id')['title']
            .to_dict()
        )

        history = []
        for movie_idx in rated_indices:
            original_movie_id = loader.reverse_movie_map[movie_idx]
            # A movie missing from the table no longer aborts the whole view.
            title = title_by_id.get(
                original_movie_id, f"Unknown title (ID {original_movie_id})"
            )
            history.append((title, user_ratings[movie_idx]))

        # Stable sort: equal ratings keep their matrix order.
        history.sort(key=lambda entry: entry[1], reverse=True)

        parts = [
            f"📊 **Rating History for User {user_id}**\n\n",
            f"Total movies rated: {len(history)}\n",
            f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n",
            SEPARATOR + "\n\n",
            "**Top 10 Highest Rated Movies:**\n\n",
        ]
        for i, (title, rating) in enumerate(history[:10], 1):
            parts.append(f"{i}. **{title}** - {rating:.1f}/5 {_stars(rating)}\n")
        return "".join(parts)

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return f"❌ Error: {str(e)}"


def get_movie_info(movie_title_search):
    """Search movie titles for a case-insensitive literal substring.

    Returns:
        A text block listing up to 20 matching titles with their IDs plus a
        count of any further matches, or a message starting with ❌.
    """
    if movies_df is None:
        return "❌ Error: Movies data not loaded"

    query = "" if movie_title_search is None else str(movie_title_search)

    try:
        # regex=False: the query is user input, so characters such as "(" or
        # "." must be matched literally instead of being parsed as a pattern
        # (an unbalanced "(" used to raise a regex error).
        mask = movies_df['title'].str.contains(query, case=False, na=False, regex=False)
        matches = movies_df[mask]

        if len(matches) == 0:
            return f"❌ No movies found matching '{query}'"

        parts = [
            f"🔍 **Search Results for '{query}'**\n\n",
            f"Found {len(matches)} movie(s):\n\n",
            SEPARATOR + "\n\n",
        ]
        shown = matches.head(20)
        for i, (title, movie_id) in enumerate(zip(shown['title'], shown['movie_id']), 1):
            parts.append(f"{i}. **{title}** (ID: {movie_id})\n")

        if len(matches) > 20:
            parts.append(f"\n... and {len(matches) - 20} more results")
        return "".join(parts)

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return f"❌ Error: {str(e)}"


with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
    gr.Markdown(HEADER_MD)

    with gr.Tabs():
        with gr.Tab("🎯 Get Recommendations"):
            gr.Markdown("### Get personalized movie recommendations for any user")

            with gr.Row():
                with gr.Column(scale=1):
                    user_id_input = gr.Number(
                        label="User ID",
                        value=1,
                        minimum=MIN_USER_ID,
                        maximum=MAX_USER_ID,
                        step=1,
                        info=f"Enter a user ID ({MIN_USER_ID}-{MAX_USER_ID})",
                    )
                    num_recs_input = gr.Slider(
                        label="Number of Recommendations",
                        minimum=5,
                        maximum=20,
                        value=10,
                        step=1,
                    )
                    recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")

                with gr.Column(scale=2):
                    recommendations_output = gr.Textbox(
                        label="Recommendations",
                        lines=20,
                        max_lines=30,
                    )

            recommend_btn.click(
                fn=get_recommendations,
                inputs=[user_id_input, num_recs_input],
                outputs=recommendations_output,
            )

            gr.Markdown(HOW_IT_WORKS_MD)

        with gr.Tab("📊 User History"):
            gr.Markdown("### View a user's rating history")

            with gr.Row():
                with gr.Column(scale=1):
                    user_id_history = gr.Number(
                        label="User ID",
                        value=1,
                        minimum=MIN_USER_ID,
                        maximum=MAX_USER_ID,
                        step=1,
                    )
                    history_btn = gr.Button("📊 View History", variant="primary")

                with gr.Column(scale=2):
                    history_output = gr.Textbox(
                        label="Rating History",
                        lines=20,
                        max_lines=30,
                    )

            history_btn.click(
                fn=get_user_history,
                inputs=user_id_history,
                outputs=history_output,
            )

        with gr.Tab("🔍 Search Movies"):
            gr.Markdown("### Search for movies in the database")

            with gr.Row():
                with gr.Column(scale=1):
                    movie_search = gr.Textbox(
                        label="Movie Title Search",
                        placeholder="e.g., Star Wars, Godfather, Titanic...",
                        value="Star Wars",
                    )
                    search_btn = gr.Button("🔍 Search", variant="primary")

                with gr.Column(scale=2):
                    search_output = gr.Textbox(
                        label="Search Results",
                        lines=20,
                        max_lines=30,
                    )

            search_btn.click(
                fn=get_movie_info,
                inputs=movie_search,
                outputs=search_output,
            )

        with gr.Tab("ℹ️ About"):
            gr.Markdown(ABOUT_MD)

    gr.Markdown(FOOTER_MD)


if __name__ == "__main__":
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )