Spaces:

sairika
/

DataSynthis_ML_JobTask

Sleeping

App Files Files Community

sairika committed on Oct 4, 2025

Commit

eb66ca3

verified ·

1 Parent(s): f4afa30

Update app.py

Browse files

Files changed (1) hide show

app.py +415 -120

app.py CHANGED Viewed

@@ -1,179 +1,474 @@
 import gradio as gr
-import pickle
 import pandas as pd
 import numpy as np
-from surprise import SVD
 import warnings
 warnings.filterwarnings('ignore')
-# Load models and data
-print("Loading models...")
 with open('svd_model.pkl', 'rb') as f:
     svd_model = pickle.load(f)
 with open('movies.pkl', 'rb') as f:
     movies = pickle.load(f)
 with open('ratings.pkl', 'rb') as f:
     ratings = pickle.load(f)
-print("Models loaded successfully!")
-def recommend_movies(user_id, num_recommendations, min_rating):
-    """
-    Generate movie recommendations for a user
-    """
-    try:
-        user_id = int(user_id)
-        num_recommendations = int(num_recommendations)
-        min_rating = float(min_rating)
-        # Check if user exists
-        if user_id not in ratings['userId'].values:
-            return f"⚠️ User ID {user_id} not found in database. Please try a different user ID (1-{ratings['userId'].max()})."
-        # Get all movies
-        all_movie_ids = movies['movieId'].unique()
-        # Get movies the user has already rated
-        rated_movies = ratings[ratings['userId'] == user_id]['movieId'].values
-        # Get movies the user hasn't rated
-        movies_to_predict = [mid for mid in all_movie_ids if mid not in rated_movies]
-        # Predict ratings
-        predictions = []
-        for movie_id in movies_to_predict:
             pred = svd_model.predict(user_id, movie_id)
             if pred.est >= min_rating:
                 predictions.append({
                     'movieId': movie_id,
                     'predicted_rating': pred.est
                 })
-        if not predictions:
-            return f"No movies found with predicted rating >= {min_rating}. Try lowering the minimum rating."
-        # Sort and get top N
-        predictions_df = pd.DataFrame(predictions)
-        predictions_df = predictions_df.sort_values('predicted_rating', ascending=False)
-        top_recommendations = predictions_df.head(num_recommendations)
-        # Merge with movie details
-        recommendations = top_recommendations.merge(movies, on='movieId')
-        recommendations['predicted_rating'] = recommendations['predicted_rating'].round(2)
-        # Format output
-        output = f"🎬 Top {len(recommendations)} Movie Recommendations for User {user_id}\n\n"
-        for idx, row in recommendations.iterrows():
-            output += f"{idx + 1}. **{row['title']}**\n"
-            output += f"   ⭐ Predicted Rating: {row['predicted_rating']}/5.0\n"
-            output += f"   🎭 Genres: {row['genres']}\n\n"
-        return output
-    except Exception as e:
-        return f"❌ Error: {str(e)}"
-def get_user_history(user_id):
-    """
-    Get user's rating history
-    """
     try:
         user_id = int(user_id)
         if user_id not in ratings['userId'].values:
-            return f"⚠️ User ID {user_id} not found."
-        user_ratings = ratings[ratings['userId'] == user_id].merge(movies, on='movieId')
-        user_ratings = user_ratings.sort_values('rating', ascending=False).head(10)
-        output = f"📊 User {user_id}'s Top Rated Movies:\n\n"
-        for idx, row in user_ratings.iterrows():
-            output += f"• **{row['title']}** - ⭐ {row['rating']}/5.0\n"
-            output += f"  Genres: {row['genres']}\n\n"
-        return output
     except Exception as e:
-        return f"❌ Error: {str(e)}"
 # Create Gradio interface
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(
         """
-        # 🎬 MovieLens Recommendation System
-        ### Powered by SVD Matrix Factorization
-        Get personalized movie recommendations based on collaborative filtering!
         """
     )
-    with gr.Tab("🎯 Get Recommendations"):
         with gr.Row():
-            with gr.Column():
                 user_id_input = gr.Number(
-                    label="User ID",
-                    value=1,
-                    info=f"Enter a user ID (1 to {ratings['userId'].max()})"
                 )
-                num_rec_input = gr.Slider(
-                    minimum=5,
-                    maximum=20,
-                    value=10,
                     step=1,
-                    label="Number of Recommendations"
                 )
-                min_rating_input = gr.Slider(
-                    minimum=1.0,
-                    maximum=5.0,
-                    value=3.5,
                     step=0.5,
-                    label="Minimum Predicted Rating"
                 )
-                recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")
-            with gr.Column():
-                recommendations_output = gr.Markdown(label="Recommendations")
         recommend_btn.click(
-            fn=recommend_movies,
-            inputs=[user_id_input, num_rec_input, min_rating_input],
             outputs=recommendations_output
         )
-    with gr.Tab("📊 User History"):
         with gr.Row():
-            with gr.Column():
-                history_user_id = gr.Number(
-                    label="User ID",
-                    value=1,
-                    info="Enter a user ID to see their rating history"
-                )
-                history_btn = gr.Button("📊 View History", variant="primary")
-            with gr.Column():
-                history_output = gr.Markdown(label="User History")
-        history_btn.click(
-            fn=get_user_history,
-            inputs=history_user_id,
-            outputs=history_output
         )
-    gr.Markdown(
-        """
-        ---
-        ### 📈 Model Information
-        - **Algorithm**: SVD (Singular Value Decomposition)
-        - **Dataset**: MovieLens Small (100K ratings)
-        - **Evaluation Metrics**: RMSE, Precision@K, Recall@K, NDCG@K
-        - **Best Performance**: Lowest RMSE and Highest NDCG among tested models
-        """
-    )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import pandas as pd
 import numpy as np
+import pickle
+import torch
+import torch.nn as nn
+from surprise import SVD, KNNBasic
 import warnings
 warnings.filterwarnings('ignore')
+# ============================================================================
+# NEURAL COLLABORATIVE FILTERING MODEL
+# ============================================================================
+class NeuralCollaborativeFiltering(nn.Module):
+    """Two-branch neural collaborative filtering model (GMF + MLP).
+
+    The GMF branch multiplies a user embedding and an item embedding
+    element-wise. The MLP branch concatenates a second, independent pair of
+    embeddings and feeds them through Linear -> ReLU -> Dropout(0.2) stacks.
+    The two branch outputs are concatenated and projected to a single score
+    per (user, item) pair.
+
+    Attribute names and their creation order are kept stable on purpose:
+    they determine the ``state_dict`` keys used by saved checkpoints.
+
+    Args:
+        n_users: Number of rows in each user embedding table.
+        n_items: Number of rows in each item embedding table.
+        embedding_dim: Width of every embedding vector.
+        hidden_layers: Output sizes of the successive MLP layers. The default
+            is an immutable tuple so it can never be shared and mutated
+            between instances; any sequence of ints (a list too) is accepted.
+    """
+
+    def __init__(self, n_users, n_items, embedding_dim=64, hidden_layers=(128, 64, 32)):
+        super().__init__()
+        # GMF Embeddings
+        self.gmf_user_embedding = nn.Embedding(n_users, embedding_dim)
+        self.gmf_item_embedding = nn.Embedding(n_items, embedding_dim)
+        # MLP Embeddings
+        self.mlp_user_embedding = nn.Embedding(n_users, embedding_dim)
+        self.mlp_item_embedding = nn.Embedding(n_items, embedding_dim)
+        # MLP Layers: the first layer consumes the concatenated user+item vector.
+        mlp_layers = []
+        input_size = embedding_dim * 2
+        for hidden_size in hidden_layers:
+            mlp_layers.append(nn.Linear(input_size, hidden_size))
+            mlp_layers.append(nn.ReLU())
+            mlp_layers.append(nn.Dropout(0.2))
+            input_size = hidden_size
+        self.mlp = nn.Sequential(*mlp_layers)
+        # Final prediction layer. After the loop `input_size` is the MLP output
+        # width (== hidden_layers[-1]); using it also keeps the shapes right
+        # when `hidden_layers` is empty and the MLP is an identity.
+        self.output = nn.Linear(embedding_dim + input_size, 1)
+
+    def forward(self, user_ids, item_ids):
+        """Score each (user, item) index pair.
+
+        Args:
+            user_ids: Integer tensor of user embedding indices.
+            item_ids: Integer tensor of item embedding indices, same shape.
+
+        Returns:
+            Tensor of scores with the trailing size-1 dimension removed, i.e.
+            shape ``(batch,)`` for 1-D inputs.
+        """
+        gmf_vector = self.gmf_user_embedding(user_ids) * self.gmf_item_embedding(item_ids)
+        mlp_input = torch.cat(
+            [self.mlp_user_embedding(user_ids), self.mlp_item_embedding(item_ids)],
+            dim=-1,
+        )
+        mlp_vector = self.mlp(mlp_input)
+        combined = torch.cat([gmf_vector, mlp_vector], dim=-1)
+        # squeeze(-1) drops only the output feature axis; a bare squeeze() would
+        # also collapse a batch of one into a 0-d tensor.
+        return self.output(combined).squeeze(-1)
+# ============================================================================
+# HYBRID RECOMMENDER CLASS
+# ============================================================================
+class HybridRecommender:
+    """Blend NCF and SVD rating predictions into one ranked recommendation list.
+
+    Args:
+        ncf_model: Torch module called as ``ncf_model(user_idx, item_idx)``.
+        svd_model: Object exposing ``predict(user_id, movie_id).est``.
+        item_mapping: Embedding index -> movieId (stored, not read here).
+        reverse_item_mapping: movieId -> embedding index used for NCF lookups.
+        ratings: DataFrame with at least ``userId`` and ``movieId`` columns.
+        movies: DataFrame with ``movieId``, ``title`` and ``genres`` columns.
+        ncf_weight: Weight of the NCF score in the blended prediction.
+        svd_weight: Weight of the SVD score in the blended prediction.
+    """
+
+    def __init__(self, ncf_model, svd_model, item_mapping, reverse_item_mapping,
+                 ratings, movies, ncf_weight=0.65, svd_weight=0.35):
+        self.ncf_model = ncf_model
+        self.svd_model = svd_model
+        self.item_mapping = item_mapping
+        self.reverse_item_mapping = reverse_item_mapping
+        self.ratings = ratings
+        self.movies = movies
+        self.ncf_weight = ncf_weight
+        self.svd_weight = svd_weight
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.ncf_model.to(self.device)
+        # Inference only: eval() switches off dropout in the NCF network.
+        self.ncf_model.eval()
+
+    def recommend_movies(self, user_id, N=10, min_rating=3.5):
+        """Return up to ``N`` unseen movies whose blended score is >= ``min_rating``.
+
+        Args:
+            user_id: Raw user id as stored in ``ratings['userId']``.
+            N: Maximum number of rows to return (coerced to int, so a float
+                coming from a UI slider is accepted).
+            min_rating: Lower bound on the blended predicted rating.
+
+        Returns:
+            DataFrame with columns ``title``, ``genres``, ``predicted_rating``,
+            ``ncf_rating`` and ``svd_rating`` sorted by ``predicted_rating``
+            descending, or an empty DataFrame when nothing qualifies.
+        """
+        # A set gives O(1) membership tests; scanning the rated-movies array for
+        # every candidate made this step quadratic.
+        already_rated = set(self.ratings.loc[self.ratings['userId'] == user_id, 'movieId'])
+        candidates = [mid for mid in self.movies['movieId'].unique() if mid not in already_rated]
+        ncf_scores = self._ncf_scores(user_id, candidates)
+
+        predictions = []
+        for movie_id in candidates:
+            # Movies without an NCF embedding fall back to a neutral 3.0.
+            ncf_pred = ncf_scores.get(movie_id, 3.0)
+            try:
+                svd_pred = self.svd_model.predict(user_id, movie_id).est
+            except Exception:
+                # Best-effort: a failing SVD lookup also degrades to 3.0, but
+                # KeyboardInterrupt/SystemExit are no longer swallowed.
+                svd_pred = 3.0
+            hybrid_pred = self.ncf_weight * ncf_pred + self.svd_weight * svd_pred
+            if hybrid_pred >= min_rating:
+                predictions.append({
+                    'movieId': movie_id,
+                    'predicted_rating': hybrid_pred,
+                    'ncf_rating': ncf_pred,
+                    'svd_rating': svd_pred
+                })
+
+        if not predictions:
+            return pd.DataFrame()
+
+        top = pd.DataFrame(predictions)
+        top = top.sort_values('predicted_rating', ascending=False).head(int(N))
+        recommendations = top.merge(self.movies, on='movieId')
+        for column in ('predicted_rating', 'ncf_rating', 'svd_rating'):
+            recommendations[column] = recommendations[column].round(2)
+        return recommendations[['title', 'genres', 'predicted_rating', 'ncf_rating', 'svd_rating']]
+
+    def _ncf_scores(self, user_id, movie_ids):
+        """Return {movieId: NCF score clamped to [0.5, 5.0]} for ids with an embedding.
+
+        All known movies are scored in a single batched forward pass instead
+        of one model call per movie.
+        """
+        known = [mid for mid in movie_ids if mid in self.reverse_item_mapping]
+        if not known:
+            return {}
+        item_idx = torch.tensor(
+            [int(self.reverse_item_mapping[mid]) for mid in known],
+            dtype=torch.long,
+            device=self.device,
+        )
+        # The user embedding row is user_id - 1, as in the per-movie version.
+        # NOTE(review): this assumes user ids are contiguous and start at 1 - confirm.
+        user_idx = torch.full_like(item_idx, int(user_id) - 1)
+        with torch.no_grad():
+            # reshape(-1) keeps a 1-D result even if the model returns a 0-d
+            # tensor for a batch of one.
+            scores = self.ncf_model(user_idx, item_idx).reshape(-1).clamp(0.5, 5.0)
+        return dict(zip(known, scores.tolist()))
+# ============================================================================
+# LOAD MODELS AND DATA
+# ============================================================================
+print("Loading models and data...")
+# Load saved models and data
+# NOTE(review): pickle.load can execute arbitrary code embedded in the file, so
+# these .pkl artifacts must only ever come from this project's own training run.
 with open('svd_model.pkl', 'rb') as f:
     svd_model = pickle.load(f)
+with open('item_based_cf.pkl', 'rb') as f:
+    item_based_cf = pickle.load(f)
+with open('user_based_cf.pkl', 'rb') as f:
+    user_based_cf = pickle.load(f)
 with open('movies.pkl', 'rb') as f:
     movies = pickle.load(f)
 with open('ratings.pkl', 'rb') as f:
     ratings = pickle.load(f)
+# Load NCF model if exists
+# Everything NCF-related sits in one try block: any failure below (missing
+# checkpoint, shape mismatch, ...) switches the app to the non-hybrid models.
+try:
+    # Prepare item mapping
+    # Adds a 'movieId_cat' column to the shared `ratings` frame (in-place mutation).
+    ratings['movieId_cat'] = ratings['movieId'].astype('category')
+    # item_mapping: embedding index -> movieId; reverse_item_mapping: movieId -> index.
+    # NOTE(review): presumably this ordering has to match the item indexing used
+    # when ncf_model_best.pth was trained - confirm against the training code.
+    item_mapping = dict(enumerate(ratings['movieId_cat'].cat.categories))
+    reverse_item_mapping = {v: k for k, v in item_mapping.items()}
+    # Embedding table sizes are taken from the distinct ids present in `ratings`.
+    n_users = ratings['userId'].nunique()
+    n_items = ratings['movieId'].nunique()
+    ncf_model = NeuralCollaborativeFiltering(n_users, n_items)
+    # map_location='cpu' loads the checkpoint tensors onto the CPU.
+    ncf_model.load_state_dict(torch.load('ncf_model_best.pth', map_location='cpu'))
+    ncf_model.eval()
+    # Create hybrid recommender
+    hybrid_recommender = HybridRecommender(
+        ncf_model=ncf_model,
+        svd_model=svd_model,
+        item_mapping=item_mapping,
+        reverse_item_mapping=reverse_item_mapping,
+        ratings=ratings,
+        movies=movies
+    )
+    use_hybrid = True
+    print("✓ Hybrid model loaded successfully!")
+except Exception as e:
+    # `hybrid_recommender` stays undefined on this path; callers must check
+    # `use_hybrid` before touching it.
+    print(f"⚠ Could not load NCF model: {e}")
+    print("Using SVD model only...")
+    use_hybrid = False
+# ============================================================================
+# RECOMMENDATION FUNCTIONS
+# ============================================================================
+def get_user_history(user_id):
+    """Return the user's ten highest-rated movies, or a one-row message frame if there are none."""
+    rated_by_user = ratings[ratings['userId'] == user_id]
+    with_titles = rated_by_user.merge(movies, on='movieId')
+    top_ten = with_titles.sort_values('rating', ascending=False).head(10)
+    # Zero rows means the id has no ratings (or none matching a known movie).
+    if top_ten.shape[0] == 0:
+        return pd.DataFrame({"Message": ["No rating history found for this user"]})
+    return top_ten[['title', 'genres', 'rating', 'timestamp']]
+def recommend_with_svd(user_id, n_recommendations, min_rating):
+    """Generate recommendations using the SVD model.
+
+    Args:
+        user_id: Raw user id as stored in ``ratings['userId']``.
+        n_recommendations: Maximum number of rows to return (coerced to int,
+            so a float coming from a UI slider is accepted).
+        min_rating: Lower bound on the predicted rating.
+
+    Returns:
+        DataFrame with ``title``, ``genres`` and ``predicted_rating`` sorted by
+        predicted rating descending, or a one-row ``Message`` frame when no
+        movie reaches ``min_rating``.
+    """
+    # A set gives O(1) membership tests; scanning the rated-movies array for
+    # every candidate made this step quadratic.
+    already_rated = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])
+    predictions = []
+    for movie_id in movies['movieId'].unique():
+        if movie_id in already_rated:
+            continue
+        try:
+            estimate = svd_model.predict(user_id, movie_id).est
+        except Exception:
+            # Best-effort: skip a movie the model cannot score, without also
+            # swallowing KeyboardInterrupt/SystemExit like a bare except would.
+            continue
+        if estimate >= min_rating:
+            predictions.append({
+                'movieId': movie_id,
+                'predicted_rating': estimate
+            })
+    if not predictions:
+        return pd.DataFrame({"Message": ["No recommendations found with these criteria"]})
+    top = pd.DataFrame(predictions)
+    top = top.sort_values('predicted_rating', ascending=False).head(int(n_recommendations))
+    recommendations = top.merge(movies, on='movieId')
+    recommendations['predicted_rating'] = recommendations['predicted_rating'].round(2)
+    return recommendations[['title', 'genres', 'predicted_rating']]
+def _recommend_with_cf(model, user_id, n_recommendations, min_rating):
+    """Rank the user's unseen movies with any model exposing ``predict(uid, iid).est``."""
+    already_rated = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])
+    predictions = []
+    for movie_id in movies['movieId'].unique():
+        if movie_id in already_rated:
+            continue
+        try:
+            estimate = model.predict(user_id, movie_id).est
+        except Exception:
+            # Best-effort: skip movies the model cannot score.
+            continue
+        if estimate >= min_rating:
+            predictions.append({
+                'movieId': movie_id,
+                'predicted_rating': estimate
+            })
+    if not predictions:
+        return pd.DataFrame({"Message": ["No recommendations found"]})
+    top = pd.DataFrame(predictions)
+    top = top.sort_values('predicted_rating', ascending=False).head(n_recommendations)
+    recommendations = top.merge(movies, on='movieId')
+    recommendations['predicted_rating'] = recommendations['predicted_rating'].round(2)
+    return recommendations[['title', 'genres', 'predicted_rating']]
+def get_recommendations(user_id, n_recommendations, min_rating, model_type):
+    """Main recommendation function: validate the input and dispatch to a model.
+
+    Args:
+        user_id: User id from the UI; anything ``int()`` accepts.
+        n_recommendations: Maximum number of rows to return (coerced to int).
+        min_rating: Lower bound on the predicted rating.
+        model_type: One of the dropdown labels ("Hybrid (NCF + SVD)",
+            "SVD (Matrix Factorization)", "Item-Based CF", "User-Based CF").
+            Any other value is treated as User-Based CF.
+
+    Returns:
+        DataFrame of recommendations, or a one-row ``Error`` / ``Message``
+        frame. This function never raises, so the UI always has a table to show.
+    """
+    # Parse the id on its own so that only a bad id yields the "valid user ID"
+    # message. An empty field arrives as None and raises TypeError, not ValueError.
+    try:
+        user_id = int(user_id)
+    except (TypeError, ValueError):
+        return pd.DataFrame({"Error": ["Please enter a valid user ID (integer)"]})
+    try:
+        n_recommendations = int(n_recommendations)
+        # Check if user exists
+        if user_id not in ratings['userId'].values:
+            return pd.DataFrame({"Error": [f"User ID {user_id} not found. Please enter a valid user ID (1-610)"]})
+        # Get recommendations based on model type
+        if model_type == "Hybrid (NCF + SVD)" and use_hybrid:
+            recommendations = hybrid_recommender.recommend_movies(
+                user_id,
+                N=n_recommendations,
+                min_rating=min_rating
+            )
+        elif model_type in ("SVD (Matrix Factorization)", "Hybrid (NCF + SVD)"):
+            # Hybrid requested but not loaded: fall back to SVD, which is what
+            # the start-up log announces, instead of dropping into User-Based CF.
+            recommendations = recommend_with_svd(user_id, n_recommendations, min_rating)
+        elif model_type == "Item-Based CF":
+            recommendations = _recommend_with_cf(item_based_cf, user_id, n_recommendations, min_rating)
+        else:  # User-Based CF
+            recommendations = _recommend_with_cf(user_based_cf, user_id, n_recommendations, min_rating)
+        if len(recommendations) == 0:
+            return pd.DataFrame({"Message": ["No recommendations found with these criteria. Try lowering the minimum rating."]})
+        return recommendations
+    except Exception as e:
+        # Top-level UI boundary: report the failure in the table instead of crashing.
+        return pd.DataFrame({"Error": [f"An error occurred: {str(e)}"]})
+def search_movies(query):
+    """Search for movies whose title contains ``query`` (case-insensitive, literal text).
+
+    Args:
+        query: Text typed by the user; empty or ``None`` lists the first 20 movies.
+
+    Returns:
+        DataFrame with ``movieId``, ``title`` and ``genres`` (at most 20 rows),
+        or a one-row ``Message`` frame when nothing matches.
+    """
+    if not query:
+        return movies[['movieId', 'title', 'genres']].head(20)
+    # regex=False: the query comes straight from a textbox, so characters such as
+    # "(" or "*" must match literally. Compiled as a regex, an unbalanced "("
+    # raises re.error and "." would match any character.
+    mask = movies['title'].str.contains(query, case=False, na=False, regex=False)
+    results = movies[mask][['movieId', 'title', 'genres']].head(20)
+    if len(results) == 0:
+        return pd.DataFrame({"Message": [f"No movies found matching '{query}'"]})
+    return results
+# ============================================================================
+# GRADIO INTERFACE
+# ============================================================================
+# Dropdown choices; the hybrid entry leads the list only when use_hybrid is set.
+model_options = ["Hybrid (NCF + SVD)"] if use_hybrid else []
+model_options += ["SVD (Matrix Factorization)", "Item-Based CF", "User-Based CF"]
 # Create Gradio interface
+# Three-tab UI: "Get Recommendations" (inputs, results table, rating history),
+# "Search Movies" (title search) and "About" (static description).
+with gr.Blocks(theme=gr.themes.Soft(), title="MovieLens Recommender System") as demo:
     gr.Markdown(
         """
+        # 🎬 MovieLens Movie Recommendation System
+        Get personalized movie recommendations using state-of-the-art collaborative filtering algorithms!
+        **Available Models:**
+        - 🚀 **Hybrid (NCF + SVD)**: Combines Neural Collaborative Filtering with Matrix Factorization
+        - 📊 **SVD**: Singular Value Decomposition (Matrix Factorization)
+        - 🎯 **Item-Based CF**: Recommends based on similar movies
+        - 👥 **User-Based CF**: Recommends based on similar users
         """
     )
+    with gr.Tab("Get Recommendations"):
         with gr.Row():
+            # Left column: request parameters. Right column: results table.
+            with gr.Column(scale=1):
                 user_id_input = gr.Number(
+                    label="User ID",
+                    value=1,
+                    precision=0,
+                    info="Enter a user ID (1-610)"
                 )
+                # Choices come from model_options; its first entry is preselected.
+                model_selector = gr.Dropdown(
+                    choices=model_options,
+                    value=model_options[0],
+                    label="Recommendation Model",
+                    info="Choose the algorithm to generate recommendations"
+                )
+                n_recs = gr.Slider(
+                    minimum=5,
+                    maximum=50,
+                    value=10,
                     step=1,
+                    label="Number of Recommendations",
+                    info="How many movies to recommend"
                 )
+                min_rating_slider = gr.Slider(
+                    minimum=0.5,
+                    maximum=5.0,
+                    value=3.5,
                     step=0.5,
+                    label="Minimum Predicted Rating",
+                    info="Only show movies with predicted rating above this threshold"
+                )
+                recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary", size="lg")
+            with gr.Column(scale=2):
+                recommendations_output = gr.Dataframe(
+                    label="Recommended Movies",
+                    wrap=True,
+                    height=500
                 )
+        gr.Markdown("### 📊 User's Rating History")
+        user_history_output = gr.Dataframe(
+            label="Top Rated Movies by This User",
+            wrap=True,
+            height=300
+        )
+        # Connect buttons
+        # The input order below must match get_recommendations' parameter order:
+        # (user_id, n_recommendations, min_rating, model_type).
         recommend_btn.click(
+            fn=get_recommendations,
+            inputs=[user_id_input, n_recs, min_rating_slider, model_selector],
             outputs=recommendations_output
         )
+        # The history table is refreshed only by the User ID change event; no
+        # load-time trigger is wired in this block.
+        user_id_input.change(
+            fn=get_user_history,
+            inputs=user_id_input,
+            outputs=user_history_output
+        )
+    with gr.Tab("Search Movies"):
+        gr.Markdown("### 🔍 Search for Movies in Database")
         with gr.Row():
+            search_input = gr.Textbox(
+                label="Search Query",
+                placeholder="Enter movie title...",
+                info="Search for movies by title"
+            )
+            search_btn = gr.Button("Search", variant="primary")
+        search_output = gr.Dataframe(
+            label="Search Results",
+            wrap=True,
+            height=500
+        )
+        # The button click and the textbox submit event share one handler.
+        search_btn.click(
+            fn=search_movies,
+            inputs=search_input,
+            outputs=search_output
+        )
+        search_input.submit(
+            fn=search_movies,
+            inputs=search_input,
+            outputs=search_output
         )
+    with gr.Tab("About"):
+        # Static help text; no event handlers are attached in this tab.
+        gr.Markdown(
+            """
+            ## 📖 About This System
+            This recommendation system was built using the MovieLens dataset and implements multiple collaborative filtering algorithms:
+            ### Models
+            1. **Hybrid Model (NCF + SVD)** 🚀
+               - Combines Neural Collaborative Filtering with SVD
+               - Best performance: RMSE improvement over baseline
+               - Uses deep learning to capture non-linear patterns
+            2. **SVD (Singular Value Decomposition)** 📊
+               - Matrix factorization technique
+               - Learns latent factors for users and items
+               - Excellent for sparse data
+            3. **Item-Based Collaborative Filtering** 🎯
+               - Recommends movies similar to what you've liked
+               - Based on item-item similarity
+               - Good for users with consistent preferences
+            4. **User-Based Collaborative Filtering** 👥
+               - Recommends based on users similar to you
+               - User-user similarity approach
+               - Effective for discovering diverse content
+            ### Dataset
+            - **MovieLens Small Dataset**: 100,000+ ratings
+            - **610 users** and **9,724 movies**
+            - Rating scale: 0.5 to 5.0 stars
+            ### Performance Metrics
+            The models were evaluated using:
+            - RMSE (Root Mean Square Error)
+            - Precision@10
+            - Recall@10
+            - NDCG@10 (Normalized Discounted Cumulative Gain)
+            ### How to Use
+            1. Enter a User ID (1-610)
+            2. Select a recommendation model
+            3. Choose number of recommendations
+            4. Set minimum rating threshold
+            5. Click "Get Recommendations"
+            ---
+            Built with ❤️ using Gradio, PyTorch, and Surprise
+            """
+        )
+print("✓ Gradio interface ready!")
+# Launch the app. share=True is dropped: this app is deployed as a Hugging Face
+# Space, where the platform already serves it publicly and Gradio does not
+# support share links; run locally it would open a public tunnel unasked.
 if __name__ == "__main__":
+    demo.launch()