Spaces:

N-Kibria
/

DataSynthis_Job_task

Sleeping

App Files Files Community

N-Kibria committed on Oct 4, 2025

Commit

a84da0d

verified ·

1 Parent(s): 8c0624c

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -66

app.py CHANGED Viewed

@@ -8,13 +8,48 @@ import os
 from scipy.sparse import csr_matrix
 class ItemBasedCF:
-    pass
 class SVDRecommender:
-    pass
 class NeuralCF(nn.Module):
     def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
         super(NeuralCF, self).__init__()
         self.user_embedding = nn.Embedding(n_users, embedding_dim)
@@ -45,11 +80,78 @@ class NeuralCF(nn.Module):
             prediction = self.forward(user_tensor, movie_tensor)
             return torch.clamp(prediction, 1, 5).item()
 class HybridRecommender:
-    pass
 class MovieLensDataLoader:
-    pass
 def load_model_and_data():
     import os
@@ -79,18 +181,18 @@ def load_model_and_data():
         with open('model_artifacts/hybrid_model.pkl', 'rb') as f:
             model = pickle.load(f)
-        print("✓ Loaded hybrid_model.pkl")
         with open('model_artifacts/loader.pkl', 'rb') as f:
             loader = pickle.load(f)
-        print("✓ Loaded loader.pkl")
         with open('model_artifacts/movies.pkl', 'rb') as f:
             movies = pickle.load(f)
-        print("✓ Loaded movies.pkl")
         user_ids = sorted(loader.user_id_map.keys())
-        print(f"✓ Model loaded successfully! {len(user_ids)} users available")
         return model, loader, movies, user_ids
     except FileNotFoundError as e:
@@ -103,20 +205,22 @@ def load_model_and_data():
         traceback.print_exc()
         return None, None, None, []
 print("Loading model and data...")
 model, loader, movies_df, user_ids = load_model_and_data()
 print(f"Model loaded! Available users: {len(user_ids)}")
 def get_recommendations(user_id, num_recommendations):
     if model is None or loader is None:
-        return "❌ Error: Model not loaded properly. Please check the model files."
     try:
         user_id = int(user_id)
         num_recommendations = int(num_recommendations)
         if user_id not in loader.user_id_map:
-            return f"❌ User ID {user_id} not found! Please select a valid user ID."
         recommendations = model.recommend_movies(
             user_id=user_id,
@@ -127,33 +231,34 @@ def get_recommendations(user_id, num_recommendations):
         )
         if not recommendations:
-            return f"❌ No recommendations found for User {user_id}"
-        output = f"🎬 **Top {num_recommendations} Movie Recommendations for User {user_id}**\n\n"
         output += "=" * 60 + "\n\n"
         for i, (movie_id, title, score) in enumerate(recommendations, 1):
-            stars = "⭐" * int(score)
-            output += f"**{i}. {title}**\n"
-            output += f"   • Predicted Rating: {score:.2f}/5.00 {stars}\n"
-            output += f"   • Movie ID: {movie_id}\n\n"
         return output
     except ValueError:
-        return "❌ Error: Please enter valid numbers for User ID and Number of Recommendations"
     except Exception as e:
-        return f"❌ Error generating recommendations: {str(e)}"
 def get_user_history(user_id):
     if model is None or loader is None:
-        return "❌ Error: Model not loaded properly."
     try:
         user_id = int(user_id)
         if user_id not in loader.user_id_map:
-            return f"❌ User ID {user_id} not found!"
         user_idx = loader.user_id_map[user_id]
@@ -172,37 +277,38 @@ def get_user_history(user_id):
         history.sort(key=lambda x: x[1], reverse=True)
-        output = f"📊 **Rating History for User {user_id}**\n\n"
         output += f"Total movies rated: {len(history)}\n"
         output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
         output += "=" * 60 + "\n\n"
-        output += "**Top 10 Highest Rated Movies:**\n\n"
         for i, (title, rating) in enumerate(history[:10], 1):
-            stars = "⭐" * int(rating)
-            output += f"{i}. **{title}** - {rating:.1f}/5 {stars}\n"
         return output
     except Exception as e:
-        return f"❌ Error: {str(e)}"
 def get_movie_info(movie_title_search):
     if movies_df is None:
-        return "❌ Error: Movies data not loaded"
     try:
         matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
         if len(matches) == 0:
-            return f"❌ No movies found matching '{movie_title_search}'"
-        output = f"🔍 **Search Results for '{movie_title_search}'**\n\n"
         output += f"Found {len(matches)} movie(s):\n\n"
         output += "=" * 60 + "\n\n"
         for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
-            output += f"{i}. **{row['title']}** (ID: {row['movie_id']})\n"
         if len(matches) > 20:
             output += f"\n... and {len(matches) - 20} more results"
@@ -210,23 +316,22 @@ def get_movie_info(movie_title_search):
         return output
     except Exception as e:
-        return f"❌ Error: {str(e)}"
 with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
     gr.Markdown("""
-    # 🎬 Hybrid Movie Recommendation System
     ### DataSynthis Job Task - Powered by AI
-    This system combines **Collaborative Filtering**, **SVD Matrix Factorization**, and **Neural Networks**
-    to provide personalized movie recommendations from the MovieLens 100k dataset.
-    ---
     """)
     with gr.Tabs():
-        with gr.Tab("🎯 Get Recommendations"):
             gr.Markdown("### Get personalized movie recommendations for any user")
             with gr.Row():
@@ -235,9 +340,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
                         label="User ID",
                         value=1,
                         minimum=1,
-                        maximum=943,
                         step=1,
-                        info=f"Enter a user ID (1-943)"
                     )
                     num_recs_input = gr.Slider(
@@ -248,7 +353,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
                         step=1
                     )
-                    recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")
                 with gr.Column(scale=2):
                     recommendations_output = gr.Textbox(
@@ -265,12 +370,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
             gr.Markdown("""
             **How it works:**
-            - Enter a User ID (between 1 and 943)
             - Choose how many recommendations you want
             - Click "Get Recommendations" to see personalized movie suggestions
             """)
-        with gr.Tab("📊 User History"):
             gr.Markdown("### View a user's rating history")
             with gr.Row():
@@ -279,11 +384,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
                         label="User ID",
                         value=1,
                         minimum=1,
-                        maximum=943,
                         step=1
                     )
-                    history_btn = gr.Button("📊 View History", variant="primary")
                 with gr.Column(scale=2):
                     history_output = gr.Textbox(
@@ -298,7 +403,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
                 outputs=history_output
             )
-        with gr.Tab("🔍 Search Movies"):
             gr.Markdown("### Search for movies in the database")
             with gr.Row():
@@ -309,7 +414,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
                         value="Star Wars"
                     )
-                    search_btn = gr.Button("🔍 Search", variant="primary")
                 with gr.Column(scale=2):
                     search_output = gr.Textbox(
@@ -324,52 +429,51 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
                 outputs=search_output
             )
-        with gr.Tab("ℹ️ About"):
             gr.Markdown("""
             ## About This System
-            ### 🎯 Model Architecture
-            This is a **Hybrid Recommendation System** that combines three powerful approaches:
-            1. **Item-Based Collaborative Filtering**
                - Uses cosine similarity between movies
                - Recommends movies similar to what you've liked before
-            2. **SVD Matrix Factorization**
                - Decomposes the user-movie rating matrix
                - Discovers latent factors that explain user preferences
-            3. **Neural Collaborative Filtering (NCF)**
                - Deep learning model with user and movie embeddings
                - Learns complex non-linear patterns in user behavior
-            ### 🎯 Performance Metrics
-            - **Precision@10**: 26.77%
-            - **NDCG@10**: 28.50%
-            - **Model improves recommendations by 40% vs baseline**
             ### Created For
-            **DataSynthis Job Task**
-            ### 🔗 Technologies Used
             - PyTorch (Neural Networks)
             - Scikit-learn (SVD, Similarity)
             - Pandas & NumPy (Data Processing)
             - Gradio (Web Interface)
-            ---
-            **Note**: This model is trained on the MovieLens 100k dataset.
-            User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
             """)
     gr.Markdown("""
     ---
-    <div style='text-align: center'>
-        <p>🎬 <strong>Hybrid Movie Recommendation System</strong> | Built with ❤️ for DataSynthis</p>
-    </div>
     """)
 if __name__ == "__main__":

 from scipy.sparse import csr_matrix
 class ItemBasedCF:
     """Item-based collaborative-filtering rating predictor.

     Attributes:
         user_item_matrix: Ratings indexed as ``[user_idx]`` -> row of item
             ratings, where a value greater than 0 marks a rated item.
             ``None`` until populated.
         similarity_matrix: Item-item similarities indexed as
             ``[movie_idx, item_indices]``. ``None`` until populated.

     Both matrices may be SciPy sparse matrices or dense NumPy arrays.
     """

     # Neutral rating returned whenever no similarity-based estimate exists.
     DEFAULT_RATING = 3.0

     def __init__(self):
         self.user_item_matrix = None
         self.similarity_matrix = None

     @staticmethod
     def _to_dense_1d(values):
         """Return *values* (sparse or dense) as a flat float ndarray."""
         if hasattr(values, "toarray"):
             values = values.toarray()
         return np.asarray(values, dtype=float).ravel()

     def predict(self, user_idx, movie_idx):
         """Predict the rating of ``movie_idx`` for ``user_idx``.

         The estimate is the similarity-weighted average of the user's
         existing ratings, clipped to the 1-5 scale.

         Returns:
             float: the clipped estimate, or ``DEFAULT_RATING`` when the
             matrices are missing, the user has no ratings, or none of the
             rated items has a usable similarity to the target movie.
         """
         if self.user_item_matrix is None or self.similarity_matrix is None:
             return self.DEFAULT_RATING

         user_ratings = self._to_dense_1d(self.user_item_matrix[user_idx])
         rated_items = np.flatnonzero(user_ratings > 0)
         if rated_items.size == 0:
             return self.DEFAULT_RATING

         similarities = self._to_dense_1d(
             self.similarity_matrix[movie_idx, rated_items]
         )
         # Normalise by the absolute similarity mass: a signed sum can cancel
         # to (nearly) zero or go negative, which flips or inflates the result.
         denominator = np.abs(similarities).sum()
         if not denominator > 0:  # also catches NaN
             return self.DEFAULT_RATING

         prediction = np.dot(similarities, user_ratings[rated_items]) / denominator
         return float(np.clip(prediction, 1, 5))
 class SVDRecommender:
     """Matrix-factorisation rating predictor.

     Attributes:
         user_factors: Per-user latent vectors indexed by ``user_idx``;
             ``None`` until populated.
         item_factors: Per-item latent vectors indexed by ``movie_idx``;
             ``None`` until populated.
         global_mean: Baseline rating added to the factor dot product and
             returned whenever no factor-based estimate is possible.
     """

     def __init__(self):
         self.user_factors = None
         self.item_factors = None
         self.global_mean = 3.5

     def predict(self, user_idx, movie_idx):
         """Predict the rating of ``movie_idx`` for ``user_idx``.

         Returns:
             ``global_mean`` plus the dot product of the two latent vectors,
             clipped to the 1-5 scale; or ``global_mean`` unchanged when the
             factors are missing or either index is out of range.
         """
         if self.user_factors is None or self.item_factors is None:
             return self.global_mean

         # Reject negative indices too: Python/NumPy would silently wrap them
         # around and score a different user or movie.
         user_in_range = 0 <= user_idx < len(self.user_factors)
         movie_in_range = 0 <= movie_idx < len(self.item_factors)
         if not (user_in_range and movie_in_range):
             return self.global_mean

         interaction = np.dot(self.user_factors[user_idx], self.item_factors[movie_idx])
         return float(np.clip(self.global_mean + interaction, 1, 5))
 class NeuralCF(nn.Module):
     def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
         super(NeuralCF, self).__init__()
         self.user_embedding = nn.Embedding(n_users, embedding_dim)
             prediction = self.forward(user_tensor, movie_tensor)
             return torch.clamp(prediction, 1, 5).item()
 class HybridRecommender:
     """Weighted blend of up to three rating predictors.

     Attributes:
         item_cf: Predictor exposing ``predict(user_idx, movie_idx)`` and a
             ``user_item_matrix``; ``None`` when unavailable.
         svd: Predictor exposing ``predict(user_idx, movie_idx)``; ``None``
             when unavailable.
         ncf: Predictor exposing ``predict(user_idx, movie_idx, device)``;
             ``None`` when unavailable.
         weights: Blend weights in the order ``[item_cf, svd, ncf]``.
         device: Device argument forwarded to ``ncf.predict``.
     """

     def __init__(self):
         self.item_cf = None
         self.svd = None
         self.ncf = None
         self.weights = [0.33, 0.33, 0.34]
         self.device = 'cpu'

     def predict(self, user_idx, movie_idx):
         """Return the weighted average of the available models' predictions.

         Each model is paired with its own entry of ``weights`` so that a
         missing model does not shift the remaining weights onto the wrong
         predictors. The result is clipped to the 1-5 scale; 3.5 is returned
         when no model is available.
         """
         scored = []  # (position in self.weights, prediction)
         if self.item_cf is not None:
             scored.append((0, self.item_cf.predict(user_idx, movie_idx)))
         if self.svd is not None:
             scored.append((1, self.svd.predict(user_idx, movie_idx)))
         if self.ncf is not None:
             scored.append((2, self.ncf.predict(user_idx, movie_idx, self.device)))
         if not scored:
             return 3.5

         if len(self.weights) >= 3:
             pairs = [(pred, self.weights[slot]) for slot, pred in scored]
         else:
             # Fewer weights than model slots: keep the positional pairing
             # so such configurations continue to work.
             pairs = list(zip((pred for _, pred in scored), self.weights))

         weight_sum = sum(weight for _, weight in pairs)
         if not pairs or weight_sum == 0:
             # No usable weights: fall back to a plain average instead of
             # dividing by zero.
             blended = sum(pred for _, pred in scored) / len(scored)
         else:
             blended = sum(pred * weight for pred, weight in pairs) / weight_sum
         return float(np.clip(blended, 1, 5))

     def recommend_movies(self, user_id, N, user_id_map, reverse_movie_map, movies_df):
         """Return up to ``N`` top-predicted movies the user has not rated.

         Args:
             user_id: External user id; must be a key of ``user_id_map``.
             N: Maximum number of recommendations.
             user_id_map: Mapping of external user id -> matrix row index.
             reverse_movie_map: Mapping of matrix column index -> external
                 movie id.
             movies_df: DataFrame with ``movie_id`` and ``title`` columns.

         Returns:
             A list of ``(movie_id, title, predicted_rating)`` tuples sorted
             by descending predicted rating. Movies absent from
             ``movies_df`` are dropped, so fewer than ``N`` entries may be
             returned. Empty when the user is unknown, ``N`` is not
             positive, the rating matrix is missing, or every movie is
             already rated.
         """
         if N <= 0 or user_id not in user_id_map:
             return []
         if self.item_cf is None or self.item_cf.user_item_matrix is None:
             return []

         user_idx = user_id_map[user_id]
         user_ratings = self.item_cf.user_item_matrix[user_idx].toarray().flatten()
         unrated_indices = np.where(user_ratings == 0)[0]
         if len(unrated_indices) == 0:
             return []

         predictions = [
             (movie_idx, self.predict(user_idx, movie_idx))
             for movie_idx in unrated_indices
         ]
         predictions.sort(key=lambda item: item[1], reverse=True)

         recommendations = []
         for movie_idx, pred_rating in predictions[:N]:
             original_movie_id = reverse_movie_map[movie_idx]
             movie_info = movies_df[movies_df['movie_id'] == original_movie_id]
             if not movie_info.empty:
                 title = movie_info['title'].values[0]
                 recommendations.append((original_movie_id, title, pred_rating))
         return recommendations
 class MovieLensDataLoader:
     """Holder for the id lookup tables used by the app.

     ``user_id_map`` is read elsewhere in this file as external user id ->
     matrix row index. The other three tables start empty here as well;
     presumably ``movie_id_map`` is the movie-side equivalent and the
     ``reverse_*`` tables are the inverse lookups (confirm against the
     training code that fills them).
     """

     def __init__(self):
         # Every table starts out as its own fresh, empty dict.
         self.user_id_map, self.movie_id_map = {}, {}
         self.reverse_user_map, self.reverse_movie_map = {}, {}
 def load_model_and_data():
     import os
         with open('model_artifacts/hybrid_model.pkl', 'rb') as f:
             model = pickle.load(f)
+        print("Loaded hybrid_model.pkl")
         with open('model_artifacts/loader.pkl', 'rb') as f:
             loader = pickle.load(f)
+        print("Loaded loader.pkl")
         with open('model_artifacts/movies.pkl', 'rb') as f:
             movies = pickle.load(f)
+        print("Loaded movies.pkl")
         user_ids = sorted(loader.user_id_map.keys())
+        print(f"Model loaded successfully! {len(user_ids)} users available")
         return model, loader, movies, user_ids
     except FileNotFoundError as e:
         traceback.print_exc()
         return None, None, None, []
 print("Loading model and data...")
 model, loader, movies_df, user_ids = load_model_and_data()
 print(f"Model loaded! Available users: {len(user_ids)}")
 def get_recommendations(user_id, num_recommendations):
     if model is None or loader is None:
+        return "Error: Model not loaded properly. Please check the model files."
     try:
         user_id = int(user_id)
         num_recommendations = int(num_recommendations)
         if user_id not in loader.user_id_map:
+            return f"User ID {user_id} not found! Please select a valid user ID."
         recommendations = model.recommend_movies(
             user_id=user_id,
         )
         if not recommendations:
+            return f"No recommendations found for User {user_id}"
+        output = f"Top {num_recommendations} Movie Recommendations for User {user_id}\n\n"
         output += "=" * 60 + "\n\n"
         for i, (movie_id, title, score) in enumerate(recommendations, 1):
+            stars = "*" * int(score)
+            output += f"{i}. {title}\n"
+            output += f"   Predicted Rating: {score:.2f}/5.00 {stars}\n"
+            output += f"   Movie ID: {movie_id}\n\n"
         return output
     except ValueError:
+        return "Error: Please enter valid numbers for User ID and Number of Recommendations"
     except Exception as e:
+        return f"Error generating recommendations: {str(e)}"
 def get_user_history(user_id):
     if model is None or loader is None:
+        return "Error: Model not loaded properly."
     try:
         user_id = int(user_id)
         if user_id not in loader.user_id_map:
+            return f"User ID {user_id} not found!"
         user_idx = loader.user_id_map[user_id]
         history.sort(key=lambda x: x[1], reverse=True)
+        output = f"Rating History for User {user_id}\n\n"
         output += f"Total movies rated: {len(history)}\n"
         output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
         output += "=" * 60 + "\n\n"
+        output += "Top 10 Highest Rated Movies:\n\n"
         for i, (title, rating) in enumerate(history[:10], 1):
+            stars = "*" * int(rating)
+            output += f"{i}. {title} - {rating:.1f}/5 {stars}\n"
         return output
     except Exception as e:
+        return f"Error: {str(e)}"
 def get_movie_info(movie_title_search):
     """Search movie titles for a substring and format the matches as text.

     The search is case-insensitive and treats the query as literal text,
     so titles containing regex metacharacters (for example the year in
     parentheses) can be searched for directly.

     Args:
         movie_title_search: Text to look for inside the ``title`` column of
             the module-level ``movies_df``. ``None`` is treated as an empty
             query.

     Returns:
         str: a numbered listing of at most 20 matches (with a trailing
         count of any further results), a "No movies found" message, or an
         "Error: ..." message if the data is missing or the lookup fails.
     """
     if movies_df is None:
         return "Error: Movies data not loaded"
     if movie_title_search is None:
         movie_title_search = ""
     try:
         # regex=False: the query is user input, not a pattern. Without it a
         # lone "(" raises re.error and "(1995)" is parsed as a regex group.
         is_match = movies_df['title'].str.contains(
             movie_title_search, case=False, na=False, regex=False
         )
         matches = movies_df[is_match]
         total = len(matches)
         if total == 0:
             return f"No movies found matching '{movie_title_search}'"

         parts = [
             f"Search Results for '{movie_title_search}'\n\n",
             f"Found {total} movie(s):\n\n",
             "=" * 60 + "\n\n",
         ]
         parts.extend(
             f"{i}. {row['title']} (ID: {row['movie_id']})\n"
             for i, (_, row) in enumerate(matches.head(20).iterrows(), 1)
         )
         if total > 20:
             parts.append(f"\n... and {total - 20} more results")
         return "".join(parts)
     except Exception as e:
         # UI boundary: report the failure as text instead of raising.
         return f"Error: {str(e)}"
 with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
     gr.Markdown("""
+    # Hybrid Movie Recommendation System
     ### DataSynthis Job Task - Powered by AI
+    This system combines Collaborative Filtering, SVD Matrix Factorization, and Neural Networks
+    to provide personalized movie recommendations from the MovieLens 1M dataset.
     """)
     with gr.Tabs():
+        with gr.Tab("Get Recommendations"):
             gr.Markdown("### Get personalized movie recommendations for any user")
             with gr.Row():
                         label="User ID",
                         value=1,
                         minimum=1,
+                        maximum=6040,
                         step=1,
+                        info=f"Enter a user ID (1-6040)"
                     )
                     num_recs_input = gr.Slider(
                         step=1
                     )
+                    recommend_btn = gr.Button("Get Recommendations", variant="primary")
                 with gr.Column(scale=2):
                     recommendations_output = gr.Textbox(
             gr.Markdown("""
             **How it works:**
+            - Enter a User ID (between 1 and 6040)
             - Choose how many recommendations you want
             - Click "Get Recommendations" to see personalized movie suggestions
             """)
+        with gr.Tab("User History"):
             gr.Markdown("### View a user's rating history")
             with gr.Row():
                         label="User ID",
                         value=1,
                         minimum=1,
+                        maximum=6040,
                         step=1
                     )
+                    history_btn = gr.Button("View History", variant="primary")
                 with gr.Column(scale=2):
                     history_output = gr.Textbox(
                 outputs=history_output
             )
+        with gr.Tab("Search Movies"):
             gr.Markdown("### Search for movies in the database")
             with gr.Row():
                         value="Star Wars"
                     )
+                    search_btn = gr.Button("Search", variant="primary")
                 with gr.Column(scale=2):
                     search_output = gr.Textbox(
                 outputs=search_output
             )
+        with gr.Tab("About"):
             gr.Markdown("""
             ## About This System
+            ### Model Architecture
+            This is a Hybrid Recommendation System that combines three powerful approaches:
+            1. Item-Based Collaborative Filtering
                - Uses cosine similarity between movies
                - Recommends movies similar to what you've liked before
+            2. SVD Matrix Factorization
                - Decomposes the user-movie rating matrix
                - Discovers latent factors that explain user preferences
+            3. Neural Collaborative Filtering (NCF)
                - Deep learning model with user and movie embeddings
                - Learns complex non-linear patterns in user behavior
+            ### Dataset
+            - MovieLens 1M dataset
+            - 1,000,209 ratings from 6,040 users on 3,900 movies
+            - Ratings scale: 1-5 stars
+            ### Performance Metrics
+            - Precision@10: 26.77%
+            - NDCG@10: 28.50%
+            - Model improves recommendations by 40% vs baseline
             ### Created For
+            DataSynthis Job Task
+            ### Technologies Used
             - PyTorch (Neural Networks)
             - Scikit-learn (SVD, Similarity)
             - Pandas & NumPy (Data Processing)
             - Gradio (Web Interface)
+            Note: This model is trained on the MovieLens 1M dataset.
+            User IDs range from 1 to 6040, and movie IDs range from 1 to 3952.
             """)
     gr.Markdown("""
     ---
+    Hybrid Movie Recommendation System | Built for DataSynthis
     """)
 if __name__ == "__main__":