N-Kibria committed on
Commit
a84da0d
·
verified ·
1 Parent(s): 8c0624c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -66
app.py CHANGED
@@ -8,13 +8,48 @@ import os
8
  from scipy.sparse import csr_matrix
9
 
10
  class ItemBasedCF:
11
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  class SVDRecommender:
14
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  class NeuralCF(nn.Module):
17
-
18
  def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
19
  super(NeuralCF, self).__init__()
20
  self.user_embedding = nn.Embedding(n_users, embedding_dim)
@@ -45,11 +80,78 @@ class NeuralCF(nn.Module):
45
  prediction = self.forward(user_tensor, movie_tensor)
46
  return torch.clamp(prediction, 1, 5).item()
47
 
 
48
  class HybridRecommender:
49
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  class MovieLensDataLoader:
52
- pass
 
 
 
 
 
53
 
54
  def load_model_and_data():
55
  import os
@@ -79,18 +181,18 @@ def load_model_and_data():
79
 
80
  with open('model_artifacts/hybrid_model.pkl', 'rb') as f:
81
  model = pickle.load(f)
82
- print("Loaded hybrid_model.pkl")
83
 
84
  with open('model_artifacts/loader.pkl', 'rb') as f:
85
  loader = pickle.load(f)
86
- print("Loaded loader.pkl")
87
 
88
  with open('model_artifacts/movies.pkl', 'rb') as f:
89
  movies = pickle.load(f)
90
- print("Loaded movies.pkl")
91
 
92
  user_ids = sorted(loader.user_id_map.keys())
93
- print(f"Model loaded successfully! {len(user_ids)} users available")
94
 
95
  return model, loader, movies, user_ids
96
  except FileNotFoundError as e:
@@ -103,20 +205,22 @@ def load_model_and_data():
103
  traceback.print_exc()
104
  return None, None, None, []
105
 
 
106
  print("Loading model and data...")
107
  model, loader, movies_df, user_ids = load_model_and_data()
108
  print(f"Model loaded! Available users: {len(user_ids)}")
109
 
 
110
  def get_recommendations(user_id, num_recommendations):
111
  if model is None or loader is None:
112
- return "Error: Model not loaded properly. Please check the model files."
113
 
114
  try:
115
  user_id = int(user_id)
116
  num_recommendations = int(num_recommendations)
117
 
118
  if user_id not in loader.user_id_map:
119
- return f"User ID {user_id} not found! Please select a valid user ID."
120
 
121
  recommendations = model.recommend_movies(
122
  user_id=user_id,
@@ -127,33 +231,34 @@ def get_recommendations(user_id, num_recommendations):
127
  )
128
 
129
  if not recommendations:
130
- return f"No recommendations found for User {user_id}"
131
 
132
- output = f"🎬 **Top {num_recommendations} Movie Recommendations for User {user_id}**\n\n"
133
  output += "=" * 60 + "\n\n"
134
 
135
  for i, (movie_id, title, score) in enumerate(recommendations, 1):
136
- stars = "" * int(score)
137
- output += f"**{i}. {title}**\n"
138
- output += f" Predicted Rating: {score:.2f}/5.00 {stars}\n"
139
- output += f" Movie ID: {movie_id}\n\n"
140
 
141
  return output
142
 
143
  except ValueError:
144
- return "Error: Please enter valid numbers for User ID and Number of Recommendations"
145
  except Exception as e:
146
- return f"Error generating recommendations: {str(e)}"
 
147
 
148
  def get_user_history(user_id):
149
  if model is None or loader is None:
150
- return "Error: Model not loaded properly."
151
 
152
  try:
153
  user_id = int(user_id)
154
 
155
  if user_id not in loader.user_id_map:
156
- return f"User ID {user_id} not found!"
157
 
158
  user_idx = loader.user_id_map[user_id]
159
 
@@ -172,37 +277,38 @@ def get_user_history(user_id):
172
 
173
  history.sort(key=lambda x: x[1], reverse=True)
174
 
175
- output = f"📊 **Rating History for User {user_id}**\n\n"
176
  output += f"Total movies rated: {len(history)}\n"
177
  output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
178
  output += "=" * 60 + "\n\n"
179
- output += "**Top 10 Highest Rated Movies:**\n\n"
180
 
181
  for i, (title, rating) in enumerate(history[:10], 1):
182
- stars = "" * int(rating)
183
- output += f"{i}. **{title}** - {rating:.1f}/5 {stars}\n"
184
 
185
  return output
186
 
187
  except Exception as e:
188
- return f"Error: {str(e)}"
 
189
 
190
  def get_movie_info(movie_title_search):
191
  if movies_df is None:
192
- return "Error: Movies data not loaded"
193
 
194
  try:
195
  matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
196
 
197
  if len(matches) == 0:
198
- return f"No movies found matching '{movie_title_search}'"
199
 
200
- output = f"🔍 **Search Results for '{movie_title_search}'**\n\n"
201
  output += f"Found {len(matches)} movie(s):\n\n"
202
  output += "=" * 60 + "\n\n"
203
 
204
  for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
205
- output += f"{i}. **{row['title']}** (ID: {row['movie_id']})\n"
206
 
207
  if len(matches) > 20:
208
  output += f"\n... and {len(matches) - 20} more results"
@@ -210,23 +316,22 @@ def get_movie_info(movie_title_search):
210
  return output
211
 
212
  except Exception as e:
213
- return f"Error: {str(e)}"
 
214
 
215
  with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
216
 
217
  gr.Markdown("""
218
- # 🎬 Hybrid Movie Recommendation System
219
  ### DataSynthis Job Task - Powered by AI
220
 
221
- This system combines **Collaborative Filtering**, **SVD Matrix Factorization**, and **Neural Networks**
222
- to provide personalized movie recommendations from the MovieLens 100k dataset.
223
-
224
- ---
225
  """)
226
 
227
  with gr.Tabs():
228
 
229
- with gr.Tab("🎯 Get Recommendations"):
230
  gr.Markdown("### Get personalized movie recommendations for any user")
231
 
232
  with gr.Row():
@@ -235,9 +340,9 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
235
  label="User ID",
236
  value=1,
237
  minimum=1,
238
- maximum=943,
239
  step=1,
240
- info=f"Enter a user ID (1-943)"
241
  )
242
 
243
  num_recs_input = gr.Slider(
@@ -248,7 +353,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
248
  step=1
249
  )
250
 
251
- recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")
252
 
253
  with gr.Column(scale=2):
254
  recommendations_output = gr.Textbox(
@@ -265,12 +370,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
265
 
266
  gr.Markdown("""
267
  **How it works:**
268
- - Enter a User ID (between 1 and 943)
269
  - Choose how many recommendations you want
270
  - Click "Get Recommendations" to see personalized movie suggestions
271
  """)
272
 
273
- with gr.Tab("📊 User History"):
274
  gr.Markdown("### View a user's rating history")
275
 
276
  with gr.Row():
@@ -279,11 +384,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
279
  label="User ID",
280
  value=1,
281
  minimum=1,
282
- maximum=943,
283
  step=1
284
  )
285
 
286
- history_btn = gr.Button("📊 View History", variant="primary")
287
 
288
  with gr.Column(scale=2):
289
  history_output = gr.Textbox(
@@ -298,7 +403,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
298
  outputs=history_output
299
  )
300
 
301
- with gr.Tab("🔍 Search Movies"):
302
  gr.Markdown("### Search for movies in the database")
303
 
304
  with gr.Row():
@@ -309,7 +414,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
309
  value="Star Wars"
310
  )
311
 
312
- search_btn = gr.Button("🔍 Search", variant="primary")
313
 
314
  with gr.Column(scale=2):
315
  search_output = gr.Textbox(
@@ -324,52 +429,51 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
324
  outputs=search_output
325
  )
326
 
327
- with gr.Tab("ℹ️ About"):
328
  gr.Markdown("""
329
  ## About This System
330
 
331
- ### 🎯 Model Architecture
332
- This is a **Hybrid Recommendation System** that combines three powerful approaches:
333
 
334
- 1. **Item-Based Collaborative Filtering**
335
  - Uses cosine similarity between movies
336
  - Recommends movies similar to what you've liked before
337
 
338
- 2. **SVD Matrix Factorization**
339
  - Decomposes the user-movie rating matrix
340
  - Discovers latent factors that explain user preferences
341
 
342
- 3. **Neural Collaborative Filtering (NCF)**
343
  - Deep learning model with user and movie embeddings
344
  - Learns complex non-linear patterns in user behavior
345
 
346
-
 
 
 
347
 
348
- ### 🎯 Performance Metrics
349
- - **Precision@10**: 26.77%
350
- - **NDCG@10**: 28.50%
351
- - **Model improves recommendations by 40% vs baseline**
352
 
353
  ### Created For
354
- **DataSynthis Job Task**
355
 
356
- ### 🔗 Technologies Used
357
  - PyTorch (Neural Networks)
358
  - Scikit-learn (SVD, Similarity)
359
  - Pandas & NumPy (Data Processing)
360
  - Gradio (Web Interface)
361
 
362
- ---
363
-
364
- **Note**: This model is trained on the MovieLens 100k dataset.
365
- User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
366
  """)
367
 
368
  gr.Markdown("""
369
  ---
370
- <div style='text-align: center'>
371
- <p>🎬 <strong>Hybrid Movie Recommendation System</strong> | Built with ❤️ for DataSynthis</p>
372
- </div>
373
  """)
374
 
375
  if __name__ == "__main__":
 
8
  from scipy.sparse import csr_matrix
9
 
10
  class ItemBasedCF:
11
+ def __init__(self):
12
+ self.user_item_matrix = None
13
+ self.similarity_matrix = None
14
+
15
+ def predict(self, user_idx, movie_idx):
16
+ if self.user_item_matrix is None or self.similarity_matrix is None:
17
+ return 3.0
18
+
19
+ user_ratings = self.user_item_matrix[user_idx].toarray().flatten()
20
+ rated_items = np.where(user_ratings > 0)[0]
21
+
22
+ if len(rated_items) == 0:
23
+ return 3.0
24
+
25
+ similarities = self.similarity_matrix[movie_idx, rated_items].toarray().flatten()
26
+ ratings = user_ratings[rated_items]
27
+
28
+ if similarities.sum() == 0:
29
+ return 3.0
30
+
31
+ prediction = np.dot(similarities, ratings) / similarities.sum()
32
+ return np.clip(prediction, 1, 5)
33
+
34
 
35
  class SVDRecommender:
36
+ def __init__(self):
37
+ self.user_factors = None
38
+ self.item_factors = None
39
+ self.global_mean = 3.5
40
+
41
+ def predict(self, user_idx, movie_idx):
42
+ if self.user_factors is None or self.item_factors is None:
43
+ return self.global_mean
44
+
45
+ if user_idx >= len(self.user_factors) or movie_idx >= len(self.item_factors):
46
+ return self.global_mean
47
+
48
+ prediction = self.global_mean + np.dot(self.user_factors[user_idx], self.item_factors[movie_idx])
49
+ return np.clip(prediction, 1, 5)
50
+
51
 
52
  class NeuralCF(nn.Module):
 
53
  def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
54
  super(NeuralCF, self).__init__()
55
  self.user_embedding = nn.Embedding(n_users, embedding_dim)
 
80
  prediction = self.forward(user_tensor, movie_tensor)
81
  return torch.clamp(prediction, 1, 5).item()
82
 
83
+
84
  class HybridRecommender:
85
+ def __init__(self):
86
+ self.item_cf = None
87
+ self.svd = None
88
+ self.ncf = None
89
+ self.weights = [0.33, 0.33, 0.34]
90
+ self.device = 'cpu'
91
+
92
+ def predict(self, user_idx, movie_idx):
93
+ predictions = []
94
+
95
+ if self.item_cf is not None:
96
+ predictions.append(self.item_cf.predict(user_idx, movie_idx))
97
+
98
+ if self.svd is not None:
99
+ predictions.append(self.svd.predict(user_idx, movie_idx))
100
+
101
+ if self.ncf is not None:
102
+ predictions.append(self.ncf.predict(user_idx, movie_idx, self.device))
103
+
104
+ if not predictions:
105
+ return 3.5
106
+
107
+ weights = self.weights[:len(predictions)]
108
+ weight_sum = sum(weights)
109
+ weighted_pred = sum(p * w for p, w in zip(predictions, weights)) / weight_sum
110
+
111
+ return np.clip(weighted_pred, 1, 5)
112
+
113
+ def recommend_movies(self, user_id, N, user_id_map, reverse_movie_map, movies_df):
114
+ if user_id not in user_id_map:
115
+ return []
116
+
117
+ user_idx = user_id_map[user_id]
118
+
119
+ if self.item_cf is None or self.item_cf.user_item_matrix is None:
120
+ return []
121
+
122
+ user_ratings = self.item_cf.user_item_matrix[user_idx].toarray().flatten()
123
+ unrated_indices = np.where(user_ratings == 0)[0]
124
+
125
+ if len(unrated_indices) == 0:
126
+ return []
127
+
128
+ predictions = []
129
+ for movie_idx in unrated_indices:
130
+ pred_rating = self.predict(user_idx, movie_idx)
131
+ predictions.append((movie_idx, pred_rating))
132
+
133
+ predictions.sort(key=lambda x: x[1], reverse=True)
134
+ top_predictions = predictions[:N]
135
+
136
+ recommendations = []
137
+ for movie_idx, pred_rating in top_predictions:
138
+ original_movie_id = reverse_movie_map[movie_idx]
139
+ movie_info = movies_df[movies_df['movie_id'] == original_movie_id]
140
+
141
+ if not movie_info.empty:
142
+ title = movie_info['title'].values[0]
143
+ recommendations.append((original_movie_id, title, pred_rating))
144
+
145
+ return recommendations
146
+
147
 
148
  class MovieLensDataLoader:
149
+ def __init__(self):
150
+ self.user_id_map = {}
151
+ self.movie_id_map = {}
152
+ self.reverse_user_map = {}
153
+ self.reverse_movie_map = {}
154
+
155
 
156
  def load_model_and_data():
157
  import os
 
181
 
182
  with open('model_artifacts/hybrid_model.pkl', 'rb') as f:
183
  model = pickle.load(f)
184
+ print("Loaded hybrid_model.pkl")
185
 
186
  with open('model_artifacts/loader.pkl', 'rb') as f:
187
  loader = pickle.load(f)
188
+ print("Loaded loader.pkl")
189
 
190
  with open('model_artifacts/movies.pkl', 'rb') as f:
191
  movies = pickle.load(f)
192
+ print("Loaded movies.pkl")
193
 
194
  user_ids = sorted(loader.user_id_map.keys())
195
+ print(f"Model loaded successfully! {len(user_ids)} users available")
196
 
197
  return model, loader, movies, user_ids
198
  except FileNotFoundError as e:
 
205
  traceback.print_exc()
206
  return None, None, None, []
207
 
208
+
209
  print("Loading model and data...")
210
  model, loader, movies_df, user_ids = load_model_and_data()
211
  print(f"Model loaded! Available users: {len(user_ids)}")
212
 
213
+
214
  def get_recommendations(user_id, num_recommendations):
215
  if model is None or loader is None:
216
+ return "Error: Model not loaded properly. Please check the model files."
217
 
218
  try:
219
  user_id = int(user_id)
220
  num_recommendations = int(num_recommendations)
221
 
222
  if user_id not in loader.user_id_map:
223
+ return f"User ID {user_id} not found! Please select a valid user ID."
224
 
225
  recommendations = model.recommend_movies(
226
  user_id=user_id,
 
231
  )
232
 
233
  if not recommendations:
234
+ return f"No recommendations found for User {user_id}"
235
 
236
+ output = f"Top {num_recommendations} Movie Recommendations for User {user_id}\n\n"
237
  output += "=" * 60 + "\n\n"
238
 
239
  for i, (movie_id, title, score) in enumerate(recommendations, 1):
240
+ stars = "*" * int(score)
241
+ output += f"{i}. {title}\n"
242
+ output += f" Predicted Rating: {score:.2f}/5.00 {stars}\n"
243
+ output += f" Movie ID: {movie_id}\n\n"
244
 
245
  return output
246
 
247
  except ValueError:
248
+ return "Error: Please enter valid numbers for User ID and Number of Recommendations"
249
  except Exception as e:
250
+ return f"Error generating recommendations: {str(e)}"
251
+
252
 
253
  def get_user_history(user_id):
254
  if model is None or loader is None:
255
+ return "Error: Model not loaded properly."
256
 
257
  try:
258
  user_id = int(user_id)
259
 
260
  if user_id not in loader.user_id_map:
261
+ return f"User ID {user_id} not found!"
262
 
263
  user_idx = loader.user_id_map[user_id]
264
 
 
277
 
278
  history.sort(key=lambda x: x[1], reverse=True)
279
 
280
+ output = f"Rating History for User {user_id}\n\n"
281
  output += f"Total movies rated: {len(history)}\n"
282
  output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
283
  output += "=" * 60 + "\n\n"
284
+ output += "Top 10 Highest Rated Movies:\n\n"
285
 
286
  for i, (title, rating) in enumerate(history[:10], 1):
287
+ stars = "*" * int(rating)
288
+ output += f"{i}. {title} - {rating:.1f}/5 {stars}\n"
289
 
290
  return output
291
 
292
  except Exception as e:
293
+ return f"Error: {str(e)}"
294
+
295
 
296
  def get_movie_info(movie_title_search):
297
  if movies_df is None:
298
+ return "Error: Movies data not loaded"
299
 
300
  try:
301
  matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
302
 
303
  if len(matches) == 0:
304
+ return f"No movies found matching '{movie_title_search}'"
305
 
306
+ output = f"Search Results for '{movie_title_search}'\n\n"
307
  output += f"Found {len(matches)} movie(s):\n\n"
308
  output += "=" * 60 + "\n\n"
309
 
310
  for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
311
+ output += f"{i}. {row['title']} (ID: {row['movie_id']})\n"
312
 
313
  if len(matches) > 20:
314
  output += f"\n... and {len(matches) - 20} more results"
 
316
  return output
317
 
318
  except Exception as e:
319
+ return f"Error: {str(e)}"
320
+
321
 
322
  with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
323
 
324
  gr.Markdown("""
325
+ # Hybrid Movie Recommendation System
326
  ### DataSynthis Job Task - Powered by AI
327
 
328
+ This system combines Collaborative Filtering, SVD Matrix Factorization, and Neural Networks
329
+ to provide personalized movie recommendations from the MovieLens 1M dataset.
 
 
330
  """)
331
 
332
  with gr.Tabs():
333
 
334
+ with gr.Tab("Get Recommendations"):
335
  gr.Markdown("### Get personalized movie recommendations for any user")
336
 
337
  with gr.Row():
 
340
  label="User ID",
341
  value=1,
342
  minimum=1,
343
+ maximum=6040,
344
  step=1,
345
+ info=f"Enter a user ID (1-6040)"
346
  )
347
 
348
  num_recs_input = gr.Slider(
 
353
  step=1
354
  )
355
 
356
+ recommend_btn = gr.Button("Get Recommendations", variant="primary")
357
 
358
  with gr.Column(scale=2):
359
  recommendations_output = gr.Textbox(
 
370
 
371
  gr.Markdown("""
372
  **How it works:**
373
+ - Enter a User ID (between 1 and 6040)
374
  - Choose how many recommendations you want
375
  - Click "Get Recommendations" to see personalized movie suggestions
376
  """)
377
 
378
+ with gr.Tab("User History"):
379
  gr.Markdown("### View a user's rating history")
380
 
381
  with gr.Row():
 
384
  label="User ID",
385
  value=1,
386
  minimum=1,
387
+ maximum=6040,
388
  step=1
389
  )
390
 
391
+ history_btn = gr.Button("View History", variant="primary")
392
 
393
  with gr.Column(scale=2):
394
  history_output = gr.Textbox(
 
403
  outputs=history_output
404
  )
405
 
406
+ with gr.Tab("Search Movies"):
407
  gr.Markdown("### Search for movies in the database")
408
 
409
  with gr.Row():
 
414
  value="Star Wars"
415
  )
416
 
417
+ search_btn = gr.Button("Search", variant="primary")
418
 
419
  with gr.Column(scale=2):
420
  search_output = gr.Textbox(
 
429
  outputs=search_output
430
  )
431
 
432
+ with gr.Tab("About"):
433
  gr.Markdown("""
434
  ## About This System
435
 
436
+ ### Model Architecture
437
+ This is a Hybrid Recommendation System that combines three powerful approaches:
438
 
439
+ 1. Item-Based Collaborative Filtering
440
  - Uses cosine similarity between movies
441
  - Recommends movies similar to what you've liked before
442
 
443
+ 2. SVD Matrix Factorization
444
  - Decomposes the user-movie rating matrix
445
  - Discovers latent factors that explain user preferences
446
 
447
+ 3. Neural Collaborative Filtering (NCF)
448
  - Deep learning model with user and movie embeddings
449
  - Learns complex non-linear patterns in user behavior
450
 
451
+ ### Dataset
452
+ - MovieLens 1M dataset
453
+ - 1,000,209 ratings from 6,040 users on 3,900 movies
454
+ - Ratings scale: 1-5 stars
455
 
456
+ ### Performance Metrics
457
+ - Precision@10: 26.77%
458
+ - NDCG@10: 28.50%
459
+ - Model improves recommendations by 40% vs baseline
460
 
461
  ### Created For
462
+ DataSynthis Job Task
463
 
464
+ ### Technologies Used
465
  - PyTorch (Neural Networks)
466
  - Scikit-learn (SVD, Similarity)
467
  - Pandas & NumPy (Data Processing)
468
  - Gradio (Web Interface)
469
 
470
+ Note: This model is trained on the MovieLens 1M dataset.
471
+ User IDs range from 1 to 6040, and movie IDs range from 1 to 3952.
 
 
472
  """)
473
 
474
  gr.Markdown("""
475
  ---
476
+ Hybrid Movie Recommendation System | Built for DataSynthis
 
 
477
  """)
478
 
479
  if __name__ == "__main__":