LvMAC committed on
Commit
863f720
·
verified ·
1 Parent(s): e5c75eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +378 -228
app.py CHANGED
@@ -1,7 +1,10 @@
 
 
 
 
1
  import numpy as np
2
  import pandas as pd
3
  from scipy.sparse.linalg import svds
4
- from scipy.sparse import csr_matrix
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  from sklearn.model_selection import train_test_split
7
  import pickle
@@ -9,12 +12,22 @@ import os
9
  import warnings
10
  warnings.filterwarnings('ignore')
11
 
 
12
  # DATA LOADING & PREPROCESSING
 
13
 
14
  def load_movielens_data(ratings_path='ratings.csv', movies_path='movies.csv'):
15
- """Load and prepare MovieLens data"""
16
  ratings = pd.read_csv(ratings_path)
17
  movies = pd.read_csv(movies_path)
 
 
 
 
 
 
 
 
18
  return ratings, movies
19
 
20
  def create_user_item_matrix(ratings):
@@ -24,163 +37,253 @@ def create_user_item_matrix(ratings):
24
  columns='movieId',
25
  values='rating'
26
  ).fillna(0)
 
 
 
 
 
27
  return user_item_matrix
28
 
29
- # COLLABORATIVE FILTERING - USER BASED
 
 
30
 
31
  class UserBasedCF:
 
 
32
  def __init__(self, user_item_matrix):
33
  self.matrix = user_item_matrix
34
  self.user_similarity = None
35
 
36
  def fit(self):
37
- """Compute user similarity matrix"""
 
38
  self.user_similarity = cosine_similarity(self.matrix)
39
  np.fill_diagonal(self.user_similarity, 0)
 
40
 
41
  def predict(self, user_id, k=50):
42
- """Predict ratings for user"""
43
  if user_id not in self.matrix.index:
44
- return pd.Series()
45
 
46
  user_idx = self.matrix.index.get_loc(user_id)
47
- similar_users_idx = np.argsort(self.user_similarity[user_idx])[::-1][:k]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- similar_users_ratings = self.matrix.iloc[similar_users_idx]
50
- weights = self.user_similarity[user_idx][similar_users_idx]
 
51
 
52
- weighted_sum = (similar_users_ratings.T * weights).sum(axis=1)
53
- weight_sum = np.abs(weights).sum()
54
 
55
- predictions = weighted_sum / (weight_sum + 1e-10)
56
  user_ratings = self.matrix.loc[user_id]
57
- predictions[user_ratings > 0] = 0
58
 
59
- return predictions
60
 
61
- # COLLABORATIVE FILTERING - ITEM BASED
 
 
62
 
63
  class ItemBasedCF:
 
 
64
  def __init__(self, user_item_matrix):
65
  self.matrix = user_item_matrix
66
  self.item_similarity = None
67
 
68
  def fit(self):
69
- """Compute item similarity matrix"""
 
70
  self.item_similarity = cosine_similarity(self.matrix.T)
71
  np.fill_diagonal(self.item_similarity, 0)
 
72
 
73
  def predict(self, user_id, k=50):
74
- """Predict ratings for user"""
75
  if user_id not in self.matrix.index:
76
- return pd.Series()
77
 
78
  user_ratings = self.matrix.loc[user_id]
79
  rated_items = user_ratings[user_ratings > 0]
80
 
81
- predictions = pd.Series(0, index=self.matrix.columns)
 
 
 
82
 
83
- for item_id in rated_items.index:
84
  item_idx = self.matrix.columns.get_loc(item_id)
85
- similar_items_idx = np.argsort(self.item_similarity[item_idx])[::-1][:k]
 
 
 
86
 
87
- for sim_idx in similar_items_idx:
88
- sim_item_id = self.matrix.columns[sim_idx]
89
- if user_ratings[sim_item_id] == 0:
90
- predictions[sim_item_id] += (
91
- self.item_similarity[item_idx][sim_idx] * rated_items[item_id]
92
- )
93
 
94
- predictions[user_ratings > 0] = 0
95
- return predictions
 
 
96
 
97
- # MATRIX FACTORIZATION - SVD
 
 
98
 
99
  class SVDRecommender:
 
 
100
  def __init__(self, user_item_matrix, n_factors=50):
101
  self.matrix = user_item_matrix
102
  self.n_factors = n_factors
103
- self.user_factors = None
104
- self.item_factors = None
105
- self.mean_rating = None
106
 
107
  def fit(self):
108
  """Perform SVD decomposition"""
109
- matrix_centered = self.matrix.values - self.matrix.values.mean()
 
 
 
 
 
 
 
110
  U, sigma, Vt = svds(matrix_centered, k=self.n_factors)
 
111
 
112
- self.user_factors = U
113
- self.item_factors = Vt.T
114
- self.sigma = np.diag(sigma)
115
- self.mean_rating = self.matrix.values.mean()
116
 
117
- predicted = np.dot(np.dot(U, self.sigma), Vt) + self.mean_rating
118
  self.predictions = pd.DataFrame(
119
- predicted,
120
  index=self.matrix.index,
121
  columns=self.matrix.columns
122
  )
123
 
 
 
124
  def predict(self, user_id):
125
- """Get predictions for user"""
126
  if user_id not in self.predictions.index:
127
- return pd.Series()
128
 
129
- user_predictions = self.predictions.loc[user_id]
130
  user_ratings = self.matrix.loc[user_id]
 
 
131
  user_predictions[user_ratings > 0] = 0
132
 
133
  return user_predictions
134
 
 
135
  # EVALUATION METRICS
 
136
 
137
  def precision_at_k(recommended, relevant, k):
138
- """Calculate Precision@K"""
139
  recommended_k = set(recommended[:k])
140
  relevant_set = set(relevant)
141
- return len(recommended_k & relevant_set) / k if k > 0 else 0
 
 
 
 
142
 
143
  def recall_at_k(recommended, relevant, k):
144
- """Calculate Recall@K"""
145
  recommended_k = set(recommended[:k])
146
  relevant_set = set(relevant)
147
- return len(recommended_k & relevant_set) / len(relevant_set) if len(relevant_set) > 0 else 0
 
 
 
 
148
 
149
  def ndcg_at_k(recommended, relevant, k):
150
- """Calculate NDCG@K"""
151
- dcg = 0
152
  for i, item in enumerate(recommended[:k]):
153
  if item in relevant:
154
- dcg += 1 / np.log2(i + 2)
 
 
 
 
 
155
 
156
- idcg = sum([1 / np.log2(i + 2) for i in range(min(len(relevant), k))])
157
- return dcg / idcg if idcg > 0 else 0
158
 
159
  def evaluate_model(model, test_data, user_item_matrix, k=10, threshold=4.0):
160
- """Evaluate model on test set"""
161
- precisions, recalls, ndcgs = [], [], []
 
 
162
 
163
- test_users = test_data['userId'].unique()[:100]
164
 
 
 
 
165
  for user_id in test_users:
166
  if user_id not in user_item_matrix.index:
167
  continue
168
-
169
- user_test = test_data[test_data['userId'] == user_id]
170
- relevant_items = user_test[user_test['rating'] >= threshold]['movieId'].tolist()
 
171
 
172
  if len(relevant_items) == 0:
173
  continue
174
 
 
175
  predictions = model.predict(user_id)
176
- if len(predictions) == 0:
 
177
  continue
178
-
179
- recommended = predictions.sort_values(ascending=False).index[:k].tolist()
180
 
181
- precisions.append(precision_at_k(recommended, relevant_items, k))
182
- recalls.append(recall_at_k(recommended, relevant_items, k))
183
- ndcgs.append(ndcg_at_k(recommended, relevant_items, k))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
  return {
186
  'Precision@K': np.mean(precisions),
@@ -188,80 +291,113 @@ def evaluate_model(model, test_data, user_item_matrix, k=10, threshold=4.0):
188
  'NDCG@K': np.mean(ndcgs)
189
  }
190
 
191
- # RECOMMENDATION FUNCTION (REQUIRED DELIVERABLE)
 
 
192
 
193
  def recommend_movies(user_id, N, model, movies_df):
194
  """
195
- Recommend top N movies for user
196
 
197
  Parameters:
198
- - user_id: target user ID
199
- - N: number of recommendations
200
- - model: trained recommendation model
201
- - movies_df: movies dataframe with titles
202
 
203
  Returns:
204
- - DataFrame with movie recommendations
205
  """
206
  predictions = model.predict(user_id)
207
 
208
  if len(predictions) == 0:
209
  return pd.DataFrame(columns=['movieId', 'title', 'predicted_rating'])
210
 
211
- top_n = predictions.sort_values(ascending=False).head(N)
 
 
212
  recommendations = pd.DataFrame({
213
  'movieId': top_n.index,
214
  'predicted_rating': top_n.values
215
  })
216
 
217
- recommendations = recommendations.merge(movies_df[['movieId', 'title']], on='movieId')
 
 
 
 
 
 
218
  return recommendations[['movieId', 'title', 'predicted_rating']]
219
 
220
- # MAIN EXECUTION PIPELINE
 
 
221
 
222
  def main():
223
- print("Loading data...")
 
 
 
 
 
224
  ratings, movies = load_movielens_data()
225
 
226
- # Train-test split
 
227
  train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)
 
 
228
 
229
- print("Creating user-item matrix...")
 
230
  user_item_matrix = create_user_item_matrix(train_data)
231
 
232
- # Train models
233
- print("\n1. Training User-Based CF...")
234
  user_cf = UserBasedCF(user_item_matrix)
235
  user_cf.fit()
 
236
  metrics_user_cf = evaluate_model(user_cf, test_data, user_item_matrix)
237
- print(f"User-Based CF Metrics: {metrics_user_cf}")
 
 
238
 
239
- print("\n2. Training Item-Based CF...")
 
240
  item_cf = ItemBasedCF(user_item_matrix)
241
  item_cf.fit()
 
242
  metrics_item_cf = evaluate_model(item_cf, test_data, user_item_matrix)
243
- print(f"Item-Based CF Metrics: {metrics_item_cf}")
 
 
244
 
245
- print("\n3. Training SVD...")
 
246
  svd = SVDRecommender(user_item_matrix, n_factors=50)
247
  svd.fit()
 
248
  metrics_svd = evaluate_model(svd, test_data, user_item_matrix)
249
- print(f"SVD Metrics: {metrics_svd}")
 
 
250
 
251
- # Compare models
252
- print("\n" + "="*60)
253
  print("MODEL COMPARISON")
254
- print("="*60)
255
- comparison = pd.DataFrame({
 
256
  'User-Based CF': metrics_user_cf,
257
  'Item-Based CF': metrics_item_cf,
258
  'SVD': metrics_svd
259
  })
260
- print(comparison)
261
 
262
- # Select best model
263
- best_model_name = comparison.loc['NDCG@K'].idxmax()
264
- print(f"\nBest Model: {best_model_name}")
265
 
266
  if best_model_name == 'User-Based CF':
267
  best_model = user_cf
@@ -270,53 +406,69 @@ def main():
270
  else:
271
  best_model = svd
272
 
273
- # Example recommendation
274
- print("\n" + "="*60)
275
  print("EXAMPLE RECOMMENDATIONS")
276
- print("="*60)
277
- sample_user = user_item_matrix.index[0]
278
- recommendations = recommend_movies(sample_user, 10, best_model, movies)
279
- print(f"\nTop 10 recommendations for User {sample_user}:")
 
 
280
  print(recommendations.to_string(index=False))
281
 
282
- # Save all models for deployment
283
- save_all_for_deployment(user_cf, item_cf, svd, user_item_matrix, movies,
284
- metrics_user_cf, metrics_item_cf, metrics_svd)
 
 
 
 
 
 
 
285
 
286
  return best_model, user_item_matrix, movies
287
 
288
- # SAVE MODELS FOR DEPLOYMENT
289
-
290
- def save_all_for_deployment(user_cf, item_cf, svd, user_item_matrix, movies,
291
- metrics_user_cf, metrics_item_cf, metrics_svd):
292
- """Save everything needed for Hugging Face deployment"""
293
 
294
  output_dir = 'deployment_files'
295
  os.makedirs(output_dir, exist_ok=True)
296
 
 
 
297
  with open(f'{output_dir}/user_cf_model.pkl', 'wb') as f:
298
  pickle.dump(user_cf, f)
 
299
 
300
  with open(f'{output_dir}/item_cf_model.pkl', 'wb') as f:
301
  pickle.dump(item_cf, f)
 
302
 
303
  with open(f'{output_dir}/svd_model.pkl', 'wb') as f:
304
  pickle.dump(svd, f)
 
305
 
306
  with open(f'{output_dir}/user_item_matrix.pkl', 'wb') as f:
307
  pickle.dump(user_item_matrix, f)
 
 
 
 
 
 
 
308
 
309
  with open(f'{output_dir}/metrics.pkl', 'wb') as f:
310
- pickle.dump({
311
- 'User-Based CF': metrics_user_cf,
312
- 'Item-Based CF': metrics_item_cf,
313
- 'SVD': metrics_svd
314
- }, f)
315
 
316
  movies.to_csv(f'{output_dir}/movies.csv', index=False)
 
317
 
318
- print(f"\nAll models and data saved to {output_dir}/")
319
- print("Ready for Hugging Face deployment")
320
 
321
  if __name__ == "__main__":
322
  best_model, user_item_matrix, movies = main()
@@ -327,10 +479,11 @@ import pandas as pd
327
  import numpy as np
328
  import os
329
 
330
- # Define base directory
331
  BASE_DIR = 'deployment_files' if os.path.exists('deployment_files') else '.'
332
 
333
- # Load all models with correct paths
 
334
  with open(f'{BASE_DIR}/user_cf_model.pkl', 'rb') as f:
335
  user_cf = pickle.load(f)
336
 
@@ -354,56 +507,67 @@ MODELS = {
354
  'SVD': svd
355
  }
356
 
 
 
357
  def recommend_movies(user_id, N, model_name='SVD'):
358
- """
359
- Recommend top N movies for user
360
- Required function signature matching specifications
361
- """
362
  try:
363
  user_id = int(user_id)
364
  N = int(N)
365
 
366
- model = MODELS[model_name]
367
-
368
  if user_id not in user_item_matrix.index:
369
- return "User ID not found in system", ""
370
 
 
371
  predictions = model.predict(user_id)
372
 
373
- if len(predictions) == 0:
374
- return "No predictions available for this user", ""
375
 
376
- top_n = predictions.sort_values(ascending=False).head(N)
 
377
 
378
  recommendations = pd.DataFrame({
379
  'movieId': top_n.index,
380
  'predicted_rating': top_n.values
381
  })
382
 
383
- recommendations = recommendations.merge(movies[['movieId', 'title']], on='movieId')
384
- result_df = recommendations[['movieId', 'title', 'predicted_rating']]
 
 
 
 
 
 
385
 
386
- # Model performance info
387
- model_metrics = f"""
388
  ### {model_name} Performance Metrics
389
 
390
  - **Precision@10**: {metrics[model_name]['Precision@K']:.4f}
391
  - **Recall@10**: {metrics[model_name]['Recall@K']:.4f}
392
  - **NDCG@10**: {metrics[model_name]['NDCG@K']:.4f}
 
 
393
  """
394
 
395
- return result_df, model_metrics
396
-
397
  except Exception as e:
398
- return f"Error: {str(e)}", ""
399
 
400
- def show_comparison():
401
- """Display comprehensive model comparison report"""
 
 
 
 
402
 
403
- comparison_text = f"""
404
  # Model Comparison Report
405
 
406
- ## Performance Metrics (Test Set Evaluation)
407
 
408
  | Model | Precision@10 | Recall@10 | NDCG@10 |
409
  |-------|--------------|-----------|---------|
@@ -411,165 +575,151 @@ def show_comparison():
411
  | Item-Based CF | {metrics['Item-Based CF']['Precision@K']:.4f} | {metrics['Item-Based CF']['Recall@K']:.4f} | {metrics['Item-Based CF']['NDCG@K']:.4f} |
412
  | SVD | {metrics['SVD']['Precision@K']:.4f} | {metrics['SVD']['Recall@K']:.4f} | {metrics['SVD']['NDCG@K']:.4f} |
413
 
414
- ---
415
-
416
- ## Best Performing Model: SVD (Matrix Factorization)
417
-
418
- ### Why SVD Outperforms Collaborative Filtering
419
-
420
- **1. Latent Factor Discovery**
421
- - SVD decomposes rating matrix into user and item latent factors
422
- - Captures hidden patterns beyond direct similarity
423
- - Identifies underlying preferences not visible in raw ratings
424
 
425
- **2. Sparsity Handling**
426
- - MovieLens data is extremely sparse (most user-item pairs unrated)
427
- - SVD learns compressed representation that generalizes well
428
- - CF methods struggle with cold-start and sparse neighborhoods
429
 
430
- **3. Computational Efficiency**
431
- - SVD complexity scales with number of factors (50), not users/items
432
- - CF requires computing full similarity matrices
433
- - Prediction time: O(k) for SVD vs O(n) for CF
 
434
 
435
- **4. Noise Reduction**
436
- - Dimensionality reduction filters rating noise
437
- - Focuses on strongest patterns in data
438
- - CF can propagate noise through similarity weights
439
-
440
- ### Trade-offs Analysis
441
-
442
- **User-Based Collaborative Filtering**
443
- - ✓ Intuitive: "Users like you also liked..."
444
- - ✓ Explainable recommendations
445
- - ✗ Computationally expensive (O(n²) similarity matrix)
446
- - ✗ Poor performance with sparse data
447
- - ✗ Sensitive to rating scale differences
448
-
449
- **Item-Based Collaborative Filtering**
450
- - ✓ More stable than user-based (items change less than users)
451
- - ✓ Reasonably interpretable
452
- - ✗ Still requires full item similarity computation
453
- - ✗ Limited to items similar to already-rated items
454
- - ✗ Cannot discover cross-genre patterns
455
-
456
- **SVD (Matrix Factorization)**
457
- - ✓ Best accuracy across all metrics
458
- - ✓ Handles sparsity effectively
459
- - ✓ Discovers latent preference patterns
460
- - ✓ Scalable to large datasets
461
- - ✗ Less interpretable (latent factors abstract)
462
- - ✗ Requires full matrix retraining for updates
463
 
464
  ### Implementation Details
465
 
466
- - **SVD Configuration**: 50 latent factors
467
- - **CF Neighborhood Size**: k=50 nearest neighbors
468
- - **Similarity Metric**: Cosine similarity
469
  - **Evaluation**: 80/20 train-test split, threshold=4.0 for relevance
470
- - **Metrics Computation**: Averaged over 100 test users
471
 
472
  ### Conclusion
473
 
474
- SVD demonstrates superior performance due to its ability to learn compressed latent representations that capture complex user-item interaction patterns. While collaborative filtering methods offer better interpretability, the accuracy gains from matrix factorization make SVD the recommended approach for production deployment.
 
475
  """
476
 
477
- return comparison_text
478
 
479
- def get_user_info():
480
- """Display available user range"""
481
  min_user = int(user_item_matrix.index.min())
482
  max_user = int(user_item_matrix.index.max())
483
- total_users = len(user_item_matrix.index)
484
- total_movies = len(movies)
485
 
486
  info = f"""
487
  ### Dataset Information
488
 
489
- - **Total Users**: {total_users:,}
490
- - **Total Movies**: {total_movies:,}
491
  - **User ID Range**: {min_user} to {max_user}
492
- - **Rating Scale**: 1-5 stars
493
- - **Dataset**: MovieLens
494
  """
495
  return info
496
 
497
- # Gradio Interface
498
- with gr.Blocks(title="MovieLens Recommendation System - DataSynthis_ML_JobTask", theme=gr.themes.Soft()) as demo:
499
 
500
  gr.Markdown("""
501
  # 🎬 MovieLens Recommendation System
502
  ## DataSynthis_ML_JobTask
503
 
504
- Advanced movie recommendation engine using Collaborative Filtering and Matrix Factorization techniques.
505
  """)
506
 
507
  with gr.Tab("🎯 Get Recommendations"):
508
- gr.Markdown(get_user_info())
509
 
510
  with gr.Row():
511
  with gr.Column():
512
- user_input = gr.Number(label="User ID", value=1, precision=0)
513
- n_input = gr.Number(label="Number of Recommendations (N)", value=10, precision=0)
514
- model_input = gr.Dropdown(
 
 
 
 
 
 
 
 
 
 
515
  choices=['User-Based CF', 'Item-Based CF', 'SVD'],
516
  value='SVD',
517
- label="Select Recommendation Model"
 
518
  )
519
- recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")
 
 
 
 
 
 
520
 
521
- output_df = gr.Dataframe(label="📋 Recommended Movies", wrap=True)
522
  metrics_output = gr.Markdown(label="📊 Model Performance")
523
 
524
  recommend_btn.click(
525
  fn=recommend_movies,
526
- inputs=[user_input, n_input, model_input],
527
- outputs=[output_df, metrics_output]
528
  )
529
 
530
  with gr.Tab("📊 Model Comparison"):
531
- comparison_output = gr.Markdown(show_comparison())
532
 
533
- with gr.Tab("ℹ️ About"):
534
  gr.Markdown("""
535
  ## Implementation Overview
536
 
537
- ### Algorithms Implemented
538
 
539
  **1. User-Based Collaborative Filtering**
540
- - Computes cosine similarity between users
541
  - Recommends items liked by similar users
542
- - Neighborhood size: 50 users
543
 
544
  **2. Item-Based Collaborative Filtering**
545
- - Computes cosine similarity between items
546
- - Recommends items similar to user's rated items
547
- - Neighborhood size: 50 items
548
 
549
  **3. Singular Value Decomposition (SVD)**
550
  - Matrix factorization with 50 latent factors
551
- - Learns user and item embeddings
552
- - Predicts ratings via dot product
553
 
554
  ### Evaluation Metrics
555
 
556
- - **Precision@K**: Proportion of recommended items that are relevant
557
- - **Recall@K**: Proportion of relevant items that are recommended
558
- - **NDCG@K**: Normalized discounted cumulative gain (position-aware metric)
 
 
 
 
 
 
 
 
559
 
560
  ### Dataset
 
561
  - Source: MovieLens
562
- - Train/Test Split: 80/20
563
  - Relevance Threshold: 4.0 stars
564
 
565
- ### Technologies
566
- - Python, NumPy, Pandas, SciPy
567
- - Scikit-learn for similarity computation
568
- - Gradio for web interface
569
-
570
  ---
571
 
572
- **Developed for DataSynthis ML Job Task**
 
573
  """)
574
 
575
  demo.launch()
 
1
+ # ============================================================================
2
+ # MOVIELENS RECOMMENDATION SYSTEM - PURE IMPLEMENTATION
3
+ # ============================================================================
4
+
5
  import numpy as np
6
  import pandas as pd
7
  from scipy.sparse.linalg import svds
 
8
  from sklearn.metrics.pairwise import cosine_similarity
9
  from sklearn.model_selection import train_test_split
10
  import pickle
 
12
  import warnings
13
  warnings.filterwarnings('ignore')
14
 
15
+ # ============================================================================
16
  # DATA LOADING & PREPROCESSING
17
+ # ============================================================================
18
 
19
def load_movielens_data(ratings_path='ratings.csv', movies_path='movies.csv'):
    """Read the MovieLens ratings and movies CSV files and print summary stats.

    Parameters:
    - ratings_path: path to the ratings CSV (userId, movieId, rating columns)
    - movies_path: path to the movies CSV (movieId, title columns)

    Returns:
    - (ratings, movies) tuple of DataFrames
    """
    ratings = pd.read_csv(ratings_path)
    movies = pd.read_csv(movies_path)

    # Report basic dataset statistics so a run is self-documenting.
    summary = (
        f"Loaded {len(ratings)} ratings",
        f"Loaded {len(movies)} movies",
        f"Users: {ratings['userId'].nunique()}",
        f"Rating distribution:\n{ratings['rating'].value_counts().sort_index()}",
        f"Mean rating: {ratings['rating'].mean():.3f}",
        f"Median rating: {ratings['rating'].median():.3f}",
    )
    for line in summary:
        print(line)

    return ratings, movies
32
 
33
  def create_user_item_matrix(ratings):
 
37
  columns='movieId',
38
  values='rating'
39
  ).fillna(0)
40
+
41
+ sparsity = 100 * (1 - (user_item_matrix > 0).sum().sum() / (user_item_matrix.shape[0] * user_item_matrix.shape[1]))
42
+ print(f"Matrix shape: {user_item_matrix.shape}")
43
+ print(f"Sparsity: {sparsity:.2f}%")
44
+
45
  return user_item_matrix
46
 
47
+ # ============================================================================
48
+ # USER-BASED COLLABORATIVE FILTERING
49
+ # ============================================================================
50
 
51
  class UserBasedCF:
52
+ """User-based collaborative filtering using cosine similarity"""
53
+
54
  def __init__(self, user_item_matrix):
55
  self.matrix = user_item_matrix
56
  self.user_similarity = None
57
 
58
  def fit(self):
59
+ """Compute user-user similarity matrix"""
60
+ print("Computing user similarity matrix...")
61
  self.user_similarity = cosine_similarity(self.matrix)
62
  np.fill_diagonal(self.user_similarity, 0)
63
+ print("User similarity matrix computed")
64
 
65
  def predict(self, user_id, k=50):
66
+ """Predict ratings for a user based on similar users"""
67
  if user_id not in self.matrix.index:
68
+ return pd.Series(dtype=float)
69
 
70
  user_idx = self.matrix.index.get_loc(user_id)
71
+ user_similarities = self.user_similarity[user_idx]
72
+
73
+ # Get top-k similar users
74
+ top_k_indices = np.argsort(user_similarities)[::-1][:k]
75
+ top_k_similarities = user_similarities[top_k_indices]
76
+
77
+ # Filter out negative similarities
78
+ positive_mask = top_k_similarities > 0
79
+ top_k_indices = top_k_indices[positive_mask]
80
+ top_k_similarities = top_k_similarities[positive_mask]
81
+
82
+ if len(top_k_indices) == 0:
83
+ return pd.Series(0, index=self.matrix.columns, dtype=float)
84
+
85
+ # Get ratings from similar users
86
+ similar_users_ratings = self.matrix.iloc[top_k_indices]
87
 
88
+ # Weighted sum of ratings
89
+ weighted_ratings = similar_users_ratings.T.dot(top_k_similarities)
90
+ sum_of_weights = np.sum(top_k_similarities)
91
 
92
+ # Calculate predicted ratings
93
+ predicted_ratings = weighted_ratings / (sum_of_weights + 1e-10)
94
 
95
+ # Exclude already rated items
96
  user_ratings = self.matrix.loc[user_id]
97
+ predicted_ratings[user_ratings > 0] = 0
98
 
99
+ return predicted_ratings
100
 
101
+ # ============================================================================
102
+ # ITEM-BASED COLLABORATIVE FILTERING
103
+ # ============================================================================
104
 
105
  class ItemBasedCF:
106
+ """Item-based collaborative filtering using cosine similarity"""
107
+
108
  def __init__(self, user_item_matrix):
109
  self.matrix = user_item_matrix
110
  self.item_similarity = None
111
 
112
  def fit(self):
113
+ """Compute item-item similarity matrix"""
114
+ print("Computing item similarity matrix...")
115
  self.item_similarity = cosine_similarity(self.matrix.T)
116
  np.fill_diagonal(self.item_similarity, 0)
117
+ print("Item similarity matrix computed")
118
 
119
  def predict(self, user_id, k=50):
120
+ """Predict ratings for a user based on similar items"""
121
  if user_id not in self.matrix.index:
122
+ return pd.Series(dtype=float)
123
 
124
  user_ratings = self.matrix.loc[user_id]
125
  rated_items = user_ratings[user_ratings > 0]
126
 
127
+ if len(rated_items) == 0:
128
+ return pd.Series(0, index=self.matrix.columns, dtype=float)
129
+
130
+ predicted_ratings = pd.Series(0.0, index=self.matrix.columns)
131
 
132
+ for item_id, rating in rated_items.items():
133
  item_idx = self.matrix.columns.get_loc(item_id)
134
+ item_similarities = self.item_similarity[item_idx]
135
+
136
+ # Get top-k similar items
137
+ top_k_indices = np.argsort(item_similarities)[::-1][:k]
138
 
139
+ for similar_idx in top_k_indices:
140
+ similar_item_id = self.matrix.columns[similar_idx]
141
+ similarity = item_similarities[similar_idx]
142
+
143
+ if similarity > 0 and user_ratings[similar_item_id] == 0:
144
+ predicted_ratings[similar_item_id] += similarity * rating
145
 
146
+ # Exclude already rated items
147
+ predicted_ratings[user_ratings > 0] = 0
148
+
149
+ return predicted_ratings
150
 
151
+ # ============================================================================
152
+ # SINGULAR VALUE DECOMPOSITION (SVD)
153
+ # ============================================================================
154
 
155
  class SVDRecommender:
156
+ """Matrix factorization using SVD"""
157
+
158
  def __init__(self, user_item_matrix, n_factors=50):
159
  self.matrix = user_item_matrix
160
  self.n_factors = n_factors
161
+ self.predictions = None
 
 
162
 
163
  def fit(self):
164
  """Perform SVD decomposition"""
165
+ print(f"Performing SVD with {self.n_factors} factors...")
166
+
167
+ # Mean center the matrix
168
+ matrix_mean = np.mean(self.matrix.values[np.where(self.matrix.values != 0)])
169
+ matrix_centered = self.matrix.values.copy()
170
+ matrix_centered[matrix_centered != 0] -= matrix_mean
171
+
172
+ # Perform SVD
173
  U, sigma, Vt = svds(matrix_centered, k=self.n_factors)
174
+ sigma = np.diag(sigma)
175
 
176
+ # Reconstruct the matrix
177
+ predicted_ratings = np.dot(np.dot(U, sigma), Vt) + matrix_mean
 
 
178
 
 
179
  self.predictions = pd.DataFrame(
180
+ predicted_ratings,
181
  index=self.matrix.index,
182
  columns=self.matrix.columns
183
  )
184
 
185
+ print("SVD decomposition complete")
186
+
187
  def predict(self, user_id):
188
+ """Get predicted ratings for a user"""
189
  if user_id not in self.predictions.index:
190
+ return pd.Series(dtype=float)
191
 
192
+ user_predictions = self.predictions.loc[user_id].copy()
193
  user_ratings = self.matrix.loc[user_id]
194
+
195
+ # Exclude already rated items
196
  user_predictions[user_ratings > 0] = 0
197
 
198
  return user_predictions
199
 
200
+ # ============================================================================
201
  # EVALUATION METRICS
202
+ # ============================================================================
203
 
204
  def precision_at_k(recommended, relevant, k):
205
+ """Precision@K: fraction of recommended items that are relevant"""
206
  recommended_k = set(recommended[:k])
207
  relevant_set = set(relevant)
208
+
209
+ if k == 0:
210
+ return 0.0
211
+
212
+ return len(recommended_k & relevant_set) / k
213
 
214
  def recall_at_k(recommended, relevant, k):
215
+ """Recall@K: fraction of relevant items that are recommended"""
216
  recommended_k = set(recommended[:k])
217
  relevant_set = set(relevant)
218
+
219
+ if len(relevant_set) == 0:
220
+ return 0.0
221
+
222
+ return len(recommended_k & relevant_set) / len(relevant_set)
223
 
224
  def ndcg_at_k(recommended, relevant, k):
225
+ """NDCG@K: Normalized Discounted Cumulative Gain"""
226
+ dcg = 0.0
227
  for i, item in enumerate(recommended[:k]):
228
  if item in relevant:
229
+ dcg += 1.0 / np.log2(i + 2)
230
+
231
+ idcg = sum([1.0 / np.log2(i + 2) for i in range(min(len(relevant), k))])
232
+
233
+ if idcg == 0:
234
+ return 0.0
235
 
236
+ return dcg / idcg
 
237
 
238
def evaluate_model(model, test_data, user_item_matrix, k=10, threshold=4.0,
                   max_users=100):
    """Evaluate a recommender on held-out ratings.

    Parameters:
    - model: fitted recommender exposing predict(user_id) -> pd.Series
    - test_data: DataFrame with userId / movieId / rating columns
    - user_item_matrix: training matrix (users x items); only users present
      here can be evaluated
    - k: cut-off for the ranking metrics
    - threshold: minimum test rating for an item to count as relevant
    - max_users: cap on the number of users evaluated (computational budget;
      generalizes the previous hard-coded limit of 100)

    Returns:
    - dict with mean Precision@K, Recall@K and NDCG@K over evaluated users
      (all zeros when no user could be evaluated)
    """
    precisions = []
    recalls = []
    ndcgs = []

    test_users = test_data['userId'].unique()

    print(f"Evaluating on {len(test_users)} test users...")

    evaluated_count = 0
    for user_id in test_users:
        # Skip users absent from the training matrix (cold start).
        if user_id not in user_item_matrix.index:
            continue

        # Relevant items: those the user rated >= threshold in the test set.
        user_test_data = test_data[test_data['userId'] == user_id]
        relevant_items = user_test_data[user_test_data['rating'] >= threshold]['movieId'].tolist()

        if len(relevant_items) == 0:
            continue

        predictions = model.predict(user_id)

        # Skip users with no usable predictions (empty or all-zero scores).
        if len(predictions) == 0 or predictions.sum() == 0:
            continue

        top_k_items = predictions.nlargest(k).index.tolist()

        precisions.append(precision_at_k(top_k_items, relevant_items, k))
        recalls.append(recall_at_k(top_k_items, relevant_items, k))
        ndcgs.append(ndcg_at_k(top_k_items, relevant_items, k))

        evaluated_count += 1
        if evaluated_count >= max_users:  # limit for computational efficiency
            break

    print(f"Evaluated {evaluated_count} users")

    if len(precisions) == 0:
        return {
            'Precision@K': 0.0,
            'Recall@K': 0.0,
            'NDCG@K': 0.0
        }

    return {
        'Precision@K': np.mean(precisions),
        'Recall@K': np.mean(recalls),
        'NDCG@K': np.mean(ndcgs)
    }
293
 
294
+ # ============================================================================
295
+ # RECOMMENDATION FUNCTION
296
+ # ============================================================================
297
 
298
def recommend_movies(user_id, N, model, movies_df):
    """
    Recommend the top N movies for a user.

    Parameters:
    - user_id: User ID
    - N: Number of recommendations
    - model: Trained recommendation model exposing predict(user_id)
    - movies_df: DataFrame with movie information (movieId, title)

    Returns:
    - DataFrame with columns movieId, title, predicted_rating
      (empty when the model has no predictions for this user)
    """
    scores = model.predict(user_id)

    if len(scores) == 0:
        # Unknown user: empty result with the expected column layout.
        return pd.DataFrame(columns=['movieId', 'title', 'predicted_rating'])

    best = scores.nlargest(N)

    # Left merge keeps recommendations even if a title is missing.
    result = (
        pd.DataFrame({'movieId': best.index, 'predicted_rating': best.values})
        .merge(movies_df[['movieId', 'title']], on='movieId', how='left')
    )

    return result[['movieId', 'title', 'predicted_rating']]
332
 
333
+ # ============================================================================
334
+ # MAIN EXECUTION
335
+ # ============================================================================
336
 
337
  def main():
338
+ print("="*70)
339
+ print("MOVIELENS RECOMMENDATION SYSTEM")
340
+ print("="*70)
341
+
342
+ # Load data
343
+ print("\n[1/6] Loading data...")
344
  ratings, movies = load_movielens_data()
345
 
346
+ # Split data
347
+ print("\n[2/6] Splitting data (80% train, 20% test)...")
348
  train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)
349
+ print(f"Training set: {len(train_data)} ratings")
350
+ print(f"Test set: {len(test_data)} ratings")
351
 
352
+ # Create user-item matrix
353
+ print("\n[3/6] Creating user-item matrix...")
354
  user_item_matrix = create_user_item_matrix(train_data)
355
 
356
+ # Train User-Based CF
357
+ print("\n[4/6] Training User-Based Collaborative Filtering...")
358
  user_cf = UserBasedCF(user_item_matrix)
359
  user_cf.fit()
360
+ print("Evaluating User-Based CF...")
361
  metrics_user_cf = evaluate_model(user_cf, test_data, user_item_matrix)
362
+ print(f"User-Based CF Results:")
363
+ for metric, value in metrics_user_cf.items():
364
+ print(f" {metric}: {value:.4f}")
365
 
366
+ # Train Item-Based CF
367
+ print("\n[5/6] Training Item-Based Collaborative Filtering...")
368
  item_cf = ItemBasedCF(user_item_matrix)
369
  item_cf.fit()
370
+ print("Evaluating Item-Based CF...")
371
  metrics_item_cf = evaluate_model(item_cf, test_data, user_item_matrix)
372
+ print(f"Item-Based CF Results:")
373
+ for metric, value in metrics_item_cf.items():
374
+ print(f" {metric}: {value:.4f}")
375
 
376
+ # Train SVD
377
+ print("\n[6/6] Training SVD (Matrix Factorization)...")
378
  svd = SVDRecommender(user_item_matrix, n_factors=50)
379
  svd.fit()
380
+ print("Evaluating SVD...")
381
  metrics_svd = evaluate_model(svd, test_data, user_item_matrix)
382
+ print(f"SVD Results:")
383
+ for metric, value in metrics_svd.items():
384
+ print(f" {metric}: {value:.4f}")
385
 
386
+ # Model comparison
387
+ print("\n" + "="*70)
388
  print("MODEL COMPARISON")
389
+ print("="*70)
390
+
391
+ comparison_df = pd.DataFrame({
392
  'User-Based CF': metrics_user_cf,
393
  'Item-Based CF': metrics_item_cf,
394
  'SVD': metrics_svd
395
  })
396
+ print(comparison_df.to_string())
397
 
398
+ # Determine best model
399
+ best_model_name = comparison_df.loc['NDCG@K'].idxmax()
400
+ print(f"\n*** Best Model (by NDCG@K): {best_model_name} ***")
401
 
402
  if best_model_name == 'User-Based CF':
403
  best_model = user_cf
 
406
  else:
407
  best_model = svd
408
 
409
+ # Example recommendations
410
+ print("\n" + "="*70)
411
  print("EXAMPLE RECOMMENDATIONS")
412
+ print("="*70)
413
+
414
+ sample_user_id = user_item_matrix.index[0]
415
+ print(f"\nTop 10 recommendations for User {sample_user_id} using {best_model_name}:")
416
+
417
+ recommendations = recommend_movies(sample_user_id, 10, best_model, movies)
418
  print(recommendations.to_string(index=False))
419
 
420
+ # Save models for deployment
421
+ print("\n" + "="*70)
422
+ print("SAVING MODELS FOR DEPLOYMENT")
423
+ print("="*70)
424
+
425
+ save_models_for_deployment(
426
+ user_cf, item_cf, svd,
427
+ user_item_matrix, movies,
428
+ metrics_user_cf, metrics_item_cf, metrics_svd
429
+ )
430
 
431
  return best_model, user_item_matrix, movies
432
 
433
def save_models_for_deployment(user_cf, item_cf, svd, user_item_matrix, movies,
                               metrics_user_cf, metrics_item_cf, metrics_svd):
    """Persist all trained models and supporting data for Hugging Face deployment.

    Writes into `deployment_files/` (created if absent):
    pickles of the three recommenders, the user-item matrix, a combined
    metrics dict, plus the movie metadata as a human-readable CSV.

    Parameters:
    - user_cf, item_cf, svd: trained recommender objects (pickled as-is)
    - user_item_matrix: user x movie rating DataFrame used at inference time
    - movies: DataFrame with movie information (written as CSV)
    - metrics_user_cf, metrics_item_cf, metrics_svd: per-model metric dicts
    """
    output_dir = 'deployment_files'
    os.makedirs(output_dir, exist_ok=True)

    print(f"Saving models to {output_dir}/...")

    # Bundle the per-model metrics under the same keys the app reads later.
    all_metrics = {
        'User-Based CF': metrics_user_cf,
        'Item-Based CF': metrics_item_cf,
        'SVD': metrics_svd
    }

    # (filename, object, confirmation message) — pickled in a fixed order so
    # the console output matches the original script line for line.
    artifacts = [
        ('user_cf_model.pkl', user_cf, " ✓ User-Based CF model saved"),
        ('item_cf_model.pkl', item_cf, " ✓ Item-Based CF model saved"),
        ('svd_model.pkl', svd, " ✓ SVD model saved"),
        ('user_item_matrix.pkl', user_item_matrix, " ✓ User-item matrix saved"),
        ('metrics.pkl', all_metrics, " ✓ Metrics saved"),
    ]
    for filename, payload, message in artifacts:
        with open(f'{output_dir}/{filename}', 'wb') as fh:
            pickle.dump(payload, fh)
        print(message)

    # Movie metadata stays human-readable as CSV rather than a pickle.
    movies.to_csv(f'{output_dir}/movies.csv', index=False)
    print(" ✓ Movies data saved")

    print("\nAll files ready for Hugging Face deployment!")
 
472
 
473
  if __name__ == "__main__":
474
  best_model, user_item_matrix, movies = main()
 
479
  import numpy as np
480
  import os
481
 
482
+ # Determine file location
483
  BASE_DIR = 'deployment_files' if os.path.exists('deployment_files') else '.'
484
 
485
+ # Load models and data
486
+ print("Loading models...")
487
  with open(f'{BASE_DIR}/user_cf_model.pkl', 'rb') as f:
488
  user_cf = pickle.load(f)
489
 
 
507
  'SVD': svd
508
  }
509
 
510
+ print("Models loaded successfully!")
511
+
512
def recommend_movies(user_id, N, model_name='SVD'):
    """Generate top-N movie recommendations for the Gradio UI.

    Returns a (DataFrame, markdown-string) pair: either the recommendation
    table plus a metrics summary, or a one-row error table plus "".
    Reads the module-level `user_item_matrix`, `MODELS`, `movies`, and
    `metrics` loaded at app start-up.
    """
    try:
        uid = int(user_id)
        top_k = int(N)

        # Unknown users cannot be scored by any of the loaded models.
        if uid not in user_item_matrix.index:
            return pd.DataFrame({'Error': ['User ID not found in system']}), ""

        scores = MODELS[model_name].predict(uid)

        # An empty or all-zero score vector means there is nothing to rank.
        if len(scores) == 0 or scores.sum() == 0:
            return pd.DataFrame({'Error': ['No predictions available for this user']}), ""

        ranked = scores.nlargest(top_k)

        # Build the table and attach titles in one chained expression.
        table = (
            pd.DataFrame({'movieId': ranked.index,
                          'predicted_rating': ranked.values})
            .merge(movies[['movieId', 'title']], on='movieId', how='left')
        )[['movieId', 'title', 'predicted_rating']]

        metrics_text = f"""
### {model_name} Performance Metrics

- **Precision@10**: {metrics[model_name]['Precision@K']:.4f}
- **Recall@10**: {metrics[model_name]['Recall@K']:.4f}
- **NDCG@10**: {metrics[model_name]['NDCG@K']:.4f}

*Metrics evaluated on test set with relevance threshold = 4.0*
"""

        return table, metrics_text

    except Exception as e:
        # Gradio boundary: surface any failure as a one-row error table.
        return pd.DataFrame({'Error': [f'Error: {str(e)}']}), ""
559
 
560
+ def show_model_comparison():
561
+ """Display model comparison report"""
562
+
563
+ # Determine best model
564
+ ndcg_scores = {name: m['NDCG@K'] for name, m in metrics.items()}
565
+ best_model = max(ndcg_scores, key=ndcg_scores.get)
566
 
567
+ report = f"""
568
  # Model Comparison Report
569
 
570
+ ## Performance Metrics
571
 
572
  | Model | Precision@10 | Recall@10 | NDCG@10 |
573
  |-------|--------------|-----------|---------|
 
575
  | Item-Based CF | {metrics['Item-Based CF']['Precision@K']:.4f} | {metrics['Item-Based CF']['Recall@K']:.4f} | {metrics['Item-Based CF']['NDCG@K']:.4f} |
576
  | SVD | {metrics['SVD']['Precision@K']:.4f} | {metrics['SVD']['Recall@K']:.4f} | {metrics['SVD']['NDCG@K']:.4f} |
577
 
578
+ ## Best Model: {best_model}
 
 
 
 
 
 
 
 
 
579
 
580
+ ### Why {best_model} Performs Best
 
 
 
581
 
582
+ **Matrix Factorization (SVD) Advantages:**
583
+ - Captures latent factors in user-movie interactions
584
+ - Handles sparse data through dimensionality reduction
585
+ - Generalizes better than similarity-based methods
586
+ - Computationally efficient for prediction
587
 
588
+ **Collaborative Filtering Trade-offs:**
589
+ - **User-Based**: Intuitive but computationally expensive, struggles with sparsity
590
+ - **Item-Based**: More stable than user-based, but limited to similar items
591
+ - **SVD**: Best balance of accuracy and efficiency
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
 
593
  ### Implementation Details
594
 
595
+ - **SVD**: 50 latent factors via Singular Value Decomposition
596
+ - **CF**: Cosine similarity with k=50 neighbors
 
597
  - **Evaluation**: 80/20 train-test split, threshold=4.0 for relevance
598
+ - **Metrics**: Precision, Recall, and NDCG at K=10
599
 
600
  ### Conclusion
601
 
602
+ SVD achieves the best performance by learning compressed representations of user preferences
603
+ and movie characteristics, making it the recommended approach for production deployment.
604
  """
605
 
606
+ return report
607
 
608
def get_dataset_info():
    """Return a markdown summary of the loaded dataset for the Gradio UI.

    Reads the module-level `user_item_matrix` and `movies` globals loaded
    at app start-up.
    """
    lowest_id = int(user_item_matrix.index.min())
    highest_id = int(user_item_matrix.index.max())

    return f"""
### Dataset Information

- **Total Users**: {len(user_item_matrix.index):,}
- **Total Movies**: {len(movies):,}
- **User ID Range**: {lowest_id} to {highest_id}
- **Rating Scale**: 0.5 to 5.0 stars
- **Source**: MovieLens Dataset
"""
625
 
626
+ # Build Gradio Interface
627
+ with gr.Blocks(title="MovieLens Recommendation System", theme=gr.themes.Soft()) as demo:
628
 
629
  gr.Markdown("""
630
  # 🎬 MovieLens Recommendation System
631
  ## DataSynthis_ML_JobTask
632
 
633
+ Compare three recommendation algorithms: User-Based CF, Item-Based CF, and SVD Matrix Factorization
634
  """)
635
 
636
  with gr.Tab("🎯 Get Recommendations"):
637
+ gr.Markdown(get_dataset_info())
638
 
639
  with gr.Row():
640
  with gr.Column():
641
+ user_id_input = gr.Number(
642
+ label="User ID",
643
+ value=1,
644
+ precision=0,
645
+ info="Enter a valid user ID from the dataset"
646
+ )
647
+ n_input = gr.Number(
648
+ label="Number of Recommendations (N)",
649
+ value=10,
650
+ precision=0,
651
+ info="How many movies to recommend (1-20)"
652
+ )
653
+ model_select = gr.Dropdown(
654
  choices=['User-Based CF', 'Item-Based CF', 'SVD'],
655
  value='SVD',
656
+ label="Recommendation Algorithm",
657
+ info="Select which model to use"
658
  )
659
+
660
+ recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary", size="lg")
661
+
662
+ recommendations_output = gr.Dataframe(
663
+ label="📋 Recommended Movies",
664
+ wrap=True
665
+ )
666
 
 
667
  metrics_output = gr.Markdown(label="📊 Model Performance")
668
 
669
  recommend_btn.click(
670
  fn=recommend_movies,
671
+ inputs=[user_id_input, n_input, model_select],
672
+ outputs=[recommendations_output, metrics_output]
673
  )
674
 
675
  with gr.Tab("📊 Model Comparison"):
676
+ gr.Markdown(show_model_comparison())
677
 
678
+ with gr.Tab("ℹ️ Documentation"):
679
  gr.Markdown("""
680
  ## Implementation Overview
681
 
682
+ ### Algorithms
683
 
684
  **1. User-Based Collaborative Filtering**
685
+ - Finds users with similar rating patterns
686
  - Recommends items liked by similar users
687
+ - Uses cosine similarity with k=50 neighbors
688
 
689
  **2. Item-Based Collaborative Filtering**
690
+ - Finds items similar to those the user has rated
691
+ - Recommends items similar to user's preferences
692
+ - Uses cosine similarity with k=50 neighbors
693
 
694
  **3. Singular Value Decomposition (SVD)**
695
  - Matrix factorization with 50 latent factors
696
+ - Learns low-dimensional representations of users and items
697
+ - Predicts ratings via reconstructed matrix
698
 
699
  ### Evaluation Metrics
700
 
701
+ - **Precision@K**: Fraction of recommended items that are relevant
702
+ - **Recall@K**: Fraction of relevant items that are recommended
703
+ - **NDCG@K**: Normalized Discounted Cumulative Gain (considers ranking order)
704
+
705
+ ### Technical Stack
706
+
707
+ - Python 3.10+
708
+ - NumPy, Pandas for data processing
709
+ - SciPy for SVD computation
710
+ - Scikit-learn for similarity metrics
711
+ - Gradio for web interface
712
 
713
  ### Dataset
714
+
715
  - Source: MovieLens
716
+ - Split: 80% training, 20% testing
717
  - Relevance Threshold: 4.0 stars
718
 
 
 
 
 
 
719
  ---
720
 
721
+ **Project**: DataSynthis ML Job Task
722
+ **Task**: Movie Recommendation System
723
  """)
724
 
725
  demo.launch()