N-Kibria committed on
Commit
a3cd8bd
·
verified ·
1 Parent(s): 6244f84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +631 -49
app.py CHANGED
@@ -1,9 +1,75 @@
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import pickle
3
  import pandas as pd
4
  import numpy as np
5
  import torch
 
6
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def load_model_and_data():
9
  """Load the trained model and necessary data"""
@@ -62,22 +128,500 @@ def load_model_and_data():
62
  traceback.print_exc()
63
  return None, None, None, []
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  print("Loading model and data...")
67
  model, loader, movies_df, user_ids = load_model_and_data()
68
  print(f"Model loaded! Available users: {len(user_ids)}")
69
 
 
 
 
 
70
  def get_recommendations(user_id, num_recommendations):
 
 
 
 
 
 
 
 
 
 
71
  if model is None or loader is None:
72
- return "Error: Model not loaded properly."
73
 
74
  try:
75
  user_id = int(user_id)
76
  num_recommendations = int(num_recommendations)
77
 
 
78
  if user_id not in loader.user_id_map:
79
- return f"User ID {user_id} not found! Please select a valid user ID."
80
 
 
81
  recommendations = model.recommend_movies(
82
  user_id=user_id,
83
  N=num_recommendations,
@@ -87,42 +631,55 @@ def get_recommendations(user_id, num_recommendations):
87
  )
88
 
89
  if not recommendations:
90
- return f"No recommendations found for User {user_id}"
91
 
92
- output = f"Top {num_recommendations} Movie Recommendations for User {user_id}\n\n"
 
93
  output += "=" * 60 + "\n\n"
94
 
95
  for i, (movie_id, title, score) in enumerate(recommendations, 1):
96
- stars = "*" * int(score)
97
- output += f"{i}. {title}\n"
98
- output += f" Predicted Rating: {score:.2f}/5.00 {stars}\n"
99
- output += f" Movie ID: {movie_id}\n\n"
100
 
101
  return output
102
 
103
  except ValueError:
104
- return "Error: Please enter valid numbers for User ID and Number of Recommendations"
105
  except Exception as e:
106
- return f"Error generating recommendations: {str(e)}"
 
107
 
108
  def get_user_history(user_id):
 
 
 
 
 
 
 
 
 
109
  if model is None or loader is None:
110
- return "Error: Model not loaded properly."
111
 
112
  try:
113
  user_id = int(user_id)
114
 
115
  if user_id not in loader.user_id_map:
116
- return f"User ID {user_id} not found!"
117
 
118
  user_idx = loader.user_id_map[user_id]
119
 
 
120
  user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
121
  rated_indices = np.where(user_ratings > 0)[0]
122
 
123
  if len(rated_indices) == 0:
124
  return f"No rating history found for User {user_id}"
125
 
 
126
  history = []
127
  for movie_idx in rated_indices:
128
  original_movie_id = loader.reverse_movie_map[movie_idx]
@@ -130,39 +687,52 @@ def get_user_history(user_id):
130
  rating = user_ratings[movie_idx]
131
  history.append((title, rating))
132
 
 
133
  history.sort(key=lambda x: x[1], reverse=True)
134
 
135
- output = f"Rating History for User {user_id}\n\n"
 
136
  output += f"Total movies rated: {len(history)}\n"
137
  output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
138
  output += "=" * 60 + "\n\n"
139
- output += "Top 10 Highest Rated Movies:\n\n"
140
 
141
  for i, (title, rating) in enumerate(history[:10], 1):
142
- stars = "*" * int(rating)
143
- output += f"{i}. {title} - {rating:.1f}/5 {stars}\n"
144
 
145
  return output
146
 
147
  except Exception as e:
148
- return f"Error: {str(e)}"
 
149
 
150
  def get_movie_info(movie_title_search):
 
 
 
 
 
 
 
 
 
151
  if movies_df is None:
152
- return "Error: Movies data not loaded"
153
 
154
  try:
 
155
  matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
156
 
157
  if len(matches) == 0:
158
- return f"No movies found matching '{movie_title_search}'"
159
 
160
- output = f"Search Results for '{movie_title_search}'\n\n"
161
  output += f"Found {len(matches)} movie(s):\n\n"
162
  output += "=" * 60 + "\n\n"
163
 
164
  for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
165
- output += f"{i}. {row['title']} (ID: {row['movie_id']})\n"
166
 
167
  if len(matches) > 20:
168
  output += f"\n... and {len(matches) - 20} more results"
@@ -170,15 +740,21 @@ def get_movie_info(movie_title_search):
170
  return output
171
 
172
  except Exception as e:
173
- return f"Error: {str(e)}"
 
 
 
 
 
174
 
 
175
  with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
176
 
177
  gr.Markdown("""
178
- # Hybrid Movie Recommendation System
179
  ### DataSynthis Job Task - Powered by AI
180
 
181
- This system combines Collaborative Filtering, SVD Matrix Factorization, and Neural Networks
182
  to provide personalized movie recommendations from the MovieLens 100k dataset.
183
 
184
  ---
@@ -186,7 +762,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
186
 
187
  with gr.Tabs():
188
 
189
- with gr.Tab("Get Recommendations"):
 
190
  gr.Markdown("### Get personalized movie recommendations for any user")
191
 
192
  with gr.Row():
@@ -197,7 +774,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
197
  minimum=1,
198
  maximum=943,
199
  step=1,
200
- info="Enter a user ID (1-943)"
201
  )
202
 
203
  num_recs_input = gr.Slider(
@@ -208,7 +785,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
208
  step=1
209
  )
210
 
211
- recommend_btn = gr.Button("Get Recommendations", variant="primary")
212
 
213
  with gr.Column(scale=2):
214
  recommendations_output = gr.Textbox(
@@ -224,13 +801,14 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
224
  )
225
 
226
  gr.Markdown("""
227
- How it works:
228
  - Enter a User ID (between 1 and 943)
229
  - Choose how many recommendations you want
230
  - Click "Get Recommendations" to see personalized movie suggestions
231
  """)
232
 
233
- with gr.Tab("User History"):
 
234
  gr.Markdown("### View a user's rating history")
235
 
236
  with gr.Row():
@@ -243,7 +821,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
243
  step=1
244
  )
245
 
246
- history_btn = gr.Button("View History", variant="primary")
247
 
248
  with gr.Column(scale=2):
249
  history_output = gr.Textbox(
@@ -258,7 +836,8 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
258
  outputs=history_output
259
  )
260
 
261
- with gr.Tab("Search Movies"):
 
262
  gr.Markdown("### Search for movies in the database")
263
 
264
  with gr.Row():
@@ -269,7 +848,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
269
  value="Star Wars"
270
  )
271
 
272
- search_btn = gr.Button("Search", variant="primary")
273
 
274
  with gr.Column(scale=2):
275
  search_output = gr.Textbox(
@@ -284,39 +863,40 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
284
  outputs=search_output
285
  )
286
 
287
- with gr.Tab("About"):
 
288
  gr.Markdown("""
289
  ## About This System
290
 
291
- ### Model Architecture
292
- This is a Hybrid Recommendation System that combines three approaches:
293
 
294
- 1. Item-Based Collaborative Filtering
295
  - Uses cosine similarity between movies
296
  - Recommends movies similar to what you've liked before
297
 
298
- 2. SVD Matrix Factorization
299
  - Decomposes the user-movie rating matrix
300
  - Discovers latent factors that explain user preferences
301
 
302
- 3. Neural Collaborative Filtering (NCF)
303
  - Deep learning model with user and movie embeddings
304
  - Learns complex non-linear patterns in user behavior
305
 
306
- ### Dataset
307
- - MovieLens 100k dataset
308
  - 100,000 ratings from 943 users on 1,682 movies
309
  - Ratings scale: 1-5 stars
310
 
311
- ### Performance Metrics
312
- - Precision@10: 26.77%
313
- - NDCG@10: 28.50%
314
- - Model improves recommendations by 40% vs baseline
315
 
316
- ### Created For
317
- DataSynthis Job Task
318
 
319
- ### Technologies Used
320
  - PyTorch (Neural Networks)
321
  - Scikit-learn (SVD, Similarity)
322
  - Pandas & NumPy (Data Processing)
@@ -324,17 +904,19 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis")
324
 
325
  ---
326
 
327
- Note: This model is trained on the MovieLens 100k dataset.
328
  User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
329
  """)
330
 
 
331
  gr.Markdown("""
332
  ---
333
  <div style='text-align: center'>
334
- <p>Hybrid Movie Recommendation System | Built for DataSynthis</p>
335
  </div>
336
  """)
337
 
 
338
  if __name__ == "__main__":
339
  demo.launch(
340
  share=False,
 
1
+ """
2
+ Gradio App for Hybrid Movie Recommendation System
3
+ Deploy to Hugging Face Spaces as 'DataSynthis_Job_task'
4
+
5
+ File: app.py
6
+ """
7
+
8
  import gradio as gr
9
  import pickle
10
  import pandas as pd
11
  import numpy as np
12
  import torch
13
+ import torch.nn as nn
14
  import os
15
+ from scipy.sparse import csr_matrix
16
+
17
+ #==============================================================================
18
+ # MODEL CLASS DEFINITIONS (Required for unpickling)
19
+ #==============================================================================
20
+
21
class ItemBasedCF:
    """Placeholder for the item-based collaborative-filtering model.

    Declares no attributes or methods of its own; it exists so the class name
    can be resolved in this module (see the "Required for unpickling" note).
    """
24
+
25
class SVDRecommender:
    """Placeholder for the SVD recommender.

    Declares no attributes or methods of its own; it exists so the class name
    can be resolved in this module (see the "Required for unpickling" note).
    """
28
+
29
class NeuralCF(nn.Module):
    """Neural collaborative-filtering model: user/movie embeddings fed to an MLP.

    The attribute names ``user_embedding``, ``movie_embedding`` and ``mlp`` are
    kept unchanged. NOTE(review): presumably the pickled model artifacts rely
    on these names - confirm before renaming anything here.

    Args:
        n_users: Number of rows in the user embedding table.
        n_movies: Number of rows in the movie embedding table.
        embedding_dim: Width of each embedding vector.
        hidden_layers: Sizes of the hidden ``Linear`` layers, in order. Any
            iterable of ints works; the default is a tuple so that no mutable
            object is shared between calls.
    """

    def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=(64, 32, 16)):
        super().__init__()
        self.user_embedding = nn.Embedding(n_users, embedding_dim)
        self.movie_embedding = nn.Embedding(n_movies, embedding_dim)

        # The MLP consumes the concatenated user and movie embeddings.
        layers = []
        input_dim = embedding_dim * 2
        for hidden_dim in hidden_layers:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.2))
            input_dim = hidden_dim
        # Final projection to a single score per (user, movie) pair.
        layers.append(nn.Linear(input_dim, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, user_ids, movie_ids):
        """Score each (user, movie) index pair.

        Args:
            user_ids: Integer tensor of user indices.
            movie_ids: Integer tensor of movie indices, same length.

        Returns:
            The MLP output with all size-1 dimensions squeezed away (so a
            batch of one yields a 0-dim tensor).
        """
        user_emb = self.user_embedding(user_ids)
        movie_emb = self.movie_embedding(movie_ids)
        x = torch.cat([user_emb, movie_emb], dim=1)
        output = self.mlp(x)
        return output.squeeze()

    def predict(self, user_idx, movie_idx, device='cpu'):
        """Predict one rating as a Python float clamped to the range [1, 5].

        Switches the module to eval mode (disabling dropout) and runs without
        gradient tracking. The module itself is not moved to ``device``; only
        the index tensors are created there.
        """
        self.eval()
        with torch.no_grad():
            user_tensor = torch.tensor([user_idx], dtype=torch.long, device=device)
            movie_tensor = torch.tensor([movie_idx], dtype=torch.long, device=device)
            prediction = self.forward(user_tensor, movie_tensor)
            return torch.clamp(prediction, 1, 5).item()
61
+
62
class HybridRecommender:
    """Placeholder for the hybrid recommendation system.

    Declares no attributes or methods of its own; it exists so the class name
    can be resolved in this module (see the "Required for unpickling" note).
    """
    # NOTE(review): get_recommendations() calls model.recommend_movies(...).
    # If the unpickled model is an instance of this class, that method is not
    # defined here - confirm the real implementation is available at runtime.
65
+
66
class MovieLensDataLoader:
    """Placeholder for the data loader.

    Declares no attributes or methods of its own; it exists so the class name
    can be resolved in this module (see the "Required for unpickling" note).
    """
69
+
70
+ #==============================================================================
71
+ # LOAD MODEL AND DATA
72
+ #==============================================================================
73
 
74
  def load_model_and_data():
75
  """Load the trained model and necessary data"""
 
128
  traceback.print_exc()
129
  return None, None, None, []
130
 
131
# Load everything at startup. The module-level names model, loader, movies_df
# and user_ids are read by the handler functions below.
print("Loading model and data...")
model, loader, movies_df, user_ids = load_model_and_data()
if model is None or loader is None:
    # load_model_and_data() signals failure with (None, None, None, []);
    # do not claim success in the log in that case.
    print("Model failed to load! See the errors above.")
else:
    print(f"Model loaded! Available users: {len(user_ids)}")
135
+
136
+ #==============================================================================
137
+ # RECOMMENDATION FUNCTION
138
+ #==============================================================================
139
+
140
def get_recommendations(user_id, num_recommendations):
    """
    Build the formatted recommendation text for one user.

    Args:
        user_id: User ID; anything ``int()`` accepts.
        num_recommendations: Number of recommendations to request from the model.

    Returns:
        A formatted string listing the recommendations, or a message starting
        with "❌" when the model is not loaded, an input is not numeric, the
        user is unknown, nothing was recommended, or the model call failed.
    """
    if model is None or loader is None:
        return "❌ Error: Model not loaded properly. Please check the model files."

    # Guard only the conversions: an empty field arrives as None (TypeError),
    # and a ValueError raised later by the model must not be reported as
    # "invalid numbers".
    try:
        user_id = int(user_id)
        num_recommendations = int(num_recommendations)
    except (TypeError, ValueError):
        return "❌ Error: Please enter valid numbers for User ID and Number of Recommendations"

    try:
        # Check if user exists
        if user_id not in loader.user_id_map:
            return f"❌ User ID {user_id} not found! Please select a valid user ID."

        # Get recommendations
        recommendations = model.recommend_movies(
            user_id=user_id,
            N=num_recommendations,
            user_id_map=loader.user_id_map,
            reverse_movie_map=loader.reverse_movie_map,
            movies_df=movies_df
        )

        if not recommendations:
            return f"❌ No recommendations found for User {user_id}"

        # Format output: collect the pieces and join once.
        parts = [
            f"🎬 **Top {num_recommendations} Movie Recommendations for User {user_id}**\n\n",
            "=" * 60 + "\n\n",
        ]
        for i, (movie_id, title, score) in enumerate(recommendations, 1):
            stars = "⭐" * int(score)  # one star per whole rating point
            parts.append(f"**{i}. {title}**\n")
            parts.append(f" • Predicted Rating: {score:.2f}/5.00 {stars}\n")
            parts.append(f" • Movie ID: {movie_id}\n\n")

        return "".join(parts)

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"❌ Error generating recommendations: {str(e)}"
190
+
191
+
192
def get_user_history(user_id):
    """
    Show a user's rating history.

    Reads the user's row of ``model.item_cf.user_item_matrix`` and treats
    every entry greater than zero as a rating.

    Args:
        user_id: User ID; anything ``int()`` accepts.

    Returns:
        A formatted string with the number of rated movies, the average rating
        and the ten highest-rated titles, or a message describing the problem.
    """
    if model is None or loader is None:
        return "❌ Error: Model not loaded properly."

    try:
        user_id = int(user_id)

        if user_id not in loader.user_id_map:
            return f"❌ User ID {user_id} not found!"

        user_idx = loader.user_id_map[user_id]

        # Get user's ratings from the training data
        user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
        rated_indices = np.where(user_ratings > 0)[0]

        if len(rated_indices) == 0:
            return f"No rating history found for User {user_id}"

        # Build the id -> title lookup once instead of scanning the DataFrame
        # for every rated movie. setdefault keeps the first title per id.
        title_by_id = {}
        for known_id, known_title in zip(movies_df['movie_id'], movies_df['title']):
            title_by_id.setdefault(known_id, known_title)

        # Get movie details
        history = []
        for movie_idx in rated_indices:
            original_movie_id = loader.reverse_movie_map[movie_idx]
            # A movie missing from movies_df gets a placeholder title rather
            # than aborting the whole history with an IndexError.
            title = title_by_id.get(original_movie_id, f"Movie {original_movie_id}")
            history.append((title, user_ratings[movie_idx]))

        # Sort by rating (highest first); the sort is stable for equal ratings.
        history.sort(key=lambda entry: entry[1], reverse=True)

        # Format output
        parts = [
            f"📊 **Rating History for User {user_id}**\n\n",
            f"Total movies rated: {len(history)}\n",
            f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n",
            "=" * 60 + "\n\n",
            "**Top 10 Highest Rated Movies:**\n\n",
        ]
        for i, (title, rating) in enumerate(history[:10], 1):
            stars = "⭐" * int(rating)
            parts.append(f"{i}. **{title}** - {rating:.1f}/5 {stars}\n")

        return "".join(parts)

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"❌ Error: {str(e)}"
246
+
247
 
248
def get_movie_info(movie_title_search):
    """
    Search for movies by title.

    The query is matched as a literal, case-insensitive substring of the
    ``title`` column, so characters such as ``(`` or ``*`` in user input are
    not interpreted as regular-expression syntax.

    Args:
        movie_title_search: Search query (may be None or blank).

    Returns:
        A formatted string listing up to 20 matching movies, or a message
        describing why nothing is shown.
    """
    if movies_df is None:
        return "❌ Error: Movies data not loaded"

    # A cleared textbox may arrive as None; a blank query would match every title.
    query = "" if movie_title_search is None else str(movie_title_search).strip()
    if not query:
        return "❌ Please enter a movie title to search for."

    try:
        # Search for movies (regex=False: treat the user's text literally)
        title_hits = movies_df['title'].str.contains(query, case=False, na=False, regex=False)
        matches = movies_df[title_hits]

        if len(matches) == 0:
            return f"❌ No movies found matching '{query}'"

        parts = [
            f"🔍 **Search Results for '{query}'**\n\n",
            f"Found {len(matches)} movie(s):\n\n",
            "=" * 60 + "\n\n",
        ]

        shown = matches.head(20)
        for i, (title, movie_id) in enumerate(zip(shown['title'], shown['movie_id']), 1):
            parts.append(f"{i}. **{title}** (ID: {movie_id})\n")

        if len(matches) > 20:
            parts.append(f"\n... and {len(matches) - 20} more results")

        return "".join(parts)

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the app.
        return f"❌ Error: {str(e)}"
282
+
283
+
284
+ #==============================================================================
285
+ # GRADIO INTERFACE
286
+ #==============================================================================
287
+
288
+ # Create Gradio interface with tabs
289
# Builds the four-tab UI. Each button is wired to one of the handler functions
# defined above; every handler returns a single string shown in a Textbox.
with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:

    gr.Markdown("""
    # 🎬 Hybrid Movie Recommendation System
    ### DataSynthis Job Task - Powered by AI

    This system combines **Collaborative Filtering**, **SVD Matrix Factorization**, and **Neural Networks**
    to provide personalized movie recommendations from the MovieLens 100k dataset.

    ---
    """)

    with gr.Tabs():

        # TAB 1: Get Recommendations
        with gr.Tab("🎯 Get Recommendations"):
            gr.Markdown("### Get personalized movie recommendations for any user")

            with gr.Row():
                with gr.Column(scale=1):
                    user_id_input = gr.Number(
                        label="User ID",
                        value=1,
                        minimum=1,
                        maximum=943,
                        step=1,
                        info="Enter a user ID (1-943)"
                    )

                    num_recs_input = gr.Slider(
                        label="Number of Recommendations",
                        minimum=5,
                        maximum=20,
                        value=10,
                        step=1
                    )

                    recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")

                with gr.Column(scale=2):
                    recommendations_output = gr.Textbox(
                        label="Recommendations",
                        lines=20,
                        max_lines=30
                    )

            recommend_btn.click(
                fn=get_recommendations,
                inputs=[user_id_input, num_recs_input],
                outputs=recommendations_output
            )

            gr.Markdown("""
            **How it works:**
            - Enter a User ID (between 1 and 943)
            - Choose how many recommendations you want
            - Click "Get Recommendations" to see personalized movie suggestions
            """)

        # TAB 2: User History
        with gr.Tab("📊 User History"):
            gr.Markdown("### View a user's rating history")

            with gr.Row():
                with gr.Column(scale=1):
                    user_id_history = gr.Number(
                        label="User ID",
                        value=1,
                        minimum=1,
                        maximum=943,
                        step=1
                    )

                    history_btn = gr.Button("📊 View History", variant="primary")

                with gr.Column(scale=2):
                    history_output = gr.Textbox(
                        label="Rating History",
                        lines=20,
                        max_lines=30
                    )

            history_btn.click(
                fn=get_user_history,
                inputs=user_id_history,
                outputs=history_output
            )

        # TAB 3: Search Movies
        with gr.Tab("🔍 Search Movies"):
            gr.Markdown("### Search for movies in the database")

            with gr.Row():
                with gr.Column(scale=1):
                    movie_search = gr.Textbox(
                        label="Movie Title Search",
                        placeholder="e.g., Star Wars, Godfather, Titanic...",
                        value="Star Wars"
                    )

                    search_btn = gr.Button("🔍 Search", variant="primary")

                with gr.Column(scale=2):
                    search_output = gr.Textbox(
                        label="Search Results",
                        lines=20,
                        max_lines=30
                    )

            search_btn.click(
                fn=get_movie_info,
                inputs=movie_search,
                outputs=search_output
            )

        # TAB 4: About
        with gr.Tab("ℹ️ About"):
            gr.Markdown("""
            ## About This System

            ### 🎯 Model Architecture
            This is a **Hybrid Recommendation System** that combines three powerful approaches:

            1. **Item-Based Collaborative Filtering**
               - Uses cosine similarity between movies
               - Recommends movies similar to what you've liked before

            2. **SVD Matrix Factorization**
               - Decomposes the user-movie rating matrix
               - Discovers latent factors that explain user preferences

            3. **Neural Collaborative Filtering (NCF)**
               - Deep learning model with user and movie embeddings
               - Learns complex non-linear patterns in user behavior

            ### 📊 Dataset
            - **MovieLens 100k** dataset
            - 100,000 ratings from 943 users on 1,682 movies
            - Ratings scale: 1-5 stars

            ### 🎯 Performance Metrics
            - **Precision@10**: 26.77%
            - **NDCG@10**: 28.50%
            - **Model improves recommendations by 40% vs baseline**

            ### 👨‍💻 Created For
            **DataSynthis Job Task**

            ### 🔗 Technologies Used
            - PyTorch (Neural Networks)
            - Scikit-learn (SVD, Similarity)
            - Pandas & NumPy (Data Processing)
            - Gradio (Web Interface)

            ---

            **Note**: This model is trained on the MovieLens 100k dataset.
            User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
            """)

    # Footer
    gr.Markdown("""
    ---
    <div style='text-align: center'>
    <p>🎬 <strong>Hybrid Movie Recommendation System</strong> | Built with ❤️ for DataSynthis</p>
    </div>
    """)
456
+
457
+ # Launch the app
458
if __name__ == "__main__":
    # Listen on all interfaces at port 7860 without creating a public share link.
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )

# NOTE(review): everything from this point on is a second, pasted copy of this
# module. In the committed file its opening docstring was fused onto the
# closing parenthesis of demo.launch(...) as `)"""`, which is a SyntaxError.
# The string is kept here as a separate statement; the duplicate copy should
# be removed.
"""
Gradio App for Hybrid Movie Recommendation System
Deploy to Hugging Face Spaces as 'DataSynthis_Job_task'

File: app.py
"""
469
+
470
+ import gradio as gr
471
+ import pickle
472
+ import pandas as pd
473
+ import numpy as np
474
+ import torch
475
+ import torch.nn as nn
476
+ import os
477
+ from scipy.sparse import csr_matrix
478
+
479
+ #==============================================================================
480
+ # MODEL CLASS DEFINITIONS (Required for unpickling)
481
+ #==============================================================================
482
+
483
+ class ItemBasedCF:
484
+ """Item-Based Collaborative Filtering"""
485
+ pass
486
+
487
+ class SVDRecommender:
488
+ """SVD Recommender"""
489
+ pass
490
+
491
+ class NeuralCF(nn.Module):
492
+ """Neural Collaborative Filtering Model"""
493
+
494
+ def __init__(self, n_users, n_movies, embedding_dim=50, hidden_layers=[64, 32, 16]):
495
+ super(NeuralCF, self).__init__()
496
+ self.user_embedding = nn.Embedding(n_users, embedding_dim)
497
+ self.movie_embedding = nn.Embedding(n_movies, embedding_dim)
498
+
499
+ layers = []
500
+ input_dim = embedding_dim * 2
501
+ for hidden_dim in hidden_layers:
502
+ layers.append(nn.Linear(input_dim, hidden_dim))
503
+ layers.append(nn.ReLU())
504
+ layers.append(nn.Dropout(0.2))
505
+ input_dim = hidden_dim
506
+ layers.append(nn.Linear(input_dim, 1))
507
+ self.mlp = nn.Sequential(*layers)
508
+
509
+ def forward(self, user_ids, movie_ids):
510
+ user_emb = self.user_embedding(user_ids)
511
+ movie_emb = self.movie_embedding(movie_ids)
512
+ x = torch.cat([user_emb, movie_emb], dim=1)
513
+ output = self.mlp(x)
514
+ return output.squeeze()
515
+
516
+ def predict(self, user_idx, movie_idx, device='cpu'):
517
+ self.eval()
518
+ with torch.no_grad():
519
+ user_tensor = torch.LongTensor([user_idx]).to(device)
520
+ movie_tensor = torch.LongTensor([movie_idx]).to(device)
521
+ prediction = self.forward(user_tensor, movie_tensor)
522
+ return torch.clamp(prediction, 1, 5).item()
523
+
524
+ class HybridRecommender:
525
+ """Hybrid Recommendation System"""
526
+ pass
527
+
528
+ class MovieLensDataLoader:
529
+ """Data Loader"""
530
+ pass
531
+
532
+ #==============================================================================
533
+ # LOAD MODEL AND DATA
534
+ #==============================================================================
535
+
536
def load_model_and_data():
    """Load the pickled model, data loader and movie table from ``model_artifacts/``.

    Prints diagnostics about the working directory and each expected file
    before loading, so a missing artifact is visible in the startup log.

    Returns:
        ``(model, loader, movies, user_ids)`` on success, where ``user_ids`` is
        the sorted list of keys of ``loader.user_id_map``. On any failure the
        error is printed and ``(None, None, None, [])`` is returned.
    """
    artifact_dir = 'model_artifacts'
    # Load order matters: the results are unpacked as (model, loader, movies).
    artifact_names = ('hybrid_model.pkl', 'loader.pkl', 'movies.pkl')

    # Debug: Check what files exist
    print("Checking for files...")
    print(f"Current directory: {os.getcwd()}")
    print(f"Files in current directory: {os.listdir('.')}")

    if os.path.exists(artifact_dir):
        print(f"Files in model_artifacts/: {os.listdir(artifact_dir)}")
    else:
        print("ERROR: model_artifacts/ folder does not exist!")

    try:
        # Check each file individually so every missing artifact is reported,
        # not only the first one that fails to open.
        for name in artifact_names:
            file_path = f"{artifact_dir}/{name}"
            if not os.path.exists(file_path):
                print(f"ERROR: Missing file: {file_path}")
            else:
                file_size = os.path.getsize(file_path) / (1024 * 1024)  # MB
                print(f"Found: {file_path} ({file_size:.2f} MB)")

        # Load files
        loaded = []
        for name in artifact_names:
            # SECURITY: pickle.load can execute arbitrary code from the file;
            # only load artifacts that ship with this application.
            with open(f"{artifact_dir}/{name}", 'rb') as f:
                loaded.append(pickle.load(f))
            print(f"✓ Loaded {name}")
        model, loader, movies = loaded

        # Get list of users
        user_ids = sorted(loader.user_id_map.keys())
        print(f"✓ Model loaded successfully! {len(user_ids)} users available")

        return model, loader, movies, user_ids
    except FileNotFoundError as e:
        print(f"ERROR: File not found - {e}")
        print("Make sure all pkl files are in the model_artifacts/ folder")
        return None, None, None, []
    except Exception as e:
        # Startup boundary: report the problem and let the app start in a
        # degraded state instead of crashing on import.
        print(f"ERROR loading model: {type(e).__name__}: {e}")
        import traceback
        traceback.print_exc()
        return None, None, None, []
592
+
593
+ # Load everything at startup
594
  print("Loading model and data...")
595
  model, loader, movies_df, user_ids = load_model_and_data()
596
  print(f"Model loaded! Available users: {len(user_ids)}")
597
 
598
+ #==============================================================================
599
+ # RECOMMENDATION FUNCTION
600
+ #==============================================================================
601
+
602
  def get_recommendations(user_id, num_recommendations):
603
+ """
604
+ Get movie recommendations for a user
605
+
606
+ Args:
607
+ user_id: User ID (int)
608
+ num_recommendations: Number of recommendations to return
609
+
610
+ Returns:
611
+ Formatted string with recommendations
612
+ """
613
  if model is None or loader is None:
614
+ return "❌ Error: Model not loaded properly. Please check the model files."
615
 
616
  try:
617
  user_id = int(user_id)
618
  num_recommendations = int(num_recommendations)
619
 
620
+ # Check if user exists
621
  if user_id not in loader.user_id_map:
622
+ return f"❌ User ID {user_id} not found! Please select a valid user ID."
623
 
624
+ # Get recommendations
625
  recommendations = model.recommend_movies(
626
  user_id=user_id,
627
  N=num_recommendations,
 
631
  )
632
 
633
  if not recommendations:
634
+ return f"❌ No recommendations found for User {user_id}"
635
 
636
+ # Format output
637
+ output = f"🎬 **Top {num_recommendations} Movie Recommendations for User {user_id}**\n\n"
638
  output += "=" * 60 + "\n\n"
639
 
640
  for i, (movie_id, title, score) in enumerate(recommendations, 1):
641
+ stars = "⭐" * int(score)
642
+ output += f"**{i}. {title}**\n"
643
+ output += f" β€’ Predicted Rating: {score:.2f}/5.00 {stars}\n"
644
+ output += f" β€’ Movie ID: {movie_id}\n\n"
645
 
646
  return output
647
 
648
  except ValueError:
649
+ return "❌ Error: Please enter valid numbers for User ID and Number of Recommendations"
650
  except Exception as e:
651
+ return f"❌ Error generating recommendations: {str(e)}"
652
+
653
 
654
  def get_user_history(user_id):
655
+ """
656
+ Show user's rating history
657
+
658
+ Args:
659
+ user_id: User ID
660
+
661
+ Returns:
662
+ Formatted string with user's past ratings
663
+ """
664
  if model is None or loader is None:
665
+ return "❌ Error: Model not loaded properly."
666
 
667
  try:
668
  user_id = int(user_id)
669
 
670
  if user_id not in loader.user_id_map:
671
+ return f"❌ User ID {user_id} not found!"
672
 
673
  user_idx = loader.user_id_map[user_id]
674
 
675
+ # Get user's ratings from the training data
676
  user_ratings = model.item_cf.user_item_matrix[user_idx].toarray().flatten()
677
  rated_indices = np.where(user_ratings > 0)[0]
678
 
679
  if len(rated_indices) == 0:
680
  return f"No rating history found for User {user_id}"
681
 
682
+ # Get movie details
683
  history = []
684
  for movie_idx in rated_indices:
685
  original_movie_id = loader.reverse_movie_map[movie_idx]
 
687
  rating = user_ratings[movie_idx]
688
  history.append((title, rating))
689
 
690
+ # Sort by rating (highest first)
691
  history.sort(key=lambda x: x[1], reverse=True)
692
 
693
+ # Format output
694
+ output = f"πŸ“Š **Rating History for User {user_id}**\n\n"
695
  output += f"Total movies rated: {len(history)}\n"
696
  output += f"Average rating: {np.mean([r for _, r in history]):.2f}\n\n"
697
  output += "=" * 60 + "\n\n"
698
+ output += "**Top 10 Highest Rated Movies:**\n\n"
699
 
700
  for i, (title, rating) in enumerate(history[:10], 1):
701
+ stars = "⭐" * int(rating)
702
+ output += f"{i}. **{title}** - {rating:.1f}/5 {stars}\n"
703
 
704
  return output
705
 
706
  except Exception as e:
707
+ return f"❌ Error: {str(e)}"
708
+
709
 
710
  def get_movie_info(movie_title_search):
711
+ """
712
+ Search for movies by title
713
+
714
+ Args:
715
+ movie_title_search: Search query
716
+
717
+ Returns:
718
+ Formatted string with matching movies
719
+ """
720
  if movies_df is None:
721
+ return "❌ Error: Movies data not loaded"
722
 
723
  try:
724
+ # Search for movies
725
  matches = movies_df[movies_df['title'].str.contains(movie_title_search, case=False, na=False)]
726
 
727
  if len(matches) == 0:
728
+ return f"❌ No movies found matching '{movie_title_search}'"
729
 
730
+ output = f"πŸ” **Search Results for '{movie_title_search}'**\n\n"
731
  output += f"Found {len(matches)} movie(s):\n\n"
732
  output += "=" * 60 + "\n\n"
733
 
734
  for i, (_, row) in enumerate(matches.head(20).iterrows(), 1):
735
+ output += f"{i}. **{row['title']}** (ID: {row['movie_id']})\n"
736
 
737
  if len(matches) > 20:
738
  output += f"\n... and {len(matches) - 20} more results"
 
740
  return output
741
 
742
  except Exception as e:
743
+ return f"❌ Error: {str(e)}"
744
+
745
+
746
+ #==============================================================================
747
+ # GRADIO INTERFACE
748
+ #==============================================================================
749
 
750
+ # Create Gradio interface with tabs
751
  with gr.Blocks(theme=gr.themes.Soft(), title="Movie Recommender - DataSynthis") as demo:
752
 
753
  gr.Markdown("""
754
+ # 🎬 Hybrid Movie Recommendation System
755
  ### DataSynthis Job Task - Powered by AI
756
 
757
+ This system combines **Collaborative Filtering**, **SVD Matrix Factorization**, and **Neural Networks**
758
  to provide personalized movie recommendations from the MovieLens 100k dataset.
759
 
760
  ---
 
762
 
763
  with gr.Tabs():
764
 
765
+ # TAB 1: Get Recommendations
766
+ with gr.Tab("🎯 Get Recommendations"):
767
  gr.Markdown("### Get personalized movie recommendations for any user")
768
 
769
  with gr.Row():
 
774
  minimum=1,
775
  maximum=943,
776
  step=1,
777
+ info=f"Enter a user ID (1-943)"
778
  )
779
 
780
  num_recs_input = gr.Slider(
 
785
  step=1
786
  )
787
 
788
+ recommend_btn = gr.Button("🎬 Get Recommendations", variant="primary")
789
 
790
  with gr.Column(scale=2):
791
  recommendations_output = gr.Textbox(
 
801
  )
802
 
803
  gr.Markdown("""
804
+ **How it works:**
805
  - Enter a User ID (between 1 and 943)
806
  - Choose how many recommendations you want
807
  - Click "Get Recommendations" to see personalized movie suggestions
808
  """)
809
 
810
+ # TAB 2: User History
811
+ with gr.Tab("πŸ“Š User History"):
812
  gr.Markdown("### View a user's rating history")
813
 
814
  with gr.Row():
 
821
  step=1
822
  )
823
 
824
+ history_btn = gr.Button("πŸ“Š View History", variant="primary")
825
 
826
  with gr.Column(scale=2):
827
  history_output = gr.Textbox(
 
836
  outputs=history_output
837
  )
838
 
839
+ # TAB 3: Search Movies
840
+ with gr.Tab("πŸ” Search Movies"):
841
  gr.Markdown("### Search for movies in the database")
842
 
843
  with gr.Row():
 
848
  value="Star Wars"
849
  )
850
 
851
+ search_btn = gr.Button("πŸ” Search", variant="primary")
852
 
853
  with gr.Column(scale=2):
854
  search_output = gr.Textbox(
 
863
  outputs=search_output
864
  )
865
 
866
+ # TAB 4: About
867
+ with gr.Tab("ℹ️ About"):
868
  gr.Markdown("""
869
  ## About This System
870
 
871
+ ### 🎯 Model Architecture
872
+ This is a **Hybrid Recommendation System** that combines three powerful approaches:
873
 
874
+ 1. **Item-Based Collaborative Filtering**
875
  - Uses cosine similarity between movies
876
  - Recommends movies similar to what you've liked before
877
 
878
+ 2. **SVD Matrix Factorization**
879
  - Decomposes the user-movie rating matrix
880
  - Discovers latent factors that explain user preferences
881
 
882
+ 3. **Neural Collaborative Filtering (NCF)**
883
  - Deep learning model with user and movie embeddings
884
  - Learns complex non-linear patterns in user behavior
885
 
886
+ ### πŸ“Š Dataset
887
+ - **MovieLens 100k** dataset
888
  - 100,000 ratings from 943 users on 1,682 movies
889
  - Ratings scale: 1-5 stars
890
 
891
+ ### 🎯 Performance Metrics
892
+ - **Precision@10**: 26.77%
893
+ - **NDCG@10**: 28.50%
894
+ - **Model improves recommendations by 40% vs baseline**
895
 
896
+ ### πŸ‘¨β€πŸ’» Created For
897
+ **DataSynthis Job Task**
898
 
899
+ ### πŸ”— Technologies Used
900
  - PyTorch (Neural Networks)
901
  - Scikit-learn (SVD, Similarity)
902
  - Pandas & NumPy (Data Processing)
 
904
 
905
  ---
906
 
907
+ **Note**: This model is trained on the MovieLens 100k dataset.
908
  User IDs range from 1 to 943, and movie IDs range from 1 to 1682.
909
  """)
910
 
911
+ # Footer
912
  gr.Markdown("""
913
  ---
914
  <div style='text-align: center'>
915
+ <p>🎬 <strong>Hybrid Movie Recommendation System</strong> | Built with ❀️ for DataSynthis</p>
916
  </div>
917
  """)
918
 
919
+ # Launch the app
920
  if __name__ == "__main__":
921
  demo.launch(
922
  share=False,