edwinbh committed on
Commit
acbbfff
·
verified ·
1 Parent(s): 5d8ed49

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +599 -518
src/streamlit_app.py CHANGED
@@ -1,572 +1,653 @@
1
  """
2
- DLRM Inference Engine for Book Recommendations
3
- Loads trained DLRM model and provides recommendation functionality
4
  """
5
 
6
  import os
7
  import sys
8
- import torch
9
- import numpy as np
 
 
 
 
 
10
  import pandas as pd
 
 
11
  import pickle
12
- import mlflow
13
- from mlflow import MlflowClient
14
- import tempfile
15
- from typing import List, Dict, Tuple, Optional, Any
16
- from functools import partial
17
  import warnings
18
  warnings.filterwarnings('ignore')
19
 
20
# CPU_ONLY=true (any letter case) in the environment requests CPU execution.
CPU_ONLY = os.getenv('CPU_ONLY', 'false').lower() == 'true'

if CPU_ONLY:
    # An empty CUDA_VISIBLE_DEVICES leaves no GPU visible to CUDA.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    print("🔄 Running in CPU-only mode (CUDA disabled)")

# torchrec is optional: record whether it imported so the rest of the module
# can fall back to a limited mode.
try:
    from torchrec import EmbeddingBagCollection
    from torchrec.models.dlrm import DLRM, DLRMTrain
    from torchrec.modules.embedding_configs import EmbeddingBagConfig
    from torchrec.sparse.jagged_tensor import KeyedJaggedTensor
    from torchrec.datasets.utils import Batch
except ImportError as import_error:
    TORCHREC_AVAILABLE = False
    print(f"⚠️ Warning: torchrec import error: {import_error}")
    print("⚠️ Some functionality will be limited")
else:
    TORCHREC_AVAILABLE = True
39
 
40
class DLRMBookRecommender:
    """DLRM-based book recommender for inference.

    On construction the instance loads the preprocessing artifacts (column
    lists, label encoders, scaler) from ``book_dlrm_preprocessing.pkl`` and
    then a trained torchrec ``DLRM`` from either a local state-dict file or
    an MLflow run. Everything runs on the CPU device.

    If torchrec could not be imported (``TORCHREC_AVAILABLE`` is false) the
    instance is created in a limited mode: nothing is loaded and ``model``
    stays ``None``.
    """

    # Prefix carried by state-dict keys saved from a wrapping module.
    _STATE_DICT_PREFIX = 'model.'
    # Encoded decade used when the real one cannot be determined (2000s).
    _DEFAULT_DECADE_INDEX = 6
    # Lower bounds of age groups 1..5; ages below the first bound are group 0.
    _AGE_GROUP_BOUNDS = (18, 25, 35, 50, 65)

    def __init__(self, model_path: Optional[str] = None, run_id: Optional[str] = None):
        """
        Initialize DLRM book recommender

        Args:
            model_path: Path to saved model state dict
            run_id: MLflow run ID to load model from

        Raises:
            FileNotFoundError: if torchrec is available but the preprocessing
                pickle does not exist.
        """
        self.device = torch.device("cpu")
        self.model = None
        self.preprocessing_info = None
        self.torchrec_available = TORCHREC_AVAILABLE

        if not self.torchrec_available:
            print("⚠️ Running in limited mode without torchrec")
            return

        # Load preprocessing info
        self._load_preprocessing_info()

        # Load model: an existing local file wins over an MLflow run id.
        if model_path and os.path.exists(model_path):
            self._load_model_from_path(model_path)
        elif run_id:
            self._load_model_from_mlflow(run_id)
        else:
            print("⚠️ No model loaded. Please provide model_path or run_id")

    def _load_preprocessing_info(self):
        """Load column lists, encoders and scaler from the preprocessing pickle.

        Raises:
            FileNotFoundError: if ``book_dlrm_preprocessing.pkl`` is missing
                from the current working directory.
        """
        if not os.path.exists('book_dlrm_preprocessing.pkl'):
            raise FileNotFoundError("book_dlrm_preprocessing.pkl not found. Run preprocessing first.")

        # NOTE(security): pickle.load executes arbitrary code from the file;
        # only load a preprocessing file produced by a trusted pipeline.
        with open('book_dlrm_preprocessing.pkl', 'rb') as f:
            self.preprocessing_info = pickle.load(f)

        info = self.preprocessing_info
        self.dense_cols = info['dense_cols']
        self.cat_cols = info['cat_cols']
        self.emb_counts = info['emb_counts']
        self.user_encoder = info['user_encoder']
        self.book_encoder = info['book_encoder']
        self.publisher_encoder = info['publisher_encoder']
        self.location_encoder = info['location_encoder']
        self.scaler = info['scaler']

        print("✅ Preprocessing info loaded")

    @classmethod
    def _strip_model_prefix(cls, state_dict):
        """Return a copy of ``state_dict`` with a leading 'model.' removed.

        Only keys that actually start with the prefix are renamed; all other
        keys are kept unchanged.
        """
        prefix = cls._STATE_DICT_PREFIX
        return {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

    def _build_model(self, cat_cols, emb_counts, dense_cols, embedding_dim,
                     dense_arch_layer_sizes, over_arch_layer_sizes):
        """Create an untrained DLRM with one embedding table per categorical column."""
        eb_configs = [
            EmbeddingBagConfig(
                name=f"t_{feature_name}",
                embedding_dim=embedding_dim,
                num_embeddings=emb_counts[feature_idx],
                feature_names=[feature_name],
            )
            for feature_idx, feature_name in enumerate(cat_cols)
        ]
        return DLRM(
            embedding_bag_collection=EmbeddingBagCollection(
                tables=eb_configs, device=self.device
            ),
            dense_in_features=len(dense_cols),
            dense_arch_layer_sizes=dense_arch_layer_sizes,
            over_arch_layer_sizes=over_arch_layer_sizes,
            dense_device=self.device,
        )

    def _install_state_dict(self, dlrm_model, state_dict):
        """Load weights into ``dlrm_model``, switch to eval mode, store it."""
        dlrm_model.load_state_dict(self._strip_model_prefix(state_dict))
        dlrm_model.eval()
        self.model = dlrm_model

    def _load_model_from_path(self, model_path: str):
        """Load model from saved state dict.

        Uses the column lists from the preprocessing info and the default
        architecture (embedding dim 64). Failures are reported on stdout and
        leave ``self.model`` unchanged.
        """
        try:
            dlrm_model = self._build_model(
                self.cat_cols,
                self.emb_counts,
                self.dense_cols,
                embedding_dim=64,  # Default embedding dim
                dense_arch_layer_sizes=[256, 128, 64],
                over_arch_layer_sizes=[512, 256, 128, 1],
            )
            # NOTE(security): torch.load unpickles the checkpoint; only load
            # files from a trusted source.
            state_dict = torch.load(model_path, map_location=self.device)
            self._install_state_dict(dlrm_model, state_dict)
            print(f"✅ Model loaded from {model_path}")
        except Exception as e:
            print(f"❌ Error loading model: {e}")

    def _load_model_from_mlflow(self, run_id: str):
        """Load model from MLflow.

        The architecture is read from the run's logged params and the weights
        from the first state-dict artifact that can be downloaded. Failures
        are reported on stdout and leave ``self.model`` unchanged.
        """
        import ast

        try:
            client = MlflowClient()
            run = client.get_run(run_id)

            # Params are logged as strings. literal_eval parses Python
            # literals without executing arbitrary code (unlike eval).
            params = run.data.params
            cat_cols = ast.literal_eval(params.get('cat_cols'))
            emb_counts = ast.literal_eval(params.get('emb_counts'))
            dense_cols = ast.literal_eval(params.get('dense_cols'))
            embedding_dim = int(params.get('embedding_dim', 64))
            dense_arch_layer_sizes = ast.literal_eval(params.get('dense_arch_layer_sizes'))
            over_arch_layer_sizes = ast.literal_eval(params.get('over_arch_layer_sizes'))

            # Download into a temporary directory that is removed afterwards.
            with tempfile.TemporaryDirectory() as temp_dir:
                # Try different artifact paths, newest first.
                for artifact_path in ['model_state_dict_final', 'model_state_dict_2',
                                      'model_state_dict_1', 'model_state_dict_0']:
                    try:
                        client.download_artifacts(run_id, f"{artifact_path}/state_dict.pth", temp_dir)
                        state_dict = mlflow.pytorch.load_state_dict(f"{temp_dir}/{artifact_path}")
                        break
                    except Exception:
                        continue
                else:
                    raise FileNotFoundError("No model artifacts found")

            dlrm_model = self._build_model(
                cat_cols,
                emb_counts,
                dense_cols,
                embedding_dim=embedding_dim,
                dense_arch_layer_sizes=dense_arch_layer_sizes,
                over_arch_layer_sizes=over_arch_layer_sizes,
            )
            self._install_state_dict(dlrm_model, state_dict)

            print(f" Model loaded from MLflow run: {run_id}")

        except Exception as e:
            print(f"❌ Error loading model from MLflow: {e}")

    @staticmethod
    def _number_or_default(value, default):
        """Return ``value`` as a float, or ``default`` if it is missing, non-numeric or NaN."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return default
        return default if np.isnan(number) else number

    def _prepare_user_features(self, user_id: int, user_data: Optional[Dict] = None) -> "Tuple[torch.Tensor, int, int, int]":
        """Prepare user features for inference.

        Args:
            user_id: Raw user id; encoded via ``self.user_encoder``.
            user_data: Optional dict with 'Age', 'Location', 'user_activity'
                and 'user_avg_rating'; defaults are used for anything missing.

        Returns:
            ``(dense_features, user_id_encoded, country_encoded, age_group)``
            where ``dense_features`` is a float32 tensor holding one scaled
            row of six values. Unknown users/countries are encoded as 0.
        """
        if user_data is None:
            # Create default user features
            user_data = {
                'User-ID': user_id,
                'Age': 30,  # Default age
                'Location': 'usa',  # Default location
            }

        # Encode categorical features; an unknown label falls back to index 0.
        try:
            user_id_encoded = self.user_encoder.transform([str(user_id)])[0]
        except Exception:
            user_id_encoded = 0

        try:
            # The country is taken as the last comma-separated part.
            location = str(user_data.get('Location', 'usa')).split(',')[-1].strip().lower()
            country_encoded = self.location_encoder.transform([location])[0]
        except Exception:
            country_encoded = 0

        # Missing/NaN values would otherwise poison the age group and scaler.
        age = self._number_or_default(user_data.get('Age'), 30)
        user_activity = self._number_or_default(user_data.get('user_activity'), 10)
        user_avg_rating = self._number_or_default(user_data.get('user_avg_rating'), 6.0)

        # Age group = number of lower bounds the age has reached (0..5).
        age_group = sum(age >= bound for bound in self._AGE_GROUP_BOUNDS)

        # Book-side dense slots (2000, 10, 6.0) are fixed default values.
        dense_features = np.array([[age, 2000, user_activity, 10, user_avg_rating, 6.0]])
        dense_features = self.scaler.transform(dense_features)
        dense_features = torch.tensor(dense_features, dtype=torch.float32)

        return dense_features, user_id_encoded, country_encoded, age_group

    def _prepare_book_features(self, book_isbn: str, book_data: Optional[Dict] = None) -> Tuple[int, int, int, int]:
        """Prepare book features for inference.

        Args:
            book_isbn: ISBN; encoded via ``self.book_encoder``.
            book_data: Optional dict with 'Publisher' and 'Year-Of-Publication'.

        Returns:
            ``(book_id_encoded, publisher_encoded, decade_encoded, rating_level)``.
            Unknown books/publishers are encoded as 0, an unknown decade as
            the 2000s default, and ``rating_level`` is always 1 (medium).
        """
        if book_data is None:
            book_data = {}

        # Encode book ID
        try:
            book_id_encoded = self.book_encoder.transform([str(book_isbn)])[0]
        except Exception:
            book_id_encoded = 0

        # Encode publisher
        try:
            publisher = str(book_data.get('Publisher', 'Unknown'))
            publisher_encoded = self.publisher_encoder.transform([publisher])[0]
        except Exception:
            publisher_encoded = 0

        # Publication decade; an unparsable year falls back to 2000.
        try:
            year = int(book_data.get('Year-Of-Publication', 2000))
        except (TypeError, ValueError):
            year = 2000
        decade = (year // 10) * 10

        decade_encoded = self._DEFAULT_DECADE_INDEX
        decade_encoder = (self.preprocessing_info or {}).get('decade_encoder')
        if decade_encoder is not None:
            try:
                decade_encoded = decade_encoder.transform([str(decade)])[0]
            except Exception:
                decade_encoded = self._DEFAULT_DECADE_INDEX

        # Rating level (default to medium)
        rating_level = 1

        return book_id_encoded, publisher_encoded, decade_encoded, rating_level

    def predict_rating(self, user_id: int, book_isbn: str,
                       user_data: Optional[Dict] = None,
                       book_data: Optional[Dict] = None) -> float:
        """
        Predict rating probability for user-book pair

        Args:
            user_id: User ID
            book_isbn: Book ISBN
            user_data: Additional user data (optional)
            book_data: Additional book data (optional)

        Returns:
            Prediction probability (0-1). Returns 0.0 when no model is loaded
            or the prediction fails, and 0.5 when torchrec is unavailable.
        """
        if self.model is None:
            print("❌ Model not loaded")
            return 0.0

        if not self.torchrec_available:
            print(" Cannot make predictions without torchrec")
            return 0.5  # Return default neutral prediction

        try:
            # Prepare features
            dense_features, user_id_encoded, country_encoded, age_group = self._prepare_user_features(user_id, user_data)
            book_id_encoded, publisher_encoded, decade_encoded, rating_level = self._prepare_book_features(book_isbn, book_data)

            # Create sparse features: one value per categorical column, in
            # the order this list pairs with self.cat_cols.
            kjt_values = [user_id_encoded, book_id_encoded, publisher_encoded,
                          country_encoded, age_group, decade_encoded, rating_level]
            kjt_lengths = [1] * len(kjt_values)

            sparse_features = KeyedJaggedTensor.from_lengths_sync(
                self.cat_cols,
                torch.tensor(kjt_values),
                torch.tensor(kjt_lengths, dtype=torch.int32),
            )

            # Make prediction
            with torch.no_grad():
                logits = self.model(dense_features=dense_features, sparse_features=sparse_features)
                prediction = torch.sigmoid(logits).item()

            return prediction

        except Exception as e:
            print(f"Error in prediction: {e}")
            return 0.0

    def get_user_recommendations(self, user_id: int,
                                 candidate_books: List[str],
                                 k: int = 10,
                                 user_data: Optional[Dict] = None) -> List[Tuple[str, float]]:
        """
        Get top-k book recommendations for a user

        Args:
            user_id: User ID
            candidate_books: List of candidate book ISBNs
            k: Number of recommendations
            user_data: Additional user data

        Returns:
            List of (book_isbn, prediction_score) tuples, best first; empty
            when no model is loaded or torchrec is unavailable.
        """
        if self.model is None or not self.torchrec_available:
            print("❌ Model not loaded or torchrec not available")
            return []

        print(f"Generating recommendations for user {user_id} from {len(candidate_books)} candidates...")

        recommendations = [
            (book_isbn, self.predict_rating(user_id, book_isbn, user_data))
            for book_isbn in candidate_books
        ]

        # Sort by score and return top-k
        recommendations.sort(key=lambda x: x[1], reverse=True)
        return recommendations[:max(k, 0)]

    def batch_recommend(self, user_ids: List[int],
                        candidate_books: List[str],
                        k: int = 10) -> Dict[int, List[Tuple[str, float]]]:
        """
        Generate recommendations for multiple users

        Args:
            user_ids: List of user IDs
            candidate_books: List of candidate book ISBNs
            k: Number of recommendations per user

        Returns:
            Dictionary mapping user_id to recommendations
        """
        return {
            user_id: self.get_user_recommendations(user_id, candidate_books, k)
            for user_id in user_ids
        }

    def get_similar_books(self, target_book_isbn: str,
                          candidate_books: List[str],
                          sample_users: List[int],
                          k: int = 10) -> List[Tuple[str, float]]:
        """
        Find books similar to target book by comparing user preferences

        Args:
            target_book_isbn: Target book ISBN
            candidate_books: List of candidate book ISBNs
            sample_users: Sample users to test similarity with
            k: Number of similar books

        Returns:
            List of (book_isbn, similarity_score) tuples, most similar first.
            The similarity is the correlation between the predicted scores of
            the target and the candidate across ``sample_users``.
        """
        # A correlation needs at least two samples; with fewer every value
        # would be NaN and filtered out anyway.
        if len(sample_users) < 2:
            return []

        # De-duplicate candidates (keeping order) and drop the target itself,
        # so every score list has exactly one entry per sample user.
        candidate_scores = {
            book: [] for book in dict.fromkeys(candidate_books)
            if book != target_book_isbn
        }
        target_scores = []

        # Get predictions for target book and candidates across sample users
        for user_id in sample_users:
            target_scores.append(self.predict_rating(user_id, target_book_isbn))
            for book_isbn, scores in candidate_scores.items():
                scores.append(self.predict_rating(user_id, book_isbn))

        # Calculate similarity based on correlation of user preferences
        similarities = []
        target_scores = np.array(target_scores)

        for book_isbn, scores in candidate_scores.items():
            correlation = np.corrcoef(target_scores, np.array(scores))[0, 1]
            # NaN appears when either score vector is constant.
            if not np.isnan(correlation):
                similarities.append((book_isbn, correlation))

        # Sort by similarity and return top-k
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:k]
422
-
423
-
424
def load_dlrm_recommender(model_source: str = "latest",
                          model_paths: Optional[List[str]] = None) -> DLRMBookRecommender:
    """
    Load DLRM recommender from various sources

    Args:
        model_source: "latest" for latest MLflow run, "file" for local file, or specific run_id
        model_paths: Checkpoint files tried in order when ``model_source`` is
            "file". Defaults to the notebook checkpoints this project was
            developed with.

    Returns:
        DLRMBookRecommender instance. Its ``model`` attribute is ``None`` when
        torchrec is unavailable or nothing could be loaded.
    """
    if model_paths is None:
        model_paths = [
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_final.pth',
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_2.pth',
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_0.pth',
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_1.pth',
        ]

    # Check if we're in CPU-only mode
    if os.environ.get('CPU_ONLY', 'false').lower() == 'true':
        print("🔄 Loading recommender in CPU-only mode")

    # If torchrec is not available, return limited recommender
    if not TORCHREC_AVAILABLE:
        recommender = DLRMBookRecommender()
        print("⚠️ torchrec not available, returning limited recommender")
        return recommender

    if model_source == "latest":
        # Try to get latest MLflow run
        try:
            experiment = mlflow.get_experiment_by_name('dlrm-book-recommendation-book_recommender')
            if experiment:
                runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id],
                                          order_by=["start_time desc"], max_results=1)
                if len(runs) > 0:
                    return DLRMBookRecommender(run_id=runs.iloc[0].run_id)
        except Exception as e:
            print(f"⚠️ Error loading from MLflow: {e}")

    elif model_source == "file":
        # Try to load from local file
        for filename in model_paths:
            if not os.path.exists(filename):
                continue
            try:
                recommender = DLRMBookRecommender(model_path=filename)
            except Exception as e:
                print(f"⚠️ Error loading from {filename}: {e}")
                continue
            # The constructor reports load failures by printing, not raising,
            # so check the result and fall through to the next checkpoint.
            if recommender.model is not None:
                return recommender

    else:
        # Treat as run_id
        try:
            return DLRMBookRecommender(run_id=model_source)
        except Exception as e:
            print(f"⚠️ Error loading from run_id {model_source}: {e}")

    print("⚠️ Could not load any trained model")
    # Fallback instance without a model; callers test `recommender.model`.
    return DLRMBookRecommender()
485
-
486
-
487
def demo_dlrm_recommendations():
    """Print a console walkthrough of DLRM recommendations.

    Reads Books.csv, Users.csv and Ratings.csv from the working directory,
    loads a recommender from a local checkpoint, then prints the top
    recommendations for the first user in the ratings file, a sample of that
    user's actual ratings, and books similar to the best recommendation.
    """

    print("🚀 DLRM Book Recommendation Demo")
    print("=" * 50)

    # Load book data for demo
    read_opts = {'encoding': 'latin-1', 'low_memory': False}
    books_df = pd.read_csv('Books.csv', **read_opts)
    users_df = pd.read_csv('Users.csv', **read_opts)
    ratings_df = pd.read_csv('Ratings.csv', **read_opts)

    # Drop stray double quotes from the column headers.
    for frame in (books_df, users_df, ratings_df):
        frame.columns = frame.columns.str.replace('"', '')

    def first_book_row(isbn):
        """Return the first Books.csv row with this ISBN, or None."""
        matches = books_df[books_df['ISBN'] == isbn]
        return matches.iloc[0] if len(matches) > 0 else None

    # Load recommender
    recommender = load_dlrm_recommender("file")
    if recommender.model is None:
        print("❌ No trained model found. Please run training first.")
        return

    # Get sample user and books
    sample_user_id = ratings_df['User-ID'].iloc[0]
    sample_books = books_df['ISBN'].head(20).tolist()

    print(f"\n📚 Getting recommendations for User {sample_user_id}")
    print(f"Testing with {len(sample_books)} candidate books...")

    # Get recommendations
    recommendations = recommender.get_user_recommendations(
        user_id=sample_user_id,
        candidate_books=sample_books,
        k=10,
    )

    print(f"\n🎯 Top 10 DLRM Recommendations:")
    print("-" * 50)

    for rank, (book_isbn, score) in enumerate(recommendations, 1):
        book = first_book_row(book_isbn)
        if book is None:
            print(f"{rank:2d}. ISBN: {book_isbn}, Score: {score:.4f}")
        else:
            title = book['Book-Title']
            author = book['Book-Author']
            print(f"{rank:2d}. {title} by {author}")
            print(f" ISBN: {book_isbn}, Score: {score:.4f}")
        print()

    # Show user's actual ratings for comparison
    user_ratings = ratings_df[ratings_df['User-ID'] == sample_user_id]
    if len(user_ratings) > 0:
        print(f"\n📖 User {sample_user_id}'s Actual Reading History:")
        print("-" * 50)

        for _, rating in user_ratings.head(5).iterrows():
            book = first_book_row(rating['ISBN'])
            if book is not None:
                print(f" {book['Book-Title']} by {book['Book-Author']} - Rating: {rating['Book-Rating']}/10")

    # Test book similarity
    if len(recommendations) == 0:
        return

    target_book = recommendations[0][0]
    print(f"\n🔍 Finding books similar to: {target_book}")

    similar_books = recommender.get_similar_books(
        target_book_isbn=target_book,
        candidate_books=sample_books,
        sample_users=ratings_df['User-ID'].head(10).tolist(),
        k=5,
    )

    print(f"\n📚 Similar Books:")
    print("-" * 30)
    for position, (book_isbn, similarity) in enumerate(similar_books, 1):
        book = first_book_row(book_isbn)
        if book is not None:
            print(f"{position}. {book['Book-Title']} (similarity: {similarity:.3f})")


if __name__ == "__main__":
    demo_dlrm_recommendations()
 
1
  """
2
+ Streamlit Dashboard for DLRM Book Recommendation System
3
+ Simple interface for DLRM-based book recommendations
4
  """
5
 
6
  import os
7
  import sys
8
+ import streamlit as st
9
+
10
def cpu_only_requested(argv):
    """Return True when ``--cpu-only`` appears among the script arguments.

    Args:
        argv: Argument list in ``sys.argv`` layout (index 0 is the script).
    """
    # Accept the flag at any position, not only as the first argument.
    return '--cpu-only' in argv[1:]


# Check if CPU_ONLY mode is enabled via command line argument.
# Kept ahead of the remaining imports so that modules reading CPU_ONLY at
# import time see it (presumably dlrm_inference — verify).
if cpu_only_requested(sys.argv):
    os.environ['CPU_ONLY'] = 'true'
    print("🔄 Running in CPU-only mode (CUDA disabled)")
14
+
15
  import pandas as pd
16
+ import numpy as np
17
+ import torch
18
  import pickle
19
+ from typing import Dict, List, Tuple, Optional
 
 
 
 
20
  import warnings
21
  warnings.filterwarnings('ignore')
22
 
23
# Import our DLRM recommender. If the module (or one of its dependencies) is
# missing, define stand-ins so later references do not raise NameError and the
# app can fall back to its "model not available" path.
try:
    from dlrm_inference import DLRMBookRecommender, load_dlrm_recommender, TORCHREC_AVAILABLE
except ImportError as e:
    print(f"⚠️ Error importing DLRM recommender: {e}")
    TORCHREC_AVAILABLE = False
    DLRMBookRecommender = None

    def load_dlrm_recommender(model_source="latest"):
        """Fallback used when dlrm_inference cannot be imported; loads nothing."""
        return None
29
 
30
+
31
# Page configuration
st.set_page_config(
    page_title="DLRM Book Recommendations",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="expanded",
)

# True when the CPU_ONLY environment variable is "true" (any letter case).
cpu_only_mode = os.getenv('CPU_ONLY', 'false').lower() == 'true'

# Stylesheet for the header, metric cards, explanation boxes, book cards and
# the CPU-mode banner; injected once as raw HTML.
_custom_css = """
<style>
.main-header {
font-size: 3rem;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
border-left: 5px solid #1f77b4;
}
.dlrm-explanation {
background-color: #e8f4fd;
padding: 1rem;
border-radius: 0.5rem;
border-left: 4px solid #0066cc;
margin: 1rem 0;
}
.book-card {
background-color: #ffffff;
padding: 1rem;
border-radius: 0.5rem;
border: 1px solid #e1e5eb;
margin-bottom: 1rem;
}
.cpu-mode-banner {
background-color: #fff3cd;
color: #856404;
padding: 0.75rem;
border-radius: 0.5rem;
border-left: 4px solid #ffeeba;
margin: 1rem 0;
text-align: center;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)
82
+
83
@st.cache_data
def load_data():
    """Read the books, users and ratings CSV files (result cached by Streamlit).

    Returns:
        ``(books_df, users_df, ratings_df)`` with double quotes removed from
        the column names, or ``(None, None, None)`` after showing an error in
        the app when anything goes wrong.
    """
    try:
        frames = []
        for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv'):
            frame = pd.read_csv(csv_name, encoding='latin-1', low_memory=False)
            # Clean column names
            frame.columns = frame.columns.str.replace('"', '')
            frames.append(frame)
        books_df, users_df, ratings_df = frames
        return books_df, users_df, ratings_df
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return None, None, None
100
+
101
@st.cache_resource
def load_dlrm_model():
    """Load the DLRM recommender from a local checkpoint (cached by Streamlit).

    Returns:
        Whatever ``load_dlrm_recommender("file")`` returns, or ``None`` after
        showing an error in the app if that call raises.
    """
    try:
        return load_dlrm_recommender("file")
    except Exception as e:
        st.error(f"Error loading DLRM model: {e}")
    return None
112
+
113
def display_book_info(book_isbn, books_df, show_rating=None):
    """Render one book as a two-column card: cover on the left, details on the right.

    Args:
        book_isbn: ISBN to look up in ``books_df['ISBN']``.
        books_df: DataFrame of books; the first matching row is shown.
        show_rating: Optional DLRM score, printed with four decimals.
    """
    placeholder_cover = "https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6"

    matches = books_df[books_df['ISBN'] == book_isbn]
    if len(matches) == 0:
        st.write(f"Book with ISBN {book_isbn} not found")
        return

    book = matches.iloc[0]
    cover_col, details_col = st.columns([1, 3])

    with cover_col:
        # Try to display actual book cover from Image-URL-M
        image_url = book.get('Image-URL-M', '')
        has_url = bool(image_url and pd.notna(image_url) and str(image_url) != 'nan')

        if not has_url:
            # Show placeholder if no image URL
            st.image(placeholder_cover, width=150)
            st.caption("📚 No cover")
        else:
            try:
                # Clean the URL (sometimes there are issues with Amazon URLs)
                clean_url = str(image_url).strip()
                if clean_url and 'http' in clean_url:
                    st.image(clean_url, width=150, caption="📚")
                else:
                    # Fallback to placeholder
                    st.image(placeholder_cover, width=150)
            except Exception:
                # If image loading fails, show placeholder
                st.image(placeholder_cover, width=150)
                st.caption("⚠️ Cover unavailable")

    with details_col:
        st.markdown(f"**{book['Book-Title']}**")
        st.write(f"*by {book['Book-Author']}*")
        st.write(f"📅 Published: {book.get('Year-Of-Publication', 'Unknown')}")
        st.write(f"🏢 Publisher: {book.get('Publisher', 'Unknown')}")
        st.write(f"📖 ISBN: {book['ISBN']}")

        if show_rating is not None:
            st.markdown(f"**🎯 DLRM Score: {show_rating:.4f}**")
 
 
 
 
156
 
157
+ def main():
158
+ # Header
159
+ st.markdown('<h1 class="main-header">📚 DLRM Book Recommendation System</h1>', unsafe_allow_html=True)
160
+ st.markdown("### Deep Learning Recommendation Model for Personalized Book Suggestions")
161
+
162
+ # CPU Mode Banner (if enabled)
163
+ if cpu_only_mode:
164
+ st.markdown('<div class="cpu-mode-banner">⚙️ Running in CPU-only mode (NVIDIA drivers not required)</div>', unsafe_allow_html=True)
165
+
166
+ st.markdown("---")
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ # Load data
170
+ with st.spinner("Loading book data..."):
171
+ books_df, users_df, ratings_df = load_data()
172
+
173
+ if books_df is None:
174
+ st.error("Failed to load data. Please check if CSV files are available.")
175
+ return
176
+
177
+ # Sidebar info
178
+ st.sidebar.title("📊 Dataset Information")
179
+ st.sidebar.metric("📚 Books", f"{len(books_df):,}")
180
+ st.sidebar.metric("👥 Users", f"{len(users_df):,}")
181
+ st.sidebar.metric("⭐ Ratings", f"{len(ratings_df):,}")
182
+
183
+ # Load DLRM model
184
+ with st.spinner("Loading DLRM model..."):
185
+ recommender = load_dlrm_model()
186
+
187
+ if recommender is None or not hasattr(recommender, 'model') or recommender.model is None:
188
+ if cpu_only_mode:
189
+ st.warning("⚠️ DLRM model not available in CPU-only mode")
190
+ st.info("The app will continue with limited functionality")
191
 
192
+ # Show options for browsing books without recommendations
193
+ st.subheader("📚 Browse Books")
 
 
 
 
 
 
 
 
194
 
195
+ # Simple book browser
196
+ search_query = st.text_input("Search for books", placeholder="Enter title, author, or publisher")
197
+ if search_query:
198
+ mask = (
199
+ books_df['Book-Title'].str.contains(search_query, case=False, na=False) |
200
+ books_df['Book-Author'].str.contains(search_query, case=False, na=False) |
201
+ books_df['Publisher'].str.contains(search_query, case=False, na=False)
202
  )
203
+ results = books_df[mask].head(20)
204
+
205
+ if len(results) > 0:
206
+ st.success(f"Found {len(results)} books matching '{search_query}'")
207
+ for _, book in results.iterrows():
208
+ st.markdown(f"**{book['Book-Title']}** by *{book['Book-Author']}*")
209
+ st.write(f"Published: {book.get('Year-Of-Publication', 'Unknown')} | ISBN: {book['ISBN']}")
210
+ st.markdown("---")
211
+ else:
212
+ st.info(f"No books found matching '{search_query}'")
 
 
 
 
 
 
213
 
214
+ return
215
+ else:
216
+ st.error("❌ DLRM model not available")
217
+ st.info("Please run the training script first: `python train_dlrm_books.py`")
218
 
219
+ st.markdown("### Available Options:")
220
+ st.markdown("1. **Train DLRM Model**: Run `python train_dlrm_books.py`")
221
+ st.markdown("2. **Prepare Data**: Run `python dlrm_book_recommender.py`")
222
+ st.markdown("3. **Check Files**: Ensure preprocessing files exist")
223
+ st.markdown("4. **Try CPU-only Mode**: Run `streamlit run streamlit_dlrm_app.py -- --cpu-only`")
224
 
225
+ return
 
226
 
227
+ if cpu_only_mode:
228
+ st.success(" DLRM model loaded successfully in CPU-only mode!")
229
+ else:
230
+ st.success("✅ DLRM model loaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
+ # Model info
233
+ st.sidebar.markdown("---")
234
+ st.sidebar.subheader("🤖 DLRM Model Info")
235
+ if recommender.preprocessing_info:
236
+ st.sidebar.write(f"Dense features: {len(recommender.dense_cols)}")
237
+ st.sidebar.write(f"Categorical features: {len(recommender.cat_cols)}")
238
+ st.sidebar.write(f"Embedding dim: 64")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
+ # Main interface
241
+ tab1, tab2, tab3, tab4 = st.tabs(["🎯 Get Recommendations", "🔍 Test Predictions", "📊 Model Analysis", "📸 Book Gallery"])
242
+
243
+ with tab1:
244
+ st.header("🎯 DLRM Book Recommendations")
245
+ st.info("Get personalized book recommendations using the trained DLRM model")
246
+
247
+ # User selection
248
+ col1, col2 = st.columns([2, 1])
249
+
250
+ with col1:
251
+ user_ids = sorted(users_df['User-ID'].unique())
252
+ selected_user_id = st.selectbox("Select a user", user_ids[:1000]) # Limit for performance
253
+
254
+ with col2:
255
+ num_recommendations = st.slider("Number of recommendations", 5, 20, 10)
256
+
257
+ # Show user info
258
+ user_info = users_df[users_df['User-ID'] == selected_user_id]
259
+ if len(user_info) > 0:
260
+ user = user_info.iloc[0]
261
+ st.markdown(f"**User Info**: Age: {user.get('Age', 'Unknown')}, Location: {user.get('Location', 'Unknown')}")
262
+
263
+ # User's reading history
264
+ user_ratings = ratings_df[ratings_df['User-ID'] == selected_user_id]
265
+ if len(user_ratings) > 0:
266
+ with st.expander(f"📖 User's Reading History ({len(user_ratings)} books)", expanded=False):
267
+ top_rated = user_ratings.sort_values('Book-Rating', ascending=False).head(10)
268
+ for _, rating in top_rated.iterrows():
269
+ book_info = books_df[books_df['ISBN'] == rating['ISBN']]
270
+ if len(book_info) > 0:
271
+ book = book_info.iloc[0]
272
+ st.write(f"• **{book['Book-Title']}** by {book['Book-Author']} - {rating['Book-Rating']}/10 ⭐")
273
+
274
+ if st.button("🚀 Get DLRM Recommendations", type="primary"):
275
+ with st.spinner("🤖 DLRM is analyzing user preferences..."):
276
+
277
+ # Get candidate books (popular books not rated by user)
278
+ user_rated_books = set(user_ratings['ISBN']) if len(user_ratings) > 0 else set()
279
+
280
+ # Get popular books as candidates
281
+ book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)
282
+ candidate_books = [isbn for isbn in book_popularity.head(100).index if isbn not in user_rated_books]
283
+
284
+ if len(candidate_books) < num_recommendations:
285
+ candidate_books = book_popularity.head(200).index.tolist()
286
+
287
+ # Get recommendations
288
+ recommendations = recommender.get_user_recommendations(
289
+ user_id=selected_user_id,
290
+ candidate_books=candidate_books,
291
+ k=num_recommendations
292
+ )
293
 
294
+ if recommendations:
295
+ st.success(f"Generated {len(recommendations)} DLRM recommendations!")
296
+
297
+ st.subheader("🎯 DLRM Recommendations")
298
+
299
+ for i, (book_isbn, score) in enumerate(recommendations, 1):
300
+ book_info = books_df[books_df['ISBN'] == book_isbn]
301
+ if len(book_info) > 0:
302
+ with st.expander(f"{i}. Recommendation (DLRM Score: {score:.4f})", expanded=(i <= 3)):
303
+ display_book_info(book_isbn, books_df, show_rating=score)
304
+
305
+ # Additional book stats
306
+ book_ratings = ratings_df[ratings_df['ISBN'] == book_isbn]
307
+ if len(book_ratings) > 0:
308
+ avg_rating = book_ratings['Book-Rating'].mean()
309
+ num_ratings = len(book_ratings)
310
+
311
+ st.markdown('<div class="dlrm-explanation">', unsafe_allow_html=True)
312
+ st.markdown("**📊 Book Statistics:**")
313
+ st.write(f"Average Rating: {avg_rating:.1f}/10 from {num_ratings} readers")
314
+ st.write(f"DLRM Confidence: {score:.1%}")
315
+ st.markdown('</div>', unsafe_allow_html=True)
316
+ else:
317
+ st.write(f"Book with ISBN {book_isbn} not found in database")
318
+ else:
319
+ st.warning("No recommendations generated")
320
+
321
with tab2:
    # Prediction playground: score one selected user either against books
    # they have actually rated or against a random sample of the catalogue.
    st.header("🔍 Test DLRM Predictions")
    st.info("Test how well DLRM predicts actual user ratings")

    user_col, mode_col = st.columns(2)

    with user_col:
        test_user_id = st.selectbox("Select user for testing", user_ids[:500], key="test_user")

    with mode_col:
        test_mode = st.radio("Test mode", ["Random books", "User's actual books"])

    if st.button("🧪 Test Predictions", type="secondary"):
        with st.spinner("Testing DLRM predictions..."):

            if test_mode == "User's actual books":
                # Bound the sample size by the *test* user's own rating count.
                # Sizing it from another user's ratings can request more rows
                # than exist, which makes DataFrame.sample raise ValueError.
                test_user_ratings = ratings_df[ratings_df['User-ID'] == test_user_id]
                user_test_ratings = test_user_ratings.sample(min(10, len(test_user_ratings)))

                if len(user_test_ratings) > 0:
                    st.subheader("🎯 DLRM vs Actual Ratings")

                    predictions = []     # raw DLRM scores, one per sampled rating
                    actuals = []         # binary "liked" labels (rating >= 6)
                    actual_numeric = []  # raw ratings, used for the correlation

                    for _, rating in user_test_ratings.iterrows():
                        book_isbn = rating['ISBN']
                        actual_rating = rating['Book-Rating']

                        # Get DLRM prediction
                        dlrm_score = recommender.predict_rating(test_user_id, book_isbn)

                        predictions.append(dlrm_score)
                        actuals.append(actual_rating >= 6)  # Convert to binary
                        actual_numeric.append(actual_rating)

                        # Display comparison (only for books present in the catalogue)
                        book_info = books_df[books_df['ISBN'] == book_isbn]
                        if len(book_info) > 0:
                            book = book_info.iloc[0]

                            title_col, actual_col, score_col = st.columns([2, 1, 1])
                            with title_col:
                                st.write(f"**{book['Book-Title']}**")
                                st.write(f"*by {book['Book-Author']}*")

                            with actual_col:
                                st.metric("Actual Rating", f"{actual_rating}/10")

                            with score_col:
                                st.metric("DLRM Score", f"{dlrm_score:.3f}")

                    # Calculate accuracy
                    if predictions and actuals:
                        # Convert DLRM scores to binary predictions
                        binary_preds = [1 if p > 0.5 else 0 for p in predictions]
                        accuracy = sum(p == a for p, a in zip(binary_preds, actuals)) / len(actuals)

                        st.markdown("---")
                        st.success(f"🎯 DLRM Accuracy: {accuracy:.1%}")

                        # Show correlation. Pearson correlation is undefined for
                        # fewer than two points or when either series is
                        # constant (np.corrcoef would return NaN), so fall back
                        # to 0 in those cases.
                        has_variance = (
                            len(predictions) > 1
                            and np.std(predictions) > 0
                            and np.std(actual_numeric) > 0
                        )
                        correlation = np.corrcoef(predictions, actual_numeric)[0, 1] if has_variance else 0
                        st.info(f"📊 Correlation with actual ratings: {correlation:.3f}")

                else:
                    st.warning("No ratings found for this user")

            else:
                # Test on random books; cap the sample at the catalogue size so
                # a small books_df does not make sample() raise.
                random_books = books_df.sample(min(10, len(books_df)))

                st.subheader("🎲 Random Book Predictions")

                # Iterate the sampled rows directly instead of re-querying
                # books_df by ISBN for every book.
                for _, book in random_books.iterrows():
                    dlrm_score = recommender.predict_rating(test_user_id, book['ISBN'])

                    title_col, score_col = st.columns([3, 1])
                    with title_col:
                        st.write(f"**{book['Book-Title']}** by *{book['Book-Author']}*")

                    with score_col:
                        st.metric("DLRM Score", f"{dlrm_score:.4f}")
408
+
409
with tab3:
    # Model analysis tab: architecture summary, a quick prediction probe,
    # saved training metrics (when present) and an explanatory write-up.
    st.header("📊 DLRM Model Analysis")
    st.info("Analysis of the DLRM model performance and characteristics")

    # Model architecture info
    if recommender and recommender.preprocessing_info:
        arch_col, stats_col = st.columns(2)

        with arch_col:
            st.subheader("🏗️ Model Architecture")
            st.write(f"**Dense Features ({len(recommender.dense_cols)}):**")
            for col in recommender.dense_cols:
                st.write(f"• {col}")

            st.write(f"**Categorical Features ({len(recommender.cat_cols)}):**")
            for i, col in enumerate(recommender.cat_cols):
                st.write(f"• {col}: {recommender.emb_counts[i]} embeddings")

        with stats_col:
            st.subheader("📈 Dataset Statistics")
            info = recommender.preprocessing_info
            total_samples = info.get('total_samples', 0)
            positive_rate = info.get('positive_rate', 0)

            st.metric("Total Samples", f"{total_samples:,}")
            st.metric("Positive Rate", f"{positive_rate:.1%}")
            st.metric("Train Samples", f"{info.get('train_samples', 0):,}")
            st.metric("Validation Samples", f"{info.get('val_samples', 0):,}")
            st.metric("Test Samples", f"{info.get('test_samples', 0):,}")

    # Feature importance analysis
    st.subheader("🔍 Feature Analysis")

    if st.button("Analyze Feature Importance"):
        with st.spinner("Analyzing feature importance..."):

            # Sample some users and books. Only a 5 x 5 grid is scored, so draw
            # exactly that many and cap by the frame size so sample() cannot
            # raise on small datasets.
            sample_users = users_df['User-ID'].sample(min(5, len(users_df))).tolist()
            sample_books = books_df['ISBN'].sample(min(5, len(books_df))).tolist()

            # Test different feature combinations
            st.write("**Feature Impact Analysis:**")

            base_predictions = [
                recommender.predict_rating(user_id, book_isbn)
                for user_id in sample_users
                for book_isbn in sample_books
            ]

            avg_prediction = np.mean(base_predictions)
            st.metric("Average Prediction Score", f"{avg_prediction:.4f}")

            st.success("✅ Feature analysis completed!")

    # Load training results if available. The same path is used for the
    # existence check and the read; previously the check used this relative
    # path while open() used an absolute developer-machine path.
    results_path = 'dlrm_book_training_results.pkl'
    if os.path.exists(results_path):
        # NOTE: pickle.load can execute arbitrary code; this file must be a
        # trusted artifact produced by the project's own training run.
        with open(results_path, 'rb') as f:
            training_results = pickle.load(f)

        st.subheader("📈 Training Results")

        metrics_col, chart_col = st.columns(2)

        with metrics_col:
            st.metric("Final Validation AUROC", f"{training_results.get('final_val_auroc', 0):.4f}")
            st.metric("Test AUROC", f"{training_results.get('test_auroc', 0):.4f}")

        with chart_col:
            val_history = training_results.get('val_aurocs_history', [])
            if val_history:
                st.line_chart(pd.DataFrame({
                    'Epoch': range(len(val_history)),
                    'Validation AUROC': val_history
                }).set_index('Epoch'))

    # Instructions
    st.markdown("---")
    st.markdown("""
    ## 🚀 How DLRM Works for Book Recommendations

    **DLRM (Deep Learning Recommendation Model)** is specifically designed for recommendation systems and offers several advantages:

    ### 🏗️ Architecture Benefits:
    - **Multi-feature Processing**: Handles both categorical (user ID, book ID, publisher) and numerical (age, ratings) features
    - **Embedding Tables**: Learns rich representations for categorical features
    - **Cross-feature Interactions**: Captures complex relationships between different features
    - **Scalable Design**: Efficiently handles large-scale recommendation datasets

    ### 📊 Features Used:
    **Categorical Features:**
    - User ID, Book ID, Publisher, Country, Age Group, Publication Decade, Rating Level

    **Dense Features:**
    - Normalized Age, Publication Year, User Activity, Book Popularity, Average Ratings

    ### 🎯 Why DLRM vs LLM for Recommendations:
    - **Purpose-built**: Specifically designed for recommendation systems
    - **Feature Integration**: Better at combining diverse feature types
    - **Scalability**: More efficient for large-scale recommendation tasks
    - **Performance**: Higher accuracy for rating prediction tasks
    - **Production Ready**: Optimized for real-time inference

    ### 💡 Best Use Cases:
    - **Personalized Recommendations**: Based on user behavior and item characteristics
    - **Rating Prediction**: Accurately predicts user preferences
    - **Cold Start**: Handles new users and items through content features
    - **Real-time Serving**: Fast inference for production systems
    """)
516
+
517
with tab4:
    # Gallery tab: pick a set of books by mode, render them as a grid of
    # covers with rating stats, then show summary metrics for the selection.
    st.header("📸 Book Gallery")
    st.info("Browse book covers and discover new titles")

    # Shown whenever a book has no usable cover URL or rendering it fails.
    placeholder_cover = "https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6"

    def _shorten(value, limit):
        """Return *value* as display text, truncated to *limit* chars with '...'."""
        # Missing cells (NaN/None) would make len() raise TypeError.
        text = str(value) if pd.notna(value) else "Unknown"
        return text[:limit - 3] + "..." if len(text) > limit else text

    # Gallery options
    mode_col, layout_col = st.columns([2, 1])

    with mode_col:
        gallery_mode = st.selectbox(
            "Choose gallery mode",
            ["Popular Books", "Recent Publications", "Random Selection", "Search Results"]
        )

    with layout_col:
        books_per_row = st.slider("Books per row", 2, 6, 4)
        max_books = st.slider("Maximum books", 10, 50, 20)

    # Get books based on selected mode
    if gallery_mode == "Popular Books":
        # Get most rated books
        book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)
        gallery_books = books_df[books_df['ISBN'].isin(book_popularity.head(max_books).index)]

    elif gallery_mode == "Recent Publications":
        # Get recent books; non-numeric years become NaN and sort last
        books_df_temp = books_df.copy()
        books_df_temp['Year-Of-Publication'] = pd.to_numeric(books_df_temp['Year-Of-Publication'], errors='coerce')
        recent_books = books_df_temp.sort_values('Year-Of-Publication', ascending=False, na_position='last')
        gallery_books = recent_books.head(max_books)

    elif gallery_mode == "Random Selection":
        # Random books
        gallery_books = books_df.sample(min(max_books, len(books_df)))

    else:  # Search Results
        search_query = st.text_input("Search books for gallery", placeholder="Enter title, author, or publisher")
        if search_query:
            # regex=False: treat the user's text literally so input such as
            # "C++" or "(" cannot raise a regular-expression error.
            mask = (
                books_df['Book-Title'].str.contains(search_query, case=False, na=False, regex=False) |
                books_df['Book-Author'].str.contains(search_query, case=False, na=False, regex=False) |
                books_df['Publisher'].str.contains(search_query, case=False, na=False, regex=False)
            )
            gallery_books = books_df[mask].head(max_books)
        else:
            gallery_books = books_df.head(max_books)

    # Display gallery
    if len(gallery_books) > 0:
        st.markdown(f"**📚 Showing {len(gallery_books)} books**")

        # Aggregate ratings once for the displayed books instead of scanning
        # ratings_df separately for every book.
        gallery_ratings = ratings_df[ratings_df['ISBN'].isin(gallery_books['ISBN'])]
        rating_stats = gallery_ratings.groupby('ISBN')['Book-Rating'].agg(['mean', 'size'])

        # Create grid layout
        books_list = gallery_books.to_dict('records')

        # Display books in rows
        for row_start in range(0, len(books_list), books_per_row):
            row_cols = st.columns(books_per_row)

            # zip stops at the shorter sequence, so a partial last row simply
            # leaves its trailing columns empty.
            for col, book in zip(row_cols, books_list[row_start:row_start + books_per_row]):
                with col:
                    # Book cover: use the catalogue URL only when it is present
                    # and looks like an http(s) link.
                    raw_url = book.get('Image-URL-M', '')
                    cover_url = str(raw_url).strip() if pd.notna(raw_url) else ''
                    if 'http' not in cover_url:
                        cover_url = placeholder_cover
                    try:
                        st.image(cover_url, width='stretch')
                    except Exception:
                        # Best-effort: fall back to the placeholder instead of
                        # breaking the whole gallery on one bad cover.
                        st.image(placeholder_cover, width='stretch')

                    # Book info
                    st.markdown(f"**{_shorten(book['Book-Title'], 40)}**")
                    st.write(f"*{_shorten(book['Book-Author'], 25)}*")
                    st.write(f"📅 {book.get('Year-Of-Publication', 'Unknown')}")

                    # Book statistics
                    isbn = book['ISBN']
                    if isbn in rating_stats.index:
                        avg_rating = rating_stats.at[isbn, 'mean']
                        num_ratings = int(rating_stats.at[isbn, 'size'])
                        st.write(f"⭐ {avg_rating:.1f}/10 ({num_ratings} ratings)")
                    else:
                        st.write("⭐ No ratings")

                    # DLRM prediction button
                    if recommender and recommender.model:
                        if st.button(f"🎯 DLRM Score", key=f"dlrm_{isbn}"):
                            with st.spinner("Calculating..."):
                                # Use first user as example
                                sample_user = users_df['User-ID'].iloc[0]
                                dlrm_score = recommender.predict_rating(sample_user, isbn)
                                st.success(f"DLRM Score: {dlrm_score:.3f}")
    else:
        st.info("No books found for the selected criteria")

    # Quick stats
    st.markdown("---")
    st.subheader("📊 Gallery Statistics")

    covers_col, year_col, authors_col, publishers_col = st.columns(4)

    with covers_col:
        # A book counts as having a cover when its URL cell is neither missing
        # nor an empty string.
        cover_urls = gallery_books.get('Image-URL-M')
        if cover_urls is None:
            books_with_covers = 0
        else:
            books_with_covers = int((cover_urls.notna() & (cover_urls != '')).sum())
        st.metric("Books with Covers", f"{books_with_covers}/{len(gallery_books)}")

    with year_col:
        # Convert Year-Of-Publication to numeric, coercing errors to NaN
        years = pd.to_numeric(gallery_books['Year-Of-Publication'], errors='coerce')
        avg_year = years.mean()
        st.metric("Average Publication Year", f"{avg_year:.0f}" if not pd.isna(avg_year) else "Unknown")

    with authors_col:
        unique_authors = gallery_books['Book-Author'].nunique()
        st.metric("Unique Authors", unique_authors)

    with publishers_col:
        unique_publishers = gallery_books['Publisher'].nunique()
        st.metric("Unique Publishers", unique_publishers)
 
 
 
 
651
 
652
  if __name__ == "__main__":
653
+ main()