edwinbh committed on
Commit
5d8ed49
·
verified ·
1 Parent(s): 406535b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +518 -599
src/streamlit_app.py CHANGED
@@ -1,653 +1,572 @@
1
  """
2
- Streamlit Dashboard for DLRM Book Recommendation System
3
- Simple interface for DLRM-based book recommendations
4
  """
5
 
6
  import os
7
  import sys
8
- import streamlit as st
9
-
10
- # Check if CPU_ONLY mode is enabled via command line argument
11
- if len(sys.argv) > 1 and sys.argv[1] == '--cpu-only':
12
- os.environ['CPU_ONLY'] = 'true'
13
- print("πŸ”„ Running in CPU-only mode (CUDA disabled)")
14
-
15
- import pandas as pd
16
- import numpy as np
17
  import torch
 
 
18
  import pickle
19
- from typing import Dict, List, Tuple, Optional
 
 
 
 
20
  import warnings
21
  warnings.filterwarnings('ignore')
22
 
23
- # Import our DLRM recommender
 
 
 
 
 
 
 
24
  try:
25
- from dlrm_inference import DLRMBookRecommender, load_dlrm_recommender, TORCHREC_AVAILABLE
 
 
 
 
 
26
  except ImportError as e:
27
- print(f"⚠️ Error importing DLRM recommender: {e}")
 
28
  TORCHREC_AVAILABLE = False
29
 
30
-
31
- # Page configuration
32
- st.set_page_config(
33
- page_title="DLRM Book Recommendations",
34
- page_icon="πŸ“š",
35
- layout="wide",
36
- initial_sidebar_state="expanded"
37
- )
38
-
39
- # Check if running in CPU-only mode
40
- cpu_only_mode = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
41
-
42
- # Custom CSS
43
- st.markdown("""
44
- <style>
45
- .main-header {
46
- font-size: 3rem;
47
- color: #1f77b4;
48
- text-align: center;
49
- margin-bottom: 2rem;
50
- }
51
- .metric-card {
52
- background-color: #f0f2f6;
53
- padding: 1rem;
54
- border-radius: 0.5rem;
55
- border-left: 5px solid #1f77b4;
56
- }
57
- .dlrm-explanation {
58
- background-color: #e8f4fd;
59
- padding: 1rem;
60
- border-radius: 0.5rem;
61
- border-left: 4px solid #0066cc;
62
- margin: 1rem 0;
63
- }
64
- .book-card {
65
- background-color: #ffffff;
66
- padding: 1rem;
67
- border-radius: 0.5rem;
68
- border: 1px solid #e1e5eb;
69
- margin-bottom: 1rem;
70
- }
71
- .cpu-mode-banner {
72
- background-color: #fff3cd;
73
- color: #856404;
74
- padding: 0.75rem;
75
- border-radius: 0.5rem;
76
- border-left: 4px solid #ffeeba;
77
- margin: 1rem 0;
78
- text-align: center;
79
- }
80
- </style>
81
- """, unsafe_allow_html=True)
82
-
83
- @st.cache_data
84
- def load_data():
85
- """Load and cache the book data"""
86
- try:
87
- books_df = pd.read_csv('Books.csv', encoding='latin-1', low_memory=False)
88
- users_df = pd.read_csv('Users.csv', encoding='latin-1', low_memory=False)
89
- ratings_df = pd.read_csv('Ratings.csv', encoding='latin-1', low_memory=False)
90
-
91
- # Clean column names
92
- books_df.columns = books_df.columns.str.replace('"', '')
93
- users_df.columns = users_df.columns.str.replace('"', '')
94
- ratings_df.columns = ratings_df.columns.str.replace('"', '')
95
-
96
- return books_df, users_df, ratings_df
97
- except Exception as e:
98
- st.error(f"Error loading data: {e}")
99
- return None, None, None
100
-
101
- @st.cache_resource
102
- def load_dlrm_model():
103
- """Load and cache the DLRM model"""
104
-
105
 
106
- try:
107
- recommender = load_dlrm_recommender("file")
108
- return recommender
109
- except Exception as e:
110
- st.error(f"Error loading DLRM model: {e}")
111
- return None
112
-
113
- def display_book_info(book_isbn, books_df, show_rating=None):
114
- """Display book information with actual book cover"""
115
- book_info = books_df[books_df['ISBN'] == book_isbn]
116
-
117
- if len(book_info) == 0:
118
- st.write(f"Book with ISBN {book_isbn} not found")
119
- return
120
-
121
- book = book_info.iloc[0]
122
-
123
- col1, col2 = st.columns([1, 3])
 
 
 
 
 
 
 
 
 
124
 
125
- with col1:
126
- # Try to display actual book cover from Image-URL-M
127
- image_url = book.get('Image-URL-M', '')
128
-
129
- if image_url and pd.notna(image_url) and str(image_url) != 'nan':
130
- try:
131
- # Clean the URL (sometimes there are issues with Amazon URLs)
132
- clean_url = str(image_url).strip()
133
- if clean_url and 'http' in clean_url:
134
- st.image(clean_url, width=150, caption="πŸ“š")
135
- else:
136
- # Fallback to placeholder
137
- st.image("https://via.placeholder.com/150x200?text=πŸ“š&color=1f77b4&bg=f0f2f6", width=150)
138
- except Exception as e:
139
- # If image loading fails, show placeholder
140
- st.image("https://via.placeholder.com/150x200?text=πŸ“š&color=1f77b4&bg=f0f2f6", width=150)
141
- st.caption("⚠️ Cover unavailable")
142
  else:
143
- # Show placeholder if no image URL
144
- st.image("https://via.placeholder.com/150x200?text=πŸ“š&color=1f77b4&bg=f0f2f6", width=150)
145
- st.caption("πŸ“š No cover")
146
 
147
- with col2:
148
- st.markdown(f"**{book['Book-Title']}**")
149
- st.write(f"*by {book['Book-Author']}*")
150
- st.write(f"πŸ“… Published: {book.get('Year-Of-Publication', 'Unknown')}")
151
- st.write(f"🏒 Publisher: {book.get('Publisher', 'Unknown')}")
152
- st.write(f"πŸ“– ISBN: {book['ISBN']}")
153
-
154
- if show_rating is not None:
155
- st.markdown(f"**🎯 DLRM Score: {show_rating:.4f}**")
 
 
 
 
156
 
157
- def main():
158
- # Header
159
- st.markdown('<h1 class="main-header">πŸ“š DLRM Book Recommendation System</h1>', unsafe_allow_html=True)
160
- st.markdown("### Deep Learning Recommendation Model for Personalized Book Suggestions")
161
-
162
- # CPU Mode Banner (if enabled)
163
- if cpu_only_mode:
164
- st.markdown('<div class="cpu-mode-banner">βš™οΈ Running in CPU-only mode (NVIDIA drivers not required)</div>', unsafe_allow_html=True)
165
-
166
- st.markdown("---")
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
- # Load data
170
- with st.spinner("Loading book data..."):
171
- books_df, users_df, ratings_df = load_data()
172
-
173
- if books_df is None:
174
- st.error("Failed to load data. Please check if CSV files are available.")
175
- return
176
-
177
- # Sidebar info
178
- st.sidebar.title("πŸ“Š Dataset Information")
179
- st.sidebar.metric("πŸ“š Books", f"{len(books_df):,}")
180
- st.sidebar.metric("πŸ‘₯ Users", f"{len(users_df):,}")
181
- st.sidebar.metric("⭐ Ratings", f"{len(ratings_df):,}")
182
-
183
- # Load DLRM model
184
- with st.spinner("Loading DLRM model..."):
185
- recommender = load_dlrm_model()
186
-
187
- if recommender is None or not hasattr(recommender, 'model') or recommender.model is None:
188
- if cpu_only_mode:
189
- st.warning("⚠️ DLRM model not available in CPU-only mode")
190
- st.info("The app will continue with limited functionality")
191
 
192
- # Show options for browsing books without recommendations
193
- st.subheader("πŸ“š Browse Books")
194
 
195
- # Simple book browser
196
- search_query = st.text_input("Search for books", placeholder="Enter title, author, or publisher")
197
- if search_query:
198
- mask = (
199
- books_df['Book-Title'].str.contains(search_query, case=False, na=False) |
200
- books_df['Book-Author'].str.contains(search_query, case=False, na=False) |
201
- books_df['Publisher'].str.contains(search_query, case=False, na=False)
 
 
 
 
 
 
 
 
 
 
 
202
  )
203
- results = books_df[mask].head(20)
204
-
205
- if len(results) > 0:
206
- st.success(f"Found {len(results)} books matching '{search_query}'")
207
- for _, book in results.iterrows():
208
- st.markdown(f"**{book['Book-Title']}** by *{book['Book-Author']}*")
209
- st.write(f"Published: {book.get('Year-Of-Publication', 'Unknown')} | ISBN: {book['ISBN']}")
210
- st.markdown("---")
211
- else:
212
- st.info(f"No books found matching '{search_query}'")
 
 
 
 
 
 
213
 
214
- return
215
- else:
216
- st.error("❌ DLRM model not available")
217
- st.info("Please run the training script first: `python train_dlrm_books.py`")
218
 
219
- st.markdown("### Available Options:")
220
- st.markdown("1. **Train DLRM Model**: Run `python train_dlrm_books.py`")
221
- st.markdown("2. **Prepare Data**: Run `python dlrm_book_recommender.py`")
222
- st.markdown("3. **Check Files**: Ensure preprocessing files exist")
223
- st.markdown("4. **Try CPU-only Mode**: Run `streamlit run streamlit_dlrm_app.py -- --cpu-only`")
224
 
225
- return
226
-
227
- if cpu_only_mode:
228
- st.success("βœ… DLRM model loaded successfully in CPU-only mode!")
229
- else:
230
- st.success("βœ… DLRM model loaded successfully!")
231
-
232
- # Model info
233
- st.sidebar.markdown("---")
234
- st.sidebar.subheader("πŸ€– DLRM Model Info")
235
- if recommender.preprocessing_info:
236
- st.sidebar.write(f"Dense features: {len(recommender.dense_cols)}")
237
- st.sidebar.write(f"Categorical features: {len(recommender.cat_cols)}")
238
- st.sidebar.write(f"Embedding dim: 64")
239
-
240
- # Main interface
241
- tab1, tab2, tab3, tab4 = st.tabs(["🎯 Get Recommendations", "πŸ” Test Predictions", "πŸ“Š Model Analysis", "πŸ“Έ Book Gallery"])
242
 
243
- with tab1:
244
- st.header("🎯 DLRM Book Recommendations")
245
- st.info("Get personalized book recommendations using the trained DLRM model")
246
-
247
- # User selection
248
- col1, col2 = st.columns([2, 1])
249
-
250
- with col1:
251
- user_ids = sorted(users_df['User-ID'].unique())
252
- selected_user_id = st.selectbox("Select a user", user_ids[:1000]) # Limit for performance
253
-
254
- with col2:
255
- num_recommendations = st.slider("Number of recommendations", 5, 20, 10)
256
-
257
- # Show user info
258
- user_info = users_df[users_df['User-ID'] == selected_user_id]
259
- if len(user_info) > 0:
260
- user = user_info.iloc[0]
261
- st.markdown(f"**User Info**: Age: {user.get('Age', 'Unknown')}, Location: {user.get('Location', 'Unknown')}")
262
-
263
- # User's reading history
264
- user_ratings = ratings_df[ratings_df['User-ID'] == selected_user_id]
265
- if len(user_ratings) > 0:
266
- with st.expander(f"πŸ“– User's Reading History ({len(user_ratings)} books)", expanded=False):
267
- top_rated = user_ratings.sort_values('Book-Rating', ascending=False).head(10)
268
- for _, rating in top_rated.iterrows():
269
- book_info = books_df[books_df['ISBN'] == rating['ISBN']]
270
- if len(book_info) > 0:
271
- book = book_info.iloc[0]
272
- st.write(f"β€’ **{book['Book-Title']}** by {book['Book-Author']} - {rating['Book-Rating']}/10 ⭐")
273
-
274
- if st.button("πŸš€ Get DLRM Recommendations", type="primary"):
275
- with st.spinner("πŸ€– DLRM is analyzing user preferences..."):
276
-
277
- # Get candidate books (popular books not rated by user)
278
- user_rated_books = set(user_ratings['ISBN']) if len(user_ratings) > 0 else set()
279
-
280
- # Get popular books as candidates
281
- book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)
282
- candidate_books = [isbn for isbn in book_popularity.head(100).index if isbn not in user_rated_books]
283
-
284
- if len(candidate_books) < num_recommendations:
285
- candidate_books = book_popularity.head(200).index.tolist()
286
-
287
- # Get recommendations
288
- recommendations = recommender.get_user_recommendations(
289
- user_id=selected_user_id,
290
- candidate_books=candidate_books,
291
- k=num_recommendations
292
- )
293
-
294
- if recommendations:
295
- st.success(f"Generated {len(recommendations)} DLRM recommendations!")
296
-
297
- st.subheader("🎯 DLRM Recommendations")
298
-
299
- for i, (book_isbn, score) in enumerate(recommendations, 1):
300
- book_info = books_df[books_df['ISBN'] == book_isbn]
301
- if len(book_info) > 0:
302
- with st.expander(f"{i}. Recommendation (DLRM Score: {score:.4f})", expanded=(i <= 3)):
303
- display_book_info(book_isbn, books_df, show_rating=score)
304
-
305
- # Additional book stats
306
- book_ratings = ratings_df[ratings_df['ISBN'] == book_isbn]
307
- if len(book_ratings) > 0:
308
- avg_rating = book_ratings['Book-Rating'].mean()
309
- num_ratings = len(book_ratings)
310
-
311
- st.markdown('<div class="dlrm-explanation">', unsafe_allow_html=True)
312
- st.markdown("**πŸ“Š Book Statistics:**")
313
- st.write(f"Average Rating: {avg_rating:.1f}/10 from {num_ratings} readers")
314
- st.write(f"DLRM Confidence: {score:.1%}")
315
- st.markdown('</div>', unsafe_allow_html=True)
316
- else:
317
- st.write(f"Book with ISBN {book_isbn} not found in database")
318
- else:
319
- st.warning("No recommendations generated")
320
 
321
- with tab2:
322
- st.header("πŸ” Test DLRM Predictions")
323
- st.info("Test how well DLRM predicts actual user ratings")
324
-
325
- col1, col2 = st.columns(2)
326
-
327
- with col1:
328
- test_user_id = st.selectbox("Select user for testing", user_ids[:500], key="test_user")
329
-
330
- with col2:
331
- test_mode = st.radio("Test mode", ["Random books", "User's actual books"])
332
-
333
- if st.button("πŸ§ͺ Test Predictions", type="secondary"):
334
- with st.spinner("Testing DLRM predictions..."):
335
-
336
- if test_mode == "User's actual books":
337
- # Test on user's actual rated books
338
- user_test_ratings = ratings_df[ratings_df['User-ID'] == test_user_id].sample(min(10, len(user_ratings)))
339
-
340
- if len(user_test_ratings) > 0:
341
- st.subheader("🎯 DLRM vs Actual Ratings")
342
-
343
- predictions = []
344
- actuals = []
345
-
346
- for _, rating in user_test_ratings.iterrows():
347
- book_isbn = rating['ISBN']
348
- actual_rating = rating['Book-Rating']
349
-
350
- # Get DLRM prediction
351
- dlrm_score = recommender.predict_rating(test_user_id, book_isbn)
352
-
353
- predictions.append(dlrm_score)
354
- actuals.append(actual_rating >= 6) # Convert to binary
355
-
356
- # Display comparison
357
- book_info = books_df[books_df['ISBN'] == book_isbn]
358
- if len(book_info) > 0:
359
- book = book_info.iloc[0]
360
-
361
- col1, col2, col3 = st.columns([2, 1, 1])
362
- with col1:
363
- st.write(f"**{book['Book-Title']}**")
364
- st.write(f"*by {book['Book-Author']}*")
365
-
366
- with col2:
367
- st.metric("Actual Rating", f"{actual_rating}/10")
368
-
369
- with col3:
370
- st.metric("DLRM Score", f"{dlrm_score:.3f}")
371
-
372
- # Calculate accuracy
373
- if predictions and actuals:
374
- # Convert DLRM scores to binary predictions
375
- binary_preds = [1 if p > 0.5 else 0 for p in predictions]
376
- accuracy = sum(p == a for p, a in zip(binary_preds, actuals)) / len(actuals)
377
-
378
- st.markdown("---")
379
- st.success(f"🎯 DLRM Accuracy: {accuracy:.1%}")
380
-
381
- # Show correlation
382
- actual_numeric = [rating['Book-Rating'] for _, rating in user_test_ratings.iterrows()]
383
- correlation = np.corrcoef(predictions, actual_numeric)[0, 1] if len(predictions) > 1 else 0
384
- st.info(f"πŸ“Š Correlation with actual ratings: {correlation:.3f}")
385
-
386
- else:
387
- st.warning("No ratings found for this user")
388
-
389
- else:
390
- # Test on random books
391
- random_books = books_df.sample(10)['ISBN'].tolist()
392
-
393
- st.subheader("🎲 Random Book Predictions")
394
-
395
- for book_isbn in random_books:
396
- dlrm_score = recommender.predict_rating(test_user_id, book_isbn)
397
-
398
- book_info = books_df[books_df['ISBN'] == book_isbn]
399
- if len(book_info) > 0:
400
- book = book_info.iloc[0]
401
-
402
- col1, col2 = st.columns([3, 1])
403
- with col1:
404
- st.write(f"**{book['Book-Title']}** by *{book['Book-Author']}*")
405
-
406
- with col2:
407
- st.metric("DLRM Score", f"{dlrm_score:.4f}")
408
 
409
- with tab3:
410
- st.header("πŸ“Š DLRM Model Analysis")
411
- st.info("Analysis of the DLRM model performance and characteristics")
 
 
412
 
413
- # Model architecture info
414
- if recommender and recommender.preprocessing_info:
415
- col1, col2 = st.columns(2)
 
 
416
 
417
- with col1:
418
- st.subheader("πŸ—οΈ Model Architecture")
419
- st.write(f"**Dense Features ({len(recommender.dense_cols)}):**")
420
- for col in recommender.dense_cols:
421
- st.write(f"β€’ {col}")
422
-
423
- st.write(f"**Categorical Features ({len(recommender.cat_cols)}):**")
424
- for i, col in enumerate(recommender.cat_cols):
425
- st.write(f"β€’ {col}: {recommender.emb_counts[i]} embeddings")
 
 
 
 
 
 
426
 
427
- with col2:
428
- st.subheader("πŸ“ˆ Dataset Statistics")
429
- total_samples = recommender.preprocessing_info.get('total_samples', 0)
430
- positive_rate = recommender.preprocessing_info.get('positive_rate', 0)
431
-
432
- st.metric("Total Samples", f"{total_samples:,}")
433
- st.metric("Positive Rate", f"{positive_rate:.1%}")
434
- st.metric("Train Samples", f"{recommender.preprocessing_info.get('train_samples', 0):,}")
435
- st.metric("Validation Samples", f"{recommender.preprocessing_info.get('val_samples', 0):,}")
436
- st.metric("Test Samples", f"{recommender.preprocessing_info.get('test_samples', 0):,}")
437
-
438
- # Feature importance analysis
439
- st.subheader("πŸ” Feature Analysis")
440
-
441
- if st.button("Analyze Feature Importance"):
442
- with st.spinner("Analyzing feature importance..."):
443
-
444
- # Sample some users and books
445
- sample_users = users_df['User-ID'].sample(20).tolist()
446
- sample_books = books_df['ISBN'].sample(20).tolist()
447
-
448
- # Test different feature combinations
449
- st.write("**Feature Impact Analysis:**")
450
-
451
- base_predictions = []
452
- for user_id in sample_users[:5]:
453
- for book_isbn in sample_books[:5]:
454
- score = recommender.predict_rating(user_id, book_isbn)
455
- base_predictions.append(score)
456
-
457
- avg_prediction = np.mean(base_predictions)
458
- st.metric("Average Prediction Score", f"{avg_prediction:.4f}")
459
-
460
- st.success("βœ… Feature analysis completed!")
461
-
462
- # Load training results if available
463
- if os.path.exists('dlrm_book_training_results.pkl'):
464
- with open('/home/mr-behdadi/PROJECT/ICE/dlrm_book_training_results.pkl', 'rb') as f:
465
- training_results = pickle.load(f)
466
 
467
- st.subheader("πŸ“ˆ Training Results")
 
 
 
 
468
 
469
- col1, col2 = st.columns(2)
 
 
 
470
 
471
- with col1:
472
- st.metric("Final Validation AUROC", f"{training_results.get('final_val_auroc', 0):.4f}")
473
- st.metric("Test AUROC", f"{training_results.get('test_auroc', 0):.4f}")
474
 
475
- with col2:
476
- val_history = training_results.get('val_aurocs_history', [])
477
- if val_history:
478
- st.line_chart(pd.DataFrame({
479
- 'Epoch': range(len(val_history)),
480
- 'Validation AUROC': val_history
481
- }).set_index('Epoch'))
482
-
483
- # Instructions
484
- st.markdown("---")
485
- st.markdown("""
486
- ## πŸš€ How DLRM Works for Book Recommendations
487
-
488
- **DLRM (Deep Learning Recommendation Model)** is specifically designed for recommendation systems and offers several advantages:
489
-
490
- ### πŸ—οΈ Architecture Benefits:
491
- - **Multi-feature Processing**: Handles both categorical (user ID, book ID, publisher) and numerical (age, ratings) features
492
- - **Embedding Tables**: Learns rich representations for categorical features
493
- - **Cross-feature Interactions**: Captures complex relationships between different features
494
- - **Scalable Design**: Efficiently handles large-scale recommendation datasets
495
 
496
- ### πŸ“Š Features Used:
497
- **Categorical Features:**
498
- - User ID, Book ID, Publisher, Country, Age Group, Publication Decade, Rating Level
499
-
500
- **Dense Features:**
501
- - Normalized Age, Publication Year, User Activity, Book Popularity, Average Ratings
502
-
503
- ### 🎯 Why DLRM vs LLM for Recommendations:
504
- - **Purpose-built**: Specifically designed for recommendation systems
505
- - **Feature Integration**: Better at combining diverse feature types
506
- - **Scalability**: More efficient for large-scale recommendation tasks
507
- - **Performance**: Higher accuracy for rating prediction tasks
508
- - **Production Ready**: Optimized for real-time inference
509
-
510
- ### πŸ’‘ Best Use Cases:
511
- - **Personalized Recommendations**: Based on user behavior and item characteristics
512
- - **Rating Prediction**: Accurately predicts user preferences
513
- - **Cold Start**: Handles new users and items through content features
514
- - **Real-time Serving**: Fast inference for production systems
515
- """)
516
-
517
- with tab4:
518
- st.header("πŸ“Έ Book Gallery")
519
- st.info("Browse book covers and discover new titles")
520
 
521
- # Gallery options
522
- col1, col2 = st.columns([2, 1])
 
 
 
 
 
 
 
 
 
 
523
 
524
- with col1:
525
- gallery_mode = st.selectbox(
526
- "Choose gallery mode",
527
- ["Popular Books", "Recent Publications", "Random Selection", "Search Results"]
528
- )
529
 
530
- with col2:
531
- books_per_row = st.slider("Books per row", 2, 6, 4)
532
- max_books = st.slider("Maximum books", 10, 50, 20)
533
 
534
- # Get books based on selected mode
535
- if gallery_mode == "Popular Books":
536
- # Get most rated books
537
- book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)
538
- gallery_books = books_df[books_df['ISBN'].isin(book_popularity.head(max_books).index)]
539
-
540
- elif gallery_mode == "Recent Publications":
541
- # Get recent books
542
- books_df_temp = books_df.copy()
543
- books_df_temp['Year-Of-Publication'] = pd.to_numeric(books_df_temp['Year-Of-Publication'], errors='coerce')
544
- recent_books = books_df_temp.sort_values('Year-Of-Publication', ascending=False, na_position='last')
545
- gallery_books = recent_books.head(max_books)
546
-
547
- elif gallery_mode == "Random Selection":
548
- # Random books
549
- gallery_books = books_df.sample(min(max_books, len(books_df)))
 
 
550
 
551
- else: # Search Results
552
- search_query = st.text_input("Search books for gallery", placeholder="Enter title, author, or publisher")
553
- if search_query:
554
- mask = (
555
- books_df['Book-Title'].str.contains(search_query, case=False, na=False) |
556
- books_df['Book-Author'].str.contains(search_query, case=False, na=False) |
557
- books_df['Publisher'].str.contains(search_query, case=False, na=False)
558
- )
559
- gallery_books = books_df[mask].head(max_books)
560
- else:
561
- gallery_books = books_df.head(max_books)
 
 
 
 
 
562
 
563
- # Display gallery
564
- if len(gallery_books) > 0:
565
- st.markdown(f"**πŸ“š Showing {len(gallery_books)} books**")
 
 
566
 
567
- # Create grid layout
568
- books_list = gallery_books.to_dict('records')
 
 
 
 
 
 
 
 
569
 
570
- # Display books in rows
571
- for i in range(0, len(books_list), books_per_row):
572
- cols = st.columns(books_per_row)
573
-
574
- for j, col in enumerate(cols):
575
- if i + j < len(books_list):
576
- book = books_list[i + j]
577
-
578
- with col:
579
- # Book cover
580
- image_url = book.get('Image-URL-M', '')
581
-
582
- if image_url and pd.notna(image_url) and str(image_url) != 'nan':
583
- try:
584
- clean_url = str(image_url).strip()
585
- if clean_url and 'http' in clean_url:
586
- st.image(clean_url, width='stretch')
587
- else:
588
- st.image("https://via.placeholder.com/150x200?text=πŸ“š&color=1f77b4&bg=f0f2f6", width='stretch')
589
- except:
590
- st.image("https://via.placeholder.com/150x200?text=πŸ“š&color=1f77b4&bg=f0f2f6", width='stretch')
591
- else:
592
- st.image("https://via.placeholder.com/150x200?text=πŸ“š&color=1f77b4&bg=f0f2f6", width='stretch')
593
-
594
- # Book info
595
- title = book['Book-Title']
596
- if len(title) > 40:
597
- title = title[:37] + "..."
598
-
599
- author = book['Book-Author']
600
- if len(author) > 25:
601
- author = author[:22] + "..."
602
-
603
- st.markdown(f"**{title}**")
604
- st.write(f"*{author}*")
605
- st.write(f"πŸ“… {book.get('Year-Of-Publication', 'Unknown')}")
606
-
607
- # Book statistics
608
- book_stats = ratings_df[ratings_df['ISBN'] == book['ISBN']]
609
- if len(book_stats) > 0:
610
- avg_rating = book_stats['Book-Rating'].mean()
611
- num_ratings = len(book_stats)
612
- st.write(f"⭐ {avg_rating:.1f}/10 ({num_ratings} ratings)")
613
- else:
614
- st.write("⭐ No ratings")
615
-
616
- # DLRM prediction button
617
- if recommender and recommender.model:
618
- if st.button(f"🎯 DLRM Score", key=f"dlrm_{book['ISBN']}"):
619
- with st.spinner("Calculating..."):
620
- # Use first user as example
621
- sample_user = users_df['User-ID'].iloc[0]
622
- dlrm_score = recommender.predict_rating(sample_user, book['ISBN'])
623
- st.success(f"DLRM Score: {dlrm_score:.3f}")
624
- else:
625
- st.info("No books found for the selected criteria")
626
 
627
- # Quick stats
628
- st.markdown("---")
629
- st.subheader("πŸ“Š Gallery Statistics")
630
 
631
- col1, col2, col3, col4 = st.columns(4)
 
 
 
 
 
 
632
 
633
- with col1:
634
- books_with_covers = sum(1 for _, book in gallery_books.iterrows()
635
- if book.get('Image-URL-M') and pd.notna(book.get('Image-URL-M')))
636
- st.metric("Books with Covers", f"{books_with_covers}/{len(gallery_books)}")
 
 
 
 
 
 
 
637
 
638
- with col2:
639
- # Convert Year-Of-Publication to numeric, coercing errors to NaN
640
- years = pd.to_numeric(gallery_books['Year-Of-Publication'], errors='coerce')
641
- avg_year = years.mean()
642
- st.metric("Average Publication Year", f"{avg_year:.0f}" if not pd.isna(avg_year) else "Unknown")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
 
644
- with col3:
645
- unique_authors = gallery_books['Book-Author'].nunique()
646
- st.metric("Unique Authors", unique_authors)
 
 
 
647
 
648
- with col4:
649
- unique_publishers = gallery_books['Publisher'].nunique()
650
- st.metric("Unique Publishers", unique_publishers)
 
 
 
 
651
 
652
  if __name__ == "__main__":
653
- main()
 
1
  """
2
+ DLRM Inference Engine for Book Recommendations
3
+ Loads trained DLRM model and provides recommendation functionality
4
  """
5
 
6
  import os
7
  import sys
 
 
 
 
 
 
 
 
 
8
  import torch
9
+ import numpy as np
10
+ import pandas as pd
11
  import pickle
12
+ import mlflow
13
+ from mlflow import MlflowClient
14
+ import tempfile
15
+ from typing import List, Dict, Tuple, Optional, Any
16
+ from functools import partial
17
  import warnings
18
  warnings.filterwarnings('ignore')
19
 
20
# CPU_ONLY is an opt-in switch read from the environment; only the value
# "true" (compared case-insensitively) turns it on.
CPU_ONLY = os.getenv('CPU_ONLY', 'false').lower() == 'true'

if CPU_ONLY:
    # Blank out CUDA_VISIBLE_DEVICES so CUDA is disabled, as the message
    # printed below states.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''
    print("πŸ”„ Running in CPU-only mode (CUDA disabled)")
27
+
28
  try:
29
+ from torchrec import EmbeddingBagCollection
30
+ from torchrec.models.dlrm import DLRM, DLRMTrain
31
+ from torchrec.modules.embedding_configs import EmbeddingBagConfig
32
+ from torchrec.sparse.jagged_tensor import KeyedJaggedTensor
33
+ from torchrec.datasets.utils import Batch
34
+ TORCHREC_AVAILABLE = True
35
  except ImportError as e:
36
+ print(f"⚠️ Warning: torchrec import error: {e}")
37
+ print("⚠️ Some functionality will be limited")
38
  TORCHREC_AVAILABLE = False
39
 
40
+ class DLRMBookRecommender:
41
+ """DLRM-based book recommender for inference"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
def __init__(self, model_path: str = None, run_id: str = None):
    """
    Build the recommender and try to load a trained DLRM model.

    The instance always starts on the CPU device with no model and no
    preprocessing info. When torchrec could not be imported the
    constructor stops there (limited mode). Otherwise it loads the
    preprocessing info and then picks a model source: an existing
    ``model_path`` wins over ``run_id``; with neither, a warning is
    printed and ``self.model`` stays ``None``.

    Args:
        model_path: Path to saved model state dict
        run_id: MLflow run ID to load model from
    """
    self.device = torch.device("cpu")
    self.model = None
    self.preprocessing_info = None
    self.torchrec_available = TORCHREC_AVAILABLE

    if self.torchrec_available:
        # Preprocessing info supplies the column lists and encoders that
        # the model loaders read from ``self``.
        self._load_preprocessing_info()

        checkpoint_on_disk = bool(model_path) and os.path.exists(model_path)
        if checkpoint_on_disk:
            self._load_model_from_path(model_path)
        elif run_id:
            self._load_model_from_mlflow(run_id)
        else:
            print("⚠️ No model loaded. Please provide model_path or run_id")
    else:
        print("⚠️ Running in limited mode without torchrec")
70
 
71
+ def _load_preprocessing_info(self):
72
+ """Load preprocessing information"""
73
+ if os.path.exists('book_dlrm_preprocessing.pkl'):
74
+ with open('book_dlrm_preprocessing.pkl', 'rb') as f:
75
+ self.preprocessing_info = pickle.load(f)
76
+
77
+ self.dense_cols = self.preprocessing_info['dense_cols']
78
+ self.cat_cols = self.preprocessing_info['cat_cols']
79
+ self.emb_counts = self.preprocessing_info['emb_counts']
80
+ self.user_encoder = self.preprocessing_info['user_encoder']
81
+ self.book_encoder = self.preprocessing_info['book_encoder']
82
+ self.publisher_encoder = self.preprocessing_info['publisher_encoder']
83
+ self.location_encoder = self.preprocessing_info['location_encoder']
84
+ self.scaler = self.preprocessing_info['scaler']
85
+
86
+ print("βœ… Preprocessing info loaded")
 
87
  else:
88
+ raise FileNotFoundError("book_dlrm_preprocessing.pkl not found. Run preprocessing first.")
 
 
89
 
90
def _load_model_from_path(self, model_path: str):
    """
    Load model from saved state dict

    Builds a DLRM with one 64-dimensional embedding table per entry of
    ``self.cat_cols`` (sized from ``self.emb_counts``), loads the state
    dict stored at ``model_path`` onto ``self.device`` and stores the
    model, in eval mode, on ``self.model``.

    Any failure is reported with a printed message instead of being
    raised; ``self.model`` is then left unchanged.

    Args:
        model_path: Path to a file readable by ``torch.load`` that holds
            the model state dict.
    """
    try:
        # Create model architecture
        eb_configs = [
            EmbeddingBagConfig(
                name=f"t_{feature_name}",
                embedding_dim=64,  # Default embedding dim
                num_embeddings=self.emb_counts[feature_idx],
                feature_names=[feature_name],
            )
            for feature_idx, feature_name in enumerate(self.cat_cols)
        ]

        dlrm_model = DLRM(
            embedding_bag_collection=EmbeddingBagCollection(
                tables=eb_configs, device=self.device
            ),
            dense_in_features=len(self.dense_cols),
            dense_arch_layer_sizes=[256, 128, 64],
            over_arch_layer_sizes=[512, 256, 128, 1],
            dense_device=self.device,
        )

        # Load state dict
        state_dict = torch.load(model_path, map_location=self.device)

        # Strip the 'model.' prefix only from the keys that carry it.
        # Slicing every key unconditionally would mangle un-prefixed keys
        # whenever the checkpoint mixes both forms.
        prefix = 'model.'
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

        dlrm_model.load_state_dict(state_dict)
        dlrm_model.eval()
        # Publish the model only once it is fully loaded.
        self.model = dlrm_model

        print(f"βœ… Model loaded from {model_path}")

    except Exception as e:
        # Best-effort loader: callers check ``self.model`` for None.
        print(f"❌ Error loading model: {e}")
129
 
130
def _load_model_from_mlflow(self, run_id: str):
    """
    Load model from MLflow

    Reads the architecture hyper-parameters logged on the run
    (``cat_cols``, ``emb_counts``, ``dense_cols``, ``embedding_dim``,
    ``dense_arch_layer_sizes``, ``over_arch_layer_sizes``), downloads the
    first state-dict artifact that can be fetched, rebuilds the DLRM and
    stores it, in eval mode, on ``self.model``.

    Any failure is reported with a printed message instead of being
    raised; ``self.model`` is then left unchanged.

    Args:
        run_id: MLflow run ID to load model from
    """
    import ast  # local import: only this loader parses logged literals

    try:
        client = MlflowClient()
        run = client.get_run(run_id)

        # Get model parameters from MLflow
        params = run.data.params

        def _literal_param(name):
            # MLflow stores params as strings; parse them as Python
            # literals. ast.literal_eval cannot execute code, unlike eval().
            raw = params.get(name)
            if raw is None:
                raise ValueError(f"MLflow run is missing parameter '{name}'")
            return ast.literal_eval(raw)

        cat_cols = _literal_param('cat_cols')
        emb_counts = _literal_param('emb_counts')
        dense_cols = _literal_param('dense_cols')
        embedding_dim = int(params.get('embedding_dim', 64))
        dense_arch_layer_sizes = _literal_param('dense_arch_layer_sizes')
        over_arch_layer_sizes = _literal_param('over_arch_layer_sizes')

        # Try different artifact paths, in the order listed.
        candidate_artifacts = (
            'model_state_dict_final',
            'model_state_dict_2',
            'model_state_dict_1',
            'model_state_dict_0',
        )
        state_dict = None
        # The download directory is only needed until the state dict is
        # in memory, so let the context manager delete it afterwards.
        with tempfile.TemporaryDirectory() as temp_dir:
            for artifact_path in candidate_artifacts:
                try:
                    client.download_artifacts(run_id, f"{artifact_path}/state_dict.pth", temp_dir)
                    state_dict = mlflow.pytorch.load_state_dict(f"{temp_dir}/{artifact_path}")
                    break
                except Exception:
                    # This artifact is missing or unreadable; try the next.
                    continue

        if state_dict is None:
            raise FileNotFoundError("No model artifacts found")

        # Create model
        eb_configs = [
            EmbeddingBagConfig(
                name=f"t_{feature_name}",
                embedding_dim=embedding_dim,
                num_embeddings=emb_counts[feature_idx],
                feature_names=[feature_name],
            )
            for feature_idx, feature_name in enumerate(cat_cols)
        ]

        dlrm_model = DLRM(
            embedding_bag_collection=EmbeddingBagCollection(
                tables=eb_configs, device=self.device
            ),
            dense_in_features=len(dense_cols),
            dense_arch_layer_sizes=dense_arch_layer_sizes,
            over_arch_layer_sizes=over_arch_layer_sizes,
            dense_device=self.device,
        )

        # Strip the 'model.' prefix only from the keys that carry it;
        # slicing every key would mangle un-prefixed ones.
        prefix = 'model.'
        state_dict = {
            (key[len(prefix):] if key.startswith(prefix) else key): value
            for key, value in state_dict.items()
        }

        dlrm_model.load_state_dict(state_dict)
        dlrm_model.eval()
        # Publish the model only once it is fully loaded.
        self.model = dlrm_model

        print(f"βœ… Model loaded from MLflow run: {run_id}")

    except Exception as e:
        # Best-effort loader: callers check ``self.model`` for None.
        print(f"❌ Error loading model from MLflow: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
193
def _prepare_user_features(self, user_id: int, user_data: Optional[Dict] = None) -> Tuple[torch.Tensor, int, int, int]:
    """Build the user-side model inputs for one user.

    Args:
        user_id: Raw user identifier; it is stringified before encoding.
        user_data: Optional mapping that may contain ``'Age'``,
            ``'Location'``, ``'user_activity'`` and ``'user_avg_rating'``.
            Defaults (age 30, location ``'usa'``) are used when omitted.

    Returns:
        A 4-tuple ``(dense_features, user_id_encoded, country_encoded,
        age_group)`` where ``dense_features`` is a float32 tensor produced by
        ``self.scaler`` from a single 6-value row, and the remaining items
        are integer category indices. Unknown users and locations map to 0.
    """
    default_age = 30

    if user_data is None:
        # No profile supplied: fall back to a neutral default user.
        user_data = {
            'User-ID': user_id,
            'Age': default_age,
            'Location': 'usa',
        }

    # Unseen IDs (or an encoder that failed to load) fall back to index 0
    # instead of aborting the prediction.
    try:
        user_id_encoded = self.user_encoder.transform([str(user_id)])[0]
    except Exception:
        user_id_encoded = 0

    try:
        # Only the last comma-separated component of the location is encoded
        # (for "city, state, country" strings that is the country).
        location = str(user_data.get('Location', 'usa')).split(',')[-1].strip().lower()
        country_encoded = self.location_encoder.transform([location])[0]
    except Exception:
        country_encoded = 0

    # A missing, non-numeric or NaN age would otherwise raise on comparison
    # or push NaN through the scaler, so it is replaced by the default.
    try:
        age = float(user_data.get('Age', default_age))
    except (TypeError, ValueError):
        age = float(default_age)
    if np.isnan(age):
        age = float(default_age)

    # Age buckets: <18 -> 0, <25 -> 1, <35 -> 2, <50 -> 3, <65 -> 4, else 5.
    age_group = sum(age >= bound for bound in (18, 25, 35, 50, 65))

    user_activity = user_data.get('user_activity', 10)
    user_avg_rating = user_data.get('user_avg_rating', 6.0)

    # Row layout is [age, 2000, user_activity, 10, user_avg_rating, 6.0].
    # NOTE(review): the constants 2000, 10 and 6.0 are presumably book-side
    # defaults (year, activity, average rating) - confirm against the column
    # order the scaler was fitted with.
    dense_features = np.array([[age, 2000, user_activity, 10, user_avg_rating, 6.0]])
    dense_features = self.scaler.transform(dense_features)
    dense_features = torch.tensor(dense_features, dtype=torch.float32)

    return dense_features, user_id_encoded, country_encoded, age_group
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
def _prepare_book_features(self, book_isbn: str, book_data: Optional[Dict] = None) -> Tuple[int, int, int, int]:
    """Build the book-side categorical indices for one book.

    Args:
        book_isbn: ISBN of the book; it is stringified before encoding.
        book_data: Optional mapping that may contain ``'Publisher'`` and
            ``'Year-Of-Publication'``.

    Returns:
        ``(book_id_encoded, publisher_encoded, decade_encoded,
        rating_level)``. Unknown books and publishers map to 0, an
        unresolvable decade maps to 6, and ``rating_level`` is always 1.
    """
    if book_data is None:
        book_data = {}

    # Unseen values (or a missing encoder) fall back to index 0 instead of
    # aborting the prediction.
    try:
        book_id_encoded = self.book_encoder.transform([str(book_isbn)])[0]
    except Exception:
        book_id_encoded = 0

    try:
        publisher = str(book_data.get('Publisher', 'Unknown'))
        publisher_encoded = self.publisher_encoder.transform([publisher])[0]
    except Exception:
        publisher_encoded = 0

    # The original referenced a bare `preprocessing_info` name that is not
    # bound inside this method, so the lookup always failed and the default
    # was always used. Resolve the encoder from the instance first and keep
    # a module-level `preprocessing_info` as a last resort.
    # NOTE(review): confirm which attribute actually holds the fitted decade
    # encoder on this class.
    info = getattr(self, 'preprocessing_info', None) or globals().get('preprocessing_info') or {}
    decade_encoder = getattr(self, 'decade_encoder', None)
    if decade_encoder is None and isinstance(info, dict):
        decade_encoder = info.get('decade_encoder')

    default_decade_index = 6  # default to 2000s
    decade_encoded = default_decade_index
    if decade_encoder is not None:
        try:
            # Non-numeric years must not abort the prediction, so the int()
            # conversion lives inside the guard.
            year = int(book_data.get('Year-Of-Publication', 2000))
            decade = (year // 10) * 10
            decade_encoded = decade_encoder.transform([str(decade)])[0]
        except Exception:
            decade_encoded = default_decade_index

    # Rating level (default to medium)
    rating_level = 1

    return book_id_encoded, publisher_encoded, decade_encoded, rating_level
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
def predict_rating(self, user_id: int, book_isbn: str,
                   user_data: Optional[Dict] = None,
                   book_data: Optional[Dict] = None) -> float:
    """
    Score a single user/book pair with the loaded model.

    Args:
        user_id: User ID
        book_isbn: Book ISBN
        user_data: Extra user attributes forwarded to the user feature builder (optional)
        book_data: Extra book attributes forwarded to the book feature builder (optional)

    Returns:
        Sigmoid of the model output (0-1). Returns 0.0 when no model is
        loaded or when anything fails during scoring, and 0.5 when torchrec
        is unavailable.
    """
    # Guard clauses: nothing to score with.
    if self.model is None:
        print("❌ Model not loaded")
        return 0.0
    if not self.torchrec_available:
        print("❌ Cannot make predictions without torchrec")
        return 0.5  # neutral fallback

    try:
        dense, user_idx, country_idx, age_idx = self._prepare_user_features(user_id, user_data)
        book_idx, publisher_idx, decade_idx, rating_idx = self._prepare_book_features(book_isbn, book_data)

        # One id per categorical feature, each with jagged length 1.
        # NOTE(review): this order is presumably the order of self.cat_cols - confirm.
        sparse_ids = [user_idx, book_idx, publisher_idx, country_idx, age_idx, decade_idx, rating_idx]
        sparse = KeyedJaggedTensor.from_lengths_sync(
            self.cat_cols,
            torch.tensor(sparse_ids),
            torch.tensor([1] * len(sparse_ids), dtype=torch.int32),
        )

        # Inference only: no gradients needed.
        with torch.no_grad():
            raw_output = self.model(dense_features=dense, sparse_features=sparse)
            return torch.sigmoid(raw_output).item()

    except Exception as e:
        print(f"Error in prediction: {e}")
        return 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
def get_user_recommendations(self, user_id: int,
                             candidate_books: List[str],
                             k: int = 10,
                             user_data: Optional[Dict] = None) -> List[Tuple[str, float]]:
    """
    Rank candidate books for one user and keep the best k.

    Args:
        user_id: User ID
        candidate_books: ISBNs to score
        k: Maximum number of results to return
        user_data: Extra user attributes passed through to predict_rating

    Returns:
        (book_isbn, prediction_score) tuples, highest score first; an empty
        list when the model or torchrec is unavailable.
    """
    ready = self.model is not None and self.torchrec_available
    if not ready:
        print("❌ Model not loaded or torchrec not available")
        return []

    print(f"Generating recommendations for user {user_id} from {len(candidate_books)} candidates...")

    # Score every candidate, then order by score (ties keep input order).
    scored = [
        (isbn, self.predict_rating(user_id, isbn, user_data))
        for isbn in candidate_books
    ]
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:k]
356
+
357
def batch_recommend(self, user_ids: List[int],
                    candidate_books: List[str],
                    k: int = 10) -> Dict[int, List[Tuple[str, float]]]:
    """
    Produce recommendations for several users in one call.

    Args:
        user_ids: User IDs to process, in order
        candidate_books: ISBNs scored for every user
        k: Number of recommendations per user

    Returns:
        Mapping of each user_id to its get_user_recommendations result
    """
    return {
        uid: self.get_user_recommendations(uid, candidate_books, k)
        for uid in user_ids
    }
377
+
378
def get_similar_books(self, target_book_isbn: str,
                      candidate_books: List[str],
                      sample_users: List[int],
                      k: int = 10) -> List[Tuple[str, float]]:
    """
    Find books whose predicted scores move together with the target's.

    Every sample user is scored against the target book and against each
    candidate; similarity is the Pearson correlation between the two score
    vectors.

    Args:
        target_book_isbn: Target book ISBN
        candidate_books: Candidate ISBNs (duplicates and the target itself
            are ignored)
        sample_users: Users whose predicted scores are compared; at least
            two are needed for a correlation to exist
        k: Number of similar books to return

    Returns:
        (book_isbn, similarity_score) tuples, most similar first.
        Candidates with an undefined correlation (e.g. constant scores) are
        omitted.
    """
    users = list(sample_users)
    # A correlation over fewer than two observations is undefined.
    if len(users) < 2:
        return []

    # Duplicate ISBNs previously shared one score list, which then grew to a
    # multiple of the user count and made np.corrcoef raise on the length
    # mismatch. De-duplicate while keeping first-seen order.
    candidates = [
        isbn for isbn in dict.fromkeys(candidate_books)
        if isbn != target_book_isbn
    ]

    target_scores = np.array(
        [self.predict_rating(user_id, target_book_isbn) for user_id in users],
        dtype=float,
    )

    similarities = []
    for isbn in candidates:
        scores = np.array(
            [self.predict_rating(user_id, isbn) for user_id in users],
            dtype=float,
        )
        # Constant vectors give a zero variance; silence the resulting
        # divide warnings and drop the NaN result below.
        with np.errstate(invalid='ignore', divide='ignore'):
            correlation = np.corrcoef(target_scores, scores)[0, 1]
        if not np.isnan(correlation):
            similarities.append((isbn, float(correlation)))

    # Highest correlation first; ties keep candidate order.
    similarities.sort(key=lambda pair: pair[1], reverse=True)
    return similarities[:k]
422
+
423
+
424
def load_dlrm_recommender(model_source: str = "latest") -> DLRMBookRecommender:
    """
    Build a DLRMBookRecommender from the requested source.

    Args:
        model_source: "latest" to use the most recent MLflow run, "file" to
            probe a fixed list of local checkpoint files, anything else is
            treated as an MLflow run_id

    Returns:
        A recommender built from the requested source when construction
        succeeds; otherwise the default-constructed DLRMBookRecommender()
    """
    # CPU-only mode is only announced here; it is read from the environment.
    if os.environ.get('CPU_ONLY', 'false').lower() == 'true':
        print("πŸ”„ Loading recommender in CPU-only mode")

    # Default-constructed instance returned whenever nothing better loads.
    fallback = DLRMBookRecommender()

    if not TORCHREC_AVAILABLE:
        print("⚠️ torchrec not available, returning limited recommender")
        return fallback

    if model_source == "latest":
        try:
            experiment = mlflow.get_experiment_by_name('dlrm-book-recommendation-book_recommender')
            if experiment:
                # Newest run first, only one row requested.
                runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id],
                                          order_by=["start_time desc"], max_results=1)
                if len(runs) > 0:
                    return DLRMBookRecommender(run_id=runs.iloc[0].run_id)
        except Exception as e:
            print(f"⚠️ Error loading from MLflow: {e}")

    elif model_source == "file":
        # Checkpoints are probed in this exact order; first loadable one wins.
        checkpoint_paths = (
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_final.pth',
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_2.pth',
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_0.pth',
            '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_1.pth',
        )
        for filename in checkpoint_paths:
            if not os.path.exists(filename):
                continue
            try:
                return DLRMBookRecommender(model_path=filename)
            except Exception as e:
                print(f"⚠️ Error loading from {filename}: {e}")

    else:
        # Any other value is interpreted as an explicit MLflow run id.
        try:
            return DLRMBookRecommender(run_id=model_source)
        except Exception as e:
            print(f"⚠️ Error loading from run_id {model_source}: {e}")

    print("⚠️ Could not load any trained model")
    return fallback
485
+
486
+
487
def demo_dlrm_recommendations():
    """Console walkthrough: load data, recommend, show history, find similar books.

    Reads Books.csv, Users.csv and Ratings.csv from the working directory,
    loads a recommender from local checkpoint files and prints the results.
    Returns early (after a message) when no trained model could be loaded.
    """

    print("πŸš€ DLRM Book Recommendation Demo")
    print("=" * 50)

    # Read the three CSV files, then strip stray quote characters from headers.
    books_df, users_df, ratings_df = (
        pd.read_csv(csv_name, encoding='latin-1', low_memory=False)
        for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv')
    )
    for frame in (books_df, users_df, ratings_df):
        frame.columns = frame.columns.str.replace('"', '')

    def first_match(isbn):
        # First catalogue row for this ISBN, or None when it is not listed.
        matches = books_df[books_df['ISBN'] == isbn]
        return matches.iloc[0] if len(matches) > 0 else None

    recommender = load_dlrm_recommender("file")

    if recommender.model is None:
        print("❌ No trained model found. Please run training first.")
        return

    # The first rating row picks the demo user; the first 20 books are candidates.
    sample_user_id = ratings_df['User-ID'].iloc[0]
    sample_books = books_df['ISBN'].head(20).tolist()

    print(f"\nπŸ“š Getting recommendations for User {sample_user_id}")
    print(f"Testing with {len(sample_books)} candidate books...")

    recommendations = recommender.get_user_recommendations(
        user_id=sample_user_id,
        candidate_books=sample_books,
        k=10
    )

    print(f"\n🎯 Top 10 DLRM Recommendations:")
    print("-" * 50)

    for rank, (isbn, score) in enumerate(recommendations, 1):
        book = first_match(isbn)
        if book is None:
            print(f"{rank:2d}. ISBN: {isbn}, Score: {score:.4f}")
        else:
            print(f"{rank:2d}. {book['Book-Title']} by {book['Book-Author']}")
            print(f" ISBN: {isbn}, Score: {score:.4f}")
        print()

    # Show up to five of the user's real ratings for comparison.
    history = ratings_df[ratings_df['User-ID'] == sample_user_id]
    if len(history) > 0:
        print(f"\nπŸ“– User {sample_user_id}'s Actual Reading History:")
        print("-" * 50)

        for _, rating in history.head(5).iterrows():
            book = first_match(rating['ISBN'])
            if book is not None:
                print(f"β€’ {book['Book-Title']} by {book['Book-Author']} - Rating: {rating['Book-Rating']}/10")

    # The similarity demo needs at least one recommendation to start from.
    if len(recommendations) == 0:
        return

    target_book = recommendations[0][0]
    print(f"\nπŸ” Finding books similar to: {target_book}")

    similar_books = recommender.get_similar_books(
        target_book_isbn=target_book,
        candidate_books=sample_books,
        sample_users=ratings_df['User-ID'].head(10).tolist(),
        k=5
    )

    print(f"\nπŸ“š Similar Books:")
    print("-" * 30)
    for rank, (isbn, similarity) in enumerate(similar_books, 1):
        book = first_match(isbn)
        if book is not None:
            print(f"{rank}. {book['Book-Title']} (similarity: {similarity:.3f})")
570
 
571
if __name__ == "__main__":
    # Run the console demo only when this file is executed as a script.
    demo_dlrm_recommendations()