edwinbh committed on
Commit
2968f9e
·
verified ·
1 Parent(s): cbdb03d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +590 -35
src/streamlit_app.py CHANGED
@@ -1,40 +1,595 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
 
 
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ """
2
+ Streamlit Dashboard for DLRM Book Recommendation System
3
+ Simple interface for DLRM-based book recommendations
4
+ """
5
+
6
  import streamlit as st
7
+ import pandas as pd
8
+ import numpy as np
9
+ import torch
10
+ import pickle
11
+ import os
12
+ import sys
13
+ from typing import Dict, List, Tuple, Optional
14
+ import warnings
15
+ warnings.filterwarnings('ignore')
16
 
17
+ sys.path.append('.')
18
+ from dlrm_inference import DLRMBookRecommender, load_dlrm_recommender
19
 
 
 
 
20
 
21
# Page configuration: browser-tab title and icon, wide layout, sidebar open on load.
st.set_page_config(
    page_title="DLRM Book Recommendations",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="expanded",
)
28
+
29
# Custom CSS: the stylesheet is kept in a named constant and injected once.
# It styles the page header, metric cards, DLRM explanation boxes and book cards.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 3rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 5px solid #1f77b4;
    }
    .dlrm-explanation {
        background-color: #e8f4fd;
        padding: 1rem;
        border-radius: 0.5rem;
        border-left: 4px solid #0066cc;
        margin: 1rem 0;
    }
    .book-card {
        background-color: #ffffff;
        padding: 1rem;
        border-radius: 0.5rem;
        border: 1px solid #e1e5eb;
        margin-bottom: 1rem;
    }
</style>
"""

# unsafe_allow_html is required for the raw <style> tag to be emitted.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
60
+
61
@st.cache_data
def _read_book_tables():
    """Read Books.csv, Users.csv and Ratings.csv and return them as a 3-tuple.

    This is the cached part of ``load_data``. Read errors are deliberately
    NOT caught here: an exception leaves nothing in the cache, so the next
    rerun retries the read instead of replaying a stored failure.
    """
    tables = []
    for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv'):
        frame = pd.read_csv(csv_name, encoding='latin-1', low_memory=False)
        # Clean column names: drop literal double-quote characters.
        frame.columns = frame.columns.str.replace('"', '')
        tables.append(frame)
    return tuple(tables)


def load_data():
    """Load and cache the book data.

    Returns:
        ``(books_df, users_df, ratings_df)`` on success, or
        ``(None, None, None)`` after showing an error in the UI when any
        of the CSV files cannot be read.
    """
    try:
        books_df, users_df, ratings_df = _read_book_tables()
    except Exception as e:  # UI boundary: report the problem, do not crash the app
        st.error(f"Error loading data: {e}")
        return None, None, None
    return books_df, users_df, ratings_df


# Keep ``load_data.clear()`` working for callers that relied on the
# cache-clearing hook of the previously decorated function.
load_data.clear = _read_book_tables.clear
78
+
79
@st.cache_resource
def _load_cached_recommender():
    """Build the recommender once and let Streamlit keep it as a shared resource.

    Errors propagate to the caller so that a failed load is not stored in the
    cache and can be retried on the next rerun.
    """
    return load_dlrm_recommender("file")


def load_dlrm_model():
    """Load and cache the DLRM model.

    Returns:
        The object returned by ``load_dlrm_recommender("file")``, or ``None``
        after showing an error in the UI when loading raises.
    """
    try:
        return _load_cached_recommender()
    except Exception as e:  # UI boundary: report the problem, do not crash the app
        st.error(f"Error loading DLRM model: {e}")
        return None
90
+
91
# Shown whenever a book has no usable cover image.
_COVER_PLACEHOLDER_URL = "https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6"


def _is_missing(value):
    """Return True for ``None`` and for scalar NaN/NA values."""
    if value is None:
        return True
    # Strings are never NaN; skipping them keeps pd.isna on scalars only.
    return not isinstance(value, str) and bool(pd.isna(value))


def _text_or_unknown(value, fallback='Unknown'):
    """Return *value* as stripped text, or *fallback* when it is missing or blank."""
    if _is_missing(value):
        return fallback
    text = str(value).strip()
    return text if text else fallback


def _cover_url_or_none(raw_url):
    """Return *raw_url* stripped if it is an http(s) URL, otherwise ``None``."""
    if _is_missing(raw_url):
        return None
    url = str(raw_url).strip()
    # Require the scheme at the start; a substring test would accept junk
    # such as "see http..." or "ftp://host/http.jpg".
    if url.lower().startswith(('http://', 'https://')):
        return url
    return None


def display_book_info(book_isbn, books_df, show_rating=None):
    """Display book information with actual book cover.

    Args:
        book_isbn: ISBN to look up in ``books_df['ISBN']``.
        books_df: Book table; the first row matching the ISBN is shown.
        show_rating: Optional numeric DLRM score rendered with four decimals.

    Writes a "not found" message and returns when no row matches.
    """
    matches = books_df[books_df['ISBN'] == book_isbn]
    if matches.empty:
        st.write(f"Book with ISBN {book_isbn} not found")
        return

    book = matches.iloc[0]
    cover_col, details_col = st.columns([1, 3])

    with cover_col:
        cover_url = _cover_url_or_none(book.get('Image-URL-M'))
        if cover_url is None:
            st.image(_COVER_PLACEHOLDER_URL, width=150)
            st.caption("📚 No cover")
        else:
            try:
                st.image(cover_url, width=150, caption="📚")
            except Exception:
                # Best effort: a bad cover must not break the whole page.
                st.image(_COVER_PLACEHOLDER_URL, width=150)
                st.caption("⚠️ Cover unavailable")

    with details_col:
        # .get() defaults only cover missing columns; NaN cells are mapped to
        # "Unknown" explicitly so the UI never prints "nan".
        st.markdown(f"**{_text_or_unknown(book.get('Book-Title'))}**")
        st.write(f"*by {_text_or_unknown(book.get('Book-Author'))}*")
        st.write(f"📅 Published: {_text_or_unknown(book.get('Year-Of-Publication'))}")
        st.write(f"🏢 Publisher: {_text_or_unknown(book.get('Publisher'))}")
        st.write(f"📖 ISBN: {book['ISBN']}")

        if show_rating is not None:
            st.markdown(f"**🎯 DLRM Score: {show_rating:.4f}**")
134
+
135
# Shown in the gallery whenever a book has no usable cover image.
_GALLERY_PLACEHOLDER_URL = "https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6"

# Optional pickle with training metrics; read by the "Model Analysis" tab.
_TRAINING_RESULTS_PATH = 'dlrm_book_training_results.pkl'

# Static explainer rendered at the bottom of the "Model Analysis" tab.
_HOW_DLRM_WORKS_MD = """
## 🚀 How DLRM Works for Book Recommendations

**DLRM (Deep Learning Recommendation Model)** is specifically designed for recommendation systems and offers several advantages:

### 🏗️ Architecture Benefits:
- **Multi-feature Processing**: Handles both categorical (user ID, book ID, publisher) and numerical (age, ratings) features
- **Embedding Tables**: Learns rich representations for categorical features
- **Cross-feature Interactions**: Captures complex relationships between different features
- **Scalable Design**: Efficiently handles large-scale recommendation datasets

### 📊 Features Used:
**Categorical Features:**
- User ID, Book ID, Publisher, Country, Age Group, Publication Decade, Rating Level

**Dense Features:**
- Normalized Age, Publication Year, User Activity, Book Popularity, Average Ratings

### 🎯 Why DLRM vs LLM for Recommendations:
- **Purpose-built**: Specifically designed for recommendation systems
- **Feature Integration**: Better at combining diverse feature types
- **Scalability**: More efficient for large-scale recommendation tasks
- **Performance**: Higher accuracy for rating prediction tasks
- **Production Ready**: Optimized for real-time inference

### 💡 Best Use Cases:
- **Personalized Recommendations**: Based on user behavior and item characteristics
- **Rating Prediction**: Accurately predicts user preferences
- **Cold Start**: Handles new users and items through content features
- **Real-time Serving**: Fast inference for production systems
"""


def _find_book(books_df, isbn):
    """Return the first row of *books_df* whose ISBN equals *isbn*, or None."""
    matches = books_df[books_df['ISBN'] == isbn]
    return None if matches.empty else matches.iloc[0]


def _shorten(value, limit):
    """Return ``str(value)`` cut to *limit* characters, ending in "..." when cut."""
    text = str(value)  # str() so a NaN title/author cannot break len()
    return text if len(text) <= limit else text[:limit - 3] + "..."


def _gallery_cover_url(raw_url):
    """Return *raw_url* stripped if it is an http(s) URL, otherwise None."""
    if raw_url is None or (not isinstance(raw_url, str) and pd.isna(raw_url)):
        return None
    url = str(raw_url).strip()
    return url if url.lower().startswith(('http://', 'https://')) else None


def _render_recommendations_tab(recommender, books_df, users_df, ratings_df, user_ids):
    """Tab 1: pick a user, show their history and rank popular unread books."""
    st.header("🎯 DLRM Book Recommendations")
    st.info("Get personalized book recommendations using the trained DLRM model")

    # User selection
    user_col, count_col = st.columns([2, 1])
    with user_col:
        selected_user_id = st.selectbox("Select a user", user_ids[:1000])  # Limit for performance
    with count_col:
        num_recommendations = st.slider("Number of recommendations", 5, 20, 10)

    # Show user info
    user_info = users_df[users_df['User-ID'] == selected_user_id]
    if len(user_info) > 0:
        user = user_info.iloc[0]
        st.markdown(f"**User Info**: Age: {user.get('Age', 'Unknown')}, Location: {user.get('Location', 'Unknown')}")

    # User's reading history
    user_ratings = ratings_df[ratings_df['User-ID'] == selected_user_id]
    if len(user_ratings) > 0:
        with st.expander(f"📖 User's Reading History ({len(user_ratings)} books)", expanded=False):
            top_rated = user_ratings.sort_values('Book-Rating', ascending=False).head(10)
            for _, rating in top_rated.iterrows():
                book = _find_book(books_df, rating['ISBN'])
                if book is not None:
                    st.write(f"• **{book['Book-Title']}** by {book['Book-Author']} - {rating['Book-Rating']}/10 ⭐")

    if not st.button("🚀 Get DLRM Recommendations", type="primary"):
        return

    with st.spinner("🤖 DLRM is analyzing user preferences..."):
        # Candidates are the most-rated books the user has not rated yet.
        user_rated_books = set(user_ratings['ISBN'])
        book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)

        candidate_books = [isbn for isbn in book_popularity.head(100).index
                           if isbn not in user_rated_books]
        if len(candidate_books) < num_recommendations:
            # Widen the pool, still excluding books the user already rated.
            candidate_books = [isbn for isbn in book_popularity.head(200).index
                               if isbn not in user_rated_books]

        recommendations = recommender.get_user_recommendations(
            user_id=selected_user_id,
            candidate_books=candidate_books,
            k=num_recommendations,
        )

        if not recommendations:
            st.warning("No recommendations generated")
            return

        st.success(f"Generated {len(recommendations)} DLRM recommendations!")
        st.subheader("🎯 DLRM Recommendations")

        for rank, (book_isbn, score) in enumerate(recommendations, 1):
            if _find_book(books_df, book_isbn) is None:
                st.write(f"Book with ISBN {book_isbn} not found in database")
                continue

            with st.expander(f"{rank}. Recommendation (DLRM Score: {score:.4f})", expanded=(rank <= 3)):
                display_book_info(book_isbn, books_df, show_rating=score)

                # Additional book stats
                book_ratings = ratings_df[ratings_df['ISBN'] == book_isbn]
                if len(book_ratings) > 0:
                    avg_rating = book_ratings['Book-Rating'].mean()
                    num_ratings = len(book_ratings)
                    # One markdown call: an opening <div> in one call and the
                    # closing tag in another never wraps the content between.
                    st.markdown(
                        '<div class="dlrm-explanation">'
                        '<strong>📊 Book Statistics:</strong><br>'
                        f'Average Rating: {avg_rating:.1f}/10 from {num_ratings} readers<br>'
                        f'DLRM Confidence: {score:.1%}'
                        '</div>',
                        unsafe_allow_html=True,
                    )


def _show_actual_rating_comparison(recommender, books_df, ratings_df, test_user_id):
    """Compare DLRM scores with up to 10 of the test user's own ratings."""
    rated_by_user = ratings_df[ratings_df['User-ID'] == test_user_id]
    if rated_by_user.empty:
        st.warning("No ratings found for this user")
        return

    # The sample size is bounded by THIS user's rating count.
    user_test_ratings = rated_by_user.sample(min(10, len(rated_by_user)))

    st.subheader("🎯 DLRM vs Actual Ratings")

    predictions = []
    actual_ratings = []
    for _, rating in user_test_ratings.iterrows():
        book_isbn = rating['ISBN']
        actual_rating = rating['Book-Rating']

        # Get DLRM prediction
        dlrm_score = recommender.predict_rating(test_user_id, book_isbn)
        predictions.append(dlrm_score)
        actual_ratings.append(actual_rating)

        # Display comparison
        book = _find_book(books_df, book_isbn)
        if book is not None:
            title_col, actual_col, score_col = st.columns([2, 1, 1])
            with title_col:
                st.write(f"**{book['Book-Title']}**")
                st.write(f"*by {book['Book-Author']}*")
            with actual_col:
                st.metric("Actual Rating", f"{actual_rating}/10")
            with score_col:
                st.metric("DLRM Score", f"{dlrm_score:.3f}")

    # Binary accuracy: a score above 0.5 is a predicted "like", and a
    # rating of 6 or more is an actual "like".
    hits = sum(bool(p > 0.5) == bool(r >= 6) for p, r in zip(predictions, actual_ratings))
    accuracy = hits / len(actual_ratings)

    st.markdown("---")
    st.success(f"🎯 DLRM Accuracy: {accuracy:.1%}")

    # Show correlation (needs at least two points)
    correlation = np.corrcoef(predictions, actual_ratings)[0, 1] if len(predictions) > 1 else 0
    st.info(f"📊 Correlation with actual ratings: {correlation:.3f}")


def _show_random_book_predictions(recommender, books_df, test_user_id):
    """Score up to 10 randomly sampled books for the test user."""
    # min() keeps sample() from raising on a catalogue smaller than 10 rows.
    random_books = books_df.sample(min(10, len(books_df)))['ISBN'].tolist()

    st.subheader("🎲 Random Book Predictions")

    for book_isbn in random_books:
        dlrm_score = recommender.predict_rating(test_user_id, book_isbn)
        book = _find_book(books_df, book_isbn)
        if book is not None:
            title_col, score_col = st.columns([3, 1])
            with title_col:
                st.write(f"**{book['Book-Title']}** by *{book['Book-Author']}*")
            with score_col:
                st.metric("DLRM Score", f"{dlrm_score:.4f}")


def _render_prediction_test_tab(recommender, books_df, ratings_df, user_ids):
    """Tab 2: spot-check DLRM scores against real ratings or random books."""
    st.header("🔍 Test DLRM Predictions")
    st.info("Test how well DLRM predicts actual user ratings")

    user_col, mode_col = st.columns(2)
    with user_col:
        test_user_id = st.selectbox("Select user for testing", user_ids[:500], key="test_user")
    with mode_col:
        test_mode = st.radio("Test mode", ["Random books", "User's actual books"])

    if not st.button("🧪 Test Predictions", type="secondary"):
        return

    with st.spinner("Testing DLRM predictions..."):
        if test_mode == "User's actual books":
            _show_actual_rating_comparison(recommender, books_df, ratings_df, test_user_id)
        else:
            _show_random_book_predictions(recommender, books_df, test_user_id)


def _render_model_analysis_tab(recommender, books_df, users_df):
    """Tab 3: model/feature summary, a sampled score average and training metrics."""
    st.header("📊 DLRM Model Analysis")
    st.info("Analysis of the DLRM model performance and characteristics")

    # Model architecture info
    if recommender and recommender.preprocessing_info:
        arch_col, stats_col = st.columns(2)

        with arch_col:
            st.subheader("🏗️ Model Architecture")
            st.write(f"**Dense Features ({len(recommender.dense_cols)}):**")
            for col in recommender.dense_cols:
                st.write(f"• {col}")

            st.write(f"**Categorical Features ({len(recommender.cat_cols)}):**")
            for i, col in enumerate(recommender.cat_cols):
                st.write(f"• {col}: {recommender.emb_counts[i]} embeddings")

        with stats_col:
            st.subheader("📈 Dataset Statistics")
            info = recommender.preprocessing_info
            st.metric("Total Samples", f"{info.get('total_samples', 0):,}")
            st.metric("Positive Rate", f"{info.get('positive_rate', 0):.1%}")
            st.metric("Train Samples", f"{info.get('train_samples', 0):,}")
            st.metric("Validation Samples", f"{info.get('val_samples', 0):,}")
            st.metric("Test Samples", f"{info.get('test_samples', 0):,}")

    # Feature importance analysis
    st.subheader("🔍 Feature Analysis")

    if st.button("Analyze Feature Importance"):
        with st.spinner("Analyzing feature importance..."):
            # Score a 5 x 5 grid of random users and books; min() keeps
            # sample() from raising on tables with fewer than 5 rows.
            sample_users = users_df['User-ID'].sample(min(5, len(users_df))).tolist()
            sample_books = books_df['ISBN'].sample(min(5, len(books_df))).tolist()

            st.write("**Feature Impact Analysis:**")

            base_predictions = [
                recommender.predict_rating(user_id, book_isbn)
                for user_id in sample_users
                for book_isbn in sample_books
            ]
            if base_predictions:
                avg_prediction = np.mean(base_predictions)
                st.metric("Average Prediction Score", f"{avg_prediction:.4f}")

            st.success("✅ Feature analysis completed!")

    # Load training results if available
    if os.path.exists(_TRAINING_RESULTS_PATH):
        # NOTE(security): pickle.load can execute arbitrary code. This is only
        # acceptable while the file is produced by the project's own training
        # run; never point it at an untrusted file.
        with open(_TRAINING_RESULTS_PATH, 'rb') as f:
            training_results = pickle.load(f)

        st.subheader("📈 Training Results")

        metric_col, chart_col = st.columns(2)
        with metric_col:
            st.metric("Final Validation AUROC", f"{training_results.get('final_val_auroc', 0):.4f}")
            st.metric("Test AUROC", f"{training_results.get('test_auroc', 0):.4f}")

        with chart_col:
            val_history = training_results.get('val_aurocs_history', [])
            if val_history:
                st.line_chart(pd.DataFrame({
                    'Epoch': range(len(val_history)),
                    'Validation AUROC': val_history,
                }).set_index('Epoch'))

    # Instructions
    st.markdown("---")
    st.markdown(_HOW_DLRM_WORKS_MD)


def _select_gallery_books(gallery_mode, max_books, books_df, ratings_df):
    """Return the rows of *books_df* to show for the chosen gallery mode."""
    if gallery_mode == "Popular Books":
        # Get most rated books
        book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)
        return books_df[books_df['ISBN'].isin(book_popularity.head(max_books).index)]

    if gallery_mode == "Recent Publications":
        # Sort on a numeric copy of the year; unparseable years go last.
        numeric_years = pd.to_numeric(books_df['Year-Of-Publication'], errors='coerce')
        with_years = books_df.assign(**{'Year-Of-Publication': numeric_years})
        return with_years.sort_values(
            'Year-Of-Publication', ascending=False, na_position='last'
        ).head(max_books)

    if gallery_mode == "Random Selection":
        return books_df.sample(min(max_books, len(books_df)))

    # Search Results
    search_query = st.text_input("Search books for gallery", placeholder="Enter title, author, or publisher")
    if not search_query:
        return books_df.head(max_books)

    # regex=False: the query is user text, so "C++" or "(" must be matched
    # literally instead of raising a regex compile error.
    mask = (
        books_df['Book-Title'].str.contains(search_query, case=False, na=False, regex=False) |
        books_df['Book-Author'].str.contains(search_query, case=False, na=False, regex=False) |
        books_df['Publisher'].str.contains(search_query, case=False, na=False, regex=False)
    )
    return books_df[mask].head(max_books)


def _render_gallery_card(book, position, rating_stats, users_df, recommender):
    """Render one gallery cell: cover, short title/author, rating summary, score button."""
    # Book cover
    cover_url = _gallery_cover_url(book.get('Image-URL-M', ''))
    if cover_url is None:
        st.image(_GALLERY_PLACEHOLDER_URL, width='stretch')
    else:
        try:
            st.image(cover_url, width='stretch')
        except Exception:
            # Best effort: a bad cover must not break the whole grid.
            st.image(_GALLERY_PLACEHOLDER_URL, width='stretch')

    # Book info
    st.markdown(f"**{_shorten(book['Book-Title'], 40)}**")
    st.write(f"*{_shorten(book['Book-Author'], 25)}*")
    st.write(f"📅 {book.get('Year-Of-Publication', 'Unknown')}")

    # Book statistics, looked up in the table precomputed for the gallery
    isbn = book['ISBN']
    if isbn in rating_stats.index:
        stats = rating_stats.loc[isbn]
        st.write(f"⭐ {stats['mean']:.1f}/10 ({int(stats['size'])} ratings)")
    else:
        st.write("⭐ No ratings")

    # DLRM prediction button; the position makes the widget key unique even
    # if the same ISBN appears twice in the gallery.
    if recommender and recommender.model:
        if st.button("🎯 DLRM Score", key=f"dlrm_{position}_{isbn}"):
            with st.spinner("Calculating..."):
                # Use first user as example
                sample_user = users_df['User-ID'].iloc[0]
                dlrm_score = recommender.predict_rating(sample_user, isbn)
                st.success(f"DLRM Score: {dlrm_score:.3f}")


def _render_gallery_statistics(gallery_books):
    """Show cover count, mean publication year and author/publisher counts."""
    st.markdown("---")
    st.subheader("📊 Gallery Statistics")

    covers_col, year_col, authors_col, publishers_col = st.columns(4)

    with covers_col:
        cover_column = gallery_books.get('Image-URL-M')
        if cover_column is None:
            books_with_covers = 0
        else:
            has_cover = cover_column.notna() & (cover_column.astype(str).str.strip() != '')
            books_with_covers = int(has_cover.sum())
        st.metric("Books with Covers", f"{books_with_covers}/{len(gallery_books)}")

    with year_col:
        # Convert Year-Of-Publication to numeric, coercing errors to NaN
        years = pd.to_numeric(gallery_books['Year-Of-Publication'], errors='coerce')
        avg_year = years.mean()
        st.metric("Average Publication Year", f"{avg_year:.0f}" if not pd.isna(avg_year) else "Unknown")

    with authors_col:
        st.metric("Unique Authors", gallery_books['Book-Author'].nunique())

    with publishers_col:
        st.metric("Unique Publishers", gallery_books['Publisher'].nunique())


def _render_gallery_tab(recommender, books_df, users_df, ratings_df):
    """Tab 4: browse book covers in a grid with per-book rating summaries."""
    st.header("📸 Book Gallery")
    st.info("Browse book covers and discover new titles")

    # Gallery options
    mode_col, size_col = st.columns([2, 1])
    with mode_col:
        gallery_mode = st.selectbox(
            "Choose gallery mode",
            ["Popular Books", "Recent Publications", "Random Selection", "Search Results"],
        )
    with size_col:
        books_per_row = st.slider("Books per row", 2, 6, 4)
        max_books = st.slider("Maximum books", 10, 50, 20)

    gallery_books = _select_gallery_books(gallery_mode, max_books, books_df, ratings_df)

    if len(gallery_books) > 0:
        st.markdown(f"**📚 Showing {len(gallery_books)} books**")

        # One pass over the ratings for all gallery books instead of one
        # full-table scan per card.
        gallery_ratings = ratings_df[ratings_df['ISBN'].isin(gallery_books['ISBN'])]
        rating_stats = gallery_ratings.groupby('ISBN')['Book-Rating'].agg(['mean', 'size'])

        books_list = gallery_books.to_dict('records')

        # Display books in rows
        for row_start in range(0, len(books_list), books_per_row):
            cols = st.columns(books_per_row)
            row_books = books_list[row_start:row_start + books_per_row]
            for offset, (col, book) in enumerate(zip(cols, row_books)):
                with col:
                    _render_gallery_card(book, row_start + offset, rating_stats, users_df, recommender)
    else:
        st.info("No books found for the selected criteria")

    # Quick stats
    _render_gallery_statistics(gallery_books)


def main():
    """Build the dashboard: load data and model, then render the four tabs.

    Stops early, with an explanatory message, when the CSV data or the DLRM
    model is unavailable.
    """
    # Header
    st.markdown('<h1 class="main-header">📚 DLRM Book Recommendation System</h1>', unsafe_allow_html=True)
    st.markdown("### Deep Learning Recommendation Model for Personalized Book Suggestions")
    st.markdown("---")

    # Load data
    with st.spinner("Loading book data..."):
        books_df, users_df, ratings_df = load_data()

    if books_df is None:
        st.error("Failed to load data. Please check if CSV files are available.")
        return

    # Sidebar info
    st.sidebar.title("📊 Dataset Information")
    st.sidebar.metric("📚 Books", f"{len(books_df):,}")
    st.sidebar.metric("👥 Users", f"{len(users_df):,}")
    st.sidebar.metric("⭐ Ratings", f"{len(ratings_df):,}")

    # Load DLRM model
    with st.spinner("Loading DLRM model..."):
        recommender = load_dlrm_model()

    if recommender is None or recommender.model is None:
        st.error("❌ DLRM model not available")
        st.info("Please run the training script first: `python train_dlrm_books.py`")

        st.markdown("### Available Options:")
        st.markdown("1. **Train DLRM Model**: Run `python train_dlrm_books.py`")
        st.markdown("2. **Prepare Data**: Run `python dlrm_book_recommender.py`")
        st.markdown("3. **Check Files**: Ensure preprocessing files exist")
        return

    st.success("✅ DLRM model loaded successfully!")

    # Model info
    st.sidebar.markdown("---")
    st.sidebar.subheader("🤖 DLRM Model Info")
    if recommender.preprocessing_info:
        st.sidebar.write(f"Dense features: {len(recommender.dense_cols)}")
        st.sidebar.write(f"Categorical features: {len(recommender.cat_cols)}")
        st.sidebar.write("Embedding dim: 64")

    # Shared by the two user pickers (tabs 1 and 2).
    user_ids = sorted(users_df['User-ID'].unique())

    # Main interface
    tab1, tab2, tab3, tab4 = st.tabs(["🎯 Get Recommendations", "🔍 Test Predictions", "📊 Model Analysis", "📸 Book Gallery"])

    with tab1:
        _render_recommendations_tab(recommender, books_df, users_df, ratings_df, user_ids)

    with tab2:
        _render_prediction_test_tab(recommender, books_df, ratings_df, user_ids)

    with tab3:
        _render_model_analysis_tab(recommender, books_df, users_df)

    with tab4:
        _render_gallery_tab(recommender, books_df, users_df, ratings_df)


# Entry point: build the dashboard when this file is run as the main script.
if __name__ == "__main__":
    main()