Spaces:

edwinbh
/

BookRecommendationSystem

Sleeping

App Files Files Community

edwinbh committed on Sep 14, 2025

Commit

acbbfff

verified ·

1 Parent(s): 5d8ed49

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +599 -518

src/streamlit_app.py CHANGED Viewed

@@ -1,572 +1,653 @@
 """
-DLRM Inference Engine for Book Recommendations
-Loads trained DLRM model and provides recommendation functionality
 """
 import os
 import sys
-import torch
-import numpy as np
 import pandas as pd
 import pickle
-import mlflow
-from mlflow import MlflowClient
-import tempfile
-from typing import List, Dict, Tuple, Optional, Any
-from functools import partial
 import warnings
 warnings.filterwarnings('ignore')
-# Check for CPU_ONLY environment variable
-CPU_ONLY = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
-# Disable CUDA if CPU_ONLY is set
-if CPU_ONLY:
-    os.environ['CUDA_VISIBLE_DEVICES'] = ''
-    print("🔄 Running in CPU-only mode (CUDA disabled)")
 try:
-    from torchrec import EmbeddingBagCollection
-    from torchrec.models.dlrm import DLRM, DLRMTrain
-    from torchrec.modules.embedding_configs import EmbeddingBagConfig
-    from torchrec.sparse.jagged_tensor import KeyedJaggedTensor
-    from torchrec.datasets.utils import Batch
-    TORCHREC_AVAILABLE = True
 except ImportError as e:
-    print(f"⚠️ Warning: torchrec import error: {e}")
-    print("⚠️ Some functionality will be limited")
     TORCHREC_AVAILABLE = False
-class DLRMBookRecommender:
-    """DLRM-based book recommender for inference"""
-    def __init__(self, model_path: str = None, run_id: str = None):
-        """
-        Initialize DLRM book recommender
-        Args:
-            model_path: Path to saved model state dict
-            run_id: MLflow run ID to load model from
-        """
-        self.device = torch.device("cpu")
-        self.model = None
-        self.preprocessing_info = None
-        self.torchrec_available = TORCHREC_AVAILABLE
-        if not self.torchrec_available:
-            print("⚠️ Running in limited mode without torchrec")
-            return
-        # Load preprocessing info
-        self._load_preprocessing_info()
-        # Load model
-        if model_path and os.path.exists(model_path):
-            self._load_model_from_path(model_path)
-        elif run_id:
-            self._load_model_from_mlflow(run_id)
-        else:
-            print("⚠️ No model loaded. Please provide model_path or run_id")
-    def _load_preprocessing_info(self):
-        """Load preprocessing information"""
-        if os.path.exists('book_dlrm_preprocessing.pkl'):
-            with open('book_dlrm_preprocessing.pkl', 'rb') as f:
-                self.preprocessing_info = pickle.load(f)
-            self.dense_cols = self.preprocessing_info['dense_cols']
-            self.cat_cols = self.preprocessing_info['cat_cols']
-            self.emb_counts = self.preprocessing_info['emb_counts']
-            self.user_encoder = self.preprocessing_info['user_encoder']
-            self.book_encoder = self.preprocessing_info['book_encoder']
-            self.publisher_encoder = self.preprocessing_info['publisher_encoder']
-            self.location_encoder = self.preprocessing_info['location_encoder']
-            self.scaler = self.preprocessing_info['scaler']
-            print("✅ Preprocessing info loaded")
         else:
-            raise FileNotFoundError("book_dlrm_preprocessing.pkl not found. Run preprocessing first.")
-    def _load_model_from_path(self, model_path: str):
-        """Load model from saved state dict"""
-        try:
-            # Create model architecture
-            eb_configs = [
-                EmbeddingBagConfig(
-                    name=f"t_{feature_name}",
-                    embedding_dim=64,  # Default embedding dim
-                    num_embeddings=self.emb_counts[feature_idx],
-                    feature_names=[feature_name],
-                )
-                for feature_idx, feature_name in enumerate(self.cat_cols)
-            ]
-            dlrm_model = DLRM(
-                embedding_bag_collection=EmbeddingBagCollection(
-                    tables=eb_configs, device=self.device
-                ),
-                dense_in_features=len(self.dense_cols),
-                dense_arch_layer_sizes=[256, 128, 64],
-                over_arch_layer_sizes=[512, 256, 128, 1],
-                dense_device=self.device,
-            )
-            # Load state dict
-            state_dict = torch.load(model_path, map_location=self.device)
-            # Remove 'model.' prefix if present
-            if any(key.startswith('model.') for key in state_dict.keys()):
-                state_dict = {k[6:]: v for k, v in state_dict.items()}
-            dlrm_model.load_state_dict(state_dict)
-            self.model = dlrm_model
-            self.model.eval()
-            print(f"✅ Model loaded from {model_path}")
-        except Exception as e:
-            print(f"❌ Error loading model: {e}")
-    def _load_model_from_mlflow(self, run_id: str):
-        """Load model from MLflow"""
-        try:
-            client = MlflowClient()
-            run = client.get_run(run_id)
-            # Get model parameters from MLflow
-            params = run.data.params
-            cat_cols = eval(params.get('cat_cols'))
-            emb_counts = eval(params.get('emb_counts'))
-            dense_cols = eval(params.get('dense_cols'))
-            embedding_dim = int(params.get('embedding_dim', 64))
-            dense_arch_layer_sizes = eval(params.get('dense_arch_layer_sizes'))
-            over_arch_layer_sizes = eval(params.get('over_arch_layer_sizes'))
-            # Download model from MLflow
-            temp_dir = tempfile.mkdtemp()
-            # Try different artifact paths
-            for artifact_path in ['model_state_dict_final', 'model_state_dict_2', 'model_state_dict_1', 'model_state_dict_0']:
-                try:
-                    client.download_artifacts(run_id, f"{artifact_path}/state_dict.pth", temp_dir)
-                    state_dict = mlflow.pytorch.load_state_dict(f"{temp_dir}/{artifact_path}")
-                    break
-                except:
-                    continue
-            else:
-                raise Exception("No model artifacts found")
-            # Create model
-            eb_configs = [
-                EmbeddingBagConfig(
-                    name=f"t_{feature_name}",
-                    embedding_dim=embedding_dim,
-                    num_embeddings=emb_counts[feature_idx],
-                    feature_names=[feature_name],
                 )
-                for feature_idx, feature_name in enumerate(cat_cols)
-            ]
-            dlrm_model = DLRM(
-                embedding_bag_collection=EmbeddingBagCollection(
-                    tables=eb_configs, device=self.device
-                ),
-                dense_in_features=len(dense_cols),
-                dense_arch_layer_sizes=dense_arch_layer_sizes,
-                over_arch_layer_sizes=over_arch_layer_sizes,
-                dense_device=self.device,
-            )
-            # Remove prefix and load state dict
-            if any(key.startswith('model.') for key in state_dict.keys()):
-                state_dict = {k[6:]: v for k, v in state_dict.items()}
-            dlrm_model.load_state_dict(state_dict)
-            self.model = dlrm_model
-            self.model.eval()
-            print(f"✅ Model loaded from MLflow run: {run_id}")
-        except Exception as e:
-            print(f"❌ Error loading model from MLflow: {e}")
-    def _prepare_user_features(self, user_id: int, user_data: Optional[Dict] = None) -> Tuple[torch.Tensor, KeyedJaggedTensor]:
-        """Prepare user features for inference"""
-        if user_data is None:
-            # Create default user features
-            user_data = {
-                'User-ID': user_id,
-                'Age': 30,  # Default age
-                'Location': 'usa',  # Default location
-            }
-        # Encode categorical features
-        try:
-            user_id_encoded = self.user_encoder.transform([str(user_id)])[0]
-        except:
-            # Handle unknown user
-            user_id_encoded = 0
-        try:
-            location = str(user_data.get('Location', 'usa')).split(',')[-1].strip().lower()
-            country_encoded = self.location_encoder.transform([location])[0]
-        except:
-            country_encoded = 0
-        # Age group
-        age = user_data.get('Age', 30)
-        if age < 18:
-            age_group = 0
-        elif age < 25:
-            age_group = 1
-        elif age < 35:
-            age_group = 2
-        elif age < 50:
-            age_group = 3
-        elif age < 65:
-            age_group = 4
-        else:
-            age_group = 5
-        # Get user statistics (if available)
-        user_activity = user_data.get('user_activity', 10)  # Default
-        user_avg_rating = user_data.get('user_avg_rating', 6.0)  # Default
-        age_normalized = user_data.get('Age', 30)
-        # Normalize dense features
-        dense_features = np.array([[age_normalized, 2000, user_activity, 10, user_avg_rating, 6.0]])  # Default values
-        dense_features = self.scaler.transform(dense_features)
-        dense_features = torch.tensor(dense_features, dtype=torch.float32)
-        return dense_features, user_id_encoded, country_encoded, age_group
-    def _prepare_book_features(self, book_isbn: str, book_data: Optional[Dict] = None) -> Tuple[int, int, int, int]:
-        """Prepare book features for inference"""
-        if book_data is None:
-            book_data = {}
-        # Encode book ID
-        try:
-            book_id_encoded = self.book_encoder.transform([str(book_isbn)])[0]
-        except:
-            book_id_encoded = 0
-        # Encode publisher
-        try:
-            publisher = str(book_data.get('Publisher', 'Unknown'))
-            publisher_encoded = self.publisher_encoder.transform([publisher])[0]
-        except:
-            publisher_encoded = 0
-        # Publication decade
-        year = book_data.get('Year-Of-Publication', 2000)
-        decade = ((int(year) // 10) * 10)
-        try:
-            decade_encoded = preprocessing_info.get('decade_encoder', LabelEncoder()).transform([str(decade)])[0]
-        except:
-            decade_encoded = 6  # Default to 2000s
-        # Rating level (default to medium)
-        rating_level = 1
-        return book_id_encoded, publisher_encoded, decade_encoded, rating_level
-    def predict_rating(self, user_id: int, book_isbn: str,
-                      user_data: Optional[Dict] = None,
-                      book_data: Optional[Dict] = None) -> float:
-        """
-        Predict rating probability for user-book pair
-        Args:
-            user_id: User ID
-            book_isbn: Book ISBN
-            user_data: Additional user data (optional)
-            book_data: Additional book data (optional)
-        Returns:
-            Prediction probability (0-1)
-        """
-        if self.model is None:
-            print("❌ Model not loaded")
-            return 0.0
-        if not self.torchrec_available:
-            print("❌ Cannot make predictions without torchrec")
-            return 0.5  # Return default neutral prediction
-        try:
-            # Prepare features
-            dense_features, user_id_encoded, country_encoded, age_group = self._prepare_user_features(user_id, user_data)
-            book_id_encoded, publisher_encoded, decade_encoded, rating_level = self._prepare_book_features(book_isbn, book_data)
-            # Create sparse features
-            kjt_values = [user_id_encoded, book_id_encoded, publisher_encoded, country_encoded, age_group, decade_encoded, rating_level]
-            kjt_lengths = [1] * len(kjt_values)
-            sparse_features = KeyedJaggedTensor.from_lengths_sync(
-                self.cat_cols,
-                torch.tensor(kjt_values),
-                torch.tensor(kjt_lengths, dtype=torch.int32),
-            )
-            # Make prediction
-            with torch.no_grad():
-                logits = self.model(dense_features=dense_features, sparse_features=sparse_features)
-                prediction = torch.sigmoid(logits).item()
-            return prediction
-        except Exception as e:
-            print(f"Error in prediction: {e}")
-            return 0.0
-    def get_user_recommendations(self, user_id: int,
-                               candidate_books: List[str],
-                               k: int = 10,
-                               user_data: Optional[Dict] = None) -> List[Tuple[str, float]]:
-        """
-        Get top-k book recommendations for a user
-        Args:
-            user_id: User ID
-            candidate_books: List of candidate book ISBNs
-            k: Number of recommendations
-            user_data: Additional user data
-        Returns:
-            List of (book_isbn, prediction_score) tuples
-        """
-        if self.model is None or not self.torchrec_available:
-            print("❌ Model not loaded or torchrec not available")
-            return []
-        recommendations = []
-        print(f"Generating recommendations for user {user_id} from {len(candidate_books)} candidates...")
-        for book_isbn in candidate_books:
-            score = self.predict_rating(user_id, book_isbn, user_data)
-            recommendations.append((book_isbn, score))
-        # Sort by score and return top-k
-        recommendations.sort(key=lambda x: x[1], reverse=True)
-        return recommendations[:k]
-    def batch_recommend(self, user_ids: List[int],
-                       candidate_books: List[str],
-                       k: int = 10) -> Dict[int, List[Tuple[str, float]]]:
-        """
-        Generate recommendations for multiple users
-        Args:
-            user_ids: List of user IDs
-            candidate_books: List of candidate book ISBNs
-            k: Number of recommendations per user
-        Returns:
-            Dictionary mapping user_id to recommendations
-        """
-        results = {}
-        for user_id in user_ids:
-            results[user_id] = self.get_user_recommendations(user_id, candidate_books, k)
-        return results
-    def get_similar_books(self, target_book_isbn: str,
-                         candidate_books: List[str],
-                         sample_users: List[int],
-                         k: int = 10) -> List[Tuple[str, float]]:
-        """
-        Find books similar to target book by comparing user preferences
-        Args:
-            target_book_isbn: Target book ISBN
-            candidate_books: List of candidate book ISBNs
-            sample_users: Sample users to test similarity with
-            k: Number of similar books
-        Returns:
-            List of (book_isbn, similarity_score) tuples
-        """
-        target_scores = []
-        candidate_scores = {book: [] for book in candidate_books}
-        # Get predictions for target book and candidates across sample users
-        for user_id in sample_users:
-            target_score = self.predict_rating(user_id, target_book_isbn)
-            target_scores.append(target_score)
-            for book_isbn in candidate_books:
-                if book_isbn != target_book_isbn:
-                    score = self.predict_rating(user_id, book_isbn)
-                    candidate_scores[book_isbn].append(score)
-        # Calculate similarity based on correlation of user preferences
-        similarities = []
-        target_scores = np.array(target_scores)
-        for book_isbn, scores in candidate_scores.items():
-            if len(scores) > 0:
-                scores_array = np.array(scores)
-                # Calculate correlation as similarity measure
-                correlation = np.corrcoef(target_scores, scores_array)[0, 1]
-                if not np.isnan(correlation):
-                    similarities.append((book_isbn, correlation))
-        # Sort by similarity and return top-k
-        similarities.sort(key=lambda x: x[1], reverse=True)
-        return similarities[:k]
-def load_dlrm_recommender(model_source: str = "latest") -> DLRMBookRecommender:
-    """
-    Load DLRM recommender from various sources
-    Args:
-        model_source: "latest" for latest MLflow run, "file" for local file, or specific run_id
-    Returns:
-        DLRMBookRecommender instance
-    """
-    # Check if we're in CPU-only mode
-    cpu_only = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
-    if cpu_only:
-        print("🔄 Loading recommender in CPU-only mode")
-    # Create recommender instance
-    recommender = DLRMBookRecommender()
-    # If torchrec is not available, return limited recommender
-    if not TORCHREC_AVAILABLE:
-        print("⚠️ torchrec not available, returning limited recommender")
-        return recommender
-    if model_source == "latest":
-        # Try to get latest MLflow run
-        try:
-            experiment = mlflow.get_experiment_by_name('dlrm-book-recommendation-book_recommender')
-            if experiment:
-                runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id],
-                                        order_by=["start_time desc"], max_results=1)
-                if len(runs) > 0:
-                    latest_run_id = runs.iloc[0].run_id
-                    recommender = DLRMBookRecommender(run_id=latest_run_id)
-                    return recommender
-        except Exception as e:
-            print(f"⚠️ Error loading from MLflow: {e}")
-    elif model_source == "file":
-        # Try to load from local file
-        for filename in [
-'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_final.pth',
-'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_2.pth',
-'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_0.pth',
-'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_1.pth']:
-            if os.path.exists(filename):
-                try:
-                    recommender = DLRMBookRecommender(model_path=filename)
-                    return recommender
-                except Exception as e:
-                    print(f"⚠️ Error loading from {filename}: {e}")
-    else:
-        # Treat as run_id
-        try:
-            recommender = DLRMBookRecommender(run_id=model_source)
-            return recommender
-        except Exception as e:
-            print(f"⚠️ Error loading from run_id {model_source}: {e}")
-    print("⚠️ Could not load any trained model")
-    return recommender
-def demo_dlrm_recommendations():
-    """Demo function to show DLRM recommendations"""
-    print("🚀 DLRM Book Recommendation Demo")
-    print("=" * 50)
-    # Load book data for demo
-    books_df = pd.read_csv('Books.csv', encoding='latin-1', low_memory=False)
-    users_df = pd.read_csv('Users.csv', encoding='latin-1', low_memory=False)
-    ratings_df = pd.read_csv('Ratings.csv', encoding='latin-1', low_memory=False)
-    books_df.columns = books_df.columns.str.replace('"', '')
-    users_df.columns = users_df.columns.str.replace('"', '')
-    ratings_df.columns = ratings_df.columns.str.replace('"', '')
-    # Load recommender
-    recommender = load_dlrm_recommender("file")
-    if recommender.model is None:
-        print("❌ No trained model found. Please run training first.")
-        return
-    # Get sample user and books
-    sample_user_id = ratings_df['User-ID'].iloc[0]
-    sample_books = books_df['ISBN'].head(20).tolist()
-    print(f"\n📚 Getting recommendations for User {sample_user_id}")
-    print(f"Testing with {len(sample_books)} candidate books...")
-    # Get recommendations
-    recommendations = recommender.get_user_recommendations(
-        user_id=sample_user_id,
-        candidate_books=sample_books,
-        k=10
-    )
-    print(f"\n🎯 Top 10 DLRM Recommendations:")
-    print("-" * 50)
-    for i, (book_isbn, score) in enumerate(recommendations, 1):
-        # Get book info
-        book_info = books_df[books_df['ISBN'] == book_isbn]
-        if len(book_info) > 0:
-            book = book_info.iloc[0]
-            title = book['Book-Title']
-            author = book['Book-Author']
-            print(f"{i:2d}. {title} by {author}")
-            print(f"    ISBN: {book_isbn}, Score: {score:.4f}")
-        else:
-            print(f"{i:2d}. ISBN: {book_isbn}, Score: {score:.4f}")
-        print()
-    # Show user's actual ratings for comparison
-    user_ratings = ratings_df[ratings_df['User-ID'] == sample_user_id]
-    if len(user_ratings) > 0:
-        print(f"\n📖 User {sample_user_id}'s Actual Reading History:")
-        print("-" * 50)
-        for _, rating in user_ratings.head(5).iterrows():
-            book_info = books_df[books_df['ISBN'] == rating['ISBN']]
-            if len(book_info) > 0:
-                book = book_info.iloc[0]
-                print(f"• {book['Book-Title']} by {book['Book-Author']} - Rating: {rating['Book-Rating']}/10")
-    # Test book similarity
-    if len(recommendations) > 0:
-        target_book = recommendations[0][0]
-        print(f"\n🔍 Finding books similar to: {target_book}")
-        similar_books = recommender.get_similar_books(
-            target_book_isbn=target_book,
-            candidate_books=sample_books,
-            sample_users=ratings_df['User-ID'].head(10).tolist(),
-            k=5
-        )
-        print(f"\n📚 Similar Books:")
-        print("-" * 30)
-        for i, (book_isbn, similarity) in enumerate(similar_books, 1):
-            book_info = books_df[books_df['ISBN'] == book_isbn]
-            if len(book_info) > 0:
-                book = book_info.iloc[0]
-                print(f"{i}. {book['Book-Title']} (similarity: {similarity:.3f})")
 if __name__ == "__main__":
-    demo_dlrm_recommendations()

 """
+Streamlit Dashboard for DLRM Book Recommendation System
+Simple interface for DLRM-based book recommendations
 """
 import os
 import sys
+import streamlit as st
+# Enable CPU-only mode when '--cpu-only' appears anywhere among the script
+# arguments (not only as the first one), so the flag still works when other
+# arguments precede it. This must run before torch is imported below so the
+# CPU_ONLY environment variable is already set at import time.
+if '--cpu-only' in sys.argv[1:]:
+    os.environ['CPU_ONLY'] = 'true'
+    print("🔄 Running in CPU-only mode (CUDA disabled)")
 import pandas as pd
+import numpy as np
+import torch
 import pickle
+from typing import Dict, List, Tuple, Optional
 import warnings
 warnings.filterwarnings('ignore')
+# Import our DLRM recommender
 try:
+    from dlrm_inference import DLRMBookRecommender, load_dlrm_recommender, TORCHREC_AVAILABLE
 except ImportError as e:
+    print(f"⚠️ Error importing DLRM recommender: {e}")
     TORCHREC_AVAILABLE = False
+# Page configuration
+st.set_page_config(
+    page_title="DLRM Book Recommendations",
+    page_icon="📚",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Check if running in CPU-only mode
+cpu_only_mode = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
+# Custom CSS
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 3rem;
+        color: #1f77b4;
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .metric-card {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        border-left: 5px solid #1f77b4;
+    }
+    .dlrm-explanation {
+        background-color: #e8f4fd;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        border-left: 4px solid #0066cc;
+        margin: 1rem 0;
+    }
+    .book-card {
+        background-color: #ffffff;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        border: 1px solid #e1e5eb;
+        margin-bottom: 1rem;
+    }
+    .cpu-mode-banner {
+        background-color: #fff3cd;
+        color: #856404;
+        padding: 0.75rem;
+        border-radius: 0.5rem;
+        border-left: 4px solid #ffeeba;
+        margin: 1rem 0;
+        text-align: center;
+    }
+</style>
+""", unsafe_allow_html=True)
+@st.cache_data
+def _read_book_tables():
+    """Read Books.csv, Users.csv and Ratings.csv and return them as a tuple.
+    Any read error propagates to the caller. Streamlit only caches values a
+    function returns, so a failed read is never stored and the next rerun
+    retries instead of replaying a cached failure.
+    """
+    tables = []
+    for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv'):
+        frame = pd.read_csv(csv_name, encoding='latin-1', low_memory=False)
+        # Clean column names (strip stray double quotes from the headers)
+        frame.columns = frame.columns.str.replace('"', '')
+        tables.append(frame)
+    return tuple(tables)
+def load_data():
+    """Load and cache the book data.
+    Returns:
+        (books_df, users_df, ratings_df) on success, or (None, None, None)
+        after reporting the error with st.error when any file cannot be read.
+    """
+    # The error handling lives outside the cached reader on purpose: only
+    # successful loads are cached, a failure is reported and retried on the
+    # next rerun.
+    try:
+        books_df, users_df, ratings_df = _read_book_tables()
+    except Exception as e:
+        st.error(f"Error loading data: {e}")
+        return None, None, None
+    return books_df, users_df, ratings_df
+# Keep the cache-clearing hook callers had when load_data itself was the
+# cached function.
+load_data.clear = _read_book_tables.clear
+@st.cache_resource
+def _build_dlrm_recommender():
+    """Build the recommender from the local model file; errors propagate.
+    Raising (instead of returning None) keeps a failed attempt out of the
+    resource cache, so the next rerun tries again.
+    """
+    return load_dlrm_recommender("file")
+def load_dlrm_model():
+    """Load and cache the DLRM model.
+    Returns:
+        The recommender returned by load_dlrm_recommender("file"), or None
+        after reporting the error with st.error when loading raises. This
+        includes the NameError raised when load_dlrm_recommender was never
+        imported.
+    """
+    # Error handling stays outside the cached builder so that a None result
+    # from a failure is not cached for the lifetime of the process.
+    try:
+        return _build_dlrm_recommender()
+    except Exception as e:
+        st.error(f"Error loading DLRM model: {e}")
+        return None
+# Keep the cache-clearing hook callers had when load_dlrm_model itself was
+# the cached function.
+load_dlrm_model.clear = _build_dlrm_recommender.clear
+def display_book_info(book_isbn, books_df, show_rating=None):
+    """Render one book (cover image plus details) into the Streamlit page.
+    Args:
+        book_isbn: ISBN value matched against the 'ISBN' column of books_df.
+        books_df: DataFrame of books; the code reads the columns 'ISBN',
+            'Book-Title', 'Book-Author' and, when present, 'Image-URL-M',
+            'Year-Of-Publication' and 'Publisher'.
+        show_rating: Optional numeric score; when not None it is shown as
+            the DLRM score formatted to four decimals.
+    Returns:
+        None. If no row matches book_isbn, a "not found" message is written
+        and nothing else is rendered.
+    """
+    book_info = books_df[books_df['ISBN'] == book_isbn]
+    if len(book_info) == 0:
+        st.write(f"Book with ISBN {book_isbn} not found")
+        return
+    # If several rows share the ISBN, only the first match is displayed.
+    book = book_info.iloc[0]
+    # Narrow column for the cover, wide column for the text (1:3 ratio).
+    col1, col2 = st.columns([1, 3])
+    with col1:
+        # Try to display actual book cover from Image-URL-M
+        image_url = book.get('Image-URL-M', '')
+        # Guard against missing values: empty string, NaN, or the literal
+        # text 'nan'.
+        if image_url and pd.notna(image_url) and str(image_url) != 'nan':
+            try:
+                # Clean the URL (sometimes there are issues with Amazon URLs)
+                clean_url = str(image_url).strip()
+                # NOTE(review): this is a substring test, so any value that
+                # merely contains 'http' is treated as a usable URL.
+                if clean_url and 'http' in clean_url:
+                    st.image(clean_url, width=150, caption="📚")
+                else:
+                    # Fallback to placeholder
+                    st.image("https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6", width=150)
+            except Exception as e:
+                # If image loading fails, show placeholder
+                # NOTE(review): presumably a broken remote URL fails in the
+                # browser rather than raising here - confirm that this
+                # branch is actually reachable.
+                st.image("https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6", width=150)
+                st.caption("⚠️ Cover unavailable")
         else:
+            # Show placeholder if no image URL
+            st.image("https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6", width=150)
+            st.caption("📚 No cover")
+    with col2:
+        # Title, author and ISBN are read with [] (a missing column raises);
+        # year and publisher fall back to 'Unknown' when the column is absent.
+        st.markdown(f"**{book['Book-Title']}**")
+        st.write(f"*by {book['Book-Author']}*")
+        st.write(f"📅 Published: {book.get('Year-Of-Publication', 'Unknown')}")
+        st.write(f"🏢 Publisher: {book.get('Publisher', 'Unknown')}")
+        st.write(f"📖 ISBN: {book['ISBN']}")
+        if show_rating is not None:
+            st.markdown(f"**🎯 DLRM Score: {show_rating:.4f}**")
+def main():
+    # Header
+    st.markdown('<h1 class="main-header">📚 DLRM Book Recommendation System</h1>', unsafe_allow_html=True)
+    st.markdown("### Deep Learning Recommendation Model for Personalized Book Suggestions")
+    # CPU Mode Banner (if enabled)
+    if cpu_only_mode:
+        st.markdown('<div class="cpu-mode-banner">⚙️ Running in CPU-only mode (NVIDIA drivers not required)</div>', unsafe_allow_html=True)
+    st.markdown("---")
+    # Load data
+    with st.spinner("Loading book data..."):
+        books_df, users_df, ratings_df = load_data()
+    if books_df is None:
+        st.error("Failed to load data. Please check if CSV files are available.")
+        return
+    # Sidebar info
+    st.sidebar.title("📊 Dataset Information")
+    st.sidebar.metric("📚 Books", f"{len(books_df):,}")
+    st.sidebar.metric("👥 Users", f"{len(users_df):,}")
+    st.sidebar.metric("⭐ Ratings", f"{len(ratings_df):,}")
+    # Load DLRM model
+    with st.spinner("Loading DLRM model..."):
+        recommender = load_dlrm_model()
+    if recommender is None or not hasattr(recommender, 'model') or recommender.model is None:
+        if cpu_only_mode:
+            st.warning("⚠️ DLRM model not available in CPU-only mode")
+            st.info("The app will continue with limited functionality")
+            # Show options for browsing books without recommendations
+            st.subheader("📚 Browse Books")
+            # Simple book browser
+            search_query = st.text_input("Search for books", placeholder="Enter title, author, or publisher")
+            if search_query:
+                mask = (
+                    books_df['Book-Title'].str.contains(search_query, case=False, na=False) |
+                    books_df['Book-Author'].str.contains(search_query, case=False, na=False) |
+                    books_df['Publisher'].str.contains(search_query, case=False, na=False)
                 )
+                results = books_df[mask].head(20)
+                if len(results) > 0:
+                    st.success(f"Found {len(results)} books matching '{search_query}'")
+                    for _, book in results.iterrows():
+                        st.markdown(f"**{book['Book-Title']}** by *{book['Book-Author']}*")
+                        st.write(f"Published: {book.get('Year-Of-Publication', 'Unknown')} | ISBN: {book['ISBN']}")
+                        st.markdown("---")
+                else:
+                    st.info(f"No books found matching '{search_query}'")
+            return
+        else:
+            st.error("❌ DLRM model not available")
+            st.info("Please run the training script first: `python train_dlrm_books.py`")
+            st.markdown("### Available Options:")
+            st.markdown("1. **Train DLRM Model**: Run `python train_dlrm_books.py`")
+            st.markdown("2. **Prepare Data**: Run `python dlrm_book_recommender.py`")
+            st.markdown("3. **Check Files**: Ensure preprocessing files exist")
+            st.markdown("4. **Try CPU-only Mode**: Run `streamlit run streamlit_dlrm_app.py -- --cpu-only`")
+            return
+    if cpu_only_mode:
+        st.success("✅ DLRM model loaded successfully in CPU-only mode!")
+    else:
+        st.success("✅ DLRM model loaded successfully!")
+    # Model info
+    st.sidebar.markdown("---")
+    st.sidebar.subheader("🤖 DLRM Model Info")
+    if recommender.preprocessing_info:
+        st.sidebar.write(f"Dense features: {len(recommender.dense_cols)}")
+        st.sidebar.write(f"Categorical features: {len(recommender.cat_cols)}")
+        st.sidebar.write(f"Embedding dim: 64")
+    # Main interface
+    tab1, tab2, tab3, tab4 = st.tabs(["🎯 Get Recommendations", "🔍 Test Predictions", "📊 Model Analysis", "📸 Book Gallery"])
+    with tab1:
+        st.header("🎯 DLRM Book Recommendations")
+        st.info("Get personalized book recommendations using the trained DLRM model")
+        # User selection
+        col1, col2 = st.columns([2, 1])
+        with col1:
+            user_ids = sorted(users_df['User-ID'].unique())
+            selected_user_id = st.selectbox("Select a user", user_ids[:1000])  # Limit for performance
+        with col2:
+            num_recommendations = st.slider("Number of recommendations", 5, 20, 10)
+        # Show user info
+        user_info = users_df[users_df['User-ID'] == selected_user_id]
+        if len(user_info) > 0:
+            user = user_info.iloc[0]
+            st.markdown(f"**User Info**: Age: {user.get('Age', 'Unknown')}, Location: {user.get('Location', 'Unknown')}")
+        # User's reading history
+        user_ratings = ratings_df[ratings_df['User-ID'] == selected_user_id]
+        if len(user_ratings) > 0:
+            with st.expander(f"📖 User's Reading History ({len(user_ratings)} books)", expanded=False):
+                top_rated = user_ratings.sort_values('Book-Rating', ascending=False).head(10)
+                for _, rating in top_rated.iterrows():
+                    book_info = books_df[books_df['ISBN'] == rating['ISBN']]
+                    if len(book_info) > 0:
+                        book = book_info.iloc[0]
+                        st.write(f"• **{book['Book-Title']}** by {book['Book-Author']} - {rating['Book-Rating']}/10 ⭐")
+        if st.button("🚀 Get DLRM Recommendations", type="primary"):
+            with st.spinner("🤖 DLRM is analyzing user preferences..."):
+                # Get candidate books (popular books not rated by user)
+                user_rated_books = set(user_ratings['ISBN']) if len(user_ratings) > 0 else set()
+                # Get popular books as candidates
+                book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)
+                candidate_books = [isbn for isbn in book_popularity.head(100).index if isbn not in user_rated_books]
+                if len(candidate_books) < num_recommendations:
+                    candidate_books = book_popularity.head(200).index.tolist()
+                # Get recommendations
+                recommendations = recommender.get_user_recommendations(
+                    user_id=selected_user_id,
+                    candidate_books=candidate_books,
+                    k=num_recommendations
+                )
+            if recommendations:
+                st.success(f"Generated {len(recommendations)} DLRM recommendations!")
+                st.subheader("🎯 DLRM Recommendations")
+                for i, (book_isbn, score) in enumerate(recommendations, 1):
+                    book_info = books_df[books_df['ISBN'] == book_isbn]
+                    if len(book_info) > 0:
+                        with st.expander(f"{i}. Recommendation (DLRM Score: {score:.4f})", expanded=(i <= 3)):
+                            display_book_info(book_isbn, books_df, show_rating=score)
+                            # Additional book stats
+                            book_ratings = ratings_df[ratings_df['ISBN'] == book_isbn]
+                            if len(book_ratings) > 0:
+                                avg_rating = book_ratings['Book-Rating'].mean()
+                                num_ratings = len(book_ratings)
+                                st.markdown('<div class="dlrm-explanation">', unsafe_allow_html=True)
+                                st.markdown("**📊 Book Statistics:**")
+                                st.write(f"Average Rating: {avg_rating:.1f}/10 from {num_ratings} readers")
+                                st.write(f"DLRM Confidence: {score:.1%}")
+                                st.markdown('</div>', unsafe_allow_html=True)
+                    else:
+                        st.write(f"Book with ISBN {book_isbn} not found in database")
+            else:
+                st.warning("No recommendations generated")
+    with tab2:
+        st.header("🔍 Test DLRM Predictions")
+        st.info("Test how well DLRM predicts actual user ratings")
+        col1, col2 = st.columns(2)
+        with col1:
+            test_user_id = st.selectbox("Select user for testing", user_ids[:500], key="test_user")
+        with col2:
+            test_mode = st.radio("Test mode", ["Random books", "User's actual books"])
+        if st.button("🧪 Test Predictions", type="secondary"):
+            with st.spinner("Testing DLRM predictions..."):
+                if test_mode == "User's actual books":
+                    # Test on user's actual rated books
+                    user_test_ratings = ratings_df[ratings_df['User-ID'] == test_user_id].sample(min(10, len(user_ratings)))
+                    if len(user_test_ratings) > 0:
+                        st.subheader("🎯 DLRM vs Actual Ratings")
+                        predictions = []
+                        actuals = []
+                        for _, rating in user_test_ratings.iterrows():
+                            book_isbn = rating['ISBN']
+                            actual_rating = rating['Book-Rating']
+                            # Get DLRM prediction
+                            dlrm_score = recommender.predict_rating(test_user_id, book_isbn)
+                            predictions.append(dlrm_score)
+                            actuals.append(actual_rating >= 6)  # Convert to binary
+                            # Display comparison
+                            book_info = books_df[books_df['ISBN'] == book_isbn]
+                            if len(book_info) > 0:
+                                book = book_info.iloc[0]
+                                col1, col2, col3 = st.columns([2, 1, 1])
+                                with col1:
+                                    st.write(f"**{book['Book-Title']}**")
+                                    st.write(f"*by {book['Book-Author']}*")
+                                with col2:
+                                    st.metric("Actual Rating", f"{actual_rating}/10")
+                                with col3:
+                                    st.metric("DLRM Score", f"{dlrm_score:.3f}")
+                        # Calculate accuracy
+                        if predictions and actuals:
+                            # Convert DLRM scores to binary predictions
+                            binary_preds = [1 if p > 0.5 else 0 for p in predictions]
+                            accuracy = sum(p == a for p, a in zip(binary_preds, actuals)) / len(actuals)
+                            st.markdown("---")
+                            st.success(f"🎯 DLRM Accuracy: {accuracy:.1%}")
+                            # Show correlation
+                            actual_numeric = [rating['Book-Rating'] for _, rating in user_test_ratings.iterrows()]
+                            correlation = np.corrcoef(predictions, actual_numeric)[0, 1] if len(predictions) > 1 else 0
+                            st.info(f"📊 Correlation with actual ratings: {correlation:.3f}")
+                    else:
+                        st.warning("No ratings found for this user")
+                else:
+                    # Test on random books
+                    random_books = books_df.sample(10)['ISBN'].tolist()
+                    st.subheader("🎲 Random Book Predictions")
+                    for book_isbn in random_books:
+                        dlrm_score = recommender.predict_rating(test_user_id, book_isbn)
+                        book_info = books_df[books_df['ISBN'] == book_isbn]
+                        if len(book_info) > 0:
+                            book = book_info.iloc[0]
+                            col1, col2 = st.columns([3, 1])
+                            with col1:
+                                st.write(f"**{book['Book-Title']}** by *{book['Book-Author']}*")
+                            with col2:
+                                st.metric("DLRM Score", f"{dlrm_score:.4f}")
+    with tab3:
+        st.header("📊 DLRM Model Analysis")
+        st.info("Analysis of the DLRM model performance and characteristics")
+        # Model architecture info
+        if recommender and recommender.preprocessing_info:
+            col1, col2 = st.columns(2)
+            with col1:
+                st.subheader("🏗️ Model Architecture")
+                st.write(f"**Dense Features ({len(recommender.dense_cols)}):**")
+                for col in recommender.dense_cols:
+                    st.write(f"• {col}")
+                st.write(f"**Categorical Features ({len(recommender.cat_cols)}):**")
+                for i, col in enumerate(recommender.cat_cols):
+                    st.write(f"• {col}: {recommender.emb_counts[i]} embeddings")
+            with col2:
+                st.subheader("📈 Dataset Statistics")
+                total_samples = recommender.preprocessing_info.get('total_samples', 0)
+                positive_rate = recommender.preprocessing_info.get('positive_rate', 0)
+                st.metric("Total Samples", f"{total_samples:,}")
+                st.metric("Positive Rate", f"{positive_rate:.1%}")
+                st.metric("Train Samples", f"{recommender.preprocessing_info.get('train_samples', 0):,}")
+                st.metric("Validation Samples", f"{recommender.preprocessing_info.get('val_samples', 0):,}")
+                st.metric("Test Samples", f"{recommender.preprocessing_info.get('test_samples', 0):,}")
+        # Feature importance analysis
+        st.subheader("🔍 Feature Analysis")
+        if st.button("Analyze Feature Importance"):
+            with st.spinner("Analyzing feature importance..."):
+                # Sample some users and books
+                sample_users = users_df['User-ID'].sample(20).tolist()
+                sample_books = books_df['ISBN'].sample(20).tolist()
+                # Test different feature combinations
+                st.write("**Feature Impact Analysis:**")
+                base_predictions = []
+                for user_id in sample_users[:5]:
+                    for book_isbn in sample_books[:5]:
+                        score = recommender.predict_rating(user_id, book_isbn)
+                        base_predictions.append(score)
+                avg_prediction = np.mean(base_predictions)
+                st.metric("Average Prediction Score", f"{avg_prediction:.4f}")
+                st.success("✅ Feature analysis completed!")
+        # Load training results if available
+        if os.path.exists('dlrm_book_training_results.pkl'):
+            with open('/home/mr-behdadi/PROJECT/ICE/dlrm_book_training_results.pkl', 'rb') as f:
+                training_results = pickle.load(f)
+            st.subheader("📈 Training Results")
+            col1, col2 = st.columns(2)
+            with col1:
+                st.metric("Final Validation AUROC", f"{training_results.get('final_val_auroc', 0):.4f}")
+                st.metric("Test AUROC", f"{training_results.get('test_auroc', 0):.4f}")
+            with col2:
+                val_history = training_results.get('val_aurocs_history', [])
+                if val_history:
+                    st.line_chart(pd.DataFrame({
+                        'Epoch': range(len(val_history)),
+                        'Validation AUROC': val_history
+                    }).set_index('Epoch'))
+    # Instructions
+    st.markdown("---")
+    st.markdown("""
+    ## 🚀 How DLRM Works for Book Recommendations
+    **DLRM (Deep Learning Recommendation Model)** is specifically designed for recommendation systems and offers several advantages:
+    ### 🏗️ Architecture Benefits:
+    - **Multi-feature Processing**: Handles both categorical (user ID, book ID, publisher) and numerical (age, ratings) features
+    - **Embedding Tables**: Learns rich representations for categorical features
+    - **Cross-feature Interactions**: Captures complex relationships between different features
+    - **Scalable Design**: Efficiently handles large-scale recommendation datasets
+    ### 📊 Features Used:
+    **Categorical Features:**
+    - User ID, Book ID, Publisher, Country, Age Group, Publication Decade, Rating Level
+    **Dense Features:**
+    - Normalized Age, Publication Year, User Activity, Book Popularity, Average Ratings
+    ### 🎯 Why DLRM vs LLM for Recommendations:
+    - **Purpose-built**: Specifically designed for recommendation systems
+    - **Feature Integration**: Better at combining diverse feature types
+    - **Scalability**: More efficient for large-scale recommendation tasks
+    - **Performance**: Higher accuracy for rating prediction tasks
+    - **Production Ready**: Optimized for real-time inference
+    ### 💡 Best Use Cases:
+    - **Personalized Recommendations**: Based on user behavior and item characteristics
+    - **Rating Prediction**: Accurately predicts user preferences
+    - **Cold Start**: Handles new users and items through content features
+    - **Real-time Serving**: Fast inference for production systems
+    """)
+    with tab4:
+        st.header("📸 Book Gallery")
+        st.info("Browse book covers and discover new titles")
+        # Gallery options
+        col1, col2 = st.columns([2, 1])
+        with col1:
+            gallery_mode = st.selectbox(
+                "Choose gallery mode",
+                ["Popular Books", "Recent Publications", "Random Selection", "Search Results"]
+            )
+        with col2:
+            books_per_row = st.slider("Books per row", 2, 6, 4)
+            max_books = st.slider("Maximum books", 10, 50, 20)
+        # Get books based on selected mode
+        if gallery_mode == "Popular Books":
+            # Get most rated books
+            book_popularity = ratings_df.groupby('ISBN').size().sort_values(ascending=False)
+            gallery_books = books_df[books_df['ISBN'].isin(book_popularity.head(max_books).index)]
+        elif gallery_mode == "Recent Publications":
+            # Get recent books
+            books_df_temp = books_df.copy()
+            books_df_temp['Year-Of-Publication'] = pd.to_numeric(books_df_temp['Year-Of-Publication'], errors='coerce')
+            recent_books = books_df_temp.sort_values('Year-Of-Publication', ascending=False, na_position='last')
+            gallery_books = recent_books.head(max_books)
+        elif gallery_mode == "Random Selection":
+            # Random books
+            gallery_books = books_df.sample(min(max_books, len(books_df)))
+        else:  # Search Results
+            search_query = st.text_input("Search books for gallery", placeholder="Enter title, author, or publisher")
+            if search_query:
+                mask = (
+                    books_df['Book-Title'].str.contains(search_query, case=False, na=False) |
+                    books_df['Book-Author'].str.contains(search_query, case=False, na=False) |
+                    books_df['Publisher'].str.contains(search_query, case=False, na=False)
+                )
+                gallery_books = books_df[mask].head(max_books)
+            else:
+                gallery_books = books_df.head(max_books)
+        # Display gallery
+        if len(gallery_books) > 0:
+            st.markdown(f"**📚 Showing {len(gallery_books)} books**")
+            # Create grid layout
+            books_list = gallery_books.to_dict('records')
+            # Display books in rows
+            for i in range(0, len(books_list), books_per_row):
+                cols = st.columns(books_per_row)
+                for j, col in enumerate(cols):
+                    if i + j < len(books_list):
+                        book = books_list[i + j]
+                        with col:
+                            # Book cover
+                            image_url = book.get('Image-URL-M', '')
+                            if image_url and pd.notna(image_url) and str(image_url) != 'nan':
+                                try:
+                                    clean_url = str(image_url).strip()
+                                    if clean_url and 'http' in clean_url:
+                                        st.image(clean_url, width='stretch')
+                                    else:
+                                        st.image("https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6", width='stretch')
+                                except:
+                                    st.image("https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6", width='stretch')
+                            else:
+                                st.image("https://via.placeholder.com/150x200?text=📚&color=1f77b4&bg=f0f2f6", width='stretch')
+                            # Book info
+                            title = book['Book-Title']
+                            if len(title) > 40:
+                                title = title[:37] + "..."
+                            author = book['Book-Author']
+                            if len(author) > 25:
+                                author = author[:22] + "..."
+                            st.markdown(f"**{title}**")
+                            st.write(f"*{author}*")
+                            st.write(f"📅 {book.get('Year-Of-Publication', 'Unknown')}")
+                            # Book statistics
+                            book_stats = ratings_df[ratings_df['ISBN'] == book['ISBN']]
+                            if len(book_stats) > 0:
+                                avg_rating = book_stats['Book-Rating'].mean()
+                                num_ratings = len(book_stats)
+                                st.write(f"⭐ {avg_rating:.1f}/10 ({num_ratings} ratings)")
+                            else:
+                                st.write("⭐ No ratings")
+                            # DLRM prediction button
+                            if recommender and recommender.model:
+                                if st.button(f"🎯 DLRM Score", key=f"dlrm_{book['ISBN']}"):
+                                    with st.spinner("Calculating..."):
+                                        # Use first user as example
+                                        sample_user = users_df['User-ID'].iloc[0]
+                                        dlrm_score = recommender.predict_rating(sample_user, book['ISBN'])
+                                        st.success(f"DLRM Score: {dlrm_score:.3f}")
+        else:
+            st.info("No books found for the selected criteria")
+        # Quick stats
+        st.markdown("---")
+        st.subheader("📊 Gallery Statistics")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            books_with_covers = sum(1 for _, book in gallery_books.iterrows()
+                                  if book.get('Image-URL-M') and pd.notna(book.get('Image-URL-M')))
+            st.metric("Books with Covers", f"{books_with_covers}/{len(gallery_books)}")
+        with col2:
+            # Convert Year-Of-Publication to numeric, coercing errors to NaN
+            years = pd.to_numeric(gallery_books['Year-Of-Publication'], errors='coerce')
+            avg_year = years.mean()
+            st.metric("Average Publication Year", f"{avg_year:.0f}" if not pd.isna(avg_year) else "Unknown")
+        with col3:
+            unique_authors = gallery_books['Book-Author'].nunique()
+            st.metric("Unique Authors", unique_authors)
+        with col4:
+            unique_publishers = gallery_books['Publisher'].nunique()
+            st.metric("Unique Publishers", unique_publishers)
 # Script entry point: call main() only when this module is executed directly,
 # not when it is imported from another module.
 if __name__ == "__main__":
+    main()