Spaces:
Sleeping
Sleeping
Update src/dlrm_inference.py
Browse files- src/dlrm_inference.py +792 -420
src/dlrm_inference.py
CHANGED
|
@@ -1,314 +1,692 @@
|
|
| 1 |
-
"""
|
| 2 |
-
DLRM Inference Engine for Book Recommendations
|
| 3 |
-
Loads trained DLRM model and provides recommendation functionality
|
| 4 |
-
"""
|
| 5 |
|
| 6 |
-
import os
|
| 7 |
-
import sys
|
| 8 |
-
import torch
|
| 9 |
-
import numpy as np
|
| 10 |
-
import pandas as pd
|
| 11 |
-
import pickle
|
| 12 |
-
import mlflow
|
| 13 |
-
from mlflow import MlflowClient
|
| 14 |
-
import tempfile
|
| 15 |
-
from typing import List, Dict, Tuple, Optional, Any
|
| 16 |
-
from functools import partial
|
| 17 |
-
import warnings
|
| 18 |
-
warnings.filterwarnings('ignore')
|
| 19 |
|
| 20 |
-
# Check for CPU_ONLY environment variable
|
| 21 |
-
CPU_ONLY = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
|
| 22 |
|
| 23 |
-
# Disable CUDA if CPU_ONLY is set
|
| 24 |
-
if CPU_ONLY:
|
| 25 |
-
|
| 26 |
-
|
| 27 |
|
| 28 |
-
# Only import torchrec if not in CPU_ONLY mode
|
| 29 |
-
TORCHREC_AVAILABLE = False
|
| 30 |
-
if not CPU_ONLY:
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
else:
|
| 42 |
-
|
| 43 |
|
| 44 |
-
class DLRMBookRecommender:
|
| 45 |
-
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
def _load_preprocessing_info(self):
|
| 103 |
-
"""Load preprocessing information"""
|
| 104 |
-
if os.path.exists('book_dlrm_preprocessing.pkl'):
|
| 105 |
-
with open('book_dlrm_preprocessing.pkl', 'rb') as f:
|
| 106 |
-
self.preprocessing_info = pickle.load(f)
|
| 107 |
-
|
| 108 |
-
self.dense_cols = self.preprocessing_info['dense_cols']
|
| 109 |
-
self.cat_cols = self.preprocessing_info['cat_cols']
|
| 110 |
-
self.emb_counts = self.preprocessing_info['emb_counts']
|
| 111 |
-
self.user_encoder = self.preprocessing_info['user_encoder']
|
| 112 |
-
self.book_encoder = self.preprocessing_info['book_encoder']
|
| 113 |
-
self.publisher_encoder = self.preprocessing_info['publisher_encoder']
|
| 114 |
-
self.location_encoder = self.preprocessing_info['location_encoder']
|
| 115 |
-
self.scaler = self.preprocessing_info['scaler']
|
| 116 |
-
|
| 117 |
-
print("✅ Preprocessing info loaded")
|
| 118 |
-
else:
|
| 119 |
-
raise FileNotFoundError("book_dlrm_preprocessing.pkl not found. Run preprocessing first.")
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
|
| 156 |
-
|
| 157 |
|
| 158 |
-
|
| 159 |
-
|
| 160 |
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
|
| 219 |
-
|
| 220 |
|
| 221 |
-
|
| 222 |
-
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
|
| 243 |
-
location = str(user_data.get('Location', 'usa')).split(',')[-1].strip().lower()
|
| 244 |
-
country_encoded = self.location_encoder.transform([location])[0]
|
| 245 |
-
except:
|
| 246 |
-
country_encoded = 0
|
| 247 |
-
|
| 248 |
-
# Age group
|
| 249 |
-
age = user_data.get('Age', 30)
|
| 250 |
-
if age < 18:
|
| 251 |
-
age_group = 0
|
| 252 |
-
elif age < 25:
|
| 253 |
-
age_group = 1
|
| 254 |
-
elif age < 35:
|
| 255 |
-
age_group = 2
|
| 256 |
-
elif age < 50:
|
| 257 |
-
age_group = 3
|
| 258 |
-
elif age < 65:
|
| 259 |
-
age_group = 4
|
| 260 |
-
else:
|
| 261 |
-
age_group = 5
|
| 262 |
|
| 263 |
-
|
| 264 |
-
user_activity = user_data.get('user_activity', 10) # Default
|
| 265 |
-
user_avg_rating = user_data.get('user_avg_rating', 6.0) # Default
|
| 266 |
-
age_normalized = user_data.get('Age', 30)
|
| 267 |
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
dense_features = torch.tensor(dense_features, dtype=torch.float32)
|
| 272 |
|
| 273 |
-
|
|
|
|
|
|
|
| 274 |
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
publisher = str(book_data.get('Publisher', 'Unknown'))
|
| 290 |
-
publisher_encoded = self.publisher_encoder.transform([publisher])[0]
|
| 291 |
-
except:
|
| 292 |
-
publisher_encoded = 0
|
| 293 |
-
|
| 294 |
-
# Publication decade
|
| 295 |
-
year = book_data.get('Year-Of-Publication', 2000)
|
| 296 |
-
decade = ((int(year) // 10) * 10)
|
| 297 |
-
try:
|
| 298 |
-
decade_encoded = preprocessing_info.get('decade_encoder', LabelEncoder()).transform([str(decade)])[0]
|
| 299 |
-
except:
|
| 300 |
-
decade_encoded = 6 # Default to 2000s
|
| 301 |
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
def predict_rating(self, user_id: int, book_isbn: str,
|
| 308 |
user_data: Optional[Dict] = None,
|
| 309 |
book_data: Optional[Dict] = None) -> float:
|
| 310 |
"""
|
| 311 |
-
Predict rating probability for user-book pair
|
| 312 |
|
| 313 |
Args:
|
| 314 |
user_id: User ID
|
|
@@ -319,43 +697,79 @@ class DLRMBookRecommender:
|
|
| 319 |
Returns:
|
| 320 |
Prediction probability (0-1)
|
| 321 |
"""
|
| 322 |
-
if self.cpu_only:
|
| 323 |
-
print("⚠️ Cannot make predictions in CPU-only mode")
|
| 324 |
-
return 0.5 # Return default neutral prediction
|
| 325 |
-
|
| 326 |
-
if self.model is None:
|
| 327 |
-
print("โ Model not loaded")
|
| 328 |
-
return 0.0
|
| 329 |
-
|
| 330 |
-
if not self.torchrec_available:
|
| 331 |
-
print("โ Cannot make predictions without torchrec")
|
| 332 |
-
return 0.5 # Return default neutral prediction
|
| 333 |
-
|
| 334 |
try:
|
| 335 |
-
#
|
| 336 |
-
|
| 337 |
-
book_id_encoded, publisher_encoded, decade_encoded, rating_level = self._prepare_book_features(book_isbn, book_data)
|
| 338 |
-
|
| 339 |
-
# Create sparse features
|
| 340 |
-
kjt_values = [user_id_encoded, book_id_encoded, publisher_encoded, country_encoded, age_group, decade_encoded, rating_level]
|
| 341 |
-
kjt_lengths = [1] * len(kjt_values)
|
| 342 |
-
|
| 343 |
-
sparse_features = KeyedJaggedTensor.from_lengths_sync(
|
| 344 |
-
self.cat_cols,
|
| 345 |
-
torch.tensor(kjt_values),
|
| 346 |
-
torch.tensor(kjt_lengths, dtype=torch.int32),
|
| 347 |
-
)
|
| 348 |
-
|
| 349 |
-
# Make prediction
|
| 350 |
-
with torch.no_grad():
|
| 351 |
-
logits = self.model(dense_features=dense_features, sparse_features=sparse_features)
|
| 352 |
-
prediction = torch.sigmoid(logits).item()
|
| 353 |
-
|
| 354 |
return prediction
|
| 355 |
|
| 356 |
except Exception as e:
|
| 357 |
print(f"Error in prediction: {e}")
|
| 358 |
-
return 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
def get_user_recommendations(self, user_id: int,
|
| 361 |
candidate_books: List[str],
|
|
@@ -373,14 +787,10 @@ class DLRMBookRecommender:
|
|
| 373 |
Returns:
|
| 374 |
List of (book_isbn, prediction_score) tuples
|
| 375 |
"""
|
| 376 |
-
|
| 377 |
-
print("โ Model not loaded, CPU-only mode, or torchrec not available")
|
| 378 |
-
return []
|
| 379 |
|
| 380 |
recommendations = []
|
| 381 |
|
| 382 |
-
print(f"Generating recommendations for user {user_id} from {len(candidate_books)} candidates...")
|
| 383 |
-
|
| 384 |
for book_isbn in candidate_books:
|
| 385 |
score = self.predict_rating(user_id, book_isbn, user_data)
|
| 386 |
recommendations.append((book_isbn, score))
|
|
@@ -447,117 +857,108 @@ class DLRMBookRecommender:
|
|
| 447 |
if len(scores) > 0:
|
| 448 |
scores_array = np.array(scores)
|
| 449 |
# Calculate correlation as similarity measure
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
|
| 454 |
# Sort by similarity and return top-k
|
| 455 |
similarities.sort(key=lambda x: x[1], reverse=True)
|
| 456 |
return similarities[:k]
|
| 457 |
|
| 458 |
|
| 459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
"""
|
| 461 |
-
Load DLRM recommender
|
| 462 |
|
| 463 |
Args:
|
| 464 |
-
model_source:
|
| 465 |
|
| 466 |
Returns:
|
| 467 |
DLRMBookRecommender instance
|
| 468 |
"""
|
| 469 |
-
|
| 470 |
-
cpu_only = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
|
| 471 |
-
if cpu_only:
|
| 472 |
-
print("๐ Loading recommender in CPU-only mode")
|
| 473 |
-
# In CPU-only mode, just return a basic recommender instance
|
| 474 |
-
return DLRMBookRecommender()
|
| 475 |
-
|
| 476 |
-
# Create recommender instance
|
| 477 |
-
recommender = DLRMBookRecommender()
|
| 478 |
-
|
| 479 |
-
# If torchrec is not available, return limited recommender
|
| 480 |
-
if not TORCHREC_AVAILABLE:
|
| 481 |
-
print("⚠️ torchrec not available, returning limited recommender")
|
| 482 |
-
return recommender
|
| 483 |
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
try:
|
| 487 |
-
experiment = mlflow.get_experiment_by_name('dlrm-book-recommendation-book_recommender')
|
| 488 |
-
if experiment:
|
| 489 |
-
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id],
|
| 490 |
-
order_by=["start_time desc"], max_results=1)
|
| 491 |
-
if len(runs) > 0:
|
| 492 |
-
latest_run_id = runs.iloc[0].run_id
|
| 493 |
-
recommender = DLRMBookRecommender(run_id=latest_run_id)
|
| 494 |
-
return recommender
|
| 495 |
-
except Exception as e:
|
| 496 |
-
print(f"⚠️ Error loading from MLflow: {e}")
|
| 497 |
-
|
| 498 |
-
elif model_source == "file":
|
| 499 |
-
# Try to load from local file
|
| 500 |
-
for filename in [
|
| 501 |
-
'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_final.pth',
|
| 502 |
-
'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_2.pth',
|
| 503 |
-
'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_0.pth',
|
| 504 |
-
'/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_1.pth']:
|
| 505 |
-
if os.path.exists(filename):
|
| 506 |
-
try:
|
| 507 |
-
recommender = DLRMBookRecommender(model_path=filename)
|
| 508 |
-
return recommender
|
| 509 |
-
except Exception as e:
|
| 510 |
-
print(f"⚠️ Error loading from {filename}: {e}")
|
| 511 |
-
|
| 512 |
-
else:
|
| 513 |
-
# Treat as run_id
|
| 514 |
-
try:
|
| 515 |
-
recommender = DLRMBookRecommender(run_id=model_source)
|
| 516 |
-
return recommender
|
| 517 |
-
except Exception as e:
|
| 518 |
-
print(f"⚠️ Error loading from run_id {model_source}: {e}")
|
| 519 |
|
| 520 |
-
print("
|
| 521 |
return recommender
|
| 522 |
|
| 523 |
|
| 524 |
def demo_dlrm_recommendations():
|
| 525 |
"""Demo function to show DLRM recommendations"""
|
| 526 |
|
| 527 |
-
print("๐ DLRM Book Recommendation Demo")
|
| 528 |
print("=" * 50)
|
| 529 |
|
| 530 |
-
#
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
|
| 535 |
-
books_df
|
| 536 |
-
users_df.columns = users_df.columns.str.replace('"', '')
|
| 537 |
-
ratings_df.columns = ratings_df.columns.str.replace('"', '')
|
| 538 |
|
| 539 |
# Load recommender
|
| 540 |
-
recommender = load_dlrm_recommender(
|
| 541 |
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
|
| 546 |
-
|
| 547 |
-
sample_user_id = ratings_df['User-ID'].iloc[0]
|
| 548 |
-
sample_books = books_df['ISBN'].head(20).tolist()
|
| 549 |
-
|
| 550 |
-
print(f"\n๐ Getting recommendations for User {sample_user_id}")
|
| 551 |
print(f"Testing with {len(sample_books)} candidate books...")
|
| 552 |
|
| 553 |
# Get recommendations
|
| 554 |
recommendations = recommender.get_user_recommendations(
|
| 555 |
user_id=sample_user_id,
|
| 556 |
candidate_books=sample_books,
|
| 557 |
-
k=
|
|
|
|
| 558 |
)
|
| 559 |
|
| 560 |
-
print(f"\n๐ฏ Top
|
| 561 |
print("-" * 50)
|
| 562 |
|
| 563 |
for i, (book_isbn, score) in enumerate(recommendations, 1):
|
|
@@ -568,42 +969,13 @@ def demo_dlrm_recommendations():
|
|
| 568 |
title = book['Book-Title']
|
| 569 |
author = book['Book-Author']
|
| 570 |
print(f"{i:2d}. {title} by {author}")
|
| 571 |
-
print(f" ISBN: {book_isbn}, Score: {score:.4f}")
|
| 572 |
else:
|
| 573 |
-
print(f"{i:2d}. ISBN: {book_isbn}, Score: {score:.4f}")
|
| 574 |
print()
|
| 575 |
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
if len(user_ratings) > 0:
|
| 579 |
-
print(f"\n๐ User {sample_user_id}'s Actual Reading History:")
|
| 580 |
-
print("-" * 50)
|
| 581 |
-
|
| 582 |
-
for _, rating in user_ratings.head(5).iterrows():
|
| 583 |
-
book_info = books_df[books_df['ISBN'] == rating['ISBN']]
|
| 584 |
-
if len(book_info) > 0:
|
| 585 |
-
book = book_info.iloc[0]
|
| 586 |
-
print(f"• {book['Book-Title']} by {book['Book-Author']} - Rating: {rating['Book-Rating']}/10")
|
| 587 |
-
|
| 588 |
-
# Test book similarity
|
| 589 |
-
if len(recommendations) > 0:
|
| 590 |
-
target_book = recommendations[0][0]
|
| 591 |
-
print(f"\n๐ Finding books similar to: {target_book}")
|
| 592 |
-
|
| 593 |
-
similar_books = recommender.get_similar_books(
|
| 594 |
-
target_book_isbn=target_book,
|
| 595 |
-
candidate_books=sample_books,
|
| 596 |
-
sample_users=ratings_df['User-ID'].head(10).tolist(),
|
| 597 |
-
k=5
|
| 598 |
-
)
|
| 599 |
-
|
| 600 |
-
print(f"\n๐ Similar Books:")
|
| 601 |
-
print("-" * 30)
|
| 602 |
-
for i, (book_isbn, similarity) in enumerate(similar_books, 1):
|
| 603 |
-
book_info = books_df[books_df['ISBN'] == book_isbn]
|
| 604 |
-
if len(book_info) > 0:
|
| 605 |
-
book = book_info.iloc[0]
|
| 606 |
-
print(f"{i}. {book['Book-Title']} (similarity: {similarity:.3f})")
|
| 607 |
|
| 608 |
if __name__ == "__main__":
|
| 609 |
demo_dlrm_recommendations()
|
|
|
|
| 1 |
+
# """
|
| 2 |
+
# DLRM Inference Engine for Book Recommendations
|
| 3 |
+
# Loads trained DLRM model and provides recommendation functionality
|
| 4 |
+
# """
|
| 5 |
|
| 6 |
+
# import os
|
| 7 |
+
# import sys
|
| 8 |
+
# import torch
|
| 9 |
+
# import numpy as np
|
| 10 |
+
# import pandas as pd
|
| 11 |
+
# import pickle
|
| 12 |
+
# import mlflow
|
| 13 |
+
# from mlflow import MlflowClient
|
| 14 |
+
# import tempfile
|
| 15 |
+
# from typing import List, Dict, Tuple, Optional, Any
|
| 16 |
+
# from functools import partial
|
| 17 |
+
# import warnings
|
| 18 |
+
# warnings.filterwarnings('ignore')
|
| 19 |
|
| 20 |
+
# # Check for CPU_ONLY environment variable
|
| 21 |
+
# CPU_ONLY = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
|
| 22 |
|
| 23 |
+
# # Disable CUDA if CPU_ONLY is set
|
| 24 |
+
# if CPU_ONLY:
|
| 25 |
+
# os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
| 26 |
+
# print("๐ Running in CPU-only mode (CUDA disabled)")
|
| 27 |
|
| 28 |
+
# # Only import torchrec if not in CPU_ONLY mode
|
| 29 |
+
# TORCHREC_AVAILABLE = False
|
| 30 |
+
# if not CPU_ONLY:
|
| 31 |
+
# try:
|
| 32 |
+
# from torchrec import EmbeddingBagCollection
|
| 33 |
+
# from torchrec.models.dlrm import DLRM, DLRMTrain
|
| 34 |
+
# from torchrec.modules.embedding_configs import EmbeddingBagConfig
|
| 35 |
+
# from torchrec.sparse.jagged_tensor import KeyedJaggedTensor
|
| 36 |
+
# from torchrec.datasets.utils import Batch
|
| 37 |
+
# TORCHREC_AVAILABLE = True
|
| 38 |
+
# except ImportError as e:
|
| 39 |
+
# print(f"⚠️ Warning: torchrec import error: {e}")
|
| 40 |
+
# print("⚠️ Some functionality will be limited")
|
| 41 |
+
# else:
|
| 42 |
+
# print("⚠️ Running in CPU-only mode without torchrec")
|
| 43 |
|
| 44 |
+
# class DLRMBookRecommender:
|
| 45 |
+
# """DLRM-based book recommender for inference"""
|
| 46 |
|
| 47 |
+
# def __init__(self, model_path: str = None, run_id: str = None):
|
| 48 |
+
# """
|
| 49 |
+
# Initialize DLRM book recommender
|
| 50 |
+
|
| 51 |
+
# Args:
|
| 52 |
+
# model_path: Path to saved model state dict
|
| 53 |
+
# run_id: MLflow run ID to load model from
|
| 54 |
+
# """
|
| 55 |
+
# self.device = torch.device("cpu")
|
| 56 |
+
# self.model = None
|
| 57 |
+
# self.preprocessing_info = None
|
| 58 |
+
# self.torchrec_available = TORCHREC_AVAILABLE
|
| 59 |
+
# self.cpu_only = CPU_ONLY
|
| 60 |
+
# self.dense_cols = []
|
| 61 |
+
# self.cat_cols = []
|
| 62 |
+
# self.emb_counts = []
|
| 63 |
+
|
| 64 |
+
# if self.cpu_only:
|
| 65 |
+
# print("⚠️ Running in CPU-only mode with limited functionality")
|
| 66 |
+
# # Load minimal preprocessing info for browsing
|
| 67 |
+
# self._load_minimal_preprocessing()
|
| 68 |
+
# return
|
| 69 |
+
|
| 70 |
+
# if not self.torchrec_available:
|
| 71 |
+
# print("⚠️ Running in limited mode without torchrec")
|
| 72 |
+
# return
|
| 73 |
+
|
| 74 |
+
# # Load preprocessing info
|
| 75 |
+
# self._load_preprocessing_info()
|
| 76 |
+
|
| 77 |
+
# # Load model
|
| 78 |
+
# if model_path and os.path.exists(model_path):
|
| 79 |
+
# self._load_model_from_path(model_path)
|
| 80 |
+
# elif run_id:
|
| 81 |
+
# self._load_model_from_mlflow(run_id)
|
| 82 |
+
# else:
|
| 83 |
+
# print("⚠️ No model loaded. Please provide model_path or run_id")
|
| 84 |
|
| 85 |
+
# def _load_minimal_preprocessing(self):
|
| 86 |
+
# """Load minimal preprocessing info for CPU-only mode"""
|
| 87 |
+
# try:
|
| 88 |
+
# if os.path.exists('book_dlrm_preprocessing.pkl'):
|
| 89 |
+
# with open('book_dlrm_preprocessing.pkl', 'rb') as f:
|
| 90 |
+
# self.preprocessing_info = pickle.load(f)
|
| 91 |
|
| 92 |
+
# self.dense_cols = self.preprocessing_info.get('dense_cols', [])
|
| 93 |
+
# self.cat_cols = self.preprocessing_info.get('cat_cols', [])
|
| 94 |
+
# self.emb_counts = self.preprocessing_info.get('emb_counts', [])
|
| 95 |
|
| 96 |
+
# print("✅ Minimal preprocessing info loaded for CPU-only mode")
|
| 97 |
+
# else:
|
| 98 |
+
# print("⚠️ No preprocessing info found for CPU-only mode")
|
| 99 |
+
# except Exception as e:
|
| 100 |
+
# print(f"⚠️ Error loading minimal preprocessing: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
+
# def _load_preprocessing_info(self):
|
| 103 |
+
# """Load preprocessing information"""
|
| 104 |
+
# if os.path.exists('book_dlrm_preprocessing.pkl'):
|
| 105 |
+
# with open('book_dlrm_preprocessing.pkl', 'rb') as f:
|
| 106 |
+
# self.preprocessing_info = pickle.load(f)
|
| 107 |
+
|
| 108 |
+
# self.dense_cols = self.preprocessing_info['dense_cols']
|
| 109 |
+
# self.cat_cols = self.preprocessing_info['cat_cols']
|
| 110 |
+
# self.emb_counts = self.preprocessing_info['emb_counts']
|
| 111 |
+
# self.user_encoder = self.preprocessing_info['user_encoder']
|
| 112 |
+
# self.book_encoder = self.preprocessing_info['book_encoder']
|
| 113 |
+
# self.publisher_encoder = self.preprocessing_info['publisher_encoder']
|
| 114 |
+
# self.location_encoder = self.preprocessing_info['location_encoder']
|
| 115 |
+
# self.scaler = self.preprocessing_info['scaler']
|
| 116 |
+
|
| 117 |
+
# print("✅ Preprocessing info loaded")
|
| 118 |
+
# else:
|
| 119 |
+
# raise FileNotFoundError("book_dlrm_preprocessing.pkl not found. Run preprocessing first.")
|
| 120 |
+
|
| 121 |
+
# def _load_model_from_path(self, model_path: str):
|
| 122 |
+
# """Load model from saved state dict"""
|
| 123 |
+
# try:
|
| 124 |
+
# # Create model architecture
|
| 125 |
+
# eb_configs = [
|
| 126 |
+
# EmbeddingBagConfig(
|
| 127 |
+
# name=f"t_{feature_name}",
|
| 128 |
+
# embedding_dim=64, # Default embedding dim
|
| 129 |
+
# num_embeddings=self.emb_counts[feature_idx],
|
| 130 |
+
# feature_names=[feature_name],
|
| 131 |
+
# )
|
| 132 |
+
# for feature_idx, feature_name in enumerate(self.cat_cols)
|
| 133 |
+
# ]
|
| 134 |
|
| 135 |
+
# dlrm_model = DLRM(
|
| 136 |
+
# embedding_bag_collection=EmbeddingBagCollection(
|
| 137 |
+
# tables=eb_configs, device=self.device
|
| 138 |
+
# ),
|
| 139 |
+
# dense_in_features=len(self.dense_cols),
|
| 140 |
+
# dense_arch_layer_sizes=[256, 128, 64],
|
| 141 |
+
# over_arch_layer_sizes=[512, 256, 128, 1],
|
| 142 |
+
# dense_device=self.device,
|
| 143 |
+
# )
|
| 144 |
|
| 145 |
+
# # Load state dict
|
| 146 |
+
# state_dict = torch.load(model_path, map_location=self.device)
|
| 147 |
|
| 148 |
+
# # Remove 'model.' prefix if present
|
| 149 |
+
# if any(key.startswith('model.') for key in state_dict.keys()):
|
| 150 |
+
# state_dict = {k[6:]: v for k, v in state_dict.items()}
|
| 151 |
|
| 152 |
+
# dlrm_model.load_state_dict(state_dict)
|
| 153 |
+
# self.model = dlrm_model
|
| 154 |
+
# self.model.eval()
|
| 155 |
|
| 156 |
+
# print(f"✅ Model loaded from {model_path}")
|
| 157 |
|
| 158 |
+
# except Exception as e:
|
| 159 |
+
# print(f"โ Error loading model: {e}")
|
| 160 |
|
| 161 |
+
# def _load_model_from_mlflow(self, run_id: str):
|
| 162 |
+
# """Load model from MLflow"""
|
| 163 |
+
# try:
|
| 164 |
+
# client = MlflowClient()
|
| 165 |
+
# run = client.get_run(run_id)
|
| 166 |
+
|
| 167 |
+
# # Get model parameters from MLflow
|
| 168 |
+
# params = run.data.params
|
| 169 |
+
# cat_cols = eval(params.get('cat_cols'))
|
| 170 |
+
# emb_counts = eval(params.get('emb_counts'))
|
| 171 |
+
# dense_cols = eval(params.get('dense_cols'))
|
| 172 |
+
# embedding_dim = int(params.get('embedding_dim', 64))
|
| 173 |
+
# dense_arch_layer_sizes = eval(params.get('dense_arch_layer_sizes'))
|
| 174 |
+
# over_arch_layer_sizes = eval(params.get('over_arch_layer_sizes'))
|
| 175 |
+
|
| 176 |
+
# # Download model from MLflow
|
| 177 |
+
# temp_dir = tempfile.mkdtemp()
|
| 178 |
+
|
| 179 |
+
# # Try different artifact paths
|
| 180 |
+
# for artifact_path in ['model_state_dict_final', 'model_state_dict_2', 'model_state_dict_1', 'model_state_dict_0']:
|
| 181 |
+
# try:
|
| 182 |
+
# client.download_artifacts(run_id, f"{artifact_path}/state_dict.pth", temp_dir)
|
| 183 |
+
# state_dict = mlflow.pytorch.load_state_dict(f"{temp_dir}/{artifact_path}")
|
| 184 |
+
# break
|
| 185 |
+
# except:
|
| 186 |
+
# continue
|
| 187 |
+
# else:
|
| 188 |
+
# raise Exception("No model artifacts found")
|
| 189 |
+
|
| 190 |
+
# # Create model
|
| 191 |
+
# eb_configs = [
|
| 192 |
+
# EmbeddingBagConfig(
|
| 193 |
+
# name=f"t_{feature_name}",
|
| 194 |
+
# embedding_dim=embedding_dim,
|
| 195 |
+
# num_embeddings=emb_counts[feature_idx],
|
| 196 |
+
# feature_names=[feature_name],
|
| 197 |
+
# )
|
| 198 |
+
# for feature_idx, feature_name in enumerate(cat_cols)
|
| 199 |
+
# ]
|
| 200 |
|
| 201 |
+
# dlrm_model = DLRM(
|
| 202 |
+
# embedding_bag_collection=EmbeddingBagCollection(
|
| 203 |
+
# tables=eb_configs, device=self.device
|
| 204 |
+
# ),
|
| 205 |
+
# dense_in_features=len(dense_cols),
|
| 206 |
+
# dense_arch_layer_sizes=dense_arch_layer_sizes,
|
| 207 |
+
# over_arch_layer_sizes=over_arch_layer_sizes,
|
| 208 |
+
# dense_device=self.device,
|
| 209 |
+
# )
|
| 210 |
|
| 211 |
+
# # Remove prefix and load state dict
|
| 212 |
+
# if any(key.startswith('model.') for key in state_dict.keys()):
|
| 213 |
+
# state_dict = {k[6:]: v for k, v in state_dict.items()}
|
| 214 |
|
| 215 |
+
# dlrm_model.load_state_dict(state_dict)
|
| 216 |
+
# self.model = dlrm_model
|
| 217 |
+
# self.model.eval()
|
| 218 |
|
| 219 |
+
# print(f"✅ Model loaded from MLflow run: {run_id}")
|
| 220 |
|
| 221 |
+
# except Exception as e:
|
| 222 |
+
# print(f"โ Error loading model from MLflow: {e}")
|
| 223 |
|
| 224 |
+
# def _prepare_user_features(self, user_id: int, user_data: Optional[Dict] = None) -> Tuple[torch.Tensor, KeyedJaggedTensor]:
|
| 225 |
+
# """Prepare user features for inference"""
|
| 226 |
+
|
| 227 |
+
# if user_data is None:
|
| 228 |
+
# # Create default user features
|
| 229 |
+
# user_data = {
|
| 230 |
+
# 'User-ID': user_id,
|
| 231 |
+
# 'Age': 30, # Default age
|
| 232 |
+
# 'Location': 'usa', # Default location
|
| 233 |
+
# }
|
| 234 |
+
|
| 235 |
+
# # Encode categorical features
|
| 236 |
+
# try:
|
| 237 |
+
# user_id_encoded = self.user_encoder.transform([str(user_id)])[0]
|
| 238 |
+
# except:
|
| 239 |
+
# # Handle unknown user
|
| 240 |
+
# user_id_encoded = 0
|
| 241 |
+
|
| 242 |
+
# try:
|
| 243 |
+
# location = str(user_data.get('Location', 'usa')).split(',')[-1].strip().lower()
|
| 244 |
+
# country_encoded = self.location_encoder.transform([location])[0]
|
| 245 |
+
# except:
|
| 246 |
+
# country_encoded = 0
|
| 247 |
+
|
| 248 |
+
# # Age group
|
| 249 |
+
# age = user_data.get('Age', 30)
|
| 250 |
+
# if age < 18:
|
| 251 |
+
# age_group = 0
|
| 252 |
+
# elif age < 25:
|
| 253 |
+
# age_group = 1
|
| 254 |
+
# elif age < 35:
|
| 255 |
+
# age_group = 2
|
| 256 |
+
# elif age < 50:
|
| 257 |
+
# age_group = 3
|
| 258 |
+
# elif age < 65:
|
| 259 |
+
# age_group = 4
|
| 260 |
+
# else:
|
| 261 |
+
# age_group = 5
|
| 262 |
+
|
| 263 |
+
# # Get user statistics (if available)
|
| 264 |
+
# user_activity = user_data.get('user_activity', 10) # Default
|
| 265 |
+
# user_avg_rating = user_data.get('user_avg_rating', 6.0) # Default
|
| 266 |
+
# age_normalized = user_data.get('Age', 30)
|
| 267 |
+
|
| 268 |
+
# # Normalize dense features
|
| 269 |
+
# dense_features = np.array([[age_normalized, 2000, user_activity, 10, user_avg_rating, 6.0]]) # Default values
|
| 270 |
+
# dense_features = self.scaler.transform(dense_features)
|
| 271 |
+
# dense_features = torch.tensor(dense_features, dtype=torch.float32)
|
| 272 |
+
|
| 273 |
+
# return dense_features, user_id_encoded, country_encoded, age_group
|
| 274 |
+
|
| 275 |
+
# def _prepare_book_features(self, book_isbn: str, book_data: Optional[Dict] = None) -> Tuple[int, int, int, int]:
|
| 276 |
+
# """Prepare book features for inference"""
|
| 277 |
+
|
| 278 |
+
# if book_data is None:
|
| 279 |
+
# book_data = {}
|
| 280 |
+
|
| 281 |
+
# # Encode book ID
|
| 282 |
+
# try:
|
| 283 |
+
# book_id_encoded = self.book_encoder.transform([str(book_isbn)])[0]
|
| 284 |
+
# except:
|
| 285 |
+
# book_id_encoded = 0
|
| 286 |
+
|
| 287 |
+
# # Encode publisher
|
| 288 |
+
# try:
|
| 289 |
+
# publisher = str(book_data.get('Publisher', 'Unknown'))
|
| 290 |
+
# publisher_encoded = self.publisher_encoder.transform([publisher])[0]
|
| 291 |
+
# except:
|
| 292 |
+
# publisher_encoded = 0
|
| 293 |
+
|
| 294 |
+
# # Publication decade
|
| 295 |
+
# year = book_data.get('Year-Of-Publication', 2000)
|
| 296 |
+
# decade = ((int(year) // 10) * 10)
|
| 297 |
+
# try:
|
| 298 |
+
# decade_encoded = preprocessing_info.get('decade_encoder', LabelEncoder()).transform([str(decade)])[0]
|
| 299 |
+
# except:
|
| 300 |
+
# decade_encoded = 6 # Default to 2000s
|
| 301 |
+
|
| 302 |
+
# # Rating level (default to medium)
|
| 303 |
+
# rating_level = 1
|
| 304 |
+
|
| 305 |
+
# return book_id_encoded, publisher_encoded, decade_encoded, rating_level
|
| 306 |
+
|
| 307 |
+
# def predict_rating(self, user_id: int, book_isbn: str,
|
| 308 |
+
# user_data: Optional[Dict] = None,
|
| 309 |
+
# book_data: Optional[Dict] = None) -> float:
|
| 310 |
+
# """
|
| 311 |
+
# Predict rating probability for user-book pair
|
| 312 |
+
|
| 313 |
+
# Args:
|
| 314 |
+
# user_id: User ID
|
| 315 |
+
# book_isbn: Book ISBN
|
| 316 |
+
# user_data: Additional user data (optional)
|
| 317 |
+
# book_data: Additional book data (optional)
|
| 318 |
+
|
| 319 |
+
# Returns:
|
| 320 |
+
# Prediction probability (0-1)
|
| 321 |
+
# """
|
| 322 |
+
# if self.cpu_only:
|
| 323 |
+
# print("⚠️ Cannot make predictions in CPU-only mode")
|
| 324 |
+
# return 0.5 # Return default neutral prediction
|
| 325 |
+
|
| 326 |
+
# if self.model is None:
|
| 327 |
+
# print("โ Model not loaded")
|
| 328 |
+
# return 0.0
|
| 329 |
+
|
| 330 |
+
# if not self.torchrec_available:
|
| 331 |
+
# print("โ Cannot make predictions without torchrec")
|
| 332 |
+
# return 0.5 # Return default neutral prediction
|
| 333 |
+
|
| 334 |
+
# try:
|
| 335 |
+
# # Prepare features
|
| 336 |
+
# dense_features, user_id_encoded, country_encoded, age_group = self._prepare_user_features(user_id, user_data)
|
| 337 |
+
# book_id_encoded, publisher_encoded, decade_encoded, rating_level = self._prepare_book_features(book_isbn, book_data)
|
| 338 |
+
|
| 339 |
+
# # Create sparse features
|
| 340 |
+
# kjt_values = [user_id_encoded, book_id_encoded, publisher_encoded, country_encoded, age_group, decade_encoded, rating_level]
|
| 341 |
+
# kjt_lengths = [1] * len(kjt_values)
|
| 342 |
+
|
| 343 |
+
# sparse_features = KeyedJaggedTensor.from_lengths_sync(
|
| 344 |
+
# self.cat_cols,
|
| 345 |
+
# torch.tensor(kjt_values),
|
| 346 |
+
# torch.tensor(kjt_lengths, dtype=torch.int32),
|
| 347 |
+
# )
|
| 348 |
+
|
| 349 |
+
# # Make prediction
|
| 350 |
+
# with torch.no_grad():
|
| 351 |
+
# logits = self.model(dense_features=dense_features, sparse_features=sparse_features)
|
| 352 |
+
# prediction = torch.sigmoid(logits).item()
|
| 353 |
+
|
| 354 |
+
# return prediction
|
| 355 |
+
|
| 356 |
+
# except Exception as e:
|
| 357 |
+
# print(f"Error in prediction: {e}")
|
| 358 |
+
# return 0.0
|
| 359 |
+
|
| 360 |
+
# def get_user_recommendations(self, user_id: int,
|
| 361 |
+
# candidate_books: List[str],
|
| 362 |
+
# k: int = 10,
|
| 363 |
+
# user_data: Optional[Dict] = None) -> List[Tuple[str, float]]:
|
| 364 |
+
# """
|
| 365 |
+
# Get top-k book recommendations for a user
|
| 366 |
+
|
| 367 |
+
# Args:
|
| 368 |
+
# user_id: User ID
|
| 369 |
+
# candidate_books: List of candidate book ISBNs
|
| 370 |
+
# k: Number of recommendations
|
| 371 |
+
# user_data: Additional user data
|
| 372 |
+
|
| 373 |
+
# Returns:
|
| 374 |
+
# List of (book_isbn, prediction_score) tuples
|
| 375 |
+
# """
|
| 376 |
+
# if self.cpu_only or self.model is None or not self.torchrec_available:
|
| 377 |
+
# print("โ Model not loaded, CPU-only mode, or torchrec not available")
|
| 378 |
+
# return []
|
| 379 |
|
| 380 |
+
# recommendations = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
+
# print(f"Generating recommendations for user {user_id} from {len(candidate_books)} candidates...")
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
+
# for book_isbn in candidate_books:
|
| 385 |
+
# score = self.predict_rating(user_id, book_isbn, user_data)
|
| 386 |
+
# recommendations.append((book_isbn, score))
|
|
|
|
| 387 |
|
| 388 |
+
# # Sort by score and return top-k
|
| 389 |
+
# recommendations.sort(key=lambda x: x[1], reverse=True)
|
| 390 |
+
# return recommendations[:k]
|
| 391 |
|
| 392 |
+
# def batch_recommend(self, user_ids: List[int],
|
| 393 |
+
# candidate_books: List[str],
|
| 394 |
+
# k: int = 10) -> Dict[int, List[Tuple[str, float]]]:
|
| 395 |
+
# """
|
| 396 |
+
# Generate recommendations for multiple users
|
| 397 |
+
|
| 398 |
+
# Args:
|
| 399 |
+
# user_ids: List of user IDs
|
| 400 |
+
# candidate_books: List of candidate book ISBNs
|
| 401 |
+
# k: Number of recommendations per user
|
| 402 |
+
|
| 403 |
+
# Returns:
|
| 404 |
+
# Dictionary mapping user_id to recommendations
|
| 405 |
+
# """
|
| 406 |
+
# results = {}
|
| 407 |
|
| 408 |
+
# for user_id in user_ids:
|
| 409 |
+
# results[user_id] = self.get_user_recommendations(user_id, candidate_books, k)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
|
| 411 |
+
# return results
|
| 412 |
+
|
| 413 |
+
# def get_similar_books(self, target_book_isbn: str,
|
| 414 |
+
# candidate_books: List[str],
|
| 415 |
+
# sample_users: List[int],
|
| 416 |
+
# k: int = 10) -> List[Tuple[str, float]]:
|
| 417 |
+
# """
|
| 418 |
+
# Find books similar to target book by comparing user preferences
|
| 419 |
+
|
| 420 |
+
# Args:
|
| 421 |
+
# target_book_isbn: Target book ISBN
|
| 422 |
+
# candidate_books: List of candidate book ISBNs
|
| 423 |
+
# sample_users: Sample users to test similarity with
|
| 424 |
+
# k: Number of similar books
|
| 425 |
+
|
| 426 |
+
# Returns:
|
| 427 |
+
# List of (book_isbn, similarity_score) tuples
|
| 428 |
+
# """
|
| 429 |
+
# target_scores = []
|
| 430 |
+
# candidate_scores = {book: [] for book in candidate_books}
|
| 431 |
+
|
| 432 |
+
# # Get predictions for target book and candidates across sample users
|
| 433 |
+
# for user_id in sample_users:
|
| 434 |
+
# target_score = self.predict_rating(user_id, target_book_isbn)
|
| 435 |
+
# target_scores.append(target_score)
|
| 436 |
+
|
| 437 |
+
# for book_isbn in candidate_books:
|
| 438 |
+
# if book_isbn != target_book_isbn:
|
| 439 |
+
# score = self.predict_rating(user_id, book_isbn)
|
| 440 |
+
# candidate_scores[book_isbn].append(score)
|
| 441 |
+
|
| 442 |
+
# # Calculate similarity based on correlation of user preferences
|
| 443 |
+
# similarities = []
|
| 444 |
+
# target_scores = np.array(target_scores)
|
| 445 |
+
|
| 446 |
+
# for book_isbn, scores in candidate_scores.items():
|
| 447 |
+
# if len(scores) > 0:
|
| 448 |
+
# scores_array = np.array(scores)
|
| 449 |
+
# # Calculate correlation as similarity measure
|
| 450 |
+
# correlation = np.corrcoef(target_scores, scores_array)[0, 1]
|
| 451 |
+
# if not np.isnan(correlation):
|
| 452 |
+
# similarities.append((book_isbn, correlation))
|
| 453 |
+
|
| 454 |
+
# # Sort by similarity and return top-k
|
| 455 |
+
# similarities.sort(key=lambda x: x[1], reverse=True)
|
| 456 |
+
# return similarities[:k]
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
# def load_dlrm_recommender(model_source: str = "latest") -> DLRMBookRecommender:
|
| 460 |
+
# """
|
| 461 |
+
# Load DLRM recommender from various sources
|
| 462 |
+
|
| 463 |
+
# Args:
|
| 464 |
+
# model_source: "latest" for latest MLflow run, "file" for local file, or specific run_id
|
| 465 |
+
|
| 466 |
+
# Returns:
|
| 467 |
+
# DLRMBookRecommender instance
|
| 468 |
+
# """
|
| 469 |
+
# # Check if we're in CPU-only mode
|
| 470 |
+
# cpu_only = os.environ.get('CPU_ONLY', 'false').lower() == 'true'
|
| 471 |
+
# if cpu_only:
|
| 472 |
+
# print("๐ Loading recommender in CPU-only mode")
|
| 473 |
+
# # In CPU-only mode, just return a basic recommender instance
|
| 474 |
+
# return DLRMBookRecommender()
|
| 475 |
+
|
| 476 |
+
# # Create recommender instance
|
| 477 |
+
# recommender = DLRMBookRecommender()
|
| 478 |
+
|
| 479 |
+
# # If torchrec is not available, return limited recommender
|
| 480 |
+
# if not TORCHREC_AVAILABLE:
|
| 481 |
+
# print("โ ๏ธ torchrec not available, returning limited recommender")
|
| 482 |
+
# return recommender
|
| 483 |
+
|
| 484 |
+
# if model_source == "latest":
|
| 485 |
+
# # Try to get latest MLflow run
|
| 486 |
+
# try:
|
| 487 |
+
# experiment = mlflow.get_experiment_by_name('dlrm-book-recommendation-book_recommender')
|
| 488 |
+
# if experiment:
|
| 489 |
+
# runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id],
|
| 490 |
+
# order_by=["start_time desc"], max_results=1)
|
| 491 |
+
# if len(runs) > 0:
|
| 492 |
+
# latest_run_id = runs.iloc[0].run_id
|
| 493 |
+
# recommender = DLRMBookRecommender(run_id=latest_run_id)
|
| 494 |
+
# return recommender
|
| 495 |
+
# except Exception as e:
|
| 496 |
+
# print(f"โ ๏ธ Error loading from MLflow: {e}")
|
| 497 |
+
|
| 498 |
+
# elif model_source == "file":
|
| 499 |
+
# # Try to load from local file
|
| 500 |
+
# for filename in [
|
| 501 |
+
# '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_final.pth',
|
| 502 |
+
# '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_2.pth',
|
| 503 |
+
# '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_0.pth',
|
| 504 |
+
# '/home/mr-behdadi/PROJECT/ICE/notebooks/dlrm_book_model_epoch_1.pth']:
|
| 505 |
+
# if os.path.exists(filename):
|
| 506 |
+
# try:
|
| 507 |
+
# recommender = DLRMBookRecommender(model_path=filename)
|
| 508 |
+
# return recommender
|
| 509 |
+
# except Exception as e:
|
| 510 |
+
# print(f"โ ๏ธ Error loading from {filename}: {e}")
|
| 511 |
+
|
| 512 |
+
# else:
|
| 513 |
+
# # Treat as run_id
|
| 514 |
+
# try:
|
| 515 |
+
# recommender = DLRMBookRecommender(run_id=model_source)
|
| 516 |
+
# return recommender
|
| 517 |
+
# except Exception as e:
|
| 518 |
+
# print(f"โ ๏ธ Error loading from run_id {model_source}: {e}")
|
| 519 |
+
|
| 520 |
+
# print("โ ๏ธ Could not load any trained model")
|
| 521 |
+
# return recommender
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
# def demo_dlrm_recommendations():
|
| 525 |
+
# """Demo function to show DLRM recommendations"""
|
| 526 |
+
|
| 527 |
+
# print("๐ DLRM Book Recommendation Demo")
|
| 528 |
+
# print("=" * 50)
|
| 529 |
+
|
| 530 |
+
# # Load book data for demo
|
| 531 |
+
# books_df = pd.read_csv('Books.csv', encoding='latin-1', low_memory=False)
|
| 532 |
+
# users_df = pd.read_csv('Users.csv', encoding='latin-1', low_memory=False)
|
| 533 |
+
# ratings_df = pd.read_csv('Ratings.csv', encoding='latin-1', low_memory=False)
|
| 534 |
+
|
| 535 |
+
# books_df.columns = books_df.columns.str.replace('"', '')
|
| 536 |
+
# users_df.columns = users_df.columns.str.replace('"', '')
|
| 537 |
+
# ratings_df.columns = ratings_df.columns.str.replace('"', '')
|
| 538 |
+
|
| 539 |
+
# # Load recommender
|
| 540 |
+
# recommender = load_dlrm_recommender("file")
|
| 541 |
+
|
| 542 |
+
# if recommender.model is None:
|
| 543 |
+
# print("โ No trained model found. Please run training first.")
|
| 544 |
+
# return
|
| 545 |
+
|
| 546 |
+
# # Get sample user and books
|
| 547 |
+
# sample_user_id = ratings_df['User-ID'].iloc[0]
|
| 548 |
+
# sample_books = books_df['ISBN'].head(20).tolist()
|
| 549 |
+
|
| 550 |
+
# print(f"\n๐ Getting recommendations for User {sample_user_id}")
|
| 551 |
+
# print(f"Testing with {len(sample_books)} candidate books...")
|
| 552 |
+
|
| 553 |
+
# # Get recommendations
|
| 554 |
+
# recommendations = recommender.get_user_recommendations(
|
| 555 |
+
# user_id=sample_user_id,
|
| 556 |
+
# candidate_books=sample_books,
|
| 557 |
+
# k=10
|
| 558 |
+
# )
|
| 559 |
+
|
| 560 |
+
# print(f"\n๐ฏ Top 10 DLRM Recommendations:")
|
| 561 |
+
# print("-" * 50)
|
| 562 |
+
|
| 563 |
+
# for i, (book_isbn, score) in enumerate(recommendations, 1):
|
| 564 |
+
# # Get book info
|
| 565 |
+
# book_info = books_df[books_df['ISBN'] == book_isbn]
|
| 566 |
+
# if len(book_info) > 0:
|
| 567 |
+
# book = book_info.iloc[0]
|
| 568 |
+
# title = book['Book-Title']
|
| 569 |
+
# author = book['Book-Author']
|
| 570 |
+
# print(f"{i:2d}. {title} by {author}")
|
| 571 |
+
# print(f" ISBN: {book_isbn}, Score: {score:.4f}")
|
| 572 |
+
# else:
|
| 573 |
+
# print(f"{i:2d}. ISBN: {book_isbn}, Score: {score:.4f}")
|
| 574 |
+
# print()
|
| 575 |
+
|
| 576 |
+
# # Show user's actual ratings for comparison
|
| 577 |
+
# user_ratings = ratings_df[ratings_df['User-ID'] == sample_user_id]
|
| 578 |
+
# if len(user_ratings) > 0:
|
| 579 |
+
# print(f"\n๐ User {sample_user_id}'s Actual Reading History:")
|
| 580 |
+
# print("-" * 50)
|
| 581 |
+
|
| 582 |
+
# for _, rating in user_ratings.head(5).iterrows():
|
| 583 |
+
# book_info = books_df[books_df['ISBN'] == rating['ISBN']]
|
| 584 |
+
# if len(book_info) > 0:
|
| 585 |
+
# book = book_info.iloc[0]
|
| 586 |
+
# print(f"โข {book['Book-Title']} by {book['Book-Author']} - Rating: {rating['Book-Rating']}/10")
|
| 587 |
+
|
| 588 |
+
# # Test book similarity
|
| 589 |
+
# if len(recommendations) > 0:
|
| 590 |
+
# target_book = recommendations[0][0]
|
| 591 |
+
# print(f"\n๐ Finding books similar to: {target_book}")
|
| 592 |
+
|
| 593 |
+
# similar_books = recommender.get_similar_books(
|
| 594 |
+
# target_book_isbn=target_book,
|
| 595 |
+
# candidate_books=sample_books,
|
| 596 |
+
# sample_users=ratings_df['User-ID'].head(10).tolist(),
|
| 597 |
+
# k=5
|
| 598 |
+
# )
|
| 599 |
+
|
| 600 |
+
# print(f"\n๐ Similar Books:")
|
| 601 |
+
# print("-" * 30)
|
| 602 |
+
# for i, (book_isbn, similarity) in enumerate(similar_books, 1):
|
| 603 |
+
# book_info = books_df[books_df['ISBN'] == book_isbn]
|
| 604 |
+
# if len(book_info) > 0:
|
| 605 |
+
# book = book_info.iloc[0]
|
| 606 |
+
# print(f"{i}. {book['Book-Title']} (similarity: {similarity:.3f})")
|
| 607 |
+
|
| 608 |
+
# if __name__ == "__main__":
|
| 609 |
+
# demo_dlrm_recommendations()
|
| 610 |
+
|
| 611 |
+
|
| 612 |
+
"""
|
| 613 |
+
DLRM Inference Engine for Book Recommendations - Hugging Face Space Compatible
|
| 614 |
+
Lightweight version without PyTorch dependencies
|
| 615 |
+
"""
|
| 616 |
+
|
| 617 |
+
import os
|
| 618 |
+
import sys
|
| 619 |
+
import numpy as np
|
| 620 |
+
import pandas as pd
|
| 621 |
+
import pickle
|
| 622 |
+
from typing import List, Dict, Tuple, Optional, Any
|
| 623 |
+
import warnings
|
| 624 |
+
warnings.filterwarnings('ignore')
|
| 625 |
+
|
| 626 |
+
# Force CPU-only mode for HF Spaces
|
| 627 |
+
CPU_ONLY = True
|
| 628 |
+
os.environ['CPU_ONLY'] = 'true'
|
| 629 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = ''
|
| 630 |
+
|
| 631 |
+
print("๐ Running in HF Spaces CPU-only mode (no PyTorch dependencies)")
|
| 632 |
+
|
| 633 |
+
class DLRMBookRecommender:
|
| 634 |
+
"""DLRM-based book recommender for inference - HF Spaces compatible"""
|
| 635 |
+
|
| 636 |
+
def __init__(self, model_path: Optional[str] = None, run_id: Optional[str] = None):
    """
    Initialize DLRM book recommender in simulation mode

    No model is loaded: ``self.model`` stays ``None`` and neither argument
    is read by this method.

    Args:
        model_path: Path to saved model state dict (not used in HF Spaces)
        run_id: MLflow run ID (not used in HF Spaces)
    """
    self.device = "cpu"
    self.model = None
    self.preprocessing_info = None
    self.cpu_only = True

    # Feature names, kept in the same order as the embedding counts below.
    self.dense_cols = [
        'Age_normalized', 'Year-Of-Publication', 'user_activity',
        'book_popularity', 'user_avg_rating', 'book_avg_rating'
    ]
    self.cat_cols = [
        'User-ID', 'ISBN', 'Publisher', 'Country',
        'Age_Group', 'Publication_Decade', 'Rating_Level'
    ]
    self.emb_counts = [1000, 5000, 500, 50, 6, 8, 3]  # Example counts

    print("✅ DLRM recommender initialized in HF Spaces mode")

    # Load minimal preprocessing info for demo
    self._create_demo_preprocessing_info()
|
| 662 |
+
|
| 663 |
+
def _create_demo_preprocessing_info(self):
    """Populate placeholder preprocessing metadata, encoders and scaler.

    Fills ``self.preprocessing_info`` with the column lists and embedding
    counts already stored on the instance plus fixed sample-count figures,
    then attaches one ``MockEncoder`` per categorical encoder attribute and
    a ``MockScaler``.
    """
    self.preprocessing_info = dict(
        dense_cols=self.dense_cols,
        cat_cols=self.cat_cols,
        emb_counts=self.emb_counts,
        total_samples=100000,
        positive_rate=0.6,
        train_samples=70000,
        val_samples=15000,
        test_samples=15000,
    )

    # Stand-in encoders (for demo purposes); each attribute gets its own instance.
    for encoder_attr in ('user_encoder', 'book_encoder',
                         'publisher_encoder', 'location_encoder'):
        setattr(self, encoder_attr, MockEncoder())
    self.scaler = MockScaler()

    print("✅ Demo preprocessing info created")
|
| 684 |
|
| 685 |
def predict_rating(self, user_id: int, book_isbn: str,
|
| 686 |
user_data: Optional[Dict] = None,
|
| 687 |
book_data: Optional[Dict] = None) -> float:
|
| 688 |
"""
|
| 689 |
+
Predict rating probability for user-book pair using simulation
|
| 690 |
|
| 691 |
Args:
|
| 692 |
user_id: User ID
|
|
|
|
| 697 |
Returns:
|
| 698 |
Prediction probability (0-1)
|
| 699 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 700 |
try:
|
| 701 |
+
# Simulate DLRM prediction using heuristic approach
|
| 702 |
+
prediction = self._simulate_dlrm_prediction(user_id, book_isbn, user_data, book_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
return prediction
|
| 704 |
|
| 705 |
except Exception as e:
|
| 706 |
print(f"Error in prediction: {e}")
|
| 707 |
+
return 0.5 # Return neutral prediction on error
|
| 708 |
+
|
| 709 |
+
def _simulate_dlrm_prediction(self, user_id: int, book_isbn: str,
|
| 710 |
+
user_data: Optional[Dict] = None,
|
| 711 |
+
book_data: Optional[Dict] = None) -> float:
|
| 712 |
+
"""Simulate DLRM prediction using heuristics"""
|
| 713 |
+
|
| 714 |
+
# Use deterministic random based on inputs
|
| 715 |
+
np.random.seed(hash(f"{user_id}_{book_isbn}") % 2**32)
|
| 716 |
+
|
| 717 |
+
# Base prediction
|
| 718 |
+
base_score = 0.5
|
| 719 |
+
|
| 720 |
+
# User factors
|
| 721 |
+
user_age = user_data.get('Age', 30) if user_data else 30
|
| 722 |
+
age_factor = 0.0
|
| 723 |
+
if 18 <= user_age <= 25:
|
| 724 |
+
age_factor = 0.1 # Young adults like popular books
|
| 725 |
+
elif 26 <= user_age <= 40:
|
| 726 |
+
age_factor = 0.05 # Adults have varied tastes
|
| 727 |
+
else:
|
| 728 |
+
age_factor = -0.05 # Older users are more selective
|
| 729 |
+
|
| 730 |
+
# Book factors
|
| 731 |
+
book_year = book_data.get('Year-Of-Publication', 2000) if book_data else 2000
|
| 732 |
+
book_year = int(book_year) if str(book_year).isdigit() else 2000
|
| 733 |
+
|
| 734 |
+
year_factor = 0.0
|
| 735 |
+
if book_year >= 2010:
|
| 736 |
+
year_factor = 0.1 # Recent books get slight boost
|
| 737 |
+
elif book_year >= 2000:
|
| 738 |
+
year_factor = 0.05
|
| 739 |
+
elif book_year >= 1990:
|
| 740 |
+
year_factor = 0.0
|
| 741 |
+
else:
|
| 742 |
+
year_factor = -0.1 # Very old books less likely
|
| 743 |
+
|
| 744 |
+
# Publisher factor (simplified)
|
| 745 |
+
publisher = book_data.get('Publisher', '') if book_data else ''
|
| 746 |
+
publisher_factor = 0.0
|
| 747 |
+
popular_publishers = ['penguin', 'random', 'harper', 'simon', 'macmillan']
|
| 748 |
+
if any(pub in publisher.lower() for pub in popular_publishers):
|
| 749 |
+
publisher_factor = 0.08
|
| 750 |
+
|
| 751 |
+
# User-book interaction simulation
|
| 752 |
+
interaction_factor = np.random.uniform(-0.15, 0.15)
|
| 753 |
+
|
| 754 |
+
# Genre preference simulation (based on book title/publisher)
|
| 755 |
+
genre_factor = np.random.uniform(-0.1, 0.1)
|
| 756 |
+
|
| 757 |
+
# Combine all factors
|
| 758 |
+
final_score = (base_score +
|
| 759 |
+
age_factor +
|
| 760 |
+
year_factor +
|
| 761 |
+
publisher_factor +
|
| 762 |
+
interaction_factor +
|
| 763 |
+
genre_factor)
|
| 764 |
+
|
| 765 |
+
# Add some controlled randomness
|
| 766 |
+
noise = np.random.uniform(-0.05, 0.05)
|
| 767 |
+
final_score += noise
|
| 768 |
+
|
| 769 |
+
# Clamp to valid range
|
| 770 |
+
final_score = max(0.0, min(1.0, final_score))
|
| 771 |
+
|
| 772 |
+
return final_score
|
| 773 |
|
| 774 |
def get_user_recommendations(self, user_id: int,
|
| 775 |
candidate_books: List[str],
|
|
|
|
| 787 |
Returns:
|
| 788 |
List of (book_isbn, prediction_score) tuples
|
| 789 |
"""
|
| 790 |
+
print(f"Generating simulated recommendations for user {user_id} from {len(candidate_books)} candidates...")
|
|
|
|
|
|
|
| 791 |
|
| 792 |
recommendations = []
|
| 793 |
|
|
|
|
|
|
|
| 794 |
for book_isbn in candidate_books:
|
| 795 |
score = self.predict_rating(user_id, book_isbn, user_data)
|
| 796 |
recommendations.append((book_isbn, score))
|
|
|
|
| 857 |
if len(scores) > 0:
|
| 858 |
scores_array = np.array(scores)
|
| 859 |
# Calculate correlation as similarity measure
|
| 860 |
+
if len(scores_array) > 1:
|
| 861 |
+
correlation = np.corrcoef(target_scores, scores_array)[0, 1]
|
| 862 |
+
if not np.isnan(correlation):
|
| 863 |
+
similarities.append((book_isbn, correlation))
|
| 864 |
+
else:
|
| 865 |
+
# Fallback similarity measure
|
| 866 |
+
similarity = 1.0 - abs(target_scores[0] - scores_array[0])
|
| 867 |
+
similarities.append((book_isbn, similarity))
|
| 868 |
|
| 869 |
# Sort by similarity and return top-k
|
| 870 |
similarities.sort(key=lambda x: x[1], reverse=True)
|
| 871 |
return similarities[:k]
|
| 872 |
|
| 873 |
|
| 874 |
+
class MockEncoder:
    """Mock encoder for demo purposes"""

    def __init__(self):
        # Always empty: this stand-in never learns any classes.
        self.classes_ = []

    def transform(self, values):
        """Map each value to a stable pseudo-code in the range 0-999

        Args:
            values: Iterable of values; each one is converted with ``str()``

        Returns:
            List of ints, one per input value, in input order
        """
        # Local import: zlib is not among this module's top-level imports.
        import zlib

        # CRC32 instead of hash(): str hashes are salted per interpreter
        # process, so hash() would yield different codes on every run.
        return [zlib.crc32(str(val).encode("utf-8", "replace")) % 1000
                for val in values]
|
| 883 |
+
|
| 884 |
+
|
| 885 |
+
class MockScaler:
    """Stand-in scaler used by the demo recommender"""

    def transform(self, X):
        """Rescale ``X`` with a min-max formula over the whole array

        The minimum and maximum are taken across all elements (no axis is
        given), and a small epsilon keeps the divisor non-zero when every
        element is equal.
        """
        values = np.array(X)
        lowest = values.min()
        spread = values.max() - lowest
        # Simple min-max normalization simulation
        return (values - lowest) / (spread + 1e-8)
|
| 893 |
+
|
| 894 |
+
|
| 895 |
+
def load_dlrm_recommender(model_source: str = "demo") -> DLRMBookRecommender:
    """
    Build the demo DLRM recommender used on HF Spaces

    Args:
        model_source: Not read by this function; the demo recommender is
            returned for every value

    Returns:
        DLRMBookRecommender instance
    """
    print("๐ Loading DLRM recommender in HF Spaces demo mode")

    # The demo version is the only one constructed here.
    demo_recommender = DLRMBookRecommender()

    print("✅ DLRM recommender loaded successfully")
    return demo_recommender
|
| 912 |
|
| 913 |
|
| 914 |
def demo_dlrm_recommendations():
|
| 915 |
"""Demo function to show DLRM recommendations"""
|
| 916 |
|
| 917 |
+
print("๐ DLRM Book Recommendation Demo - HF Spaces Version")
|
| 918 |
print("=" * 50)
|
| 919 |
|
| 920 |
+
# Create sample data for demo
|
| 921 |
+
sample_books_data = {
|
| 922 |
+
'ISBN': ['0439023483', '0439358078', '0316666343', '0452264464', '0061120081'],
|
| 923 |
+
'Book-Title': [
|
| 924 |
+
'The Hunger Games',
|
| 925 |
+
'Harry Potter and the Chamber of Secrets',
|
| 926 |
+
'The Catcher in the Rye',
|
| 927 |
+
'1984',
|
| 928 |
+
'To Kill a Mockingbird'
|
| 929 |
+
],
|
| 930 |
+
'Book-Author': [
|
| 931 |
+
'Suzanne Collins',
|
| 932 |
+
'J.K. Rowling',
|
| 933 |
+
'J.D. Salinger',
|
| 934 |
+
'George Orwell',
|
| 935 |
+
'Harper Lee'
|
| 936 |
+
],
|
| 937 |
+
'Year-Of-Publication': [2008, 1999, 1951, 1949, 1960],
|
| 938 |
+
'Publisher': ['Scholastic', 'Scholastic', 'Little, Brown', 'Signet', 'Harper']
|
| 939 |
+
}
|
| 940 |
|
| 941 |
+
books_df = pd.DataFrame(sample_books_data)
|
|
|
|
|
|
|
| 942 |
|
| 943 |
# Load recommender
|
| 944 |
+
recommender = load_dlrm_recommender()
|
| 945 |
|
| 946 |
+
# Demo user
|
| 947 |
+
sample_user_id = 1
|
| 948 |
+
sample_books = books_df['ISBN'].tolist()
|
| 949 |
|
| 950 |
+
print(f"\n๐ Getting simulated recommendations for User {sample_user_id}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 951 |
print(f"Testing with {len(sample_books)} candidate books...")
|
| 952 |
|
| 953 |
# Get recommendations
|
| 954 |
recommendations = recommender.get_user_recommendations(
|
| 955 |
user_id=sample_user_id,
|
| 956 |
candidate_books=sample_books,
|
| 957 |
+
k=3,
|
| 958 |
+
user_data={'Age': 25, 'Location': 'New York, USA'}
|
| 959 |
)
|
| 960 |
|
| 961 |
+
print(f"\n๐ฏ Top 3 Simulated DLRM Recommendations:")
|
| 962 |
print("-" * 50)
|
| 963 |
|
| 964 |
for i, (book_isbn, score) in enumerate(recommendations, 1):
|
|
|
|
| 969 |
title = book['Book-Title']
|
| 970 |
author = book['Book-Author']
|
| 971 |
print(f"{i:2d}. {title} by {author}")
|
| 972 |
+
print(f" ISBN: {book_isbn}, Simulated Score: {score:.4f}")
|
| 973 |
else:
|
| 974 |
+
print(f"{i:2d}. ISBN: {book_isbn}, Simulated Score: {score:.4f}")
|
| 975 |
print()
|
| 976 |
|
| 977 |
+
print("โ
Demo completed successfully!")
|
| 978 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 979 |
|
| 980 |
if __name__ == "__main__":
|
| 981 |
demo_dlrm_recommendations()
|