| """ |
| User Simulator for offline evaluation of the recommendation policy. |
| |
| Why simulate users? |
| Offline evaluation typically measures precision/recall against held-out ratings, |
| but this misses the sequential, contextual nature of real interactions. |
| The simulator models a user as a bandit arm whose behaviour follows a |
| logistic function of item relevance, enabling us to measure: |
| • Cumulative reward (clicks / ratings) over a session |
| • Exploration–exploitation trade-off |
| • Effects of re-ranking on engagement |
| |
| Simulator model: |
| P(click | user, item) = σ( true_pref(user, item) + context_noise ) |
| where true_pref = user_features · item_features (dot product) |
| and context_noise ~ N(0, noise_std²) |
| |
| P(high_rating | click) = σ( true_pref - rating_threshold ) |
| """ |
|
|
| from __future__ import annotations |
|
|
| import logging |
| from dataclasses import dataclass, field |
| from typing import Any |
|
|
| import numpy as np |
|
|
# Module-level logger, named after the module so handlers/levels can be
# configured per module by the host application.
logger = logging.getLogger(__name__)
|
|
|
|
@dataclass
class SimulatedSession:
    """
    Outcome of one simulated recommendation session for a single user.

    Attributes
    ----------
    user_id : index of the simulated user
    clicks : item indices the user clicked, in presentation order
    skips : item indices the user was shown but did not click
    ratings : mapping of clicked item index -> simulated rating
    total_reward : running sum of the per-click rewards accumulated in the session
    """

    user_id: int
    # Mutable containers use default_factory so each session owns its own
    # list/dict instead of sharing one across instances.
    clicks: list[int] = field(default_factory=list)
    skips: list[int] = field(default_factory=list)
    ratings: dict[int, float] = field(default_factory=dict)
    total_reward: float = 0.0
|
|
|
|
class UserSimulator:
    """
    Simulates user interactions given a recommendation list.

    Parameters
    ----------
    user_features : (num_users, feat_dim) array of user feature vectors
    item_features : (num_movies, feat_dim) array of item feature vectors
    noise_std : standard deviation of per-interaction Gaussian noise
    rating_threshold : logit threshold above which a click becomes a high-rating
    seed : seed for the simulator's random generator. The default (42) keeps
        runs reproducible; pass ``None`` for a non-deterministic generator.
    """

    def __init__(
        self,
        user_features: np.ndarray,
        item_features: np.ndarray,
        noise_std: float = 0.5,
        rating_threshold: float = 0.2,
        seed: "int | None" = 42,
    ):
        # astype() always copies, so later mutation of the caller's arrays
        # cannot change the simulator's preference model.
        self.user_features = np.asarray(user_features).astype(np.float32)
        self.item_features = np.asarray(item_features).astype(np.float32)
        self.noise_std = noise_std
        self.rating_threshold = rating_threshold
        self.rng = np.random.default_rng(seed)

    def _true_preference(self, user_idx: int, movie_idx: int) -> float:
        """Dot product preference score (unnormalised)."""
        u = self.user_features[user_idx]
        v = self.item_features[movie_idx]

        # If the two feature spaces differ in width, only the shared leading
        # dimensions contribute to the score.
        d = min(len(u), len(v))
        return float(np.dot(u[:d], v[:d]))

    @staticmethod
    def _sigmoid(x: float) -> float:
        """Logistic function; the input is clipped to [-30, 30] so exp() cannot overflow."""
        return 1.0 / (1.0 + np.exp(-np.clip(x, -30, 30)))

    def simulate_click(self, user_idx: int, movie_idx: int) -> bool:
        """Stochastically decide if the user clicks on the item."""
        pref = self._true_preference(user_idx, movie_idx)
        noise = self.rng.normal(0, self.noise_std)
        p_click = self._sigmoid(pref + noise)
        # The comparison yields numpy.bool_; convert so callers get the
        # plain bool the signature promises.
        return bool(self.rng.random() < p_click)

    def simulate_rating(self, user_idx: int, movie_idx: int) -> float:
        """Sample a rating given that the user clicked (in [1, 5])."""
        pref = self._true_preference(user_idx, movie_idx)
        # Rating noise uses half the click-noise standard deviation.
        noise = self.rng.normal(0, self.noise_std * 0.5)
        logit = pref + noise - self.rating_threshold
        p_high = self._sigmoid(logit)

        # Map the probability onto the 1-5 scale, add observation jitter,
        # then clamp back into the valid rating range.
        raw = 1.0 + 4.0 * p_high + self.rng.normal(0, 0.3)
        return float(np.clip(raw, 1.0, 5.0))

    def run_session(
        self,
        user_idx: int,
        recommended_ids: list[int],
        position_bias: bool = True,
    ) -> "SimulatedSession":
        """
        Simulate a full session for one user.

        position_bias: items at (0-based) rank r get click probability multiplied
            by 1/log2(r+2), modelling the fact that users rarely scroll past the
            first few rows. The top item (r = 0) is therefore not attenuated.

        Returns a SimulatedSession holding the clicked and skipped items, the
        rating of each clicked item, and the accumulated reward.
        """
        session = SimulatedSession(user_id=user_idx)
        for rank, movie_idx in enumerate(recommended_ids):
            click = self.simulate_click(user_idx, movie_idx)
            if position_bias:
                decay = 1.0 / np.log2(rank + 2)
                # Short-circuit on purpose: the extra random draw is only
                # consumed for items that passed the relevance click test.
                click = click and (self.rng.random() < decay)

            if not click:
                session.skips.append(movie_idx)
                continue

            session.clicks.append(movie_idx)
            rating = self.simulate_rating(user_idx, movie_idx)
            session.ratings[movie_idx] = rating
            # Rescale the [1, 5] rating to a [0, 1] reward.
            session.total_reward += (rating - 1.0) / 4.0

        return session

    def evaluate_policy(
        self,
        recommender_fn: Any,
        test_user_idxs: list[int],
        top_k: int = 10,
    ) -> dict[str, float]:
        """
        Evaluate a recommendation policy over a set of test users.

        recommender_fn should accept a user_idx (int) and return a sliceable
        sequence of movie_idx (a list or a NumPy array); only the first
        ``top_k`` entries are shown to the simulated user. Users for whom the
        policy returns nothing are skipped.

        Returns aggregated metrics:
            simulated_ctr        : mean per-user fraction of shown items clicked
            simulated_avg_reward : mean per-user session reward
            simulated_coverage   : number of distinct items clicked by any user
            n_users_evaluated    : number of users that received recommendations
        """
        ctr_list, reward_list, cov_items = [], [], set()

        for user_idx in test_user_idxs:
            recs = recommender_fn(user_idx)[:top_k]
            # len() rather than truthiness: `not recs` is ambiguous (or an
            # error) when the policy returns a NumPy array.
            if len(recs) == 0:
                continue
            session = self.run_session(user_idx, recs, position_bias=True)

            ctr_list.append(len(session.clicks) / len(recs))
            reward_list.append(session.total_reward)
            cov_items.update(session.clicks)

        return {
            "simulated_ctr": float(np.mean(ctr_list)) if ctr_list else 0.0,
            "simulated_avg_reward": float(np.mean(reward_list)) if reward_list else 0.0,
            "simulated_coverage": len(cov_items),
            "n_users_evaluated": len(ctr_list),
        }
|
|