Cinewatch-recommender / models /user_simulator.py
IntimateUser6969
Deploy CineMatch backend: Two-Tower + DeepFM + MMR + Upstash Redis
1359487
Raw
History Blame Contribute Delete
5.8 kB
"""
User Simulator for offline evaluation of the recommendation policy.
Why simulate users?
Offline evaluation typically measures precision/recall against held-out ratings,
but this misses the sequential, contextual nature of real interactions.
The simulator models a user as a bandit arm whose behaviour follows a
logistic function of item relevance, enabling us to measure:
• Cumulative reward (clicks / ratings) over a session
• Exploration–exploitation trade-off
• Effects of re-ranking on engagement
Simulator model:
P(click | user, item) = σ( true_pref(user, item) + context_noise )
where true_pref = user_features · item_features (dot product)
and context_noise ~ N(0, noise_std²)
P(high_rating | click) = σ( true_pref + rating_noise - rating_threshold )
where rating_noise ~ N(0, (0.5 · noise_std)²)
"""
from __future__ import annotations
import logging
from dataclasses import dataclass, field
from typing import Any
import numpy as np
logger = logging.getLogger(__name__)
@dataclass
class SimulatedSession:
    """Outcome of one simulated recommendation session for a single user.

    Every container field gets its own fresh object per instance
    (``default_factory``), so sessions never share state.
    """

    # Index of the user the session was simulated for.
    user_id: int
    # Movie indices the simulated user clicked, in recommendation order.
    clicks: list[int] = field(default_factory=list)
    # Movie indices that were shown but not clicked, in recommendation order.
    skips: list[int] = field(default_factory=list)
    # Simulated rating for each clicked movie, keyed by movie index.
    ratings: dict[int, float] = field(default_factory=dict)
    # Running sum of per-click rewards accumulated over the session.
    total_reward: float = 0.0
class UserSimulator:
    """
    Simulates user interactions given a recommendation list.

    Clicks and ratings are sampled from a logistic model of the dot-product
    preference between a user's and an item's feature vectors.

    Parameters
    ----------
    user_features : (num_users, feat_dim) array of user feature vectors
    item_features : (num_movies, feat_dim) array of item feature vectors
    noise_std : standard deviation of per-interaction Gaussian noise
    rating_threshold : logit threshold above which a click becomes a high-rating
    seed : seed for the simulator's private random generator. Defaults to 42,
        the value that used to be hard-coded; pass ``None`` for a
        non-reproducible stream.
    """

    def __init__(
        self,
        user_features: np.ndarray,
        item_features: np.ndarray,
        noise_std: float = 0.5,
        rating_threshold: float = 0.2,
        seed: int | None = 42,
    ):
        # np.asarray lets plain nested sequences through; astype still makes a
        # private float32 copy, so later changes to the caller's arrays do not
        # leak into the simulator.
        self.user_features = np.asarray(user_features).astype(np.float32)
        self.item_features = np.asarray(item_features).astype(np.float32)
        self.noise_std = noise_std
        self.rating_threshold = rating_threshold
        self.rng = np.random.default_rng(seed)

    def _true_preference(self, user_idx: int, movie_idx: int) -> float:
        """Return the unnormalised dot-product preference of a user for a movie."""
        u = self.user_features[user_idx]
        v = self.item_features[movie_idx]
        # User and item vectors may differ in width; only the leading
        # dimensions present in both contribute to the score.
        d = min(len(u), len(v))
        return float(np.dot(u[:d], v[:d]))

    @staticmethod
    def _sigmoid(x: float) -> float:
        """Logistic function; the input is clipped to [-30, 30] before exp."""
        # float() keeps the annotated return type (NumPy would hand back
        # np.float64 otherwise).
        return float(1.0 / (1.0 + np.exp(-np.clip(x, -30, 30))))

    def simulate_click(self, user_idx: int, movie_idx: int) -> bool:
        """Stochastically decide if the user clicks on the item.

        The click probability is sigmoid(preference + Gaussian noise); a
        uniform draw against that probability decides the outcome.
        """
        pref = self._true_preference(user_idx, movie_idx)
        noise = self.rng.normal(0, self.noise_std)
        p_click = self._sigmoid(pref + noise)
        # bool() so callers get a plain Python bool rather than np.bool_.
        return bool(self.rng.random() < p_click)

    def simulate_rating(self, user_idx: int, movie_idx: int) -> float:
        """Sample a rating given that the user clicked (in [1, 5])."""
        pref = self._true_preference(user_idx, movie_idx)
        # Rating noise uses half the click-noise standard deviation.
        noise = self.rng.normal(0, self.noise_std * 0.5)
        logit = pref + noise - self.rating_threshold
        p_high = self._sigmoid(logit)
        # Map the probability linearly onto 1-5, jitter it, then clamp back
        # into the valid rating range.
        raw = 1.0 + 4.0 * p_high + self.rng.normal(0, 0.3)
        return float(np.clip(raw, 1.0, 5.0))

    def run_session(
        self,
        user_idx: int,
        recommended_ids: list[int],
        position_bias: bool = True,
    ) -> SimulatedSession:
        """
        Simulate a full session for one user.

        Each recommended movie is offered in order. A click records a
        simulated rating and adds (rating - 1) / 4, i.e. a value in [0, 1],
        to the session reward; anything else is recorded as a skip.

        position_bias: items at rank r get click probability multiplied by
        1/log2(r+2), modelling the fact that users rarely scroll past the
        first few rows.
        """
        session = SimulatedSession(user_id=user_idx)
        for rank, movie_idx in enumerate(recommended_ids):
            click = self.simulate_click(user_idx, movie_idx)
            if click and position_bias:
                # A second independent draw that succeeds with probability
                # `decay` is equivalent to scaling the click probability by
                # `decay`. It is only drawn after a click, which keeps the
                # random stream identical for a given seed.
                decay = 1.0 / np.log2(rank + 2)
                click = bool(self.rng.random() < decay)
            if not click:
                session.skips.append(movie_idx)
                continue
            session.clicks.append(movie_idx)
            rating = self.simulate_rating(user_idx, movie_idx)
            session.ratings[movie_idx] = rating
            # Reward = normalised rating
            session.total_reward += (rating - 1.0) / 4.0
        return session

    def evaluate_policy(
        self,
        recommender_fn: Any,  # callable(user_idx) -> list[int] of movie_idxs
        test_user_idxs: list[int],
        top_k: int = 10,
    ) -> dict[str, float]:
        """
        Evaluate a recommendation policy over a set of test users.

        recommender_fn should accept a user_idx (int) and return a sequence of
        movie_idx; only the first ``top_k`` entries are used. Users for whom
        the policy returns nothing are skipped and not counted.

        Returns aggregated metrics:
        simulated_ctr        - mean per-user fraction of shown items clicked
        simulated_avg_reward - mean per-user session reward
        simulated_coverage   - number of distinct movies clicked by any user
        n_users_evaluated    - number of users with at least one recommendation
        """
        ctr_list: list[float] = []
        reward_list: list[float] = []
        cov_items: set[int] = set()
        for user_idx in test_user_idxs:
            # Materialise as a list of plain ints: a NumPy array result would
            # make the emptiness check below raise "truth value is ambiguous".
            recs = [int(m) for m in recommender_fn(user_idx)[:top_k]]
            if not recs:
                continue
            session = self.run_session(user_idx, recs, position_bias=True)
            ctr_list.append(len(session.clicks) / len(recs))
            reward_list.append(session.total_reward)
            cov_items.update(session.clicks)
        return {
            "simulated_ctr": float(np.mean(ctr_list)) if ctr_list else 0.0,
            "simulated_avg_reward": float(np.mean(reward_list)) if reward_list else 0.0,
            "simulated_coverage": len(cov_items),
            "n_users_evaluated": len(ctr_list),
        }