Spaces:

IntimateUser6969
/

Cinewatch-recommender

Sleeping

Cinewatch-recommender / models /user_simulator.py

IntimateUser6969

Deploy CineMatch backend: Two-Tower + DeepFM + MMR + Upstash Redis

1359487 2 months ago

5.8 kB

	"""
	User Simulator for offline evaluation of the recommendation policy.

	Why simulate users?
	Offline evaluation typically measures precision/recall against held-out ratings,
	but this misses the sequential, contextual nature of real interactions.
	The simulator models a user as a bandit arm whose behaviour follows a
	logistic function of item relevance, enabling us to measure:
	• Cumulative reward (clicks / ratings) over a session
	• Exploration–exploitation trade-off
	• Effects of re-ranking on engagement

	Simulator model:
	P(click | user, item) = σ( true_pref(user, item) + context_noise )
	where true_pref = user_features · item_features (dot product)
	and context_noise ~ N(0, noise_std²)

	P(high_rating | click) = σ( true_pref + rating_noise - rating_threshold )
	where rating_noise ~ N(0, (noise_std / 2)²)
	"""

	from __future__ import annotations

	import logging
	from dataclasses import dataclass, field
	from typing import Any

	import numpy as np

	logger = logging.getLogger(__name__)


	@dataclass
	class SimulatedSession:
	    """Record of one simulated recommendation session for a single user."""

	    # Index of the user the session was simulated for.
	    user_id: int

	    # Items the user clicked, in the order they were presented.
	    clicks: list[int] = field(default_factory=list)

	    # Items the user was shown but did not click.
	    skips: list[int] = field(default_factory=list)

	    # Simulated rating for each clicked item, keyed by item index.
	    ratings: dict[int, float] = field(default_factory=dict)

	    # Running sum of per-click rewards accumulated over the session.
	    total_reward: float = 0.0


	class UserSimulator:
	    """
	    Simulates user interactions given a recommendation list.

	    A user's affinity for an item is the dot product of their feature rows.
	    Clicks are Bernoulli draws on a sigmoid of that (noised) affinity, and
	    ratings map a second sigmoid onto the 1-5 scale.

	    Parameters
	    ----------
	    user_features : (num_users, feat_dim) array of user feature vectors
	    item_features : (num_movies, feat_dim) array of item feature vectors
	    noise_std : standard deviation of per-interaction Gaussian noise
	    rating_threshold : logit offset subtracted from the preference before it
	        is mapped to a rating; larger values yield lower ratings
	    seed : seed for the internal NumPy random generator. The default (42)
	        keeps runs reproducible; pass ``None`` for OS-provided entropy.
	    """

	    def __init__(
	        self,
	        user_features: np.ndarray,
	        item_features: np.ndarray,
	        noise_std: float = 0.5,
	        rating_threshold: float = 0.2,
	        seed: int | None = 42,
	    ):
	        # np.array copies (as .astype did) but also accepts nested lists.
	        self.user_features = np.array(user_features, dtype=np.float32)
	        self.item_features = np.array(item_features, dtype=np.float32)
	        self.noise_std = noise_std
	        self.rating_threshold = rating_threshold
	        self.rng = np.random.default_rng(seed)

	    def _true_preference(self, user_idx: int, movie_idx: int) -> float:
	        """Return the unnormalised dot-product preference of a user for an item."""
	        u = self.user_features[user_idx]
	        v = self.item_features[movie_idx]
	        # Use only as many dims as both share, so mismatched widths do not raise.
	        d = min(len(u), len(v))
	        return float(np.dot(u[:d], v[:d]))

	    @staticmethod
	    def _sigmoid(x: float) -> float:
	        """Logistic function; the input is clipped to [-30, 30] to avoid overflow."""
	        return 1.0 / (1.0 + np.exp(-np.clip(x, -30, 30)))

	    def simulate_click(self, user_idx: int, movie_idx: int) -> bool:
	        """Stochastically decide if the user clicks on the item."""
	        pref = self._true_preference(user_idx, movie_idx)
	        noise = self.rng.normal(0, self.noise_std)
	        p_click = self._sigmoid(pref + noise)
	        # The comparison yields numpy.bool_; convert so callers get a real bool.
	        return bool(self.rng.random() < p_click)

	    def simulate_rating(self, user_idx: int, movie_idx: int) -> float:
	        """Sample a rating given that the user clicked (in [1, 5])."""
	        pref = self._true_preference(user_idx, movie_idx)
	        # Rating noise uses half the click-noise standard deviation.
	        noise = self.rng.normal(0, self.noise_std * 0.5)
	        logit = pref + noise - self.rating_threshold
	        p_high = self._sigmoid(logit)
	        # Map probability to a 1-5 rating, jitter it, then clamp to the scale.
	        raw = 1.0 + 4.0 * p_high + self.rng.normal(0, 0.3)
	        return float(np.clip(raw, 1.0, 5.0))

	    def run_session(
	        self,
	        user_idx: int,
	        recommended_ids: list[int],
	        position_bias: bool = True,
	    ) -> SimulatedSession:
	        """
	        Simulate a full session for one user.

	        Each recommended item is either clicked (and then rated) or skipped.
	        The session reward is the sum of ``(rating - 1) / 4`` over clicked items.

	        position_bias: items at 0-based rank r get click probability multiplied
	        by 1/log2(r+2), modelling the fact that users rarely scroll past the
	        first few rows.
	        """
	        session = SimulatedSession(user_id=user_idx)
	        for rank, movie_idx in enumerate(recommended_ids):
	            clicked = self.simulate_click(user_idx, movie_idx)
	            if clicked and position_bias:
	                # Keep the click with probability `decay`; together with the
	                # independent click draw this multiplies P(click) by `decay`.
	                decay = 1.0 / np.log2(rank + 2)
	                clicked = bool(self.rng.random() < decay)

	            if clicked:
	                session.clicks.append(movie_idx)
	                rating = self.simulate_rating(user_idx, movie_idx)
	                session.ratings[movie_idx] = rating
	                # Reward = rating normalised from [1, 5] to [0, 1].
	                session.total_reward += (rating - 1.0) / 4.0
	            else:
	                session.skips.append(movie_idx)

	        return session

	    def evaluate_policy(
	        self,
	        recommender_fn: Any,  # callable(user_idx) -> list[int] of movie_idxs
	        test_user_idxs: list[int],
	        top_k: int = 10,
	    ) -> dict[str, float]:
	        """
	        Evaluate a recommendation policy over a set of test users.

	        recommender_fn should accept a user_idx (int) and return a sequence of
	        movie_idx (a list or a 1-D array); only its first ``top_k`` entries
	        are used. Users that receive no recommendations are skipped.

	        Returns a dict with:
	          simulated_ctr        - mean per-user fraction of shown items clicked
	          simulated_avg_reward - mean per-user session reward
	          simulated_coverage   - number of distinct items clicked by anyone
	          n_users_evaluated    - number of users with at least one recommendation
	        """
	        ctr_list, reward_list, cov_items = [], [], set()

	        for user_idx in test_user_idxs:
	            # list() makes the emptiness test safe for NumPy arrays, whose
	            # truth value is ambiguous and would raise ValueError.
	            recs = list(recommender_fn(user_idx)[:top_k])
	            if not recs:
	                continue
	            session = self.run_session(user_idx, recs, position_bias=True)

	            ctr_list.append(len(session.clicks) / len(recs))
	            reward_list.append(session.total_reward)
	            cov_items.update(session.clicks)

	        return {
	            "simulated_ctr": float(np.mean(ctr_list)) if ctr_list else 0.0,
	            "simulated_avg_reward": float(np.mean(reward_list)) if reward_list else 0.0,
	            "simulated_coverage": len(cov_items),
	            "n_users_evaluated": len(ctr_list),
	        }