Spaces:

IntimateUser6969
/

Cinewatch-recommender

Sleeping

Cinewatch-recommender / serving /inference.py

IntimateUser6969

Deploy CineMatch backend: Two-Tower + DeepFM + MMR + Upstash Redis

1359487 2 months ago

14.4 kB

	"""
	RecommendationEngine — full inference pipeline.

	Request flow:
	user_idx
	→ FeatureStore.get_user_embedding() (or compute on-the-fly)
	→ FAISSIndex.search(k=200) (candidate generation)
	→ filter seen items
	→ DeepFM.predict_proba() for all candidates (ranking)
	→ sort by DeepFM score, take top-50
	→ MMR re-rank to top-k (diversity)
	→ generate explanation strings
	→ return recommendations

	Also exposes:
	• search(query) — text search over movie titles/genres
	• get_similar(movie_idx) — similar movies via FAISS
	• get_explanation(user_idx, movie_idx) — human-readable reason
	"""

	from __future__ import annotations

	import logging
	import pickle
	import re
	from pathlib import Path
	from typing import Any, Optional

	import numpy as np
	import torch

	from data.preprocessor import Preprocessor
	from models.deepfm import DeepFM, build_dense_features
	from models.reranker import diversify
	from models.two_tower import TwoTowerModel
	from serving.faiss_index import FAISSIndex
	from serving.feature_store import FeatureStore

	logger = logging.getLogger(__name__)


	def _year_to_bucket(year: int, n_buckets: int = 50) -> int:
	return max(0, min(n_buckets - 1, (year - 1920) // 2))


	class RecommendationEngine:
	"""
	Single object owning all models and indexes needed for serving.
	Loaded once at API startup; methods are called per-request.
	"""

	def __init__(
	self,
	prep: Preprocessor,
	two_tower: TwoTowerModel,
	deepfm: DeepFM,
	faiss_index: FAISSIndex,
	item_embeddings: np.ndarray, # (num_movies, embed_dim) — for MMR
	feature_store: FeatureStore,
	device: torch.device,
	):
	self.prep = prep
	self.two_tower = two_tower.to(device).eval()
	self.deepfm = deepfm.to(device).eval()
	self.faiss_index = faiss_index
	self.item_embeddings = item_embeddings
	self.feature_store = feature_store
	self.device = device

	# Precompute movie_idx → metadata map for fast lookups
	self._build_movie_meta()

	def _build_movie_meta(self) -> None:
	"""Build a dict movie_idx → {title, year, genres, movie_id}."""
	self.movie_meta: dict[int, dict] = {}
	if self.prep.movie_df is None:
	return
	for _, row in self.prep.movie_df.iterrows():
	idx = self.prep.movie_id_map.get(row["movieId"])
	if idx is None:
	continue
	self.movie_meta[idx] = {
	"movie_id": int(row["movieId"]),
	"title": str(row["title"]),
	"year": int(row.get("year", 0)),
	"genres": row.get("genre_list", []),
	}

	# ------------------------------------------------------------------
	# User embedding retrieval / computation
	# ------------------------------------------------------------------

	@torch.no_grad()
	def _get_user_embedding(self, user_idx: int) -> np.ndarray:
	"""Get or compute user embedding, caching in feature store."""
	cached = self.feature_store.get_user_embedding(user_idx)
	if cached is not None:
	return cached

	# Compute from scratch
	user_feats = self.feature_store.get_user_features(user_idx)
	if user_feats is None:
	if user_idx < len(self.prep.user_features):
	user_feats = self.prep.user_features[user_idx]
	else:
	user_feats = np.zeros(self.prep.user_features.shape[1], dtype=np.float32)

	uid_t = torch.tensor([user_idx], dtype=torch.long, device=self.device)
	uf_t = torch.tensor(user_feats, dtype=torch.float32, device=self.device).unsqueeze(0)
	emb = self.two_tower.encode_user(uid_t, uf_t).cpu().numpy()[0] # (D,)

	self.feature_store.set_user_embedding(user_idx, emb)
	return emb

	# ------------------------------------------------------------------
	# Core recommendation
	# ------------------------------------------------------------------

	@torch.no_grad()
	def recommend_by_idx(
	self,
	user_idx: int,
	limit: int = 20,
	seen_movie_idxs: Optional[set[int]] = None,
	faiss_k: int = 200,
	mmr_pool: int = 50,
	lambda_mmr: float = 0.7,
	) -> list[int]:
	"""Return a list of recommended movie_idx values."""
	if seen_movie_idxs is None:
	seen_movie_idxs = set()

	# Step 1: candidate retrieval via FAISS
	user_emb = self._get_user_embedding(user_idx)
	_, candidate_idxs = self.faiss_index.search(user_emb, k=faiss_k)
	candidate_idxs = candidate_idxs[0].tolist()

	# Step 2: filter seen
	candidate_idxs = [c for c in candidate_idxs if c not in seen_movie_idxs]
	if not candidate_idxs:
	return list(range(min(limit, self.prep.num_movies)))

	# Step 3: DeepFM scoring
	scores = self._score_candidates(user_idx, candidate_idxs)

	# Step 4: sort by DeepFM, take top mmr_pool
	pool_size = min(mmr_pool, len(candidate_idxs))
	top_idxs = np.argsort(-scores)[:pool_size]
	pool_candidates = [candidate_idxs[i] for i in top_idxs]
	pool_scores = scores[top_idxs]

	# Step 5: MMR re-rank
	final_idxs = diversify(
	pool_candidates,
	pool_scores,
	self.item_embeddings,
	top_k=limit,
	lambda_param=lambda_mmr,
	)
	return final_idxs

	@torch.no_grad()
	def _score_candidates(
	self, user_idx: int, candidate_idxs: list[int]
	) -> np.ndarray:
	"""Score candidates with DeepFM. Returns (N,) float32 array."""
	user_feats = self.prep.user_features[min(user_idx, len(self.prep.user_features) - 1)]
	item_feats = self.prep.item_features

	uid_t = torch.tensor([user_idx] * len(candidate_idxs), dtype=torch.long, device=self.device)
	mid_t = torch.tensor(candidate_idxs, dtype=torch.long, device=self.device)

	# Year buckets
	year_buckets = []
	for c in candidate_idxs:
	meta = self.movie_meta.get(c, {})
	year_buckets.append(_year_to_bucket(meta.get("year", 2000)))
	yb_t = torch.tensor(year_buckets, dtype=torch.long, device=self.device)

	uf_t = torch.tensor(
	np.tile(user_feats, (len(candidate_idxs), 1)), dtype=torch.float32, device=self.device
	)
	if_t = torch.tensor(
	item_feats[candidate_idxs], dtype=torch.float32, device=self.device
	)
	dense = build_dense_features(uf_t, if_t)

	probs = self.deepfm.predict_proba(uid_t, mid_t, yb_t, dense).cpu().numpy()
	return probs

	# ------------------------------------------------------------------
	# Public API methods
	# ------------------------------------------------------------------

	def recommend(
	self,
	user_id: int,
	limit: int = 20,
	seen_movie_ids: Optional[list[int]] = None,
	) -> list[dict]:
	"""
	Main recommendation endpoint.
	Accepts and returns original MovieLens IDs (not internal indices).
	"""
	user_idx = self.prep.user_id_map.get(user_id)
	if user_idx is None:
	# Cold-start: return popular items by falling back to sorted catalog
	return self._popular_fallback(limit)

	seen_idxs = set()
	if seen_movie_ids:
	seen_idxs = {
	self.prep.movie_id_map[m]
	for m in seen_movie_ids
	if m in self.prep.movie_id_map
	}

	rec_idxs = self.recommend_by_idx(user_idx, limit=limit, seen_movie_idxs=seen_idxs)

	return [
	{
	**self._idx_to_movie(idx),
	"score": float(i + 1), # rank order as score
	"explanation": self.get_explanation(user_idx, idx),
	}
	for i, idx in enumerate(rec_idxs)
	]

	def get_similar(self, movie_id: int, limit: int = 10) -> list[dict]:
	"""Find similar movies using FAISS item–item nearest neighbours."""
	movie_idx = self.prep.movie_id_map.get(movie_id)
	if movie_idx is None or movie_idx >= len(self.item_embeddings):
	return []

	query = self.item_embeddings[movie_idx]
	_, indices = self.faiss_index.search(query, k=limit + 1)
	similar = [
	self._idx_to_movie(idx)
	for idx in indices[0].tolist()
	if idx != movie_idx
	]
	return similar[:limit]

	def search(self, query: str, limit: int = 20) -> list[dict]:
	"""Full-text search over movie titles and genres (in-memory trie-free scan)."""
	q = query.lower().strip()
	results = []
	for idx, meta in self.movie_meta.items():
	title_lower = meta["title"].lower()
	genres_lower = " ".join(meta.get("genres", [])).lower()
	if q in title_lower or q in genres_lower:
	results.append((idx, meta))

	# Rank: exact title start > title contains > genre contains
	def rank_key(item):
	idx, meta = item
	t = meta["title"].lower()
	if t.startswith(q):
	return 0
	if q in t:
	return 1
	return 2

	results.sort(key=rank_key)
	return [self._idx_to_movie(idx) for idx, _ in results[:limit]]

	def get_movie(self, movie_id: int) -> Optional[dict]:
	"""Get full movie metadata by MovieLens ID."""
	movie_idx = self.prep.movie_id_map.get(movie_id)
	if movie_idx is None:
	return None
	return self._idx_to_movie(movie_idx)

	def get_taste_profile(self, user_id: int) -> dict:
	"""Return a user's genre preferences and activity statistics."""
	user_idx = self.prep.user_id_map.get(user_id)
	if user_idx is None:
	return {"error": "User not found"}

	if user_idx < len(self.prep.user_features):
	feats = self.prep.user_features[user_idx]
	else:
	feats = np.zeros(self.prep.user_features.shape[1], dtype=np.float32)

	from data.loader import ALL_GENRES
	genre_prefs = {
	genre: float(feats[i])
	for i, genre in enumerate(ALL_GENRES)
	if i < len(feats)
	}
	avg_rating = float(feats[len(ALL_GENRES)]) * 5.0 if len(feats) > len(ALL_GENRES) else 3.5
	import math
	log_count = float(feats[len(ALL_GENRES) + 1]) if len(feats) > len(ALL_GENRES) + 1 else 0.0
	rating_count = int(math.expm1(log_count * 10))

	return {
	"user_id": user_id,
	"genre_preferences": genre_prefs,
	"avg_rating": round(avg_rating, 2),
	"rating_count": rating_count,
	"top_genres": sorted(genre_prefs, key=genre_prefs.get, reverse=True)[:5],
	}

	def get_explanation(self, user_idx: int, movie_idx: int) -> str:
	"""Generate a human-readable explanation for a recommendation."""
	meta = self.movie_meta.get(movie_idx, {})
	genres = meta.get("genres", [])

	if user_idx < len(self.prep.user_features):
	user_feats = self.prep.user_features[user_idx]
	from data.loader import ALL_GENRES
	genre_scores = {
	g: float(user_feats[i])
	for i, g in enumerate(ALL_GENRES)
	if i < len(user_feats)
	}
	matching = [g for g in genres if genre_scores.get(g, 0) > 0.3]
	else:
	matching = genres[:2]

	if matching:
	return f"Because you enjoy {', '.join(matching[:2])} films"
	elif genres:
	return f"A top-rated {genres[0]} film you haven't seen"
	return "Highly rated and popular with viewers like you"

	# ------------------------------------------------------------------
	# Helpers
	# ------------------------------------------------------------------

	def _idx_to_movie(self, movie_idx: int) -> dict:
	meta = self.movie_meta.get(movie_idx, {})
	return {
	"movie_id": meta.get("movie_id", movie_idx),
	"title": meta.get("title", f"Movie {movie_idx}"),
	"year": meta.get("year", 0),
	"genres": meta.get("genres", []),
	}

	def _popular_fallback(self, limit: int) -> list[dict]:
	"""Return a fixed popular-movie list for cold-start users."""
	popular_idxs = list(range(min(limit, self.prep.num_movies)))
	return [self._idx_to_movie(i) for i in popular_idxs]

	# ------------------------------------------------------------------
	# Factory
	# ------------------------------------------------------------------

	@classmethod
	def load(
	    cls,
	    artifact_dir: str | Path,
	    device_str: str = "cpu",
	    feature_store: Optional[FeatureStore] = None,
	) -> "RecommendationEngine":
	    """
	    Build an engine from the artifacts stored in ``artifact_dir``.

	    Files read from the directory: ``two_tower.pt`` and ``deepfm_best.pt``
	    (each a checkpoint dict with ``"config"`` and ``"state_dict"`` keys),
	    ``faiss.index`` and ``item_embeddings.npy``, plus whatever
	    ``Preprocessor.load`` reads.

	    Args:
	        artifact_dir: directory holding the artifacts; resolved to an
	            absolute path before use.
	        device_str: torch device string the models are mapped to.
	        feature_store: store to use; when omitted, one is created on
	            ``feature_store.db`` inside ``artifact_dir``.

	    Returns:
	        A ready-to-serve ``RecommendationEngine``.
	    """
	    artifact_dir = Path(artifact_dir).resolve()  # always absolute
	    device = torch.device(device_str)

	    prep = Preprocessor.load(artifact_dir)

	    # SECURITY NOTE: torch.load unpickles the checkpoint, which can run
	    # arbitrary code; only point this at trusted artifact directories.
	    # Two-Tower
	    tt_ckpt = torch.load(artifact_dir / "two_tower.pt", map_location=device)
	    two_tower = TwoTowerModel(**tt_ckpt["config"])
	    two_tower.load_state_dict(tt_ckpt["state_dict"])

	    # DeepFM
	    dfm_ckpt = torch.load(artifact_dir / "deepfm_best.pt", map_location=device)
	    deepfm = DeepFM(**dfm_ckpt["config"])
	    deepfm.load_state_dict(dfm_ckpt["state_dict"])

	    # FAISS index
	    # NOTE(review): relies on FAISSIndex.load returning the index object —
	    # confirm against serving.faiss_index.
	    faiss_index = FAISSIndex().load(artifact_dir / "faiss.index")

	    # Item embeddings (for MMR)
	    item_embeddings = np.load(artifact_dir / "item_embeddings.npy")

	    # Feature store
	    if feature_store is None:
	        feature_store = FeatureStore(
	            sqlite_path=artifact_dir / "feature_store.db"
	        )

	    engine = cls(
	        prep=prep,
	        two_tower=two_tower,
	        deepfm=deepfm,
	        faiss_index=faiss_index,
	        item_embeddings=item_embeddings,
	        feature_store=feature_store,
	        device=device,
	    )
	    logger.info("RecommendationEngine loaded successfully.")
	    return engine