"""
app/api/profile_analytics.py

FastAPI route for /profile/analytics.
Covers: Vibe Radar, Flavor Fingerprint (with coverage), Blind Spot,
Adventurousness, and Temporal Drift.
Adapted for the continuous rating pipeline.
"""
from __future__ import annotations

from typing import List, Optional

import numpy as np
from fastapi import APIRouter
from pydantic import BaseModel

from app.core.ml_manager import ml_manager

router = APIRouter()

# ── Excluded tag names (generic / noise) ──────────────────────────────────────
_GENERIC_TAGS = {
    "drama", "comedy", "action", "thriller", "romance", "horror",
    "adventure", "animation", "family", "documentary", "fantasy", "mystery",
    "based on a true story", "independent film", "biopic",
}

# ── Minimum history sizes ─────────────────────────────────────────────────────
_MIN_DRIFT = 8       # need at least this many films to show drift
_RECENT_WINDOW = 5   # always use the last N films as "recent"

# ── Tunable thresholds ────────────────────────────────────────────────────────
_MIN_FAVORABLE_RATING = 3.0  # ratings at or above this count as "liked"
_MIN_AFFINITY = 0.4          # blind spot: cluster must be at least this similar
_MAX_DENSITY = 0.25          # blind spot: cluster must be at most this explored
_MAX_TOP_TAGS = 15           # flavor fingerprint: number of tags returned


class Interaction(BaseModel):
    """One rating event, shaped to match the frontend's Zustand payload."""

    # Row index into ml_manager.plot_embeddings (range-checked by the endpoint).
    itemIndex: int
    # User rating; compared against _MIN_FAVORABLE_RATING.
    rating: float
    # Used only as a chronological sort key.
    # NOTE(review): unit (seconds vs. milliseconds) is not visible here.
    timestamp: int


class HistoryRequest(BaseModel):
    """Request body for /profile/analytics: the user's rating interactions."""

    interactions: List[Interaction]


# ── Helpers ───────────────────────────────────────────────────────────────────

def _user_vector(item_indices: list[int]) -> np.ndarray:
    """Return the re-normalized mean plot embedding of the given items.

    The rows are assumed to be L2-normalized at load time (per the original
    author's note). If the centroid's norm is effectively zero it is returned
    as-is instead of being divided, so the result is not guaranteed to be
    unit-length in that degenerate case.
    """
    vecs = ml_manager.plot_embeddings[item_indices]
    centroid = vecs.mean(axis=0)
    norm = np.linalg.norm(centroid)
    return centroid / norm if norm > 1e-9 else centroid


def _project_macro(user_vec: np.ndarray) -> list[dict]:
    """Score the user vector against every macro axis, min-max scaled to 0-100.

    Returns one ``{"axis": name, "score": value}`` dict per axis, with scores
    rounded to one decimal. When all raw scores are equal there is nothing to
    scale, so every axis gets a neutral 50.
    """
    raw = ml_manager.macro_matrix @ user_vec
    lo, hi = raw.min(), raw.max()
    if hi > lo:
        scaled = (raw - lo) / (hi - lo) * 100
    else:
        scaled = np.full_like(raw, 50.0)
    return [
        {"axis": name, "score": round(float(s), 1)}
        for name, s in zip(ml_manager.macro_axes_names, scaled)
    ]


def _top_macro_name(user_vec: np.ndarray) -> str:
    """Return the name of the highest-scoring macro axis for ``user_vec``."""
    scores = ml_manager.macro_matrix @ user_vec
    return ml_manager.macro_axes_names[int(scores.argmax())]


def _adventurousness(item_indices: list[int]) -> int:
    """Return mean pairwise cosine distance between watched films, as 0-100.

    Higher means more eclectic taste. Fewer than two films have no pairs to
    compare, so the score is 0.
    """
    n = len(item_indices)
    # Guard first: no need to build the similarity matrix when there are no
    # pairs to average over.
    if n < 2:
        return 0

    # Rows are assumed unit-norm, so the dot products are cosine similarities.
    vecs = ml_manager.plot_embeddings[item_indices]
    sim_matrix = vecs @ vecs.T

    # Average over the n*(n-1) off-diagonal entries (self-similarity excluded).
    avg_sim = (sim_matrix.sum() - np.trace(sim_matrix)) / (n * (n - 1))
    score = (1.0 - float(avg_sim)) * 100
    # Clamp: negative average similarity would otherwise push the score > 100.
    return int(round(min(max(score, 0), 100)))


def _blind_spot(user_vec: np.ndarray, item_indices: list[int]) -> Optional[dict]:
    """Find the cluster the user should like but has barely explored.

    For each FC cluster:
      - affinity = dot product of ``user_vec`` with the cluster prototype
      - density  = watched films in the cluster / cluster catalog size

    Clusters with affinity below ``_MIN_AFFINITY`` or density above
    ``_MAX_DENSITY`` are skipped. Among the rest, the one with the largest
    ``affinity - density`` gap wins.

    Returns a dict with ``cluster_name``, ``description``, ``affinity_score``
    and ``watch_density``, or ``None`` when no cluster qualifies.
    """
    affinities = ml_manager.fc_matrix @ user_vec
    watched_set = set(item_indices)

    best_gap = -1.0
    best_fc = None

    for i, fc_key in enumerate(ml_manager.fc_keys):
        aff = float(affinities[i])
        if aff < _MIN_AFFINITY:
            continue

        cluster = ml_manager.ontology[fc_key]
        fc_items = cluster.get("item_indices", [])
        # An empty catalog is treated as size 1 to avoid dividing by zero.
        catalog_size = len(fc_items) if fc_items else 1
        # set.intersection accepts any iterable, so no temporary set is built.
        watched_in_fc = len(watched_set.intersection(fc_items))
        density = watched_in_fc / catalog_size

        if density > _MAX_DENSITY:
            continue

        gap = aff - density
        if gap > best_gap:
            best_gap = gap
            best_fc = {
                "cluster_name": ml_manager.fc_names.get(fc_key, fc_key),
                "description": cluster.get("description", ""),
                "affinity_score": round(aff, 4),
                "watch_density": round(density, 4),
            }

    return best_fc


def _flavor_fingerprint(item_indices: list[int]) -> dict:
    """Return the user's top Tag Genome tags plus coverage metadata.

    The result has three keys:
      - ``top_tags``: up to ``_MAX_TOP_TAGS`` tag names, best first, with
        unnamed and generic tags (``_GENERIC_TAGS``) removed
      - ``genome_covered_count``: how many items have a non-zero tag row
      - ``total_count``: ``len(item_indices)``
    """
    total = len(item_indices)
    if ml_manager.tag_scores is None:
        return {"top_tags": [], "genome_covered_count": 0, "total_count": total}

    tag_mat = ml_manager.tag_scores  # presumably (N_films, N_tags) — TODO confirm

    # Gather the user's rows once; a row with no positive score has no Genome
    # coverage and must not dilute the mean.
    rows = np.asarray(tag_mat[item_indices])
    covered_mask = rows.max(axis=1) > 0
    genome_covered = int(covered_mask.sum())

    if genome_covered == 0:
        return {"top_tags": [], "genome_covered_count": 0, "total_count": total}

    # Mean score per tag column across covered films, ranked descending.
    agg = rows[covered_mask].mean(axis=0)
    ranked_cols = np.argsort(agg)[::-1]

    top_tags = []
    for col in ranked_cols:
        if len(top_tags) >= _MAX_TOP_TAGS:
            break
        tag_id = ml_manager.tag_col_to_id.get(int(col), "")
        tag_name = ml_manager.tag_id_to_name.get(str(tag_id), "").strip()
        if not tag_name:
            continue
        if tag_name.lower() in _GENERIC_TAGS:
            continue
        top_tags.append(tag_name)

    return {
        "top_tags": top_tags,
        "genome_covered_count": genome_covered,
        "total_count": total,
    }


def _temporal_drift(item_indices: list[int]) -> Optional[dict]:
    """Compare the user's recent taste with their earlier taste.

    ``item_indices`` must be in chronological order. The split is a fixed
    recency window:
      - recent   = last ``_RECENT_WINDOW`` films
      - historic = everything before that

    Returns ``None`` when there are fewer than ``_MIN_DRIFT`` films or no
    historic films; otherwise a dict with ``early_focus``, ``recent_focus``
    (macro axis names) and ``shift_magnitude``.
    """
    n = len(item_indices)
    if n < _MIN_DRIFT:
        return None

    recent_items = item_indices[-_RECENT_WINDOW:]
    historic_items = item_indices[:-_RECENT_WINDOW]

    # Cannot trigger with the current constants; kept so that tuning
    # _MIN_DRIFT / _RECENT_WINDOW never averages an empty selection.
    if not historic_items:
        return None

    recent_vec = _user_vector(recent_items)
    historic_vec = _user_vector(historic_items)

    # Cosine distance = 1 - cosine similarity (both vectors are unit-norm
    # except in _user_vector's degenerate near-zero case).
    # Range is 0..2: 0 = same direction, 1 = orthogonal, 2 = opposite.
    cos_sim = float(np.dot(recent_vec, historic_vec))
    magnitude = round(1.0 - cos_sim, 4)

    return {
        "early_focus": _top_macro_name(historic_vec),
        "recent_focus": _top_macro_name(recent_vec),
        "shift_magnitude": magnitude,
    }


# ── Endpoint ──────────────────────────────────────────────────────────────────

@router.post("/profile/analytics")
def profile_analytics(req: HistoryRequest):
    """Compute all profile analytics for the submitted rating history.

    Only favorably rated items (rating >= ``_MIN_FAVORABLE_RATING``) with a
    valid embedding index are used. Returns ``{"status": "empty"}`` when none
    remain; otherwise ``status`` is ``"ok"`` alongside ``radar``, ``top_tags``,
    ``tag_coverage``, ``blind_spot``, ``adventurousness`` and ``drift``.
    """
    # Sort chronologically: _temporal_drift relies on the order of the list.
    sorted_interactions = sorted(req.interactions, key=lambda x: x.timestamp)

    # Keep only the items the user rated favorably.
    history = [
        interaction.itemIndex
        for interaction in sorted_interactions
        if interaction.rating >= _MIN_FAVORABLE_RATING
    ]

    if not history:
        return {"status": "empty"}

    # Drop indices that fall outside the embedding matrix.
    n_items = ml_manager.plot_embeddings.shape[0]
    valid = [idx for idx in history if 0 <= idx < n_items]
    if not valid:
        return {"status": "empty"}

    user_vec = _user_vector(valid)
    radar = _project_macro(user_vec)
    fingerprint = _flavor_fingerprint(valid)
    blind_spot = _blind_spot(user_vec, valid)
    adv = _adventurousness(valid)
    drift = _temporal_drift(valid)

    return {
        "status": "ok",
        "radar": radar,
        "top_tags": fingerprint["top_tags"],
        "tag_coverage": {
            "covered": fingerprint["genome_covered_count"],
            "total": fingerprint["total_count"],
        },
        "blind_spot": blind_spot,
        "adventurousness": adv,
        "drift": drift,
    }