Spaces:
Sleeping
Sleeping
"""
app/api/profile_analytics.py
FastAPI route for /profile/analytics.
Covers: Vibe Radar, Flavor Fingerprint (with coverage), Blind Spot,
Adventurousness, and Temporal Drift.
Adapted for the continuous rating pipeline.
"""
| from __future__ import annotations | |
| import numpy as np | |
| from fastapi import APIRouter | |
| from pydantic import BaseModel | |
| from typing import Optional, List | |
| from app.core.ml_manager import ml_manager | |
# Router object exposed by this module for its endpoint(s).
router = APIRouter()
# ── Excluded tag names (generic / noise) ──────────────────────────────────────
# Lower-cased tag names that are too broad to be informative; the flavor
# fingerprint skips any tag whose lower-cased name appears here.
# Frozen so this module-level filter cannot be mutated by accident at runtime.
_GENERIC_TAGS = frozenset({
    "drama", "comedy", "action", "thriller", "romance", "horror",
    "adventure", "animation", "family", "documentary", "fantasy",
    "mystery", "based on a true story", "independent film", "biopic",
})

# ── Minimum history sizes ─────────────────────────────────────────────────────
_MIN_DRIFT = 8      # need at least this many films to show drift
_RECENT_WINDOW = 5  # always use the last N films as "recent"
# Request models: shaped to match the payload sent by the frontend's Zustand store.
# One rating event from the request payload.
#   itemIndex - used by the endpoint as a row index into ml_manager.plot_embeddings
#   rating    - the endpoint keeps only events whose rating is >= 3.0
#   timestamp - sort key for chronological ordering (unit is not visible here)
class Interaction(BaseModel):
    itemIndex: int
    rating: float
    timestamp: int
# Request body consumed by profile_analytics: the list of rating events to analyse.
class HistoryRequest(BaseModel):
    interactions: List[Interaction]
# ── Helpers ───────────────────────────────────────────────────────────────────
def _user_vector(item_indices: list[int]) -> np.ndarray:
    """Return the renormalized mean plot embedding of the given items.

    Rows of ``ml_manager.plot_embeddings`` selected by ``item_indices`` are
    averaged, and the mean is divided by its L2 norm. When that norm is not
    above 1e-9 the unscaled mean is returned instead, to avoid dividing by
    (almost) zero.
    """
    # NOTE(review): the rows are presumably unit-normalized at load time;
    # that is not verifiable from this module.
    mean_vec = ml_manager.plot_embeddings[item_indices].mean(axis=0)
    length = np.linalg.norm(mean_vec)
    if length > 1e-9:
        return mean_vec / length
    return mean_vec
def _project_macro(user_vec: np.ndarray) -> list[dict]:
    """Project the user vector onto the macro axes and min-max scale to 0-100.

    The raw scores are ``ml_manager.macro_matrix @ user_vec`` (a cosine
    similarity only if both sides are unit-norm -- TODO confirm). The lowest
    raw score maps to 0 and the highest to 100; if all scores are equal every
    axis is reported as 50.

    Returns one ``{"axis": name, "score": value}`` dict per entry of
    ``ml_manager.macro_axes_names``, with scores rounded to one decimal.
    """
    projections = ml_manager.macro_matrix @ user_vec
    floor, ceiling = projections.min(), projections.max()

    if not ceiling > floor:
        # Flat profile: no spread to scale by, so sit every axis at the midpoint.
        scaled = np.full_like(projections, 50.0)
    else:
        scaled = (projections - floor) / (ceiling - floor) * 100

    radar = []
    for axis_name, value in zip(ml_manager.macro_axes_names, scaled):
        radar.append({"axis": axis_name, "score": round(float(value), 1)})
    return radar
def _top_macro_name(user_vec: np.ndarray) -> str:
    """Return the name of the macro axis with the largest raw score for ``user_vec``."""
    axis_scores = ml_manager.macro_matrix @ user_vec
    winner = int(axis_scores.argmax())
    return ml_manager.macro_axes_names[winner]
def _adventurousness(item_indices: list[int]) -> int:
    """Score how eclectic the watched films are, as an integer in 0-100.

    The score is ``(1 - mean pairwise similarity) * 100`` over all ordered
    pairs of distinct positions in ``item_indices``, clamped to [0, 100] and
    rounded. Similarity is the dot product of the items' plot embeddings
    (cosine similarity if the rows are unit-norm -- TODO confirm).
    Higher means more varied taste.

    Fewer than two items have no pairs to compare, so the score is 0.
    """
    n = len(item_indices)
    # Guard first: no point gathering embeddings or building the n x n
    # similarity matrix when there is nothing to compare.
    if n < 2:
        return 0

    vecs = ml_manager.plot_embeddings[item_indices]
    sim_matrix = vecs @ vecs.T  # (n, n)

    # Off-diagonal sum divided by the number of ordered pairs, n * (n - 1).
    avg_sim = (sim_matrix.sum() - np.trace(sim_matrix)) / (n * (n - 1))
    score = (1.0 - float(avg_sim)) * 100
    return int(round(min(max(score, 0), 100)))
def _blind_spot(user_vec: np.ndarray, item_indices: list[int]) -> Optional[dict]:
    """Find the cluster the user should like but has barely explored.

    For every key in ``ml_manager.fc_keys``:
      - affinity = the matching entry of ``ml_manager.fc_matrix @ user_vec``
      - density  = watched items in the cluster / size of the cluster's
                   ``item_indices`` list in ``ml_manager.ontology``

    Clusters with affinity below 0.4 or density above 0.25 are skipped. Among
    the rest, the one with the largest ``affinity - density`` gap wins (the
    first one on ties).

    Returns a dict with ``cluster_name``, ``description``, ``affinity_score``
    and ``watch_density`` (both rounded to 4 decimals), or ``None`` when no
    cluster qualifies.
    """
    min_affinity = 0.4   # below this the cluster is not a plausible match
    max_density = 0.25   # above this the cluster is already well explored

    affinities = ml_manager.fc_matrix @ user_vec
    watched_set = set(item_indices)  # O(1) membership for the overlap count

    best_gap = -1.0
    best_fc = None
    for i, fc_key in enumerate(ml_manager.fc_keys):
        aff = float(affinities[i])
        if aff < min_affinity:
            continue

        cluster = ml_manager.ontology[fc_key]
        fc_items = cluster.get("item_indices", [])
        # A missing or empty catalog counts as size 1, so the division below
        # is always defined and such a cluster simply has density 0.
        catalog_size = len(fc_items) or 1
        density = len(watched_set.intersection(fc_items)) / catalog_size
        if density > max_density:
            continue

        gap = aff - density
        if gap > best_gap:
            best_gap = gap
            best_fc = {
                "cluster_name": ml_manager.fc_names.get(fc_key, fc_key),
                "description": cluster.get("description", ""),
                "affinity_score": round(aff, 4),
                "watch_density": round(density, 4),
            }
    return best_fc
def _flavor_fingerprint(item_indices: list[int]) -> dict:
    """Return the user's top tags plus tag-coverage counts.

    An item counts as "covered" when its row in ``ml_manager.tag_scores`` has
    a maximum above 0. Tag columns are ranked by their mean score over the
    covered items, and up to 15 tag names are kept, skipping columns with no
    resolvable name and names listed in ``_GENERIC_TAGS``.

    Returns ``{"top_tags", "genome_covered_count", "total_count"}``. The tag
    list is empty and the covered count 0 when no tag matrix is loaded or no
    item is covered.
    """
    total = len(item_indices)

    def _result(tags: list, covered: int) -> dict:
        # Single place that defines the response shape.
        return {
            "top_tags": tags,
            "genome_covered_count": covered,
            "total_count": total,
        }

    tag_mat = ml_manager.tag_scores
    if tag_mat is None:
        return _result([], 0)

    # Keep only items that have at least one positive tag score.
    covered_indices = [idx for idx in item_indices if tag_mat[idx].max() > 0]
    if not covered_indices:
        return _result([], 0)

    # Mean score per tag column across the covered items, best column first.
    column_means = tag_mat[covered_indices].mean(axis=0)
    best_first = np.argsort(column_means)[::-1]

    top_tags = []
    for col in best_first:
        tag_id = ml_manager.tag_col_to_id.get(int(col), "")
        tag_name = ml_manager.tag_id_to_name.get(str(tag_id), "").strip()
        if not tag_name or tag_name.lower() in _GENERIC_TAGS:
            continue
        top_tags.append(tag_name)
        if len(top_tags) == 15:
            break

    return _result(top_tags, len(covered_indices))
def _temporal_drift(item_indices: list[int]) -> Optional[dict]:
    """Compare the user's most recent films with everything before them.

    ``item_indices`` is split at a fixed recency window: the last
    ``_RECENT_WINDOW`` entries are "recent", the rest "historic". The caller
    is expected to pass the items in chronological order.

    Returns ``None`` when there are fewer than ``_MIN_DRIFT`` items or the
    historic part is empty. Otherwise returns a dict with the top macro axis
    of each part (``early_focus``, ``recent_focus``) and ``shift_magnitude``,
    which is 1 minus the dot product of the two user vectors, rounded to 4
    decimals.
    """
    if len(item_indices) < _MIN_DRIFT:
        return None

    historic_items = item_indices[:-_RECENT_WINDOW]
    if len(historic_items) == 0:
        return None
    recent_items = item_indices[-_RECENT_WINDOW:]

    historic_vec = _user_vector(historic_items)
    recent_vec = _user_vector(recent_items)

    # For unit vectors this is the cosine distance:
    # 0 = same direction, 1 = orthogonal, 2 = opposite.
    similarity = float(np.dot(recent_vec, historic_vec))

    return {
        "early_focus": _top_macro_name(historic_vec),
        "recent_focus": _top_macro_name(recent_vec),
        "shift_magnitude": round(1.0 - similarity, 4),
    }
# ── Endpoint ──────────────────────────────────────────────────────────────────
# Build the full analytics payload for a user's rating history.
#
# Steps: order the interactions by timestamp, keep the items rated >= 3.0,
# drop indices outside the embedding table, then run every analysis on what
# is left. Responds with {"status": "empty"} when nothing usable remains,
# otherwise {"status": "ok", ...} with radar, top_tags, tag_coverage,
# blind_spot, adventurousness and drift.
#
# NOTE(review): no @router.<method>(...) decorator is visible on this function
# in this view -- confirm the route is actually registered somewhere.
def profile_analytics(req: HistoryRequest):
    # Chronological order matters: _temporal_drift treats the tail of the
    # list as the "recent" window.
    chronological = sorted(req.interactions, key=lambda x: x.timestamp)

    # Only favourably rated items (>= 3.0) feed the profile.
    liked = [event.itemIndex for event in chronological if event.rating >= 3.0]
    if not liked:
        return {"status": "empty"}

    # Discard indices that do not address a row of the embedding table.
    n_items = ml_manager.plot_embeddings.shape[0]
    valid = [idx for idx in liked if 0 <= idx < n_items]
    if not valid:
        return {"status": "empty"}

    user_vec = _user_vector(valid)
    radar = _project_macro(user_vec)
    fingerprint = _flavor_fingerprint(valid)

    return {
        "status": "ok",
        "radar": radar,
        "top_tags": fingerprint["top_tags"],
        "tag_coverage": {
            "covered": fingerprint["genome_covered_count"],
            "total": fingerprint["total_count"],
        },
        "blind_spot": _blind_spot(user_vec, valid),
        "adventurousness": _adventurousness(valid),
        "drift": _temporal_drift(valid),
    }