Spaces:

sandy898
/

resonate-api

Sleeping

App Files Files Community

resonate-api / app /api /profile_analytics.py

sandy898

Initial backend deployment

d597de7 24 days ago

Raw

History Blame Contribute Delete

8.54 kB

	"""
	app/api/profile_analytics.py

	FastAPI route for /profile/analytics.
	Covers: Vibe Radar, Flavor Fingerprint (with coverage), Blind Spot,
	Adventurousness, and Temporal Drift.
	Adapted for the continuous rating pipeline.
	"""

	from __future__ import annotations

	import numpy as np
	from fastapi import APIRouter
	from pydantic import BaseModel
	from typing import Optional, List

	from app.core.ml_manager import ml_manager

	router = APIRouter()

	# ── Excluded tag names (generic / noise) ──────────────────────────────────────
	_GENERIC_TAGS = {
	"drama", "comedy", "action", "thriller", "romance", "horror",
	"adventure", "animation", "family", "documentary", "fantasy",
	"mystery", "based on a true story", "independent film", "biopic",
	}

	# ── Minimum history sizes ──────────────────────────────────────────────────────
	_MIN_DRIFT = 8 # need at least this many films to show drift
	_RECENT_WINDOW = 5 # always use the last N films as "recent"


	# ── Request models (shaped to match the frontend's Zustand payload) ───────────
	class Interaction(BaseModel):
	    # One rated item as sent by the client.
	    # itemIndex: used by the endpoint as a row index into the embedding matrix.
	    itemIndex: int
	    # rating: continuous score; the endpoint keeps items rated >= 3.0.
	    rating: float
	    # timestamp: sort key used to put interactions in chronological order
	    # (unit is not visible here — TODO confirm seconds vs. milliseconds).
	    timestamp: int

	class HistoryRequest(BaseModel):
	    # Request body of POST /profile/analytics: every interaction the client
	    # has recorded, in any order (the endpoint sorts them by timestamp).
	    interactions: List[Interaction]


	# ── Helpers ───────────────────────────────────────────────────────────────────

	def _user_vector(item_indices: list[int]) -> np.ndarray:
	    """
	    Build a taste vector from the plot embeddings of the given items.

	    The rows of ``ml_manager.plot_embeddings`` selected by ``item_indices``
	    are averaged, and the average is rescaled to unit length. When the
	    average is (numerically) the zero vector it is returned unscaled, so
	    the division can never blow up.
	    """
	    # Rows are expected to be L2-normalized already (done at load time).
	    mean_vec = ml_manager.plot_embeddings[item_indices].mean(axis=0)
	    length = np.linalg.norm(mean_vec)
	    if length > 1e-9:
	        return mean_vec / length
	    return mean_vec


	def _project_macro(user_vec: np.ndarray) -> list[dict]:
	    """
	    Score the user vector against every macro axis for the radar chart.

	    The raw scores are the products of ``ml_manager.macro_matrix`` with
	    ``user_vec``. They are min-max rescaled so the weakest axis maps to 0
	    and the strongest to 100; if all axes score the same, every axis gets
	    50. Each entry of the result is ``{"axis": <name>, "score": <float>}``
	    with the score rounded to one decimal.
	    """
	    sims = ml_manager.macro_matrix @ user_vec
	    weakest, strongest = sims.min(), sims.max()

	    # A flat profile has no spread to rescale; fall back to the midpoint.
	    rescaled = (
	        (sims - weakest) / (strongest - weakest) * 100
	        if strongest > weakest
	        else np.full_like(sims, 50.0)
	    )

	    radar = []
	    for axis_name, value in zip(ml_manager.macro_axes_names, rescaled):
	        radar.append({"axis": axis_name, "score": round(float(value), 1)})
	    return radar


	def _top_macro_name(user_vec: np.ndarray) -> str:
	    """Return the name of the macro axis that scores highest for ``user_vec``."""
	    best_axis = int(np.argmax(ml_manager.macro_matrix @ user_vec))
	    return ml_manager.macro_axes_names[best_axis]


	def _adventurousness(item_indices: list[int]) -> int:
	    """
	    Score how eclectic a watch history is, on a 0-100 scale.

	    The score is ``(1 - mean pairwise similarity) * 100`` over the plot
	    embeddings of the given items, clamped to [0, 100] and rounded to an
	    integer. Similarity is the dot product of two embeddings, which is the
	    cosine similarity as long as the embeddings are unit-norm (they are
	    expected to be normalized at load time).

	    ``item_indices`` are row indices into ``ml_manager.plot_embeddings``.
	    Returns 0 when fewer than two items are given, since there is no pair
	    to compare. Higher = more eclectic taste.
	    """
	    n = len(item_indices)
	    if n < 2:
	        # No pairs exist; bail out before touching the embedding matrix.
	        return 0

	    # float64 keeps the accumulations below accurate for long histories,
	    # whatever dtype the embeddings are stored in.
	    vecs = np.asarray(ml_manager.plot_embeddings[item_indices], dtype=np.float64)

	    # The sum of every entry of (vecs @ vecs.T) equals ||sum of rows||^2 and
	    # its trace equals the sum of squared row norms. Using those identities
	    # gives the same off-diagonal total without allocating an n x n matrix.
	    row_total = vecs.sum(axis=0)
	    all_pairs = float(row_total @ row_total)
	    diagonal = float((vecs * vecs).sum())

	    avg_sim = (all_pairs - diagonal) / (n * (n - 1))
	    score = (1.0 - avg_sim) * 100
	    return int(round(min(max(score, 0), 100)))


	def _blind_spot(user_vec: np.ndarray, item_indices: list[int]) -> Optional[dict]:
	    """
	    Find the cluster the user should enjoy but has barely explored.

	    Every FC cluster gets two numbers:
	      - affinity: its row of ``ml_manager.fc_matrix`` dotted with ``user_vec``
	      - density:  share of the cluster's catalog items found in ``item_indices``

	    Clusters with affinity below 0.4 or density above 0.25 are ignored.
	    Among the rest, the cluster with the largest (affinity - density) gap
	    wins, and the earliest cluster wins ties. The result holds the cluster
	    name, its description, and both numbers rounded to four decimals.
	    Returns None when no cluster qualifies.
	    """
	    seen = set(item_indices)  # set for fast overlap computation
	    cluster_affinity = ml_manager.fc_matrix @ user_vec

	    winner = None  # (fc_key, affinity, density) of the best candidate so far
	    widest_gap = -1.0

	    for pos, fc_key in enumerate(ml_manager.fc_keys):
	        affinity = float(cluster_affinity[pos])
	        if affinity < 0.4:
	            continue

	        members = ml_manager.ontology[fc_key].get("item_indices", [])
	        overlap = len(seen & set(members))
	        # A cluster without catalog items counts as size 1, i.e. density 0.
	        density = overlap / (len(members) if members else 1)
	        if density > 0.25:
	            continue

	        gap = affinity - density
	        if gap > widest_gap:
	            widest_gap = gap
	            winner = (fc_key, affinity, density)

	    if winner is None:
	        return None

	    fc_key, affinity, density = winner
	    return {
	        "cluster_name": ml_manager.fc_names.get(fc_key, fc_key),
	        "description": ml_manager.ontology[fc_key].get("description", ""),
	        "affinity_score": round(affinity, 4),
	        "watch_density": round(density, 4),
	    }


	def _flavor_fingerprint(item_indices: list[int]) -> dict:
	    """
	    Pick the user's most characteristic Tag Genome tags.

	    Returns a dict with:
	      - "top_tags": up to 15 tag names, strongest first, skipping unnamed
	        tags and anything listed in ``_GENERIC_TAGS``
	      - "genome_covered_count": how many of the items have any tag score
	      - "total_count": ``len(item_indices)``

	    When no tag matrix is loaded, or none of the items has tag scores,
	    "top_tags" is empty and "genome_covered_count" is 0.
	    """
	    result = {
	        "top_tags": [],
	        "genome_covered_count": 0,
	        "total_count": len(item_indices),
	    }

	    scores = ml_manager.tag_scores  # indexed by item row, one column per tag
	    if scores is None:
	        return result

	    # An all-zero row means the item has no Tag Genome data at all.
	    with_genome = [idx for idx in item_indices if scores[idx].max() > 0]
	    if not with_genome:
	        return result

	    # Per-tag mean over the covered items only, then strongest tags first.
	    mean_scores = scores[with_genome].mean(axis=0)
	    strongest_first = np.argsort(mean_scores)[::-1]

	    chosen = []
	    for col in strongest_first:
	        if len(chosen) >= 15:
	            break
	        tag_id = ml_manager.tag_col_to_id.get(int(col), "")
	        label = ml_manager.tag_id_to_name.get(str(tag_id), "").strip()
	        if label and label.lower() not in _GENERIC_TAGS:
	            chosen.append(label)

	    result["top_tags"] = chosen
	    result["genome_covered_count"] = len(with_genome)
	    return result


	def _temporal_drift(item_indices: list[int]) -> Optional[dict]:
	    """
	    Compare the user's recent taste with everything they watched before.

	    ``item_indices`` must be in chronological order. The last
	    ``_RECENT_WINDOW`` items form the recent window and all earlier items
	    the historic one. Returns None when the history has fewer than
	    ``_MIN_DRIFT`` items or the historic part would be empty.

	    The result holds the top macro axis of each window ("early_focus",
	    "recent_focus") and "shift_magnitude": one minus the dot product of the
	    two taste vectors, rounded to four decimals. For unit vectors that is
	    the cosine distance: 0 = same direction, 1 = orthogonal, 2 = opposite.
	    """
	    if len(item_indices) < _MIN_DRIFT:
	        return None

	    older = item_indices[:-_RECENT_WINDOW]
	    newer = item_indices[-_RECENT_WINDOW:]
	    if not older:
	        return None

	    newer_vec = _user_vector(newer)
	    older_vec = _user_vector(older)

	    similarity = float(np.dot(newer_vec, older_vec))

	    return {
	        "early_focus": _top_macro_name(older_vec),
	        "recent_focus": _top_macro_name(newer_vec),
	        "shift_magnitude": round(1.0 - similarity, 4),
	    }


	# ── Endpoint ──────────────────────────────────────────────────────────────────

	@router.post("/profile/analytics")
	def profile_analytics(req: HistoryRequest):
	    # Assemble every profile metric for one user from their interactions.
	    # Responds with {"status": "empty"} when nothing usable is left after
	    # filtering, otherwise with {"status": "ok", ...} plus the metrics.

	    # Oldest first: the drift metric treats the tail of the list as "recent".
	    chronological = sorted(req.interactions, key=lambda entry: entry.timestamp)

	    # Only favorably rated items (>= 3.0) shape the profile.
	    liked = [entry.itemIndex for entry in chronological if entry.rating >= 3.0]
	    if not liked:
	        return {"status": "empty"}

	    # Drop indices that fall outside the embedding matrix.
	    catalog_size = ml_manager.plot_embeddings.shape[0]
	    valid = [idx for idx in liked if 0 <= idx < catalog_size]
	    if not valid:
	        return {"status": "empty"}

	    taste_vec = _user_vector(valid)

	    radar_scores = _project_macro(taste_vec)
	    flavor = _flavor_fingerprint(valid)
	    unexplored = _blind_spot(taste_vec, valid)
	    eclecticism = _adventurousness(valid)
	    taste_shift = _temporal_drift(valid)

	    tag_coverage = {
	        "covered": flavor["genome_covered_count"],
	        "total": flavor["total_count"],
	    }
	    return {
	        "status": "ok",
	        "radar": radar_scores,
	        "top_tags": flavor["top_tags"],
	        "tag_coverage": tag_coverage,
	        "blind_spot": unexplored,
	        "adventurousness": eclecticism,
	        "drift": taste_shift,
	    }