"""
app/api/profile_analytics.py

FastAPI route for /profile/analytics.
Covers: Vibe Radar, Flavor Fingerprint (with coverage), Blind Spot, 
        Adventurousness, and Temporal Drift.
Adapted for the continuous rating pipeline.
"""

from __future__ import annotations

import numpy as np
from fastapi import APIRouter
from pydantic import BaseModel
from typing import Optional, List

from app.core.ml_manager import ml_manager

# Router that the /profile/analytics endpoint in this module registers on.
# NOTE(review): presumably included into the main FastAPI app elsewhere — confirm.
router = APIRouter()

# ── Excluded tag names (generic / noise) ──────────────────────────────────────
_GENERIC_TAGS = {
    "drama", "comedy", "action", "thriller", "romance", "horror",
    "adventure", "animation", "family", "documentary", "fantasy",
    "mystery", "based on a true story", "independent film", "biopic",
}

# ── Minimum history sizes ──────────────────────────────────────────────────────
_MIN_DRIFT      = 8   # need at least this many films to show drift
_RECENT_WINDOW  = 5   # always use the last N films as "recent"


# ── Request models (shape matches the frontend's Zustand payload) ─────────────
class Interaction(BaseModel):
    # One rated item from the client's history.
    # (Documented with comments rather than a docstring so the generated
    # schema description is left untouched.)

    # Row index into ml_manager.plot_embeddings; the endpoint drops values
    # outside [0, number of rows).
    itemIndex: int
    # The endpoint keeps only interactions with rating >= 3.0.
    rating: float
    # Used solely as the chronological sort key; the unit is not established
    # in this file — TODO confirm against the frontend.
    timestamp: int

class HistoryRequest(BaseModel):
    # Request body for POST /profile/analytics.
    # The endpoint sorts the interactions by timestamp itself, so the client
    # may send them in any order.
    interactions: List[Interaction]


# ── Helpers ───────────────────────────────────────────────────────────────────

def _user_vector(item_indices: list[int]) -> np.ndarray:
    """
    Build a single taste vector from the plot embeddings of the given items.

    The rows of ``ml_manager.plot_embeddings`` selected by ``item_indices``
    are averaged and the centroid is rescaled to unit length. A centroid whose
    norm is not above 1e-9 is returned unscaled to avoid dividing by ~0.
    """
    # Rows are expected to be unit-norm already (normalized at load time).
    rows = ml_manager.plot_embeddings[item_indices]
    mean_vec = rows.mean(axis=0)
    length = np.linalg.norm(mean_vec)
    if length > 1e-9:
        return mean_vec / length
    return mean_vec


def _project_macro(user_vec: np.ndarray) -> list[dict]:
    """
    Score the user vector on every macro axis for the radar chart.

    Raw scores are ``ml_manager.macro_matrix @ user_vec``. They are min-max
    rescaled so the weakest axis becomes 0 and the strongest 100; when the raw
    scores have no spread, every axis is reported as 50.

    Returns one ``{"axis": name, "score": value}`` dict per macro axis, with
    the score rounded to one decimal place.
    """
    raw_scores = ml_manager.macro_matrix @ user_vec
    lowest = raw_scores.min()
    highest = raw_scores.max()

    if highest > lowest:
        rescaled = (raw_scores - lowest) / (highest - lowest) * 100
    else:
        rescaled = np.full_like(raw_scores, 50.0)

    radar = []
    for axis_name, value in zip(ml_manager.macro_axes_names, rescaled):
        radar.append({"axis": axis_name, "score": round(float(value), 1)})
    return radar


def _top_macro_name(user_vec: np.ndarray) -> str:
    """Return the macro-axis name on which ``user_vec`` scores highest."""
    axis_scores = ml_manager.macro_matrix @ user_vec
    best_idx = int(axis_scores.argmax())
    return ml_manager.macro_axes_names[best_idx]


def _adventurousness(item_indices: list[int]) -> int:
    """
    Score how eclectic the watched films are, as an integer in 0-100.

    The score is the mean pairwise cosine distance (1 - cosine similarity)
    between the plot embeddings of ``item_indices``, multiplied by 100 and
    clamped to [0, 100]. Higher = more eclectic taste. With fewer than two
    items there are no pairs to compare, so the score is 0.
    """
    n = len(item_indices)
    if n < 2:
        # No pairs exist; return before loading embeddings or building the
        # similarity matrix.
        return 0

    # Rows are treated as unit-norm (normalized at load time), so the dot
    # product of two rows is their cosine similarity.
    vecs = ml_manager.plot_embeddings[item_indices]
    sim_matrix = vecs @ vecs.T  # (n, n)

    # Remove the diagonal (self-similarity) and average over the n*(n-1)
    # ordered pairs that remain.
    avg_sim = (sim_matrix.sum() - np.trace(sim_matrix)) / (n * (n - 1))

    score = (1.0 - float(avg_sim)) * 100
    return int(round(min(max(score, 0), 100)))


def _blind_spot(user_vec: np.ndarray, item_indices: list[int]) -> Optional[dict]:
    """
    Find the cluster the user has high affinity for but has barely explored.

    For each FC cluster compute:
      - affinity = the cluster's row of ``ml_manager.fc_matrix`` dotted with
                   ``user_vec`` (a cosine similarity provided both are
                   unit-norm)
      - density  = films watched in cluster / total films in cluster catalog

    Clusters with affinity below 0.4 or density above 0.25 are ignored. Among
    the rest, the one with the largest (affinity - density) gap wins; on a tie
    the earliest cluster in ``ml_manager.fc_keys`` order is kept.

    Returns a dict with ``cluster_name``, ``description``, ``affinity_score``
    and ``watch_density`` (both rounded to 4 places), or None when no cluster
    qualifies.
    """
    affinities = ml_manager.fc_matrix @ user_vec  # one score per FC cluster

    # Set for fast membership lookup against each cluster's catalog.
    watched_set = set(item_indices)

    best_gap = -1.0
    best_fc = None

    for i, fc_key in enumerate(ml_manager.fc_keys):
        aff = float(affinities[i])
        if aff < 0.4:
            continue

        entry = ml_manager.ontology[fc_key]
        fc_items = entry.get("item_indices", [])

        # A cluster without catalogued items gets a denominator of 1, which
        # yields density 0 instead of a division by zero.
        catalog_size = max(len(fc_items), 1)
        density = len(watched_set.intersection(fc_items)) / catalog_size

        if density > 0.25:
            continue

        gap = aff - density
        if gap > best_gap:
            best_gap = gap
            best_fc = {
                "cluster_name": ml_manager.fc_names.get(fc_key, fc_key),
                "description": entry.get("description", ""),
                "affinity_score": round(aff, 4),
                "watch_density": round(density, 4),
            }

    return best_fc


def _flavor_fingerprint(item_indices: list[int]) -> dict:
    """
    Summarize the user's taste as up to 15 Tag Genome tag names.

    Only films with Genome coverage (a row of ``ml_manager.tag_scores`` whose
    maximum is above zero) contribute. Their rows are averaged per tag, and
    tags are taken in descending order of that average, skipping tags whose
    name cannot be resolved and tags listed in ``_GENERIC_TAGS``.

    Returns a dict with ``top_tags``, ``genome_covered_count`` and
    ``total_count`` (the length of ``item_indices``).
    """
    total = len(item_indices)
    no_coverage = {"top_tags": [], "genome_covered_count": 0, "total_count": total}

    scores = ml_manager.tag_scores
    if scores is None:
        return no_coverage

    # Keep only films whose tag row has at least one positive score.
    with_genome = [idx for idx in item_indices if scores[idx].max() > 0]
    if not with_genome:
        return no_coverage

    # Per-tag mean over the covered films, then columns from strongest to weakest.
    mean_per_tag = scores[with_genome].mean(axis=0)

    picked = []
    for column in np.argsort(mean_per_tag)[::-1]:
        if len(picked) >= 15:
            break
        # column -> tag id -> display name; a miss at either step gives "".
        tag_id = ml_manager.tag_col_to_id.get(int(column), "")
        label = ml_manager.tag_id_to_name.get(str(tag_id), "").strip()
        if label and label.lower() not in _GENERIC_TAGS:
            picked.append(label)

    return {
        "top_tags": picked,
        "genome_covered_count": len(with_genome),
        "total_count": total,
    }


def _temporal_drift(item_indices: list[int]) -> Optional[dict]:
    """
    Compare the user's most recent taste with their earlier taste.

    Fixed recency window over ``item_indices`` (expected oldest-first):
      - recent   = last _RECENT_WINDOW films
      - historic = everything before that

    Returns None when there are fewer than _MIN_DRIFT films, or when nothing
    is left for the historic side. Otherwise returns a dict with:
      - early_focus / recent_focus: top macro axis of each period
      - shift_magnitude: cosine distance between the two period vectors,
        rounded to 4 places (0 = same direction, 1 = orthogonal, 2 = opposite)
    """
    n = len(item_indices)
    if n < _MIN_DRIFT:
        return None

    recent_items = item_indices[-_RECENT_WINDOW:]
    historic_items = item_indices[:-_RECENT_WINDOW]

    # Only reachable if _MIN_DRIFT is ever configured <= _RECENT_WINDOW.
    if not historic_items:
        return None

    recent_vec = _user_vector(recent_items)
    historic_vec = _user_vector(historic_items)

    # Cosine distance = 1 - cosine_sim (both vectors come back unit-norm).
    cos_sim = float(np.dot(recent_vec, historic_vec))
    # Clamp to the valid [0, 2] range so float round-off on near-identical
    # vectors cannot produce a slightly negative distance. The value is the
    # first argument of max/min on purpose: a NaN then passes through instead
    # of being silently replaced by a bound.
    distance = min(max(1.0 - cos_sim, 0.0), 2.0)
    magnitude = round(distance, 4)

    return {
        "early_focus": _top_macro_name(historic_vec),
        "recent_focus": _top_macro_name(recent_vec),
        "shift_magnitude": magnitude,
    }


# ── Endpoint ──────────────────────────────────────────────────────────────────

@router.post("/profile/analytics")
def profile_analytics(req: HistoryRequest):
    # Computes every profile analytic for the submitted rating history.
    # Returns {"status": "empty"} when no favorably rated, in-range item
    # remains; otherwise a {"status": "ok", ...} payload with the radar, top
    # tags plus coverage, blind spot, adventurousness and drift.
    empty_response = {"status": "empty"}

    # Oldest first: the drift calculation relies on chronological order.
    chronological = sorted(req.interactions, key=lambda entry: entry.timestamp)

    # Keep only the items the user rated favorably (>= 3.0).
    liked = [entry.itemIndex for entry in chronological if entry.rating >= 3.0]
    if not liked:
        return empty_response

    # Drop indices that fall outside the embedding matrix.
    catalog_size = ml_manager.plot_embeddings.shape[0]
    usable = [idx for idx in liked if 0 <= idx < catalog_size]
    if not usable:
        return empty_response

    taste_vec = _user_vector(usable)
    radar = _project_macro(taste_vec)
    tags = _flavor_fingerprint(usable)

    return {
        "status": "ok",
        "radar": radar,
        "top_tags": tags["top_tags"],
        "tag_coverage": {
            "covered": tags["genome_covered_count"],
            "total": tags["total_count"],
        },
        "blind_spot": _blind_spot(taste_vec, usable),
        "adventurousness": _adventurousness(usable),
        "drift": _temporal_drift(usable),
    }