"""
Emotion detection using a PyTorch vision transformer (no TensorFlow needed).

Model: dima806/face_emotions_image_detection (ViT-tiny, ~28MB)
Labels: angry, disgust, fear, happy, sad, surprise, neutral
Downloaded from HuggingFace Hub on first use.
"""

from __future__ import annotations

import os
# Keep transformers on the PyTorch backend. These switches must be in place
# before any transformers import to block TF (crashes without AVX on Rosetta).
# setdefault leaves any value that is already present in the environment alone.
for _switch, _setting in (
    ("USE_TF", "0"),
    ("USE_JAX", "0"),
    ("USE_TORCH", "1"),
    ("TRANSFORMERS_NO_TF", "1"),
    ("TRANSFORMERS_NO_JAX", "1"),
):
    os.environ.setdefault(_switch, _setting)
del _switch, _setting  # do not leave loop temporaries in the module namespace

from typing import Dict, Tuple

import numpy as np
import torch
from PIL import Image


# Lower-case emotion names.
EMOTION_LABELS = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
# Capitalized counterparts, in the same order as EMOTION_LABELS.
DISPLAY_LABELS = [name.capitalize() for name in EMOTION_LABELS]

# Lookup: lower-cased model output label → display label (capitalized).
_LABEL_MAP = dict(zip(EMOTION_LABELS, DISPLAY_LABELS))
# Noun-form variants that resolve to the same display labels.
_LABEL_MAP["happiness"] = "Happy"
_LABEL_MAP["sadness"] = "Sad"
_LABEL_MAP["anger"] = "Angry"

# HuggingFace Hub identifier of the emotion classification model.
MODEL_ID = "dima806/face_emotions_image_detection"


class EmotionDetector:
    """
    Lightweight ViT-based facial expression recognizer.

    Input : face crop as RGB numpy array (any size).
    Output: dict mapping emotion label → probability.

    The classification pipeline is created lazily on the first call to
    ``predict``; constructing the detector itself loads nothing.
    """

    def __init__(self) -> None:
        self._pipe = None   # lazy load: pipeline is built on first predict()

    def _load(self):
        """
        Build and return the image-classification pipeline for ``MODEL_ID``.

        The pipeline is created on CPU (``device=-1``) and with ``top_k=None``.
        """
        # Prevent transformers from loading TensorFlow (crashes on machines without AVX).
        # These are assigned unconditionally, right before transformers is imported.
        for flag, value in (
            ("USE_TF", "0"),
            ("USE_JAX", "0"),
            ("USE_TORCH", "1"),
            ("TRANSFORMERS_NO_TF", "1"),
            ("TRANSFORMERS_NO_JAX", "1"),
        ):
            os.environ[flag] = value
        from transformers import pipeline
        print("[emotion] Loading emotion model (first run)…")
        pipe = pipeline(
            "image-classification",
            model=MODEL_ID,
            top_k=None,
            device=-1,   # CPU
        )
        print("[emotion] Loaded.")
        return pipe

    # ── public ────────────────────────────────────────────────────────────

    def predict(self, face_rgb: np.ndarray) -> Dict[str, float]:
        """
        Returns dict: { 'Happy': 0.87, 'Sad': 0.05, … }

        Every label in ``DISPLAY_LABELS`` is present in the result, defaulting
        to 0.0 when the model reports no score for it. A model label that is
        not in ``_LABEL_MAP`` is capitalized and added as an extra key.

        Args:
            face_rgb: face crop as an RGB numpy array.
                NOTE(review): the array is handed to ``Image.fromarray``
                unchanged; presumably uint8 pixel data is expected — confirm
                against callers.

        Raises:
            ValueError: if ``face_rgb`` is None or contains no pixels.
        """
        # Reject unusable crops up front so an empty detection neither triggers
        # the model load nor fails later with an obscure error.
        if face_rgb is None or np.size(face_rgb) == 0:
            raise ValueError("face_rgb must be a non-empty image array")

        if self._pipe is None:
            self._pipe = self._load()

        pil = Image.fromarray(face_rgb).convert("RGB")
        results = self._pipe(pil)

        # Normalise labels → Title Case and fill any missing emotions
        out: Dict[str, float] = {lbl: 0.0 for lbl in DISPLAY_LABELS}
        for r in results:
            raw_label = r["label"]
            label = _LABEL_MAP.get(raw_label.lower(), raw_label.capitalize())
            out[label] = float(r["score"])
        return out

    def top_emotion(self, face_rgb: np.ndarray) -> Tuple[str, float]:
        """
        Return ``(label, probability)`` for the highest-scoring emotion.

        Runs ``predict`` on ``face_rgb`` and picks the entry with the largest
        score; on ties, the first such key in the result dict is returned.

        Raises:
            ValueError: if ``face_rgb`` is None or contains no pixels.
        """
        probs = self.predict(face_rgb)
        label = max(probs, key=probs.get)
        return label, probs[label]