"""
Chatterbox Multilingual TTS Backend with Voice Cloning support.
This is the default backend for the Phone Announcements engine.
"""

from typing import Optional

import numpy as np
from loguru import logger

from .base import BackendConfig, TTSBackend, TTSResult

# Default reference clips used for voice cloning when the caller supplies none.
# Every clip lives under the same bucket prefix, so only the relative file
# name is listed per language code; the full URLs are assembled below.
_VOICE_PROMPT_BASE_URL = (
    "https://storage.googleapis.com/chatterbox-demo-samples/mtl_prompts"
)

_VOICE_PROMPT_FILES = {
    "ar": "ar_f/ar_prompts2.flac",
    "da": "da_m1.flac",
    "de": "de_f1.flac",
    "el": "el_m.flac",
    "en": "en_f1.flac",
    "es": "es_f1.flac",
    "fi": "fi_m.flac",
    "fr": "fr_f1.flac",
    "he": "he_m1.flac",
    "hi": "hi_f1.flac",
    "it": "it_m1.flac",
    "ja": "ja/ja_prompts1.flac",
    "ko": "ko_f.flac",
    "ms": "ms_f.flac",
    "nl": "nl_m.flac",
    "no": "no_f1.flac",
    "pl": "pl_m.flac",
    "pt": "pt_m1.flac",
    "ru": "ru_m.flac",
    "sv": "sv_f.flac",
    "sw": "sw_m.flac",
    "tr": "tr_m.flac",
    "zh": "zh_f2.flac",
}

# Language code -> absolute URL of the default voice prompt for that language.
DEFAULT_VOICE_PROMPTS = {
    lang_code: f"{_VOICE_PROMPT_BASE_URL}/{file_name}"
    for lang_code, file_name in _VOICE_PROMPT_FILES.items()
}


class ChatterboxBackend(TTSBackend):
    """
    Chatterbox Multilingual TTS Backend.

    Features:
    - 23 language support
    - High-quality voice cloning
    - Expressive speech synthesis

    This backend uses the ResembleAI Chatterbox model for synthesis. The model
    is imported and loaded lazily: either explicitly through ``load()`` or
    implicitly on the first ``generate()`` call.
    """

    # Optimal defaults for phone announcements (clear, professional)
    DEFAULT_EXAGGERATION = (
        0.35  # Slightly less expressive for professional announcements
    )
    DEFAULT_TEMPERATURE = 0.7  # Balanced randomness
    DEFAULT_CFG_WEIGHT = 0.5  # Standard guidance

    # Language code -> human-readable language name.
    SUPPORTED_LANGUAGES = {
        "ar": "Arabic",
        "da": "Danish",
        "de": "German",
        "el": "Greek",
        "en": "English",
        "es": "Spanish",
        "fi": "Finnish",
        "fr": "French",
        "he": "Hebrew",
        "hi": "Hindi",
        "it": "Italian",
        "ja": "Japanese",
        "ko": "Korean",
        "ms": "Malay",
        "nl": "Dutch",
        "no": "Norwegian",
        "pl": "Polish",
        "pt": "Portuguese",
        "ru": "Russian",
        "sv": "Swedish",
        "sw": "Swahili",
        "tr": "Turkish",
        "zh": "Chinese",
    }

    def __init__(self, config: Optional[BackendConfig] = None):
        """Create the backend without loading the model.

        Args:
            config: Backend configuration, forwarded to the base class.
        """
        super().__init__(config)
        self._model = None  # Set by load(), cleared by unload()
        self._device = None  # Device spec returned by config.resolve_device()

    @property
    def name(self) -> str:
        """Human-readable backend name."""
        return "Chatterbox Multilingual"

    @property
    def supports_voice_cloning(self) -> bool:
        """Whether a reference clip can be used to clone a voice (always True)."""
        return True

    @property
    def supported_languages(self) -> dict[str, str]:
        """Return a copy of the language-code -> language-name mapping."""
        return self.SUPPORTED_LANGUAGES.copy()

    def _uses_cuda(self) -> bool:
        """Return True when the resolved device is a CUDA device.

        Compares on the string form so that indexed specs such as ``"cuda:0"``
        and ``torch.device`` objects are recognised, not only the bare
        ``"cuda"`` string.
        """
        return self._device is not None and str(self._device).startswith("cuda")

    def load(self) -> None:
        """Load the Chatterbox model onto the configured device.

        Does nothing (apart from logging) when the model is already loaded.

        Raises:
            Exception: Whatever ``ChatterboxMultilingualTTS.from_pretrained``
                raises; the error is logged and re-raised unchanged.
        """
        if self._is_loaded:
            logger.info("Chatterbox model already loaded")
            return

        logger.info("Loading Chatterbox Multilingual model...")

        # Imported here so that importing this module does not pull in the
        # model package until a model is actually needed.
        from src.chatterbox.mtl_tts import ChatterboxMultilingualTTS

        self._device = self.config.resolve_device()
        logger.info(f"Using device: {self._device}")

        try:
            self._model = ChatterboxMultilingualTTS.from_pretrained(self._device)
            self._is_loaded = True
            logger.info("Chatterbox model loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load Chatterbox model: {e}")
            raise

    def unload(self) -> None:
        """Unload the model to free memory.

        Does nothing when no model is currently held.
        """
        if self._model is None:
            return

        import torch

        # Drop our reference so the model can be garbage-collected.
        self._model = None
        if self._uses_cuda():
            # Hand cached, now-unused GPU memory back to the driver.
            torch.cuda.empty_cache()
        self._is_loaded = False
        logger.info("Chatterbox model unloaded")

    def get_default_voice(self, language: str) -> Optional[str]:
        """Get the default voice prompt URL for a language.

        Args:
            language: Language code; matched case-insensitively.

        Returns:
            The prompt URL, or None when no default exists for the language.
        """
        return DEFAULT_VOICE_PROMPTS.get(language.lower())

    def generate(
        self,
        text: str,
        language: str = "de",
        voice_audio_path: Optional[str] = None,
        exaggeration: Optional[float] = None,
        temperature: Optional[float] = None,
        cfg_weight: Optional[float] = None,
        seed: Optional[int] = None,
        **kwargs,
    ) -> TTSResult:
        """
        Generate speech from text using Chatterbox.

        The model is loaded on demand if ``load()`` has not been called yet.

        Args:
            text: Text to synthesize
            language: Language code (default: "de" for German), matched
                case-insensitively against ``SUPPORTED_LANGUAGES``
            voice_audio_path: Path/URL to reference audio for voice cloning;
                when empty or None the language's default prompt is used
            exaggeration: Speech expressiveness (0.25-2.0, default: 0.35);
                the range is not validated here
            temperature: Generation randomness (0.05-5.0, default: 0.7);
                the range is not validated here
            cfg_weight: CFG guidance weight (0.2-1.0, default: 0.5);
                the range is not validated here
            seed: Random seed for reproducibility. ``None`` and ``0`` both
                mean "do not seed" (random output)
            **kwargs: Accepted for interface compatibility and ignored

        Returns:
            TTSResult with audio waveform and sample rate

        Raises:
            ValueError: If ``language`` is not a supported language code.
            Exception: Any error raised while loading the model or during
                synthesis; synthesis errors are logged and re-raised.
        """
        # Validate the request before doing anything expensive or stateful:
        # an unsupported language must not trigger a model load or reseed the
        # global RNGs.
        lang_code = language.lower()
        if lang_code not in self.SUPPORTED_LANGUAGES:
            available = ", ".join(sorted(self.SUPPORTED_LANGUAGES.keys()))
            raise ValueError(
                f"Unsupported language '{language}'. Available: {available}"
            )

        if not self._is_loaded:
            self.load()

        import random

        import torch

        # Apply seed if provided. This seeds the process-wide Python, NumPy
        # and torch generators, so it affects other code sharing them.
        if seed is not None and seed != 0:
            torch.manual_seed(seed)
            random.seed(seed)
            np.random.seed(seed)
            if self._uses_cuda():
                torch.cuda.manual_seed_all(seed)

        # Use defaults for unspecified parameters
        if exaggeration is None:
            exaggeration = self.DEFAULT_EXAGGERATION
        if temperature is None:
            temperature = self.DEFAULT_TEMPERATURE
        if cfg_weight is None:
            cfg_weight = self.DEFAULT_CFG_WEIGHT

        # Resolve voice prompt: an explicit reference clip wins, otherwise
        # fall back to the per-language default.
        # NOTE(review): the defaults in DEFAULT_VOICE_PROMPTS are remote URLs
        # passed through unchanged - confirm the model accepts URLs here.
        audio_prompt = voice_audio_path or self.get_default_voice(lang_code)

        logger.info(f"Generating speech: lang={lang_code}, text='{text[:50]}...'")

        try:
            wav = self._model.generate(
                text=text,
                language_id=lang_code,
                audio_prompt_path=audio_prompt,
                exaggeration=exaggeration,
                temperature=temperature,
                cfg_weight=cfg_weight,
            )

            # Convert to numpy array. detach()/cpu() are no-ops for a plain
            # CPU tensor but are required before numpy() if the model returns
            # a tensor on an accelerator or one that tracks gradients.
            audio_np = wav.squeeze().detach().cpu().numpy()

            return TTSResult(audio=audio_np, sample_rate=self._model.sr)

        except Exception as e:
            logger.error(f"TTS generation failed: {e}")
            raise