"""NVIDIA NIM / Riva offline ASR for voice notes (provider-owned transport)."""

from __future__ import annotations

from pathlib import Path

from loguru import logger

# Supported NVIDIA NIM ASR models, keyed by model id.
# Each value is a ``(function_id, language_code)`` pair: the function id is
# sent as the ``function-id`` gRPC metadata entry and the language code is
# placed in the recognition config.
_NIM_ASR_MODEL_MAP: dict[str, tuple[str, str]] = {
    "nvidia/parakeet-ctc-0.6b-zh-tw": (
        "8473f56d-51ef-473c-bb26-efd4f5def2bf",
        "zh-TW",
    ),
    "nvidia/parakeet-ctc-0.6b-zh-cn": (
        "9add5ef7-322e-47e0-ad7a-5653fb8d259b",
        "zh-CN",
    ),
    # Function id taken from the NVIDIA NIM API docs (parakeet-ctc-0.6b-es).
    "nvidia/parakeet-ctc-0.6b-es": (
        "a9eeee8f-b509-4712-b19d-194361fa5f31",
        "es-US",
    ),
    "nvidia/parakeet-ctc-0.6b-vi": (
        "f3dff2bb-99f9-403d-a5f1-f574a757deb0",
        "vi-VN",
    ),
    "nvidia/parakeet-ctc-1.1b-asr": (
        "1598d209-5e27-4d3c-8079-4751568b1081",
        "en-US",
    ),
    "nvidia/parakeet-ctc-0.6b-asr": (
        "d8dd4e9b-fbf5-4fb0-9dba-8cf436c8d965",
        "en-US",
    ),
    # Multilingual model: the language code is intentionally left empty.
    "nvidia/parakeet-1.1b-rnnt-multilingual-asr": (
        "71203149-d3b7-4460-8231-1be2543a1fca",
        "",
    ),
    "openai/whisper-large-v3": (
        "b702f636-f60c-4a3d-a6f4-f3568c13bd7d",
        "multi",
    ),
}

# gRPC endpoint (host:port) handed to ``riva.client.Auth`` for every request.
_RIVA_SERVER = "grpc.nvcf.nvidia.com:443"


def _join_transcript_segments(response: object) -> str:
    """Concatenate the top alternative of every result segment in ``response``."""
    results = getattr(response, "results", None) or ()
    return "".join(
        result.alternatives[0].transcript
        for result in results
        if result.alternatives
    )


def transcribe_audio_file(
    file_path: Path,
    model: str,
    *,
    api_key: str,
) -> str:
    """Transcribe audio using NVIDIA NIM / Riva gRPC (offline recognition).

    The whole file is read into memory and sent in a single
    ``offline_recognize`` call. The response may contain several result
    segments; the best alternative of each one is concatenated in order so
    that longer recordings are not cut off after the first segment.

    Args:
        file_path: Path to encoded audio bytes readable by Riva.
        model: Hugging Face-style NIM model id (see ``_NIM_ASR_MODEL_MAP``).
        api_key: NVIDIA API key (Bearer token); must be non-empty.

    Returns:
        Transcript text, or ``(no speech detected)`` when the transcript is
        empty or contains only whitespace.

    Raises:
        ValueError: If ``api_key`` is blank or ``model`` is not a key of
            ``_NIM_ASR_MODEL_MAP``.
        ImportError: If the ``riva.client`` package is not installed.
        OSError: If ``file_path`` cannot be read.
    """
    key = (api_key or "").strip()
    if not key:
        raise ValueError(
            "NVIDIA NIM transcription requires a non-empty nvidia_nim_api_key "
            "(configure NVIDIA_NIM_API_KEY or pass api_key explicitly)."
        )

    # Imported lazily: riva is only available when the optional voice extra
    # is installed, so the module itself must stay importable without it.
    try:
        import riva.client
    except ImportError as e:
        raise ImportError(
            "NVIDIA NIM transcription requires the voice extra. "
            "Install with: uv sync --extra voice"
        ) from e

    model_config = _NIM_ASR_MODEL_MAP.get(model)
    if not model_config:
        raise ValueError(
            f"No NVIDIA NIM config found for model: {model}. "
            f"Supported models: {', '.join(_NIM_ASR_MODEL_MAP.keys())}"
        )
    function_id, language_code = model_config

    # The function id selects the hosted model; the API key travels as a
    # Bearer token in the gRPC metadata.
    auth = riva.client.Auth(
        use_ssl=True,
        uri=_RIVA_SERVER,
        metadata_args=[
            ["function-id", function_id],
            ["authorization", f"Bearer {key}"],
        ],
    )

    asr_service = riva.client.ASRService(auth)

    config = riva.client.RecognitionConfig(
        language_code=language_code,
        max_alternatives=1,
        verbatim_transcripts=True,
    )

    data = Path(file_path).read_bytes()

    response = asr_service.offline_recognize(data, config)

    # Join every segment rather than reading only results[0]; otherwise
    # multi-segment audio would be silently truncated.
    transcript = _join_transcript_segments(response)

    logger.debug(f"NIM transcription: {len(transcript)} chars")
    # Whitespace-only output carries no speech, so treat it like an empty one.
    if not transcript.strip():
        return "(no speech detected)"
    return transcript