"""Google Colab deployment utilities.

Phase 8: Helpers for running VoxDoc in Colab environments.
- GPU runtime auto-detection
- Ngrok tunneling for external access
- Colab-specific optimizations (memory, paths)

Set ``COLAB_MODE=true`` in config or .env to enable.
"""

import logging
import os
import sys
from typing import Dict, Any, Optional

from app.config import settings

logger = logging.getLogger(__name__)


def is_colab_environment() -> bool:
    """Report whether the ``google.colab`` package can be imported.

    Returns:
        ``True`` when ``import google.colab`` succeeds, ``False`` when it
        raises ``ImportError``.
    """
    try:
        import google.colab  # noqa: F401
    except ImportError:
        inside_colab = False
    else:
        inside_colab = True
    return inside_colab


def detect_gpu_runtime() -> Dict[str, Any]:
    """Detect the CUDA GPU, its VRAM, and a recommended quantization level.

    Only CUDA device 0 is inspected.

    Returns:
        A dict with the keys:

        - ``cuda_available``: whether ``torch.cuda.is_available()`` is true.
        - ``device_count``: number of CUDA devices (0 without CUDA).
        - ``gpu_name``: name of device 0, or ``None`` without CUDA.
        - ``vram_total_gb`` / ``vram_free_gb``: memory of device 0 in GiB,
          rounded to two decimals (0.0 without CUDA).
        - ``recommended_quantization``: ``4`` below 8 GiB total VRAM, ``8``
          below 16 GiB, otherwise ``None`` (full precision).
    """
    # Imported at call time rather than at module import.
    import torch

    bytes_per_gib = 1024 ** 3
    cuda_available = torch.cuda.is_available()

    info: Dict[str, Any] = {
        "cuda_available": cuda_available,
        "device_count": torch.cuda.device_count() if cuda_available else 0,
        "gpu_name": None,
        "vram_total_gb": 0.0,
        "vram_free_gb": 0.0,
        "recommended_quantization": None,
    }

    if not cuda_available:
        return info

    props = torch.cuda.get_device_properties(0)
    # The device-properties attribute is ``total_memory`` (bytes); the former
    # ``total_mem`` does not exist and raised AttributeError on GPU hosts.
    total_gb = props.total_memory / bytes_per_gib
    # mem_get_info returns (free_bytes, total_bytes) for the device.
    free_gb = torch.cuda.mem_get_info(0)[0] / bytes_per_gib

    info["gpu_name"] = props.name
    info["vram_total_gb"] = round(total_gb, 2)
    info["vram_free_gb"] = round(free_gb, 2)

    # Recommend quantization based on total VRAM.
    if total_gb < 8:
        info["recommended_quantization"] = 4
    elif total_gb < 16:
        info["recommended_quantization"] = 8
    else:
        info["recommended_quantization"] = None  # Full precision OK

    return info


def setup_colab_environment() -> Dict[str, str]:
    """Apply Colab-specific environment optimizations.

    Does nothing (and returns an empty dict) unless ``settings.colab_mode``
    is truthy. Otherwise it:

    - creates ``/content/models`` and defaults ``HF_HOME`` to
      ``/content/hf_cache`` when ``/content`` exists,
    - defaults ``PYTORCH_CUDA_ALLOC_CONF`` to ``max_split_size_mb:128``,
    - enables TF32 for CUDA matmul and cuDNN when CUDA is available.

    Environment variables that are already set are left untouched.

    Returns:
        Mapping of setting name to the value that is actually in effect.
    """
    changes: Dict[str, str] = {}

    if not settings.colab_mode:
        return changes

    # Keep model and Hugging Face caches under /content when it exists.
    if os.path.isdir("/content"):
        model_dir = "/content/models"
        os.makedirs(model_dir, exist_ok=True)
        changes["model_cache"] = model_dir
        # setdefault preserves a pre-existing HF_HOME, so record the value it
        # returns (the effective one) rather than the hard-coded default.
        changes["hf_home"] = os.environ.setdefault("HF_HOME", "/content/hf_cache")

    # Reduce memory pressure; again report the effective value, which may be
    # one that was already set in the environment.
    changes["cuda_alloc_conf"] = os.environ.setdefault(
        "PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:128"
    )

    # Allow TF32 kernels for faster matmul/convolutions. TF32 is only used by
    # GPUs that support it (NVIDIA Ampere and newer, e.g. A100).
    import torch
    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
        changes["tf32_enabled"] = "true"

    logger.info("Colab environment configured: %s", changes)
    return changes


async def start_ngrok_tunnel(port: int = 8000) -> Optional[str]:
    """Start an ngrok HTTP tunnel for external access to Colab.

    Requires ``COLAB_NGROK_TOKEN`` to be set (read from
    ``settings.colab_ngrok_token``).

    Args:
        port: Local port the tunnel should forward to.

    Returns:
        The public tunnel URL, or ``None`` when no token is configured,
        ``pyngrok`` is not installed, or the tunnel could not be started.
        Failures are logged, never raised.
    """
    import asyncio

    token = settings.colab_ngrok_token
    if not token:
        logger.warning("No ngrok token configured; external access unavailable")
        return None

    # Guard only the import, so an unrelated ImportError raised while
    # connecting is not misreported as "pyngrok not installed".
    try:
        from pyngrok import ngrok, conf
    except ImportError:
        logger.warning("pyngrok not installed; run: pip install pyngrok")
        return None

    try:
        conf.get_default().auth_token = token
        # ngrok.connect blocks while the tunnel is established; run it in the
        # default executor so the event loop stays responsive.
        loop = asyncio.get_running_loop()
        tunnel = await loop.run_in_executor(None, ngrok.connect, port, "http")
        public_url = tunnel.public_url
        logger.info("Ngrok tunnel active: %s -> localhost:%d", public_url, port)
        return public_url
    except Exception as e:
        logger.error("Failed to start ngrok tunnel: %s", e)
        return None


def get_colab_launch_info() -> Dict[str, Any]:
    """Build the summary shown when launching in Colab.

    Returns:
        A dict with ``is_colab``, ``colab_mode_enabled``, the raw ``gpu``
        detection result, and ``recommended_settings`` derived from it:
        GPU use and a 0.5 s streaming interval when CUDA is available
        (4.0 s otherwise), and quantization enabled whenever a bit width was
        recommended (bits fall back to 4 when none was).
    """
    gpu = detect_gpu_runtime()
    has_cuda = gpu["cuda_available"]
    quant_bits = gpu["recommended_quantization"]

    summary: Dict[str, Any] = {}
    summary["is_colab"] = is_colab_environment()
    summary["colab_mode_enabled"] = settings.colab_mode
    summary["gpu"] = gpu
    summary["recommended_settings"] = {
        "enable_gpu": has_cuda,
        "model_quantization_enabled": quant_bits is not None,
        "model_quantization_bits": quant_bits or 4,
        "streaming_interval_seconds": 0.5 if has_cuda else 4.0,
    }
    return summary