"""
COGNITIVE-CORE: Utility Functions
==================================

Common utilities for cognitive model development, including:
- Environment setup for Kaggle/Colab
- Device detection
- Memory optimization helpers
- Logging utilities

Copyright © 2026 Mike Amega (Logo) - Ame Web Studio
License: Proprietary - All Rights Reserved
"""

import os
import sys
import torch
import warnings
from typing import Optional, Dict, Any


# ==============================================================================
# ENVIRONNEMENT & CACHE
# ==============================================================================


def setup_environment(cache_dir: Optional[str] = None) -> str:
    """
    Configure cache locations for Kaggle, Colab, or a local machine.

    Points the HuggingFace cache environment variables at a writable
    directory (created if missing), which avoids read-only file system
    errors on Kaggle. Also installs process-wide warning filters that
    silence ``FutureWarning`` everywhere and ``UserWarning`` raised from
    ``transformers`` modules.

    Args:
        cache_dir: Custom cache directory (optional). A leading ``~`` is
            expanded to the user's home directory. When omitted, the
            directory is chosen from the detected environment:
            ``/kaggle/working/.cache`` if ``/kaggle`` exists,
            ``/content/.cache`` if ``/content`` exists (Colab), otherwise
            ``~/.cache/cognitive``.

    Returns:
        Path of the configured cache directory (with ``~`` expanded).
    """
    if cache_dir is None:
        # Detect the hosting environment from well-known root directories.
        if os.path.exists("/kaggle"):
            cache_dir = "/kaggle/working/.cache"
        elif os.path.exists("/content"):  # Colab
            cache_dir = "/content/.cache"
        else:
            cache_dir = "~/.cache/cognitive"

    # Expand "~" for caller-supplied paths too; otherwise a literal "~"
    # directory would be created relative to the current working directory.
    cache_dir = os.path.expanduser(cache_dir)
    datasets_dir = os.path.join(cache_dir, "datasets")

    # Creating the nested directory also creates cache_dir itself.
    os.makedirs(datasets_dir, exist_ok=True)

    # Route the HuggingFace caches to the chosen directory.
    os.environ["HF_HOME"] = cache_dir
    # NOTE(review): TRANSFORMERS_CACHE looks like the legacy variable name;
    # kept so existing setups keep working — confirm it is still needed.
    os.environ["TRANSFORMERS_CACHE"] = cache_dir
    os.environ["HF_DATASETS_CACHE"] = datasets_dir

    # Silence non-critical warnings.
    warnings.filterwarnings("ignore", category=FutureWarning)
    warnings.filterwarnings("ignore", category=UserWarning, module="transformers")

    return cache_dir


def get_device(prefer_gpu: bool = True) -> torch.device:
    """
    Pick the best available compute device and print which one was chosen.

    When ``prefer_gpu`` is true the selection order is CUDA, then Apple MPS
    (only if this torch build exposes ``torch.backends.mps``), then CPU.
    When it is false the CPU is always returned.

    Args:
        prefer_gpu: Use an accelerator when one is available.

    Returns:
        The selected torch.device.
    """
    if not prefer_gpu:
        print("🔧 CPU")
        return torch.device("cpu")

    if torch.cuda.is_available():
        chosen = torch.device("cuda")
        # The banner reports the name and total memory of device index 0.
        name = torch.cuda.get_device_name(0)
        total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"🔧 GPU: {name} ({total_gb:.1f} GB)")
        return chosen

    # Older torch builds have no `mps` backend attribute at all.
    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        print("🔧 Apple MPS")
        return torch.device("mps")

    print("🔧 CPU")
    return torch.device("cpu")


def get_optimal_dtype(device: torch.device) -> torch.dtype:
    """
    Choose a default floating-point dtype for the given device.

    Args:
        device: The target device.

    Returns:
        torch.float32 for any non-CUDA device; on CUDA, torch.bfloat16 when
        ``torch.cuda.is_bf16_supported()`` reports support, otherwise
        torch.float16.
    """
    # Anything that is not CUDA (CPU, MPS, ...) stays in full precision.
    if device.type != "cuda":
        return torch.float32

    return torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16


# ==============================================================================
# MÉMOIRE & OPTIMISATION
# ==============================================================================


def get_memory_info() -> Dict[str, float]:
    """
    Report memory figures for the current CUDA device, in GB (1e9 bytes).

    All figures refer to the same device (the current CUDA device), so the
    numbers stay consistent on multi-GPU machines.

    Returns:
        Dict with the keys ``allocated_gb``, ``reserved_gb``, ``free_gb``
        and ``total_gb``. ``free_gb`` is computed as
        ``total_gb - allocated_gb``. Every value is ``0.0`` when CUDA is
        not available.
    """
    if not torch.cuda.is_available():
        # Floats (not ints) so the values match the annotated return type.
        return {
            "allocated_gb": 0.0,
            "reserved_gb": 0.0,
            "free_gb": 0.0,
            "total_gb": 0.0,
        }

    # Query one device for every figure instead of mixing the current
    # device (allocated/reserved) with a hard-coded device 0 (total).
    device_index = torch.cuda.current_device()
    allocated = torch.cuda.memory_allocated(device_index) / 1e9
    reserved = torch.cuda.memory_reserved(device_index) / 1e9
    total = torch.cuda.get_device_properties(device_index).total_memory / 1e9

    return {
        "allocated_gb": allocated,
        "reserved_gb": reserved,
        "free_gb": total - allocated,
        "total_gb": total,
    }


def clear_memory():
    """Release cached GPU memory when CUDA is available; otherwise do nothing."""
    if not torch.cuda.is_available():
        return

    # Empty the CUDA cache, then synchronize with the device.
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

def estimate_model_memory(
    model,
    dtype: torch.dtype = torch.float32,
    activation_factor: float = 2.0,
) -> float:
    """
    Estimate the memory needed to hold and run a model.

    The estimate is the storage size of all parameters and buffers
    multiplied by ``activation_factor``, a rough allowance for activations.

    When ``dtype`` is a half-precision type (float16 / bfloat16),
    floating-point tensors wider than 2 bytes per element are counted at
    2 bytes, as if cast. Tensors already stored in half precision and
    non-floating-point buffers keep their real size, so a model that is
    already in half precision is not halved a second time.

    Args:
        model: The PyTorch model (anything exposing ``parameters()`` and
            ``buffers()``).
        dtype: The dtype the model will be run in.
        activation_factor: Multiplier applied to the parameter + buffer
            size (default 2.0).

    Returns:
        Estimated size in GB (1e9 bytes).
    """
    half_precision = dtype in (torch.float16, torch.bfloat16)

    def _tensor_bytes(tensor) -> int:
        # Bytes the tensor occupies once the model runs in `dtype`.
        width = tensor.element_size()
        if half_precision and tensor.is_floating_point():
            width = min(width, 2)
        return tensor.numel() * width

    param_bytes = sum(_tensor_bytes(p) for p in model.parameters())
    buffer_bytes = sum(_tensor_bytes(b) for b in model.buffers())

    total_bytes = (param_bytes + buffer_bytes) * activation_factor
    return total_bytes / 1e9


# ==============================================================================
# LOGGING & AFFICHAGE
# ==============================================================================


def print_model_info(model, show_params: bool = True):
    """
    Print a short summary of a model.

    The summary lists the class name, the total and trainable parameter
    counts, and the memory estimate from ``estimate_model_memory``. If the
    model has a ``config`` attribute and ``show_params`` is true, the
    ``d_model``, ``n_layers``, ``n_heads`` and ``vocab_size`` entries that
    exist on that config are printed as well.

    Args:
        model: The model to describe.
        show_params: Also print the configuration details.
    """
    total_count = 0
    trainable_count = 0
    for param in model.parameters():
        size = param.numel()
        total_count += size
        if param.requires_grad:
            trainable_count += size

    print(f"\n📊 MODÈLE: {model.__class__.__name__}")
    print(f"   Total paramètres: {total_count:,}")
    print(f"   Paramètres entraînables: {trainable_count:,}")
    print(f"   Mémoire estimée: {estimate_model_memory(model):.2f} GB")

    if not (show_params and hasattr(model, "config")):
        return

    print(f"\n   Configuration:")
    # Only the config fields that are actually present get printed.
    for key in ("d_model", "n_layers", "n_heads", "vocab_size"):
        if hasattr(model.config, key):
            print(f"   - {key}: {getattr(model.config, key)}")


def print_training_progress(
    step: int,
    total_steps: int,
    loss: float,
    lr: Optional[float] = None,
    extras: Optional[Dict[str, float]] = None,
):
    """
    Print a one-line training progress report.

    The line has the form
    ``[  step/total] ( pct%) | Loss: x.xxxx | LR: x.xxe-xx | name: x.xxxx``,
    where the ``LR`` part and the extra metrics appear only when provided.

    Args:
        step: Current step.
        total_steps: Total number of steps. When this is 0 the percentage
            is reported as 0.0 instead of raising ``ZeroDivisionError``.
        loss: Loss value.
        lr: Current learning rate (optional).
        extras: Additional numeric metrics, appended in iteration order.
    """
    # Guard against an unknown/empty schedule so logging never crashes a run.
    progress = step / total_steps * 100 if total_steps else 0.0

    parts = [f"[{step:>6}/{total_steps}] ({progress:>5.1f}%)", f"Loss: {loss:.4f}"]

    if lr is not None:
        parts.append(f"LR: {lr:.2e}")

    if extras:
        parts.extend(f"{key}: {val:.4f}" for key, val in extras.items())

    print(" | ".join(parts))


# ==============================================================================
# TOKEN HUGGINGFACE
# ==============================================================================


def get_hf_token() -> Optional[str]:
    """
    Look up the HuggingFace token from several sources.

    Sources are tried in this order, and the first non-empty value wins:
    1. The ``HF_TOKEN`` environment variable
    2. Kaggle secrets (``kaggle_secrets.UserSecretsClient``)
    3. Colab secrets (``google.colab.userdata``)
    4. The token stored by the local HuggingFace CLI (``HfFolder``)

    Each optional source is best-effort: any exception it raises (missing
    package, missing secret, ...) is swallowed and the next source is tried.

    Returns:
        The token, or None if no source provided one.
    """
    env_token = os.environ.get("HF_TOKEN")
    if env_token:
        return env_token

    def _kaggle_secret():
        from kaggle_secrets import UserSecretsClient

        return UserSecretsClient().get_secret("HF_TOKEN")

    def _colab_secret():
        from google.colab import userdata

        return userdata.get("HF_TOKEN")

    def _cli_token():
        from huggingface_hub import HfFolder

        return HfFolder.get_token()

    for lookup in (_kaggle_secret, _colab_secret, _cli_token):
        try:
            candidate = lookup()
        except Exception:
            # Deliberately broad: a source being unavailable is expected.
            continue
        if candidate:
            return candidate

    return None