"""
    Each loader caches its model in module-level globals so the Space pays the
    cold-start cost exactly once. If a load fails (no GPU, missing weights, no
    llama-cpp-python), the loader returns ``None`` — agents are responsible for
    falling back to a mocked output instead of crashing the UI.
"""

from __future__ import annotations

import logging
from typing import Any, Optional

from src import config

# Module-level logger; get_vision_model() reports load progress and failures here.
log = logging.getLogger(__name__)

# Process-wide model caches. ``None`` means no model is currently cached
# (nothing loaded yet, or the load did not succeed).
_vision: Any = None  # populated by get_vision_model()
_planner: Any = None  # presumably populated by a planner loader outside this view
_flux: Any = None  # presumably populated by an image-generation loader; verify
_tts: Any = None  # presumably populated by a text-to-speech loader; verify


def get_vision_model() -> Optional[Any]:
    """Return the cached vision model, loading it on first use.

    Loads the MiniCPM-V-4.6 GGUF weights and their mmproj projector (used for
    ingredient ID and progress validation) via ``llama_cpp``. The resulting
    ``Llama`` instance is stored in the module-level ``_vision`` global, so
    subsequent calls return it without reloading.

    A failed load is remembered for the lifetime of the process: later calls
    return ``None`` immediately instead of repeating the download and model
    construction on every request. Restart the process to retry.

    Returns:
        The loaded ``Llama`` instance, or ``None`` when ``config.is_mock()``
        is true or the model could not be loaded. Callers are expected to
        fall back to mocked output when they receive ``None``.
    """
    global _vision
    if _vision is not None:
        return _vision
    if config.is_mock():
        return None
    # ``_vision is None`` cannot distinguish "never tried" from "tried and
    # failed", so the failure is tracked separately on the function object.
    if getattr(get_vision_model, "_load_failed", False):
        return None
    try:
        # Imported inside the ``try`` so a missing optional package is handled
        # by the fallback below rather than breaking module import.
        from huggingface_hub import hf_hub_download
        from llama_cpp import Llama
        from llama_cpp.llama_chat_format import MiniCPMv26ChatHandler

        log.info("Downloading vision GGUF...")
        model_path = hf_hub_download(repo_id=config.VISION_REPO, filename=config.VISION_MODEL_FILE)
        mmproj_path = hf_hub_download(repo_id=config.VISION_REPO, filename=config.VISION_MMPROJ_FILE)
        handler = MiniCPMv26ChatHandler(clip_model_path=mmproj_path)
        _vision = Llama(
            model_path=model_path,
            chat_handler=handler,
            n_ctx=config.N_CTX,
            n_threads=config.N_THREADS,
            verbose=False,
        )
        log.info("Vision model ready.")
    except Exception as e:
        # Deliberately broad: any load failure must degrade to the mock path
        # instead of propagating into the UI.
        log.warning("Vision model unavailable (%s); falling back to mock.", e)
        # The warning keeps only str(e); emit the traceback at DEBUG level so
        # the root cause can still be diagnosed without noisy default logs.
        log.debug("Vision model load traceback:", exc_info=True)
        _vision = None
        setattr(get_vision_model, "_load_failed", True)
    return _vision