""" Each loader caches its model in module-level globals so the Space pays the cold-start cost exactly once. If a load fails (no GPU, missing weights, no llama-cpp-python), the loader returns ``None`` — agents are responsible for falling back to a mocked output instead of crashing the UI. """ from __future__ import annotations import logging from typing import Any, Optional from src import config log = logging.getLogger(__name__) _vision: Any = None _planner: Any = None _flux: Any = None _tts: Any = None def get_vision_model() -> Optional[Any]: """MiniCPM-V-4.6 GGUF + mmproj for ingredient ID and progress validation.""" global _vision if _vision is not None: return _vision if config.is_mock(): return None try: from huggingface_hub import hf_hub_download from llama_cpp import Llama from llama_cpp.llama_chat_format import MiniCPMv26ChatHandler log.info("Downloading vision GGUF...") model_path = hf_hub_download(repo_id=config.VISION_REPO, filename=config.VISION_MODEL_FILE) mmproj_path = hf_hub_download(repo_id=config.VISION_REPO, filename=config.VISION_MMPROJ_FILE) handler = MiniCPMv26ChatHandler(clip_model_path=mmproj_path) _vision = Llama( model_path=model_path, chat_handler=handler, n_ctx=config.N_CTX, n_threads=config.N_THREADS, verbose=False, ) log.info("Vision model ready.") except Exception as e: log.warning("Vision model unavailable (%s); falling back to mock.", e) _vision = None return _vision