# Cook_with_a_LLM / src / models / loader.py
# FredinVázquez
# add vision model
# 6d25a57
"""
Each loader caches its model in module-level globals so the Space pays the
cold-start cost exactly once. If a load fails (no GPU, missing weights, no
llama-cpp-python), the loader returns ``None`` — agents are responsible for
falling back to a mocked output instead of crashing the UI.
"""
from __future__ import annotations
import logging
from typing import Any, Optional
from src import config
# Module-scoped logger, named after this module.
log = logging.getLogger(__name__)
# Per-model cache slots. ``None`` means nothing is cached in that slot.
# ``_vision`` is filled in by get_vision_model() on a successful load.
_vision: Any = None
# NOTE(review): the loaders that own the three slots below are not visible in
# this part of the file; presumably they cache the same way — confirm.
_planner: Any = None
_flux: Any = None
_tts: Any = None
def get_vision_model() -> Optional[Any]:
    """Return the shared vision model, loading it on the first call.

    The model is the MiniCPM-V-4.6 GGUF plus its mmproj file, used for
    ingredient ID and progress validation. Both files are fetched with
    ``hf_hub_download`` from ``config.VISION_REPO`` and handed to
    ``llama_cpp.Llama``; the resulting object is stored in the module-level
    ``_vision`` global and reused by later calls.

    Returns:
        The cached ``Llama`` instance, or ``None`` when ``config.is_mock()``
        is true or when any step of the load raised an exception. A failed
        load is logged as a warning and is not remembered, so the next call
        attempts the load again.
    """
    global _vision

    # One guard covers both early exits: a cached model is returned as-is,
    # and in mock mode ``_vision`` is still None here, so None is returned.
    # ``config.is_mock()`` is only consulted when nothing is cached yet.
    if _vision is not None or config.is_mock():
        return _vision

    try:
        # Imported inside the try so a missing optional package is handled by
        # the same fallback path as a failed download or model load.
        from huggingface_hub import hf_hub_download
        from llama_cpp import Llama
        from llama_cpp.llama_chat_format import MiniCPMv26ChatHandler

        log.info("Downloading vision GGUF...")
        repo = config.VISION_REPO
        weights_file = hf_hub_download(repo_id=repo, filename=config.VISION_MODEL_FILE)
        projector_file = hf_hub_download(repo_id=repo, filename=config.VISION_MMPROJ_FILE)

        # NOTE(review): the docstring names MiniCPM-V-4.6 while the chat
        # handler is the v2.6 one — confirm this pairing is intended.
        llama_options = {
            "model_path": weights_file,
            "chat_handler": MiniCPMv26ChatHandler(clip_model_path=projector_file),
            "n_ctx": config.N_CTX,
            "n_threads": config.N_THREADS,
            "verbose": False,
        }
        _vision = Llama(**llama_options)
        log.info("Vision model ready.")
    except Exception as err:
        # Deliberate best-effort: report the problem and leave the cache empty
        # so callers see None instead of an exception.
        log.warning("Vision model unavailable (%s); falling back to mock.", err)
        _vision = None

    return _vision