Instructions to use vikhyatk/moondream2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use vikhyatk/moondream2 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="vikhyatk/moondream2", trust_remote_code=True)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use vikhyatk/moondream2 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "vikhyatk/moondream2"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "vikhyatk/moondream2",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker
docker model run hf.co/vikhyatk/moondream2
- SGLang
How to use vikhyatk/moondream2 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "vikhyatk/moondream2" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "vikhyatk/moondream2",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "vikhyatk/moondream2" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "vikhyatk/moondream2",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
- Docker Model Runner
How to use vikhyatk/moondream2 with Docker Model Runner:
docker model run hf.co/vikhyatk/moondream2
| import functools | |
| import os | |
| import shutil | |
| import torch | |
| from pathlib import Path | |
| from urllib.request import Request, urlopen | |
| from typing import Optional | |
def variant_cache_dir():
    """Return the directory under which variant checkpoints are cached.

    The base directory is chosen in this order of precedence:

    1. ``$HF_HUB_CACHE`` when that variable is set,
    2. ``$HF_HOME/hub`` when ``HF_HOME`` is set,
    3. ``~/.cache/huggingface/hub`` (with ``~`` expanded) otherwise.

    ``md_variants`` is appended to the chosen base. The directory is not
    created here.
    """
    hub_cache = os.environ.get("HF_HUB_CACHE")
    home = os.environ.get("HF_HOME")

    if hub_cache is not None:
        base = Path(hub_cache)
    elif home is not None:
        base = Path(home) / "hub"
    else:
        base = Path("~/.cache/huggingface/hub").expanduser()

    return base / "md_variants"
def cached_variant_path(variant_id: str):
    """Return the local path of the checkpoint for *variant_id*, downloading it if needed.

    ``variant_id`` is either ``"<variant>"`` or ``"<variant>/<step>"``; when
    no step is given, ``"final"`` is used. The checkpoint lives at
    ``variant_cache_dir() / <variant> / "<step>.pt"`` and is reused on later
    calls if that file already exists.

    The download endpoint is read from ``MOONDREAM_ENDPOINT`` (default
    ``https://api.moondream.ai``). If ``MOONDREAM_API_KEY`` is set, it is sent
    in the ``X-Moondream-Auth`` request header.

    Raises:
        urllib.error.URLError: if the download request fails (includes
            ``HTTPError`` for non-success responses).
        OSError: if the cache directory or file cannot be written.
    """
    variant, *rest = variant_id.split("/", 1)
    step = rest[0] if rest else "final"

    cache_dir = variant_cache_dir() / variant
    os.makedirs(cache_dir, exist_ok=True)
    dest = cache_dir / f"{step}.pt"
    if dest.exists():
        return dest

    md_endpoint = os.getenv("MOONDREAM_ENDPOINT", "https://api.moondream.ai")

    headers = {"User-Agent": "moondream-torch"}
    api_key = os.getenv("MOONDREAM_API_KEY")
    if api_key is not None:
        headers["X-Moondream-Auth"] = api_key

    req = Request(f"{md_endpoint}/v1/variants/{variant_id}/download", headers=headers)

    # Stream into a sibling temp file and rename it into place only once the
    # transfer has finished. Writing straight to `dest` would leave a
    # truncated file behind if the download were interrupted, and the
    # `dest.exists()` check above would then treat it as a valid cache hit.
    # The pid in the name keeps concurrent processes from sharing a temp file.
    tmp = dest.with_name(f"{dest.name}.{os.getpid()}.part")
    try:
        with urlopen(req) as r, open(tmp, "wb") as f:
            shutil.copyfileobj(r, f)
        os.replace(tmp, dest)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only removes leftovers from a failed or interrupted download.
        if tmp.exists():
            tmp.unlink()

    return dest
def nest(flat):
    """Turn a flat mapping with dotted keys into a tree of nested dicts.

    Each key is split on ``"."``; every component except the last names a
    nested dict (created on first use) and the last component is the key
    under which the value is stored. For example ``{"a.b": 1, "a.c": 2}``
    becomes ``{"a": {"b": 1, "c": 2}}``.
    """
    root = {}
    for dotted, value in flat.items():
        *parents, leaf = dotted.split(".")
        node = root
        for name in parents:
            # Reuse the existing sub-dict for this component, or create it.
            node = node.setdefault(name, {})
        node[leaf] = value
    return root
def variant_state_dict(variant_id: Optional[str] = None, device: str = "cpu"):
    """Load a variant checkpoint and return it as a nested dict with renamed keys.

    Returns ``None`` when ``variant_id`` is ``None``. Otherwise the checkpoint
    file located by ``cached_variant_path(variant_id)`` is loaded with
    ``torch.load`` (``map_location=device``, ``weights_only=True``), each key
    is rewritten by applying the substring substitutions below in order, and
    the result is passed through ``nest``.
    """
    if variant_id is None:
        return None

    checkpoint_path = cached_variant_path(variant_id)
    raw = torch.load(checkpoint_path, map_location=device, weights_only=True)

    # TODO: Move these into the training code that saves checkpoints...
    substitutions = (
        ("text_model.transformer.h", "text.blocks"),
        (".mixer", ".attn"),
        (".out_proj", ".proj"),
        (".Wqkv", ".qkv"),
        (".parametrizations.weight.0", ""),
    )

    def translate(name):
        # Rules are applied sequentially, so a later rule sees the output of
        # the earlier ones; str.replace leaves names without a match unchanged.
        for needle, replacement in substitutions:
            name = name.replace(needle, replacement)
        return name

    renamed = {translate(name): tensor for name, tensor in raw.items()}
    return nest(renamed)