Instructions to use moondream/moondream3-preview with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use moondream/moondream3-preview with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="moondream/moondream3-preview", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("moondream/moondream3-preview", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use moondream/moondream3-preview with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "moondream/moondream3-preview"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "moondream/moondream3-preview",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
Use Docker
docker model run hf.co/moondream/moondream3-preview
- SGLang
How to use moondream/moondream3-preview with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "moondream/moondream3-preview" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "moondream/moondream3-preview",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "moondream/moondream3-preview" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "moondream/moondream3-preview",
        "prompt": "Once upon a time,",
        "max_tokens": 512,
        "temperature": 0.5
    }'
```
- Docker Model Runner
How to use moondream/moondream3-preview with Docker Model Runner:
docker model run hf.co/moondream/moondream3-preview
from dataclasses import asdict, dataclass, field
from typing import Dict, List, Optional
@dataclass(frozen=True)
class TextMoeConfig:
    """Mixture-of-experts settings nested inside the text model configuration.

    Declared as a frozen dataclass so that fields can be overridden by
    keyword (``TextMoeConfig(num_experts=8)``) and so that instances are
    hashable and immutable, which makes them safe to share as defaults.

    Field meanings below are inferred from the names only -- confirm against
    the model implementation before relying on them.
    """

    num_experts: int = 64  # presumably the total number of experts
    start_layer: int = 4  # presumably the first layer index that uses MoE
    experts_per_token: int = 8  # presumably the experts routed to per token
    expert_inner_dim: int = 1024  # presumably each expert's hidden width
@dataclass(frozen=True)
class TextConfig:
    """Hyperparameters of the text model.

    Declared as a frozen dataclass so every field can be overridden by
    keyword, e.g. ``TextConfig(**config_dict["text"])``.

    Field meanings are inferred from the names only -- confirm against the
    model implementation before relying on them.
    """

    dim: int = 2048
    ff_dim: int = 8192
    n_layers: int = 24
    vocab_size: int = 51200
    max_context: int = 4096
    n_heads: int = 32
    n_kv_heads: int = 32
    prefix_attn: int = 730
    group_size: Optional[int] = None
    # Built per instance through a factory rather than as one shared default
    # object: dataclasses reject unhashable default instances on Python 3.11+,
    # and a factory works regardless of how TextMoeConfig is declared.
    # ``None`` stays a valid value (presumably meaning "no MoE").
    moe: Optional[TextMoeConfig] = field(default_factory=TextMoeConfig)
@dataclass(frozen=True)
class VisionConfig:
    """Hyperparameters of the vision encoder and its projection.

    Declared as a frozen dataclass so every field can be overridden by
    keyword, e.g. ``VisionConfig(**config_dict["vision"])``.

    Field meanings are inferred from the names only (``enc_*`` presumably
    describe the encoder, ``proj_*`` the projection) -- confirm against the
    model implementation before relying on them.
    """

    enc_dim: int = 1152
    enc_patch_size: int = 14
    enc_n_layers: int = 27
    enc_ff_dim: int = 4304
    enc_n_heads: int = 16
    proj_out_dim: int = 2048
    crop_size: int = 378
    in_channels: int = 3
    max_crops: int = 12
    overlap_margin: int = 4
    proj_inner_dim: int = 8192
@dataclass(frozen=True)
class RegionConfig:
    """Hyperparameters of the region (coordinate / size) component.

    Declared as a frozen dataclass so every field can be overridden by
    keyword, e.g. ``RegionConfig(**config_dict["region"])``.

    Field meanings are inferred from the names only -- confirm against the
    model implementation before relying on them.
    """

    dim: int = 2048
    coord_feat_dim: int = 256
    coord_out_dim: int = 1024
    size_feat_dim: int = 512
    size_out_dim: int = 2048
    group_size: Optional[int] = None
@dataclass(frozen=True)
class TokenizerConfig:
    """Special-token ids and per-task prompt templates.

    Declared as a frozen dataclass so that ``templates`` is materialised by
    its ``default_factory`` (without the decorator the attribute would be a
    raw ``dataclasses.Field`` object) and so every field can be overridden
    by keyword, e.g. ``TokenizerConfig(**config_dict["tokenizer"])``.

    ``templates`` maps a task name to a dict of integer lists (presumably
    token-id sequences). The ``caption`` entry is keyed by length
    (``short`` / ``normal`` / ``long``); the ``query``, ``detect`` and
    ``point`` entries each provide a ``prefix`` and a ``suffix``.
    """

    bos_id: int = 0
    eos_id: int = 0
    answer_id: int = 3
    thinking_id: int = 4
    coord_id: int = 5
    size_id: int = 6
    start_ground_points_id: int = 7
    end_ground_id: int = 9
    # A factory gives each instance its own dict, so no mutable default is
    # shared between instances.
    templates: Dict[str, Optional[Dict[str, List[int]]]] = field(
        default_factory=lambda: {
            "caption": {
                "short": [1, 32708, 2, 12492, 3],
                "normal": [1, 32708, 2, 6382, 3],
                "long": [1, 32708, 2, 4059, 3],
            },
            "query": {"prefix": [1, 15381, 2], "suffix": [3]},
            "detect": {"prefix": [1, 7235, 476, 2], "suffix": [3]},
            "point": {"prefix": [1, 2581, 2], "suffix": [3]},
        }
    )
@dataclass
class MoondreamConfig:
    """Top-level configuration bundling the four sub-configurations.

    Each section defaults to a freshly constructed sub-config. Factories are
    used instead of shared default instances so the class definition does not
    depend on the sub-configs being hashable (dataclasses reject unhashable
    default instances on Python 3.11+).
    """

    text: TextConfig = field(default_factory=TextConfig)
    vision: VisionConfig = field(default_factory=VisionConfig)
    region: RegionConfig = field(default_factory=RegionConfig)
    tokenizer: TokenizerConfig = field(default_factory=TokenizerConfig)

    @classmethod
    def from_dict(cls, config_dict: dict):
        """Build a configuration from a nested dictionary.

        Args:
            config_dict: Mapping that may contain the keys ``"text"``,
                ``"vision"``, ``"region"`` and ``"tokenizer"``. Each value is
                a dict of keyword overrides for the matching sub-config; a
                missing key means "use that sub-config's defaults".

        Returns:
            A new ``MoondreamConfig``.

        Raises:
            TypeError: If a section contains a key that is not a field of the
                corresponding sub-config.
        """
        # Copy so that rewriting "moe" below never mutates the caller's dict.
        text_kwargs = dict(config_dict.get("text", {}))
        moe = text_kwargs.get("moe")
        if isinstance(moe, dict):
            # A serialised config carries "moe" as a plain dict (this is what
            # to_dict() emits); rebuild the typed object so that attribute
            # access on ``config.text.moe`` keeps working.
            text_kwargs["moe"] = TextMoeConfig(**moe)

        return cls(
            text=TextConfig(**text_kwargs),
            vision=VisionConfig(**config_dict.get("vision", {})),
            region=RegionConfig(**config_dict.get("region", {})),
            tokenizer=TokenizerConfig(**config_dict.get("tokenizer", {})),
        )

    def to_dict(self) -> Dict[str, dict]:
        """Return the configuration as a nested dictionary of plain values.

        ``asdict`` deep-copies each section and recursively converts nested
        dataclasses (such as ``text.moe``) to dicts. The result therefore
        does not alias the live config objects, and it can be passed back to
        ``from_dict``.
        """
        return {
            "text": asdict(self.text),
            "vision": asdict(self.vision),
            "region": asdict(self.region),
            "tokenizer": asdict(self.tokenizer),
        }