# RnJ-1-Instruct-FP8/docker/docker-compose.yml
# Doradus AI
# Upload folder using huggingface_hub
# 6a70e5e verified
# RnJ-1-Instruct-FP8 Docker Compose
#
# Usage:
#   docker compose up
#
# With a specific GPU (GPU_ID defaults to 0 when unset):
#   GPU_ID=0 docker compose up
services:
  # vLLM OpenAI-compatible server serving the Doradus/RnJ-1-Instruct-FP8 model.
  rnj-1-instruct-fp8:
    image: vllm/vllm-openai:v0.12.0
    ports:
      # host:container; kept quoted so YAML always reads it as a string.
      - "8000:8000"
    volumes:
      # Named volume (declared at the bottom of this file) mounted over the
      # Hugging Face cache directory so it survives container re-creation.
      - hf_cache:/root/.cache/huggingface
    environment:
      - VLLM_ATTENTION_BACKEND=FLASHINFER
      - HF_HUB_ENABLE_HF_TRANSFER=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # GPU_ID is interpolated from the host environment and falls
              # back to "0" when unset or empty.
              device_ids: ["${GPU_ID:-0}"]
              capabilities: [gpu]
    shm_size: "4g"
    # Folded scalar: the lines below are joined with single spaces into one
    # argument string; ">-" strips the trailing newline.
    command: >-
      --model Doradus/RnJ-1-Instruct-FP8
      --host 0.0.0.0
      --port 8000
      --tensor-parallel-size 1
      --max-model-len 8192
      --gpu-memory-utilization 0.90
      --dtype auto
      --trust-remote-code
      --served-model-name rnj-1-instruct-fp8
      --enable-chunked-prefill
      --max-num-seqs 32
    healthcheck:
      # Probes the server's /health endpoint from inside the container.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during the first 120s do not count toward `retries`.
      # NOTE(review): a first run that has to download the model may need
      # longer than this; confirm against observed startup time.
      start_period: 120s

volumes:
  # Named volume backing the Hugging Face cache mount above; default driver.
  hf_cache: {}