# RnJ-1-Instruct-FP8 Docker Compose
#
# Usage:
#   docker compose up
#
# With specific GPU:
#   GPU_ID=0 docker compose up

services:
  rnj-1-instruct-fp8:
    image: vllm/vllm-openai:v0.12.0
    ports:
      # OpenAI-compatible API; quoted to avoid YAML sexagesimal parsing.
      - "8000:8000"
    volumes:
      # Persist model downloads across container restarts.
      - hf_cache:/root/.cache/huggingface
    environment:
      - VLLM_ATTENTION_BACKEND=FLASHINFER
      - HF_HUB_ENABLE_HF_TRANSFER=1
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # GPU selectable via env var; defaults to GPU 0.
              device_ids: ["${GPU_ID:-0}"]
              capabilities: [gpu]
    # vLLM needs a larger-than-default shared memory segment.
    shm_size: "4g"
    # Folded (>-) so the args become one line with no trailing newline.
    command: >-
      --model Doradus/RnJ-1-Instruct-FP8
      --host 0.0.0.0
      --port 8000
      --tensor-parallel-size 1
      --max-model-len 8192
      --gpu-memory-utilization 0.90
      --dtype auto
      --trust-remote-code
      --served-model-name rnj-1-instruct-fp8
      --enable-chunked-prefill
      --max-num-seqs 32
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Generous grace period: model download + weight loading is slow.
      start_period: 120s

volumes:
  hf_cache: