docker/docker-compose.yml · Doradus-AI/RnJ-1-Instruct-FP8 at main

RnJ-1-Instruct-FP8 / docker /docker-compose.yml

Doradus AI

Upload folder using huggingface_hub

6a70e5e verified 2 months ago

1.09 kB

	# RnJ-1-Instruct-FP8 Docker Compose
	#
	# Usage:
	# docker compose up
	#
	# With specific GPU:
	# GPU_ID=0 docker compose up

	services:
	rnj-1-instruct-fp8:
	image: vllm/vllm-openai:v0.12.0
	ports:
	- "8000:8000"
	volumes:
	- hf_cache:/root/.cache/huggingface
	environment:
	- VLLM_ATTENTION_BACKEND=FLASHINFER
	- HF_HUB_ENABLE_HF_TRANSFER=1
	deploy:
	resources:
	reservations:
	devices:
	- driver: nvidia
	device_ids: ["${GPU_ID:-0}"]
	capabilities: [gpu]
	shm_size: "4g"
	command: >
	--model Doradus/RnJ-1-Instruct-FP8
	--host 0.0.0.0
	--port 8000
	--tensor-parallel-size 1
	--max-model-len 8192
	--gpu-memory-utilization 0.90
	--dtype auto
	--trust-remote-code
	--served-model-name rnj-1-instruct-fp8
	--enable-chunked-prefill
	--max-num-seqs 32
	healthcheck:
	test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
	interval: 30s
	timeout: 10s
	retries: 3
	start_period: 120s

	volumes:
	hf_cache: