Spaces:

Nomearod
/

agentbench

Running

agentbench/docker/docker-compose.vllm.yml

Jane Yeung

feat: infrastructure sprint — vLLM/Modal, Helm, Terraform (#8)

a9d4375 about 2 months ago

1.16 kB

	# Local GPU serving via vLLM + agent-bench API.
	# Requires: nvidia-container-toolkit
	# See modal/serve_vllm.py for serverless alternative.
	#
	# Usage:
	# docker compose -f docker/docker-compose.vllm.yml up --build

	services:
	vllm:
	image: vllm/vllm-openai:latest
	command:
	- --model=mistralai/Mistral-7B-Instruct-v0.3
	- --max-model-len=4096
	- --dtype=half
	- --gpu-memory-utilization=0.85
	- --host=0.0.0.0
	- --port=8000
	ports:
	- "8001:8000"
	deploy:
	resources:
	reservations:
	devices:
	- driver: nvidia
	count: 1
	capabilities: [gpu]
	volumes:
	- vllm-cache:/root/.cache/huggingface
	healthcheck:
	test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
	interval: 30s
	timeout: 10s
	retries: 5
	start_period: 120s

	app:
	build:
	context: ..
	dockerfile: docker/Dockerfile
	environment:
	- MODAL_VLLM_URL=http://vllm:8000/v1
	- AGENT_BENCH_ENV=selfhosted_local
	depends_on:
	vllm:
	condition: service_healthy
	ports:
	- "8080:7860"

	volumes:
	vllm-cache: