Spaces:

MapoTofu9
/

why-agent

Sleeping

App Files Files Community

why-agent / scripts /start_vllm.sh

MapoTofu9

deploy: HF Spaces

5d30bdc about 2 months ago

Raw

History Blame Contribute Delete

2.03 kB

	#!/usr/bin/env bash
	# Start the vLLM OpenAI-compatible server on the AMD MI300X droplet using the
	# pre-pulled Docker image (vllm/vllm-openai-rocm:v0.17.1 is already cached there).
	#
	# Usage:    bash scripts/start_vllm.sh
	# Env:      HF_TOKEN  (optional) Hugging Face token forwarded to the container.
	# Endpoint: http://165.245.128.117:8000/v1 once the health check passes.
	# Exit:     0 when /health responds; non-zero if the container exits early,
	#           the wait times out, or the SSH session fails.

	set -euo pipefail

	readonly DROPLET="root@165.245.128.117"
	readonly MODEL="Qwen/Qwen3-30B-A3B"
	readonly PORT=8000
	readonly CONTAINER="vllm-server"
	readonly IMAGE="vllm/vllm-openai-rocm:v0.17.1"
	HF_TOKEN="${HF_TOKEN:-}"

	# Host part of DROPLET; used only to print the public endpoint URL.
	readonly PUBLIC_HOST="${DROPLET#*@}"
	# 60 polls x 10 s = up to 10 minutes, to allow for a first-run model download.
	readonly MAX_ATTEMPTS=60
	readonly POLL_INTERVAL=10

	echo "==> Connecting to AMD droplet..."

	{
	  # Send the settings as shell-quoted assignments, followed by a fully literal
	  # script. Nothing in the remote script is expanded locally, so it needs no
	  # backslash escaping and cannot be corrupted by unusual values.
	  printf '%s=%q\n' \
	    CONTAINER "$CONTAINER" \
	    IMAGE "$IMAGE" \
	    MODEL "$MODEL" \
	    PORT "$PORT" \
	    PUBLIC_HOST "$PUBLIC_HOST" \
	    MAX_ATTEMPTS "$MAX_ATTEMPTS" \
	    POLL_INTERVAL "$POLL_INTERVAL"
	  # Exported so `docker run -e HF_TOKEN` can pick it up by name; this keeps the
	  # token value out of the docker command line (and therefore out of `ps`).
	  printf 'export HF_TOKEN=%q\n' "$HF_TOKEN"

	  # `<<-` strips leading tabs, so this heredoc (and its terminator) may be
	  # tab-indented. The quoted delimiter disables all local expansion.
	  cat <<-'REMOTE'
	set -euo pipefail

	# Stop and remove any existing container with the same name, otherwise
	# `docker run --name` below fails with a name conflict.
	if docker container inspect "$CONTAINER" >/dev/null 2>&1; then
	  echo "==> Removing existing container '${CONTAINER}'..."
	  docker rm -f "$CONTAINER" >/dev/null
	fi

	echo "==> Starting vLLM container..."
	echo " Image : ${IMAGE}"
	echo " Model : ${MODEL}"
	echo " Port : ${PORT}"

	docker run -d \
	  --name "$CONTAINER" \
	  --network=host \
	  --device=/dev/kfd \
	  --device=/dev/dri \
	  --group-add=video \
	  --cap-add=SYS_PTRACE \
	  --security-opt seccomp=unconfined \
	  --shm-size=16gb \
	  -v /root/.cache/huggingface:/root/.cache/huggingface \
	  -e HF_TOKEN \
	  -e GLOO_SOCKET_IFNAME=eth0 \
	  -e NCCL_SOCKET_IFNAME=eth0 \
	  "$IMAGE" \
	  --model "$MODEL" \
	  --port "$PORT" \
	  --host 0.0.0.0 \
	  --dtype bfloat16 \
	  --max-model-len 32768 \
	  --gpu-memory-utilization 0.90 \
	  --trust-remote-code \
	  --enable-auto-tool-choice \
	  --tool-call-parser hermes

	echo "==> Container started. Waiting for server to be ready (model download + load)..."

	# Poll /health until the server answers or the attempts run out.
	for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
	  if curl -sf "http://localhost:${PORT}/health" >/dev/null 2>&1; then
	    echo "==> Server is UP at http://${PUBLIC_HOST}:${PORT}/v1"
	    echo " Set: VLLM_ENDPOINT=http://${PUBLIC_HOST}:${PORT}/v1"
	    exit 0
	  fi

	  # Fail fast: if the container has already stopped it will never become
	  # healthy, so there is no point waiting out the full timeout.
	  if [[ "$(docker inspect --format '{{.State.Running}}' "$CONTAINER" 2>/dev/null)" != "true" ]]; then
	    echo "==> Container '${CONTAINER}' exited before becoming healthy. Last logs:" >&2
	    docker logs --tail 40 "$CONTAINER" >&2 || true
	    exit 1
	  fi

	  echo " Waiting... (${attempt}/${MAX_ATTEMPTS}) — check logs: docker logs ${CONTAINER}"
	  sleep "$POLL_INTERVAL"
	done

	echo "==> Timed out. Last logs:" >&2
	docker logs --tail 40 "$CONTAINER" >&2
	exit 1
	REMOTE
	} | ssh "$DROPLET" bash -s