Spaces:

lablab-ai-amd-developer-hackathon
/

ForgeSight

Sleeping

ForgeSight / backend /start_vllm.sh

feat: finalize MI300X inference connection and live status update

8d2779b 26 days ago

925 Bytes

	#!/bin/bash
	# ============================================================
	# ForgeSight — Start vLLM Inference Server on AMD MI300X
	# ============================================================
	# Launches `vllm serve` in the foreground, bound to 0.0.0.0.
	#
	# Optional environment variables:
	#   AMD_MODEL_NAME        Model to serve   (default: Qwen/Qwen2-VL-7B-Instruct)
	#   PORT                  Listen port      (default: 8000)
	#   TENSOR_PARALLEL_SIZE  GPUs to shard on (default: 8)
	#   NCCL_DEBUG            Collective-library log level (default: ERROR)
	#
	# Exit status: 1 on invalid configuration or missing `vllm` executable;
	# otherwise the exit status of `vllm serve` (the shell is replaced by it).
	set -euo pipefail

	# Print an error to stderr and abort.
	die() {
	  printf 'start_vllm.sh: %s\n' "$*" >&2
	  exit 1
	}

	# Default configuration
	MODEL_NAME=${AMD_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
	PORT=${PORT:-8000}
	# NOTE(review): vLLM requires the model's attention-head count to be divisible
	# by the tensor-parallel size. Confirm 8 is valid for the model being served
	# (the default 7B model likely needs a smaller value such as 1, 2 or 4).
	TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-8}

	# Fail fast on obviously malformed numeric settings instead of letting the
	# server spend time starting up before rejecting them.
	[[ "$PORT" =~ ^[0-9]+$ ]] || die "PORT must be a number, got '$PORT'"
	[[ "$TENSOR_PARALLEL_SIZE" =~ ^[1-9][0-9]*$ ]] \
	  || die "TENSOR_PARALLEL_SIZE must be a positive integer, got '$TENSOR_PARALLEL_SIZE'"

	printf '%s\n' "🚀 Starting vLLM Server with $MODEL_NAME on port $PORT..."

	# Use the venv if it exists
	if [[ -f "/opt/forgesight/venv/bin/activate" ]]; then
	  # Activation scripts may read unset variables (e.g. PS1), so relax
	  # nounset only while sourcing.
	  set +u
	  # shellcheck disable=SC1091 -- path only exists on the deployment host
	  source /opt/forgesight/venv/bin/activate
	  set -u
	fi

	command -v vllm >/dev/null 2>&1 || die "'vllm' not found on PATH"

	# ROCm environment.
	# MI300X is a gfx942 device that ROCm supports natively. The previously
	# hard-coded HSA_OVERRIDE_GFX_VERSION=11.0.0 tells the runtime to treat the
	# GPU as gfx1100 (RDNA3), which does not match this hardware, so no override
	# is exported here. A value already present in the caller's environment is
	# passed through untouched.
	export NCCL_DEBUG="${NCCL_DEBUG:-ERROR}"

	vllm_args=(
	  --host 0.0.0.0
	  --port "$PORT"
	  --tensor-parallel-size "$TENSOR_PARALLEL_SIZE"
	  # NOTE(review): expert parallelism applies to MoE models; confirm it is
	  # intended for the model being served.
	  --enable-expert-parallel
	  --mm-encoder-tp-mode data
	  --mm-processor-cache-type shm
	  # NOTE(review): parser name is qwen3 while the default model is a
	  # Qwen2-VL model; confirm this pairing is intended.
	  --reasoning-parser qwen3
	  --enable-prefix-caching
	  --trust-remote-code
	)

	# exec so that signals from a supervisor (SIGTERM/SIGINT) reach the server
	# process directly instead of a wrapping shell.
	exec vllm serve "$MODEL_NAME" "${vllm_args[@]}"