ForgeSight / backend /start_vllm.sh
rasAli02's picture
feat: finalize MI300X inference connection and live status update
8d2779b
#!/bin/bash
# ============================================================
# ForgeSight โ€” Start vLLM Inference Server on AMD MI300X
# ============================================================
# Default configuration
MODEL_NAME=${AMD_MODEL_NAME:-"Qwen/Qwen2-VL-7B-Instruct"}
PORT=${PORT:-8000}
echo "๐Ÿš€ Starting vLLM Server with $MODEL_NAME on port $PORT..."
# Use the venv if it exists
if [ -f "/opt/forgesight/venv/bin/activate" ]; then
source /opt/forgesight/venv/bin/activate
fi
# vLLM on ROCm requires some specific environment variables for best performance
export HSA_OVERRIDE_GFX_VERSION=11.0.0
export NCCL_DEBUG=ERROR
vllm serve "$MODEL_NAME" \
--host 0.0.0.0 \
--port "$PORT" \
--tensor-parallel-size 8 \
--enable-expert-parallel \
--mm-encoder-tp-mode data \
--mm-processor-cache-type shm \
--reasoning-parser qwen3 \
--enable-prefix-caching \
--trust-remote-code