#!/usr/bin/env bash
#
# DealFlow AI — Start vLLM server on AMD MI300X
#
# Launches vLLM's OpenAI-compatible API server for a single model.
#
# Requires: vllm installed, AMD ROCm drivers, MI300X GPU
# Usage:    ./scripts/start_vllm.sh
#
# Environment overrides (all optional):
#   VLLM_MODEL       model to load and serve  (default: Qwen/Qwen3-VL-32B-Instruct-FP8)
#   VLLM_PORT        port passed to --port    (default: 8000)
#   GPU_MEMORY_UTIL  --gpu-memory-utilization (default: 0.90)
#   MAX_MODEL_LEN    --max-model-len          (default: 8192)
#
# NOTE(review): vLLM's own documentation lists VLLM_PORT as a variable it reads
# for internal communication, not for the API server. Because this script takes
# the API port from the same variable, an exported VLLM_PORT is also visible to
# the server process — confirm this does not clash in multi-process setups.

set -euo pipefail

readonly MODEL="${VLLM_MODEL:-Qwen/Qwen3-VL-32B-Instruct-FP8}"
readonly PORT="${VLLM_PORT:-8000}"
readonly GPU_MEMORY_UTIL="${GPU_MEMORY_UTIL:-0.90}"
readonly MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"

# Fail early with a readable message instead of a bare "command not found".
if ! command -v python3 >/dev/null 2>&1; then
  printf 'error: python3 not found in PATH; cannot start vLLM\n' >&2
  exit 127
fi

# printf with a fixed format keeps the output stable even if a value starts
# with '-' or contains backslashes.
printf '%s\n' "==> Starting vLLM server"
printf ' Model: %s\n' "$MODEL"
printf ' Port: %s\n' "$PORT"
printf ' GPU util: %s\n' "$GPU_MEMORY_UTIL"
printf ' Max context: %s\n' "$MAX_MODEL_LEN"

# AMD MI300X: no flag below selects a GPU backend; presumably the installed
# vLLM build is the ROCm one — verify on the target host.
#
# exec replaces this shell with the server process, so signals sent by a
# supervisor (SIGTERM, SIGINT) reach vLLM directly and its exit status becomes
# the script's exit status.
exec python3 -m vllm.entrypoints.openai.api_server \
  --model "$MODEL" \
  --port "$PORT" \
  --gpu-memory-utilization "$GPU_MEMORY_UTIL" \
  --max-model-len "$MAX_MODEL_LEN" \
  --dtype float16 \
  --trust-remote-code \
  --served-model-name "$MODEL"