#!/usr/bin/env bash
# DealFlow AI — Start vLLM server on AMD MI300X
# Requires: vllm installed, AMD ROCm drivers, MI300X GPU
# Usage: ./scripts/start_vllm.sh
#
# Environment overrides (default in parentheses):
#   VLLM_MODEL       model passed to --model and --served-model-name
#                    (Qwen/Qwen3-VL-32B-Instruct-FP8)
#   VLLM_PORT        value passed to --port (8000)
#   GPU_MEMORY_UTIL  value passed to --gpu-memory-utilization (0.90)
#   MAX_MODEL_LEN    value passed to --max-model-len (8192)
#   VLLM_DTYPE       value passed to --dtype (float16)
set -euo pipefail

# Resolve every tunable once, falling back to the default when the
# environment variable is unset or empty.
MODEL="${VLLM_MODEL:-Qwen/Qwen3-VL-32B-Instruct-FP8}"
PORT="${VLLM_PORT:-8000}"
GPU_MEMORY_UTIL="${GPU_MEMORY_UTIL:-0.90}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
DTYPE="${VLLM_DTYPE:-float16}"
readonly MODEL PORT GPU_MEMORY_UTIL MAX_MODEL_LEN DTYPE

# Report the effective configuration before handing control to the server.
echo "==> Starting vLLM server"
echo " Model: $MODEL"
echo " Port: $PORT"
echo " GPU util: $GPU_MEMORY_UTIL"
echo " Max context: $MAX_MODEL_LEN"
echo " Dtype: $DTYPE"

# Target hardware is AMD MI300X. No backend-selection flag is passed below;
# presumably the installed vLLM is a ROCm build — TODO confirm.
#
# --trust-remote-code lets the model repository run its own Python code;
# only point VLLM_MODEL at repositories you trust.
#
# exec replaces this shell with the server process, so signals sent to the
# script's PID (Ctrl-C, SIGTERM from a supervisor) reach the server directly
# and its exit status becomes the script's exit status.
exec python3 -m vllm.entrypoints.openai.api_server \
  --model "$MODEL" \
  --port "$PORT" \
  --gpu-memory-utilization "$GPU_MEMORY_UTIL" \
  --max-model-len "$MAX_MODEL_LEN" \
  --dtype "$DTYPE" \
  --trust-remote-code \
  --served-model-name "$MODEL"
|