File size: 812 Bytes
8dcf472
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env bash
# DealFlow AI — Start vLLM server on AMD MI300X
# Requires: vllm installed, AMD ROCm drivers, MI300X GPU
# Usage: ./scripts/start_vllm.sh [extra api_server args...]
#
# Environment overrides (default in parentheses):
#   VLLM_MODEL       model passed to --model and --served-model-name
#                    (Qwen/Qwen3-VL-32B-Instruct-FP8)
#   VLLM_PORT        value passed to --port (8000)
#   GPU_MEMORY_UTIL  value passed to --gpu-memory-utilization (0.90)
#   MAX_MODEL_LEN    value passed to --max-model-len (8192)
#
# Any command-line arguments given to this script are appended verbatim
# after the built-in server flags.
set -euo pipefail

MODEL="${VLLM_MODEL:-Qwen/Qwen3-VL-32B-Instruct-FP8}"
PORT="${VLLM_PORT:-8000}"
GPU_MEMORY_UTIL="${GPU_MEMORY_UTIL:-0.90}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"

# Fail early with a readable message; 127 matches the status the shell
# itself reports for a command that cannot be found.
if ! command -v python3 >/dev/null 2>&1; then
  printf 'error: python3 not found on PATH\n' >&2
  exit 127
fi

printf '%s\n' "==> Starting vLLM server"
printf '    Model:       %s\n' "$MODEL"
printf '    Port:        %s\n' "$PORT"
printf '    GPU util:    %s\n' "$GPU_MEMORY_UTIL"
printf '    Max context: %s\n' "$MAX_MODEL_LEN"

# AMD MI300X target. NOTE(review): no ROCm-specific flag is passed below;
# the backend is presumably determined by the installed vLLM build — confirm.
server_args=(
  --model "$MODEL"
  --port "$PORT"
  --gpu-memory-utilization "$GPU_MEMORY_UTIL"
  --max-model-len "$MAX_MODEL_LEN"
  --dtype float16
  --trust-remote-code
  --served-model-name "$MODEL"
)

# exec replaces this shell with the server process, so signals sent to the
# script's PID (SIGTERM from a supervisor, Ctrl-C) reach vLLM directly and
# the script's exit status is the server's exit status.
exec python3 -m vllm.entrypoints.openai.api_server "${server_args[@]}" "$@"