Spaces:
Running
Running
# DealFlow AI — Start vLLM server on AMD MI300X
# Requires: vllm installed, AMD ROCm drivers, MI300X GPU
# Usage: ./scripts/start_vllm.sh
#
# Environment overrides (each falls back to its default when unset or empty):
#   VLLM_MODEL       model passed to --model and --served-model-name
#                    (default: Qwen/Qwen3-VL-32B-Instruct-FP8)
#   VLLM_PORT        value for --port (default: 8000)
#   GPU_MEMORY_UTIL  value for --gpu-memory-utilization (default: 0.90)
#   MAX_MODEL_LEN    value for --max-model-len (default: 8192)

set -euo pipefail

MODEL="${VLLM_MODEL:-Qwen/Qwen3-VL-32B-Instruct-FP8}"
PORT="${VLLM_PORT:-8000}"
GPU_MEMORY_UTIL="${GPU_MEMORY_UTIL:-0.90}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"

# Print the effective configuration before launching. printf with a fixed
# format keeps the output stable even if a value starts with '-' or contains
# backslashes.
printf '==> Starting vLLM server\n'
printf ' Model: %s\n' "$MODEL"
printf ' Port: %s\n' "$PORT"
printf ' GPU util: %s\n' "$GPU_MEMORY_UTIL"
printf ' Max context: %s\n' "$MAX_MODEL_LEN"

# AMD MI300X.
# NOTE(review): no flag below selects a GPU backend; presumably the installed
# vLLM build is the ROCm one — confirm against the deployment environment.
# NOTE(review): --trust-remote-code lets code shipped in the model repository
# run on this host; keep VLLM_MODEL pointed at a trusted source.
server_args=(
  --model "$MODEL"
  --port "$PORT"
  --gpu-memory-utilization "$GPU_MEMORY_UTIL"
  --max-model-len "$MAX_MODEL_LEN"
  --dtype float16
  --trust-remote-code
  --served-model-name "$MODEL"
)

# exec replaces this shell with the server process, so signals sent to the
# script's PID (Ctrl-C, a supervisor's SIGTERM) reach vLLM directly and no
# idle wrapper shell is left behind. The server's exit status becomes the
# script's exit status, as before.
exec python3 -m vllm.entrypoints.openai.api_server "${server_args[@]}"