#!/bin/bash
#
# Launch `vllm serve` for the Qwen/Qwen3-30B-A3B-Thinking-2507 model on
# physical GPU 1, listening on port 8015.
#
# Usage: this_script [extra `vllm serve` arguments...]
#   Any arguments given to this script are appended, unchanged, to the end of
#   the `vllm serve` command line. With no arguments the command is exactly
#   the fixed configuration listed below.

set -euo pipefail

# Fail early with a clear message instead of a bare "command not found".
if ! command -v vllm >/dev/null 2>&1; then
  printf 'error: vllm executable not found in PATH\n' >&2
  exit 127
fi

# Device order and visibility.
# PCI_BUS_ID ordering is set so that the index in CUDA_VISIBLE_DEVICES refers
# to the physical GPU ID; this targets physical GPU ID 1 as requested.
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export CUDA_VISIBLE_DEVICES="1"

# Fixed server options, kept in an array so each flag/value stays one word.
serve_args=(
  --trust-remote-code
  --dtype bfloat16
  --max-model-len 16384
  --gpu-memory-utilization 0.95
  --port 8015
)

# exec replaces this shell with the server process, so signals (e.g. SIGTERM
# from a supervisor) and the exit status belong to vllm directly.
exec vllm serve Qwen/Qwen3-30B-A3B-Thinking-2507 "${serve_args[@]}" "$@"