#!/bin/bash
#
# Launch a vLLM OpenAI-compatible server for Qwen3-30B-A3B-Thinking-2507
# pinned to a single GPU.
#
# Usage: ./<this-script>   (takes no arguments; requires `vllm` on PATH)

# 1. Set Device Order and Visibility
# This ensures we are targeting the physical GPU ID 1 as requested.
# PCI_BUS_ID ordering makes the CUDA device index follow PCI bus order
# (per the CUDA environment-variable docs), so "1" refers to a stable
# physical device rather than a "fastest first" ordering.
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export CUDA_VISIBLE_DEVICES="1"

# 2. Launch the server
# Flags are kept in an array so each one can be documented in place and
# passed to vllm as separate, correctly quoted words.
model="Qwen/Qwen3-30B-A3B-Thinking-2507"
vllm_args=(
  --trust-remote-code            # allow code shipped with the model repo
  --dtype bfloat16               # weight/activation dtype
  --max-model-len 16384          # maximum context length, in tokens
  --gpu-memory-utilization 0.95  # fraction of GPU memory vLLM may use
  --port 8015                    # HTTP port the server listens on
)

# Last command: the script's exit status is vllm's exit status.
vllm serve "$model" "${vllm_args[@]}"