#!/usr/bin/env bash
#
# Launch a vLLM server for Qwen/Qwen3-30B-A3B-Thinking-2507 on one specific GPU.
#
# Requires: the `vllm` command on PATH.
# Listens on: port 8015.

set -euo pipefail

# 1. Set device order and visibility.
# PCI_BUS_ID ordering makes CUDA enumerate devices by PCI bus position rather
# than its default ordering, so index "1" below refers to physical GPU 1 as
# requested. Both variables are exported so the vllm process inherits them.
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
export CUDA_VISIBLE_DEVICES="1"

# Fail early with a readable message instead of a bare "command not found".
# 127 is the status the shell itself uses for a missing command.
if ! command -v vllm >/dev/null 2>&1; then
  printf 'error: vllm not found on PATH\n' >&2
  exit 127
fi

# 2. Start the server.
#   --trust-remote-code        allow custom model code shipped with the
#                              checkpoint to be loaded
#   --dtype bfloat16           weight/activation data type
#   --max-model-len 16384      maximum context length, in tokens
#   --gpu-memory-utilization   fraction of the GPU's memory vLLM may use
#   --port 8015                TCP port the server listens on
vllm serve Qwen/Qwen3-30B-A3B-Thinking-2507 \
  --trust-remote-code \
  --dtype bfloat16 \
  --max-model-len 16384 \
  --gpu-memory-utilization 0.95 \
  --port 8015