File size: 353 Bytes
c7a6fe6
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
#!/bin/bash

# Strict mode: abort on a failing command, on use of an unset variable, or
# when any stage of a pipeline fails.
set -euo pipefail

# 1. Set Device Order and Visibility
# PCI_BUS_ID ordering makes CUDA number devices by PCI bus position rather
# than by its default ordering, so the index below refers to physical GPU 1.
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# Expose only that GPU to processes started from this script.
export CUDA_VISIBLE_DEVICES="1"

# 2. Launch the vLLM server
# Serves Qwen/Qwen3-30B-A3B-Thinking-2507 in bfloat16 on port 8015, with the
# maximum model length set to 16384 and the GPU memory utilization target
# set to 0.95.
#
# `exec` replaces this shell with the vllm process, so signals sent to the
# script's PID (Ctrl-C, `kill`, a service manager's stop) reach the server
# directly instead of killing only the wrapper and leaving the server running.
#
# Any extra arguments passed to this script are appended to the command line.
#
# NOTE(review): --trust-remote-code allows custom code shipped with the model
# repository to run on this host — confirm it is still required for this model.
exec vllm serve Qwen/Qwen3-30B-A3B-Thinking-2507 \
  --trust-remote-code \
  --dtype bfloat16 \
  --max-model-len 16384 \
  --gpu-memory-utilization 0.95 \
  --port 8015 \
  "$@"