readctrl / code /bash_script /vllm_server_v2.sh
shahidul034's picture
Add files using upload-large-folder tool
c7a6fe6 verified
#!/bin/bash
# Launch a vLLM server for Qwen/Qwen3-30B-A3B-Thinking-2507 on port 8015.
#
# GPU selection: devices are ordered by PCI bus ID and only index 1 is made
# visible, so the server is pinned to physical GPU 1 (as requested) instead of
# whichever device the CUDA runtime would otherwise enumerate first.
export CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=1

# Server flags, one option per entry so they stay easy to read and diff.
serve_args=(
  --trust-remote-code
  --dtype bfloat16
  --max-model-len 16384
  --gpu-memory-utilization 0.95
  --port 8015
)

# Last command in the script, so its exit status becomes the script's status.
vllm serve Qwen/Qwen3-30B-A3B-Thinking-2507 "${serve_args[@]}"