File size: 333 Bytes
# Launch a vLLM OpenAI-compatible server for google/gemma-3-27b-it.
#
# The model is served on port 8055 and advertised to clients under the name
# "subclaim-extractor" rather than its Hugging Face repo id.
#
# CUDA_DEVICE_ORDER / CUDA_VISIBLE_DEVICES are written as command-prefix
# assignments, so they affect only this vllm process and are not exported to
# the calling shell. With PCI_BUS_ID ordering, device index 5 is presumably
# meant to match the numbering shown by nvidia-smi -- verify on the target host.
#
# NOTE(review): --trust-remote-code lets the model repository supply code that
# is executed locally; confirm it is actually required for this model.
# NOTE(review): --gpu-memory-utilization 0.95 assumes the selected GPU is not
# shared with other jobs -- confirm before running on a shared machine.
CUDA_DEVICE_ORDER="PCI_BUS_ID" CUDA_VISIBLE_DEVICES=5 vllm serve google/gemma-3-27b-it \
  --gpu-memory-utilization 0.95 \
  --max-model-len 16384 \
  --enable-prefix-caching \
  --kv-cache-dtype fp8 \
  --max-num-batched-tokens 32768 \
  --trust-remote-code \
  --port 8055 \
  --served-model-name subclaim-extractor