File size: 225 Bytes
# Launch a vLLM OpenAI-compatible server for Llama-3.1-8B-Instruct on one GPU.
#
# CUDA_DEVICE_ORDER=PCI_BUS_ID makes GPU indices follow PCI bus order, so
# CUDA_VISIBLE_DEVICES=0 pins the server to the first GPU in that order.
# Both variables are set for this command only; they are not exported.
#
# Every option line except the last must end in a backslash: without it the
# shell ends the command there and tries to run the next line as a separate
# command, silently dropping that option from the server.
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0 vllm serve meta-llama/Llama-3.1-8B-Instruct \
  --port 4090 \
  --served-model-name dspy \
  --dtype bfloat16 \
  --tensor-parallel-size 1 \
  --max-model-len 16384