#!/usr/bin/env bash
#
# Launch a vLLM server for meta-llama/Llama-3.1-8B-Instruct.
#
# Settings:
#   CUDA_DEVICE_ORDER=PCI_BUS_ID  enumerate GPUs in PCI bus order so the index
#                                 below refers to a stable physical device
#   CUDA_VISIBLE_DEVICES=0        expose only GPU 0 to the server process
#   --port 4090                   port the server listens on
#   --served-model-name dspy      name under which the model is served
#                                 (clients request "dspy", not the HF repo id)
#   --dtype bfloat16              dtype passed to vLLM for the model
#   --tensor-parallel-size 1      single GPU, no tensor parallelism
#   --max-model-len 16384         maximum model length passed to vLLM
#
# NOTE: every continuation backslash must be the LAST character on its line.
# A backslash followed by a space escapes the space instead, which turns the
# next option into an argument with a leading blank (e.g. " --port") that the
# CLI will not recognise as a flag.
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0 \
  vllm serve meta-llama/Llama-3.1-8B-Instruct \
  --port 4090 \
  --served-model-name dspy \
  --dtype bfloat16 \
  --tensor-parallel-size 1 \
  --max-model-len 16384