# Serve meta-llama/Llama-3.1-8B-Instruct with vLLM on port 4090, published
# under the model name "dspy".
#
# CUDA_DEVICE_ORDER=PCI_BUS_ID makes the indices in CUDA_VISIBLE_DEVICES follow
# PCI bus order, so "0" here selects the first GPU by bus ID.
#
# Every option line except the last must end with a backslash; without it the
# shell ends the command early and tries to run the next flag as a command of
# its own, so that flag never reaches vllm.
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0 vllm serve meta-llama/Llama-3.1-8B-Instruct \
  --port 4090 \
  --served-model-name dspy \
  --dtype bfloat16 \
  --tensor-parallel-size 1 \
  --max-model-len 16384
# Launch `vllm serve` for meta-llama/Llama-3.1-8B-Instruct:
#   - listens on port 4090
#   - exposes the model under the name "dspy"
#   - bfloat16 weights, tensor-parallel size 1, max model length 16384
#
# The two CUDA_* variables apply to this command only: devices are enumerated
# in PCI bus order and only device 0 is made visible to the process.
#
# Keep the trailing backslash on every line but the last so that all flags
# stay part of this single command.
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=0 vllm serve meta-llama/Llama-3.1-8B-Instruct \
  --port 4090 \
  --served-model-name dspy \
  --dtype bfloat16 \
  --tensor-parallel-size 1 \
  --max-model-len 16384