Spaces:
Running
on
Zero
Running
on
Zero
File size: 327 Bytes
46861c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
# Start a vLLM server for Llama 3.1 8B Instruct via `vllm serve`.
#
# Optional environment overrides (when unset or empty, the defaults below
# reproduce the previously hard-coded invocation):
#   MODEL_PATH    model ID or path handed to `vllm serve`
#   VLLM_API_KEY  value passed to --api-key; the fallback is a placeholder
#                 and should be overridden for anything beyond local testing
#
# NOTE(review): the key is still passed on the command line, so it is visible
# in the process list; confirm whether the deployed vLLM version can take it
# from the environment alone before dropping the flag.
MODEL_PATH=${MODEL_PATH:-meta-llama/Meta-Llama-3.1-8B-Instruct}
API_KEY=${VLLM_API_KEY:-sk-abc123}

# Expansions are quoted so a value containing spaces or glob characters is
# passed as exactly one argument.
vllm serve "$MODEL_PATH" \
  --served-model-name llama3.1-8b \
  --api-key "$API_KEY" \
  --tensor-parallel-size 1 \
  --pipeline-parallel-size 1 \
  --trust-remote-code \
  --dtype bfloat16 \
  --gpu-memory-utilization 0.85 \
  --port 8007 \
  --host localhost
|