File size: 327 Bytes
46861c5
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
#!/usr/bin/env bash
#
# Launch a vLLM server for a Llama 3.1 8B Instruct model.
#
# Every setting below can be overridden from the environment; the defaults
# reproduce the previously hard-coded values, so running this script with no
# environment set issues the same `vllm serve` command line as before.
#
# Environment:
#   MODEL_PATH    model identifier or path handed to `vllm serve`
#   VLLM_API_KEY  API key handed to `--api-key`
#   VLLM_HOST     value for `--host` (default: localhost)
#   VLLM_PORT     value for `--port` (default: 8007)

# Stop on the first failing command and on any use of an unset variable.
set -eu

MODEL_PATH="${MODEL_PATH:-meta-llama/Meta-Llama-3.1-8B-Instruct}"

# NOTE(review): the fallback key is the placeholder that used to be hard-coded
# here. Export VLLM_API_KEY with a real secret instead of editing this file.
# The key is still passed on the command line, so it can show up in process
# listings on the host.
API_KEY="${VLLM_API_KEY:-sk-abc123}"

HOST="${VLLM_HOST:-localhost}"
PORT="${VLLM_PORT:-8007}"

# Fail with a readable message instead of a bare "command not found".
if ! command -v vllm >/dev/null 2>&1; then
    printf 'error: vllm executable not found in PATH\n' >&2
    exit 127
fi

# All expansions are quoted so values containing spaces or glob characters
# reach vllm as single arguments.
vllm serve "$MODEL_PATH" \
    --served-model-name llama3.1-8b \
    --api-key "$API_KEY" \
    --tensor-parallel-size 1 \
    --pipeline-parallel-size 1 \
    --trust-remote-code \
    --dtype bfloat16 \
    --gpu-memory-utilization 0.85 \
    --port "$PORT" \
    --host "$HOST"