File size: 368 Bytes
304bac0
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Launch a vLLM server for the model at MODEL_PATH, served under the
# name "medguide-v" on 0.0.0.0:8231.
#
# Required:
#   MODEL_PATH  model location passed to `vllm serve`. Supply it via the
#               environment (MODEL_PATH=/path/to/model bash <script>) or
#               edit the default below.
set -euo pipefail

# Expose eight GPUs (0-7); this count matches --tensor-parallel-size below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# NOTE(review): presumably needed so --max-model-len may exceed the length
# derived from the model config — confirm against the vLLM env-var docs.
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
# NOTE(review): presumably selects the legacy (V0) engine — confirm that the
# installed vLLM version still honours this variable.
export VLLM_USE_V1=0

# Honour a caller-supplied MODEL_PATH; fall back to the in-file default.
MODEL_PATH="${MODEL_PATH:-}"
# Fail fast: an empty value would otherwise vanish from the command line
# and `vllm serve` would start without the intended model argument.
: "${MODEL_PATH:?MODEL_PATH is empty; set it to the model directory or model id to serve}"

# Quoted so that paths containing spaces or glob characters stay one word.
vllm serve "$MODEL_PATH" \
  --served-model-name medguide-v \
  --port 8231 \
  --host 0.0.0.0 \
  --dtype bfloat16 \
  --limit-mm-per-prompt image=6,video=0 \
  --max-model-len 12000 \
  --gpu-memory-utilization 0.9 \
  --tensor-parallel-size 8 \
  --api-key medguide-v