#!/usr/bin/env bash
#
# Launch several lmdeploy `api_server` instances (TurboMind backend) for one
# InternVL2.5 checkpoint.
#
# Each instance is pinned to a GPU through CUDA_VISIBLE_DEVICES, listens on its
# own port, and is passed the same --proxy-url. Every instance is started as a
# background job; this script does not wait for them and does not collect
# their exit statuses.

# Load conda's bash integration, then activate the env that provides `lmdeploy`.
eval "$(conda shell.bash hook)"
conda activate lmdeploy

# Alternative checkpoint that was used before: OpenGVLab/InternVL2_5-8B-AWQ
MODEL_NAME=OpenGVLab/InternVL2_5-8B-MPO-AWQ
PROXY_URL=http://0.0.0.0:7089

#######################################
# Start one lmdeploy api_server in the background.
# Globals:   MODEL_NAME (read), PROXY_URL (read)
# Arguments: $1  - value exported as CUDA_VISIBLE_DEVICES for this server
#            $2  - port passed to --server-port
#            $3+ - optional extra flags appended verbatim to the command
# Returns:   0 once the job has been put in the background; the server's own
#            exit status is not collected here.
#######################################
launch_server() {
  local gpu=$1
  local port=$2
  shift 2

  # The assignment prefix scopes CUDA_VISIBLE_DEVICES to this one command.
  # "$@" expands to nothing when no extra flags were given.
  CUDA_VISIBLE_DEVICES="$gpu" \
    lmdeploy serve api_server "$MODEL_NAME" \
    --server-port "$port" \
    --backend turbomind \
    --dtype float16 --proxy-url "$PROXY_URL" \
    "$@" &
}

# Settings that were tried earlier and are currently disabled:
#   --cache-max-entry-count 0.4, --tp 1,
#   CUDA_VISIBLE_DEVICES=0,1 or 2,3, and a random device id ($((RANDOM % 3)),
#   which yields 0..2).
#
# Note: the servers on ports 2011 and 5171 are both pinned to GPU 1.
#
#             GPU  port  extra flags
launch_server 1    2011  --vision-max-batch-size 64
launch_server 2    5972
launch_server 1    5171  --vision-max-batch-size 64