Upload ms-swift/examples/infer/vllm/mllm_ddp.sh with huggingface_hub
Browse files
ms-swift/examples/infer/vllm/mllm_ddp.sh
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Data-parallel (DDP) multimodal inference with the vLLM backend.
# NOTE: You need to use flash-attn (manual installation) instead of xformers.
#
# NPROC_PER_NODE=2 launches two inference workers, one per visible GPU,
# and the validation dataset is sharded across them.
NPROC_PER_NODE=2 \
CUDA_VISIBLE_DEVICES=0,1 \
swift infer \
    --model Qwen/Qwen2-Audio-7B-Instruct \
    --infer_backend vllm \
    --val_dataset speech_asr/speech_asr_aishell1_trainsets:validation#1000 \
    --gpu_memory_utilization 0.9 \
    --max_model_len 8192 \
    --max_new_tokens 2048 \
    --limit_mm_per_prompt '{"audio": 5}'