#!/bin/bash
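#
# UNO-Bench evaluation workflow:
#   Stage 1: run inference with the target model ("hf" in-process, or via a
#            local vLLM server).
#   Stage 2: start a vLLM server for the scorer (judge) model.
#   Stage 3: score the predictions through the scorer API.
#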

# --- Target model & dataset ---
MODEL_NAME="Qwen-2.5-Omni-7B"
EXP_MARKING="_20251024"
MODEL_PATH="/path/to/model"
DATASET_NAME="UNO-Bench"

DATASET_LOCAL_DIR="/path/to/dataset"

# Use $HOME instead of a quoted "~", which the shell would not expand.
HF_CACHE_DIR="$HOME/.cache/huggingface/hub"

# --- Target inference backend: "hf" (in-process) or "vllm" (local API server) ---
INFERENCE_BACKEND="hf"
TARGET_PORT=8000
TARGET_GPU_IDS="0,1"
TARGET_TP_SIZE=2

# --- Scorer (judge) model, served with vLLM in Stage 2 ---
SCORER_MODEL_PATH="/path/to/scorer"
SCORER_PORT=8001
SCORER_GPU_IDS="0,1"
SCORER_TP_SIZE=2

# Abort on the first failing command.
set -e

# PIDs of the background vLLM servers; empty until the servers are started.
TARGET_VLLM_PID=""
SCORER_VLLM_PID=""

# Stop any vLLM server that is still running when the script exits.
cleanup() {
    echo "--- [Cleanup] Checking for background processes... ---"
    if [ -n "$TARGET_VLLM_PID" ] && ps -p "$TARGET_VLLM_PID" > /dev/null 2>&1; then
        echo "Stopping Target Inference VLLM (PID: $TARGET_VLLM_PID)..."
        kill "$TARGET_VLLM_PID"
        wait "$TARGET_VLLM_PID" 2>/dev/null || true
        echo "Target VLLM stopped."
    fi
    if [ -n "$SCORER_VLLM_PID" ] && ps -p "$SCORER_VLLM_PID" > /dev/null 2>&1; then
        echo "Stopping Scorer VLLM (PID: $SCORER_VLLM_PID)..."
        kill "$SCORER_VLLM_PID"
        wait "$SCORER_VLLM_PID" 2>/dev/null || true
        echo "Scorer VLLM stopped."
    fi
}
trap cleanup EXIT SIGINT SIGTERM
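
# Poll a vLLM server until its /health endpoint answers, so clients are not
# launched against a server that is still loading weights. A minimal helper,
# assuming the vLLM OpenAI-compatible server's /health route and a local
# curl; raise max_wait for models that load slowly.
wait_for_server() {
    local url="$1"
    local log_file="$2"
    local waited=0
    local max_wait=600  # seconds to wait before giving up
    echo "Waiting for server at $url ..."
    until curl -sf "$url" > /dev/null 2>&1; do
        sleep 5
        waited=$((waited + 5))
        if [ "$waited" -ge "$max_wait" ]; then
            echo "Server did not become ready within ${max_wait}s; check $log_file" >&2
            exit 1
        fi
    done
    echo "Server at $url is ready."
}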


echo ">>> Stage 1: Running Inference ($INFERENCE_BACKEND mode)..."

if [ "$INFERENCE_BACKEND" == "vllm" ]; then

    # Allow --max-model-len to exceed the context length in the model config.
    export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1

    echo "Starting Target Model VLLM Server..."
    # VIDEO_MAX_PIXELS is not set by this script; report whatever the
    # caller's environment provides.
    echo "VIDEO_MAX_PIXELS set to: ${VIDEO_MAX_PIXELS:-<unset>}"

    # Serve the target model on the inference GPUs in the background.
    CUDA_VISIBLE_DEVICES=$TARGET_GPU_IDS vllm serve "$MODEL_PATH" \
        --port $TARGET_PORT \
        --allowed-local-media-path "$HF_CACHE_DIR" \
        --limit-mm-per-prompt image=8 \
        --max-model-len 131072 \
        --tensor-parallel-size $TARGET_TP_SIZE \
        --trust-remote-code \
        > target_vllm.log 2>&1 &

    TARGET_VLLM_PID=$!
    echo "Target VLLM PID: $TARGET_VLLM_PID"

    # Run inference through the local API; hide the GPUs from the client
    # process so it does not compete with the server for device memory.
    CUDA_VISIBLE_DEVICES="" python3 eval.py \
        --mode inference \
        --model_name "$MODEL_NAME" \
        --model_path "$MODEL_PATH" \
        --model_api_url "http://localhost:$TARGET_PORT/v1/chat/completions" \
        --dataset_name "$DATASET_NAME" \
        --hf_cache_dir "$HF_CACHE_DIR" \
        --dataset_local_dir "$DATASET_LOCAL_DIR" \
        --exp_marking "$EXP_MARKING" \
        --batch_size 16

    echo ">>> Inference finished. Stopping Target VLLM to release GPUs..."

    kill "$TARGET_VLLM_PID"
    wait "$TARGET_VLLM_PID" 2>/dev/null || true
    TARGET_VLLM_PID=""

    unset VLLM_ALLOW_LONG_MAX_MODEL_LEN

    echo ">>> Target GPU resources released."

else
    # HF backend: run inference in-process on the target GPUs; no API server.
    CUDA_VISIBLE_DEVICES=$TARGET_GPU_IDS python3 eval.py \
        --mode inference \
        --model_name "$MODEL_NAME" \
        --model_path "$MODEL_PATH" \
        --dataset_name "$DATASET_NAME" \
        --exp_marking "$EXP_MARKING" \
        --hf_cache_dir "$HF_CACHE_DIR" \
        --dataset_local_dir "$DATASET_LOCAL_DIR" \
        --batch_size 1
fi


echo ">>> Stage 2: Starting Scorer VLLM Server..."

# Serve the scorer model; the target model no longer occupies these GPUs.
CUDA_VISIBLE_DEVICES=$SCORER_GPU_IDS vllm serve "$SCORER_MODEL_PATH" \
    --port $SCORER_PORT \
    --max-model-len 32768 \
    --tensor-parallel-size $SCORER_TP_SIZE \
    --trust-remote-code \
    --gpu-memory-utilization 0.9 \
    > scorer_vllm.log 2>&1 &

SCORER_VLLM_PID=$!
echo "Scorer VLLM PID: $SCORER_VLLM_PID"


echo ">>> Stage 3: Running Evaluation/Scoring..."

# Score the Stage 1 predictions through the scorer's OpenAI-compatible API.
CUDA_VISIBLE_DEVICES="" python3 eval.py \
    --mode scoring \
    --model_name "$MODEL_NAME" \
    --exp_marking "$EXP_MARKING" \
    --scorer_api_url "http://localhost:$SCORER_PORT/v1/chat/completions" \
    --dataset_name "$DATASET_NAME" \
    --hf_cache_dir "$HF_CACHE_DIR" \
    --dataset_local_dir "$DATASET_LOCAL_DIR"

echo ">>> Benchmark Workflow Completed Successfully."