#!/bin/bash
# ---------------- Configuration Area ----------------
# 1. Inference Model Configuration
MODEL_NAME="Qwen-2.5-Omni-7B" # required, registered in ./models
EXP_MARKING="_20251024" # recommended
MODEL_PATH="/path/to/model" # required
DATASET_NAME="UNO-Bench"
# Option 1: Use a local dataset path
DATASET_LOCAL_DIR="/path/to/dataset"
# Option 2: Use the Hugging Face cache path; the program will download the dataset from Hugging Face
HF_CACHE_DIR="$HOME/.cache/huggingface/hub" # note: "~" does not expand inside quotes, so use $HOME
# Inference Backend Configuration
# Options: "hf" (local HF loading) or "vllm" (start VLLM service)
INFERENCE_BACKEND="hf" # required
TARGET_PORT=8000
TARGET_GPU_IDS="0,1"
TARGET_TP_SIZE=2
# 2. Scorer Model Configuration (UNO-Scorer)
SCORER_MODEL_PATH="/path/to/scorer" # required
SCORER_PORT=8001
SCORER_GPU_IDS="0,1"
SCORER_TP_SIZE=2
# ------------------------------------------
set -e
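# --- Optional sanity check (a minimal sketch, not part of the original workflow) ---
# Fails fast if the required paths above were left at their placeholder values.
# Assumes MODEL_PATH and SCORER_MODEL_PATH are meant to be filled in locally;
# remove this block if you load models by name instead.
for var in MODEL_PATH SCORER_MODEL_PATH; do
    if [ -z "${!var}" ] || [[ "${!var}" == "/path/to/"* ]]; then
        echo "ERROR: $var is not set; edit the Configuration Area first." >&2
        exit 1
    fi
done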
# Global variables to store PIDs for cleanup
TARGET_VLLM_PID=""
SCORER_VLLM_PID=""
# === Cleanup Function ===
cleanup() {
    echo "--- [Cleanup] Checking for background processes... ---"
    if [ -n "$TARGET_VLLM_PID" ]; then
        if ps -p $TARGET_VLLM_PID > /dev/null; then
            echo "Stopping Target Inference VLLM (PID: $TARGET_VLLM_PID)..."
            kill $TARGET_VLLM_PID
            wait $TARGET_VLLM_PID 2>/dev/null || true
            echo "Target VLLM stopped."
        fi
    fi
    if [ -n "$SCORER_VLLM_PID" ]; then
        if ps -p $SCORER_VLLM_PID > /dev/null; then
            echo "Stopping Scorer VLLM (PID: $SCORER_VLLM_PID)..."
            kill $SCORER_VLLM_PID
            wait $SCORER_VLLM_PID 2>/dev/null || true
            echo "Scorer VLLM stopped."
        fi
    fi
}
trap cleanup EXIT SIGINT SIGTERM
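# --- Helper: wait for a vLLM server to become ready (a hedged sketch, not in the original script) ---
# Polls the /health endpoint of vLLM's OpenAI-compatible server so clients are not
# started while the model is still loading. Assumes curl is available; the 60 x 5s
# (5-minute) budget is an illustrative default. A non-zero return aborts the script
# via set -e, which triggers the cleanup trap and stops the background server.
wait_for_server() {
    local port=$1
    local name=$2
    echo "Waiting for $name on port $port to become ready..."
    for _ in $(seq 1 60); do
        if curl -sf "http://localhost:$port/health" > /dev/null; then
            echo "$name is ready."
            return 0
        fi
        sleep 5
    done
    echo "ERROR: $name did not become ready in time. Check its log file." >&2
    return 1
}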
# ==========================================
# Stage 1: Inference
# ==========================================
echo ">>> Stage 1: Running Inference ($INFERENCE_BACKEND mode)..."
if [ "$INFERENCE_BACKEND" == "vllm" ]; then
# --- 1.1 Set environment variables specific to Qwen2.5-Omni ---
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
echo "Starting Target Model VLLM Server..."
echo "VIDEO_MAX_PIXELS set to: $VIDEO_MAX_PIXELS"
# --- 1.2 Start VLLM (based on your parameters) ---
CUDA_VISIBLE_DEVICES=$TARGET_GPU_IDS vllm serve "$MODEL_PATH" \
--port $TARGET_PORT \
--allowed-local-media-path "$HF_CACHE_DIR" \
--limit_mm_per_prompt image=8 \
--max-model-len 131072 \
--tensor-parallel-size $TARGET_TP_SIZE \
--trust-remote-code \
> target_vllm.log 2>&1 &
TARGET_VLLM_PID=$!
echo "Target VLLM PID: $TARGET_VLLM_PID"
    # --- 1.3 Run Client for inference ---
    # Note: eval.py does not require a GPU, and --dataset_local_dir should preferably point to the media directory or its parent.
    CUDA_VISIBLE_DEVICES="" python3 eval.py \
        --mode inference \
        --model_name "$MODEL_NAME" \
        --model_path "$MODEL_PATH" \
        --model_api_url "http://localhost:$TARGET_PORT/v1/chat/completions" \
        --dataset_name "$DATASET_NAME" \
        --hf_cache_dir "$HF_CACHE_DIR" \
        --dataset_local_dir "$DATASET_LOCAL_DIR" \
        --exp_marking "$EXP_MARKING" \
        --batch_size 16
    echo ">>> Inference finished. Stopping Target VLLM to release GPUs..."
    # --- 1.4 Force release of resources (critical step) ---
    kill $TARGET_VLLM_PID
    wait $TARGET_VLLM_PID 2>/dev/null || true
    TARGET_VLLM_PID=""
    unset VLLM_ALLOW_LONG_MAX_MODEL_LEN
    echo ">>> Target GPU resources released."
else
    # --- Local HF Mode ---
    CUDA_VISIBLE_DEVICES=$TARGET_GPU_IDS python3 eval.py \
        --mode inference \
        --model_name "$MODEL_NAME" \
        --model_path "$MODEL_PATH" \
        --dataset_name "$DATASET_NAME" \
        --exp_marking "$EXP_MARKING" \
        --hf_cache_dir "$HF_CACHE_DIR" \
        --dataset_local_dir "$DATASET_LOCAL_DIR" \
        --batch_size 1
fi
# ==========================================
# Stage 2: Start Scorer Service (VLLM Scorer)
# ==========================================
echo ">>> Stage 2: Starting Scorer VLLM Server..."
# The scorer does not need the Omni-specific environment variables or parameters
CUDA_VISIBLE_DEVICES=$SCORER_GPU_IDS vllm serve "$SCORER_MODEL_PATH" \
    --port $SCORER_PORT \
    --max-model-len 32768 \
    --tensor-parallel-size $SCORER_TP_SIZE \
    --trust-remote-code \
    --gpu-memory-utilization 0.9 \
    > scorer_vllm.log 2>&1 &
SCORER_VLLM_PID=$!
echo "Scorer VLLM PID: $SCORER_VLLM_PID"
# ==========================================
# Stage 3: Evaluation
# ==========================================
echo ">>> Stage 3: Running Evaluation/Scoring..."
CUDA_VISIBLE_DEVICES="" python3 eval.py \
--mode scoring \
--model_name "$MODEL_NAME" \
--exp_marking "$EXP_MARKING" \
--scorer_api_url "http://localhost:$SCORER_PORT/v1/chat/completions" \
--dataset_name "$DATASET_NAME" \
--hf_cache_dir "$HF_CACHE_DIR" \
--dataset_local_dir "$DATASET_LOCAL_DIR"
echo ">>> Benchmark Workflow Completed Successfully." |