File size: 1,255 Bytes
33569f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/bash
# Evaluate $MODEL_NAME on the $EVAL_DATASET dataset using vLLM inference.
# Dataset path: ./dataset/$EVAL_DATASET

# --- Evaluation settings -----------------------------------------------------
# Checkpoint to evaluate; the model path is built as "$BASE_PATH/$MODEL_NAME".
MODEL_NAME='Qwen2.5-VL-7B-Instruct_my'
BASE_PATH='./ckpts'

# Comma-separated GPU ids; one inference worker is started per entry.
GPU_LIST='0'

# Dataset to evaluate on. Pick one of:
#   charades, activitynet, tvgbench, mvbench, videomme, egoschema, tempcompass
EVAL_DATASET='charades'

# Dataset split.
#   tempcompass: default is "multi-choice"
#   egoschema:   default is the full set
SPLIT='test'

# Split the comma-separated GPU_LIST into an array; one inference worker is
# launched per listed GPU.
IFS=',' read -ra gpus <<< "$GPU_LIST"
num_gpus=${#gpus[@]}

# Refuse to continue with no GPUs: otherwise no worker would start and the
# steps after this block would run without any inference output.
if ((num_gpus == 0)); then
    printf 'error: GPU_LIST is empty; nothing to run\n' >&2
    exit 1
fi

# Run the inference jobs. Each worker is pinned to a single GPU through
# CUDA_VISIBLE_DEVICES and is given its own index (--curr_idx) together with
# the total number of workers (--total_idx).
pids=()
for ((i = 0; i < num_gpus; i++)); do
    # Drop stray whitespace so a list such as "0, 1" still yields clean ids.
    gpu=${gpus[i]//[[:space:]]/}

    # Arguments are kept in an array so every value is passed as exactly one
    # word, even if it contains spaces or glob characters.
    eval_args=(
        --model_base "$BASE_PATH/$MODEL_NAME"
        --batch_size 4
        --curr_idx "$i"
        --total_idx "$num_gpus"
        --max_new_tokens 1024
        --split "$SPLIT"
        --datasets "$EVAL_DATASET"
        --output_dir "logs/eval/$MODEL_NAME/$EVAL_DATASET"
        --use_r1_thinking_prompt
        --use_vllm_inference
        # NOTE(review): the previous comment said to "uncomment" this flag to
        # use the no-think prompt (especially for VQA tasks), but the flag is
        # passed unconditionally, alongside --use_r1_thinking_prompt. Delete the
        # next line if the no-think prompt is not wanted; confirm the intended
        # default.
        --use_nothink
    )

    CUDA_VISIBLE_DEVICES="$gpu" python evaluate.py "${eval_args[@]}" &
    pids+=("$!")
done

# Barrier: wait for every worker individually so that a failure is noticed.
# A bare `wait` returns 0 even when background jobs exit with an error.
failed=0
for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
        failed=1
    fi
done

if ((failed)); then
    printf 'error: at least one inference worker failed; aborting\n' >&2
    exit 1
fi

# Calculate metrics for the evaluated model, split and dataset.
# Expansions are quoted so each value reaches eval_all.py as exactly one
# argument, even if it contains spaces or glob characters.
python src/vllm_inference/eval_all.py \
    --model_name "$MODEL_NAME" \
    --split "$SPLIT" \
    --dataset "$EVAL_DATASET"