llm_cp2 / src /lmms-eval /examples /models /vllm_qwen3vl.sh

Upload folder using huggingface_hub

b0c0df0 verified about 1 month ago

3.04 kB

	#!/bin/bash

	# Qwen3-VL Evaluation Script with vLLM Backend
	# This script demonstrates how to evaluate Qwen3-VL models using vLLM for accelerated inference
	#
	# Requirements:
	# - vllm>=0.11.0
	# - qwen-vl-utils
	# - CUDA-enabled GPU(s)
	#
	# Installation:
	# uv add "vllm>=0.11.0" qwen-vl-utils
	# OR
	# pip install "vllm>=0.11.0" qwen-vl-utils
	# (Quote the version specifier: an unquoted ">" is a shell redirection.)

	# ============================================================================
	# Configuration
	# ============================================================================

	# --- Model -------------------------------------------------------------------
	# Model identifier handed to the vLLM backend (model=... in --model_args).
	# Qwen3-VL checkpoints that can be used here:
	#   Qwen/Qwen3-VL-30B-A3B-Instruct
	#   Qwen/Qwen3-VL-30B-A3B-Thinking
	#   Qwen/Qwen3-VL-235B-A22B-Instruct
	#   Qwen/Qwen3-VL-235B-A22B-Thinking
	MODEL='Qwen/Qwen3-VL-30B-A3B-Instruct'

	# --- Parallelism (tune to the GPUs you have) ---------------------------------
	# Forwarded as tensor_parallel_size: number of GPUs for tensor parallelism.
	TENSOR_PARALLEL_SIZE='4'
	# Forwarded as data_parallel_size: number of GPUs for data parallelism.
	DATA_PARALLEL_SIZE='1'

	# --- Memory / throughput -----------------------------------------------------
	# Fraction of GPU memory to use, between 0.0 and 1.0.
	GPU_MEMORY_UTILIZATION='0.85'
	# Evaluation batch size (--batch_size).
	BATCH_SIZE='64'

	# --- Tasks -------------------------------------------------------------------
	# Comma-separated task list (--tasks). Frequently used tasks include
	# mmmu_val, mme, mathvista and ai2d.
	TASKS='mmmu_val,mme'

	# --- Output ------------------------------------------------------------------
	# Directory passed to --output_path.
	OUTPUT_PATH='./logs/qwen3vl_vllm'
	# When set to the literal string "true", per-sample logging is requested
	# (--log_samples) and LOG_SUFFIX is passed as --log_samples_suffix.
	LOG_SAMPLES='true'
	LOG_SUFFIX='qwen3vl_vllm'

	# --- Optional sample cap -----------------------------------------------------
	# Uncomment to evaluate only a limited number of samples (handy for smoke
	# tests); when LIMIT is non-empty it is passed as --limit.
	# LIMIT=100

	# ============================================================================
	# NCCL Configuration (for multi-GPU setups)
	# ============================================================================
	# Exported (not just assigned) so that every process launched from this
	# script inherits the settings.
	# NOTE(review): NCCL_BLOCKING_WAIT appears to be the PyTorch distributed
	# setting; recent PyTorch releases spell it TORCH_NCCL_BLOCKING_WAIT — confirm
	# against the installed version. The unit of NCCL_TIMEOUT is not visible
	# here — confirm before changing the value.
	export \
	  NCCL_BLOCKING_WAIT=1 \
	  NCCL_TIMEOUT=18000000
	# For verbose NCCL diagnostics, uncomment:
	# export NCCL_DEBUG=INFO

	# ============================================================================
	# Run Evaluation
	# ============================================================================

	# Print a summary of the effective settings before launching, one line per
	# printf argument.
	printf '%s\n' \
	  "==========================================" \
	  "Qwen3-VL Evaluation with vLLM" \
	  "==========================================" \
	  "Model: $MODEL" \
	  "Tensor Parallel Size: $TENSOR_PARALLEL_SIZE" \
	  "Data Parallel Size: $DATA_PARALLEL_SIZE" \
	  "Tasks: $TASKS" \
	  "Batch Size: $BATCH_SIZE" \
	  "Output Path: $OUTPUT_PATH" \
	  "=========================================="

	# Build the lmms_eval invocation as an argument array instead of a string fed
	# to `eval`. Each array element reaches the program as exactly one argument,
	# so values containing spaces, globs or shell metacharacters (for example a
	# local model path) are passed through verbatim and never re-parsed.
	MODEL_ARGS="model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},data_parallel_size=${DATA_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION}"

	CMD=(
	  uv run python -m lmms_eval
	  --model vllm
	  --model_args "$MODEL_ARGS"
	  --tasks "$TASKS"
	  --batch_size "$BATCH_SIZE"
	  --output_path "$OUTPUT_PATH"
	)

	# Optional arguments.
	# Per-sample logging is enabled only when LOG_SAMPLES is the literal "true".
	if [[ "${LOG_SAMPLES:-}" == "true" ]]; then
	  CMD+=(--log_samples --log_samples_suffix "$LOG_SUFFIX")
	fi

	# LIMIT is optional; ${LIMIT:-} keeps this check safe when it is unset.
	if [[ -n "${LIMIT:-}" ]]; then
	  CMD+=(--limit "$LIMIT")
	fi

	# Execute
	echo "Running command:"
	echo "${CMD[*]}"
	echo ""

	# Capture the exit status explicitly so a failed evaluation is reported and
	# propagated to the caller instead of being masked by the closing banner.
	EVAL_STATUS=0
	"${CMD[@]}" || EVAL_STATUS=$?

	if [[ "$EVAL_STATUS" -ne 0 ]]; then
	  echo "" >&2
	  echo "Evaluation failed with exit code ${EVAL_STATUS}" >&2
	  exit "$EVAL_STATUS"
	fi

	echo ""
	echo "=========================================="
	echo "Evaluation Complete!"
	echo "Results saved to: $OUTPUT_PATH"
	echo "=========================================="