csuhan's picture
Upload folder using huggingface_hub
b0c0df0 verified
# Run and exactly reproduce qwen2vl results!
# mme as an example
# Export HF_HOME for the evaluation run below.
# Use ${HOME} instead of "~": tilde expansion is not performed inside double
# quotes, so "~/.cache/huggingface" would be exported as a literal path that
# starts with the character "~" rather than the user's home directory.
export HF_HOME="${HOME}/.cache/huggingface"
# pip install git+https://github.com/EvolvingLMMs-Lab/lmms-eval.git
# pip3 install qwen_vl_utils
# Use `interleave_visuals=True` to control the visual token position. This is currently only for mmmu_val and mmmu_pro (and potentially for other interleaved image-text tasks); please do not use it unless you are sure about the operation details.
# accelerate launch --num_processes=8 --main_process_port=12346 -m lmms_eval \
# --model qwen2_vl \
# --model_args=pretrained=Qwen/Qwen2-VL-7B-Instruct,max_pixels=12845056,attn_implementation=flash_attention_2,interleave_visuals=True \
# --tasks mmmu_pro \
# --batch_size 1
# Launch the lmms_eval module through `accelerate` with 8 processes
# (main process port 12346), running the `mme` task with the qwen2_5_vl
# model wrapper at batch size 1. One option per line for easier editing.
accelerate launch \
  --num_processes=8 \
  --main_process_port=12346 \
  -m lmms_eval \
  --model qwen2_5_vl \
  --model_args=pretrained=Qwen/Qwen2.5-VL-7B-Instruct,max_pixels=12845056,attn_implementation=flash_attention_2,interleave_visuals=False \
  --tasks mme \
  --batch_size 1