Update: add training and testing scripts

Browse files

Files changed (12) hide show

scripts/finetune/hf_download.sh +4 -0
scripts/finetune/test_llava.sh +4 -0
scripts/finetune/train_llava.sh +3 -0
scripts/interleave/eval_all.sh +5 -0
scripts/interleave/eval_interleave_3d.sh +28 -0
scripts/interleave/eval_multiprocess.sh +54 -0
scripts/video/demo/video_demo.sh +40 -0
scripts/video/eval/activitynet_eval.sh +96 -0
scripts/video/eval/video_chatgpt_benchmark_eval_shard.sh +242 -0
scripts/video/eval/video_description_from_t2v.sh +98 -0
scripts/video/eval/video_detail_description_eval_only.sh +24 -0
scripts/video/eval/video_detail_description_eval_shard.sh +95 -0

scripts/finetune/hf_download.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+# Keep re-running the test script in an endless loop until the job is killed.
+# NOTE review: there is no exit condition, presumably so that a flaky run is retried - confirm.
+while true
+do
+	python debug_code/test_llavanext.py
+done

scripts/finetune/test_llava.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+# Launch run_finetune_llava.py in --test mode against a saved checkpoint, using the single GPU accelerate config.
+# The last argument line carries no trailing backslash, so the command ends here.
+accelerate launch --config_file ./hf_config/single_gpu_config.yml \
+    run_finetune_llava.py \
+    --test \
+    --checkpoint_path ./model_ckpt/llava3_mix_instr/checkpoints/checkpoint_00003

scripts/finetune/train_llava.sh ADDED Viewed

	@@ -0,0 +1,3 @@

+# Launch finetune_llava.py through accelerate with the multi GPU config, passing the --wandb flag.
+# ACCELERATE_CONFIG may be exported to override the default config file path.
+# The last argument line carries no trailing backslash, so the command ends here.
+accelerate launch --config_file "${ACCELERATE_CONFIG:-/mnt1/lyc/LLaVA-NeXT/multi_gpu_config.yml}" \
+    finetune_llava.py \
+    --wandb

scripts/interleave/eval_all.sh ADDED Viewed

	@@ -0,0 +1,5 @@

+# Evaluate one checkpoint on every interleave split, one split after another.
+# Replace /path/to/ckpt and /path/to/images with real locations before running.
+for split in multi_image_in_domain multi_image_out_domain multi_view_in_domain
+do
+    ./scripts/interleave/eval_interleave_3d.sh /path/to/ckpt /path/to/images "$split"
+done

scripts/interleave/eval_interleave_3d.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+#!/bin/bash
+# Run the multi-process interleave inference for one split, then score the merged result.
+# Usage: eval_interleave_3d.sh CKPT_PATH DATA_PATH EVAL_TYPE
+#   CKPT_PATH  model checkpoint, its last path component names the log directory
+#   DATA_PATH  folder passed as the image folder, it must also contain EVAL_TYPE.json
+#   EVAL_TYPE  split name, for example multi_image_in_domain
+[ "$#" -eq 3 ] || { echo "Usage: $0 <ckpt_path> <data_path> <eval_type>" >&2; exit 1; }
+CKPT_PATH=$1
+# basename also copes with a trailing slash on the checkpoint path
+NAME=$(basename -- "$CKPT_PATH")
+echo "$NAME"
+##### set images path
+DATA_PATH=$2
+EVAL_TYPE=$3
+JSON_PATH="${DATA_PATH}/${EVAL_TYPE}.json"
+############################### eval multi-image
+RESULT_NAME="logs/${NAME}/${EVAL_TYPE}"
+echo "$RESULT_NAME"
+# Create the full result directory so the per-chunk answer files have somewhere to land
+mkdir -p "$RESULT_NAME"
+# Arguments: model, questions, output dir, image folder, empty extra prompt, 8 processes, temperature 0
+bash scripts/interleave/eval_multiprocess.sh \
+    "${CKPT_PATH}" \
+    "${JSON_PATH}" \
+    "${RESULT_NAME}" \
+    "${DATA_PATH}" \
+    "" \
+    8 0
+python3 llava/eval/evaluate_interleave.py --result-dir "${RESULT_NAME}"

scripts/interleave/eval_multiprocess.sh ADDED Viewed

	@@ -0,0 +1,54 @@

+#!/bin/bash
+# Shard a question file across N parallel llava/eval/model_vqa.py workers and merge
+# the per-chunk answers into <base_answer_path>/result.jsonl.
+# Worker i is started with CUDA_VISIBLE_DEVICES=i, so N should not exceed the GPU count.
+# Exit status: 0 when every worker succeeded and every chunk file was merged, 1 otherwise.
+# Check that exactly seven arguments are passed
+if [ "$#" -ne 7 ]; then
+    echo "Usage: $0 <model_path> <question_path> <base_answer_path> <image_folder> <extra_prompt> <N> <temperature>"
+    exit 1
+fi
+# Assign the command line arguments to variables
+model_path=$1
+question_path=$2
+base_answer_path=$3
+image_folder=$4
+extra_prompt=$5
+N=$6
+temperature=$7
+merged_file="${base_answer_path}/result.jsonl"
+# Make sure the output directory exists before any worker or the merge writes to it
+mkdir -p -- "$base_answer_path"
+# Launch one background worker per chunk and remember its PID
+pids=()
+for (( chunk_id=0; chunk_id<N; chunk_id++ ))
+do
+    answer_path="${base_answer_path}/result_${chunk_id}.jsonl"
+    # Drop stale output left over from a previous run
+    rm -f -- "$answer_path"
+    CUDA_VISIBLE_DEVICES="$chunk_id" python3 llava/eval/model_vqa.py \
+        --model-path "$model_path" \
+        --question-file "$question_path" \
+        --answers-file "$answer_path" \
+        --num-chunks "$N" \
+        --chunk-idx "$chunk_id" \
+        --image-folder "$image_folder" \
+        --extra-prompt "$extra_prompt" \
+        --temperature "$temperature" &
+    pids+=("$!")
+    # Uncomment below if you need a slight delay between starting each process
+    # sleep 0.1
+done
+# Wait for each worker individually; a bare `wait` would discard their exit codes
+failed=0
+for pid in "${pids[@]}"
+do
+    if ! wait "$pid"; then
+        echo "Worker with PID $pid exited with an error" >&2
+        failed=1
+    fi
+done
+# Start from an empty merged file, append the chunks in order, then delete them
+: > "$merged_file"
+for (( chunk_id=0; chunk_id<N; chunk_id++ ))
+do
+    answer_path="${base_answer_path}/result_${chunk_id}.jsonl"
+    if [ -f "$answer_path" ]; then
+        cat -- "$answer_path" >> "$merged_file"
+        rm -f -- "$answer_path"
+    else
+        echo "Missing chunk file: $answer_path" >&2
+        failed=1
+    fi
+done
+exit "$failed"

scripts/video/demo/video_demo.sh ADDED Viewed

	@@ -0,0 +1,40 @@

+#!/bin/bash
+ROOT_DIR="/mnt/bn/vl-research/workspace/yhzhang/LLaVA-NeXT"
+if [ ! -e $ROOT_DIR ]; then
+    echo "The root dir does not exist. Exiting the script."
+    exit 1
+fi
+cd $ROOT_DIR
+export PYTHONWARNINGS=ignore
+export TOKENIZERS_PARALLELISM=false
+CKPT=$1
+CONV_MODE=$2
+FRAMES=$3
+POOL_STRIDE=$4
+OVERWRITE=$5
+VIDEO_PATH=$6
+if [ "$OVERWRITE" = False ]; then
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
+else
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
+fi
+python3 playground/demo/video_demo.py \
+    --model-path $CKPT \
+    --video_path ${VIDEO_PATH} \
+    --output_dir ./work_dirs/video_demo/$SAVE_DIR \
+    --output_name pred \
+    --chunk-idx $(($IDX - 1)) \
+    --overwrite ${OVERWRITE} \
+    --mm_spatial_pool_stride ${POOL_STRIDE:-4} \
+    --for_get_frames_num $FRAMES \
+    --conv-mode $CONV_MODE

scripts/video/eval/activitynet_eval.sh ADDED Viewed

	@@ -0,0 +1,96 @@

+#!/bin/bash
+ROOT_DIR="root to LLaVA-NeXT-Video"
+if [ ! -e $ROOT_DIR ]; then
+    echo "The root dir does not exist. Exiting the script."
+    exit 1
+fi
+cd $ROOT_DIR
+export PYTHONWARNINGS=ignore
+export TOKENIZERS_PARALLELISM=false
+CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7'
+gpu_list="${CUDA_VISIBLE_DEVICES}"
+GPULIST=(${(s:,:)gpu_list})
+CHUNKS=${#GPULIST[@]}
+echo "Using $CHUNKS GPUs"
+CKPT=$1
+CONV_MODE=$2
+FRAMES=$3
+OVERWRITE=$4
+PREDEFINED_CONFIGURE=$5
+mm_spatial_pool_stride=$6
+MODEL_MAX_LENGTH=${7:-0}
+CKPT=$1
+CONV_MODE=$2
+FRAMES=$3
+POOL_STRIDE=$4
+OVERWRITE=$5
+CHUNKS=${6:-1}
+PATCHIFY=False
+OPENAIKEY="INPUT YOUR OPENAI API"
+if [ "$OVERWRITE" = False ]; then
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
+else
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
+fi
+echo $SAVE_DIR
+# for IDX in {1..$CHUNKS}; do
+#     GPU_ID=${GPULIST[$IDX]}  # Note: Zsh arrays are 1-indexed by default
+#     # GPU_FREE=0
+#     # while [ $GPU_FREE -eq 0 ]; do
+#     #     # Using nvidia-smi to get the memory usage of the GPU with ID $GPU_ID
+#     #     # Parsing the output to extract the memory usage, and checking if it is "0"
+#     #     MEM_USAGE=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $GPU_ID | tr -d '[:space:]')
+#     #     if [ "$MEM_USAGE" -eq 0 ]; then
+#     #         GPU_FREE=1
+#     #         echo "GPU $GPU_ID is free."
+#     #     else
+#     #         echo "GPU $GPU_ID is in use. Memory used: ${MEM_USAGE}MiB. Checking again in 100 seconds..."
+#     #         sleep 100
+#     #     fi
+#     # done
+#     echo "Running on GPU $GPU_ID"
+#     CUDA_VISIBLE_DEVICES=$GPU_ID python3 llavavid/eval/model_activitynet_qa.py \
+#     --model-path $CKPT \
+#     --video_dir ./data/llava_video/ActivityNet-QA/all_test \
+#     --gt_file_question ./data/llava_video/ActivityNet-QA/test_q.json \
+#     --gt_file_answers ./data/llava_videoActivityNet-QA/test_a.json \
+#     --output_dir ./work_dirs/eval_activitynet/$SAVE_DIR \
+#     --output_name pred \
+#     --num-chunks $CHUNKS \
+#     --chunk-idx $(($IDX - 1)) \
+#     --overwrite ${OVERWRITE} \
+#     --patchify_video_feature ${PATCHIFY} \
+#     --predefined_configure ${PREDEFINED_CONFIGURE} \
+#     --mm_spatial_pool_stride ${mm_spatial_pool_stride:-4} \
+#     --for_get_frames_num $FRAMES \
+#     --model-max-length ${MODEL_MAX_LENGTH:-0} \
+#     --conv-mode $CONV_MODE &
+# done
+# wait
+python3 llavavid/eval/eval_activitynet_qa.py \
+    --pred_path ./work_dirs/eval_activitynet/$SAVE_DIR \
+    --output_dir ./work_dirs/eval_activitynet/$SAVE_DIR/results \
+    --output_json ./work_dirs/eval_activitynet/$SAVE_DIR/results.json \
+    --num_chunks $CHUNKS \
+    --api_key $OPENAIKEY \
+    # --num_tasks 16 \

scripts/video/eval/video_chatgpt_benchmark_eval_shard.sh ADDED Viewed

	@@ -0,0 +1,242 @@

+#!/bin/bash
+ROOT_DIR="root to LLaVA-NeXT-Video"
+if [ ! -e $ROOT_DIR ]; then
+    echo "The root dir does not exist. Exiting the script."
+    exit 1
+fi
+cd $ROOT_DIR
+export python3WARNINGS=ignore
+export TOKENIZERS_PARALLELISM=false
+# CUDA_VISIBLE_DEVICES='0,1,2,3,4,5,6,7'
+gpu_list="${CUDA_VISIBLE_DEVICES}"
+GPULIST=(${(s:,:)gpu_list})
+# CHUNKS=${#GPULIST[@]}
+# echo "Using $CHUNKS GPUs"
+CKPT=$1
+CONV_MODE=$2
+FRAMES=$3
+POOL_STRIDE=$4
+OVERWRITE=$5
+CHUNKS=${6:-1}
+OPENAIKEY="INPUT YOUR OPENAI API"
+if [ "$OVERWRITE" = False ]; then
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
+else
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
+fi
+echo $SAVE_DIR
+# Assuming GPULIST is a bash array containing your GPUs
+GPULIST=(0 1 2 3 4 5 6 7)
+# Get the number of GPUs
+NUM_GPUS=${#GPULIST[@]}
+# Calculate GPUs per chunk
+GPUS_PER_CHUNK=$((NUM_GPUS / CHUNKS))
+for IDX in $(seq 1 $CHUNKS); do
+    START=$(((IDX-1) * GPUS_PER_CHUNK))
+    LENGTH=$GPUS_PER_CHUNK # Length for slicing, not the end index
+    CHUNK_GPUS=(${GPULIST[@]:$START:$LENGTH})
+    # Convert the chunk GPUs array to a comma-separated string
+    CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")
+    # ALL_GPUS_FREE=0
+    # while [ $ALL_GPUS_FREE -eq 0 ]; do
+    #     ALL_GPUS_FREE=1  # Assume all GPUs are free initially
+    #     for GPU_ID in $CHUNK_GPUS; do
+    #         MEM_USAGE=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $GPU_ID | tr -d '[:space:]')
+    #         # Assuming a GPU is considered free if its memory usage is less than 100 MiB
+    #         if [ "$MEM_USAGE" -ge 100 ]; then
+    #             ALL_GPUS_FREE=0
+    #             echo "GPU $GPU_ID is in use. Memory used: ${MEM_USAGE}MiB."
+    #             break  # Exit the loop early as we found a GPU that is not free
+    #         fi
+    #     done
+    #     if [ $ALL_GPUS_FREE -eq 0 ]; then
+    #         echo "Not all GPUs in chunk are free. Checking again in 100 seconds..."
+    #         sleep 100
+    #     fi
+    # done
+    echo "CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR"
+    CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 llava/eval/model_video_chatgpt_general.py \
+        --model-path $CKPT \
+        --video_dir ./data/llava_video/video-chatgpt/evaluation/Test_Videos/ \
+        --gt_file ./data/llava_video/video-chatgpt/evaluation/generic_qa.json \
+        --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+        --output_name pred \
+        --num-chunks $CHUNKS \
+        --chunk-idx $(($IDX - 1)) \
+        --overwrite ${OVERWRITE:-true} \
+        --mm_spatial_pool_stride ${POOL_STRIDE:-4} \
+        --for_get_frames_num $FRAMES \
+        --conv-mode $CONV_MODE &
+done
+wait
+python3 llava/eval/evaluate_benchmark_1_correctness.py \
+    --pred_path ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+    --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR/correctness_results \
+    --output_json ./work_dirs/eval_video_chatgpt/$SAVE_DIR/correctness_results.json \
+    --num_chunks $CHUNKS \
+    --output_name pred \
+    --num_tasks 16 \
+    --api_key $OPENAIKEY \
+python3 llava/eval/evaluate_benchmark_2_detailed_orientation.py \
+    --pred_path ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+    --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR/detail_results \
+    --output_json ./work_dirs/eval_video_chatgpt/$SAVE_DIR/detail_results.json \
+    --num_chunks $CHUNKS \
+    --output_name pred \
+    --num_tasks 16 \
+    --api_key $OPENAIKEY \
+python3 llava/eval/evaluate_benchmark_3_context.py \
+    --pred_path ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+    --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR/context_results \
+    --output_json ./work_dirs/eval_video_chatgpt/$SAVE_DIR/context_results.json \
+    --num_chunks $CHUNKS \
+    --output_name pred \
+    --num_tasks 16 \
+    --api_key $OPENAIKEY \
+for IDX in $(seq 1 $CHUNKS); do
+    START=$(((IDX-1) * GPUS_PER_CHUNK))
+    LENGTH=$GPUS_PER_CHUNK # Length for slicing, not the end index
+    CHUNK_GPUS=(${GPULIST[@]:$START:$LENGTH})
+    # Convert the chunk GPUs array to a comma-separated string
+    CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")
+    # ALL_GPUS_FREE=0
+    # while [ $ALL_GPUS_FREE -eq 0 ]; do
+    #     ALL_GPUS_FREE=1  # Assume all GPUs are free initially
+    #     for GPU_ID in $CHUNK_GPUS; do
+    #         MEM_USAGE=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $GPU_ID | tr -d '[:space:]')
+    #         # Assuming a GPU is considered free if its memory usage is less than 100 MiB
+    #         if [ "$MEM_USAGE" -ge 100 ]; then
+    #             ALL_GPUS_FREE=0
+    #             echo "GPU $GPU_ID is in use. Memory used: ${MEM_USAGE}MiB."
+    #             break  # Exit the loop early as we found a GPU that is not free
+    #         fi
+    #     done
+    #     if [ $ALL_GPUS_FREE -eq 0 ]; then
+    #         echo "Not all GPUs in chunk are free. Checking again in 100 seconds..."
+    #         sleep 100
+    #     fi
+    # done
+    echo "CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR"
+    CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 llava/eval/model_video_chatgpt_general.py \
+        --model-path $CKPT \
+        --video_dir ./data/llava_video/video-chatgpt/evaluation/Test_Videos/ \
+        --gt_file ./data/llava_video/video-chatgpt/evaluation/temporal_qa.json \
+        --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+        --output_name pred_temporal \
+        --num-chunks $CHUNKS \
+        --chunk-idx $(($IDX - 1)) \
+        --for_get_frames_num $FRAMES \
+        --overwrite ${OVERWRITE} \
+        --mm_spatial_pool_stride ${POOL_STRIDE:-4} \
+        --conv-mode $CONV_MODE &
+done
+wait
+python3 llava/eval/evaluate_benchmark_4_temporal.py \
+    --pred_path ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+    --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR/temporal_results \
+    --output_json ./work_dirs/eval_video_chatgpt/$SAVE_DIR/temporal_results.json \
+    --num_chunks $CHUNKS \
+    --output_name pred_temporal \
+    --num_tasks 16 \
+    --api_key $OPENAIKEY \
+for IDX in $(seq 1 $CHUNKS); do
+    START=$(((IDX-1) * GPUS_PER_CHUNK))
+    LENGTH=$GPUS_PER_CHUNK # Length for slicing, not the end index
+    CHUNK_GPUS=(${GPULIST[@]:$START:$LENGTH})
+    # Convert the chunk GPUs array to a comma-separated string
+    CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")
+    # ALL_GPUS_FREE=0
+    # while [ $ALL_GPUS_FREE -eq 0 ]; do
+    #     ALL_GPUS_FREE=1  # Assume all GPUs are free initially
+    #     for GPU_ID in $CHUNK_GPUS; do
+    #         MEM_USAGE=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $GPU_ID | tr -d '[:space:]')
+    #         # Assuming a GPU is considered free if its memory usage is less than 100 MiB
+    #         if [ "$MEM_USAGE" -ge 100 ]; then
+    #             ALL_GPUS_FREE=0
+    #             echo "GPU $GPU_ID is in use. Memory used: ${MEM_USAGE}MiB."
+    #             break  # Exit the loop early as we found a GPU that is not free
+    #         fi
+    #     done
+    #     if [ $ALL_GPUS_FREE -eq 0 ]; then
+    #         echo "Not all GPUs in chunk are free. Checking again in 100 seconds..."
+    #         sleep 100
+    #     fi
+    # done
+    echo "CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR"
+    CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 llava/eval/model_video_chatgpt_consistency.py \
+        --model-path $CKPT \
+        --video_dir ./data/llava_video/video-chatgpt/evaluation/Test_Videos/ \
+        --gt_file ./data/llava_video/video-chatgpt/evaluation/consistency_qa.json \
+        --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+        --output_name pred_consistency \
+        --num-chunks $CHUNKS \
+        --chunk-idx $(($IDX - 1)) \
+        --mm_spatial_pool_stride ${POOL_STRIDE:-4} \
+        --for_get_frames_num $FRAMES \
+        --overwrite ${OVERWRITE} \
+        --conv-mode $CONV_MODE &
+done
+wait
+python3 llava/eval/evaluate_benchmark_5_consistency.py \
+    --pred_path ./work_dirs/eval_video_chatgpt/$SAVE_DIR \
+    --output_dir ./work_dirs/eval_video_chatgpt/$SAVE_DIR/consistency_results \
+    --output_json ./work_dirs/eval_video_chatgpt/$SAVE_DIR/consistency_results.json \
+    --num_chunks $CHUNKS \
+    --output_name pred_consistency \
+    --num_tasks 16 \
+    --api_key $OPENAIKEY \

scripts/video/eval/video_description_from_t2v.sh ADDED Viewed

	@@ -0,0 +1,98 @@

+#!/bin/bash
+ROOT_DIR="/mnt/bn/vl-research/workspace/yhzhang/llava-next-video"
+if [ ! -e $ROOT_DIR ]; then
+    echo "The root dir does not exist. Exiting the script."
+    exit 1
+fi
+cd $ROOT_DIR
+export PYTHONWARNINGS=ignore
+export TOKENIZERS_PARALLELISM=false
+CKPT=$1
+CONV_MODE=$2
+FRAMES=$3
+POOL_STRIDE=$4
+OVERWRITE=$5
+CHUNKS=${6:-1}
+DO_CENTER_CROP=${7:-False}
+echo "Using $CHUNKS GPUs"
+LOAD_8BIT=False
+if [ "$OVERWRITE" = False ]; then
+    if [ "$MODEL_MAX_LENGTH" = 0 ]; then
+        SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_overwrite_${OVERWRITE}
+    else
+        SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_overwrite_${OVERWRITE}
+    fi
+else
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
+fi
+SAVE_DIR=${SAVE_DIR}_do_center_crop_${DO_CENTER_CROP}
+# Assuming GPULIST is a bash array containing your GPUs
+GPULIST=(0 1 2 3 4 5 6 7)
+# GPULIST=(0)
+# Get the number of GPUs
+NUM_GPUS=${#GPULIST[@]}
+# Calculate GPUs per chunk
+GPUS_PER_CHUNK=$((NUM_GPUS / CHUNKS))
+for IDX in $(seq 1 $CHUNKS); do
+    START=$(((IDX-1) * GPUS_PER_CHUNK))
+    LENGTH=$GPUS_PER_CHUNK # Length for slicing, not the end index
+    CHUNK_GPUS=(${GPULIST[@]:$START:$LENGTH})
+    # Convert the chunk GPUs array to a comma-separated string
+    CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")
+    # ALL_GPUS_FREE=0
+    # while [ $ALL_GPUS_FREE -eq 0 ]; do
+    #     ALL_GPUS_FREE=1  # Assume all GPUs are free initially
+    #     for GPU_ID in $CHUNK_GPUS; do
+    #         MEM_USAGE=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $GPU_ID | tr -d '[:space:]')
+    #         # Assuming a GPU is considered free if its memory usage is less than 100 MiB
+    #         if [ "$MEM_USAGE" -ge 100 ]; then
+    #             ALL_GPUS_FREE=0
+    #             echo "GPU $GPU_ID is in use. Memory used: ${MEM_USAGE}MiB."
+    #             break  # Exit the loop early as we found a GPU that is not free
+    #         fi
+    #     done
+    #     if [ $ALL_GPUS_FREE -eq 0 ]; then
+    #         echo "Not all GPUs in chunk are free. Checking again in 100 seconds..."
+    #         sleep 100
+    #     fi
+    # done
+    echo "CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR"
+    CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 llava/eval/model_video_description_from_t2v.py \
+        --model-path $CKPT \
+        --gt_file /mnt/bn/vl-research-1t/tuyen/webvid_hdvg_movie_pond5_for_captioning_evaluation/webvid_hdvg_movie_pond5_for_captioning_evaluation.processed.csv \
+        --output_dir ./work_dirs/eval_video_description_from_t2v/$SAVE_DIR \
+        --output_name pred \
+        --num-chunks $CHUNKS \
+        --chunk-idx $(($IDX - 1)) \
+        --overwrite ${OVERWRITE} \
+        --mm_spatial_pool_stride ${POOL_STRIDE:-4} \
+        --for_get_frames_num $FRAMES \
+        --load_8bit $LOAD_8BIT \
+        --do_center_crop $DO_CENTER_CROP \
+        --conv-mode $CONV_MODE &
+done
+wait
+cat ${ROOT_DIR}/work_dirs/eval_video_description_from_t2v/$SAVE_DIR/${CHUNKS}* > ${ROOT_DIR}/work_dirs/eval_video_description_from_t2v/$SAVE_DIR/pred.json

scripts/video/eval/video_detail_description_eval_only.sh ADDED Viewed

	@@ -0,0 +1,24 @@

+#!/bin/bash
+ROOT_DIR="root to LLaVA-NeXT-Video"
+if [ ! -e $ROOT_DIR ]; then
+    echo "The root dir does not exist. Exiting the script."
+    exit 1
+fi
+cd $ROOT_DIR
+export PYTHONWARNINGS=ignore
+export TOKENIZERS_PARALLELISM=false
+OPENAIKEY="INPUT YOUR OPENAI API"
+SAVE_DIR=$1
+python3 llava/eval/evaluate_benchmark_video_detail_description.py \
+    --pred_path ./work_dirs/eval_video_detail_description/$SAVE_DIR/pred.json \
+    --output_dir ./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results \
+    --output_json ./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results.json \
+    --num_chunks 1 \
+    --num_tasks 16 \
+    --api_key $OPENAIKEY \

scripts/video/eval/video_detail_description_eval_shard.sh ADDED Viewed

	@@ -0,0 +1,95 @@

+#!/bin/bash
+ROOT_DIR="/mnt/bn/vl-research/workspace/yhzhang/llava-next-video"
+if [ ! -e $ROOT_DIR ]; then
+    echo "The root dir does not exist. Exiting the script."
+    exit 1
+fi
+cd $ROOT_DIR
+export PYTHONWARNINGS=ignore
+export TOKENIZERS_PARALLELISM=false
+OPENAIKEY="INPUT YOUR OPENAI API"
+CKPT=$1
+CONV_MODE=$2
+FRAMES=$3
+POOL_STRIDE=$4
+OVERWRITE=$5
+CHUNKS=${6:-1}
+echo "Using $CHUNKS GPUs"
+if [ "$OVERWRITE" = False ]; then
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}_overwrite_${OVERWRITE}
+else
+    SAVE_DIR=$(basename $CKPT)_${CONV_MODE}_frames_${FRAMES}_stride_${POOL_STRIDE}
+fi
+# Assuming GPULIST is a bash array containing your GPUs
+GPULIST=(0 1 2 3 4 5 6 7)
+# Get the number of GPUs
+NUM_GPUS=${#GPULIST[@]}
+# Calculate GPUs per chunk
+GPUS_PER_CHUNK=$((NUM_GPUS / CHUNKS))
+for IDX in $(seq 1 $CHUNKS); do
+    START=$(((IDX-1) * GPUS_PER_CHUNK))
+    LENGTH=$GPUS_PER_CHUNK # Length for slicing, not the end index
+    CHUNK_GPUS=(${GPULIST[@]:$START:$LENGTH})
+    # Convert the chunk GPUs array to a comma-separated string
+    CHUNK_GPUS_STR=$(IFS=,; echo "${CHUNK_GPUS[*]}")
+    # ALL_GPUS_FREE=0
+    # while [ $ALL_GPUS_FREE -eq 0 ]; do
+    #     ALL_GPUS_FREE=1  # Assume all GPUs are free initially
+    #     for GPU_ID in $CHUNK_GPUS; do
+    #         MEM_USAGE=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $GPU_ID | tr -d '[:space:]')
+    #         # Assuming a GPU is considered free if its memory usage is less than 100 MiB
+    #         if [ "$MEM_USAGE" -ge 100 ]; then
+    #             ALL_GPUS_FREE=0
+    #             echo "GPU $GPU_ID is in use. Memory used: ${MEM_USAGE}MiB."
+    #             break  # Exit the loop early as we found a GPU that is not free
+    #         fi
+    #     done
+    #     if [ $ALL_GPUS_FREE -eq 0 ]; then
+    #         echo "Not all GPUs in chunk are free. Checking again in 100 seconds..."
+    #         sleep 100
+    #     fi
+    # done
+    echo "CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR"
+    CUDA_VISIBLE_DEVICES=$CHUNK_GPUS_STR python3 llava/eval/model_video_detail_description.py \
+        --model-path $CKPT \
+        --video_dir ./data/llava_video/video-chatgpt/evaluation/Test_Videos/ \
+        --output_dir ./work_dirs/eval_video_detail_description/$SAVE_DIR \
+        --output_name pred \
+        --num-chunks $CHUNKS \
+        --chunk-idx $(($IDX - 1)) \
+        --overwrite ${OVERWRITE} \
+        --mm_spatial_pool_stride ${POOL_STRIDE:-4} \
+        --for_get_frames_num $FRAMES \
+        --conv-mode $CONV_MODE &
+done
+wait
+python3 llava/eval/evaluate_benchmark_video_detail_description.py \
+    --pred_path ./work_dirs/eval_video_detail_description/$SAVE_DIR \
+    --output_dir ./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results \
+    --output_json ./work_dirs/eval_video_detail_description/$SAVE_DIR/detail_results.json \
+    --num_chunks $CHUNKS \
+    --num_tasks 16 \
+    --api_key $OPENAIKEY \