#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --ntasks-per-node=4
#SBATCH --wait-all-nodes=1
#SBATCH --partition=a6000
#SBATCH -w c31
#
# Slurm batch job: runs eval/infer.py over every dataset listed in
# script_paths, launching ALL srun steps per dataset (one per GPU index).
# Each dataset is processed only after all steps of the previous one finish.
#
# Usage: sbatch <this script>   (run from the directory containing eval/)
#
# Exit status: 0 if every srun step succeeded, 1 if the conda environment
# could not be activated or any step exited non-zero.

# Trace every command. Set here rather than in the shebang so the tracing is
# also active when the script is started as `bash <script>`.
set -x

export HF_HOME=/mnt/beegfs/dzhu6/.cache

eval "$(/home/dzhu6/miniconda3/bin/conda shell.bash hook)" # init conda

export CUDA_VISIBLE_DEVICES=0,1,2,3
export MASTER_PORT=12780
# First hostname of the job's node list.
master_addr=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_ADDR=$master_addr

# Abort early instead of running the evaluation with the wrong interpreter.
if ! conda activate grounded-rl; then
  echo "Error: could not activate conda environment 'grounded-rl'" >&2
  exit 1
fi

# datadirs[i] is the image folder for script_paths[i]; the two arrays must be
# kept in the same order. A dataset without a matching folder is skipped.
datadirs=(
  "/mnt/beegfs/dzhu6/SpatialEval/outputs/images"
  "/mnt/beegfs/dzhu6/coco_images/"
  # "blinkimages"
)

script_paths=(
  "SpatialEval"
  "3dsrbench"
  # "blink"
  # "vsi_bench"
  # "maze"
  # "spar_bench"
  # "mmsi_bench"
)

model_name=ViLaSR-cold-start
CKPT=../ViLaSR-cold-start # zero_shot/cold_start/reflective/rl CKPT
MODE=cold-start           # zero_shot, cold_start, reflective, rl

# model_name=ViLaSR
# CKPT=../ViLaSR # zero_shot/cold_start/reflective/rl CKPT
# MODE=rl # zero_shot, cold_start, reflective, rl

# Number of srun steps launched per dataset; also passed to infer.py as --all
# (presumably the total shard count, with --split the shard index -- confirm
# against eval/infer.py).
ALL=4

echo "Evaluating ${model_name} (mode=${MODE}) on ${#script_paths[@]} dataset(s), ${ALL} shards each"

# rm -rf /mnt/beegfs/dzhu6/ViLaSR/eval/results/rl/blink
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_0_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_1_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_2_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_3_all_4.jsonl

# Becomes 1 as soon as any srun step reports a non-zero exit status.
failed=0

for ((i = 0; i < ${#script_paths[@]}; i++)); do
  QUESTION_FILE=${script_paths[i]}
  IMAGE_FOLDER=${datadirs[i]}

  if [[ -z "$IMAGE_FOLDER" ]]; then
    echo "Warning: No image folder defined for $QUESTION_FILE. Skipping..." >&2
    continue
  fi

  RESULTDIR="./eval/results/${MODE}/${QUESTION_FILE}/"
  mkdir -p "$RESULTDIR"

  pids=()
  for ((gpu = 0; gpu < ALL; gpu++)); do
    echo "****** GPU ${gpu} ${QUESTION_FILE}--${IMAGE_FOLDER} ******"
    # NOTE(review): current srun documentation spells the first option
    # --cpu-bind; confirm the installed Slurm still accepts --cpu_bind.
    CUDA_VISIBLE_DEVICES="$gpu" srun \
      --cpu_bind=v --accel-bind=gn --exclusive \
      --ntasks=1 --ntasks-per-node=1 \
      --output="${model_name}_${QUESTION_FILE}_rank${gpu}_all${ALL}.log" \
      python eval/infer.py \
      --model-path "$CKPT" \
      --model-name "${model_name}" \
      --dataset "${QUESTION_FILE}" \
      --input-file "$IMAGE_FOLDER/${QUESTION_FILE}.json" \
      --image-folder "$IMAGE_FOLDER" \
      --output-dir "$RESULTDIR" \
      --temperature 0.75 \
      --max-frames 16 \
      --max-pixels 351232 \
      --split "$gpu" \
      --all "$ALL" & # > ${model_name}_${QUESTION_FILE}_rank${gpu}_all${ALL}.log 2>&1 &
    pids+=("$!")
    sleep 2
  done

  # Wait on each step individually: a bare `wait` would discard the exit
  # statuses, and a crashed shard would go unnoticed.
  for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
      echo "Warning: an srun step for ${QUESTION_FILE} (pid ${pid}) exited with a non-zero status" >&2
      failed=1
    fi
  done

  ################### vscan
  # RESULTDIR=./eval/results/$MODE-vscan/${script_paths[i]}/
  # mkdir -p $RESULTDIR
  # ALL=4
  # for gpu in $(seq 0 $(($ALL - 1))); do
  #     echo "****** GPU ${gpu} ${QUESTION_FILE}--${IMAGE_FOLDER} ******"
  #     CUDA_VISIBLE_DEVICES=$gpu srun --cpu_bind=v --accel-bind=gn --exclusive --ntasks=1 --ntasks-per-node=1 --output=${model_name}_prune_${QUESTION_FILE}_rank${gpu}_all${ALL}.log \
  #     python eval/infer_multi.py \
  #     --model-path $CKPT \
  #     --model-name ${model_name} \
  #     --dataset ${script_paths[i]} \
  #     --input-file $IMAGE_FOLDER/${QUESTION_FILE}.json \
  #     --image-folder $IMAGE_FOLDER \
  #     --output-dir $RESULTDIR \
  #     --temperature 0.75 \
  #     --max-frames 16 \
  #     --max-pixels 351232 \
  #     --prune vscan \
  #     --split $gpu \
  #     --all $ALL & # > ${model_name}_${QUESTION_FILE}_rank${gpu}_prune_all${ALL}.log 2>&1 &
  #     sleep 2
  # done
  # wait
done

# Surface shard failures in the job's exit status.
if ((failed)); then
  echo "Error: at least one srun step failed; see the per-rank .log files" >&2
  exit 1
fi