#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --ntasks-per-node=4
#SBATCH --wait-all-nodes=1
#SBATCH --partition=a6000
#SBATCH -w c31

# ---------------------------------------------------------------------------
# Runtime environment for the evaluation job.
# The #SBATCH directives above must start in column 0 and precede the first
# executable command, otherwise sbatch does not pick them up.
# ---------------------------------------------------------------------------

# Cache directory exported for Hugging Face tooling.
export HF_HOME=/mnt/beegfs/dzhu6/.cache

# Load conda's shell integration so that `conda activate` works in this
# non-interactive shell.
eval "$(/home/dzhu6/miniconda3/bin/conda shell.bash hook)" # init conda

# GPU indices made visible to child processes by default; the launch loop
# narrows this to a single index per srun step.
export CUDA_VISIBLE_DEVICES=0,1,2,3

# Rendezvous endpoint: fixed port, address = first host of the allocation.
export MASTER_PORT=12780
master_addr=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
if [[ -z "$master_addr" ]]; then
  echo "Warning: could not resolve a master address from SLURM_JOB_NODELIST" >&2
fi
export MASTER_ADDR=$master_addr

conda activate grounded-rl
# ---------------------------------------------------------------------------
# Evaluation configuration.
#
# `datadirs` and `script_paths` are parallel arrays: entry i of `script_paths`
# (benchmark name) is evaluated against entry i of `datadirs` (its image
# folder, which is also where "<benchmark>.json" is read from). Keep both
# lists the same length and in the same order when enabling/disabling entries.
# ---------------------------------------------------------------------------
datadirs=(
  "/mnt/beegfs/dzhu6/SpatialEval/outputs/images"
  "/mnt/beegfs/dzhu6/coco_images/"
  # "blinkimages"
)

script_paths=(
  "SpatialEval"
  "3dsrbench"
  # "blink"
  # "vsi_bench"
  # "maze"
  # "spar_bench"
  # "mmsi_bench"
)

# Active model selection.
#   model_name - label passed to the inference script and used in log file names
#   CKPT       - checkpoint path (zero_shot/cold_start/reflective/rl CKPT)
#   MODE       - results sub-directory name (zero_shot, cold_start, reflective, rl)
# NOTE(review): MODE is spelled "cold-start" (hyphen) while the option list
# above uses underscores — confirm which spelling downstream tooling expects.
model_name=ViLaSR-cold-start
CKPT=../ViLaSR-cold-start
MODE=cold-start

# Alternative selection (RL checkpoint), kept disabled:
# model_name=ViLaSR
# CKPT=../ViLaSR # zero_shot/cold_start/reflective/rl CKPT
# MODE=rl # zero_shot, cold_start, reflective, rl
# Start-up banner. `split` and `all` are not assigned anywhere in the visible
# part of this script, so they are only meaningful when supplied through the
# environment; fall back to an explicit placeholder instead of printing blanks.
echo "Processing shard ${split:-<unset>} of ${all:-<unset>}"

# Manual cleanup of earlier outputs (kept disabled; enable deliberately):
# rm -rf /mnt/beegfs/dzhu6/ViLaSR/eval/results/rl/blink
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_0_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_1_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_2_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_3_all_4.jsonl
# ---------------------------------------------------------------------------
# Sharded inference over every configured benchmark.
#
# For each index i, script_paths[i] names the benchmark and datadirs[i] is its
# image folder (also the location of "<benchmark>.json"). Each benchmark is
# split into ALL shards; shard k runs as its own background srun step with
# CUDA_VISIBLE_DEVICES=k, and logs to
# "<model_name>_<benchmark>_rank<k>_all<ALL>.log" in the working directory.
# The script waits for all shards of a benchmark before starting the next one.
# ---------------------------------------------------------------------------
ALL=4 # number of shards (and GPU indices 0..ALL-1) per benchmark

for ((i = 0; i < ${#script_paths[@]}; i++)); do
  QUESTION_FILE=${script_paths[i]}
  IMAGE_FOLDER=${datadirs[i]:-}

  # Benchmarks without a matching image folder entry are skipped, not fatal.
  if [[ -z "$IMAGE_FOLDER" ]]; then
    echo "Warning: No image folder defined for $QUESTION_FILE. Skipping..." >&2
    continue
  fi

  RESULTDIR=./eval/results/$MODE/$QUESTION_FILE/
  if ! mkdir -p -- "$RESULTDIR"; then
    echo "Warning: cannot create $RESULTDIR for $QUESTION_FILE. Skipping..." >&2
    continue
  fi

  shard_pids=()
  for ((gpu = 0; gpu < ALL; gpu++)); do
    echo "****** GPU ${gpu} ${QUESTION_FILE}--${IMAGE_FOLDER} ******"
    # ${IMAGE_FOLDER%/} drops one trailing slash so the JSON path has no "//";
    # --image-folder receives the configured value unchanged.
    CUDA_VISIBLE_DEVICES=$gpu srun --cpu-bind=v --accel-bind=gn --exclusive \
      --ntasks=1 --ntasks-per-node=1 \
      --output="${model_name}_${QUESTION_FILE}_rank${gpu}_all${ALL}.log" \
      python eval/infer.py \
      --model-path "$CKPT" \
      --model-name "$model_name" \
      --dataset "$QUESTION_FILE" \
      --input-file "${IMAGE_FOLDER%/}/${QUESTION_FILE}.json" \
      --image-folder "$IMAGE_FOLDER" \
      --output-dir "$RESULTDIR" \
      --temperature 0.75 \
      --max-frames 16 \
      --max-pixels 351232 \
      --split "$gpu" \
      --all "$ALL" &
    shard_pids+=("$!")
    sleep 2 # presumably staggers step launches — TODO confirm it is still needed
  done

  # Barrier: wait for every shard and surface failures instead of silently
  # discarding their exit codes. A failed shard does not stop later benchmarks.
  for shard in "${!shard_pids[@]}"; do
    if ! wait "${shard_pids[shard]}"; then
      echo "Warning: shard ${shard} of ${QUESTION_FILE} exited with a non-zero status" >&2
    fi
  done

  ################### vscan
  # RESULTDIR=./eval/results/$MODE-vscan/${script_paths[i]}/
  # mkdir -p $RESULTDIR
  # ALL=4
  # for gpu in $(seq 0 $(($ALL - 1))); do
  # echo "****** GPU ${gpu} ${QUESTION_FILE}--${IMAGE_FOLDER} ******"
  # CUDA_VISIBLE_DEVICES=$gpu srun --cpu_bind=v --accel-bind=gn --exclusive --ntasks=1 --ntasks-per-node=1 --output=${model_name}_prune_${QUESTION_FILE}_rank${gpu}_all${ALL}.log \
  # python eval/infer_multi.py \
  # --model-path $CKPT \
  # --model-name ${model_name} \
  # --dataset ${script_paths[i]} \
  # --input-file $IMAGE_FOLDER/${QUESTION_FILE}.json \
  # --image-folder $IMAGE_FOLDER \
  # --output-dir $RESULTDIR \
  # --temperature 0.75 \
  # --max-frames 16 \
  # --max-pixels 351232 \
  # --prune vscan \
  # --split $gpu \
  # --all $ALL & # > ${model_name}_${QUESTION_FILE}_rank${gpu}_prune_all${ALL}.log 2>&1 &
  # sleep 2
  # done
  # wait
done