ViLaSR / eval /infer.sh
cornuHGF's picture
Upload folder using huggingface_hub
1efcb3c verified
#!/bin/bash -x
# Slurm batch job that runs the ViLaSR evaluation inference launched below.
# The -x flag on the interpreter line makes bash trace each command it runs.
#
# Resource request: one node, four tasks (all on that node), partition
# "a6000", restricted to host c31. --wait-all-nodes=1 asks Slurm not to start
# the script until every allocated node is ready.
# These directives must stay ahead of the first executable command; sbatch
# stops reading them once it reaches one.
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --ntasks-per-node=4
#SBATCH --wait-all-nodes=1
#SBATCH --partition=a6000
#SBATCH -w c31
# Root directory for Hugging Face caches, exported so child processes see it.
export HF_HOME=/mnt/beegfs/dzhu6/.cache
# Load conda's shell integration so that `conda activate` works in this
# non-interactive shell.
eval "$(/home/dzhu6/miniconda3/bin/conda shell.bash hook)" # init conda
# Job-wide GPU list; each srun launch further down overrides this with a
# single GPU index.
export CUDA_VISIBLE_DEVICES=0,1,2,3
# Rendezvous port and address exported for child processes. Nothing in this
# script reads them; presumably the Python code uses them for distributed
# initialisation -- TODO confirm.
export MASTER_PORT=12780
# First hostname of the job's node list, as expanded by scontrol.
master_addr=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
export MASTER_ADDR=$master_addr
# Switch to the conda environment used for the inference run.
conda activate grounded-rl
# ---------------------------------------------------------------------------
# Benchmark selection.
#
# script_paths[k] is a dataset name and datadirs[k] is the image folder that
# goes with it. The launch loop reads both arrays at the same index (and looks
# for <image folder>/<dataset name>.json), so entries are added in pairs.
# ---------------------------------------------------------------------------
script_paths=()
datadirs=()

script_paths+=('SpatialEval')
datadirs+=('/mnt/beegfs/dzhu6/SpatialEval/outputs/images')

script_paths+=('3dsrbench')
datadirs+=('/mnt/beegfs/dzhu6/coco_images/')

# Disabled benchmarks. Only "blink" has an image folder recorded for it.
# script_paths+=('blink')
# datadirs+=('blinkimages')
# script_paths+=('vsi_bench')
# script_paths+=('maze')
# script_paths+=('spar_bench')
# script_paths+=('mmsi_bench')

# ---------------------------------------------------------------------------
# Model selection.
#
# model_name : passed to infer.py as --model-name and used in log file names.
# CKPT       : checkpoint path passed as --model-path
#              (zero_shot / cold_start / reflective / rl checkpoint).
# MODE       : sub-directory of ./eval/results/ that receives the outputs.
#              Documented choices: zero_shot, cold_start, reflective, rl.
#              NOTE(review): the active value is spelled with a hyphen while
#              the documented choices use underscores -- confirm which
#              spelling downstream tooling expects.
# ---------------------------------------------------------------------------
model_name='ViLaSR-cold-start'
CKPT='../ViLaSR-cold-start'
MODE='cold-start'

# Alternative setup (RL checkpoint):
# model_name='ViLaSR'
# CKPT='../ViLaSR'
# MODE='rl'
# Announce what is about to run. Shard indices only exist inside the launch
# loop (the variables "split" and "all" are not assigned anywhere in this
# script), so report the list of selected datasets here instead.
echo "Processing ${#script_paths[@]} dataset(s): ${script_paths[*]}"
# rm -rf /mnt/beegfs/dzhu6/ViLaSR/eval/results/rl/blink
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_0_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_1_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_2_all_4.jsonl
# rm /mnt/beegfs/dzhu6/VisualSketchpad/ViLaSR_traj_False_prune_None_maxnewtokens_16384_split_3_all_4.jsonl
# ---------------------------------------------------------------------------
# Run inference for every selected dataset, split into ALL shards.
#
# For each index i:
#   * script_paths[i] is the dataset name (also the stem of the question file)
#   * datadirs[i]     is the image folder; a dataset without one is skipped
# Each shard is launched in the background through srun with a single GPU
# (CUDA_VISIBLE_DEVICES=<shard index>) and writes to its own log file in the
# current directory. All shards of a dataset are awaited before the next
# dataset starts, and a shard that exits non-zero is reported on stderr
# instead of being silently ignored.
# ---------------------------------------------------------------------------
ALL=4            # number of shards per dataset; one srun task / GPU each
failed_shards=0  # shards (or whole datasets) that could not be completed

for ((i = 0; i < ${#script_paths[@]}; i++)); do
  QUESTION_FILE=${script_paths[i]}
  IMAGE_FOLDER=${datadirs[i]}
  if [[ -z "$IMAGE_FOLDER" ]]; then
    echo "Warning: No image folder defined for $QUESTION_FILE. Skipping..."
    continue
  fi

  RESULTDIR=./eval/results/$MODE/$QUESTION_FILE/
  if ! mkdir -p -- "$RESULTDIR"; then
    echo "Error: cannot create result directory $RESULTDIR; skipping $QUESTION_FILE." >&2
    failed_shards=$((failed_shards + ALL))
    continue
  fi

  # PID of every background launch, indexed by shard number, so that each
  # shard's exit status can be collected individually afterwards.
  shard_pids=()
  for ((gpu = 0; gpu < ALL; gpu++)); do
    echo "****** GPU ${gpu} ${QUESTION_FILE}--${IMAGE_FOLDER} ******"
    # NOTE(review): current srun documentation spells the first option
    # --cpu-bind; confirm the underscore form is accepted by this cluster.
    CUDA_VISIBLE_DEVICES=$gpu srun --cpu_bind=v --accel-bind=gn --exclusive \
      --ntasks=1 --ntasks-per-node=1 \
      --output="${model_name}_${QUESTION_FILE}_rank${gpu}_all${ALL}.log" \
      python eval/infer.py \
      --model-path "$CKPT" \
      --model-name "${model_name}" \
      --dataset "$QUESTION_FILE" \
      --input-file "$IMAGE_FOLDER/${QUESTION_FILE}.json" \
      --image-folder "$IMAGE_FOLDER" \
      --output-dir "$RESULTDIR" \
      --temperature 0.75 \
      --max-frames 16 \
      --max-pixels 351232 \
      --split "$gpu" \
      --all "$ALL" &
    shard_pids[gpu]=$!
    sleep 2 # stagger the launches
  done

  # Barrier: wait for every shard of this dataset and check how it ended.
  for gpu in "${!shard_pids[@]}"; do
    wait "${shard_pids[gpu]}"
    shard_status=$?
    if ((shard_status != 0)); then
      echo "Error: shard ${gpu} of ${ALL} for ${QUESTION_FILE} failed (exit status ${shard_status})." >&2
      failed_shards=$((failed_shards + 1))
    fi
  done

  ################### vscan
  # Optional second pass with --prune vscan (disabled):
  # RESULTDIR=./eval/results/$MODE-vscan/${script_paths[i]}/
  # mkdir -p $RESULTDIR
  # ALL=4
  # for gpu in $(seq 0 $(($ALL - 1))); do
  # echo "****** GPU ${gpu} ${QUESTION_FILE}--${IMAGE_FOLDER} ******"
  # CUDA_VISIBLE_DEVICES=$gpu srun --cpu_bind=v --accel-bind=gn --exclusive --ntasks=1 --ntasks-per-node=1 --output=${model_name}_prune_${QUESTION_FILE}_rank${gpu}_all${ALL}.log \
  # python eval/infer_multi.py \
  # --model-path $CKPT \
  # --model-name ${model_name} \
  # --dataset ${script_paths[i]} \
  # --input-file $IMAGE_FOLDER/${QUESTION_FILE}.json \
  # --image-folder $IMAGE_FOLDER \
  # --output-dir $RESULTDIR \
  # --temperature 0.75 \
  # --max-frames 16 \
  # --max-pixels 351232 \
  # --prune vscan \
  # --split $gpu \
  # --all $ALL & # > ${model_name}_${QUESTION_FILE}_rank${gpu}_prune_all${ALL}.log 2>&1 &
  # sleep 2
  # done
  # wait
done

if ((failed_shards > 0)); then
  echo "Error: ${failed_shards} shard(s) did not complete successfully; see the per-shard .log files." >&2
  # Leave a non-zero status behind without calling exit: when this is the
  # last statement of the script the job is reported as failed, and anything
  # that follows still runs.
  false
fi