#!/bin/bash
#SBATCH --job-name=lavt-rz-trial
#SBATCH --partition=a6000
#SBATCH --gres=gpu:2
#SBATCH --time=13-11:30:00     # d-hh:mm:ss, max wall-clock time for this job
#SBATCH --mem=28000            # CPU memory size (MB)
#SBATCH --cpus-per-task=4      # number of CPU cores per task
#SBATCH --output=./logs/lavt_refzom_repro_bs32.log

# Reproduce LAVT training on Ref-ZOM.
# Usage: sbatch ./scripts/baseline_repro_refzom.sh <OUTPUT_DIR> <EXP_NAME>

set -u  # fail fast on unset variables (e.g. missing positional args)

# Required positional arguments; abort with a usage message if missing.
OUTPUT_DIR=${1:?usage: sbatch $0 OUTPUT_DIR EXP_NAME}
EXP_NAME=${2:?usage: sbatch $0 OUTPUT_DIR EXP_NAME}

ml purge
ml load cuda/11.8

# Make `conda activate` available in this non-interactive shell.
eval "$(conda shell.bash hook)"
conda activate risall

# Training must run from the project root; abort if it is absent.
cd /data2/projects/chaeyun/LAVT-RIS/ || exit 1

export NCCL_P2P_DISABLE=1      # work around NCCL peer-to-peer issues on this node
export NVIDIA_TF32_OVERRIDE=0  # disable TF32 for numerical reproducibility

GPUS=2
PORT=7852

# TRAIN
# hardpos_only, hardpos_only_rev
CUDA_VISIBLE_DEVICES=0,1 torchrun \
    --nproc_per_node="$GPUS" --master_port="$PORT" train_refzom.py \
    --model lavt_one \
    --dataset ref-zom \
    --splitBy final \
    --split test \
    --output-dir "${OUTPUT_DIR}" \
    --model_id "${EXP_NAME}" \
    --batch-size 16 \
    --lr 0.00005 \
    --wd 1e-2 \
    --swin_type base \
    --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth \
    --resume /data2/projects/chaeyun/LAVT-RIS/models/refzom_lavt_bs32_repro/model_best_refzom_lavt_bs32_repro.pth \
    --epochs 40 \
    --img_size 480

# lavt-rzom — example invocations:
# sbatch ./scripts/baseline_test_lr2.sh ./models/gref_m05_tmp010_4gpu_bs32_orig gref_m05_tmp010_4gpu_bs32_orig 10 0.10 hardpos_only 0.05
# gref_m10_tmp005_4gpu_bs32  (margin 10, tmp 0.05, refined)
# sbatch ./scripts/baseline_test_lr2.sh ./models/gref_m10_tmp005_4gpu_bs32 gref_m10_tmp005_4gpu_bs32 10 0.05 hardpos_only_refined
# sbatch ./scripts/baseline_repro_refzom.sh ./models/refzom_lavt_bs32_repro refzom_lavt_bs32_repro
# sbatch ./scripts/baseline_repro_refzom.sh ./models/refzom_lavt_bs16_repro refzom_lavt_bs16_repro