#!/bin/bash
#SBATCH --job-name=lavt-rz-trial
#SBATCH --partition=a6000
#SBATCH --gres=gpu:2
# Time limit format is d-hh:mm:ss; this is the maximum run time for the job.
#SBATCH --time=13-11:30:00
# CPU (host) memory size; Slurm reads a bare number as megabytes.
#SBATCH --mem=28000
# Number of CPU cores per task.
#SBATCH --cpus-per-task=4
# Relative to the submission directory.
# NOTE(review): Slurm is not expected to create ./logs itself - confirm it
# exists before submitting.
#SBATCH --output=./logs/lavt_refzom_repro_bs32.log

# Slurm job that launches train_refzom.py through torchrun on 2 GPUs.
#
# Usage (see the example submissions at the bottom of this file):
#   sbatch ./scripts/baseline_repro_refzom.sh OUTPUT_DIR EXP_NAME
#
# Arguments:
#   $1  OUTPUT_DIR - forwarded to train_refzom.py as --output-dir
#   $2  EXP_NAME   - forwarded to train_refzom.py as --model_id
# Extra positional arguments are accepted and ignored.

# Print an error message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Fail fast on missing arguments: an empty value would otherwise make
# --output-dir / --model_id consume the flag that follows them.
if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'usage: sbatch ./scripts/baseline_repro_refzom.sh OUTPUT_DIR EXP_NAME\n' >&2
  exit 2
fi

# --- Environment ------------------------------------------------------------
ml purge
ml load cuda/11.8

eval "$(conda shell.bash hook)"
conda activate risall || die "could not activate conda environment 'risall'"

# All relative paths below (train_refzom.py, ./pretrained_weights, and a
# relative OUTPUT_DIR) are resolved from this directory.
cd /data2/projects/chaeyun/LAVT-RIS/ \
  || die "cannot cd to /data2/projects/chaeyun/LAVT-RIS/"

# Disable NCCL's GPU peer-to-peer transport.
export NCCL_P2P_DISABLE=1
# Disable TF32 math in NVIDIA libraries.
# NOTE(review): presumably for reproducibility of the reference numbers - confirm.
export NVIDIA_TF32_OVERRIDE=0

# --- Job parameters ---------------------------------------------------------
# Number of torchrun worker processes; keep in sync with --gres=gpu:2 above.
GPUS=2
OUTPUT_DIR=$1
EXP_NAME=$2
# Fixed rendezvous port.
# NOTE(review): two jobs sharing a node with this port would presumably
# collide - confirm.
PORT=7852

# TRAIN
# hardpos_only, hardpos_only_rev
# NOTE(review): --split is "test" although this section is labelled TRAIN -
# confirm that this is what train_refzom.py expects.
# NOTE(review): --batch-size is presumably per process (2 x 16 = 32 would match
# the "bs32" in the log/checkpoint names) - TODO confirm.
train_args=(
  --model lavt_one
  --dataset ref-zom
  --splitBy final
  --split test
  --output-dir "${OUTPUT_DIR}"
  --model_id "${EXP_NAME}"
  --batch-size 16
  --lr 0.00005
  --wd 1e-2
  --swin_type base
  --pretrained_swin_weights ./pretrained_weights/swin_base_patch4_window12_384_22k.pth
  --resume /data2/projects/chaeyun/LAVT-RIS/models/refzom_lavt_bs32_repro/model_best_refzom_lavt_bs32_repro.pth
  --epochs 40
  --img_size 480
)

# NOTE(review): CUDA_VISIBLE_DEVICES is hard-coded to 0,1 and overrides any
# value Slurm set for the allocation - verify this matches the cluster's GPU
# binding. torchrun is the last command, so its exit status becomes the job's.
CUDA_VISIBLE_DEVICES=0,1 torchrun \
  --nproc_per_node="${GPUS}" --master_port="${PORT}" train_refzom.py \
  "${train_args[@]}"

# Example submissions (notes kept from earlier experiments):
# lavt-rzom
# sbatch ./scripts/baseline_test_lr2.sh ./models/gref_m05_tmp010_4gpu_bs32_orig gref_m05_tmp010_4gpu_bs32_orig 10 0.10 hardpos_only 0.05
# gref_m10_tmp005_4gpu_bs32
# margin 10 tmp 0.05 refined
# sbatch ./scripts/baseline_test_lr2.sh ./models/gref_m10_tmp005_4gpu_bs32 gref_m10_tmp005_4gpu_bs32 10 0.05 hardpos_only_refined
# sbatch ./scripts/baseline_repro_refzom.sh ./models/refzom_lavt_bs32_repro refzom_lavt_bs32_repro
# sbatch ./scripts/baseline_repro_refzom.sh ./models/refzom_lavt_bs16_repro refzom_lavt_bs16_repro